def create_feed_dict(batch, wLabels=True, dtype='train'):
    """Assemble the feed dictionary for one batch.

    Args:
        batch: Sequence of ``(X, Y, DepEdges, SrlEdges)`` tuples.
        wLabels: When true, the three label placeholders are fed from ``Y``.
        dtype: Split name; any value other than ``'train'`` feeds 1.0 into
            both dropout placeholders.

    Returns:
        Tuple ``(feed_dict, para_id_list, entity_list)``.
    """

    def to_sparse_value(coo):
        # (row, col) pairs become the N x 2 index array of the sparse value.
        return tf.SparseTensorValue(
            indices=np.array([coo.row, coo.col]).T,
            values=coo.data,
            dense_shape=coo.shape)

    X, Y, DepEdges, SrlEdges = zip(*batch)
    (x_paragraph, x_sentences, x_entity, x_len_paragraph, x_len_sentences,
     x_len_entity, x_mask_sentence, para_id_list,
     entity_list) = get_placeholder_values(X)

    feed_dict = {}
    dense_inputs = (
        (input_x_paragraph, x_paragraph),
        (input_x_sentence, x_sentences),
        (input_x_entity, x_entity),
        (input_x_len_paragraph, x_len_paragraph),
        (input_x_len_sentence, x_len_sentences),
        (input_x_len_entity, x_len_entity),
        (input_x_mask_sentence, x_mask_sentence),
    )
    for placeholder, value in dense_inputs:
        feed_dict[placeholder] = value

    if wLabels:
        y_known, y_start, y_end = get_placeholder_labels(Y)
        feed_dict[input_y_known] = y_known
        feed_dict[input_y_start] = y_start
        feed_dict[input_y_end] = y_end

    de_adj_in, de_adj_out = get_adj(DepEdges, batch_size, paragraph_size,
                                    num_deLabel)
    srl_adj_in, srl_adj_out = get_adj(SrlEdges, batch_size, paragraph_size,
                                      num_srlLabel)

    # One sparse value per (batch element, edge label, direction).
    for i in range(batch_size):
        for lbl in range(num_deLabel):
            feed_dict[de_adj_mat_in[i][lbl]] = to_sparse_value(
                de_adj_in[i][lbl])
            feed_dict[de_adj_mat_out[i][lbl]] = to_sparse_value(
                de_adj_out[i][lbl])

    for i in range(batch_size):
        for lbl in range(num_srlLabel):
            feed_dict[srl_adj_mat_in[i][lbl]] = to_sparse_value(
                srl_adj_in[i][lbl])
            feed_dict[srl_adj_mat_out[i][lbl]] = to_sparse_value(
                srl_adj_out[i][lbl])

    if dtype != 'train':
        feed_dict[dropout] = 1.0
        feed_dict[rec_dropout] = 1.0

    return feed_dict, para_id_list, entity_list
def _convert_sparse_matrix_to_sparse_tensor(X, got_limit=False, limit=5):
    """Convert a scipy sparse matrix into a pair of SparseTensorValues.

    Args:
        X: Sparse matrix exposing ``tocoo()``.
        got_limit: When true, stored values greater than ``limit`` are
            clipped down to ``limit``.
        limit: Upper bound applied when ``got_limit`` is true.

    Returns:
        Tuple ``(ids, weights)`` of ``tf.SparseTensorValue`` sharing the same
        indices and dense shape: ``ids`` carries the column index of every
        stored entry as its values, ``weights`` carries the (optionally
        clipped) stored data.
    """
    coo = X.tocoo()
    # np.mat was removed in NumPy 2.0; column_stack yields the same N x 2
    # (row, col) layout as a plain ndarray.
    indices = np.column_stack((coo.row, coo.col))
    data = coo.data
    if got_limit:
        # Clip on a copy: tocoo() may hand back X itself when X is already
        # in COO format, and the caller's matrix must not be modified.
        data = data.copy()
        data[data > limit] = limit
    return (
        tf.SparseTensorValue(indices, coo.col, coo.shape),
        tf.SparseTensorValue(indices, data, coo.shape),
    )
def testMakeOutputDictErrorSparse(self):
    """to_instance_dicts must reject sparse fetches it cannot decode."""

    def sparse(indices, values, dense_shape):
        return tf.SparseTensorValue(indices=np.array(indices),
                                    values=np.array(values),
                                    dense_shape=dense_shape)

    single_schema = self.toSchema({'a': tf.VarLenFeature(tf.string)})
    not_decodable = 'cannot be decoded by ListColumnRepresentation'
    single_feature_cases = [
        # SparseTensor that cannot be represented as VarLenFeature.
        (sparse([(0, 2), (0, 4), (0, 8)], [10.0, 20.0, 30.0], (1, 20)),
         not_decodable),
        # SparseTensor of invalid rank.
        (sparse([(0, 0, 1), (0, 0, 2), (0, 0, 3)], [10.0, 20.0, 30.0],
                (1, 10, 10)),
         not_decodable),
        # SparseTensor with indices that are out of order.
        (sparse([(0, 2), (2, 4), (1, 8)], [10.0, 20.0, 30.0], (3, 20)),
         'Encountered out-of-order sparse index'),
    ]
    for bad_value, pattern in single_feature_cases:
        with self.assertRaisesRegexp(ValueError, pattern):
            impl_helper.to_instance_dicts(single_schema, {'a': bad_value})

    # SparseTensors with different batch dimension sizes.
    pair_schema = self.toSchema({
        'a': tf.VarLenFeature(tf.string),
        'b': tf.VarLenFeature(tf.string)
    })
    mismatched = {
        'a': sparse([(0, 0)], [10.0], (1, 20)),
        'b': sparse([(0, 0)], [10.0], (2, 20)),
    }
    with self.assertRaisesRegexp(
            ValueError,
            r'Inconsistent batch sizes: "\w" had batch dimension \d, "\w" had batch'
            r' dimension \d'):
        impl_helper.to_instance_dicts(pair_schema, mismatched)
def create_feed_dict(self, batch, wLabels=True, dtype='train'):
    """Assemble the feed dictionary for one batch.

    Args:
        batch: Sequence of ``(X, Y, et_idx, ETEdges, DepEdges)`` tuples.
        wLabels: When true, ``self.input_y`` is fed from ``Y``.
        dtype: Split name; any value other than ``'train'`` feeds 1.0 into
            both dropout placeholders.

    Returns:
        The feed dictionary.
    """

    def to_sparse_value(coo):
        # (row, col) pairs become the N x 2 index array of the sparse value.
        return tf.SparseTensorValue(
            indices=np.array([coo.row, coo.col]).T,
            values=coo.data,
            dense_shape=coo.shape)

    X, Y, et_idx, ETEdges, DepEdges = zip(*batch)
    x_pad, x_len, et_pad, et_mask, seq_len, max_et = self.pad_dynamic(
        X, et_idx)

    feed_dict = {
        self.input_x: np.array(x_pad),
        self.x_len: np.array(x_len),
    }
    if wLabels:
        feed_dict[self.input_y] = np.array(Y)
    feed_dict[self.et_idx] = np.array(et_pad)
    feed_dict[self.et_mask] = np.array(et_mask)
    feed_dict[self.seq_len] = seq_len
    feed_dict[self.max_et] = max_et

    n_batch = self.p.batch_size
    # max_et + 1(DCT)
    et_adj_in, et_adj_out = self.get_adj(ETEdges, n_batch, max_et + 1,
                                         self.num_etLabel)
    de_adj_in, de_adj_out = self.get_adj(DepEdges, n_batch, seq_len,
                                         self.num_deLabel)

    for i in range(n_batch):
        for lbl in range(self.num_etLabel):
            feed_dict[self.et_adj_mat_in[i][lbl]] = to_sparse_value(
                et_adj_in[i][lbl])
            feed_dict[self.et_adj_mat_out[i][lbl]] = to_sparse_value(
                et_adj_out[i][lbl])
        for lbl in range(self.num_deLabel):
            feed_dict[self.de_adj_mat_in[i][lbl]] = to_sparse_value(
                de_adj_in[i][lbl])
            feed_dict[self.de_adj_mat_out[i][lbl]] = to_sparse_value(
                de_adj_out[i][lbl])

    if dtype != 'train':
        feed_dict[self.dropout] = 1.0
        feed_dict[self.rec_dropout] = 1.0

    return feed_dict
def create_feed_dict(self, batch, wLabels=True, dtype='train'):
    """
    Creates a feed dictionary for the batch

    Parameters
    ----------
    batch:      Sequence of (X, Y, et_idx, ETEdges, DepEdges) tuples
    wLabels:    Whether the labels in the batch should be fed
    dtype:      Split name; any value other than 'train' feeds 1.0 into
                both dropout placeholders

    Returns
    -------
    feed_dict   Feed dictionary to be fed during sess.run
    """

    def to_sparse_value(coo):
        # (row, col) pairs become the N x 2 index array of the sparse value.
        return tf.SparseTensorValue(
            indices=np.array([coo.row, coo.col]).T,
            values=coo.data,
            dense_shape=coo.shape)

    X, Y, et_idx, ETEdges, DepEdges = zip(*batch)
    x_pad, x_len, et_pad, et_mask, seq_len, max_et = self.pad_dynamic(
        X, et_idx)

    feed_dict = {
        self.input_x: np.array(x_pad),
        self.x_len: np.array(x_len),
    }
    if wLabels:
        feed_dict[self.input_y] = np.array(Y)
    feed_dict[self.et_idx] = np.array(et_pad)
    feed_dict[self.et_mask] = np.array(et_mask)
    feed_dict[self.seq_len] = seq_len
    feed_dict[self.max_et] = max_et

    n_batch = self.p.batch_size
    # max_et + 1(DCT)
    et_adj = self.get_adj(ETEdges, n_batch, max_et + 1, self.num_etLabel)
    de_adj = self.get_adj(DepEdges, n_batch, seq_len, self.num_deLabel)

    for i in range(n_batch):
        for lbl in range(self.num_etLabel):
            feed_dict[self.et_adj_mat[i][lbl]] = to_sparse_value(
                et_adj[i][lbl])
        for lbl in range(self.num_deLabel):
            feed_dict[self.de_adj_mat[i][lbl]] = to_sparse_value(
                de_adj[i][lbl])

    if dtype != 'train':
        feed_dict[self.dropout] = 1.0
        feed_dict[self.rec_dropout] = 1.0

    return feed_dict
def test_evaluate(self):
    """Smoke test: one optimisation step plus hold-out evaluation.

    Repeated ten times on fresh graphs; checks that the fetched ``qbeta`` and
    ``qgamma`` values exist and that the evaluation result is not NaN.
    """
    table = np.array([[1, 2, 1, 0, 0, 0],
                      [0, 1, 2, 1, 0, 0],
                      [0, 0, 1, 2, 1, 0],
                      [0, 0, 0, 1, 2, 1]])
    md = np.array([[1, 2, 3, 4]]).T
    md_holdout = np.array([[1.5, 2.5]]).T
    table_holdout = np.array([[1, 2, 1, 0, 0, 0],
                              [0, 1, 2, 1, 0, 0]], dtype=np.float32)

    N, D = table.shape
    # NOTE(review): M (hold-out row count) is computed here but model.M is
    # assigned N below -- confirm that is intended.
    M, D = table_holdout.shape
    p = md.shape[1]

    table = coo_matrix(table)
    table_holdout = coo_matrix(table_holdout)
    opts = Options(batch_size=5,
                   num_neg_samples=3,
                   learning_rate=1e-1,
                   clipping_size=10,
                   beta_mean=0,
                   beta_scale=1,
                   gamma_mean=0,
                   gamma_scale=1)

    for _ in range(10):
        with tf.Graph().as_default(), tf.Session() as sess:
            train_indices = np.array([table.row, table.col]).T
            y_data = tf.SparseTensorValue(indices=train_indices,
                                          values=table.data,
                                          dense_shape=(N, D))
            holdout_indices = np.array(
                [table_holdout.row, table_holdout.col]).T
            y_holdout = tf.SparseTensorValue(
                indices=holdout_indices,
                values=table_holdout.data,
                dense_shape=table_holdout.shape)
            G_data = tf.constant(md, dtype=tf.float32)
            G_holdout = tf.constant(md_holdout, dtype=tf.float32)

            model = PoissonRegression(opts, sess)
            model.N = N
            model.M = N
            model.D = D
            model.p = p
            model.num_nonzero = table.nnz

            batch = model.sample(y_data)
            log_loss = model.loss(G_data, y_data, batch)
            train = model.optimize(log_loss)
            mad = model.evaluate(G_holdout, y_holdout)
            tf.global_variables_initializer().run()

            fetched = sess.run(
                [train, mad, log_loss, model.qbeta, model.qgamma])
            _, mad_value, _, beta, gamma = fetched

            self.assertIsNotNone(beta)
            self.assertIsNotNone(gamma)
            # Look at mean absolute error
            self.assertFalse(np.isnan(mad_value))
def __init__(self, sess, n, filename, jump_prob=0.05, drop_tol=1e-8,
             verbose=False):
    """
    Computes PPR using LU decomposition.

    Args:
        sess (Session): tensorflow session.
        n (int): Number of nodes.
        filename (str): A csv file denoting the graph.
        jump_prob (float): Jumping probability of PPR.
        drop_tol (float): Drop tolerance handed to the incomplete LU
            factorisation. ``None`` selects ``n ** -0.5``; ``0`` switches
            to the complete factorisation (``splu``) instead.
        verbose (bool): Prints step messages if True.
    """
    self.alias = 'ludc'
    self.verbose = verbose
    self.pp("initializing")

    self.sess = sess
    self.n = n
    self.c = jump_prob
    damping = 1 - self.c

    # Resolve the tolerance: None -> n^-0.5, exactly 0 -> exact LU.
    use_exact = drop_tol is not None and drop_tol == 0
    tolerance = np.power(n, -0.5) if drop_tol is None else drop_tol

    self.pp("reading")
    self.node2index, H = read_matrix(filename, d=-damping, add_identity=True)

    self.pp("sorting H")
    self.perm = degree_reverse_rank_perm(H)
    H = reorder_matrix(H, self.perm).tocsc()

    self.pp("computing LU decomposition")
    self.LU = splu(H) if use_exact else spilu(H, drop_tol=tolerance)
    lower_inv = inv(self.LU.L).tocoo()
    upper_inv = inv(self.LU.U).tocoo()

    self.pp("tf init")
    with tf.variable_scope('ppr_lu_decomposition_tf'):
        t_Linv = tf.SparseTensorValue(
            list(zip(lower_inv.row, lower_inv.col)),
            lower_inv.data,
            dense_shape=self.LU.L.shape)
        t_Uinv = tf.SparseTensorValue(
            list(zip(upper_inv.row, upper_inv.col)),
            upper_inv.data,
            dense_shape=self.LU.U.shape)
        self.t_q = tf.placeholder(tf.float64, shape=[self.n, 1])
        # r = U^-1 (L^-1 (c * q))
        self.t_r = _sdmm(t_Uinv, _sdmm(t_Linv, self.c * self.t_q))
def Train(self, inputs, ArcNode, target, step, nodegraph=0.0, mask=None):
    """Run one training step.

    Args:
        inputs: Value fed to ``self.comp_inp``.
        ArcNode: Object exposing ``indices``, ``values`` and ``dense_shape``;
            fed to ``self.ArcNode`` as a sparse value.
        target: Value fed to ``self.y``.
        step: Step counter; summaries are written when it is a multiple of
            100 and ``self.tensorboard`` is set.
        nodegraph: Value fed to ``self.NodeGraph``. When ``self.graph_based``
            is set it is converted to a sparse value built from its non-zero
            entries.
        mask: Value fed to ``self.mask`` when ``self.mask_flag`` is set.

    Returns:
        Tuple ``(loss, loop[1])`` where ``loop`` is the fetched value of
        ``self.loss_op``.
    """
    # Creating a sparse value from the fed ArcNode matrix
    arcnode_value = tf.SparseTensorValue(indices=ArcNode.indices,
                                         values=ArcNode.values,
                                         dense_shape=ArcNode.dense_shape)
    if self.graph_based:
        rows, cols = nodegraph.nonzero()
        nodegraph = tf.SparseTensorValue(
            indices=np.stack([rows, cols], axis=1),
            values=nodegraph[rows, cols],
            dense_shape=nodegraph.shape)

    state_shape = (ArcNode.dense_shape[0], self.state_dim)
    fd = {
        self.NodeGraph: nodegraph,
        self.comp_inp: inputs,
        self.state: np.zeros(state_shape),
        self.state_old: np.ones(state_shape),
        self.ArcNode: arcnode_value,
        self.y: target,
    }
    if self.mask_flag:
        fd[self.mask] = mask

    if not self.tensorboard:
        _, loss, loop = self.session.run(
            [self.train_op, self.loss, self.loss_op], feed_dict=fd)
        return loss, loop[1]

    _, loss, loop, merge_all, merge_tr = self.session.run(
        [self.train_op, self.loss, self.loss_op, self.merged_all,
         self.merged_train],
        feed_dict=fd)
    if step % 100 == 0:
        self.writer.add_summary(merge_all, step)
        self.writer.add_summary(merge_tr, step)
    return loss, loop[1]
def _SparseTensorValue_3x50(self, indices_dtype, values_dtype):
    """Return an ``(ids, values)`` pair of sparse fixtures.

    Both sparse values share the same six positions and the dense shape
    ``[3, 3]``; their payloads are cast to ``indices_dtype`` and
    ``values_dtype`` respectively.
    """
    # NOTE: This input is intentionally not sorted to validate the
    # already_sorted flag below.
    positions = [[0, 0], [1, 0], [1, 2], [2, 0], [2, 1], [1, 1]]
    dense_shape = [3, 3]

    def build(payload, payload_dtype):
        return tf.SparseTensorValue(np.array(positions, np.int64),
                                    np.array(payload, payload_dtype),
                                    np.array(dense_shape, np.int64))

    # NB: these are not sorted
    ids = build([0, 13, 10, 33, 32, 14], indices_dtype)
    weights = build([-3, 4, 1, 9, 5, 1], values_dtype)
    return ids, weights
def evaluate_simfunc(self, W_sparse_vals):
    """Recompute the stored values of a sparse matrix with ``_sim_func``.

    For every stored ``(i, j)`` position of ``W_sparse_vals`` the similarity
    op is evaluated on rows ``i`` and ``j`` of ``self.X_descr``. Positions
    are processed in chunks of ``self.X.shape[0]``.

    Args:
        W_sparse_vals: Sparse value exposing ``indices``, ``values`` and
            ``dense_shape``.

    Returns:
        A ``tf.SparseTensorValue`` with the same indices and dense shape and
        the freshly evaluated values.
    """
    if self.eval_get_data is None:
        # Build the evaluation ops once and cache them on the instance.
        self.eval_ids_i = tf.placeholder(tf.int32, shape=[None])
        self.eval_ids_j = tf.placeholder(tf.int32, shape=[None])
        self.eval_X_descr = tf.placeholder(tf.float32)
        self.eval_get_data = self._sim_func(
            X1=tf.gather(self.eval_X_descr, self.eval_ids_i),
            X2=tf.gather(self.eval_X_descr, self.eval_ids_j))

    first_ids = np.reshape(np.asarray(W_sparse_vals.indices[:, 0]), (-1, ))
    second_ids = np.reshape(np.asarray(W_sparse_vals.indices[:, 1]), (-1, ))
    n_pairs = first_ids.shape[0]
    data = np.zeros(first_ids.shape)

    start = 0
    while start < n_pairs:
        stop = min(start + self.X.shape[0], n_pairs)
        chunk_feed = {
            self.W.indices: W_sparse_vals.indices,
            self.W.values: W_sparse_vals.values,
            self.eval_ids_i: first_ids[start:stop],
            self.eval_ids_j: second_ids[start:stop],
            self.eval_X_descr: self.X_descr,
        }
        data[start:stop] = self.sess.run(self.eval_get_data,
                                         feed_dict=chunk_feed)
        start = stop

    return tf.SparseTensorValue(W_sparse_vals.indices, data,
                                W_sparse_vals.dense_shape)
def testCopyTensorsProducesEquivalentTensors(self):
    """Copies made by copy_tensors must evaluate to the fed values."""
    dense_input = tf.placeholder(tf.int64, (None, ), name='my_dense_input')
    sparse_input = tf.sparse_placeholder(tf.int64, name='my_sparse_input')
    tensors = {'dense': dense_input, 'sparse': sparse_input}
    copied_tensors = impl_helper.copy_tensors(tensors)

    with tf.Session() as session:
        dense_value = [1, 2]
        sparse_value = tf.SparseTensorValue(
            indices=[[0, 0], [0, 2], [1, 1]],
            values=[3, 4, 5],
            dense_shape=[2, 3])
        feed = {
            tensors['dense']: dense_value,
            tensors['sparse']: sparse_value,
        }
        sample_tensors = session.run(copied_tensors, feed_dict=feed)

        self.assertAllEqual(sample_tensors['dense'], dense_value)
        copied_sparse = sample_tensors['sparse']
        # Compare the sparse value component by component.
        self.assertAllEqual(copied_sparse.indices, sparse_value.indices)
        self.assertAllEqual(copied_sparse.values, sparse_value.values)
        self.assertAllEqual(copied_sparse.dense_shape,
                            sparse_value.dense_shape)
def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
    """Extend the parent feed dict with sparse training targets.

    Args:
        dataset: Dataset queried for the series ``self.data_id``.
        train: Training-mode flag, forwarded to the vocabulary.

    Returns:
        The feed dict from ``ModelPart.feed_dict``; when the series is
        present, ``self.train_targets`` is fed with a sparse value of the
        unpadded positions.

    Raises:
        ValueError: If ``train`` is true and the series is missing.
    """
    fd = ModelPart.feed_dict(self, dataset, train)

    sentences = cast(Iterable[List[str]],
                     dataset.maybe_get_series(self.data_id))
    if sentences is None:
        if train:
            raise ValueError(
                "When training, you must feed reference sentences")
        return fd

    vectors, paddings = self.vocabulary.sentences_to_tensor(
        list(sentences), train_mode=train, max_len=self.max_length)

    # sentences_to_tensor returns time-major tensors, targets need to
    # be batch-major
    batch_major_vectors = vectors.T
    batch_major_paddings = paddings.T

    # Need to convert the data to a sparse representation
    keep = (batch_major_paddings > 0.5)
    fd[self.train_targets] = tf.SparseTensorValue(
        indices=np.stack(np.where(keep), axis=1),
        values=batch_major_vectors[keep],
        dense_shape=batch_major_vectors.shape)

    return fd
def testMaterializeFeaturesWithExcludes(self):
    """A feature listed in ``excludes`` must not be materialized."""
    example1 = self._makeExample(age=3.0,
                                 language='english',
                                 label=1.0,
                                 slice_key='first_slice')

    sparse_feature = tf.SparseTensorValue(
        indices=[[0, 5], [1, 2], [3, 6]],
        values=[100., 200., 300.],
        dense_shape=[4, 10])
    features = {
        'f': {encoding.NODE_SUFFIX: np.array([1])},
        's': {encoding.NODE_SUFFIX: sparse_feature},
    }
    predictions = {'p': {encoding.NODE_SUFFIX: np.array([2])}}
    labels = {'l': {encoding.NODE_SUFFIX: np.array([3])}}

    fpl = types.FeaturesPredictionsLabels(input_ref=0,
                                          features=features,
                                          predictions=predictions,
                                          labels=labels)
    extracts = {
        constants.INPUT_KEY: example1.SerializeToString(),
        constants.FEATURES_PREDICTIONS_LABELS_KEY: fpl,
    }

    result = feature_extractor._MaterializeFeatures(extracts, excludes=['s'])
    self.assertNotIn('features__s', result)
def as_tf_sparse(a):
    """Convert a to tf.SparseTensorValue

    Args:
        a: input array

    Returns:
        SparseTensorValue: converted object. An input that already is a
        ``tf.SparseTensorValue`` is returned unchanged (same object).

    Examples:
        >>> # numpy input
        >>> a = np.random.random((3,4))
        >>> a2 = as_tf_sparse(a)
        >>> import tensorflow as tf
        >>> isinstance(a2, tf.SparseTensorValue)
        True
        >>> np.testing.assert_array_equal(a, as_numpy_array(a2))
        >>> a2 is as_tf_sparse(a2)
        True
    """
    _config.assert_has_package('tensorflow')
    import tensorflow as tf
    if isinstance(a, tf.SparseTensorValue):
        return a
    a = as_scipy_coo(a)
    # np.mat was removed in NumPy 2.0; column_stack builds the same N x 2
    # (row, col) index array directly as an ndarray.
    indices = np.column_stack((a.row, a.col))
    return tf.SparseTensorValue(indices, a.data, a.shape)
def sp2tf(sp_t, shape=None):
    """Convert a sparse matrix into a float32 ``tf.SparseTensorValue``.

    Args:
        sp_t: Sparse matrix accepted by ``sparse_to_tuple``.
        shape: Optional dense shape that overrides the one reported by
            ``sparse_to_tuple``.

    Returns:
        ``tf.SparseTensorValue`` whose values are cast to ``np.float32``.
    """
    converted = sparse_to_tuple(sp_t)
    indices, values = converted[0], converted[1]
    # Pick the shape without writing into `converted`: item assignment
    # fails when sparse_to_tuple returns an (immutable) tuple.
    dense_shape = converted[2] if shape is None else shape
    return tf.SparseTensorValue(indices, values.astype(np.float32),
                                dense_shape)
def load_training_labels(random_images_path):
    """
    Build the sparse label value for a list of image paths.

    The label text is the part of the file name before the first ``-`` with
    every ``#`` removed; each character is translated through
    ``char_mapping``.

    :param random_images_path: iterable of image paths
    :return: tf.SparseTensorValue with one row per image and dense shape
        [number of images, longest label length]
    """
    labels = []
    for path in random_images_path:
        image_name = os.path.split(path)[1]
        text = str(image_name).split('-')[0].replace('#', '')
        labels.append([char_mapping[ch] for ch in text])

    # 0 keeps the result defined when there are no labels at all.
    max_size = max([0] + [len(codes) for codes in labels])

    indices = [[row, col]
               for row, codes in enumerate(labels)
               for col in range(len(codes))]
    values = [code for codes in labels for code in codes]
    size = [len(labels), max_size]
    return tf.SparseTensorValue(indices, values, size)
def feed_dict_predict(sentence, doc_positive_spt, on_training=True):
    """Build the feed dictionary for scoring one query sentence.

    Args:
        sentence: Query text; encoded with
            ``train_data_set.get_one_hot_from_sentence``.
        doc_positive_spt: Value fed as-is to the first positive-document
            placeholder.
        on_training: Value fed to the ``on_train`` placeholder.

    Returns:
        Feed dictionary with the query sparse value, the positive document
        value and the training flag.
    """
    # TODO (from the original note): insert a function here that converts
    # query_in, doc_positive_in and doc_negative_in to one-hot and then to
    # coo_matrix.
    query_in = query_input_list[0]
    doc_positive_in = doc_positive_input_list[0]

    query = coo_matrix(train_data_set.get_one_hot_from_sentence(sentence))
    query = tf.SparseTensorValue(
        np.transpose([
            np.array(query.row, dtype=np.int64),
            np.array(query.col, dtype=np.int64)
        ]),
        # np.float (an alias of the builtin float) was removed in
        # NumPy 1.24; np.float64 is the same dtype.
        np.array(query.data, dtype=np.float64),
        np.array(query.shape, dtype=np.int64))

    return {
        query_in: query,
        doc_positive_in: doc_positive_spt,
        on_train: on_training
    }
def shape_test_stack(self, feat):
    """Check that ``feat.stack`` keeps every value of its sparse inputs.

    ``self.NTESTS`` random binary arrays of shape ``feat.shape`` are
    converted to sparse values, stacked, and evaluated. The result must hold
    as many values as all inputs together, and every input value must appear
    at index ``(input position, *original index)``.
    """
    # Seed from the feature key so each feature gets reproducible data.
    np.random.seed(abs(myhash(feat.key)) % (2**31 - 1))
    phs = [
        feat.get_placeholder_and_feature(batch=False)[1]
        for _ in range(self.NTESTS)
    ]
    vals_dense = [
        np.random.binomial(1, 0.1, size=feat.shape)
        for _ in range(self.NTESTS)
    ]
    vals_sparse = []
    for dense in vals_dense:
        idx, vals = basefeat.np_dense_to_sparse(dense)
        vals_sparse.append(
            tf.SparseTensorValue(np.stack(idx, -1), vals, feat.shape))

    X = feat.stack(phs)
    X_ = self.sess.run(X, feed_dict=dict(zip(phs, vals_sparse)))

    total_len = sum(len(v.values) for v in vals_sparse)
    run_len = len(X_.values)
    msg = 'Differing number of values: {} vs {}'.format(run_len, total_len)
    self.assertEqual(run_len, total_len, msg)

    for i, v in enumerate(vals_sparse):
        for vval, vinds in zip(v.values, v.indices):
            value_found = False
            for xval, xinds in zip(X_.values, X_.indices):
                if xinds[0] == i and np.allclose(xinds[1:], vinds):
                    msg = 'Values {} and {} not equal with inds {}'.format(
                        vval, xval, xinds)
                    # Pass the message so a mismatch reports the indices.
                    self.assertEqual(xval, vval, msg)
                    value_found = True
            self.assertTrue(value_found,
                            msg='Values {}, indices {} not found'.format(
                                vval, vinds))
def sparse_to_tensor(value):
    """Convert a scipy sparse matrix to a tensorflow SparseTensorValue.

    ``value`` must expose ``row``, ``col``, ``data`` and ``shape``; the
    flattened row and column arrays are paired into an N x 2 index array.
    """
    flat_rows = np.ravel(value.row)
    flat_cols = np.ravel(value.col)
    indices = np.stack([flat_rows, flat_cols], axis=1)
    return tf.SparseTensorValue(indices, value.data, value.shape)
def _merge_models(self, m1, m2):
    """Concatenate two model dictionaries along the first axis.

    Args:
        m1: Mapping holding sparse values under ``'context'`` and
            ``'features'`` and arrays under ``'seq_len'``, ``'label'`` and
            ``'forloeb'``.
        m2: Mapping with the same keys; its rows are appended after the
            rows of ``m1``.

    Returns:
        New dictionary with the merged sparse values and arrays. The keys
        ``'test'``, ``'doc_ids'`` and ``'text_len'`` are set to ``None``.
        Neither input is modified.
    """
    sparse_merges = ['context', 'features']
    dense_merges = ['seq_len', 'label', 'forloeb']
    ignores = ['test', 'doc_ids', 'text_len']

    new_model = {}
    for key in sparse_merges:
        first, second = m1[key], m2[key]
        row_offset = first.dense_shape[0]
        new_shape = np.concatenate(
            [[row_offset + second.dense_shape[0]], first.dense_shape[1:]],
            axis=0)
        # Shift a copy: offsetting m2's index array in place would corrupt
        # the caller's data (and shift it again on every further merge).
        shifted = np.array(second.indices, copy=True)
        shifted[:, 0] += row_offset
        new_indices = np.concatenate([first.indices, shifted], axis=0)
        new_values = np.concatenate([first.values, second.values], axis=0)
        new_model[key] = tf.SparseTensorValue(new_indices, new_values,
                                              new_shape)

    for key in dense_merges:
        new_model[key] = np.concatenate([m1[key], m2[key]], axis=0)

    for key in ignores:
        new_model[key] = None

    return new_model
def batch_to_feed_dict(batch, is_train, add_noise=False):
    '''
    Create the dictionary that is fed into the Session.run(..) calls.

    :param batch: tuple (features, phonemes, sequence lengths, waveforms);
        the phoneme entry is indexed with 'indices' and 'values'
    :param is_train: selects the training values for keep_prob and noise_gate
    :param add_noise: when true gd_noise is fed with FLAGS.gd_noise, else 0
    :return: the feed dictionary
    '''
    features_np, phns_np, seq_len_np, wav_np = batch

    # Pad every waveform to the longest one so they stack into one array.
    longest_wav = max(len(w) for w in wav_np)
    wav_np = np.stack([pad_vector(w, longest_wav) for w in wav_np], axis=0)

    n_batch, n_time, _ = features_np.shape

    # Per sequence: 1 / length on the first `length` steps, 0 afterwards.
    mask_rows = []
    for i in range(n_batch):
        length = seq_len_np[i]
        mask_rows.append((np.arange(n_time) < length) / length)
    relevance_mask_np = np.array(mask_rows)

    if FLAGS.n_repeat > 1:
        # Extend sequences with the repeat in time
        features_np = np.repeat(features_np, FLAGS.n_repeat, axis=1)
        n_time = features_np.shape[1]

    phns_labels = tf.SparseTensorValue(phns_np['indices'], phns_np['values'],
                                       [n_batch, n_time])

    feed = {
        features: features_np,
        phns: phns_labels,
        seq_len: seq_len_np,
        weighted_relevant_mask: relevance_mask_np,
        keep_prob: FLAGS.drop_out_probability if is_train else 1.,
        batch_size: n_batch,
        noise_gate: 1. if is_train else 0.,
        audio: wav_np,
        gd_noise: FLAGS.gd_noise if add_noise else 0.
    }
    return feed
def _run_graph(self, sess, qq, hh, tt, mdb, to_fetch):
    """Feed one batch and evaluate ``to_fetch``.

    Args:
        sess: Session used to run the graph.
        qq: Queries of the batch.
        hh: Values fed to ``self.heads``.
        tt: Values fed to ``self.tails``.
        mdb: Indexable by relation id; ``mdb[r]`` is unpacked positionally
            into ``tf.SparseTensorValue``, i.e. ``(indices, values,
            dense_shape)``. A sample recorded with the original code looked
            like indices ``[[0, 0], [2675, 2698], ...]``, values
            ``[0.0, 1.0, ...]`` and dense shape ``[3007, 3007]``.
        to_fetch: Fetches passed to ``sess.run``.

    Returns:
        The result of ``sess.run(to_fetch, feed)``.
    """
    steps_before_end = self.num_step - 1
    if not self.query_is_language:
        end_marker = [self.num_query]
    else:
        end_marker = [[self.num_vocab] * self.num_word]

    feed = {
        self.queries: [[q] * steps_before_end + end_marker for q in qq],
        self.heads: hh,
        self.tails: tt,
    }

    # One sparse database matrix per relation; only half of the operators
    # are fed (the original note attributes this to symmetric relations).
    # Floor division and range() keep this working on Python 3, where
    # xrange is gone and "/" yields a float.
    for r in range(self.num_operator // 2):
        feed[self.database[r]] = tf.SparseTensorValue(*mdb[r])

    return sess.run(to_fetch, feed)
def _evaluate(self, sess, source='test'):
    """Rank predictions for every evaluation triple of ``source``.

    Each batch from ``self.kg`` is run through the model; the prediction
    scores are sorted in descending order per triple, and the collected
    rankings are handed to ``kg.calc_metrics``.

    Returns:
        The second element of the tuple returned by ``kg.calc_metrics``.
    """
    kg = self.kg
    hp = self.hparams

    triple_chunks = []
    ranking_chunks = []
    for batch in kg.get_eval_batch(hp.batch_size, source=source):
        head, query, tail = kg.get_triples_from_batch(batch)
        kg_inputs = kg.get_kg(batch)

        feed = {
            self.head_input: head,
            self.tail_input: tail,
            self.query_sequence: [
                [q] * (hp.n_steps - 1) + kg.END_RELATION for q in query
            ],
        }
        for r in range(kg.n_relations):
            feed[self.kg_inputs[r]] = tf.SparseTensorValue(*kg_inputs[r])

        _, scores = sess.run([self.total_loss, self.prediction],
                             feed_dict=feed)
        # Negate so that argsort orders from highest to lowest score.
        ranking_chunks.append(np.argsort(-scores))
        triple_chunks.append(np.stack([head, query, tail], axis=-1))

    idx_tail_pred = np.concatenate(ranking_chunks, 0).tolist()
    triples_eval = np.concatenate(triple_chunks, 0).tolist()
    metrics = kg.calc_metrics(triples_eval, idx_tail_pred)
    _, eval_tail, _ = metrics
    return eval_tail
def convert_sparse_matrix_to_sparse_tensor(X):
    '''
    Convert a scipy sparse matrix into a tf.SparseTensorValue.

    Code borrowed from
    https://stackoverflow.com/questions/40896157/scipy-sparse-csr-matrix-to-tensorflow-sparsetensor-mini-batch-gradient-descent

    :param X: sparse matrix exposing tocoo()
    :return: tf.SparseTensorValue with N x 2 (row, col) indices, the stored
        data as values and the matrix shape as dense shape
    '''
    coo = X.tocoo()
    # np.mat was removed in NumPy 2.0; column_stack builds the same N x 2
    # index array as a plain ndarray.
    indices = np.column_stack((coo.row, coo.col))
    return tf.SparseTensorValue(indices, coo.data, coo.shape)
def convert2SparseTensorValue(list_labels):
    """Pack variable-length label sequences into a tf.SparseTensorValue.

    Args:
        list_labels: Batch-major list of label sequences.

    Returns:
        Sparse value whose entry ``[i, j]`` is the j-th label of sample
        ``i``; the dense shape is ``[number of samples, longest sequence]``.
    """
    num_samples = len(list_labels)
    num_maxlen = max(len(item) for item in list_labels)

    indices = [[idx, posi]
               for idx in range(num_samples)
               for posi in range(len(list_labels[idx]))]
    values = [label
              for idx in range(num_samples)
              for label in list_labels[idx]]

    return tf.SparseTensorValue(indices=indices,
                                values=values,
                                dense_shape=[num_samples, num_maxlen])
def Apply(self):
    """Send the fed audio batch to the prediction stub and decode the reply.

    Reads the audio, audio-length and id arrays from ``self.feed_dict``,
    issues a ``Predict`` request for model ``'deepspeech2'`` with signature
    ``'predict_output'``, rebuilds the decoded sequence as a sparse value
    and stores the first inference result in ``self.final_result``.
    """
    audio = self.feed_dict[
        self.model.get_data_layer().input_tensors["source_tensors"][0]]
    audio_length = self.feed_dict[
        self.model.get_data_layer().input_tensors["source_tensors"][1]]
    x_id = self.feed_dict[
        self.model.get_data_layer().input_tensors["source_ids"][0]]

    internal_request = predict_pb2.PredictRequest()
    internal_request.model_spec.name = 'deepspeech2'
    internal_request.model_spec.signature_name = 'predict_output'
    request_inputs = (
        ('audio', audio),
        ('audio_length', audio_length),
        ('x_id', x_id),
    )
    for input_name, array in request_inputs:
        internal_request.inputs[input_name].CopyFrom(
            tf.contrib.util.make_tensor_proto(array,
                                              shape=list(array.shape)))

    # Second argument is the timeout (10.0; presumably seconds -- confirm).
    internal_result = self.istub.Predict(internal_request, 10.0)

    self.inputs = Deepspeech2.model.get_data_layer().input_tensors

    reply = internal_result.outputs
    decoded_indices = tensor_util.MakeNdarray(
        reply['indices_decoded_sequence'])
    decoded_values = tensor_util.MakeNdarray(
        reply['values_decoded_sequence'])
    decoded_shape = tensor_util.MakeNdarray(
        reply['dense_shape_decoded_sequence'])

    self.outputs = [
        tf.SparseTensorValue(indices=decoded_indices,
                             values=decoded_values,
                             dense_shape=decoded_shape)
    ]

    results = Deepspeech2.model.infer(self.inputs, self.outputs)
    self.final_result = results[0][0]
def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
    """Build the feed dict with the train-mode flag and sparse targets.

    Args:
        dataset: Dataset queried for the series ``self.data_id`` (a missing
            series is allowed).
        train: Fed to ``self.train_mode`` and forwarded to the vocabulary.

    Returns:
        Feed dict; ``self.train_targets`` is only present when the series
        exists.
    """
    sentences = cast(Iterable[List[str]],
                     dataset.get_series(self.data_id, allow_none=True))

    fd = {self.train_mode: train}  # type: FeedDict
    if sentences is None:
        return fd

    vectors, paddings = self.vocabulary.sentences_to_tensor(
        list(sentences), train_mode=train)

    # sentences_to_tensor returns time-major tensors, targets need to
    # be batch-major
    batch_major_vectors = vectors.T
    batch_major_paddings = paddings.T

    # Need to convert the data to a sparse representation
    keep = (batch_major_paddings > 0.5)
    fd[self.train_targets] = tf.SparseTensorValue(
        indices=np.stack(np.where(keep), axis=1),
        values=batch_major_vectors[keep],
        dense_shape=batch_major_vectors.shape)

    return fd
def testFeedDenseReshapeSemantics(self):
    """Sparse reshape of a fed value must match a dense reshape."""
    with self.test_session(use_gpu=False) as sess:
        # Compute a random initial shape and new shape, randomly sparsify
        # it, and check that the output of SparseReshape has the same
        # semantics as a dense reshape.
        factors = np.array([2] * 4 + [3] * 4 + [5] * 4)  # 810k total elements

        def random_shape():
            # Distribute the prime factors over a random number of axes.
            rank = np.random.randint(2, 7)
            axis_of_factor = np.random.randint(rank, size=factors.shape)
            return [
                np.prod(factors[axis_of_factor == d]) for d in range(rank)
            ]

        def sparsify(dense):
            # Keep the entries below 0.5 as the sparse representation.
            indices = np.transpose(np.nonzero(dense < 0.5))
            values = dense[dense < 0.5]
            return indices, values

        orig_shape = random_shape()
        new_shape = random_shape()

        orig_dense = np.random.uniform(size=orig_shape)
        orig_indices, orig_values = sparsify(orig_dense)

        new_dense = np.reshape(orig_dense, new_shape)
        new_indices, new_values = sparsify(new_dense)

        sp_input = self._SparseTensorPlaceholder()
        input_val = tf.SparseTensorValue(orig_indices, orig_values,
                                         orig_shape)
        sp_output = tf.sparse_reshape(sp_input, new_shape)

        output_val = sess.run(sp_output, {sp_input: input_val})
        self.assertAllEqual(output_val.indices, new_indices)
        self.assertAllEqual(output_val.values, new_values)
        # NOTE(review): the third field is read as `.shape`; on TensorFlow
        # versions where it is named `dense_shape` this needs updating --
        # confirm against the version in use.
        self.assertAllEqual(output_val.shape, new_shape)
def feed_step(batch, label, dtype="train"):
    """Run one train / evaluate / test step on a sparse batch.

    Args:
        batch: Sparse matrix exposing ``tocoo()``; fed to ``model.input_x``.
        label: Labels; reshaped to a column and fed to ``model.input_y``.
        dtype: ``"train"``, ``"evaluate"`` or ``"test"``.

    Returns:
        ``(step, loss, auc)`` for ``"train"``, ``(loss, auc)`` for
        ``"evaluate"``. ``"test"`` prints classification metrics and returns
        ``None``, as does any other ``dtype``.
    """
    # Convert once instead of once per field.
    coo = batch.tocoo()
    # np.mat was removed in NumPy 2.0; column_stack builds the same N x 2
    # (row, col) index array as a plain ndarray.
    indices = np.column_stack((coo.row, coo.col))
    feed_dict = {
        model.input_x: tf.SparseTensorValue(indices, coo.data, coo.shape),
        model.input_y: np.reshape(label, [-1, 1])
    }

    if dtype == "train":
        train_operation = [
            model.train_step, model.global_step, model.loss,
            model.auc_score, model.summary_op
        ]
        _, step, loss_val, auc_val, merged = sess.run(train_operation,
                                                      feed_dict=feed_dict)
        writer.add_summary(merged, step)
        return step, loss_val, auc_val

    if dtype == "evaluate":
        loss_val, auc_val = sess.run([model.loss, model.auc_score],
                                     feed_dict=feed_dict)
        return loss_val, auc_val

    if dtype == "test":
        logit_val = sess.run([model.logit], feed_dict=feed_dict)[0]
        # Hard predictions are shared by the four thresholded metrics.
        predicted = [round(x) for x in logit_val]
        print("accuracy:", accuracy_score(label, predicted))
        print("precision:", precision_score(label, predicted))
        print("reall:", recall_score(label, predicted))
        print("f1:", f1_score(label, predicted))
        print("auc:", roc_auc_score(label, logit_val))

    return None
def _SparseTensorValue_5x6(self):
    """Return a fixed 5 x 6 sparse fixture with six float64 entries."""
    positions = np.array(
        [[0, 0], [1, 0], [1, 3], [1, 4], [3, 2], [3, 3]]).astype(np.int64)
    entries = np.array([0, 10, 13, 14, 32, 33]).astype(np.float64)
    dense_shape = np.array([5, 6]).astype(np.int64)
    return tf.SparseTensorValue(indices=positions,
                                values=entries,
                                dense_shape=dense_shape)