def testFeedPartialShapes(self):
    with self.test_session(use_gpu=False):
        # Incorporate new rank into shape information if known
        sp_input = self._SparseTensorPlaceholder()
        sp_output = tf.sparse_reshape(sp_input, [2, 3, 5])
        self.assertListEqual(sp_output.indices.get_shape().as_list(), [None, 3])
        self.assertListEqual(sp_output.dense_shape.get_shape().as_list(), [3])

        # Incorporate known shape information about input indices in output
        # indices
        sp_input = self._SparseTensorPlaceholder()
        sp_input.indices.set_shape([5, None])
        sp_output = tf.sparse_reshape(sp_input, [2, 3, 5])
        self.assertListEqual(sp_output.indices.get_shape().as_list(), [5, 3])
        self.assertListEqual(sp_output.dense_shape.get_shape().as_list(), [3])

        # Even if new_shape has no shape information, we know the ranks of
        # output indices and shape
        sp_input = self._SparseTensorPlaceholder()
        sp_input.indices.set_shape([5, None])
        new_shape = tf.placeholder(tf.int64)
        sp_output = tf.sparse_reshape(sp_input, new_shape)
        self.assertListEqual(sp_output.indices.get_shape().as_list(), [5, None])
        self.assertListEqual(sp_output.dense_shape.get_shape().as_list(), [None])
def lookup_char_emb(text, c2v_vocab, c2v_emb, dim_c2v_emb):
    str_tensor = tf.string_split(text)
    str_split = tf.sparse_reshape(str_tensor, [-1])
    str_split, text_mask = tf.sparse_fill_empty_rows(str_split, "")
    char_split = tf.string_split(str_split.values, '')
    char_tensor = tf.SparseTensor(indices=char_split.indices,
                                  values=c2v_vocab.lookup(char_split.values),
                                  dense_shape=char_split.dense_shape)
    char_tensor_reshape = tf.sparse_reshape(char_tensor, [-1])
    char_tensor, term_mask = tf.sparse_fill_empty_rows(char_tensor_reshape, 0)
    char_vecs = tf.nn.embedding_lookup_sparse(c2v_emb, char_tensor, None, combiner='sum')
    char_vecs = tf.where(~term_mask, char_vecs, tf.zeros_like(char_vecs))
    term_char_vecs = tf.reshape(
        char_vecs,
        shape=tf.stack([tf.shape(text)[0],
                        tf.cast(tf.reduce_max(str_tensor.indices[:, 1]) + 1, tf.int32),
                        -1,
                        tf.shape(char_vecs)[-1]]))
    term_char_mask_tmp = tf.reduce_sum(term_char_vecs, axis=-1)
    term_char_mask = ~tf.equal(term_char_mask_tmp, 0)
    term_char_len = tf.cast(tf.count_nonzero(term_char_mask, axis=-1), tf.int32)
    text_mask = ~tf.equal(tf.reduce_sum(term_char_mask_tmp, axis=-1), 0)
    text_len = tf.cast(tf.count_nonzero(text_mask, axis=-1), tf.int32)
    return term_char_vecs, term_char_mask, term_char_len, text_mask, text_len
def sparse_loss_to_node(self, samples, support_size, num_samples):
    batch_size = self.batch_size
    length = sum(support_size[1:]) * batch_size
    node_dim = self.loss_node.get_shape().as_list()
    # for k in range(1, len(samples)):
    for k in range(1, 2):
        x = tf.reshape(
            tf.tile(tf.expand_dims(samples[k - 1], -1),
                    [1, tf.cast(support_size[k] / support_size[k - 1], tf.int32)]), [-1])
        x = tf.cast(x, tf.int64)
        y = samples[k]
        y = tf.cast(y, tf.int64)
        idx = tf.expand_dims(x * node_dim[0] + y, 1)
        loss = tf.reshape(
            tf.tile(tf.expand_dims(self.loss_node_, -1), [1, support_size[k]]), [-1])
        scatter1 = tf.SparseTensor(idx, loss,
                                   tf.constant([node_dim[0] * node_dim[1]], dtype=tf.int64))
        scatter1 = tf.sparse_reshape(scatter1, tf.constant([node_dim[0], node_dim[1]]))
        self.loss_node = tf.sparse_add(self.loss_node, scatter1)
        ones = tf.reshape(
            tf.tile(tf.expand_dims(tf.ones(batch_size), -1), [1, support_size[k]]), [-1])
        scatter2 = tf.SparseTensor(idx, ones,
                                   tf.constant([node_dim[0] * node_dim[1]], dtype=tf.int64))
        scatter2 = tf.sparse_reshape(scatter2, tf.constant([node_dim[0], node_dim[1]]))
        self.loss_node_count = tf.sparse_add(self.loss_node_count, scatter2)
def sp_attn_head(seq, out_sz, adj_mat_local, adj_mat_global, activation,
                 in_drop=0.0, coef_drop=0.0, residual=False):
    with tf.name_scope('my_attn'):
        if in_drop != 0.0:
            seq = tf.nn.dropout(seq, 1.0 - in_drop)

        seq_fts = seq
        latent_factor_size = 8
        nb_nodes = seq_fts.shape[1].value
        w_1 = glorot([seq_fts.shape[2].value, latent_factor_size])
        w_2 = glorot([3 * seq_fts.shape[2].value, latent_factor_size])

        f_1 = tf.layers.conv1d(seq_fts, 1, 1)
        f_2 = tf.layers.conv1d(seq_fts, 1, 1)

        # local neighbours
        logits = tf.add(f_1[0], tf.transpose(f_2[0]))
        logits_first = adj_mat_local * logits
        lrelu = tf.SparseTensor(indices=logits_first.indices,
                                values=tf.nn.leaky_relu(logits_first.values),
                                dense_shape=logits_first.dense_shape)
        coefs = tf.sparse_softmax(lrelu)
        coefs = tf.sparse_reshape(coefs, [nb_nodes, nb_nodes])

        seq_fts = tf.squeeze(seq_fts)
        neigh_embs = tf.sparse.sparse_dense_matmul(coefs, seq_fts)

        # non-local neighbours
        logits_global = adj_mat_global * logits
        lrelu_global = tf.SparseTensor(indices=logits_global.indices,
                                       values=tf.nn.leaky_relu(logits_global.values),
                                       dense_shape=logits_global.dense_shape)
        coefs_global = tf.sparse_softmax(lrelu_global)
        coefs_global = tf.sparse_reshape(coefs_global, [nb_nodes, nb_nodes])
        neigh_embs_global = tf.sparse.sparse_dense_matmul(coefs_global, seq_fts)

        neigh_embs_sum_1 = tf.matmul(
            tf.add(tf.add(seq_fts, neigh_embs), neigh_embs_global), w_1)
        neigh_embs_sum_2 = tf.matmul(
            tf.concat([tf.concat([seq_fts, neigh_embs], axis=-1), neigh_embs_global],
                      axis=-1), w_2)

        final_embs = activation(neigh_embs_sum_1) + activation(neigh_embs_sum_2)

        return final_embs
def calc_log_loss(self, Pairwise, Question, Answer, Review, TermtoTermR,
                  TermtoTermP, Question_I, Answer_I, Review_I):
    shape1 = tf.shape(Pairwise)
    shape2 = tf.shape(Answer)
    nq = shape1[0]
    nr = shape1[1]
    na = shape2[1]

    pairwise = tf.reshape(Pairwise, [-1, self.PairwiseDim])
    pairwise = tf.reshape(tf.matmul(pairwise, self.theta), [nq, nr])

    termTotermR = tf.sparse_reshape(TermtoTermR, [-1, self.V])
    termTotermR = tf.reshape(
        tf.sparse_tensor_dense_matmul(termTotermR, self.RelvPar), [nq, nr])

    QProj = tf.sparse_tensor_dense_matmul(Question_I, self.A)
    RProjR = tf.sparse_tensor_dense_matmul(Review_I, self.B)
    BilinearR = tf.matmul(QProj, tf.transpose(RProjR))

    Relevance = tf.nn.softmax(pairwise + termTotermR + BilinearR)

    termTotermP = tf.sparse_reshape(TermtoTermP, [-1, self.V])
    termTotermP = tf.reshape(
        tf.sparse_tensor_dense_matmul(termTotermP, self.PredPar), [nq, na, nr])

    AProj = tf.sparse_tensor_dense_matmul(
        tf.sparse_reshape(Answer_I, [-1, self.V]), self.X)
    RProjP = tf.sparse_tensor_dense_matmul(Review_I, self.Y)
    BilinearP = tf.reshape(tf.matmul(AProj, tf.transpose(RProjP)), [nq, na, nr])

    Prediction = BilinearP + termTotermP
    Prediction = tf.expand_dims(Prediction[:, 0, :], 1) - Prediction
    Prediction = Prediction[:, 1:, :]
    Prediction = tf.sigmoid(Prediction)

    MoE = tf.reduce_sum(
        tf.multiply(Prediction, tf.expand_dims(Relevance, axis=1)), axis=2)
    accuracy_count = tf.cast(tf.shape(tf.where(MoE > 0.5))[0], tf.float64)
    count = nq * na

    log_likelihood = tf.reduce_sum(tf.log(MoE))
    R1 = tf.reduce_sum(tf.square(self.A)) + tf.reduce_sum(tf.square(self.B))
    R2 = tf.reduce_sum(tf.square(self.X)) + tf.reduce_sum(tf.square(self.Y))
    log_likelihood -= self.Lambda * (R1 + R2)

    return -1 * log_likelihood, MoE, Relevance
def train_filequeue_reader(self, filename_queue):
    (keys, values) = self.train_reader.read_up_to(filename_queue,
                                                  self.config.batch_size)
    label_features = {
        "label_indices": tf.VarLenFeature(dtype=tf.int64),
        "label_values": tf.VarLenFeature(dtype=tf.int64),
        "label_shape": tf.FixedLenFeature([1], dtype=tf.int64),
        "seq_len": tf.FixedLenFeature([1], dtype=tf.int64)
    }
    audio_features = {
        "audio": tf.FixedLenSequenceFeature([self.last_dim], dtype=tf.float32)
    }
    audio_list = []
    label_list = []
    len_list = []
    for i in range(self.config.batch_size):
        context, sequence = tf.parse_single_sequence_example(
            serialized=values[i],
            context_features=label_features,
            sequence_features=audio_features)
        audio = sequence['audio']
        seq_len = context['seq_len']
        label_values = context['label_values']
        label_indices = context['label_indices']
        label_shape = context['label_shape']
        label_indices = tf.sparse_tensor_to_dense(
            tf.sparse_reshape(label_indices, [-1, 1]))
        label_values = tf.sparse_tensor_to_dense(label_values)
        sparse_label = tf.SparseTensor(label_indices, label_values, label_shape)
        # then we can sparse_concat at axis 0
        sparse_label = tf.sparse_reshape(sparse_label, [1, -1])
        audio_list.append(audio)
        label_list.append(sparse_label)
        len_list.append(seq_len)
    seq_lengths = tf.cast(
        tf.reshape(tf.stack(len_list, name='seq_lengths'), (-1,)), tf.int32)
    audio_tensor = tf.stack(audio_list, name='input_audio')
    label_tensor = tf.sparse_concat(0, label_list, expand_nonconcat_dim=True)
    return audio_tensor, label_tensor, seq_lengths
def _sparse_tensor_dense_matmul(sp_a, b, **kwargs):
    """Returns (batched) matmul of a SparseTensor with a Tensor.

    Args:
      sp_a: `SparseTensor` representing a (batch of) matrices.
      b: `Tensor` representing a (batch of) matrices, with the same batch shape as
        `sp_a`. The shape must be compatible with the shape of `sp_a` and kwargs.
      **kwargs: Keyword arguments to `tf.sparse_tensor_dense_matmul`.

    Returns:
      product: A dense (batch of) matrix-shaped Tensor of the same batch shape and
        dtype as `sp_a` and `b`. If `sp_a` or `b` is adjointed through `kwargs`
        then the shape is adjusted accordingly.
    """
    batch_shape = _get_shape(sp_a)[:-2]

    # Reshape the SparseTensor into a rank-3 SparseTensor, with the batch shape
    # flattened to a single dimension. If the batch rank is 0, then we add a
    # batch dimension of rank 1.
    sp_a = tf.sparse_reshape(sp_a, tf.concat([[-1], _get_shape(sp_a)[-2:]], axis=0))
    # Reshape b to stack the batch dimension along the rows.
    b = tf.reshape(b, tf.concat([[-1], _get_shape(b)[-1:]], axis=0))

    # Convert the SparseTensor to a matrix in block diagonal form with blocks of
    # matrices [M, N]. This allows us to use tf.sparse_tensor_dense_matmul, which
    # only accepts rank-2 (Sparse)Tensors.
    out = tf.sparse_tensor_dense_matmul(_sparse_block_diag(sp_a), b, **kwargs)

    # Finally retrieve the original batch shape from the resulting rank-2 Tensor.
    # Note that we avoid inferring the final shape from `sp_a` or `b` because we
    # might have transposed one or both of them.
    return tf.reshape(
        out, tf.concat([batch_shape, [-1], _get_shape(out)[-1:]], axis=0))
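# A minimal usage sketch for the batched helper above, not part of the original
# module. It assumes the private helpers `_get_shape` and `_sparse_block_diag`
# referenced by `_sparse_tensor_dense_matmul` are importable from the same
# source; the tensors below are made up for illustration.
import tensorflow as tf

# A batch of two 3x4 sparse matrices and a matching batch of dense 4x2 matrices.
sp_a = tf.SparseTensor(
    indices=[[0, 0, 1], [0, 2, 3], [1, 1, 0]],
    values=[1.0, 2.0, 3.0],
    dense_shape=[2, 3, 4])
b = tf.random_normal([2, 4, 2])

# product has shape [2, 3, 2]: one sparse-dense matmul per batch element.
product = _sparse_tensor_dense_matmul(sp_a, b)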
def call(self, words, **kwargs):
    # e.g. [batch_size, sent_number, seq_len]
    context_ids = self.feature_lookup_table.lookup(words)
    if isinstance(context_ids, tf.SparseTensor):
        context_ids = tf.sparse_tensor_to_dense(context_ids, default_value=0,
                                                name="sparseid2dense_C")
    # [batch_size, sent_number, c_seq_len], every sentence's real length
    mask = tf.abs(tf.sign(context_ids))
    # [batch_size, sent_number]
    sen_len = tf.reduce_sum(mask, -1)
    # [batch_size, sent_number, c_seq_len, 1]
    mask = tf.expand_dims(tf.cast(mask, tf.float32), -1)
    context_embedding = tf.nn.embedding_lookup(self.word_embedding, context_ids)
    context_embedding = tf.layers.dropout(context_embedding, self.dropout_rate,
                                          training=self.trainable)
    if self.use_char_embedding:
        chars = kwargs.get("chars")
        shape = tf.shape(chars)
        if isinstance(chars, tf.SparseTensor):
            chars = tf.sparse_reshape(chars, [-1])
            chars = tf.sparse_tensor_to_dense(chars, default_value="0")
        else:
            chars = tf.reshape(chars, [-1])
        chars = tf.string_split(chars, delimiter=",")
        chars_ids = self.char_lookup_table.lookup(chars)
        chars_ids = tf.sparse_tensor_to_dense(chars_ids)
        char_mask = tf.cast(tf.expand_dims(tf.sign(chars_ids), -1), tf.float32)
        # context_mask cares about this
        char_embedding = tf.nn.embedding_lookup(self.char_embeddding, chars_ids) * char_mask
        char_embedding = tf.layers.dropout(char_embedding, self.dropout_rate * 0.5,
                                           training=self.trainable)
        char_embedding = self.conv_chars(char_embedding, shape)
        context_embedding = tf.concat([context_embedding, char_embedding], -1)
    context_embedding = (self.highway(context_embedding, mask=mask)
                         if self.use_highway else context_embedding * mask)
    return context_embedding, sen_len, mask, context_ids
def map_function(x):
    i, dense_slice = x[0], x[1]
    sparse_slice = tf.sparse_reshape(
        tf.sparse_slice(sp_a, [i, 0, 0],
                        [1, sp_a.dense_shape[1], sp_a.dense_shape[2]]),
        [sp_a.dense_shape[1], sp_a.dense_shape[2]])
    mult_slice = tf.sparse_tensor_dense_matmul(sparse_slice, dense_slice)
    return mult_slice
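# A hedged sketch of how a closure like map_function is typically driven: pair
# each batch index with its dense slice and map over the batch. The names
# `sp_a` (a rank-3 float32 SparseTensor) and `b` (a rank-3 dense Tensor with
# the same batch size) are assumptions; they are expected to exist in the
# enclosing scope, as in the snippet above.
idx = tf.range(sp_a.dense_shape[0], dtype=tf.int64)
batched_product = tf.map_fn(map_function, (idx, b), dtype=tf.float32)
# batched_product stacks one [rows, cols] matmul result per batch element.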
def testMismatchedSizesWithInferredDim(self):
    with self.test_session(use_gpu=False) as sess:
        sp_input = self._SparseTensorPlaceholder()
        input_val = self._SparseTensorValue_5x6()
        sp_output = tf.sparse_reshape(sp_input, [4, -1])
        with self.assertRaisesOpError("requested shape requires a multiple"):
            sess.run(sp_output, {sp_input: input_val})
def testMultipleInferredDims(self):
    with self.test_session(use_gpu=False) as sess:
        sp_input = self._SparseTensorPlaceholder()
        input_val = self._SparseTensorValue_5x6()
        sp_output = tf.sparse_reshape(sp_input, [4, -1, -1])
        with self.assertRaisesOpError("only one output shape size may be -1"):
            sess.run(sp_output, {sp_input: input_val})
def labels_to_onehot(lables, class_num=1):
    '''
    :param lables: shape [batchsize, depth, height, width], 4D, no channel axis
    :param class_num: number of classes
    :return: one-hot encoded labels with an extra class axis appended (5D)
    '''
    if isinstance(class_num, tf.Tensor):
        class_num_tf = tf.to_int32(class_num)
    else:
        class_num_tf = tf.constant(class_num, tf.int32)

    in_shape = tf.shape(lables)
    # add an extra axis for class_num, making the output 5D
    out_shape = tf.concat([in_shape, tf.reshape(class_num_tf, (1,))], 0)

    if class_num == 1:
        return tf.reshape(lables, out_shape)
    else:
        # squeeze labels to a flat length-N vector [0, 0, 0, 1, ...]
        lables = tf.reshape(lables, (-1,))
        # dense_shape is [N, class_num]
        dense_shape = tf.stack([tf.shape(lables)[0], class_num_tf], 0)
        lables = tf.to_int64(lables)
        # ids is a length-N vector [0, 1, 2, ..., N-1]
        ids = tf.range(tf.to_int64(dense_shape[0]), dtype=tf.int64)
        # ids becomes an N x 2 matrix of (row, class) indices
        ids = tf.stack([ids, lables], axis=1)
        one_hot = tf.SparseTensor(indices=ids,
                                  values=tf.ones_like(lables, dtype=tf.float32),
                                  dense_shape=tf.to_int64(dense_shape))
        one_hot = tf.sparse_reshape(one_hot, out_shape)
        return tf.cast(one_hot, tf.float32)
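# A small usage sketch for labels_to_onehot (not from the original source),
# assuming TF1 graph mode as in the snippet above; the placeholder shape and
# class count are made up for illustration.
labels = tf.placeholder(tf.int32, shape=[None, 8, 32, 32])  # [batch, depth, h, w]
one_hot_sp = labels_to_onehot(labels, class_num=4)  # SparseTensor, [batch, depth, h, w, 4]
one_hot = tf.sparse_tensor_to_dense(one_hot_sp)     # densify if a dense tensor is needed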
def read_and_decode(filename_queue):
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(
        serialized_example,
        features={
            'rows': tf.FixedLenFeature([], tf.int64),
            'label': tf.FixedLenFeature([], tf.int64),
            'one_feature': tf.VarLenFeature(tf.float32)
        })
    one_feature = features['one_feature']
    # The code below does not work; you cannot cast a SparseTensor this way.
    # one_feature = tf.cast(features['one_feature'], tf.float32)

    # for CNN feature: n*4096
    # one_feature = tf.sparse_reshape(one_feature, [-1, 4096])

    # for gist feature: n*512
    one_feature = tf.sparse_reshape(one_feature, [-1, 512])
    label = tf.cast(features['label'], tf.int32)
    rows = tf.cast(features['rows'], tf.int32)
    return one_feature, label, rows
def _get_sequence_dense_tensor(self, inputs, weight_collections=None, trainable=None):
    del weight_collections, trainable
    if self.signature != 'sequence':
        raise ValueError(
            'Column {} could not be used as sequence feature column. '
            'Use sequence_text_embedding_column instead'.format(self.name))
    sparse_keys = inputs.get(self)
    sparse_keys.shape.with_rank_at_least(2)
    sparse_keys.shape.with_rank_at_most(3)
    batch_size, max_length = sparse_keys.dense_shape[0], sparse_keys.dense_shape[1]
    sparse_keys = tf.sparse_reshape(
        sparse_keys, shape=tf.stack([batch_size, max_length, -1]))
    dense_tensor = self._hub_module(sparse_keys, signature=self.signature)
    sequence_length = feature_column._sequence_length_from_sparse_tensor(
        inputs.get(self))
    return feature_column._SequenceDenseColumn.TensorSequenceLengthPair(
        dense_tensor=dense_tensor, sequence_length=sequence_length)
def tensors_to_item(self, keys_to_tensors):
    """Maps the given dictionary of tensors to a concatenated list of bboxes.

    Args:
      keys_to_tensors: a mapping of TF-Example keys to parsed tensors.

    Returns:
      [time, num_boxes, 4] tensor of bounding box coordinates, in order
      [y_min, x_min, y_max, x_max]. Whether the tensor is a SparseTensor or a
      dense Tensor is determined by the return_dense parameter. Empty positions
      in the sparse tensor are filled with -1.0 values.
    """
    sides = []
    for key in self._full_keys:
        value = keys_to_tensors[key]
        expanded_dims = tf.concat(
            [tf.to_int64(tf.shape(value)),
             tf.constant([1], dtype=tf.int64)], 0)
        side = tf.sparse_reshape(value, expanded_dims)
        sides.append(side)
    bounding_boxes = tf.sparse_concat(2, sides)
    if self._return_dense:
        bounding_boxes = tf.sparse_tensor_to_dense(
            bounding_boxes, default_value=self._default_value)
    return bounding_boxes
def _parse_function(self, index_list, sp_value, label):
    inputs = tf.SparseTensor(indices=index_list, values=sp_value,
                             dense_shape=self.sp_shape)
    inputs = tf.sparse_reshape(inputs, [-1])
    return inputs, label, index_list[:, -1]
def tensors_to_item(self, keys_to_tensors):
    """Maps the given dictionary of tensors to a concatenated list of bboxes.

    Args:
      keys_to_tensors: a mapping of TF-Example keys to parsed tensors.

    Returns:
      [time, num_boxes, 4] tensor of bounding box coordinates, in order
      [y_min, x_min, y_max, x_max]. Whether the tensor is a SparseTensor or a
      dense Tensor is determined by the return_dense parameter. Empty positions
      in the sparse tensor are filled with -1.0 values.
    """
    sides = []
    for key in self._full_keys:
        value = keys_to_tensors[key]
        expanded_dims = tf.concat([
            tf.to_int64(tf.shape(value)),
            tf.constant([1], dtype=tf.int64)
        ], 0)
        side = tf.sparse_reshape(value, expanded_dims)
        sides.append(side)
    bounding_boxes = tf.sparse_concat(2, sides)
    if self._return_dense:
        bounding_boxes = tf.sparse_tensor_to_dense(
            bounding_boxes, default_value=self._default_value)
    return bounding_boxes
def _do_test(self, expected_result, config=None):
    # Make sure that expected_result is an np array
    if not type(expected_result).__module__ == np.__name__:
        expected_result = np.array(expected_result)

    with tf.Graph().as_default():
        inputs, indices, _ = dense_to_sparse(self.input)
        sparse_tensor_reordered = tf.sparse_reorder(inputs)
        sparse_tensor_reshaped = tf.sparse_reshape(sparse_tensor_reordered,
                                                   self.input.shape)

        W = tf.constant(self.W1, dtype=tf.float32)
        b = tf.constant(self.b1, dtype=tf.float32)

        # Sparse layer
        logits = tf.sparse_tensor_dense_matmul(sparse_tensor_reshaped, W) + b
        # Dense layer
        logits = logits @ tf.constant(self.W2, tf.float32) + tf.constant(self.b2, tf.float32)

        explanation = lrp.lrp(inputs, logits, config)

        with tf.Session() as s:
            expl = s.run(explanation)
            self.assertTrue(np.all(np.equal(indices, expl.indices)),
                            "expected indices did not equal actual indices")
            self.assertTrue(np.allclose(expl.values, expected_result.reshape((-1)),
                                        rtol=1.e-3, atol=1.e-3),
                            "expected values did not match actual values")
def testFeedMultipleInferredDims(self):
    with self.test_session(use_gpu=False) as sess:
        sp_input = self._SparseTensorPlaceholder()
        input_val = self._SparseTensorValue_5x6()
        sp_output = tf.sparse_reshape(sp_input, [4, -1, -1])
        with self.assertRaisesOpError("only one output shape size may be -1"):
            sess.run(sp_output, {sp_input: input_val})
def compute_per_batch_eval_metrics(metrics, predictions, used_labels, labels,
                                   indices, used_indices, ori_batch_idx,
                                   step_idx, config, context):
    new_batch_idx = tf.gather(tf.range(tf.shape(used_indices)[0]), ori_batch_idx)
    new_indices = tf.concat([
        tf.expand_dims(new_batch_idx, 1),
        tf.expand_dims(step_idx, 1),
        tf.slice(indices, begin=[0, 2], size=[tf.shape(indices)[0], 1])
    ], axis=1)
    label_shape = tf.shape(labels)
    eval_labels = tf.sparse_reshape(
        tf.SparseTensor(
            tf.cast(new_indices, tf.int64),
            tf.cast(used_labels, tf.int64),
            tf.cast([tf.shape(used_indices)[0], label_shape[1], label_shape[2]],
                    tf.int64)),
        [tf.shape(used_indices)[0], label_shape[1] * label_shape[2]])
    ori_predictions = tf.gather(predictions, ori_batch_idx)
    updates = compute_recommendation_metrics(metrics, eval_labels, predictions)
    updates += compute_regression_metrics(
        metrics, ori_predictions, used_labels, indices, config, context)
    updates += compute_ranking_metrics(
        metrics, used_indices, new_batch_idx, used_labels, predictions, config)
    return updates
def testFeedDenseReshapeSemantics(self):
    with self.test_session(use_gpu=False) as sess:
        # Compute a random rank-5 initial shape and new shape, randomly sparsify
        # it, and check that the output of SparseReshape has the same semantics
        # as a dense reshape.
        factors = np.array([2] * 4 + [3] * 4 + [5] * 4)  # 810k total elements
        orig_rank = np.random.randint(2, 7)
        orig_map = np.random.randint(orig_rank, size=factors.shape)
        orig_shape = [
            np.prod(factors[orig_map == d]) for d in range(orig_rank)
        ]
        new_rank = np.random.randint(2, 7)
        new_map = np.random.randint(new_rank, size=factors.shape)
        new_shape = [
            np.prod(factors[new_map == d]) for d in range(new_rank)
        ]

        orig_dense = np.random.uniform(size=orig_shape)
        orig_indices = np.transpose(np.nonzero(orig_dense < 0.5))
        orig_values = orig_dense[orig_dense < 0.5]

        new_dense = np.reshape(orig_dense, new_shape)
        new_indices = np.transpose(np.nonzero(new_dense < 0.5))
        new_values = new_dense[new_dense < 0.5]

        sp_input = self._SparseTensorPlaceholder()
        input_val = tf.SparseTensorValue(orig_indices, orig_values, orig_shape)
        sp_output = tf.sparse_reshape(sp_input, new_shape)

        output_val = sess.run(sp_output, {sp_input: input_val})
        self.assertAllEqual(output_val.indices, new_indices)
        self.assertAllEqual(output_val.values, new_values)
        self.assertAllEqual(output_val.shape, new_shape)
def testFeedMismatchedSizesWithInferredDim(self):
    with self.test_session(use_gpu=False) as sess:
        sp_input = self._SparseTensorPlaceholder()
        input_val = self._SparseTensorValue_5x6()
        sp_output = tf.sparse_reshape(sp_input, [4, -1])
        with self.assertRaisesOpError("requested shape requires a multiple"):
            sess.run(sp_output, {sp_input: input_val})
def sp_attn_head(seq, out_sz, adj_mat, adj_all_mat, adj_neig_mat, N_target_mat,
                 activation, nb_nodes, in_drop=0.0, coef_drop=0.0, residual=False):
    with tf.name_scope('sp_attn'):
        if coef_drop != 0.0:
            adj_mat = tf.SparseTensor(
                indices=adj_mat.indices,
                values=tf.nn.dropout(adj_mat.values, 1.0 - coef_drop),
                dense_shape=adj_mat.dense_shape)
            adj_neig_mat = tf.SparseTensor(
                indices=adj_neig_mat.indices,
                values=tf.nn.dropout(adj_neig_mat.values, 1.0 - coef_drop),
                dense_shape=adj_neig_mat.dense_shape)
        if in_drop != 0.0:
            seq = tf.nn.dropout(seq, 1.0 - in_drop)

        seq_fts = tf.layers.conv1d(seq, out_sz, 1, use_bias=False)

        # simplest self-attention possible
        f_1 = tf.layers.conv1d(seq_fts, 1, 1)
        f_2 = tf.layers.conv1d(seq_fts, 1, 1)

        f_1 = tf.reshape(f_1, (nb_nodes, 1))
        f_2 = tf.reshape(f_2, (nb_nodes, 1))

        f_1 = adj_mat * f_1
        f_2 = adj_mat * tf.transpose(f_2, [1, 0])

        logits = tf.sparse_add(f_1, f_2)
        lrelu = tf.SparseTensor(indices=logits.indices,
                                values=tf.nn.leaky_relu(logits.values),
                                dense_shape=logits.dense_shape)
        coefs = tf.sparse_softmax(lrelu)

        if in_drop != 0.0:
            seq_fts = tf.nn.dropout(seq_fts, 1.0 - in_drop)

        coefs = tf.sparse_reshape(coefs, [nb_nodes, nb_nodes])
        seq_fts = tf.squeeze(seq_fts)

        ### HW
        out_bi = BILinear_pooling(adj_neig_mat, seq_fts)
        out_bi = dot(N_target_mat, out_bi, True)
        out_gat = tf.sparse_tensor_dense_matmul(coefs, seq_fts)

        vals = (1 - FLAGS.alpha) * out_gat + FLAGS.alpha * out_bi
        vals = tf.expand_dims(vals, axis=0)
        vals.set_shape([1, nb_nodes, out_sz])
        ret = tf.contrib.layers.bias_add(vals)

        return activation(ret)  # activation
def _call(self, inputs):
    seq_fts = tf.layers.conv1d(inputs, self.out_sz, 1, use_bias=False)

    # simplest self-attention possible
    f_1_t = tf.layers.conv1d(seq_fts, 1, 1)
    f_2_t = tf.layers.conv1d(seq_fts, 1, 1)
    f_1 = tf.reshape(f_1_t, (self.nb_nodes, 1))
    f_2 = tf.reshape(f_2_t, (self.nb_nodes, 1))
    f_1 = self.bias_mat * f_1
    f_2 = self.bias_mat * tf.transpose(f_2, [1, 0])

    logits = tf.sparse_add(f_1, f_2)
    lrelu = tf.SparseTensor(indices=logits.indices,
                            values=tf.nn.leaky_relu(logits.values),
                            dense_shape=logits.dense_shape)
    coefs = tf.sparse_softmax(lrelu)

    # As tf.sparse_tensor_dense_matmul expects its arguments to have rank 2,
    # here we make an assumption that our input is of batch size 1, and reshape
    # appropriately. The method will fail in all other cases!
    coefs = tf.sparse_reshape(coefs, [self.nb_nodes, self.nb_nodes])
    seq_fts = tf.squeeze(seq_fts)
    vals = tf.sparse_tensor_dense_matmul(coefs, seq_fts)
    vals = tf.expand_dims(vals, axis=0)
    vals.set_shape([1, self.nb_nodes, self.out_sz])
    ret = self.act(tf.contrib.layers.bias_add(vals))

    return ret  # activation
def __call__(self, u_inputs, v_inputs, u_size, v_size):
    x = v_inputs
    adj_mat = self.adj_mat

    # simplest self-attention possible
    f_1 = tf.layers.conv1d(u_inputs, 1, 1)
    f_2 = tf.layers.conv1d(v_inputs, 1, 1)
    f_1 = tf.reshape(f_1, (u_size, 1))
    f_2 = tf.reshape(f_2, (v_size, 1))

    seq_fts = tf.layers.conv1d(x, self.output_dim, 1, use_bias=False)

    f_1 = adj_mat * f_1
    f_2 = adj_mat * tf.transpose(f_2, [1, 0])
    logits = tf.sparse_add(f_1, f_2)
    lrelu = tf.SparseTensor(indices=logits.indices,
                            values=tf.nn.leaky_relu(logits.values),
                            dense_shape=logits.dense_shape)
    coefs = tf.sparse_softmax(lrelu)

    coefs = tf.sparse_reshape(coefs, [u_size, v_size])
    seq_fts = tf.squeeze(seq_fts)
    vals = tf.sparse_tensor_dense_matmul(coefs, seq_fts)
    print('--------vals.shape------', vals.shape)
    # vals = tf.expand_dims(vals, axis=0)
    # vals.set_shape([1, nb_nodes, out_sz])
    ret = tf.contrib.layers.bias_add(vals)

    return self.act(ret)  # activation
def testFeedDenseReshapeSemantics(self):
    with self.test_session(use_gpu=False) as sess:
        # Compute a random rank-5 initial shape and new shape, randomly sparsify
        # it, and check that the output of SparseReshape has the same semantics
        # as a dense reshape.
        factors = np.array([2] * 4 + [3] * 4 + [5] * 4)  # 810k total elements
        orig_rank = np.random.randint(2, 7)
        orig_map = np.random.randint(orig_rank, size=factors.shape)
        orig_shape = [np.prod(factors[orig_map == d]) for d in range(orig_rank)]
        new_rank = np.random.randint(2, 7)
        new_map = np.random.randint(new_rank, size=factors.shape)
        new_shape = [np.prod(factors[new_map == d]) for d in range(new_rank)]

        orig_dense = np.random.uniform(size=orig_shape)
        orig_indices = np.transpose(np.nonzero(orig_dense < 0.5))
        orig_values = orig_dense[orig_dense < 0.5]

        new_dense = np.reshape(orig_dense, new_shape)
        new_indices = np.transpose(np.nonzero(new_dense < 0.5))
        new_values = new_dense[new_dense < 0.5]

        sp_input = self._SparseTensorPlaceholder()
        input_val = tf.SparseTensorValue(orig_indices, orig_values, orig_shape)
        sp_output = tf.sparse_reshape(sp_input, new_shape)

        output_val = sess.run(sp_output, {sp_input: input_val})
        self.assertAllEqual(output_val.indices, new_indices)
        self.assertAllEqual(output_val.values, new_values)
        self.assertAllEqual(output_val.shape, new_shape)
def read_and_decode(filename_queue):
    """
    Decode tfrecords.
    :param filename_queue: the filename queue
    :return: one_feature, label, rows (the number of images for one scene spot,
        i.e. the number of rows in the data of a .npy file)
    """
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(
        serialized_example,
        features={
            'rows': tf.FixedLenFeature([], tf.int64),
            'label': tf.FixedLenFeature([], tf.int64),
            'one_feature': tf.VarLenFeature(tf.float32)
        })
    one_feature = features['one_feature']
    # The code below does not work; you cannot cast a SparseTensor this way.
    # one_feature = tf.cast(features['one_feature'], tf.float32)

    # for CNN feature: n*2048
    # one_feature = tf.sparse_reshape(one_feature, [-1, 2048])

    # for gist feature: n*512
    one_feature = tf.sparse_reshape(one_feature, [-1, FLAGS.feature_col])
    label = tf.cast(features['label'], tf.int32)
    rows = tf.cast(features['rows'], tf.int32)
    return one_feature, label, rows
def _sparse_or_dense_matmul_onehot(sparse_or_dense_matrix, col_index, size):
    """Returns a (dense) column of a Tensor or SparseTensor.

    Args:
      sparse_or_dense_matrix: matrix-shaped, `float` `Tensor` or `SparseTensor`.
      col_index: scalar, `int` `Tensor` representing the index of the desired
        column.
      size: scalar, `int` `Tensor` representing the number of rows in
        `sparse_or_dense_matrix`. Used only in the sparse case, so that the
        caller can give side information about the shape of
        `sparse_or_dense_matrix`.

    Returns:
      column: vector-shaped, `float` `Tensor` with the same dtype as
        `sparse_or_dense_matrix`, representing the `col_index`th column of
        `sparse_or_dense_matrix`.
    """
    if isinstance(sparse_or_dense_matrix,
                  (tf.SparseTensor, tf.SparseTensorValue)):
        # TODO(b/111924846): Implement better (ideally in a way that allows us to
        # eliminate the `size` arg, if possible).
        return tf.sparse_tensor_to_dense(
            tf.sparse_reshape(
                tf.sparse_slice(sparse_or_dense_matrix,
                                tf.cast([0, col_index], tf.int64),
                                tf.cast([size, 1], tf.int64)),
                [size]))
    else:
        return tf.gather(sparse_or_dense_matrix, col_index, axis=-1)
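# A minimal usage sketch (not from the original source) for the column-extraction
# helper above; the matrix, column index, and size below are made up.
sparse_matrix = tf.SparseTensor(
    indices=[[0, 1], [2, 1], [2, 3]],
    values=[1.0, 2.0, 3.0],
    dense_shape=[3, 4])
# Dense column 1 of the 3x4 sparse matrix: [1.0, 0.0, 2.0]
column = _sparse_or_dense_matmul_onehot(sparse_matrix, col_index=1, size=3)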
def sp_attn_head(seq, out_sz, adj_mat, activation, nb_nodes, in_drop=0.0,
                 coef_drop=0.0, residual=False):
    with tf.name_scope('sp_attn'):
        if in_drop != 0.0:
            seq = tf.nn.dropout(seq, 1.0 - in_drop)

        seq_fts = tf.layers.conv1d(seq, out_sz, 1, use_bias=False)

        # simplest self-attention possible
        f_1 = tf.layers.conv1d(seq_fts, 1, 1)
        f_2 = tf.layers.conv1d(seq_fts, 1, 1)
        f_1 = tf.reshape(f_1, (nb_nodes, 1))
        f_2 = tf.reshape(f_2, (nb_nodes, 1))
        f_1 = adj_mat * f_1
        f_2 = adj_mat * tf.transpose(f_2, [1, 0])

        logits = tf.sparse_add(f_1, f_2)
        lrelu = tf.SparseTensor(indices=logits.indices,
                                values=tf.nn.leaky_relu(logits.values),
                                dense_shape=logits.dense_shape)
        coefs = tf.sparse_softmax(lrelu)

        if coef_drop != 0.0:
            coefs = tf.SparseTensor(indices=coefs.indices,
                                    values=tf.nn.dropout(coefs.values, 1.0 - coef_drop),
                                    dense_shape=coefs.dense_shape)
        if in_drop != 0.0:
            seq_fts = tf.nn.dropout(seq_fts, 1.0 - in_drop)

        # As tf.sparse_tensor_dense_matmul expects its arguments to have rank 2,
        # here we make an assumption that our input is of batch size 1, and reshape
        # appropriately. The method will fail in all other cases!
        coefs = tf.sparse_reshape(coefs, [nb_nodes, nb_nodes])
        seq_fts = tf.squeeze(seq_fts)
        vals = tf.sparse_tensor_dense_matmul(coefs, seq_fts)
        vals = tf.expand_dims(vals, axis=0)
        vals.set_shape([1, nb_nodes, out_sz])
        ret = tf.contrib.layers.bias_add(vals)

        # residual connection
        if residual:
            if seq.shape[-1] != ret.shape[-1]:
                ret = ret + conv1d(seq, ret.shape[-1], 1)  # activation
            else:
                ret = ret + seq

        if activation is None:  # for the final layer
            return ret
        else:
            return activation(ret)  # activation
def xletter_feature_extractor(text, model_prefix, input_mode, op_dict=None,
                              xletter_cnt=None, win_size=None, dim_xletter_emb=None):
    with tf.variable_scope("xletter_layer", reuse=tf.AUTO_REUSE):
        if input_mode == 'mstf':
            xletter_emb = tf.get_variable(name='xletter_emb_' + model_prefix,
                                          shape=[xletter_cnt * win_size, dim_xletter_emb])
            indices, ids, values, offsets = mstf.dssm_xletter(input=text,
                                                              win_size=win_size,
                                                              dict_handle=op_dict)
            offsets_to_dense = tf.segment_sum(tf.ones_like(offsets), offsets)
            batch_id = tf.cumsum(offsets_to_dense[:-1])
            index_tensor = tf.concat([tf.expand_dims(batch_id, axis=-1),
                                      tf.expand_dims(indices, axis=-1)], axis=-1)
            value_tensor = ids
            dense_shape = tf.concat(
                [tf.shape(offsets),
                 tf.expand_dims(tf.reduce_max(indices) + 1, axis=-1)], axis=0)
            text_tensor = tf.SparseTensor(indices=tf.cast(index_tensor, tf.int64),
                                          values=value_tensor,
                                          dense_shape=tf.cast(dense_shape, tf.int64))
            # conv
            text_tensor = tf.sparse_reshape(text_tensor, [-1])
            text_tensor, text_mask = tf.sparse_fill_empty_rows(text_tensor, 0)
            text_vecs = tf.nn.embedding_lookup_sparse(xletter_emb, text_tensor, None,
                                                      combiner='sum')
            text_vecs = tf.where(~text_mask, text_vecs, tf.zeros_like(text_vecs))
            text_vecs = tf.reshape(text_vecs,
                                   [-1, tf.reduce_max(indices) + 1, dim_xletter_emb])
            step_mask = ~tf.equal(tf.reduce_sum(text_vecs, axis=2), 0)
            sequence_length = tf.cast(tf.count_nonzero(step_mask, axis=1), tf.int32)
        elif input_mode == 'pyfunc':
            query_split = tf.string_split(text, ';')
            term_split = tf.string_split(query_split.values, ',')
            xletter_tensor_indices = tf.transpose(
                tf.stack([tf.gather(query_split.indices[:, 0], term_split.indices[:, 0]),
                          tf.gather(query_split.indices[:, 1], term_split.indices[:, 0])]))
            xletter_tensor = tf.SparseTensor(
                indices=xletter_tensor_indices,
                values=tf.string_to_number(term_split.values, out_type=tf.int32),
                dense_shape=query_split.dense_shape)
            xletter_emb = tf.get_variable(name='xletter_emb_' + model_prefix,
                                          shape=[xletter_cnt * win_size, dim_xletter_emb])
            xletter_tensor_reshape = tf.sparse_reshape(xletter_tensor, [-1])
            xletter_tensor, text_mask = tf.sparse_fill_empty_rows(xletter_tensor_reshape, 0)
            xletter_vecs = tf.nn.embedding_lookup_sparse(xletter_emb, xletter_tensor, None,
                                                         combiner='sum')
            xletter_vecs = tf.where(~text_mask, xletter_vecs, tf.zeros_like(xletter_vecs))
            text_vecs = tf.reshape(
                xletter_vecs,
                shape=tf.stack([-1, tf.reduce_max(query_split.indices[:, 1]) + 1,
                                dim_xletter_emb]))
            step_mask = ~tf.equal(tf.reduce_sum(text_vecs, axis=2), 0)
            sequence_length = tf.cast(tf.count_nonzero(step_mask, axis=1), tf.int32)
        elif input_mode == 'pyfunc_batch':
            indices, values, dense_shape = tf.py_func(op_dict.batch_xletter_extractor,
                                                      [text],
                                                      [tf.int64, tf.int32, tf.int64])
            xletter_tensor = tf.SparseTensor(indices=indices, values=values,
                                             dense_shape=dense_shape)
            xletter_emb = tf.get_variable(name='xletter_emb_' + model_prefix,
                                          shape=[xletter_cnt * win_size, dim_xletter_emb])
            xletter_tensor_reshape = tf.sparse_reshape(xletter_tensor, [-1])
            xletter_tensor, text_mask = tf.sparse_fill_empty_rows(xletter_tensor_reshape, 0)
            xletter_vecs = tf.nn.embedding_lookup_sparse(xletter_emb, xletter_tensor, None,
                                                         combiner='sum')
            xletter_vecs = tf.where(~text_mask, xletter_vecs, tf.zeros_like(xletter_vecs))
            text_vecs = tf.reshape(xletter_vecs,
                                   shape=tf.stack([-1, dense_shape[1], dim_xletter_emb]))
            step_mask = ~tf.equal(tf.reduce_sum(text_vecs, axis=2), 0)
            sequence_length = tf.cast(tf.count_nonzero(step_mask, axis=1), tf.int32)
        else:
            raise NotImplementedError
    return text_vecs, step_mask, sequence_length
def testFeedMismatchedSizes(self):
    with self.test_session(use_gpu=False) as sess:
        sp_input = self._SparseTensorPlaceholder()
        input_val = self._SparseTensorValue_5x6()
        sp_output = tf.sparse_reshape(sp_input, [4, 7])
        with self.assertRaisesOpError(
                "Input to reshape is a tensor with 30 dense values"):
            sess.run(sp_output, {sp_input: input_val})
def dot_product(params, rankings, x):
    two_dims = LatentFactorPortfolioGraph.two_dim_shape(x, rankings)
    x_two_dim = tf.sparse_reshape(x, shape=two_dims)
    dot_prod = tf.sparse_tensor_dense_matmul(x_two_dim, params)
    three_dims = LatentFactorPortfolioGraph.three_dim_shape(params, rankings)
    dot_prod_three_dim = tf.reshape(dot_prod, shape=three_dims)
    return dot_prod_three_dim
def _attn_r_n_m_h(self):
    h, r, n = self.heads, self.relations, self._nodes
    attn_h_n_rm = self._attn_h_n_rm
    attn_h_n_r_m = tf.sparse_reshape(attn_h_n_rm, [h, n, r, n])
    attn_r_n_m_h = tf.sparse_transpose(attn_h_n_r_m, [2, 1, 3, 0])
    return attn_r_n_m_h
def preprocessing_fn(inputs):
    return {
        'dense_out': mappers.scale_to_0_1(inputs['dense_1']),
        'sparse_out': api.map(lambda x: tf.sparse_reshape(x, (1, 10)),
                              inputs['sparse'])
    }
def testSameShape(self):
    with self.test_session(use_gpu=False) as sess:
        input_val = self._SparseTensorValue_5x6()
        sp_output = tf.sparse_reshape(input_val, [5, 6])
        output_val = sess.run(sp_output)
        self.assertAllEqual(output_val.indices, input_val.indices)
        self.assertAllEqual(output_val.values, input_val.values)
        self.assertAllEqual(output_val.shape, input_val.shape)
def testFeedSameShapeWithInferredDim(self):
    with self.test_session(use_gpu=False) as sess:
        sp_input = self._SparseTensorPlaceholder()
        input_val = self._SparseTensorValue_5x6()
        sp_output = tf.sparse_reshape(sp_input, [-1, 6])
        output_val = sess.run(sp_output, {sp_input: input_val})
        self.assertAllEqual(output_val.indices, input_val.indices)
        self.assertAllEqual(output_val.values, input_val.values)
        self.assertAllEqual(output_val.shape, input_val.shape)
def create_word_vectors_from_post(self, raw_post, mxlen):
    # vocab has only lowercase words
    word2index = self.index
    if self.do_lowercase:
        raw_post = self.lowercase(raw_post)
    word_tokens = tf.string_split(tf.reshape(raw_post, [-1]))
    word_indices = word2index.lookup(word_tokens)

    # Reshape them out to the proper length
    reshaped_words = tf.sparse_reshape(word_indices, shape=[-1])
    return self.reshape_indices(reshaped_words, [mxlen])
def _create_word_vectors_from_post_mixed_case(self, nraw_post, mxlen):
    # vocab has only lowercase words
    word_tokens = tf.string_split(tf.reshape(nraw_post, [-1]))
    word_indices = self.word2index.lookup(word_tokens)

    # Reshape them out to the proper length
    reshaped_words = tf.sparse_reshape(word_indices, shape=[-1])
    x = self._reshape_indices(reshaped_words, [mxlen])
    return x
def testUpRank(self):
    with self.test_session(use_gpu=False) as sess:
        input_val = self._SparseTensorValue_5x6()
        sp_output = tf.sparse_reshape(input_val, [2, 3, 5])
        output_val = sess.run(sp_output)
        self.assertAllEqual(output_val.indices,
                            np.array([[0, 0, 0], [0, 1, 1], [0, 1, 4],
                                      [0, 2, 0], [1, 1, 0], [1, 1, 1]]))
        self.assertAllEqual(output_val.values, input_val.values)
        self.assertAllEqual(output_val.shape, [2, 3, 5])
def testFeedUpRankWithInferredDim(self):
    with self.test_session(use_gpu=False) as sess:
        sp_input = self._SparseTensorPlaceholder()
        input_val = self._SparseTensorValue_5x6()
        sp_output = tf.sparse_reshape(sp_input, [2, -1, 5])
        output_val = sess.run(sp_output, {sp_input: input_val})
        self.assertAllEqual(output_val.indices,
                            np.array([[0, 0, 0], [0, 1, 1], [0, 1, 4],
                                      [0, 2, 0], [1, 1, 0], [1, 1, 1]]))
        self.assertAllEqual(output_val.values, input_val.values)
        self.assertAllEqual(output_val.dense_shape, [2, 3, 5])
def testFeedNewShapeSameRankWithInferredDim(self):
    with self.test_session(use_gpu=False) as sess:
        sp_input = self._SparseTensorPlaceholder()
        input_val = self._SparseTensorValue_5x6()
        sp_output = tf.sparse_reshape(sp_input, [3, -1])
        output_val = sess.run(sp_output, {sp_input: input_val})
        self.assertAllEqual(output_val.indices,
                            np.array([[0, 0], [0, 6], [0, 9],
                                      [1, 0], [2, 0], [2, 1]]))
        self.assertAllEqual(output_val.values, input_val.values)
        self.assertAllEqual(output_val.shape, [3, 10])
def testFeedDownRankWithInferredDim(self):
    with self.test_session(use_gpu=False) as sess:
        sp_input = self._SparseTensorPlaceholder()
        input_val = self._SparseTensorValue_2x3x4()
        sp_output = tf.sparse_reshape(sp_input, [6, -1])
        output_val = sess.run(sp_output, {sp_input: input_val})
        self.assertAllEqual(output_val.indices,
                            np.array([[0, 1], [1, 0], [1, 2], [3, 3],
                                      [4, 1], [4, 3], [5, 2]]))
        self.assertAllEqual(output_val.values, input_val.values)
        self.assertAllEqual(output_val.shape, [6, 4])
def sparse_message_pass(node_states,
                        adjacency_matrices,
                        num_edge_types,
                        hidden_size,
                        use_bias=True,
                        average_aggregation=False,
                        name="sparse_ggnn"):
    """One message-passing step for a GNN with a sparse adjacency matrix.

    Implements equation 2 (the message passing step) in
    [Li et al. 2015](https://arxiv.org/abs/1511.05493).

    N = The number of nodes in each batch.
    H = The size of the hidden states.
    T = The number of edge types.

    Args:
      node_states: Initial states of each node in the graph. Shape is [N, H].
      adjacency_matrices: Adjacency matrix of directed edges for each edge type.
        Shape is [N, N, T] (sparse tensor).
      num_edge_types: The number of edge types. T.
      hidden_size: The size of the hidden state. H.
      use_bias: Whether to use bias in the hidden layer.
      average_aggregation: How to aggregate the incoming node messages. If
        average_aggregation is true, the messages are averaged. If it is false,
        they are summed.
      name: (optional) The scope within which tf variables should be created.

    Returns:
      The result of one step of Gated Graph Neural Network (GGNN) message
      passing. Shape: [N, H]
    """
    n = tf.shape(node_states)[0]
    t = num_edge_types
    incoming_edges_per_type = tf.sparse_reduce_sum(adjacency_matrices, axis=1)

    # Convert the adjacency matrix into shape [T, N, N] - one [N, N] adjacency
    # matrix for each edge type. Since sparse tensor multiplication only supports
    # two-dimensional tensors, we actually convert the adjacency matrix into a
    # [T * N, N] tensor.
    adjacency_matrices = tf.sparse_transpose(adjacency_matrices, [2, 0, 1])
    adjacency_matrices = tf.sparse_reshape(adjacency_matrices, [t * n, n])

    # Multiply the adjacency matrix by the node states, producing a [T * N, H]
    # tensor. For each (edge type, node) pair, this tensor stores the sum of
    # the hidden states of the node's neighbors over incoming edges of that type.
    messages = tf.sparse_tensor_dense_matmul(adjacency_matrices, node_states)

    # Rearrange this tensor to have shape [N, T * H]. The incoming states of each
    # node's neighbors are summed by edge type and then concatenated together into
    # a single T * H vector.
    messages = tf.reshape(messages, [t, n, hidden_size])
    messages = tf.transpose(messages, [1, 0, 2])
    messages = tf.reshape(messages, [n, t * hidden_size])

    # Run each of those T * H vectors through a linear layer that produces
    # a vector of size H. This process is equivalent to running each H-sized
    # vector through a separate linear layer for each edge type and then adding
    # the results together.
    #
    # Note that, earlier on, we added together all of the states of neighbors
    # that were connected by edges of the same edge type. Since addition and
    # multiplication by a linear layer commute, this process was equivalent to
    # running each incoming edge through a linear layer separately and then
    # adding everything at the end.
    with tf.variable_scope(name, default_name="sparse_ggnn"):
        final_node_states = common_layers.dense(
            messages, hidden_size, use_bias=False)

        # Multiply the bias for each edge type by the number of incoming nodes
        # of that edge type.
        if use_bias:
            bias = tf.get_variable("bias", initializer=tf.zeros([t, hidden_size]))
            final_node_states += tf.matmul(incoming_edges_per_type, bias)

        if average_aggregation:
            incoming_edges = tf.reduce_sum(incoming_edges_per_type, -1, keepdims=True)
            incoming_edges = tf.tile(incoming_edges, [1, hidden_size])
            final_node_states /= incoming_edges + 1e-7

    return final_node_states
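# A hedged usage sketch for sparse_message_pass, not part of the original code.
# It assumes the `common_layers` module (tensor2tensor) used by the function
# above is importable; the tiny 3-node, 2-edge-type graph below is made up.
num_nodes, hidden, num_types = 3, 4, 2

node_states = tf.random_normal([num_nodes, hidden])

# [N, N, T] sparse adjacency tensor. Given how the function multiplies the
# adjacency by the node states, an entry at [i, j, t] makes node i aggregate
# node j's state over edge type t: here node 1 receives from node 0 (type 0)
# and from node 2 (type 1).
adjacency = tf.SparseTensor(
    indices=[[1, 0, 0], [1, 2, 1]],
    values=[1.0, 1.0],
    dense_shape=[num_nodes, num_nodes, num_types])

new_states = sparse_message_pass(
    node_states, adjacency, num_edge_types=num_types, hidden_size=hidden)
# new_states has shape [3, 4]: one updated hidden state per node.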