def generalised_dice_loss(prediction, ground_truth, weight_map=None,
                          type_weight='Square'):
    """
    Function to calculate the Generalised Dice Loss defined in
        Sudre, C. et al. (2017) Generalised Dice overlap as a deep learning
        loss function for highly unbalanced segmentations. DLMIA 2017

    :param prediction: the logits
    :param ground_truth: the segmentation ground truth
    :param weight_map: per-voxel weight map (optional)
    :param type_weight: type of weighting allowed between labels (choice
        between Square (square of inverse of volume),
        Simple (inverse of volume) and Uniform (no weighting))
    :return: the loss
    """
    ground_truth = tf.to_int64(ground_truth)
    n_voxels = ground_truth.shape[0].value
    n_classes = prediction.shape[1].value
    ids = tf.constant(np.arange(n_voxels), dtype=tf.int64)
    ids = tf.stack([ids, ground_truth], axis=1)
    one_hot = tf.SparseTensor(indices=ids,
                              values=tf.ones([n_voxels], dtype=tf.float32),
                              dense_shape=[n_voxels, n_classes])

    if weight_map is not None:
        weight_map_nclasses = tf.reshape(
            tf.tile(weight_map, [n_classes]), prediction.get_shape())
        ref_vol = tf.sparse_reduce_sum(
            weight_map_nclasses * one_hot, reduction_axes=[0])
        intersect = tf.sparse_reduce_sum(
            weight_map_nclasses * one_hot * prediction, reduction_axes=[0])
        seg_vol = tf.reduce_sum(
            tf.multiply(weight_map_nclasses, prediction), 0)
    else:
        ref_vol = tf.sparse_reduce_sum(one_hot, reduction_axes=[0])
        intersect = tf.sparse_reduce_sum(one_hot * prediction,
                                         reduction_axes=[0])
        seg_vol = tf.reduce_sum(prediction, 0)

    if type_weight == 'Square':
        weights = tf.reciprocal(tf.square(ref_vol))
    elif type_weight == 'Simple':
        weights = tf.reciprocal(ref_vol)
    elif type_weight == 'Uniform':
        weights = tf.ones_like(ref_vol)
    else:
        raise ValueError("The variable type_weight \"{}\" "
                         "is not defined.".format(type_weight))

    # Replace infinite weights (classes absent from the reference) by the
    # largest finite weight so that empty classes do not dominate the loss.
    new_weights = tf.where(tf.is_inf(weights), tf.zeros_like(weights), weights)
    weights = tf.where(tf.is_inf(weights),
                       tf.ones_like(weights) * tf.reduce_max(new_weights),
                       weights)
    generalised_dice_numerator = \
        2 * tf.reduce_sum(tf.multiply(weights, intersect))
    generalised_dice_denominator = \
        tf.reduce_sum(tf.multiply(weights, seg_vol + ref_vol))
    generalised_dice_score = \
        generalised_dice_numerator / generalised_dice_denominator
    return 1 - generalised_dice_score
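# Added example: a minimal sketch of wiring up generalised_dice_loss above,
# assuming TensorFlow 1.x graph mode. The shapes, placeholders and random
# feeds are illustrative only; the logits are passed through a softmax first,
# since Dice-style losses are normally computed on class probabilities.
import numpy as np
import tensorflow as tf

n_voxels, n_classes = 4096, 4
logits = tf.placeholder(tf.float32, shape=[n_voxels, n_classes])
probabilities = tf.nn.softmax(logits)
labels = tf.placeholder(tf.int32, shape=[n_voxels])

gdl = generalised_dice_loss(probabilities, labels, type_weight='Square')

with tf.Session() as sess:
    loss_val = sess.run(gdl, feed_dict={
        logits: np.random.randn(n_voxels, n_classes).astype(np.float32),
        labels: np.random.randint(0, n_classes, size=n_voxels)})
    print(loss_val)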
def _map_to_tfidf(x):
    """Calculates the inverse document frequency of terms in the corpus.

    Args:
      x : a SparseTensor of int64 representing string indices in vocab.

    Returns:
      The tf*idf values
    """
    # Add one to the reduced term frequencies to avoid dividing by zero.
    idf = tf.log(tf.to_double(corpus_size) / (
        1.0 + tf.to_double(reduced_term_freq)))
    dense_doc_sizes = tf.to_double(tf.sparse_reduce_sum(tf.SparseTensor(
        indices=x.indices,
        values=tf.ones_like(x.values),
        dense_shape=x.dense_shape), 1))

    # For every term in x, divide the idf by the doc size.
    # The two gathers both result in shape <sum_doc_sizes>
    idf_over_doc_size = (tf.gather(idf, x.values) /
                         tf.gather(dense_doc_sizes, x.indices[:, 0]))

    return tf.SparseTensor(
        indices=x.indices,
        values=idf_over_doc_size,
        dense_shape=x.dense_shape)
def dice(prediction, ground_truth, weight_map=None):
    """
    Function to calculate the dice loss with the definition given in
        Milletari, F., Navab, N., & Ahmadi, S. A. (2016) V-net: Fully
        convolutional neural networks for volumetric medical image
        segmentation. 3DV 2016
    using a square in the denominator

    :param prediction: the logits
    :param ground_truth: the segmentation ground_truth
    :param weight_map: per-voxel weight map (optional)
    :return: the loss
    """
    ground_truth = tf.to_int64(ground_truth)
    prediction = tf.cast(prediction, tf.float32)
    ids = tf.range(tf.to_int64(tf.shape(ground_truth)[0]), dtype=tf.int64)
    ids = tf.stack([ids, ground_truth], axis=1)
    one_hot = tf.SparseTensor(
        indices=ids,
        values=tf.ones_like(ground_truth, dtype=tf.float32),
        dense_shape=tf.to_int64(tf.shape(prediction)))
    if weight_map is not None:
        n_classes = prediction.shape[1].value
        weight_map_nclasses = tf.reshape(
            tf.tile(weight_map, [n_classes]), prediction.get_shape())
        dice_numerator = 2.0 * tf.sparse_reduce_sum(
            weight_map_nclasses * one_hot * prediction, reduction_axes=[0])
        dice_denominator = \
            tf.reduce_sum(weight_map_nclasses * tf.square(prediction),
                          reduction_indices=[0]) + \
            tf.sparse_reduce_sum(one_hot * weight_map_nclasses,
                                 reduction_axes=[0])
    else:
        dice_numerator = 2.0 * tf.sparse_reduce_sum(
            one_hot * prediction, reduction_axes=[0])
        dice_denominator = \
            tf.reduce_sum(tf.square(prediction), reduction_indices=[0]) + \
            tf.sparse_reduce_sum(one_hot, reduction_axes=[0])
    epsilon_denominator = 0.00001

    dice_score = dice_numerator / (dice_denominator + epsilon_denominator)
    # dice_score.set_shape([n_classes])
    # minimising (1 - dice_coefficients)
    return 1.0 - tf.reduce_mean(dice_score)
def dice_nosquare(prediction, ground_truth, weight_map=None):
    """
    Function to calculate the classical dice loss

    :param prediction: the logits
    :param ground_truth: the segmentation ground_truth
    :param weight_map: per-voxel weight map (optional)
    :return: the loss
    """
    ground_truth = tf.to_int64(ground_truth)
    n_voxels = ground_truth.shape[0].value
    n_classes = prediction.shape[1].value
    # construct sparse matrix for ground_truth to save space
    ids = tf.constant(np.arange(n_voxels), dtype=tf.int64)
    ids = tf.stack([ids, ground_truth], axis=1)
    one_hot = tf.SparseTensor(indices=ids,
                              values=tf.ones([n_voxels], dtype=tf.float32),
                              dense_shape=[n_voxels, n_classes])
    # dice
    if weight_map is not None:
        weight_map_nclasses = tf.reshape(
            tf.tile(weight_map, [n_classes]), prediction.get_shape())
        dice_numerator = 2.0 * tf.sparse_reduce_sum(
            weight_map_nclasses * one_hot * prediction, reduction_axes=[0])
        dice_denominator = \
            tf.reduce_sum(prediction * weight_map_nclasses,
                          reduction_indices=[0]) + \
            tf.sparse_reduce_sum(weight_map_nclasses * one_hot,
                                 reduction_axes=[0])
    else:
        dice_numerator = 2.0 * tf.sparse_reduce_sum(one_hot * prediction,
                                                    reduction_axes=[0])
        dice_denominator = tf.reduce_sum(prediction, reduction_indices=[0]) + \
                           tf.sparse_reduce_sum(one_hot, reduction_axes=[0])
    epsilon_denominator = 0.00001

    dice_score = dice_numerator / (dice_denominator + epsilon_denominator)
    # dice_score.set_shape([n_classes])
    # minimising (1 - dice_coefficients)
    return 1.0 - tf.reduce_mean(dice_score)
def _count_docs_with_term(term_frequency):
    """Computes the number of documents in a batch that contain each term.

    Args:
      term_frequency: The `SparseTensor` output of _to_term_frequency.

    Returns:
      A `Tensor` of shape (vocab_size,) that contains the number of documents
      in the batch that contain each term.
    """
    count_of_doc_inter = tf.SparseTensor(
        indices=term_frequency.indices,
        values=tf.ones_like(term_frequency.values),
        dense_shape=term_frequency.dense_shape)
    out = tf.sparse_reduce_sum(count_of_doc_inter, axis=0)
    return tf.expand_dims(out, 0)
def _to_term_frequency(x, vocab_size):
    """Creates a SparseTensor of term frequency for every doc/term pair.

    Args:
      x : a SparseTensor of int64 representing string indices in vocab.
      vocab_size: An int - the count of vocab used to turn the string into
        int64s including any OOV buckets.

    Returns:
      a SparseTensor with the count of times a term appears in a document at
        indices <doc_index_in_batch>, <term_index_in_vocab>,
        with size (num_docs_in_batch, vocab_size).
    """
    # Construct intermediary sparse tensor with indices
    # [<doc>, <term_index_in_doc>, <vocab_id>] and tf.ones values.
    split_indices = tf.to_int64(
        tf.split(x.indices, axis=1, num_or_size_splits=2))
    expanded_values = tf.to_int64(tf.expand_dims(x.values, 1))
    next_index = tf.concat(
        [split_indices[0], split_indices[1], expanded_values], axis=1)

    next_values = tf.ones_like(x.values)
    vocab_size_as_tensor = tf.constant([vocab_size], dtype=tf.int64)
    next_shape = tf.concat([x.dense_shape, vocab_size_as_tensor], 0)

    next_tensor = tf.SparseTensor(indices=tf.to_int64(next_index),
                                  values=next_values,
                                  dense_shape=next_shape)

    # Take the intermediary tensor and reduce over the term_index_in_doc
    # dimension. This produces a tensor with indices [<doc_id>, <term_id>]
    # and values [count_of_term_in_doc] and shape batch x vocab_size
    term_count_per_doc = tf.sparse_reduce_sum_sparse(next_tensor, 1)

    dense_doc_sizes = tf.to_double(
        tf.sparse_reduce_sum(
            tf.SparseTensor(indices=x.indices,
                            values=tf.ones_like(x.values),
                            dense_shape=x.dense_shape), 1))

    gather_indices = term_count_per_doc.indices[:, 0]
    gathered_doc_sizes = tf.gather(dense_doc_sizes, gather_indices)

    term_frequency = (tf.to_double(term_count_per_doc.values) /
                      tf.to_double(gathered_doc_sizes))
    return tf.SparseTensor(indices=term_count_per_doc.indices,
                           values=term_frequency,
                           dense_shape=term_count_per_doc.dense_shape)
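# Added example: a small sketch of how _to_term_frequency and
# _count_docs_with_term compose, assuming TensorFlow 1.x. The toy documents
# and vocabulary ids are made up: doc 0 -> [3, 5, 3], doc 1 -> [2].
import tensorflow as tf

x = tf.SparseTensor(
    indices=[[0, 0], [0, 1], [0, 2], [1, 0]],
    values=tf.constant([3, 5, 3, 2], dtype=tf.int64),
    dense_shape=[2, 3])

term_freq = _to_term_frequency(x, vocab_size=8)   # sparse (2, 8) frequencies
doc_counts = _count_docs_with_term(term_freq)     # dense (1, 8) counts

with tf.Session() as sess:
    tf_values, counts = sess.run([term_freq, doc_counts])
    # term_freq holds 2/3 for (doc 0, term 3), 1/3 for (doc 0, term 5)
    # and 1.0 for (doc 1, term 2); counts is 1 for terms 2, 3 and 5.
    print(tf_values)
    print(counts)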
def conv(features, adj, weights):
    degree = tf.sparse_reduce_sum(adj, axis=1) + 1
    degree = tf.cast(degree, tf.float32)
    degree = tf.pow(degree, -0.5)

    adj = sparse_tensor_diag_matmul(adj, degree, transpose=True)
    adj = sparse_tensor_diag_matmul(adj, degree, transpose=False)

    output = tf.sparse_tensor_dense_matmul(adj, features)

    features = tf.transpose(features)
    features = tf.multiply(tf.multiply(degree, features), degree)
    features = tf.transpose(features)

    output = output + features

    return tf.matmul(output, weights)
def swtich_loss(prediction, ground_truth, for_brats, weight_map=None):
    if not for_brats:
        prediction = tf.cast(prediction, tf.float32)
        if len(ground_truth.shape) == len(prediction.shape):
            ground_truth = ground_truth[..., -1]
        one_hot = labels_to_one_hot(ground_truth, tf.shape(prediction)[-1])
        dice_numerator = 2.0 * tf.sparse_reduce_sum(one_hot * prediction,
                                                    reduction_axes=[0])
        dice_denominator = \
            tf.reduce_sum(tf.square(prediction), reduction_indices=[0]) + \
            tf.sparse_reduce_sum(one_hot, reduction_axes=[0])
        epsilon_denominator = 0.00001

        dice_score = dice_numerator / (dice_denominator + epsilon_denominator)
        dice_score.set_shape([7])
        # dice_score.set_shape([n_classes])
        # minimising (1 - dice_coefficients)
        return 1.0 - tf.reduce_mean(dice_score[:4])
    else:
        prediction = tf.cast(prediction, tf.float32)
        if len(ground_truth.shape) == len(prediction.shape):
            ground_truth = ground_truth[..., -1]
        one_hot = labels_to_one_hot(ground_truth, tf.shape(prediction)[-1])
        #prediction = tf.concat([tf.reduce_sum(prediction[:,:4],axis=1,keep_dims=True),prediction[:,1:]],axis=-1)
        #one_hot = tf.concat([tf.expand_dims(one_hot[:,0],axis=-1),one_hot[:,-3:]],axis=-1)
        if weight_map is not None:
            n_classes = prediction.shape[1].value
            weight_map_nclasses = tf.reshape(
                tf.tile(weight_map, [n_classes]), prediction.get_shape())
            dice_numerator = 2.0 * tf.sparse_reduce_sum(
                weight_map_nclasses * one_hot * prediction,
                reduction_axes=[0])
            dice_denominator = \
                tf.reduce_sum(weight_map_nclasses * tf.square(prediction),
                              reduction_indices=[0]) + \
                tf.sparse_reduce_sum(one_hot * weight_map_nclasses,
                                     reduction_axes=[0])
        else:
            dice_numerator = 2.0 * tf.sparse_reduce_sum(
                one_hot * prediction, reduction_axes=[0])
            dice_denominator = \
                tf.reduce_sum(tf.square(prediction), reduction_indices=[0]) + \
                tf.sparse_reduce_sum(one_hot, reduction_axes=[0])
        epsilon_denominator = 0.00001

        dice_score = dice_numerator / (dice_denominator + epsilon_denominator)
        # minimising (1 - dice_coefficients)
        #dice_score[:4] = 0
        #dice_score = tf.concat([dice_score[-3:],dice_score[:1]],axis=-1)
        print(dice_score)
        return 1.0 - tf.reduce_mean(dice_score)
def parse(self, tensors, dtype):
    atoms = tensors['atoms']
    sparse = 0.0
    for key, val in self.dress.items():
        sparse += tf.cast(tf.equal(atoms.sparse, key), dtype) * val
    sparse = tf.SparseTensor(atoms.indices, sparse, atoms.mask.shape)
    energy = tf.sparse_reduce_sum(sparse, [-1])
    if 'e_data' in tensors:
        # We are in training
        tensors['e_data'] -= energy
        tensors['e_data'] *= 627.509
    tensors['energy'] = tf.constant(0.0)
def encoder_attr(self, embs_cate, w_list, b_list):
    embs_cate = tf.nn.l2_normalize(embs_cate, axis=1)
    embs_cnxt = tf.nn.l2_normalize(
        self.qInfer_network_sparse(w_list, b_list), axis=1)
    p_assign = tf.matmul(embs_cnxt, embs_cate, transpose_b=True) / self.tau
    if not self.gumbel:
        cates = tf.nn.softmax(p_assign, axis=1)
    else:
        cates_dist = RelaxedOneHotCategorical(1, p_assign)
        cates_sample = cates_dist.sample()
        cates_mode = tf.nn.softmax(p_assign, axis=1)
        cates = (self.is_training_ph * cates_sample +
                 (1 - self.is_training_ph) * cates_mode)

    # VAE based encoding
    z_list = []
    zItem_list, cateItem_list = [], []
    kl = None
    x_input2attr = tf.sparse_tensor_dense_matmul(self.kg_mat, self.input_ph,
                                                 adjoint_a=True,
                                                 adjoint_b=True)
    x_input2attr = tf.transpose(x_input2attr)
    for k in range(self.K):
        cates_k = tf.reshape(cates[:, k], (1, -1))

        # q-network for user aspects
        x_k = x_input2attr * cates_k
        mu_k, std_k, kl_k = self.q_network(x_k, w_list, b_list)
        eps = tf.random_normal(tf.shape(std_k), dtype=tf.float64)
        z_k = mu_k + self.is_training_ph * eps * std_k
        z_list.append(z_k)
        kl = (kl_k if (kl is None) else (kl + kl_k))

        # q-network for item aspects
        x_k = self.kg_mat.__mul__(cates_k)
        mu_k, std_k, kl_k = self.q_network_sparse(x_k, w_list, b_list)
        eps = tf.random_normal(tf.shape(std_k), dtype=tf.float64)
        z_k = mu_k + self.is_training_ph * eps * std_k
        zItem_list.append(z_k)

        cates_sum_k = tf.sparse_reduce_sum(x_k, axis=1)
        cates_sum_k = tf.reshape(cates_sum_k, (1, -1))
        cateItem_list.append(cates_sum_k / tf.reduce_sum(cates_sum_k))

    return z_list, zItem_list, cateItem_list, kl
def make_bag_of_words(self, active_words, doc_tokens, batch_size, num_samples,
                      doc_len, vocab_size):
    """Turn sequences of words with extraction labels into a bag of words.

    Args:
      active_words: batch of binary masks indicating picked words.
      doc_tokens: batch of token sequences for each document.
      batch_size: number of sequences in batch.
      num_samples: number of samples of masks for each sequence.
      doc_len: length of sequences.
      vocab_size: size of vocabulary.

    Returns:
      dense_pred_counts: batch of bags of words.
    """
    # can't make a sparse count tensor directly since doesn't support repeated
    # words -- make a new sparse tensor with extra indices to handle repeats
    with tf.variable_scope("bag_of_words"):
        batch_idx = shared_util.repeat_row(num_samples * doc_len,
                                           tf.range(0, batch_size, 1))
        sample_idx = tf.tile(
            shared_util.repeat_row(doc_len, tf.range(0, num_samples, 1)),
            [batch_size])
        tok_idx = tf.tile(tf.range(0, doc_len, 1),
                          [batch_size * num_samples])
        vocab_idx = tf.tile(
            tf.reshape(doc_tokens, [batch_size, 1, doc_len]),
            [1, num_samples, 1])

        active_tok_indices = tf.concat(1, [
            tf.reshape(batch_idx, [-1, 1]),
            tf.reshape(sample_idx, [-1, 1]),
            tf.reshape(tok_idx, [-1, 1]),
            tf.reshape(vocab_idx, [-1, 1])
        ])

        active_toks_sparse = tf.SparseTensor(
            indices=tf.to_int64(active_tok_indices),
            values=tf.reshape(active_words, [-1]),
            shape=tf.to_int64(
                tf.pack([batch_size, num_samples, doc_len, vocab_size])))

        dense_pred_counts = tf.sparse_reduce_sum(active_toks_sparse,
                                                 reduction_axes=2)

    return dense_pred_counts
def vector_to_adjacency_sym_sparse(inputs):
    G, e, dense_shape = inputs
    e = add_epsilon(e)
    A = tf.SparseTensor(indices=G.indices, values=e, dense_shape=dense_shape)
    D = tf.pow(tf.sparse_reduce_sum(A, 1), -0.5)

    # row wise normalization
    Drow = tf.gather(D, G.indices[:, 0])
    # column wise normalization, currently disabled
    # Dcol = tf.gather(D, G.indices[:, 1])

    # multiply values by D
    # e_ = tf.multiply(tf.multiply(Dcol, e), Drow)
    e_ = tf.multiply(e, Drow)
    A_ = tf.SparseTensor(indices=G.indices, values=e_,
                         dense_shape=dense_shape)
    return A_
def body(x, previous):
    # batch [max_rule_len, batch_size, num_relations]
    # ==> a_vector : [batch_size, num_relations, 1, 1]
    a_vector = tf.nn.embedding_lookup(self.a_matrix, x)
    weighted_operator_tensor = tensor_mul_batch_vector(a_vector,
                                                       self.operator_tensor)
    # a_vector_exp = tf.expand_dims(tf.expand_dims(a_vector, 2), 2)
    # [batch_size, num_relations, num_entities, num_entities]
    # weighted_operator_tensor = tf.multiply(a_vector_exp,
    #     tf.expand_dims(self.operator_tensor, 0))

    # score with previous input
    # [batch_size,num_relations,num_entities,num_entities] * [batch_size,1,num_entities]
    u = tf.sparse_reduce_sum(weighted_operator_tensor.__mul__(
        tf.expand_dims(tf.expand_dims(previous, 1), 1)), 3)
    # sum up relation weighted vectors
    u_sum = tf.reduce_sum(u, axis=1)
    return [x + 1, u_sum]
def ElectrostaticDampedShiftedLinear(Ds, Qs, NZP, alpha, Rc):
    """
    A tensorflow linear scaling implementation of the Damped Shifted
    Electrostatic Force
    http://aip.scitation.org.proxy.library.nd.edu/doi/pdf/10.1063/1.2206581
    Batched over molecules.

    Args:
        Ds: Distances Enumerated by NZP (flat)
        Qs: A batch of Atomic Charges. (nmol X maxatom)
        NZP: a list of nonzero atom pairs NNZ X (mol, i, j).
        alpha: DSF alpha parameter (~0.2)
        Rc: DSF Rc parameter. (15A)
    Returns
        A #Mols vector of damped shifted electrostatic energies.
    """
    twooversqrtpi = tf.constant(1.1283791671, dtype=tf.float64)
    NZP_shape = tf.shape(NZP)
    Qs_shp = tf.shape(Qs)
    maxnpairs = NZP_shape[0]
    nmols = Qs_shp[0]
    Ii = tf.slice(NZP, [0, 0], [-1, 2])
    Ij = tf.concat(
        [tf.slice(NZP, [0, 0], [-1, 1]), tf.slice(NZP, [0, 2], [-1, 1])], 1)
    Qi = tf.reshape(tf.gather_nd(Qs, Ii), [maxnpairs])
    Qj = tf.reshape(tf.gather_nd(Qs, Ij), [maxnpairs])
    # Gather desired charge products.
    Qij = Qi * Qj
    # This is Dan's Equation (18)
    XX = alpha * Rc
    ZZ = tf.erfc(XX) / Rc
    YY = twooversqrtpi * alpha * tf.exp(-XX * XX) / Rc
    K = Qij * (tf.erfc(alpha * Ds) / Ds - ZZ + (Ds - Rc) * (ZZ / Rc + YY))
    K = tf.where(tf.is_nan(K), tf.zeros_like(K), K)
    range_index = tf.reshape(
        tf.range(tf.cast(maxnpairs, tf.int64), dtype=tf.int64),
        [maxnpairs, 1])
    mol_index = tf.reshape(tf.slice(NZP, [0, 0], [-1, 1]), [maxnpairs, 1])
    inds = tf.reshape(tf.stack([mol_index, range_index], axis=1),
                      [maxnpairs, 2])
    # Now use the sparse reduce sum trick to scatter this into mols.
    sp_atomoutputs = tf.SparseTensor(
        inds,
        tf.reshape(K, [maxnpairs]),
        dense_shape=[tf.cast(nmols, tf.int64), tf.cast(maxnpairs, tf.int64)])
    return tf.sparse_reduce_sum(sp_atomoutputs, axis=1)
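# Added example: the "sparse reduce sum trick" above, in isolation. It
# scatters a flat per-pair vector into per-molecule sums by giving every
# molecule its own sparse row (TensorFlow 1.x assumed; the pair energies and
# molecule indices are made up). tf.segment_sum would be a dense alternative.
import tensorflow as tf

pair_energy = tf.constant([0.5, 1.0, 0.25, 2.0], dtype=tf.float64)
mol_index = tf.constant([0, 0, 1, 2], dtype=tf.int64)   # 3 molecules
npairs = tf.cast(tf.shape(pair_energy)[0], tf.int64)
nmols = tf.constant(3, dtype=tf.int64)

inds = tf.stack([mol_index, tf.range(npairs, dtype=tf.int64)], axis=1)
per_mol = tf.sparse_reduce_sum(
    tf.SparseTensor(inds, pair_energy,
                    dense_shape=tf.stack([nmols, npairs])),
    axis=1)

with tf.Session() as sess:
    print(sess.run(per_mol))  # [1.5, 0.25, 2.0]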
def _call(self, inputs):
    x = inputs

    # dropout
    if self.sparse_inputs:
        x = sparse_dropout(x, 1 - self.dropout, self.num_features_nonzero)
    else:
        x = tf.nn.dropout(x, 1 - self.dropout)

    # sum x
    if self.sparse_inputs:
        sx = tf.sparse_reduce_sum(x, axis=1, keep_dims=True)
    else:
        sx = tf.reduce_sum(x, axis=1, keep_dims=True)

    # convolve
    supports = list()
    for i in range(len(self.support)):
        if not self.featureless:
            pre_sup = dot(x, self.vars['weights_' + str(i)],
                          sparse=self.sparse_inputs)
            pre_sup_b = dot(x, self.vars['weights_b_' + str(i)],
                            sparse=self.sparse_inputs)
        else:
            pre_sup = self.vars['weights_' + str(i)]
            pre_sup_b = self.vars['weights_b_' + str(i)]
        # support = dot(self.support[i], pre_sup, sparse=True)
        # supports.append(support)

        # calculate second order interaction: XW * sum(X)
        # pre_sup_sec_ord = tf.multiply(pre_sup, pre_sup_b) * self.alpha
        pre_sup_sec_ord = tf.multiply(pre_sup, pre_sup_b) * self.vars['alp']
        # pre_sup_all = tf.concat([pre_sup, pre_sup_sec_ord], axis=1)
        pre_sup_all = tf.add_n([pre_sup, pre_sup_b, pre_sup_sec_ord])
        support = dot(self.support[i], pre_sup_all, sparse=True)
        supports.append(support)
    output = tf.add_n(supports)

    # bias
    if self.bias:
        output += self.vars['bias']

    return self.act(output)
def soft_ncut(image, image_segment, image_weights):
    """
    Args:
        image: [B, H, W, C]
        image_segment: [B, H, W, K]
        image_weights: [B, H*W, H*W]
    Returns:
        Soft_Ncut: scalar
    """
    batch_size = tf.shape(image)[0]
    num_class = tf.shape(image_segment)[-1]
    image_shape = image.get_shape()
    weight_size = image_shape[1].value * image_shape[2].value
    image_segment = tf.transpose(image_segment, [0, 3, 1, 2])  # [B, K, H, W]
    image_segment = tf.reshape(
        image_segment,
        tf.stack([batch_size, num_class, weight_size]))  # [B, K, H*W]

    # Dis-association
    # [B0, H*W, H*W] @ [B1, K1, H*W] contract on [[2],[2]] = [B0, H*W, B1, K1]
    W_Ak = sparse_tensor_dense_tensordot(image_weights, image_segment,
                                         axes=[[2], [2]])
    W_Ak = tf.transpose(W_Ak, [0, 2, 3, 1])  # [B0, B1, K1, H*W]
    W_Ak = sycronize_axes(W_Ak, [0, 1], tensor_dims=4)  # [B0=B1, K1, H*W]
    # [B1, K1, H*W] @ [B2, K2, H*W] contract on [[2],[2]] = [B1, K1, B2, K2]
    dis_assoc = tf.tensordot(W_Ak, image_segment, axes=[[2], [2]])
    dis_assoc = sycronize_axes(dis_assoc, [0, 2], tensor_dims=4)  # [B1=B2, K1, K2]
    dis_assoc = sycronize_axes(dis_assoc, [1, 2], tensor_dims=3)  # [K1=K2, B1=B2]
    dis_assoc = tf.transpose(dis_assoc, [1, 0])  # [B1=B2, K1=K2]
    dis_assoc = tf.identity(dis_assoc, name="dis_assoc")

    # Association
    # image_segment: [B, K, H*W]
    sum_W = tf.sparse_reduce_sum(image_weights, axis=2)  # [B, W*H]
    assoc = tf.tensordot(image_segment, sum_W, axes=[2, 1])  # [B, K, B]
    assoc = sycronize_axes(assoc, [0, 2], tensor_dims=3)  # [B0=B1, K0]
    assoc = tf.identity(assoc, name="assoc")

    utils.add_activation_summary(dis_assoc)
    utils.add_activation_summary(assoc)

    # Soft NCut
    eps = 1e-6
    soft_ncut = tf.cast(num_class, tf.float32) - \
        tf.reduce_sum((dis_assoc + eps) / (assoc + eps), axis=1)

    return soft_ncut
def _sparse_ww(config, Rs, predictions_per_sample, zijs, bias):
    # Zj has shape (batch_size, 1, output_width) dense tensor
    zj = tf.sparse_reduce_sum(zijs, 1, keep_dims=True)

    # Stabilizer
    zj_sign = tf.sign(zj)
    zj_sign = tf.where(tf.equal(zj, 0), tf.ones_like(zj_sign), zj_sign)
    zj += zj_sign * EPSILON

    # construct bias to add to zj
    fractions = zijs / zj

    # Distribute the relevance according to the fractions
    R_new = _sparse_distribute_relevances(Rs, zijs.dense_shape[0],
                                          zijs.dense_shape[1],
                                          predictions_per_sample, fractions)

    return R_new
def forward(self, save_emb=False):
    denom = tf.reduce_sum(self.input_ph, axis=1, keep_dims=True)
    self.input_ph = self.input_ph.__div__(denom)
    denom = tf.sparse_reduce_sum(self.kg_mat, axis=1, keep_dims=True)
    self.kg_mat = self.kg_mat.__div__(denom)

    zAct_list, cAct_list, klAct = self.encoder_item(
        self.cate_item, self.w_qItem, self.b_qItem)
    zAsp_list, zItem_list, cateItem_list, klAsp = self.encoder_attr(
        self.cate_attr, self.w_qAttr, self.b_qAttr)
    logits, loss_recon = self.decoder(zAct_list, cAct_list,
                                      self.embs_item_self,
                                      self.embs_item_enti,
                                      zAsp_list, zItem_list, cateItem_list)

    return tf.train.Saver(), logits, loss_recon, klAct
def _tfidf(x):
    split = tf.string_split(x)
    table = lookup.string_to_index_table_from_tensor(
        vocab, num_oov_buckets=0, default_value=len(vocab))
    int_text = table.lookup(split)

    term_count_per_doc = get_term_count_per_doc(int_text, len(vocab) + 1)

    # Add one to the reduced term frequencies to avoid dividing by zero.
    example_count_with_oov = tf.to_float(tf.concat([example_count, [0]], 0))
    idf = tf.log(tf.to_float(corpus_size) / (1.0 + example_count_with_oov))

    dense_doc_sizes = tf.to_float(tf.sparse_reduce_sum(tf.SparseTensor(
        indices=int_text.indices,
        values=tf.ones_like(int_text.values),
        dense_shape=int_text.dense_shape), 1))

    idf_times_term_count = tf.multiply(
        tf.gather(idf, term_count_per_doc.indices[:, 1]),
        tf.to_float(term_count_per_doc.values))
    tfidf_weights = (
        idf_times_term_count /
        tf.gather(dense_doc_sizes, term_count_per_doc.indices[:, 0]))

    tfidf_ids = term_count_per_doc.indices[:, 1]

    indices = tf.stack([term_count_per_doc.indices[:, 0],
                        segment_indices(term_count_per_doc.indices[:, 0],
                                        int_text.dense_shape[0])],
                       1)
    dense_shape = term_count_per_doc.dense_shape

    tfidf_st_weights = tf.SparseTensor(indices=indices,
                                       values=tfidf_weights,
                                       dense_shape=dense_shape)
    tfidf_st_ids = tf.SparseTensor(indices=indices,
                                   values=tfidf_ids,
                                   dense_shape=dense_shape)

    if part == 'ids':
        return tfidf_st_ids
    else:
        return tfidf_st_weights
def calculate_final_dist(self, vocab_dist, attn_dist, pointer_gen,
                         passage_words, passage_mask=None):
    '''
    vocab_dist: [batch_size, vsize]
    attn_dist: [batch_size, passage_length]
    pointer_gen: [batch_size, 1]
    passage_words: [batch_size, passage_length]
    passage_mask: [batch_size, passage_length]
    '''
    input_shape = tf.shape(vocab_dist)
    batch_size = input_shape[0]
    vsize = input_shape[1]
    passage_length = tf.shape(passage_words)[1]

    with tf.variable_scope('final_distribution'):
        vocab_dist = pointer_gen * vocab_dist
        attn_dist = (1.0 - pointer_gen) * attn_dist

        # match attn_dist[batch_size, passage_length] to a sparse one-hot
        # representation [batch_size, passage_length, vsize]
        batch_nums = tf.range(0, limit=batch_size)  # shape (batch_size)
        batch_nums = tf.expand_dims(batch_nums, axis=1)  # shape (batch_size, 1)
        batch_nums = tf.tile(batch_nums, [1, passage_length])  # shape (batch_size, passage_length)

        step_nums = tf.range(0, limit=passage_length)  # [passage_length]
        step_nums = tf.expand_dims(step_nums, axis=0)  # shape (1, passage_length)
        step_nums = tf.tile(step_nums, [batch_size, 1])  # shape (batch_size, passage_length)

        indices = tf.stack((batch_nums, step_nums, passage_words), axis=2)  # shape (batch_size, passage_length, 3)
        indices = tf.reshape(indices, [-1, 3])  # [batch_size * passage_length, 3]
        indices = tf.cast(indices, tf.int64)

        shape = [batch_size, passage_length, vsize]
        shape = tf.cast(shape, tf.int64)

        attn_dist = tf.reshape(attn_dist, shape=[-1])  # [batch_size*passage_length]
        one_hot_spare_representation = tf.SparseTensor(
            indices=indices,
            values=attn_dist,
            dense_shape=shape)  # [batch_size, passage_length, vsize]

        if passage_mask is not None:
            passage_mask = tf.expand_dims(passage_mask, axis=-1)
            one_hot_spare_representation = one_hot_spare_representation * passage_mask

        one_hot_spare_representation = tf.sparse_reduce_sum(
            one_hot_spare_representation, axis=1)  # [batch_size, vsize]

        vocab_dist = tf.add(vocab_dist, one_hot_spare_representation)

    return vocab_dist  # [batch_size, vsize]
def merge_prob_dist_for_one_step(self, vocab_dist, attn_dist, p_gen,
                                 node_idxs, node_mask=None):
    '''
    vocab_dist: [batch_size, vsize]
    attn_dist: [batch_size, passage_length]
    p_gen: [batch_size, 1]
    node_idxs: [batch_size, passage_length]
    node_mask: [batch_size, passage_length]
    '''
    input_shape = tf.shape(vocab_dist)
    batch_size = input_shape[0]
    vsize = input_shape[1]
    passage_length = tf.shape(node_idxs)[1]

    with tf.variable_scope('final_distribution'):
        vocab_dist = p_gen * vocab_dist
        attn_dist = (1.0 - p_gen) * attn_dist

        # match attn_dist[batch_size, passage_length] to a sparse one-hot
        # representation [batch_size, passage_length, vsize]
        batch_nums = tf.range(0, limit=batch_size)  # shape (batch_size)
        batch_nums = tf.expand_dims(batch_nums, axis=1)  # shape (batch_size, 1)
        batch_nums = tf.tile(batch_nums, [1, passage_length])  # shape (batch_size, passage_length)

        step_nums = tf.range(0, limit=passage_length)  # [passage_length]
        step_nums = tf.expand_dims(step_nums, axis=0)  # shape (1, passage_length)
        step_nums = tf.tile(step_nums, [batch_size, 1])  # shape (batch_size, passage_length)

        indices = tf.stack((batch_nums, step_nums, node_idxs), axis=2)  # shape (batch_size, passage_length, 3)
        indices = tf.reshape(indices, [-1, 3])  # [batch_size * passage_length, 3]
        indices = tf.cast(indices, tf.int64)

        shape = [batch_size, passage_length, vsize]
        shape = tf.cast(shape, tf.int64)

        attn_dist = tf.reshape(attn_dist, shape=[-1])  # [batch_size*passage_length]
        one_hot_spare_rep = tf.SparseTensor(
            indices=indices,
            values=attn_dist,
            dense_shape=shape)  # [batch_size, passage_length, vsize]

        if node_mask is not None:
            node_mask = tf.expand_dims(node_mask, axis=-1)
            one_hot_spare_rep = one_hot_spare_rep * node_mask

        # Everything above projects the attention distribution onto the
        # vocabulary so that it can be added to P_vocab.
        one_hot_spare_rep = tf.sparse_reduce_sum(one_hot_spare_rep, axis=1)  # [batch_size, vsize]

        vocab_dist = tf.add(vocab_dist, one_hot_spare_rep)

    return vocab_dist  # [batch_size, vsize]
def text_to_label(self, batch_text, return_dense=True, pad_value=-1,
                  return_lengths=False):
    """Convert character strings to integer label sequences (English text only).

    Args:
        batch_text: ascii encoded string tensor with shape [batch_size]
        return_dense: whether to return dense labels
        pad_value: Value used to pad labels to the same length.
        return_lengths: if True, also return text lengths

    Returns:
        labels: sparse or dense tensor of labels
    """
    """
    # tf.string_split splits each string on the delimiter; for example, with
    # source = ["hello world", "a b c"] and the default whitespace delimiter
    # it returns a tf.SparseTensor with st.indices = [0, 0; 0, 1; 1, 0; 1, 1; 1, 2],
    # st.shape = [2, 3] and st.values = ['hello', 'world', 'a', 'b', 'c'].
    # With delimiter='' the split is per character instead.
    # The first column of the indices corresponds to the row in source and the second column
    # corresponds to the index of the split component in this row.
    """
    chars = tf.string_split(batch_text, delimiter='')
    labels_sp = tf.SparseTensor(
        chars.indices,
        self._char_to_label_table.lookup(chars.values),
        chars.dense_shape)

    if return_dense:
        labels = tf.sparse_tensor_to_dense(labels_sp, default_value=pad_value)
    else:
        labels = labels_sp

    if return_lengths:
        text_lengths = tf.sparse_reduce_sum(
            tf.SparseTensor(
                chars.indices,
                tf.fill([tf.shape(chars.indices)[0]], 1),
                chars.dense_shape),
            axis=1)
        text_lengths.set_shape([None])
        return labels, text_lengths
    else:
        return labels
def count_nonzero_wrapper(X, optype):
    """Wrapper for handling sparse and dense versions of `tf.count_nonzero`.

    Parameters
    ----------
    X : tf.Tensor (N, K)
    optype : str, {'dense', 'sparse'}

    Returns
    -------
    tf.Tensor (1, K)
    """
    with tf.name_scope('count_nonzero_wrapper') as scope:
        if optype == 'dense':
            return tf.count_nonzero(X, axis=0, keep_dims=True)
        elif optype == 'sparse':
            indicator_X = tf.SparseTensor(X.indices, tf.ones_like(X.values),
                                          X.dense_shape)
            return tf.sparse_reduce_sum(indicator_X, axis=0, keep_dims=True)
        else:
            raise NameError('Unknown input type in count_nonzero_wrapper')
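# Added example: a quick check of count_nonzero_wrapper on equivalent dense
# and sparse inputs (TensorFlow 1.x assumed; the sample matrix is made up).
# Note the dense branch returns int64 counts while the sparse branch returns
# sums of the ones indicator in the values' dtype.
import tensorflow as tf

dense_X = tf.constant([[1.0, 0.0],
                       [2.0, 3.0],
                       [0.0, 0.0]])
sparse_X = tf.SparseTensor(indices=[[0, 0], [1, 0], [1, 1]],
                           values=[1.0, 2.0, 3.0],
                           dense_shape=[3, 2])

with tf.Session() as sess:
    print(sess.run(count_nonzero_wrapper(dense_X, 'dense')))    # [[2 1]]
    print(sess.run(count_nonzero_wrapper(sparse_X, 'sparse')))  # [[2. 1.]]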
def LJKernelsLinear(Ds, Zs, Ee, Re, NZP):
    """
    Batched over molecules.

    Args:
        Ds: Distances Enumerated by NZP (flat)
        Zs: A batch of Atomic Numbers. (nmol X maxatom X 1)
        Ee: a matrix of LJ well depths.
        Re: a matrix of Bond minima.
        NZP: a list of nonzero atom pairs NNZ X (mol, i, j).
    Returns
        A #Mols vector of LJ energies.
    """
    NZP_shape = tf.shape(NZP)
    Zs_shp = tf.shape(Zs)
    maxnpairs = NZP_shape[0]
    nmols = Zs_shp[0]
    Ii = tf.slice(NZP, [0, 0], [-1, 2])
    Ij = tf.concat(
        [tf.slice(NZP, [0, 0], [-1, 1]), tf.slice(NZP, [0, 2], [-1, 1])], 1)
    Zi = tf.reshape(tf.gather_nd(Zs, Ii), [maxnpairs])
    Zj = tf.reshape(tf.gather_nd(Zs, Ij), [maxnpairs])
    # Gather desired LJ parameters.
    Zij = tf.stack([Zi, Zj], axis=1)
    Eeij = tf.reshape(tf.gather_nd(Ee, Zij), [maxnpairs])
    Reij = tf.reshape(tf.gather_nd(Re, Zij), [maxnpairs])
    R = Reij / tf.reshape(Ds, [maxnpairs])
    K = Eeij * (tf.pow(R, 12.0) - 2.0 * tf.pow(R, 6.0))
    K = tf.where(tf.is_nan(K), tf.zeros_like(K), K)
    range_index = tf.reshape(
        tf.range(tf.cast(maxnpairs, tf.int64), dtype=tf.int64),
        [maxnpairs, 1])
    mol_index = tf.reshape(tf.slice(NZP, [0, 0], [-1, 1]), [maxnpairs, 1])
    inds = tf.reshape(tf.stack([mol_index, range_index], axis=1),
                      [maxnpairs, 2])
    # Now use the sparse reduce sum trick to scatter this into mols.
    sp_atomoutputs = tf.SparseTensor(
        inds,
        tf.reshape(K, [maxnpairs]),
        dense_shape=[tf.cast(nmols, tf.int64), tf.cast(maxnpairs, tf.int64)])
    return tf.sparse_reduce_sum(sp_atomoutputs, axis=1)
def text_to_labels(self, text, return_dense=True, pad_value=-1,
                   return_lengths=False):
    """Convert text strings to label sequences.

    Args:
        text: ascii encoded string tensor with shape [batch_size]
        return_dense: whether to return dense labels
        pad_value: Value used to pad labels to the same length.
        return_lengths: if True, also return text lengths

    Returns:
        labels: sparse or dense tensor of labels
    """
    batch_size = tf.shape(text)[0]
    chars = tf.string_split(text, delimiter='')
    labels_sp = tf.SparseTensor(
        chars.indices,
        self._char_to_label_table.lookup(chars.values),
        chars.dense_shape)

    if return_dense:
        labels = tf.sparse_tensor_to_dense(labels_sp, default_value=pad_value)
    else:
        labels = labels_sp

    if return_lengths:
        text_lengths = tf.sparse_reduce_sum(
            tf.SparseTensor(
                chars.indices,
                tf.fill([tf.shape(chars.indices)[0]], 1),
                chars.dense_shape),
            axis=1)
        text_lengths.set_shape([None])
        return labels, text_lengths
    else:
        return labels
def test_t4():
    print("----------------------")
    sp1 = tf.SparseTensor(indices=[[0, 0], [0, 1], [1, 0]],
                          values=[12, -1, 0],
                          dense_shape=(3, 3))
    #sp1.values = sp1.values+1
    sp1_dense = tf.sparse_tensor_to_dense(sp1, default_value=-1)
    sess = tf.Session()
    print("sp1:", sess.run(sp1))
    sp1_sign = tf.sign(sp1)
    sp2 = tf.SparseTensor(indices=sp1.indices,
                          values=sp1.values + 2,
                          dense_shape=sp1.dense_shape)
    #sp1_sign = tf.sign(tf.sparse_add(sp1)))
    #print("sp1_sign:",sess.run(sp1_sign))
    print("sp2:", sess.run(sp2))
    sp2_sign = tf.sign(sp2)
    print("sp2_sign:", sess.run(sp2_sign))
    print("sp2_sign_sum:", sess.run(tf.sparse_reduce_sum(sp2_sign, axis=1)))
    print("sp2_sign_sum_fun:",
          sess.run(count_sparse_nonzero(sp1, axis=1, add_default=2)))
def _post_setup(self):
    self.E_predict = _tf.reduce_sum(
        [_tf.sparse_tensor_dense_matmul(self.atom_maps[t],
                                        self.ANNs[t].output)
         for t in self.atom_types],
        axis=[0, 2], name="E_prediction")

    # Tensorflow operation to initialize the variables of the atomic networks
    #self.init_vars = [a.init_vars for a in self.ANNs.itervalues()]

    self.num_atoms = _tf.reduce_sum(
        [_tf.sparse_reduce_sum(self.atom_maps[t], axis=1)
         for t in self.atom_types],
        axis=0, name="NumberOfAtoms")

    # Tensorflow operation that calculates the sum squared error per atom.
    # Note that the whole error per atom is squared.
    with _tf.name_scope("RMSE"):
        self.rmse_weights = _tf.placeholder(shape=(None,),
                                            dtype=precision,
                                            name="weights")
        self.rmse = self.error_scaling * _tf.sqrt(
            _tf.reduce_mean(
                (self.target - self.E_predict)**2 * self.rmse_weights))
        #self.rmse = self.error_scaling*_tf.sqrt(
        #    _tf.losses.mean_squared_error(self.target,
        #        self.E_predict, weights = 1.0/self.num_atoms**2))
        self.rmse_summ = _tf.summary.scalar("RMSE", self.rmse,
                                            family="performance")

    self.variables = _tf.get_collection(
        _tf.GraphKeys.MODEL_VARIABLES,
        scope=_tf.get_default_graph().get_name_scope())
    self.saver = _tf.train.Saver(self.variables, max_to_keep=None,
                                 save_relative_paths=True)
def weighted_margin_rank_batch(self, tf_prediction_serial, tf_interactions,
                               tf_sample_predictions, tf_n_items,
                               tf_n_sampled_items):
    positive_interaction_mask = tf.greater(tf_interactions.values, 0.0)
    positive_interaction_indices = tf.boolean_mask(tf_interactions.indices,
                                                   positive_interaction_mask)
    positive_interaction_values = tf.boolean_mask(tf_interactions.values,
                                                  positive_interaction_mask)
    positive_interactions = tf.SparseTensor(
        indices=positive_interaction_indices,
        values=positive_interaction_values,
        dense_shape=tf_interactions.dense_shape)
    listening_sum_per_item = tf.sparse_reduce_sum(positive_interactions, axis=0)
    gathered_sums = tf.gather(
        params=listening_sum_per_item,
        indices=tf.transpose(positive_interaction_indices)[1])

    # [ n_positive_interactions ]
    positive_predictions = tf.boolean_mask(tf_prediction_serial,
                                           positive_interaction_mask)

    n_items = tf.cast(tf_n_items, dtype=tf.float32)
    n_sampled_items = tf.cast(tf_n_sampled_items, dtype=tf.float32)

    # [ n_positive_interactions, n_sampled_items ]
    mapped_predictions_sample_per_interaction = tf.gather(
        params=tf_sample_predictions,
        indices=tf.transpose(positive_interaction_indices)[0])

    # [ n_positive_interactions, n_sampled_items ]
    summation_term = tf.maximum(
        1.0 - tf.expand_dims(positive_predictions, axis=1) +
        mapped_predictions_sample_per_interaction,
        0.0)

    # [ n_positive_interactions ]
    sampled_margin_rank = ((n_items / n_sampled_items) *
                           tf.reduce_sum(summation_term, axis=1) *
                           positive_interaction_values / gathered_sums)

    loss = tf.log(sampled_margin_rank + 1.0)

    return loss
def parse(self, tensors, dtype):
    nodes = tensors['nodes'][self.order]
    sparse = nodes.sparse
    for i, n_out in enumerate(self.n_nodes):
        sparse = tf.layers.dense(sparse, n_out,
                                 activation=self.act,
                                 name='{}-{}'.format(self.name, i))
    sparse = tf.layers.dense(sparse, 1,
                             use_bias=False, activation=None,
                             name='{}-en'.format(self.name))
    sparse = tf.squeeze(sparse, -1)
    sparse = tf.SparseTensor(nodes.indices, sparse, nodes.mask.shape)
    sparse = tf.sparse_reduce_sum(sparse,
                                  [-i - 1 for i in range(self.order + 1)])
    tensors['energy'] += sparse
def _encode_proto(values_dict, message_type):
    """A wrapper around encode_proto_op.encode_proto."""
    field_names = []
    sizes = []
    values = []
    for field_name, value in sorted(values_dict.items(), key=lambda x: x[0]):
        if isinstance(value, tf.SparseTensor):
            size = tf.sparse_reduce_sum(
                tf.SparseTensor(value.indices,
                                tf.ones_like(value.values, dtype=tf.int32),
                                value.dense_shape),
                axis=1)
            value = tf.sparse_tensor_to_dense(
                value, _DEFAULT_VALUE_BY_DTYPE[value.dtype])
        else:
            value = tf.reshape(value, [tf.shape(value)[0], -1])
            size = tf.fill((tf.shape(value)[0],), tf.shape(value)[1])
        field_names.append(field_name)
        values.append(value)
        sizes.append(size)

    sizes = tf.stack(sizes, axis=1)
    return encode_proto_op.encode_proto(sizes, values, field_names,
                                        message_type)
def attention3(self, inputs, support, la, num1, num2):
    # input: N * E
    attention_size = 64
    s_m = tf.sparse_tensor_dense_matmul(support, inputs)  # M*E, support: [M*N]
    sm_num = tf.sparse_reduce_sum(support, axis=1)  # M*1
    sm_mean = tf.divide(1, sm_num)
    s_m = tf.multiply(sm_mean, tf.transpose(s_m))  # M * e*M
    s_m = tf.transpose(s_m)  # M*E

    # TODO: W * sm
    inputs_t = tf.transpose(inputs)  # M*N
    w_sm = tf.tensordot(s_m, self.vars['w_omega_%s' % (la)], axes=1)  # [M*E] * [E*A] = [M*A]

    # TODO: modify
    support_3d = tf.sparse_reshape(support, [num2, 1, num1])  # M*N
    support_3d = tf.sparse_to_dense(support_3d.indices, [num2, 1, num1],
                                    support_3d.values,
                                    validate_indices=False,
                                    default_value=0.0)
    s_j_all = tf.transpose(tf.multiply(support_3d, inputs_t), [0, 2, 1])  # M*1*N * [E*N] = [M*E*N]
    w_sj_all = tf.tensordot(s_j_all, self.vars['w_omega_%s' % (la)], axes=1)  # [M*N*E] * [E*A]

    # M*A - M*N*A = M*N*A   M*N
    w_sm = tf.reshape(w_sm, [num2, 1, attention_size])  # M*1*A
    euclidean_set = tf.sqrt(tf.reduce_sum(tf.square(w_sm - w_sj_all), 2))  # [M*1*A] * [M*N*A]
    eucl = tf.tanh(euclidean_set)  # M*N
    print("euclidean computing success!")
    print(eucl.shape)

    vu = tf.multiply(self.vars['u_omega_%s' % (la)], eucl)
    # vu = tf.matmul(self.vars['u_omega_%s' % (la)], eucl)
    # vu = tf.tensordot(eucl, self.vars['u_omega_%s' % (la)], axes=1)
    print("vu computing success!")
    alphas = tf.nn.softmax(vu, name='alphas_%s' % (la))  # (B,T) shape
    print("alphas computing success!")

    return alphas
def q_network_sparse(self, x, w_list, b_list):
    # The q-network contains two layers, an embedding layer and a mapping layer
    mu_q, std_q, kl = None, None, None
    denom = tf.sparse_reduce_sum(tf.square(x), axis=1, keep_dims=True)
    repr = x.__div__(denom)
    for i, (w, b) in enumerate(zip(w_list, b_list)):
        if i != len(w_list) - 1:
            repr = tf.sparse_tensor_dense_matmul(repr, w,
                                                 adjoint_a=False,
                                                 adjoint_b=False)
            repr = tf.nn.tanh(repr)
        else:
            repr = tf.matmul(repr, w, a_is_sparse=(i == 0)) + b
            mu_q = repr[:, :self.dim]
            mu_q = tf.nn.l2_normalize(mu_q, axis=1)
            lnvarq_sub_lnvar0 = -repr[:, self.dim:]
            std_q = tf.exp(0.5 * lnvarq_sub_lnvar0) * self.std
            kl = tf.reduce_mean(
                tf.reduce_sum(
                    0.5 * (-lnvarq_sub_lnvar0 +
                           tf.exp(lnvarq_sub_lnvar0) - 1.),
                    axis=1))
    return mu_q, std_q, kl
def _sparse_distribute_bias(config, zijs, bias):
    # Return if bias or the bias strategy is none, or throw an error if it is
    # ALL, since ALL will kill the memory (remember we are in sparse land here ;) )
    if bias is None or config.bias_strategy == BIAS_STRATEGY.IGNORE:
        return zijs

    if config.bias_strategy == BIAS_STRATEGY.NONE:
        return zijs
    elif config.bias_strategy == BIAS_STRATEGY.ALL:
        raise NotImplementedError(
            "BIAS_STRATEGY.ALL is not implemented for sparse matmul")

    # Dense Shape (batch_size, input_width, output_width)
    indicators = tf.SparseTensor(
        zijs.indices,
        tf.where(tf.equal(zijs.values, 0),
                 tf.zeros_like(zijs.values),
                 tf.ones_like(zijs.values)),
        zijs.dense_shape)

    # Count all the indicators in each column
    # Shape: (batch_size, 1, output_width)
    counts = tf.sparse_reduce_sum(indicators, axis=1, keep_dims=True)

    # Hack to avoid dividing by zero (doesn't matter for the final result).
    counts = tf.where(tf.equal(counts, 0), tf.ones_like(counts), counts)

    # Divide the bias by broadcasting it to every sample in the batch
    bias_divided = bias / counts

    # Scale the indicators by the bias
    # Dense shape: (batch_size, input_width, output_width)
    bias_to_add = indicators * bias_divided

    # Create new zijs with the divided bias
    zijs_new = tf.SparseTensor(zijs.indices,
                               zijs.values + bias_to_add.values,
                               zijs.dense_shape)

    return zijs_new
def _sparse_epsilon(config, Rs, predictions_per_sample, zijs, bias):
    # Zj has shape (batch_size, 1, output_width) dense tensor
    zj = tf.sparse_reduce_sum(zijs, 1, keep_dims=True)

    # Prepare sparse tensor with duplicated bias for addition with zj
    if bias is not None and config.bias_strategy != BIAS_STRATEGY.IGNORE:
        zj = zj + bias

    zj_sign = tf.sign(zj)
    zj_sign = tf.where(tf.equal(zj, 0), tf.ones_like(zj_sign), zj_sign)
    zj += zj_sign * config.epsilon

    # Distribute bias according to config
    zijs = _sparse_distribute_bias(config, zijs, bias)

    # construct bias to add to zj
    fractions = zijs / zj

    # Distribute the relevance according to the fractions
    R_new = _sparse_distribute_relevances(Rs, zijs.dense_shape[0],
                                          zijs.dense_shape[1],
                                          predictions_per_sample, fractions)

    return R_new
def sparse_message_pass(node_states,
                        adjacency_matrices,
                        num_edge_types,
                        hidden_size,
                        use_bias=True,
                        average_aggregation=False,
                        name="sparse_ggnn"):
    """One message-passing step for a GNN with a sparse adjacency matrix.

    Implements equation 2 (the message passing step) in
    [Li et al. 2015](https://arxiv.org/abs/1511.05493).

    N = The number of nodes in each batch.
    H = The size of the hidden states.
    T = The number of edge types.

    Args:
      node_states: Initial states of each node in the graph. Shape is [N, H].
      adjacency_matrices: Adjacency matrix of directed edges for each edge
        type. Shape is [N, N, T] (sparse tensor).
      num_edge_types: The number of edge types. T.
      hidden_size: The size of the hidden state. H.
      use_bias: Whether to use bias in the hidden layer.
      average_aggregation: How to aggregate the incoming node messages. If
        average_aggregation is true, the messages are averaged. If it is
        false, they are summed.
      name: (optional) The scope within which tf variables should be created.

    Returns:
      The result of one step of Gated Graph Neural Network (GGNN) message
      passing. Shape: [N, H]
    """
    n = tf.shape(node_states)[0]
    t = num_edge_types
    incoming_edges_per_type = tf.sparse_reduce_sum(adjacency_matrices, axis=1)

    # Convert the adjacency matrix into shape [T, N, N] - one [N, N] adjacency
    # matrix for each edge type. Since sparse tensor multiplication only
    # supports two-dimensional tensors, we actually convert the adjacency
    # matrix into a [T * N, N] tensor.
    adjacency_matrices = tf.sparse_transpose(adjacency_matrices, [2, 0, 1])
    adjacency_matrices = tf.sparse_reshape(adjacency_matrices, [t * n, n])

    # Multiply the adjacency matrix by the node states, producing a [T * N, H]
    # tensor. For each (edge type, node) pair, this tensor stores the sum of
    # the hidden states of the node's neighbors over incoming edges of that
    # type.
    messages = tf.sparse_tensor_dense_matmul(adjacency_matrices, node_states)

    # Rearrange this tensor to have shape [N, T * H]. The incoming states of
    # each node's neighbors are summed by edge type and then concatenated
    # together into a single T * H vector.
    messages = tf.reshape(messages, [t, n, hidden_size])
    messages = tf.transpose(messages, [1, 0, 2])
    messages = tf.reshape(messages, [n, t * hidden_size])

    # Run each of those T * H vectors through a linear layer that produces
    # a vector of size H. This process is equivalent to running each H-sized
    # vector through a separate linear layer for each edge type and then
    # adding the results together.
    #
    # Note that, earlier on, we added together all of the states of neighbors
    # that were connected by edges of the same edge type. Since addition and
    # multiplying by a linear layer are commutative, this process was
    # equivalent to running each incoming edge through a linear layer
    # separately and then adding everything at the end.
    with tf.variable_scope(name, default_name="sparse_ggnn"):
        final_node_states = common_layers.dense(
            messages, hidden_size, use_bias=False)

        # Multiply the bias for each edge type by the number of incoming
        # edges of that type.
        if use_bias:
            bias = tf.get_variable("bias",
                                   initializer=tf.zeros([t, hidden_size]))
            final_node_states += tf.matmul(incoming_edges_per_type, bias)

        if average_aggregation:
            incoming_edges = tf.reduce_sum(incoming_edges_per_type, -1,
                                           keepdims=True)
            incoming_edges = tf.tile(incoming_edges, [1, hidden_size])
            final_node_states /= incoming_edges + 1e-7

    return final_node_states
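# Added example: a minimal sketch of driving sparse_message_pass with a toy
# graph, assuming TensorFlow 1.x and that tensor2tensor's common_layers is
# importable. The node count, hidden size and edges are made up.
import tensorflow as tf
from tensor2tensor.layers import common_layers

num_nodes, hidden_size, num_edge_types = 4, 8, 2
node_states = tf.random_normal([num_nodes, hidden_size])

# Sparse [N, N, T] adjacency with two directed edges, one per edge type.
adjacency = tf.SparseTensor(
    indices=[[0, 1, 0], [2, 3, 1]],
    values=[1.0, 1.0],
    dense_shape=[num_nodes, num_nodes, num_edge_types])

new_states = sparse_message_pass(node_states, adjacency,
                                 num_edge_types, hidden_size)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(new_states).shape)  # (4, 8)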