def _prediction_loop(t, av, ai, Rs, offset):
    # current_R shape: (batch_size, out_size)
    current_R = Rs.read(t)

    # Prepare the batch for the current prediction_per_sample to be broadcast
    # over the input dimension
    # current_R shape: (batch_size, 1, out_size)
    current_R = tf.expand_dims(current_R, 1)

    # Scale fractions with relevances for the current prediction_per_sample
    distributed_relevances = fractions * current_R

    # Reduce sum to get the relevances for the individual in_dimensions
    # new_relevances shape: (batch_size, in_size)
    new_relevances = tf.sparse_reduce_sum_sparse(distributed_relevances, 2)

    # Count how many values and indices to add to the tensor arrays
    value_cnt = tf.shape(new_relevances.values)[0]

    # Calculate the range of indices in the tensor arrays to write the
    # values and indices to
    scatter_range = tf.range(offset, offset + value_cnt, dtype=tf.int32)

    # Scatter the values of the new relevances
    av = av.scatter(scatter_range, new_relevances.values)

    # Prepend the prediction_per_sample dimension to be able to make a
    # sparse tensor of shape (predictions_per_sample, batch_size, in_width)
    # after the while loop
    new_indices = tf.pad(new_relevances.indices, [[0, 0], [1, 0]],
                         constant_values=tf.cast(t, dtype=tf.int64))

    # Scatter the indices of the new relevances
    ai = ai.scatter(scatter_range, new_indices)

    # Go to the next prediction_per_sample
    return t + 1, av, ai, Rs, offset + value_cnt
def _call(self, inputs):
    # Motif convolution: compute one activation per motif
    new_activations = []
    for m in range(self.num_motifs):
        x = inputs
        if self.sparse_inputs:
            x = sparse_dropout(x, 1 - self.dropout, self.num_features_nonzero)
        else:
            x = tf.nn.dropout(x, 1 - self.dropout)

        adj_positions = tf.sparse_split(sp_input=self.support[m],
                                        num_split=self.motif_positions[m],
                                        axis=0)
        supports = list()
        # For each position in the motif
        for k in range(0, self.motif_positions[m]):
            XW = dot(x, self.vars['weights_' + str(m) + '_' + str(k)],
                     sparse=self.sparse_inputs)
            temp = tf.sparse_reduce_sum_sparse(adj_positions[k], axis=0)
            support = dot(temp, XW, sparse=True)
            supports.append(support)
        output = tf.add_n(supports)

        if self.bias:
            output += self.vars['bias_' + str(m)]
        new_activations.append(self.act(output))
    return new_activations
def _to_term_frequency(x, vocab_size):
  """Creates SparseTensors of term frequency and term count per doc/term pair.

  Args:
    x: a SparseTensor of int64 representing string indices in vocab.
    vocab_size: a scalar int64 Tensor - the count of vocab used to turn the
      string into int64s, including any OOV buckets.

  Returns:
    a pair of SparseTensors (term_frequency, term_count), both with indices
    <doc_index_in_batch>, <term_index_in_vocab> and size
    (num_docs_in_batch, vocab_size).
  """
  # Construct intermediary sparse tensor with indices
  # [<doc>, <term_index_in_doc>, <vocab_id>] and tf.ones values.
  vocab_size = tf.convert_to_tensor(value=vocab_size, dtype=tf.int64)
  split_indices = tf.cast(
      tf.split(x.indices, axis=1, num_or_size_splits=2), dtype=tf.int64)
  expanded_values = tf.cast(tf.expand_dims(x.values, 1), dtype=tf.int64)
  next_index = tf.concat(
      [split_indices[0], split_indices[1], expanded_values], axis=1)

  next_values = tf.ones_like(x.values)
  expanded_vocab_size = tf.expand_dims(vocab_size, 0)
  next_shape = tf.concat([x.dense_shape, expanded_vocab_size], 0)

  next_tensor = tf.SparseTensor(
      indices=tf.cast(next_index, dtype=tf.int64),
      values=next_values,
      dense_shape=next_shape)

  # Take the intermediary tensor and reduce over the term_index_in_doc
  # dimension. This produces a tensor with indices [<doc_id>, <term_id>]
  # and values [count_of_term_in_doc] and shape batch x vocab_size.
  term_count_per_doc = tf.sparse_reduce_sum_sparse(next_tensor, 1)

  dense_doc_sizes = tf.cast(
      tf.sparse.reduce_sum(
          tf.SparseTensor(indices=x.indices,
                          values=tf.ones_like(x.values),
                          dense_shape=x.dense_shape), 1),
      dtype=tf.float64)

  gather_indices = term_count_per_doc.indices[:, 0]
  gathered_doc_sizes = tf.gather(dense_doc_sizes, gather_indices)

  term_frequency = (tf.cast(term_count_per_doc.values, dtype=tf.float64) /
                    tf.cast(gathered_doc_sizes, dtype=tf.float64))
  term_count = tf.cast(term_count_per_doc.values, dtype=tf.float64)

  sparse_term_freq = tf.SparseTensor(
      indices=term_count_per_doc.indices,
      values=term_frequency,
      dense_shape=term_count_per_doc.dense_shape)

  sparse_term_count = tf.SparseTensor(
      indices=term_count_per_doc.indices,
      values=term_count,
      dense_shape=term_count_per_doc.dense_shape)

  return sparse_term_freq, sparse_term_count
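# A minimal usage sketch of _to_term_frequency above, assuming TensorFlow 1.x
# graph mode (tf.sparse_reduce_sum_sparse and tf.Session available). The toy
# batch and the helper name _example_to_term_frequency are illustrative, not
# part of the original code: two documents over a vocabulary of size 3,
# doc 0 = tokens [0, 2, 2], doc 1 = token [1].
import tensorflow as tf

def _example_to_term_frequency():
    x = tf.SparseTensor(
        indices=[[0, 0], [0, 1], [0, 2], [1, 0]],
        values=tf.constant([0, 2, 2, 1], dtype=tf.int64),
        dense_shape=[2, 3])
    term_freq, term_count = _to_term_frequency(x, vocab_size=3)
    with tf.Session() as sess:
        freq, count = sess.run([term_freq, term_count])
    # Expected frequencies: doc 0 -> term 0: 1/3, term 2: 2/3; doc 1 -> term 1: 1.0
    print(freq)
    print(count)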
def forward_incidence_matrix(self, normalization):
    if normalization[0] == "none":
        mtr_values = tf.to_float(tf.ones_like(self.receiver_indices))
        message_indices = tf.range(self.edge_count)
        mtr_indices = tf.to_int64(
            tf.transpose(tf.stack([self.receiver_indices, message_indices])))
        mtr_shape = tf.to_int64(
            tf.stack([self.vertex_count, self.edge_count]))

        tensor = tf.SparseTensor(indices=mtr_indices,
                                 values=mtr_values,
                                 dense_shape=mtr_shape)
        return tensor
    elif normalization[0] == "global":
        # mtr_values can be normalized weights, e.g. intensities
        mtr_values = tf.to_float(tf.ones_like(self.receiver_indices))
        message_indices = tf.range(self.edge_count)
        mtr_indices = tf.to_int64(
            tf.transpose(tf.stack([self.receiver_indices, message_indices])))
        mtr_shape = tf.to_int64(
            tf.stack([self.vertex_count, self.edge_count]))

        tensor = tf.sparse_softmax(
            tf.SparseTensor(indices=mtr_indices,
                            values=mtr_values,
                            dense_shape=mtr_shape))
        return tensor
    elif normalization[0] == "local":
        mtr_values = tf.to_float(tf.ones_like(self.receiver_indices))
        message_indices = tf.range(self.edge_count)
        mtr_indices = tf.to_int64(
            tf.transpose(
                tf.stack([
                    self.message_types, self.receiver_indices, message_indices
                ])))
        mtr_shape = tf.to_int64(
            tf.stack([self.label_count * 2, self.vertex_count,
                      self.edge_count]))

        tensor = tf.sparse_softmax(
            tf.SparseTensor(indices=mtr_indices,
                            values=mtr_values,
                            dense_shape=mtr_shape))
        tensor = tf.sparse_reduce_sum_sparse(tensor, 0)
        return tensor
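# Standalone sketch of the "local" branch above on toy data of my own (not
# from the original class): a softmax over edges within each (message type,
# vertex) row, then the message-type dimension is summed out to give a
# (vertex, edge) incidence matrix. Assumes TensorFlow 1.x graph mode.
import tensorflow as tf

def _example_local_normalization():
    # 2 message types, 3 vertices, 4 edges
    message_types = tf.constant([0, 0, 1, 1], dtype=tf.int64)
    receiver_indices = tf.constant([0, 0, 1, 2], dtype=tf.int64)
    message_indices = tf.range(4, dtype=tf.int64)
    indices = tf.transpose(
        tf.stack([message_types, receiver_indices, message_indices]))
    values = tf.ones([4], dtype=tf.float32)
    shape = tf.constant([2, 3, 4], dtype=tf.int64)

    # Softmax over the edge dimension within each (type, vertex) row,
    # then collapse the message-type dimension.
    normalized = tf.sparse_softmax(
        tf.SparseTensor(indices=indices, values=values, dense_shape=shape))
    incidence = tf.sparse_reduce_sum_sparse(normalized, 0)

    with tf.Session() as sess:
        # Edges 0 and 1 share (type 0, vertex 0) and get 0.5 each;
        # edges 2 and 3 are alone in their rows and get 1.0.
        print(sess.run(tf.sparse_tensor_to_dense(tf.sparse_reorder(incidence))))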
def get_term_count_per_doc(x, vocab_size):
  """Creates a SparseTensor with the term count for every doc/term pair.

  Args:
    x: a SparseTensor representing string indices in vocab.
    vocab_size: the size of the vocabulary, including any OOV buckets.

  Returns:
    a SparseTensor with the count at indices <doc_index_in_batch>,
    <term_index_in_vocab> for every term/doc pair.

  Example: the tensor

    SparseTensorValue(
        indices=array([[0, 0], [1, 0], [1, 2], [2, 1], [3, 1]]),
        values=array([3, 8, 9, 3, 4], dtype=int64),
        dense_shape=array([4, 3]))

  says the 2nd example/document (row index 1) has two tokens, and token 0
  occurs 8 times and token 2 occurs 9 times.
  """
  # Construct intermediary sparse tensor with indices
  # [<doc>, <term_index_in_doc>, <vocab_id>] and tf.ones values.
  split_indices = tf.to_int64(
      tf.split(x.indices, axis=1, num_or_size_splits=2))
  expanded_values = tf.to_int64(tf.expand_dims(x.values, 1))
  next_index = tf.concat(
      [split_indices[0], split_indices[1], expanded_values], axis=1)

  next_values = tf.ones_like(x.values, dtype=tf.int64)
  vocab_size_as_tensor = tf.constant([vocab_size], dtype=tf.int64)
  next_shape = tf.concat([x.dense_shape, vocab_size_as_tensor], 0)

  next_tensor = tf.SparseTensor(
      indices=tf.to_int64(next_index),
      values=next_values,
      dense_shape=next_shape)

  # Take the intermediary tensor and reduce over the term_index_in_doc
  # dimension. This produces a tensor with indices [<doc_id>, <term_id>]
  # and values [count_of_term_in_doc] and shape batch x vocab_size.
  term_count_per_doc = tf.sparse_reduce_sum_sparse(next_tensor, 1)
  return term_count_per_doc
def _to_doc_contains_term(x):
  """Creates a SparseTensor with 1s at every doc/term pair index.

  Args:
    x: a SparseTensor of int64 representing string indices in vocab.

  Returns:
    a SparseTensor with 1s at indices <doc_index_in_batch>,
    <term_index_in_vocab> for every term/doc pair.
  """
  # Construct intermediary sparse tensor with indices
  # [<doc>, <term_index_in_doc>, <vocab_id>] and tf.ones values.
  split_indices = tf.to_int64(
      tf.split(x.indices, axis=1, num_or_size_splits=2))
  expanded_values = tf.to_int64(tf.expand_dims(x.values, 1))
  next_index = tf.concat(
      [split_indices[0], split_indices[1], expanded_values], axis=1)

  next_values = tf.ones_like(x.values)
  # `vocab_size` is taken from the enclosing scope.
  vocab_size_as_tensor = tf.constant([vocab_size], dtype=tf.int64)
  next_shape = tf.concat([x.dense_shape, vocab_size_as_tensor], 0)

  next_tensor = tf.SparseTensor(
      indices=tf.to_int64(next_index),
      values=next_values,
      dense_shape=next_shape)

  # Take the intermediary tensor and reduce over the term_index_in_doc
  # dimension. This produces a tensor with indices [<doc_id>, <term_id>]
  # and values [count_of_term_in_doc] and shape batch x vocab_size.
  term_count_per_doc = tf.sparse_reduce_sum_sparse(next_tensor, 1)

  one_if_doc_contains_term = tf.SparseTensor(
      indices=term_count_per_doc.indices,
      values=tf.to_double(tf.greater(term_count_per_doc.values, 0)),
      dense_shape=term_count_per_doc.dense_shape)

  return one_if_doc_contains_term
def variational_message_passing(
        prior_global_params, global_params, o, o_dim, d, K, N,
        L=None, I=None, n_ann=None, ann_batch_size=None, n_iters=100):
    global_stats = global_expected_stats(global_params, d)
    dir_stats, niw_stats, alpha_stats, beta_stats = global_stats
    M = tf.shape(o)[0]

    # Initialize z_stats
    z_stats = normalize(tf.random_uniform([M, K], 1e-8, maxval=1))

    # Encode
    # h: [M, d], J: [M, d]
    h, J = encoder(o, d)
    # J: [M, d * d]
    J = tf.reshape(tf.matrix_diag(J), [M, d * d])
    # x_obs_param: [M, d + d * d]
    x_obs_param = tf.concat([h, J], axis=-1)

    # Prepare relational info
    if L is not None:
        # I, L: [M, M, W] (sparse), alpha_stats: [W, 2]
        # nb_weights_per_worker: [M, M, W] (sparse)
        nb_weights_per_worker = tf.sparse_add(
            (alpha_stats[:, 1] - beta_stats[:, 0]) * I,
            (alpha_stats[:, 0] - alpha_stats[:, 1] +
             beta_stats[:, 0] - beta_stats[:, 1]) * L)
        # nb_weights: [M, M] (sparse)
        nb_weights = tf.sparse_reduce_sum_sparse(nb_weights_per_worker,
                                                 axis=-1)
    else:
        nb_weights = None

    # Message passing
    for t in range(n_iters):
        x_nat_param, x_stats = x_mean_field(niw_stats, z_stats,
                                            x_obs_param, d)
        z_nat_param, z_stats = z_mean_field(global_stats, x_stats, z_stats,
                                            nb_weights=nb_weights)

    # Decode
    # x: [M, d]
    x = mvn.sample(x_nat_param, d)
    o_dist, _ = decoder(x, o_dim)

    # Compute ELBO
    # log_po_term: [M]
    log_po_term = o_dist.log_prob(o)
    # log_p_ann_term: []
    if L is not None:
        # z_stats: [M, K], z_inner_stats: [M, M]
        z_inner_stats = tf.matmul(z_stats, z_stats, transpose_b=True)
        log_p_ann_term = annotation_log_likelihood(
            beta_stats, z_inner_stats, L, I, nb_weights)
        ann_subsample_factor = n_ann / ann_batch_size
    else:
        z_inner_stats = None
        log_p_ann_term = None
        ann_subsample_factor = 1
    # local_kl_x_term: [M]
    local_kl_x_term = local_kl_x(x_nat_param, niw_stats, z_stats, x_stats, d)
    # local_kl_z_term: [M]
    local_kl_z_term = local_kl_z(z_nat_param, dir_stats, z_stats)
    # global_kl_term: []
    global_kl_term = global_kl(
        prior_global_params, global_params, global_stats, d)

    lower_bound = elbo(
        log_po_term, local_kl_z_term, local_kl_x_term, global_kl_term, N,
        ann_ll=log_p_ann_term, ann_subsample_factor=ann_subsample_factor)

    # Natural gradient for global variational parameters
    # z_stats: [M, K], x_stats: [M, d + d^2]
    # dir_updates: [K]
    dir_updates = tf.reduce_mean(z_stats, axis=0)
    # niw_updates: [K, d + d^2 + 2]
    niw_updates = tf.matmul(
        z_stats, tf.concat([x_stats, tf.ones([M, 2])], -1),
        transpose_a=True) / tf.to_float(M)
    updates = [dir_updates, niw_updates]

    if L is not None:
        # L_worker: [W, M, M] (sparse), false_L_worker: [W, M, M] (sparse)
        L_worker = tf.sparse_transpose(L, perm=[2, 0, 1])
        false_L_worker = tf.sparse_transpose(
            tf.sparse_add(I, -tf.ones(tf.shape(L)) * L), perm=[2, 0, 1])
        # alpha_updates: [W, 2]
        alpha_updates_1 = tf.sparse_reduce_sum(z_inner_stats * L_worker,
                                               axis=(-2, -1))
        alpha_updates_2 = tf.sparse_reduce_sum(z_inner_stats * false_L_worker,
                                               axis=(-2, -1))
        alpha_updates = 0.5 * tf.stack([alpha_updates_1, alpha_updates_2],
                                       axis=-1)
        # beta_updates: [W, 2]
        # false_z_inner_stats: [M, M]
        false_z_inner_stats = 1 - z_inner_stats
        beta_updates_1 = tf.sparse_reduce_sum(
            false_z_inner_stats * false_L_worker, axis=(-2, -1))
        beta_updates_2 = tf.sparse_reduce_sum(
            false_z_inner_stats * L_worker, axis=(-2, -1))
        beta_updates = 0.5 * tf.stack([beta_updates_1, beta_updates_2],
                                      axis=-1)
        updates.extend([alpha_updates / ann_subsample_factor,
                        beta_updates / ann_subsample_factor])

    nat_grads = [(prior_global_params[i] - global_params[i]) / N + updates[i]
                 for i in range(len(updates))]

    return lower_bound, nat_grads, z_stats, niw_stats, dir_stats
def extract_case_length_features(input_words):
    input_words = transform_normalize_unicode(input_words, 'NFKC')
    input_chars = expand_split_chars(input_words)
    input_words_lower = transform_lower_case(input_words)
    input_words_upper = transform_upper_case(input_words)
    input_words_title = transform_title_case(input_words)

    chars_count = tf.sparse_reduce_sum_sparse(tf.SparseTensor(
        indices=input_chars.indices,
        values=tf.ones_like(input_chars.values, dtype=tf.float32),
        dense_shape=input_chars.dense_shape,
    ), axis=-1)
    word_length_values = tf.where(
        tf.greater(chars_count.values, _MAX_LENGTH),
        tf.fill(tf.shape(chars_count.values), _MAX_LENGTH),
        chars_count.values
    )
    word_length_values = tf.divide(word_length_values, _MAX_LENGTH)
    word_length_values.set_shape(input_words.values.shape)
    word_length = tf.SparseTensor(
        indices=input_words.indices,
        values=word_length_values,
        dense_shape=input_words.dense_shape,
    )

    no_case_value = tf.logical_and(
        tf.equal(input_words_lower.values, input_words_upper.values),
        tf.equal(input_words_upper.values, input_words_title.values)
    )
    no_case = tf.SparseTensor(
        indices=input_words.indices,
        values=tf.to_float(no_case_value),
        dense_shape=input_words.dense_shape
    )

    lower_case_value = tf.logical_and(
        tf.logical_not(no_case_value),
        tf.equal(input_words.values, input_words_lower.values)
    )
    lower_case = tf.SparseTensor(
        indices=input_words.indices,
        values=tf.to_float(lower_case_value),
        dense_shape=input_words.dense_shape
    )

    upper_case_value = tf.logical_and(
        tf.logical_not(no_case_value),
        tf.equal(input_words.values, input_words_upper.values)
    )
    upper_case = tf.SparseTensor(
        indices=input_words.indices,
        values=tf.to_float(upper_case_value),
        dense_shape=input_words.dense_shape
    )

    title_case_value = tf.logical_and(
        tf.logical_not(no_case_value),
        tf.equal(input_words.values, input_words_title.values)
    )
    title_case = tf.SparseTensor(
        indices=input_words.indices,
        values=tf.to_float(title_case_value),
        dense_shape=input_words.dense_shape
    )

    mixed_case_value = tf.logical_not(tf.logical_or(
        tf.logical_or(no_case_value, lower_case_value),
        tf.logical_or(upper_case_value, title_case_value)
    ))
    mixed_case = tf.SparseTensor(
        indices=input_words.indices,
        values=tf.to_float(mixed_case_value),
        dense_shape=input_words.dense_shape
    )

    return word_length, no_case, lower_case, upper_case, title_case, mixed_case
def sparse_norm(x):
    """Normalizes each column of a 2-D SparseTensor so that it sums to 1."""
    # Column sums as a sparse (1, ncols) tensor, densified so every nonzero
    # entry can be divided by the sum of its own column.
    rsum = tf.sparse_reduce_sum_sparse(x, axis=0, keep_dims=True)
    col_sums = tf.sparse_tensor_to_dense(rsum)[0]
    denom = tf.gather(col_sums, x.indices[:, 1])
    return tf.SparseTensor(indices=x.indices,
                           values=x.values / denom,
                           dense_shape=x.dense_shape)
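# A small usage example of sparse_norm above, assuming TensorFlow 1.x graph
# mode; the 2x2 matrix and the helper name _example_sparse_norm are
# illustrative only.
import tensorflow as tf

def _example_sparse_norm():
    x = tf.SparseTensor(indices=[[0, 0], [1, 0], [1, 1]],
                        values=[1.0, 3.0, 2.0],
                        dense_shape=[2, 2])
    normalized = sparse_norm(x)
    with tf.Session() as sess:
        # Expected: [[0.25, 0.], [0.75, 1.]] (each column sums to 1)
        print(sess.run(tf.sparse_tensor_to_dense(normalized)))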
copy_score = tf.placeholder(tf.float32, shape=(None, None))
encoder_input_mask = tf.one_hot(encoder_input_ids, vocab_size)
# expanded_copy_score = tf.einsum("ijn,ij->ij", encoder_input_mask, copy_score)
# prob_c = expanded_copy_score
prob_c_one_hot2 = tf.einsum("ijn,ij->in", encoder_input_mask, copy_score)

batch_size, time_steps = tf.unstack(tf.shape(encoder_input_ids))
inputs_flat = tf.reshape(encoder_input_ids, [-1])
copy_score_flat = tf.reshape(copy_score, [-1])
rr = tf.range(tf.cast(batch_size * time_steps, tf.int64), dtype=tf.int64)
indices = tf.stack([rr, tf.cast(inputs_flat, tf.int64)], axis=1)
shape = tf.cast([batch_size * time_steps, vocab_size], tf.int64)
expanded_copy_score_sparse_flat = tf.SparseTensor(indices, copy_score_flat,
                                                  shape)
expanded_copy_score_sparse = tf.sparse_reshape(
    expanded_copy_score_sparse_flat, [batch_size, time_steps, vocab_size])
copy_score_sparse = tf.sparse_reduce_sum_sparse(expanded_copy_score_sparse,
                                                axis=1)
prob_c_one_hot = tf.sparse_to_dense(copy_score_sparse.indices,
                                    copy_score_sparse.dense_shape,
                                    copy_score_sparse.values)

with tf.Session() as sess:
    print(sess.run(
        [prob_c_one_hot, prob_c_one_hot2],
        feed_dict={
            encoder_input_ids: [[5, 4, 3, 2, 1], [3, 4, 1, 5, 2]],
            copy_score: [[0, 0.5, 0, 0.5, 0], [0.3, 0.7, 0, 0, 0]]
        }))
def define_variables(train_category, priors, sigma2, batch_size):
    if options.degenerate:
        emb_user_prior = make_embedding_prior()
        emb_item_prior = make_embedding_prior()
        emb_entity_prior = make_embedding_prior()
        bias_user_prior = make_bias_prior()
        bias_item_prior = make_bias_prior()
        bias_entity_prior = make_bias_prior()
    else:
        emb_user_prior = make_embedding_prior3(priors, user_batch)
        emb_item_prior = make_embedding_prior3(priors, item_batch)
        emb_entity_prior = make_embedding_prior3(priors, all_entities)
        bias_user_prior = make_bias_prior3(priors, user_batch)
        bias_item_prior = make_bias_prior3(priors, item_batch)
        bias_entity_prior = make_bias_prior3(priors, all_entities)

    user_rescale = tf.nn.embedding_lookup(priors, user_batch)[:, 0]
    item_rescale = tf.nn.embedding_lookup(priors, item_batch)[:, 0]
    entity_rescale = priors[:, 0]

    if is_classification:
        likelihood = make_likelihood(feat_users, feat_items, bias_users,
                                     bias_items)
        sparse_pred = make_sparse_pred(X_fm_batch)
    else:
        likelihood = make_likelihood_reg(sigma2, feat_users, feat_items,
                                         bias_users, bias_items)
        sparse_pred = make_sparse_pred_reg(sigma2, X_fm_batch)
    pred2 = sparse_pred.mean()
    # ll = make_likelihood(feat_users2, feat_items2, bias_users2, bias_items2)
    pred = likelihood.mean()
    # print(likelihood.log_prob([1, 0]))

    # Check shapes
    # print('likelihood', likelihood.log_prob(outcomes))
    # print('prior', emb_user_prior.log_prob(feat_users))
    # print('scaled prior', emb_user_prior.log_prob(feat_users) / user_rescale)
    # print('posterior', q_user.log_prob(feat_users))
    # print('bias prior', bias_user_prior.log_prob(bias_users))
    # print('bias posterior', q_user_bias.log_prob(bias_users))

    # sentinel = likelihood.log_prob(outcomes)
    # sentinel = bias_prior.log_prob(bias_users)
    # sentinel = tf.reduce_sum(ll.log_prob(outcomes))
    # sentinel2 = tf.reduce_sum(likelihood.log_prob(outcomes))

    # elbo = tf.reduce_mean(
    #     user_rescale * item_rescale * likelihood.log_prob(outcomes) +
    #     item_rescale * (bias_user_prior.log_prob(bias_users) - q_user_bias.log_prob(bias_users) +
    #                     emb_user_prior.log_prob(feat_users) - q_user.log_prob(feat_users)) +
    #     user_rescale * (bias_item_prior.log_prob(bias_items) - q_item_bias.log_prob(bias_items) +
    #                     emb_user_prior.log_prob(feat_items) - q_item.log_prob(feat_items)))
    # (nb_users + nb_items) / 2

    if options.degenerate:
        # elbo = -(tf.reduce_sum((pred - outcomes) ** 2 / 2) +
        #          0.1 * tf.reduce_sum(tf.nn.l2_loss(bias_users) + tf.nn.l2_loss(bias_items) +
        #                              tf.nn.l2_loss(feat_users) + tf.nn.l2_loss(feat_items)))
        elbo = tf.reduce_mean(
            likelihood.log_prob(outcomes) +
            1 / user_rescale * (bias_user_prior.log_prob(bias_users) +
                                emb_user_prior.log_prob(feat_users)) +
            1 / item_rescale * (bias_item_prior.log_prob(bias_items) +
                                emb_user_prior.log_prob(feat_items)),
            name='elbo')
        # / 2 : 1.27
        # * 2 : 1.16
    elif options.sparse:
        nb_occ = tf.sparse_reshape(
            tf.sparse_reduce_sum_sparse(X_fm_batch, axis=0), (1, -1))
        lp_lq = tf.reduce_sum(
            bias_entity_prior.log_prob(all_bias) -
            q_entity_bias.log_prob(all_bias) +
            emb_entity_prior.log_prob(all_feat) -
            q_entity.log_prob(all_feat), axis=0)
        nonzero_entity_rescale = 1 + tf.maximum(0., entity_rescale - 1)
        lp_lq = tf.reshape(lp_lq / nonzero_entity_rescale, (-1, 1))
        relevant_scaled_lp_lq = tf.squeeze(
            tf.sparse_tensor_dense_matmul(nb_occ, lp_lq))
        elbo = (tf.reduce_mean(sparse_pred.log_prob(outcomes)) +
                relevant_scaled_lp_lq / batch_size)
    else:
        # elbo = tf.reduce_mean(
        #     nb_samples['train'] * likelihood.log_prob(outcomes) +
        #     # nb_samples['train'] * sparse_pred.log_prob(outcomes) +
        #     (nb_users + nb_items) / 2 *
        #     (bias_user_prior.log_prob(bias_users) - q_user_bias.log_prob(bias_users) +
        #      emb_user_prior.log_prob(feat_users) - q_user.log_prob(feat_users) +
        #      bias_item_prior.log_prob(bias_items) - q_item_bias.log_prob(bias_items) +
        #      emb_user_prior.log_prob(feat_items) - q_item.log_prob(feat_items)), name='elbo')

        # elbo = tf.reduce_mean(
        #     nb_samples[train_category] * likelihood.log_prob(outcomes) +
        #     nb_samples[train_category] * 1 / user_rescale * (bias_user_prior.log_prob(bias_users) - q_user_bias.log_prob(bias_users) +
        #                                                      emb_user_prior.log_prob(feat_users) - q_user.log_prob(feat_users)) +
        #     nb_samples[train_category] * 1 / item_rescale * (bias_item_prior.log_prob(bias_items) - q_item_bias.log_prob(bias_items) +
        #                                                      emb_user_prior.log_prob(feat_items) - q_item.log_prob(feat_items)), name='elbo')

        elbo = tf.reduce_mean(
            likelihood.log_prob(outcomes) +
            1 / user_rescale * (bias_user_prior.log_prob(bias_users) -
                                q_user_bias.log_prob(bias_users) +
                                emb_user_prior.log_prob(feat_users) -
                                q_user.log_prob(feat_users)) +
            1 / item_rescale * (bias_item_prior.log_prob(bias_items) -
                                q_item_bias.log_prob(bias_items) +
                                emb_user_prior.log_prob(feat_items) -
                                q_item.log_prob(feat_items))
        )

    sentinel = {
        'nb outcomes': tf.shape(outcomes),
        'nb samples': tf.constant(nb_samples[train_category]),
        'users': entity[:5, 0],
        # 'lplq': relevant_scaled_lp_lq,
        # 'll log prob': -likelihood.log_prob(outcomes),
        # 'll log prob sparse': -sparse_pred.log_prob(outcomes),
        'll log prob has nan': tf.reduce_any(
            tf.is_nan(likelihood.log_prob(outcomes))),
        'll log prob sparse has nan': tf.reduce_any(
            tf.is_nan(sparse_pred.log_prob(outcomes))),
        # 's ll log prob': -tf.reduce_sum(likelihood.log_prob(outcomes)),
        # 's pred delta': tf.reduce_sum((pred - outcomes) ** 2 / 2 + np.log(2 * np.pi) / 2),
        'entity_rescale sum': tf.reduce_sum(entity_rescale),
        'nb occ sum': tf.constant(nb_occurrences[train_category].sum()),
        # 'logits': logits,
        # 'max logits': tf.reduce_max(logits),
        # 'min logits': tf.reduce_min(logits),
        # 'max logits2': tf.reduce_max(logits2),
        # 'min logits2': tf.reduce_min(logits2),
        # 'bias sample': bias_users[0],
        # 'bias log prob': -bias_user_prior.log_prob(bias_users)[0],
        # 'sum bias log prob': -tf.reduce_sum(bias_user_prior.log_prob(bias_users)),
        'pred': pred,
        'pred2': pred2,
        'max pred': tf.reduce_max(pred),
        'min pred': tf.reduce_min(pred),
        'max pred2': tf.reduce_max(pred2),
        'min pred2': tf.reduce_min(pred2),
        'has nan': tf.reduce_any(tf.is_nan(pred2))
        # 'bias mean': bias_user_prior.mean(),
        # 'bias delta': bias_users[0] ** 2 / 2 + np.log(2 * np.pi) / 2,
        # 'sum bias delta': tf.reduce_sum(bias_users ** 2 / 2 + np.log(2 * np.pi) / 2)
    }

    infer_op = optimizer.minimize(-elbo)
    if options.sparse:
        return infer_op, elbo, pred2, likelihood, sentinel
    else:
        return infer_op, elbo, pred, likelihood, sentinel
def __call__(self, inputs, state, scope=None):
    if not isinstance(state, CopyNetWrapperState):
        raise TypeError(
            "Expected state to be instance of CopyNetWrapperState. "
            "Received type %s instead." % type(state))
    last_ids = state.last_ids
    prob_c = state.prob_c
    cell_state = state.cell_state

    mask = tf.cast(
        tf.equal(tf.expand_dims(last_ids, 1), self._encoder_input_ids),
        tf.float32)
    mask_sum = tf.reduce_sum(mask, axis=1)
    mask = tf.where(tf.less(mask_sum, 1e-7), mask,
                    mask / tf.expand_dims(mask_sum, 1))
    rou = mask * prob_c
    selective_read = tf.einsum("ijk,ij->ik", self._encoder_states, rou)
    inputs = tf.concat([inputs, selective_read], 1)

    outputs, cell_state = self._cell(inputs, cell_state, scope)

    generate_score = self._projection(outputs)
    prob_g = generate_score

    copy_score = tf.einsum("ijk,km->ijm", self._encoder_states,
                           self._copy_weight)
    copy_score = tf.nn.tanh(copy_score)
    copy_score = tf.einsum("ijm,im->ij", copy_score, outputs)
    prob_c = copy_score

    # Dense reference implementation (disabled):
    # encoder_input_mask = tf.one_hot(self._encoder_input_ids, self._vocab_size)
    # expanded_copy_score = tf.einsum("ijn,ij->ij", encoder_input_mask, copy_score)
    # prob_c_one_hot = tf.einsum("ijn,ij->in", encoder_input_mask, prob_c)

    # Using a sparse tensor instead
    batch_size, time_steps = tf.unstack(tf.shape(self._encoder_input_ids))
    inputs_flat = tf.reshape(self._encoder_input_ids, [-1])
    copy_score_flat = tf.reshape(copy_score, [-1])
    rr = tf.range(tf.cast(batch_size * time_steps, tf.int64), dtype=tf.int64)
    indices = tf.stack([rr, tf.cast(inputs_flat, tf.int64)], axis=1)
    shape = tf.cast([batch_size * time_steps, self._vocab_size], tf.int64)
    expanded_copy_score_sparse_flat = tf.SparseTensor(
        indices, copy_score_flat, shape)
    expanded_copy_score_sparse = tf.sparse_reshape(
        expanded_copy_score_sparse_flat,
        [batch_size, time_steps, self._vocab_size])
    copy_score_sparse = tf.sparse_reduce_sum_sparse(
        expanded_copy_score_sparse, axis=1)
    prob_c_one_hot2 = tf.sparse_to_dense(copy_score_sparse.indices,
                                         copy_score_sparse.dense_shape,
                                         copy_score_sparse.values)

    # Dense alternative kept for comparison (disabled):
    # expanded_copy_score_flat = tf.sparse_to_dense(
    #     expanded_copy_score_sparse_flat.indices,
    #     expanded_copy_score_sparse_flat.dense_shape,
    #     expanded_copy_score_sparse_flat.values)
    # expanded_copy_score = tf.reshape(
    #     expanded_copy_score_flat, [batch_size, time_steps, self._vocab_size])
    # prob_c_one_hot3 = tf.reduce_sum(expanded_copy_score, axis=1)
    # prob_c_one_hot = tf.Print(prob_c_one_hot,
    #     [tf.reduce_max(tf.abs(tf.add(prob_c_one_hot3, -prob_c_one_hot2)))])

    prob_g_total = tf.pad(
        prob_g, [[0, 0], [0, self._vocab_size - self._gen_vocab_size]])
    outputs = prob_g_total + prob_c_one_hot2

    # Known issues (translated from the original French notes):
    # Very strange bug: prob_c_one_hot always equals prob_c_one_hot3, yet
    # perplexity explodes immediately if prob_c_one_hot3 is used in place of
    # prob_c_one_hot.
    # https://stackoverflow.com/questions/45348902/why-is-no-gradient-available-when-using-sparse-tensors-in-tensorflow:
    # It turns out the sparse_to_dense operation (around which
    # sparse_tensor_to_dense is a convenience wrapper) does not have a
    # gradient in TensorFlow. sparse_to_dense => scatter_nd?
    # prob_c_one_hot2 and prob_c_one_hot3 differ by roughly 1e-6, which is
    # most likely just float32 precision.

    # pr = tf.reduce_min(tf.reshape(tf.add(prob_c_one_hot2, -prob_c_one_hot), [-1]))
    # outputs = tf.Print(outputs, [pr])
    last_ids = tf.argmax(outputs, axis=-1, output_type=tf.int32)
    last_ids.set_shape([None])
    state = CopyNetWrapperState(cell_state=cell_state, last_ids=last_ids,
                                prob_c=prob_c)
    return outputs, state
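# A hedged sketch of the scatter_nd alternative the note above speculates
# about: tf.sparse_to_dense has no registered gradient, whereas tf.scatter_nd
# does and accumulates duplicate indices, so copy scores of repeated tokens
# are summed exactly as in the sparse version. The function name and shapes
# are illustrative, not part of the original wrapper; TensorFlow 1.x graph
# mode is assumed.
import tensorflow as tf

def copy_scores_to_vocab(encoder_input_ids, copy_score, vocab_size):
    """Sums per-position copy scores into a (batch, vocab_size) tensor."""
    batch_size, time_steps = tf.unstack(tf.shape(encoder_input_ids))
    batch_ids = tf.tile(tf.expand_dims(tf.range(batch_size), 1),
                        [1, time_steps])
    # (batch * time, 2) indices of the form [batch_index, token_id]
    indices = tf.stack([tf.reshape(batch_ids, [-1]),
                        tf.reshape(encoder_input_ids, [-1])], axis=1)
    # scatter_nd sums updates that land on the same index, and it is
    # differentiable with respect to the updates.
    return tf.scatter_nd(indices,
                         tf.reshape(copy_score, [-1]),
                         tf.stack([batch_size, vocab_size]))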