Code Example #1
File: linear_lrp.py  Project: fhvilshoj/lrp
    def _prediction_loop(t, av, ai, Rs, offset):
        # current_R shape: (batch_size, out_size)
        current_R = Rs.read(t)

        # Prepare batch for current prediction_per_sample to be broadcasted over
        # the input dimension
        # current_R shape: (batch_size, 1, out_size)
        current_R = tf.expand_dims(current_R, 1)

        # Scale fractions with relevances for current prediction_per_sample
        distributed_relevances = fractions * current_R

        # Reduce-sum to get the relevances for the individual in_dimensions
        # new_relevances shape: (batch_size, in_size)
        new_relevances = tf.sparse_reduce_sum_sparse(distributed_relevances, 2)

        # Count how many values and indices to add to the tensor arrays
        value_cnt = tf.shape(new_relevances.values)[0]
        # Calculate range of indexes in the tensor arrays to write the values and indices to
        scatter_range = tf.range(offset, offset + value_cnt, dtype=tf.int32)

        # Scatter the values of the new relevances
        av = av.scatter(scatter_range, new_relevances.values)

        # Prepend the prediction_per_sample dimension to be able to make a
        # sparse tensor of shape (predictions_per_sample, batch_size, in_width) after
        # the while loop
        new_indices = tf.pad(new_relevances.indices, [[0, 0], [1, 0]],
                             constant_values=tf.cast(t, dtype=tf.int64))

        # Scatter the indices of the new relevances
        ai = ai.scatter(scatter_range, new_indices)

        # Go to next prediction_per_sample
        return t + 1, av, ai, Rs, offset + value_cnt
Code Example #2
    def _call(self, inputs):
        # motif conv
        new_activations = []
        # for each motif
        for m in range(self.num_motifs):
            x = inputs
            if self.sparse_inputs:
                x = sparse_dropout(x, 1 - self.dropout,
                                   self.num_features_nonzero)
            else:
                x = tf.nn.dropout(x, 1 - self.dropout)
            adj_positions = tf.sparse_split(
                sp_input=self.support[m], num_split=self.motif_positions[m],
                axis=0)
            supports = list()
            # For each position
            for k in range(0, self.motif_positions[m]):
                XW = dot(x, self.vars['weights_' + str(m) + '_' + str(k)],
                         sparse=self.sparse_inputs)
                temp = tf.sparse_reduce_sum_sparse(adj_positions[k], axis=0)
                support = dot(temp, XW, sparse=True)
                supports.append(support)
            output = tf.add_n(supports)
            if self.bias:
                output += self.vars['bias_' + str(m)]
            new_activations.append(self.act(output))
        return new_activations
Code Example #3
def _to_term_frequency(x, vocab_size):
    """Creates a SparseTensor of term frequency for every doc/term pair.
  Args:
    x : a SparseTensor of int64 representing string indices in vocab.
    vocab_size: A scalar int64 Tensor - the count of vocab used to turn the
        string into int64s including any OOV buckets.
  Returns:
    a SparseTensor with the count of times a term appears in a document at
        indices <doc_index_in_batch>, <term_index_in_vocab>,
        with size (num_docs_in_batch, vocab_size).
  """
    # Construct intermediary sparse tensor with indices
    # [<doc>, <term_index_in_doc>, <vocab_id>] and tf.ones values.
    vocab_size = tf.convert_to_tensor(value=vocab_size, dtype=tf.int64)
    split_indices = tf.cast(tf.split(x.indices, axis=1, num_or_size_splits=2),
                            dtype=tf.int64)
    expanded_values = tf.cast(tf.expand_dims(x.values, 1), dtype=tf.int64)
    next_index = tf.concat(
        [split_indices[0], split_indices[1], expanded_values], axis=1)

    next_values = tf.ones_like(x.values)
    expanded_vocab_size = tf.expand_dims(vocab_size, 0)
    next_shape = tf.concat([x.dense_shape, expanded_vocab_size], 0)

    next_tensor = tf.SparseTensor(indices=tf.cast(next_index, dtype=tf.int64),
                                  values=next_values,
                                  dense_shape=next_shape)

    # Take the intermediary tensor and reduce over the term_index_in_doc
    # dimension. This produces a tensor with indices [<doc_id>, <term_id>]
    # and values [count_of_term_in_doc] and shape batch x vocab_size
    term_count_per_doc = tf.sparse_reduce_sum_sparse(next_tensor, 1)

    dense_doc_sizes = tf.cast(tf.sparse.reduce_sum(
        tf.SparseTensor(indices=x.indices,
                        values=tf.ones_like(x.values),
                        dense_shape=x.dense_shape), 1),
                              dtype=tf.float64)

    gather_indices = term_count_per_doc.indices[:, 0]
    gathered_doc_sizes = tf.gather(dense_doc_sizes, gather_indices)

    term_frequency = (tf.cast(term_count_per_doc.values, dtype=tf.float64) /
                      tf.cast(gathered_doc_sizes, dtype=tf.float64))
    term_count = tf.cast(term_count_per_doc.values, dtype=tf.float64)

    sparse_term_freq = tf.SparseTensor(
        indices=term_count_per_doc.indices,
        values=term_frequency,
        dense_shape=term_count_per_doc.dense_shape)

    sparse_term_count = tf.SparseTensor(
        indices=term_count_per_doc.indices,
        values=term_count,
        dense_shape=term_count_per_doc.dense_shape)

    return sparse_term_freq, sparse_term_count
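
The easiest way to see what _to_term_frequency produces is a concrete run. The following is a minimal usage sketch, not part of the original project; it assumes TensorFlow 1.x graph mode with the function above in scope, and feeds two documents with token ids [0, 2, 0] and [1] against a vocabulary of size 3.

import tensorflow as tf

# x has shape [num_docs, max_tokens_per_doc]: doc 0 holds token ids [0, 2, 0],
# doc 1 holds the single token id [1].
x = tf.SparseTensor(
    indices=[[0, 0], [0, 1], [0, 2], [1, 0]],
    values=tf.constant([0, 2, 0, 1], dtype=tf.int64),
    dense_shape=[2, 3])

term_freq, term_count = _to_term_frequency(x, vocab_size=3)

with tf.Session() as sess:
    freq, count = sess.run([term_freq, term_count])
    # count: doc 0 -> {term 0: 2, term 2: 1}, doc 1 -> {term 1: 1}
    # freq:  the same counts divided by the document sizes (3 and 1),
    #        i.e. values [2/3, 1/3, 1.0]
    print(count)
    print(freq)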
Code Example #4
    def forward_incidence_matrix(self, normalization):
        if normalization[0] == "none":
            mtr_values = tf.to_float(tf.ones_like(self.receiver_indices))
            message_indices = tf.range(self.edge_count)

            mtr_indices = tf.to_int64(
                tf.transpose(tf.stack([self.receiver_indices,
                                       message_indices])))
            mtr_shape = tf.to_int64(
                tf.stack([self.vertex_count, self.edge_count]))

            tensor = tf.SparseTensor(indices=mtr_indices,
                                     values=mtr_values,
                                     dense_shape=mtr_shape)

            return tensor
        elif normalization[0] == "global":
            mtr_values = tf.to_float(
                tf.ones_like(self.receiver_indices)
            )  # mtr_values can be normalized weights, e.g. intensities
            message_indices = tf.range(self.edge_count)

            mtr_indices = tf.to_int64(
                tf.transpose(tf.stack([self.receiver_indices,
                                       message_indices])))
            mtr_shape = tf.to_int64(
                tf.stack([self.vertex_count, self.edge_count]))

            tensor = tf.sparse_softmax(
                tf.SparseTensor(indices=mtr_indices,
                                values=mtr_values,
                                dense_shape=mtr_shape))

            return tensor
        elif normalization[0] == "local":
            mtr_values = tf.to_float(tf.ones_like(self.receiver_indices))
            message_indices = tf.range(self.edge_count)

            mtr_indices = tf.to_int64(
                tf.transpose(
                    tf.stack([
                        self.message_types, self.receiver_indices,
                        message_indices
                    ])))
            mtr_shape = tf.to_int64(
                tf.stack(
                    [self.label_count * 2, self.vertex_count,
                     self.edge_count]))

            tensor = tf.sparse_softmax(
                tf.SparseTensor(indices=mtr_indices,
                                values=mtr_values,
                                dense_shape=mtr_shape))

            tensor = tf.sparse_reduce_sum_sparse(tensor, 0)

            return tensor
Code Example #5
def get_term_count_per_doc(x, vocab_size):
  """Creates a SparseTensor with 1s at every doc/term pair index.

  Args:
    x : a SparseTensor representing string indices in vocab.
    vocab_size: an int, the size of the vocabulary used to map strings to ids.

  Returns:
    a SparseTensor with count at indices <doc_index_in_batch>,
        <term_index_in_vocab> for every term/doc pair. Example: the tensor
        SparseTensorValue(
          indices=array([[0, 0],
                         [1, 0],
                         [1, 2],
                         [2, 1],
                         [3, 1]]),
          values=array([3, 8, 9, 3, 4], dtype=int64),
          dense_shape=array([4, 3]))
        says the 2nd example/document (row index 1) contains two distinct
        tokens: token 0 occurs 8 times and token 2 occurs 9 times.
  """
  # Construct intermediary sparse tensor with indices
  # [<doc>, <term_index_in_doc>, <vocab_id>] and tf.ones values.
  split_indices = tf.to_int64(
      tf.split(x.indices, axis=1, num_or_size_splits=2))
  expanded_values = tf.to_int64(tf.expand_dims(x.values, 1))
  next_index = tf.concat(
      [split_indices[0], split_indices[1], expanded_values], axis=1)
  next_values = tf.ones_like(x.values, dtype=tf.int64)
  vocab_size_as_tensor = tf.constant([vocab_size], dtype=tf.int64)
  next_shape = tf.concat(
      [x.dense_shape, vocab_size_as_tensor], 0)
  next_tensor = tf.SparseTensor(
      indices=tf.to_int64(next_index),
      values=next_values,
      dense_shape=next_shape)

  # Take the intermediary tensor and reduce over the term_index_in_doc
  # dimension. This produces a tensor with indices [<doc_id>, <term_id>]
  # and values [count_of_term_in_doc] and shape batch x vocab_size
  term_count_per_doc = tf.sparse_reduce_sum_sparse(next_tensor, 1)
  return term_count_per_doc
Code Example #6
File: mappers.py  Project: mariobriggs/transform
    def _to_doc_contains_term(x):
        """Creates a SparseTensor with 1s at every doc/term pair index.

        Args:
          x : a SparseTensor of int64 representing string indices in vocab.

        Returns:
          a SparseTensor with 1s at indices <doc_index_in_batch>,
              <term_index_in_vocab> for every term/doc pair.
        """
        # Construct intermediary sparse tensor with indices
        # [<doc>, <term_index_in_doc>, <vocab_id>] and tf.ones values.
        split_indices = tf.to_int64(
            tf.split(x.indices, axis=1, num_or_size_splits=2))
        expanded_values = tf.to_int64(tf.expand_dims(x.values, 1))
        next_index = tf.concat(
            [split_indices[0], split_indices[1], expanded_values], axis=1)

        next_values = tf.ones_like(x.values)
        vocab_size_as_tensor = tf.constant([vocab_size], dtype=tf.int64)
        next_shape = tf.concat([x.dense_shape, vocab_size_as_tensor], 0)

        next_tensor = tf.SparseTensor(indices=tf.to_int64(next_index),
                                      values=next_values,
                                      dense_shape=next_shape)

        # Take the intermediary tensor and reduce over the term_index_in_doc
        # dimension. This produces a tensor with indices [<doc_id>, <term_id>]
        # and values [count_of_term_in_doc] and shape batch x vocab_size
        term_count_per_doc = tf.sparse_reduce_sum_sparse(next_tensor, 1)

        one_if_doc_contains_term = tf.SparseTensor(
            indices=term_count_per_doc.indices,
            values=tf.to_double(tf.greater(term_count_per_doc.values, 0)),
            dense_shape=term_count_per_doc.dense_shape)

        return one_if_doc_contains_term
Code Example #7
def variational_message_passing(
        prior_global_params, global_params, o, o_dim, d, K, N,
        L=None, I=None, n_ann=None, ann_batch_size=None, n_iters=100):
    global_stats = global_expected_stats(global_params, d)
    dir_stats, niw_stats, alpha_stats, beta_stats = global_stats
    M = tf.shape(o)[0]

    # Initialize z_stats
    z_stats = normalize(tf.random_uniform([M, K], 1e-8, maxval=1))

    # Encode
    # h: [M, d], J: [M, d]
    h, J = encoder(o, d)
    # J: [M, d * d]
    J = tf.reshape(tf.matrix_diag(J), [M, d * d])
    # x_obs_param: [M, d + d * d]
    x_obs_param = tf.concat([h, J], axis=-1)

    # Prepare relational info
    if L is not None:
        # I, L: [M, M, W] (sparse), alpha_stats: [W, 2]
        # nb_weights_per_worker: [M, M, W] (sparse)
        nb_weights_per_worker = tf.sparse_add(
            (alpha_stats[:, 1] - beta_stats[:, 0]) * I,
            (alpha_stats[:, 0] - alpha_stats[:, 1] +
             beta_stats[:, 0] - beta_stats[:, 1]) * L)
        # nb_weights: [M, M] (sparse)
        nb_weights = tf.sparse_reduce_sum_sparse(nb_weights_per_worker, axis=-1)
    else:
        nb_weights = None

    # Message passing
    for t in range(n_iters):
        x_nat_param, x_stats = x_mean_field(niw_stats, z_stats, x_obs_param, d)
        z_nat_param, z_stats = z_mean_field(global_stats, x_stats, z_stats,
                                            nb_weights=nb_weights)

    # Decode
    # x: [M, d]
    x = mvn.sample(x_nat_param, d)
    o_dist, _ = decoder(x, o_dim)

    # Compute ELBO
    # log_po_term: [M]
    log_po_term = o_dist.log_prob(o)
    # log_p_ann_term: []
    if L is not None:
        # z_stats: [M, K], z_inner_stats: [M, M]
        z_inner_stats = tf.matmul(z_stats, z_stats, transpose_b=True)
        log_p_ann_term = annotation_log_likelihood(
            beta_stats, z_inner_stats, L, I, nb_weights)
        ann_subsample_factor = n_ann / ann_batch_size
    else:
        z_inner_stats = None
        log_p_ann_term = None
        ann_subsample_factor = 1
    # local_kl_x_term: [M]
    local_kl_x_term = local_kl_x(x_nat_param, niw_stats, z_stats, x_stats, d)
    # local_kl_z_term: [M]
    local_kl_z_term = local_kl_z(z_nat_param, dir_stats, z_stats)
    # global_kl_term: []
    global_kl_term = global_kl(
        prior_global_params, global_params, global_stats, d)
    lower_bound = elbo(
        log_po_term, local_kl_z_term, local_kl_x_term, global_kl_term, N,
        ann_ll=log_p_ann_term, ann_subsample_factor=ann_subsample_factor)

    # Natural gradient for global variational parameters
    # z_stats: [M, K], x_stats: [M, d + d^2]
    # dir_updates: [K]
    dir_updates = tf.reduce_mean(z_stats, axis=0)
    # niw_updates: [K, d + d^2 + 2]
    niw_updates = tf.matmul(z_stats, tf.concat([x_stats, tf.ones([M, 2])], -1),
                            transpose_a=True) / tf.to_float(M)
    updates = [dir_updates, niw_updates]

    if L is not None:
        # L_worker: [W, M, M] (sparse), false_L_worker: [W, M, M] (sparse)
        L_worker = tf.sparse_transpose(L, perm=[2, 0, 1])
        false_L_worker = tf.sparse_transpose(
            tf.sparse_add(I, -tf.ones(tf.shape(L)) * L), perm=[2, 0, 1])
        # alpha_updates: [W, 2]
        alpha_updates_1 = tf.sparse_reduce_sum(z_inner_stats * L_worker,
                                               axis=(-2, -1))
        alpha_updates_2 = tf.sparse_reduce_sum(z_inner_stats * false_L_worker,
                                               axis=(-2, -1))
        alpha_updates = 0.5 * tf.stack([alpha_updates_1, alpha_updates_2],
                                       axis=-1)
        # beta_updates: [W, 2]
        # false_z_inner_stats: [M, M]
        false_z_inner_stats = 1 - z_inner_stats
        beta_updates_1 = tf.sparse_reduce_sum(
            false_z_inner_stats * false_L_worker, axis=(-2, -1))
        beta_updates_2 = tf.sparse_reduce_sum(
            false_z_inner_stats * L_worker, axis=(-2, -1))
        beta_updates = 0.5 * tf.stack([beta_updates_1, beta_updates_2], axis=-1)
        updates.extend([alpha_updates / ann_subsample_factor,
                        beta_updates / ann_subsample_factor])

    nat_grads = [(prior_global_params[i] - global_params[i]) / N + updates[i]
                 for i in range(len(updates))]
    return lower_bound, nat_grads, z_stats, niw_stats, dir_stats
Code Example #8
File: feature.py  Project: shkarupa-alex/ruconlluconv
def extract_case_length_features(input_words):
    input_words = transform_normalize_unicode(input_words, 'NFKC')
    input_chars = expand_split_chars(input_words)
    input_words_lower = transform_lower_case(input_words)
    input_words_upper = transform_upper_case(input_words)
    input_words_title = transform_title_case(input_words)

    chars_count = tf.sparse_reduce_sum_sparse(tf.SparseTensor(
        indices=input_chars.indices,
        values=tf.ones_like(input_chars.values, dtype=tf.float32),
        dense_shape=input_chars.dense_shape,
    ), axis=-1)

    word_length_values = tf.where(
        tf.greater(chars_count.values, _MAX_LENGTH),
        tf.fill(tf.shape(chars_count.values), _MAX_LENGTH),
        chars_count.values
    )
    word_length_values = tf.divide(word_length_values, _MAX_LENGTH)
    word_length_values.set_shape(input_words.values.shape)
    word_length = tf.SparseTensor(
        indices=input_words.indices,
        values=word_length_values,
        dense_shape=input_words.dense_shape,
    )

    no_case_value = tf.logical_and(
        tf.equal(input_words_lower.values, input_words_upper.values),
        tf.equal(input_words_upper.values, input_words_title.values)
    )
    no_case = tf.SparseTensor(
        indices=input_words.indices,
        values=tf.to_float(no_case_value),
        dense_shape=input_words.dense_shape
    )

    lower_case_value = tf.logical_and(
        tf.logical_not(no_case_value),
        tf.equal(input_words.values, input_words_lower.values)
    )
    lower_case = tf.SparseTensor(
        indices=input_words.indices,
        values=tf.to_float(lower_case_value),
        dense_shape=input_words.dense_shape
    )

    upper_case_value = tf.logical_and(
        tf.logical_not(no_case_value),
        tf.equal(input_words.values, input_words_upper.values)
    )
    upper_case = tf.SparseTensor(
        indices=input_words.indices,
        values=tf.to_float(upper_case_value),
        dense_shape=input_words.dense_shape
    )

    title_case_value = tf.logical_and(
        tf.logical_not(no_case_value),
        tf.equal(input_words.values, input_words_title.values)
    )
    title_case = tf.SparseTensor(
        indices=input_words.indices,
        values=tf.to_float(title_case_value),
        dense_shape=input_words.dense_shape
    )

    mixed_case_value = tf.logical_not(tf.logical_or(
        tf.logical_or(no_case_value, lower_case_value),
        tf.logical_or(upper_case_value, title_case_value)
    ))
    mixed_case = tf.SparseTensor(
        indices=input_words.indices,
        values=tf.to_float(mixed_case_value),
        dense_shape=input_words.dense_shape
    )

    return word_length, no_case, lower_case, upper_case, title_case, mixed_case
Code Example #9
def sparse_norm(x):
    # Per-column sums of the sparse tensor, kept sparse with shape (1, n_cols)
    rsum = tf.sparse_reduce_sum_sparse(x, axis=0, keep_dims=True)
    # Divide the stored values element-wise by the reduced sums and return
    # the normalized tensor
    x = tf.SparseTensor(indices=x.indices,
                        values=x.values / rsum.values,
                        dense_shape=x.dense_shape)
    return x
Code Example #10
File: t.py  Project: lucasgnz/question_answering
import tensorflow as tf

# Definitions assumed for this standalone snippet: a token-id placeholder and a
# vocabulary size that covers the ids in the feed below.
vocab_size = 6
encoder_input_ids = tf.placeholder(tf.int32, shape=(None, None))
copy_score = tf.placeholder(tf.float32, shape=(None, None))

encoder_input_mask = tf.one_hot(encoder_input_ids, vocab_size)
#expanded_copy_score = tf.einsum("ijn,ij->ij", encoder_input_mask, copy_score)
#prob_c = expanded_copy_score
prob_c_one_hot2 = tf.einsum("ijn,ij->in", encoder_input_mask, copy_score)

batch_size, time_steps = tf.unstack(tf.shape(encoder_input_ids))
inputs_flat = tf.reshape(encoder_input_ids, [-1])
copy_score_flat = tf.reshape(copy_score, [-1])
rr = tf.range(tf.cast(batch_size * time_steps, tf.int64), dtype=tf.int64)
indices = tf.stack([rr, tf.cast(inputs_flat, tf.int64)], axis=1)
shape = tf.cast([batch_size * time_steps, vocab_size], tf.int64)
expanded_copy_score_sparse_flat = tf.SparseTensor(indices, copy_score_flat,
                                                  shape)
expanded_copy_score_sparse = tf.sparse_reshape(
    expanded_copy_score_sparse_flat, [batch_size, time_steps, vocab_size])
copy_score_sparse = tf.sparse_reduce_sum_sparse(expanded_copy_score_sparse,
                                                axis=1)
prob_c_one_hot = tf.sparse_to_dense(copy_score_sparse.indices,
                                    copy_score_sparse.dense_shape,
                                    copy_score_sparse.values)

with tf.Session() as sess:
    print(
        sess.run(
            [prob_c_one_hot, prob_c_one_hot2],
            feed_dict={
                encoder_input_ids: [[5, 4, 3, 2, 1], [3, 4, 1, 5, 2]],
                copy_score: [[0, 0.5, 0, 0.5, 0], [0.3, 0.7, 0, 0, 0]]
            }))
Code Example #11
def define_variables(train_category, priors, sigma2, batch_size):
    if options.degenerate:
        emb_user_prior = make_embedding_prior()
        emb_item_prior = make_embedding_prior()
        emb_entity_prior = make_embedding_prior()
        bias_user_prior = make_bias_prior()
        bias_item_prior = make_bias_prior()
        bias_entity_prior = make_bias_prior()
    else:
        emb_user_prior = make_embedding_prior3(priors, user_batch)
        emb_item_prior = make_embedding_prior3(priors, item_batch)
        emb_entity_prior = make_embedding_prior3(priors, all_entities)
        bias_user_prior = make_bias_prior3(priors, user_batch)
        bias_item_prior = make_bias_prior3(priors, item_batch)
        bias_entity_prior = make_bias_prior3(priors, all_entities)

    user_rescale = tf.nn.embedding_lookup(priors, user_batch)[:, 0]
    item_rescale = tf.nn.embedding_lookup(priors, item_batch)[:, 0]
    entity_rescale = priors[:, 0]

    if is_classification:
        likelihood = make_likelihood(feat_users, feat_items, bias_users,
                                     bias_items)
        sparse_pred = make_sparse_pred(X_fm_batch)
    else:
        likelihood = make_likelihood_reg(sigma2, feat_users, feat_items,
                                         bias_users, bias_items)
        sparse_pred = make_sparse_pred_reg(sigma2, X_fm_batch)
    pred2 = sparse_pred.mean()
    # ll = make_likelihood(feat_users2, feat_items2, bias_users2, bias_items2)
    pred = likelihood.mean()
    # print(likelihood.log_prob([1, 0]))

    # Check shapes
    # print('likelihood', likelihood.log_prob(outcomes))
    # print('prior', emb_user_prior.log_prob(feat_users))
    # print('scaled prior', emb_user_prior.log_prob(feat_users) / user_rescale)
    # print('posterior', q_user.log_prob(feat_users))
    # print('bias prior', bias_user_prior.log_prob(bias_users))
    # print('bias posterior', q_user_bias.log_prob(bias_users))

    # sentinel = likelihood.log_prob(outcomes)
    # sentinel = bias_prior.log_prob(bias_users)
    # sentinel = tf.reduce_sum(ll.log_prob(outcomes))
    # sentinel2 = tf.reduce_sum(likelihood.log_prob(outcomes))

    # elbo = tf.reduce_mean(
    #     user_rescale * item_rescale * likelihood.log_prob(outcomes) +
    #     item_rescale * (bias_user_prior.log_prob(bias_users) - q_user_bias.log_prob(bias_users) +
    #                     emb_user_prior.log_prob(feat_users) - q_user.log_prob(feat_users)) +
    #     user_rescale * (bias_item_prior.log_prob(bias_items) - q_item_bias.log_prob(bias_items) +
    #                     emb_user_prior.log_prob(feat_items) - q_item.log_prob(feat_items)))

    # (nb_users + nb_items) / 2
    if options.degenerate:
        # elbo = -(tf.reduce_sum((pred - outcomes) ** 2 / 2) +
        #          0.1 * tf.reduce_sum(tf.nn.l2_loss(bias_users) + tf.nn.l2_loss(bias_items) +
        #          tf.nn.l2_loss(feat_users) + tf.nn.l2_loss(feat_items)))
        elbo = tf.reduce_mean(
            likelihood.log_prob(outcomes) + 1 / user_rescale *
            (bias_user_prior.log_prob(bias_users) +
             emb_user_prior.log_prob(feat_users)) + 1 / item_rescale *
            (bias_item_prior.log_prob(bias_items) +
             emb_user_prior.log_prob(feat_items)),
            name='elbo')
    # / 2 : 1.27
    # * 2 : 1.16
    elif options.sparse:
        nb_occ = tf.sparse_reshape(
            tf.sparse_reduce_sum_sparse(X_fm_batch, axis=0), (1, -1))

        lp_lq = tf.reduce_sum(bias_entity_prior.log_prob(all_bias) -
                              q_entity_bias.log_prob(all_bias) +
                              emb_entity_prior.log_prob(all_feat) -
                              q_entity.log_prob(all_feat),
                              axis=0)
        nonzero_entity_rescale = 1 + tf.maximum(0., entity_rescale - 1)
        lp_lq = tf.reshape(lp_lq / nonzero_entity_rescale, (-1, 1))
        relevant_scaled_lp_lq = tf.squeeze(
            tf.sparse_tensor_dense_matmul(nb_occ, lp_lq))

        elbo = (tf.reduce_mean(sparse_pred.log_prob(outcomes)) +
                relevant_scaled_lp_lq / batch_size)

    else:
        # elbo = tf.reduce_mean(
        #     nb_samples['train'] * likelihood.log_prob(outcomes) +
        #     # nb_samples['train'] * sparse_pred.log_prob(outcomes) +
        #     (nb_users + nb_items) / 2 * (bias_user_prior.log_prob(bias_users) - q_user_bias.log_prob(bias_users) +
        #                                  emb_user_prior.log_prob(feat_users) - q_user.log_prob(feat_users) +
        #                                  bias_item_prior.log_prob(bias_items) - q_item_bias.log_prob(bias_items) +
        #                                  emb_user_prior.log_prob(feat_items) - q_item.log_prob(feat_items)), name='elbo')

        # elbo = tf.reduce_mean(
        #     nb_samples[train_category] * likelihood.log_prob(outcomes) +
        #     nb_samples[train_category] * 1 / user_rescale * (bias_user_prior.log_prob(bias_users) - q_user_bias.log_prob(bias_users) +
        #                       emb_user_prior.log_prob(feat_users) - q_user.log_prob(feat_users)) +
        #     nb_samples[train_category] * 1 / item_rescale * (bias_item_prior.log_prob(bias_items) - q_item_bias.log_prob(bias_items) +
        #                       emb_user_prior.log_prob(feat_items) - q_item.log_prob(feat_items)), name='elbo')

        elbo = tf.reduce_mean(
            likelihood.log_prob(outcomes) + 1 / user_rescale *
            (bias_user_prior.log_prob(bias_users) -
             q_user_bias.log_prob(bias_users) +
             emb_user_prior.log_prob(feat_users) -
             q_user.log_prob(feat_users)) + 1 / item_rescale *
            (bias_item_prior.log_prob(bias_items) -
             q_item_bias.log_prob(bias_items) +
             emb_user_prior.log_prob(feat_items) - q_item.log_prob(feat_items))
        )

    sentinel = {
        'nb outcomes':
        tf.shape(outcomes),
        'nb samples':
        tf.constant(nb_samples[train_category]),
        'users':
        entity[:5, 0],
        # 'lplq': relevant_scaled_lp_lq,
        # 'll log prob': -likelihood.log_prob(outcomes),
        # 'll log prob sparse': -sparse_pred.log_prob(outcomes),
        'll log prob has nan':
        tf.reduce_any(tf.is_nan(likelihood.log_prob(outcomes))),
        'll log prob sparse has nan':
        tf.reduce_any(tf.is_nan(sparse_pred.log_prob(outcomes))),
        # 's ll log prob': -tf.reduce_sum(likelihood.log_prob(outcomes)),
        # 's pred delta': tf.reduce_sum((pred - outcomes) ** 2 / 2 + np.log(2 * np.pi) / 2),
        'entity_rescale sum':
        tf.reduce_sum(entity_rescale),
        'nb occ sum':
        tf.constant(nb_occurrences[train_category].sum()),
        # 'logits': logits,
        # 'max logits': tf.reduce_max(logits),
        # 'min logits': tf.reduce_min(logits),
        # 'max logits2': tf.reduce_max(logits2),
        # 'min logits2': tf.reduce_min(logits2),
        # 'bias sample': bias_users[0],
        # 'bias log prob': -bias_user_prior.log_prob(bias_users)[0],
        # 'sum bias log prob': -tf.reduce_sum(bias_user_prior.log_prob(bias_users)),
        'pred':
        pred,
        'pred2':
        pred2,
        'max pred':
        tf.reduce_max(pred),
        'min pred':
        tf.reduce_min(pred),
        'max pred2':
        tf.reduce_max(pred2),
        'min pred2':
        tf.reduce_min(pred2),
        'has nan':
        tf.reduce_any(tf.is_nan(pred2))
        # 'bias mean': bias_user_prior.mean(),
        # 'bias delta': bias_users[0] ** 2 / 2 + np.log(2 * np.pi) / 2,
        # 'sum bias delta': tf.reduce_sum(bias_users ** 2 / 2 + np.log(2 * np.pi) / 2)
    }

    infer_op = optimizer.minimize(-elbo)
    if options.sparse:
        return infer_op, elbo, pred2, likelihood, sentinel
    else:
        return infer_op, elbo, pred, likelihood, sentinel
Code Example #12
    def __call__(self, inputs, state, scope=None):
        if not isinstance(state, CopyNetWrapperState):
            raise TypeError(
                "Expected state to be instance of CopyNetWrapperState. "
                "Received type %s instead." % type(state))
        last_ids = state.last_ids
        prob_c = state.prob_c
        cell_state = state.cell_state

        mask = tf.cast(
            tf.equal(tf.expand_dims(last_ids, 1), self._encoder_input_ids),
            tf.float32)
        mask_sum = tf.reduce_sum(mask, axis=1)
        mask = tf.where(tf.less(mask_sum, 1e-7), mask,
                        mask / tf.expand_dims(mask_sum, 1))
        rou = mask * prob_c
        selective_read = tf.einsum("ijk,ij->ik", self._encoder_states, rou)
        inputs = tf.concat([inputs, selective_read], 1)

        outputs, cell_state = self._cell(inputs, cell_state, scope)
        generate_score = self._projection(outputs)
        prob_g = generate_score

        copy_score = tf.einsum("ijk,km->ijm", self._encoder_states,
                               self._copy_weight)
        copy_score = tf.nn.tanh(copy_score)
        copy_score = tf.einsum("ijm,im->ij", copy_score, outputs)
        prob_c = copy_score
        """
        encoder_input_mask = tf.one_hot(self._encoder_input_ids, self._vocab_size)
        #expanded_copy_score = tf.einsum("ijn,ij->ij", encoder_input_mask, copy_score)

        prob_c_one_hot = tf.einsum("ijn,ij->in", encoder_input_mask, prob_c)
        """

        #Using sparse tensor

        batch_size, time_steps = tf.unstack(tf.shape(self._encoder_input_ids))

        inputs_flat = tf.reshape(self._encoder_input_ids, [-1])
        copy_score_flat = tf.reshape(copy_score, [-1])

        rr = tf.range(tf.cast(batch_size * time_steps, tf.int64),
                      dtype=tf.int64)
        indices = tf.stack([rr, tf.cast(inputs_flat, tf.int64)], axis=1)
        shape = tf.cast([batch_size * time_steps, self._vocab_size], tf.int64)
        expanded_copy_score_sparse_flat = tf.SparseTensor(
            indices, copy_score_flat, shape)

        expanded_copy_score_sparse = tf.sparse_reshape(
            expanded_copy_score_sparse_flat,
            [batch_size, time_steps, self._vocab_size])
        copy_score_sparse = tf.sparse_reduce_sum_sparse(
            expanded_copy_score_sparse, axis=1)
        prob_c_one_hot2 = tf.sparse_to_dense(copy_score_sparse.indices,
                                             copy_score_sparse.dense_shape,
                                             copy_score_sparse.values)
        """expanded_copy_score_flat = tf.sparse_to_dense(expanded_copy_score_sparse_flat.indices,expanded_copy_score_sparse_flat.dense_shape,expanded_copy_score_sparse_flat.values )
        expanded_copy_score = tf.reshape(expanded_copy_score_flat, [batch_size, time_steps, self._vocab_size])
        prob_c_one_hot3 = tf.reduce_sum(expanded_copy_score, axis=1)"""

        #prob_c_one_hot = tf.Print(prob_c_one_hot, [tf.reduce_max(tf.abs(tf.add(prob_c_one_hot3,-prob_c_one_hot2)))])

        prob_g_total = tf.pad(
            prob_g, [[0, 0], [0, self._vocab_size - self._gen_vocab_size]])
        outputs = prob_g_total + prob_c_one_hot2
        """
        Very strange bugs:
        prob_c_one_hot is always equal to prob_c_one_hot3,
        but perplexity explodes immediately if I put prob_c_one_hot3 in place of prob_c_one_hot

        https://stackoverflow.com/questions/45348902/why-is-no-gradient-available-when-using-sparse-tensors-in-tensorflow:
        It turns out the sparse_to_dense operation (around which sparse_tensor_to_dense is a convenience wrapper) does not have a gradient in TensorFlow

        sparse_to_dense => scatter_nd ?

        prob_c_one_hot2 and prob_c_one_hot3 differ by about 1e-6... but that is probably normal (matches float32 floating-point precision)
        """
        #pr = tf.reduce_min(tf.reshape(tf.add(prob_c_one_hot2,-prob_c_one_hot),[-1]))
        #outputs = tf.Print(outputs,[pr])

        last_ids = tf.argmax(outputs, axis=-1, output_type=tf.int32)
        last_ids.set_shape([None])
        state = CopyNetWrapperState(cell_state=cell_state,
                                    last_ids=last_ids,
                                    prob_c=prob_c)
        return outputs, state
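
The note above points to tf.scatter_nd as a gradient-friendly replacement for the sparse_reduce_sum_sparse / sparse_to_dense round trip. Below is a minimal sketch of that idea, not taken from the original project: copy_scores_to_vocab is a hypothetical helper, and it assumes encoder_input_ids is an int32 [batch, time] tensor of token ids and copy_score a float32 [batch, time] tensor of scores, as in the snippets above.

import tensorflow as tf

def copy_scores_to_vocab(encoder_input_ids, copy_score, vocab_size):
    # Sum the copy scores of repeated input tokens into one score per vocab id.
    # tf.scatter_nd accumulates duplicate indices and has a gradient, so it can
    # replace the sparse_reduce_sum_sparse + sparse_to_dense combination.
    batch_size, time_steps = tf.unstack(tf.shape(encoder_input_ids))
    batch_idx = tf.tile(tf.expand_dims(tf.range(batch_size), 1), [1, time_steps])
    scatter_indices = tf.stack([batch_idx, encoder_input_ids], axis=-1)  # [B, T, 2]
    return tf.scatter_nd(scatter_indices, copy_score,
                         tf.stack([batch_size, vocab_size]))  # [B, vocab_size]

For the feed in Code Example #10 this should match prob_c_one_hot2 up to float32 precision while keeping the copy-score path differentiable.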