def forward_incidence_matrix(self, normalization):
        if normalization[0] == "none":
            mtr_values = tf.to_float(tf.ones_like(self.receiver_indices))
            message_indices = tf.range(self.edge_count)

            mtr_indices = tf.to_int64(
                tf.transpose(tf.stack([self.receiver_indices,
                                       message_indices])))
            mtr_shape = tf.to_int64(
                tf.stack([self.vertex_count, self.edge_count]))

            tensor = tf.SparseTensor(indices=mtr_indices,
                                     values=mtr_values,
                                     dense_shape=mtr_shape)

            return tensor
        elif normalization[0] == "global":
            mtr_values = tf.to_float(
                tf.ones_like(self.receiver_indices)
            )  # mtr_values can be normalized weights, eg. intensities
            message_indices = tf.range(self.edge_count)

            mtr_indices = tf.to_int64(
                tf.transpose(tf.stack([self.receiver_indices,
                                       message_indices])))
            mtr_shape = tf.to_int64(
                tf.stack([self.vertex_count, self.edge_count]))

            tensor = tf.sparse_softmax(
                tf.SparseTensor(indices=mtr_indices,
                                values=mtr_values,
                                dense_shape=mtr_shape))

            return tensor
        elif normalization[0] == "local":
            mtr_values = tf.to_float(tf.ones_like(self.receiver_indices))
            message_indices = tf.range(self.edge_count)

            mtr_indices = tf.to_int64(
                tf.transpose(
                    tf.stack([
                        self.message_types, self.receiver_indices,
                        message_indices
                    ])))
            mtr_shape = tf.to_int64(
                tf.stack(
                    [self.label_count * 2, self.vertex_count,
                     self.edge_count]))

            tensor = tf.sparse_softmax(
                tf.SparseTensor(indices=mtr_indices,
                                values=mtr_values,
                                dense_shape=mtr_shape))

            tensor = tf.sparse_reduce_sum_sparse(tensor, 0)

            return tensor
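
For reference, a minimal standalone sketch (TF 1.x graph mode, toy values chosen purely for illustration) of what tf.sparse_softmax does to an incidence matrix like the one built above: the softmax runs over the last dimension and only over the stored entries, so the weights of the edges arriving at each vertex sum to 1.

import numpy as np
import tensorflow as tf

# Toy incidence structure: 3 vertices, 4 edges; vertex 0 receives two edges.
receiver_indices = np.array([0, 0, 1, 2], dtype=np.int64)
message_indices = np.arange(4, dtype=np.int64)

incidence = tf.SparseTensor(
    indices=np.stack([receiver_indices, message_indices], axis=1),
    values=tf.ones([4], dtype=tf.float32),
    dense_shape=[3, 4])

# Softmax over the last dimension, ignoring the implicit zeros:
# vertex 0 splits its weight 0.5/0.5, vertices 1 and 2 keep weight 1.0.
normalized = tf.sparse_softmax(incidence)

with tf.Session() as sess:
    print(sess.run(normalized.values))  # [0.5 0.5 1.  1. ]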
Example n. 2
def sp_attn_head(seq,
                 out_sz,
                 adj_mat_local,
                 adj_mat_global,
                 activation,
                 in_drop=0.0,
                 coef_drop=0.0,
                 residual=False):
    with tf.name_scope('my_attn'):
        if in_drop != 0.0:
            seq = tf.nn.dropout(seq, 1.0 - in_drop)
        seq_fts = seq

        latent_factor_size = 8
        nb_nodes = seq_fts.shape[1].value

        w_1 = glorot([seq_fts.shape[2].value, latent_factor_size])
        w_2 = glorot([3 * seq_fts.shape[2].value, latent_factor_size])

        f_1 = tf.layers.conv1d(seq_fts, 1, 1)
        f_2 = tf.layers.conv1d(seq_fts, 1, 1)

        #local neighbours
        logits = tf.add(f_1[0], tf.transpose(f_2[0]))
        logits_first = adj_mat_local * logits
        lrelu = tf.SparseTensor(indices=logits_first.indices,
                                values=tf.nn.leaky_relu(logits_first.values),
                                dense_shape=logits_first.dense_shape)
        coefs = tf.sparse_softmax(lrelu)

        coefs = tf.sparse_reshape(coefs, [nb_nodes, nb_nodes])
        seq_fts = tf.squeeze(seq_fts)
        neigh_embs = tf.sparse.sparse_dense_matmul(coefs, seq_fts)

        #non-local neighbours
        logits_global = adj_mat_global * logits
        lrelu_global = tf.SparseTensor(indices=logits_global.indices,
                                       values=tf.nn.leaky_relu(
                                           logits_global.values),
                                       dense_shape=logits_global.dense_shape)
        coefs_global = tf.sparse_softmax(lrelu_global)

        coefs_global = tf.sparse_reshape(coefs_global, [nb_nodes, nb_nodes])
        neigh_embs_global = tf.sparse.sparse_dense_matmul(
            coefs_global, seq_fts)

        neigh_embs_sum_1 = tf.matmul(
            tf.add(tf.add(seq_fts, neigh_embs), neigh_embs_global), w_1)
        neigh_embs_sum_2 = tf.matmul(
            tf.concat(
                [tf.concat([seq_fts, neigh_embs], axis=-1), neigh_embs_global],
                axis=-1), w_2)

        final_embs = activation(neigh_embs_sum_1) + activation(
            neigh_embs_sum_2)

        return final_embs
Example n. 3
    def call(self, inputs):
        ent_emb = inputs[0]
        rel_emb = inputs[1]
        adj = tf.SparseTensor(
            K.cast(K.squeeze(inputs[2], axis=0), dtype="int64"),
            K.ones_like(inputs[2][0, :, 0], dtype='float32'),
            (self.node_size, self.node_size))

        rel_adj = K.cast(K.squeeze(inputs[3], axis=0), dtype="int64")
        rel_adj = tf.SparseTensor(indices=rel_adj,
                                  values=tf.ones_like(rel_adj[:, 0],
                                                      dtype='float32'),
                                  dense_shape=(self.node_size, self.rel_size))
        rel_adj = tf.sparse_softmax(rel_adj)
        rel_features = tf.sparse_tensor_dense_matmul(rel_adj, rel_emb)

        ent_adj = K.cast(K.squeeze(inputs[4], axis=0), dtype="int64")
        ent_adj = tf.SparseTensor(indices=ent_adj,
                                  values=tf.ones_like(ent_adj[:, 0],
                                                      dtype='float32'),
                                  dense_shape=(self.node_size, self.node_size))
        ent_adj = tf.sparse_softmax(ent_adj)
        ent_features = tf.sparse_tensor_dense_matmul(ent_adj, ent_emb)

        features = K.concatenate([ent_features, rel_features])
        outputs = [self.activation(features)]

        for _ in range(self.depth):
            features_list = []
            for head in range(self.attn_heads):
                attention_kernel = self.attn_kernels[head]

                attn_for_self = K.dot(features, attention_kernel[0])
                attn_for_neighs = tf.transpose(
                    K.dot(features, attention_kernel[1]), [1, 0])

                att = tf.sparse_add(adj * attn_for_self, adj * attn_for_neighs)

                att = tf.SparseTensor(indices=att.indices,
                                      values=tf.nn.leaky_relu(att.values),
                                      dense_shape=att.dense_shape)
                att = tf.sparse_softmax(att)
                new_features = tf.sparse_tensor_dense_matmul(att, features)

                if self.use_bias:
                    new_features = K.bias_add(new_features, self.biases[head])
                features_list.append(new_features)

            if self.attn_heads_reduction == 'concat':
                features = K.concatenate(features_list)
            else:
                features = K.mean(K.stack(features_list), axis=0)

            features = self.activation(features)
            outputs.append(features)
        outputs = K.concatenate(outputs)
        return outputs
Example n. 4
def get_log_prob(model, action_placeholder, mask_placeholder):
    action_dim = 9
    logits = model

    indices = tf.where(mask_placeholder)
    values = tf.gather_nd(logits, indices)
    denseShape = tf.cast(tf.shape(logits), tf.int64)
    """THIS IS THE KEY: tensorflow will automatically set output probabilities to zero of undesignated entries in sparse vector"""
    sparseResult = tf.sparse_softmax(
        tf.SparseTensor(indices, values, denseShape))

    probability_dist = tf.scatter_nd(sparseResult.indices, sparseResult.values,
                                     sparseResult.dense_shape)
    #     probability_dist = probability_dist.set_shape(logits.shape)
    log_probability_dist = tf.scatter_nd(sparseResult.indices,
                                         tf.log(sparseResult.values),
                                         sparseResult.dense_shape)
    """Want to emulate this:"""
    #     probability_dist = tf.nn.softmax(logits)
    #     legal_pseudo_probability_dist = probability_dist*values
    #     legalprobability_dist = tf.divide(legal_pseudo_probability_dist, tf.reduce_sum(legal_pseudo_probability_dist, axis= 1))

    prod = tf.multiply(probability_dist,
                       tf.one_hot(action_placeholder, action_dim))

    entropy = -tf.reduce_sum(probability_dist * log_probability_dist, axis=1)

    log_prob = tf.log(tf.reduce_sum(prod, axis=1))
    #    log_prob = -tf.nn.sparse_softmax_cross_entropy_with_logits(labels= action_placeholder, logits= tf.SparseTensor(indices, values, denseShape))
    return log_prob, entropy
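
The commented-out lines above describe the dense computation the sparse construction is meant to emulate; as a hedged sketch (hypothetical helper, with logits and a boolean mask both shaped [None, action_dim]), that dense equivalent would be:

import tensorflow as tf

def dense_masked_softmax(logits, mask):
    # Hypothetical dense counterpart of the sparse trick above: ordinary softmax,
    # zero out the illegal entries, then renormalize each row to sum to 1.
    probs = tf.nn.softmax(logits)
    masked = probs * tf.cast(mask, tf.float32)
    return masked / tf.reduce_sum(masked, axis=1, keepdims=True)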
Example n. 5
    def attention(self, c, mem, existing_facts):
        with tf.variable_scope("attending") as scope:
            attending = tf.concat([
                c, mem, self.re_q, c * self.re_q, c * mem, (c - self.re_q)**2,
                (c - mem)**2
            ], 2)
            m1 = tf.matmul(
                attending * existing_facts,
                tf.tile(self.w_1, tf.stack([tf.shape(attending)[0], 1, 1
                                            ]))) * existing_facts
            bias_1 = self.b_1 * existing_facts
            tnhan = tf.nn.relu(m1 + bias_1)

            m2 = tf.matmul(
                tnhan,
                tf.tile(self.w_2, tf.stack([tf.shape(attending)[0], 1, 1])))

            bias_2 = self.b_2 * existing_facts
            norm_m2 = tf.nn.l2_normalize(m2 + bias_2, -1)

            softmax_idx = tf.where(tf.not_equal(norm_m2, 0))[:, :-1]
            softmax_gather = tf.gather_nd(norm_m2[..., 0], softmax_idx)
            softmax_shape = tf.shape(norm_m2, out_type=tf.int64)[:-1]
            softmaxable = tf.SparseTensor(softmax_idx, softmax_gather,
                                          softmax_shape)

        return tf.expand_dims(
            tf.sparse_tensor_to_dense(tf.sparse_softmax(softmaxable)), -1)
Example n. 6
def sp_attn_head(seq,
                 out_sz,
                 adj_mat,
                 adj_all_mat,
                 adj_neig_mat,
                 N_target_mat,
                 activation,
                 nb_nodes,
                 in_drop=0.0,
                 coef_drop=0.0,
                 residual=False):
    with tf.name_scope('sp_attn'):

        if coef_drop != 0.0:
            adj_mat = tf.SparseTensor(indices=adj_mat.indices,
                                      values=tf.nn.dropout(
                                          adj_mat.values, 1.0 - coef_drop),
                                      dense_shape=adj_mat.dense_shape)
            adj_neig_mat = tf.SparseTensor(
                indices=adj_neig_mat.indices,
                values=tf.nn.dropout(adj_neig_mat.values, 1.0 - coef_drop),
                dense_shape=adj_neig_mat.dense_shape)

        if in_drop != 0.0:
            seq = tf.nn.dropout(seq, 1.0 - in_drop)

        seq_fts = tf.layers.conv1d(seq, out_sz, 1, use_bias=False)

        # simplest self-attention possible
        f_1 = tf.layers.conv1d(seq_fts, 1, 1)
        f_2 = tf.layers.conv1d(seq_fts, 1, 1)

        f_1 = tf.reshape(f_1, (nb_nodes, 1))
        f_2 = tf.reshape(f_2, (nb_nodes, 1))

        f_1 = adj_mat * f_1
        f_2 = adj_mat * tf.transpose(f_2, [1, 0])

        logits = tf.sparse_add(f_1, f_2)
        lrelu = tf.SparseTensor(indices=logits.indices,
                                values=tf.nn.leaky_relu(logits.values),
                                dense_shape=logits.dense_shape)
        coefs = tf.sparse_softmax(lrelu)

        if in_drop != 0.0:
            seq_fts = tf.nn.dropout(seq_fts, 1.0 - in_drop)

        coefs = tf.sparse_reshape(coefs, [nb_nodes, nb_nodes])
        seq_fts = tf.squeeze(seq_fts)  ###HW

        out_bi = BILinear_pooling(adj_neig_mat, seq_fts)
        out_bi = dot(N_target_mat, out_bi, True)
        out_gat = tf.sparse_tensor_dense_matmul(coefs, seq_fts)
        vals = (1 - FLAGS.alpha) * out_gat + FLAGS.alpha * out_bi

        vals = tf.expand_dims(vals, axis=0)
        vals.set_shape([1, nb_nodes, out_sz])
        ret = tf.contrib.layers.bias_add(vals)

        return activation(ret)  # activation
Example n. 7
def maskedSoftmax(logits, mask):
    '''Computes a masked softmax over dim 1 of the logits, given that some moves are illegal.

    Inputs:
        logits: [None, ac_dim]
        mask:   [None, ac_dim]

    We do not want any probability of making illegal moves. Intuitively, we compute the
    softmax of the logits while pretending that the only entries are the legal ones.
    This is implemented via SparseTensor calculations.
    (This code is edited from code we found online.)

    Returns:
        result: [None, ac_dim], a batch of probability distributions with zero probability
            assigned to illegal moves
    '''
    indices = tf.where(mask)
    values = tf.gather_nd(logits, indices)
    denseShape = tf.cast(tf.shape(logits), tf.int64)

    # TensorFlow automatically assigns zero output probability to entries
    # that are absent from the sparse tensor.
    sparseResult = tf.sparse_softmax(
        tf.SparseTensor(indices, values, denseShape))

    result = tf.scatter_nd(sparseResult.indices, sparseResult.values,
                           sparseResult.dense_shape)
    result.set_shape(logits.shape)
    return result
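
A usage sketch for the function above (TF 1.x session mode, hypothetical toy values): the masked columns receive exactly zero probability and the legal columns sum to 1.

logits = tf.constant([[2.0, 1.0, 0.5, -1.0]])
mask = tf.constant([[True, False, True, False]])
probs = maskedSoftmax(logits, mask)

with tf.Session() as sess:
    print(sess.run(probs))  # approx. [[0.82, 0.0, 0.18, 0.0]]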
Example n. 8
    def __call__(self, inputs):
        with tf.name_scope(self.name):
            x = inputs
            x = dropout_sparse(x, 1-self.dropout, self.features_nonzero)
            x = tf.sparse_tensor_dense_matmul(x, self.vars['weights'])  # the result of this is dense

            # split XWa into XWa_{self} + XWa_{neigh}
            att_self = tf.matmul(x, self.vars['attention_self'])  # (N,1)
            att_neigh = tf.matmul(x, self.vars['attention_neigh'])  # (N,1)

            # Build the attention weights as a SparseTensor.
            # Caveat: placeholders have unknown shapes/values until they are fed, so for example
            #  att = tf.SparseTensor(indices=self.adj.indices, values=[att_self[self.adj.indices[i][0]]+att_neigh[self.adj.indices[i][1]] for i in range(self.adj.indices[0].value)], dense_shape=self.adj.dense_shape)
            # fails: self.adj.indices[0].value is None, so the loop cannot run.

            # The element-wise product of an (N,N) sparse matrix with an (N,1) vector scales entry (i,j) by the vector's i-th value (each column is multiplied by the (N,1) vector).
            # The element-wise product of (N,N) with a (1,N) vector scales entry (i,j) by the vector's j-th value (each row is multiplied by the (1,N) vector).

            att_1 = self.adj.__mul__(tf.nn.leaky_relu(att_self, alpha=0.2))
            att_2 = self.adj.__mul__(tf.transpose(tf.nn.leaky_relu(att_neigh, alpha=0.2)))
            att = tf.sparse_add(att_1, att_2)

            del att_1,   att_2
            gc.collect()

            #att = tf.add(att_self, tf.transpose(att_neigh))  # relies on (N,1) + (1,N) broadcasting to (N,N)
            #att = self.adj.__mul__(att)  # element-wise product with the adjacency matrix (adj is sparse, so __mul__ yields a sparse result)
            #att = tf.SparseTensor(indices=att.indices, values=tf.nn.leaky_relu(att.values), dense_shape=att.dense_shape)

            att = tf.sparse_softmax(att)

            x = tf.sparse_tensor_dense_matmul(att, x)
            outputs = self.act(x)
        return outputs
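
The broadcasting described in the comments above can be checked with a small standalone sketch (TF 1.x, toy values): a sparse (N, N) matrix times a dense (N, 1) column scales entry (i, j) by col[i], while multiplying by the transposed (1, N) row scales entry (i, j) by col[j].

import tensorflow as tf

adj = tf.SparseTensor(indices=[[0, 1], [1, 0], [1, 2]],
                      values=[1.0, 1.0, 1.0],
                      dense_shape=[3, 3])
col = tf.constant([[2.0], [3.0], [4.0]])        # (N, 1)

row_scaled = adj.__mul__(col)                   # entry (i, j) becomes col[i]
col_scaled = adj.__mul__(tf.transpose(col))     # entry (i, j) becomes col[j]

with tf.Session() as sess:
    print(sess.run(row_scaled.values))  # [2. 3. 3.]
    print(sess.run(col_scaled.values))  # [3. 2. 4.]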
Example n. 9
def go():
    dense = tf.Variable([[0, 0, 10, 1, 0, 0], [0, 0, -2, 3, 0, 0]],
                        dtype=tf.float32)
    sm1 = tf.nn.softmax(dense)

    denseReplacing0WithNeg10 = tf.where(
        dense > 0.0, dense,
        tf.ones(tf.shape(dense), tf.float32) * (-10.0))
    sm2 = tf.nn.softmax(denseReplacing0WithNeg10)

    nz_indices = tf.where(tf.not_equal(dense, tf.constant(0,
                                                          dtype=tf.float32)))
    nz_values = tf.gather_nd(dense, nz_indices)
    sparse = tf.SparseTensor(nz_indices, nz_values, dense.get_shape())
    sm3 = tf.sparse_softmax(sparse)
    dm3a = tf.sparse_to_dense(sm3.indices, sm3.get_shape(), sm3.values)
    dm3b = tf.scatter_nd(sm3.indices, sm3.values, dense.get_shape())

    session = tf.Session()
    session.run(tf.global_variables_initializer())
    from tensorflow.python.framework import ops
    for v in nz_indices, nz_values, sparse, sm3, dm3a, dm3b:
        print('gradient of op', v, ops.get_gradient_function(v.op))

    print('dense sm - direct', session.run(sm1))
    print('dense sm - with -10 trick', session.run(sm2))
    print('sparse sm', session.run(sm3))
    print('densified sparse sm - old', session.run(dm3a))
    print('densified sparse sm - new', session.run(dm3b))
Example n. 10
    def OLD_to_embedding(self, values, indices):
        if self.duplicate_policy == "average":
            pass
        elif self.duplicate_policy == "sum":
            from_indices = tf.range(
                self.variables.get_variable(self.variable_prefix +
                                            "n_centroids"))
            to_indices = indices
            stg_values = tf.to_float(tf.ones_like(from_indices))

            from_size = self.variables.get_variable(self.variable_prefix +
                                                    "n_centroids")
            to_size = self.variables.get_variable(self.variable_prefix +
                                                  "target_embedding_size")

            stg_indices = tf.to_int64(
                tf.transpose(tf.stack([from_indices, to_indices])))
            stg_shape = tf.to_int64([from_size, to_size])

            matrix = tf.sparse_softmax(
                tf.SparseTensor(indices=stg_indices,
                                values=stg_values,
                                dense_shape=stg_shape))

            return tf.sparse_tensor_dense_matmul(matrix, values)
        else:
            pass
Example n. 11
  def call(self, inputs):
    self_embedding, neigh_embedding, adj = inputs
    adj = _sparse_ones_like(adj)
    if self.renorm:
      eye = _sparse_eye(adj.dense_shape[0])
      adj = tf.sparse_concat(1, [eye, adj])

    if not self.renorm:
      from_all = self.dense(neigh_embedding)
      from_self = self.dense(self_embedding)
    else:
      all_embedding = tf.concat([self_embedding, neigh_embedding], 0)
      from_all = self.dense(all_embedding)
      from_self = from_all[:adj.dense_shape[0], :]

    self_weight = self.self_layer(from_self)
    all_weight = self.neigh_layer(from_all)
    coefficient = tf.sparse_add(adj * self_weight,
                                adj * tf.reshape(all_weight, [1, -1]))
    coefficient = tf.SparseTensor(
        coefficient.indices, tf.nn.leaky_relu(coefficient.values),
        coefficient.dense_shape)
    coefficient = tf.sparse_softmax(coefficient)

    output = tf.sparse_tensor_dense_matmul(coefficient, from_all)
    if not self.renorm:
      output = from_self + output
    if self.activation:
      output = self.activation(output)
    return output
Example n. 12
    def __call__(self, u_inputs, v_inputs, u_size, v_size):

        x = v_inputs
        adj_mat = self.adj_mat

        # simplest self-attention possible
        f_1 = tf.layers.conv1d(u_inputs, 1, 1)
        f_2 = tf.layers.conv1d(v_inputs, 1, 1)

        f_1 = tf.reshape(f_1, (u_size, 1))
        f_2 = tf.reshape(f_2, (v_size, 1))

        seq_fts = tf.layers.conv1d(x, self.output_dim, 1, use_bias=False)

        f_1 = adj_mat * f_1
        f_2 = adj_mat * tf.transpose(f_2, [1, 0])

        logits = tf.sparse_add(f_1, f_2)
        lrelu = tf.SparseTensor(indices=logits.indices,
                                values=tf.nn.leaky_relu(logits.values),
                                dense_shape=logits.dense_shape)
        coefs = tf.sparse_softmax(lrelu)

        coefs = tf.sparse_reshape(coefs, [u_size, v_size])
        seq_fts = tf.squeeze(seq_fts)
        vals = tf.sparse_tensor_dense_matmul(coefs, seq_fts)
        print('--------vals.shape------', vals.shape)
        # vals = tf.expand_dims(vals, axis=0)
        # vals.set_shape([1, nb_nodes, out_sz])
        ret = tf.contrib.layers.bias_add(vals)
        return self.act(ret)  # activation
Example n. 13
File: layers.py Project: DASE4/CEAD
    def _call(self, inputs):

        seq_fts = tf.layers.conv1d(inputs, self.out_sz, 1, use_bias=False)

        # simplest self-attention possible
        f_1_t = tf.layers.conv1d(seq_fts, 1, 1)
        f_2_t = tf.layers.conv1d(seq_fts, 1, 1)

        f_1 = tf.reshape(f_1_t, (self.nb_nodes, 1))
        f_2 = tf.reshape(f_2_t, (self.nb_nodes, 1))

        f_1 = self.bias_mat * f_1
        f_2 = self.bias_mat * tf.transpose(f_2, [1, 0])

        logits = tf.sparse_add(f_1, f_2)
        lrelu = tf.SparseTensor(indices=logits.indices,
                                values=tf.nn.leaky_relu(logits.values),
                                dense_shape=logits.dense_shape)
        coefs = tf.sparse_softmax(lrelu)

        # As tf.sparse_tensor_dense_matmul expects its arguments to have rank-2,
        # here we make an assumption that our input is of batch size 1, and reshape appropriately.
        # The method will fail in all other cases!
        coefs = tf.sparse_reshape(coefs, [self.nb_nodes, self.nb_nodes])
        seq_fts = tf.squeeze(seq_fts)
        vals = tf.sparse_tensor_dense_matmul(coefs, seq_fts)
        vals = tf.expand_dims(vals, axis=0)
        vals.set_shape([1, self.nb_nodes, self.out_sz])
        ret = self.act(tf.contrib.layers.bias_add(vals))

        return ret  # activation
Example n. 14
def sp_attn_head(seq,
                 out_sz,
                 adj_mat,
                 activation,
                 nb_nodes,
                 in_drop=0.0,
                 coef_drop=0.0,
                 residual=False):
    with tf.name_scope('sp_attn'):
        if in_drop != 0.0:
            seq = tf.nn.dropout(seq, 1.0 - in_drop)

        seq_fts = tf.layers.conv1d(seq, out_sz, 1, use_bias=False)

        # simplest self-attention possible
        f_1 = tf.layers.conv1d(seq_fts, 1, 1)
        f_2 = tf.layers.conv1d(seq_fts, 1, 1)

        f_1 = tf.reshape(f_1, (nb_nodes, 1))
        f_2 = tf.reshape(f_2, (nb_nodes, 1))

        f_1 = adj_mat * f_1
        f_2 = adj_mat * tf.transpose(f_2, [1, 0])

        logits = tf.sparse_add(f_1, f_2)
        lrelu = tf.SparseTensor(indices=logits.indices,
                                values=tf.nn.leaky_relu(logits.values),
                                dense_shape=logits.dense_shape)
        coefs = tf.sparse_softmax(lrelu)

        if coef_drop != 0.0:
            coefs = tf.SparseTensor(indices=coefs.indices,
                                    values=tf.nn.dropout(
                                        coefs.values, 1.0 - coef_drop),
                                    dense_shape=coefs.dense_shape)
        if in_drop != 0.0:
            seq_fts = tf.nn.dropout(seq_fts, 1.0 - in_drop)

        # As tf.sparse_tensor_dense_matmul expects its arguments to have rank-2,
        # here we make an assumption that our input is of batch size 1, and reshape appropriately.
        # The method will fail in all other cases!
        coefs = tf.sparse_reshape(coefs, [nb_nodes, nb_nodes])
        seq_fts = tf.squeeze(seq_fts)
        vals = tf.sparse_tensor_dense_matmul(coefs, seq_fts)
        vals = tf.expand_dims(vals, axis=0)
        vals.set_shape([1, nb_nodes, out_sz])
        ret = tf.contrib.layers.bias_add(vals)

        # residual connection
        if residual:
            if seq.shape[-1] != ret.shape[-1]:
                ret = ret + tf.layers.conv1d(seq, ret.shape[-1], 1)  # activation
            else:
                ret = ret + seq

        if activation is None:  # for the final layer
            return ret
        else:
            return activation(ret)  # activation
Example n. 15
 def avg(tensor, size):
     adj = K.cast(K.squeeze(tensor[0], axis=0), dtype="int64")
     adj = tf.SparseTensor(indices=adj,
                           values=tf.ones_like(adj[:, 0],
                                               dtype='float32'),
                           dense_shape=(node_size, size))
     adj = tf.sparse_softmax(adj)
     return tf.sparse_tensor_dense_matmul(adj, tensor[1])
Example n. 16
def attention_mechanism(name, v, W_s, W_d, V, cur_embed, left, right, n2n):
    # a_{i,j} \propto v^\top tanh (W_s (\mu_i + \mu_j))
    if name == 'linear':
        t = tf.sparse_tensor_dense_matmul(sp_a=edge,
                                          b=cur_embed)  # edge \in \R^{m, n}
        t = tf.matmul(t, W_s)  # m by 16
        t = tf.nn.tanh(t)
        t = tf.matmul(t, tf.reshape(v, [-1, 1]))  # m by 1
        sparse_attention = tf.SparseTensor(n2n.indices, tf.reshape(t, [-1]),
                                           n2n.dense_shape)
        sparse_attention = tf.sparse_softmax(sparse_attention)
    # a_{i,j} \propto v^\top tanh (W_s |\mu_i - \mu_j|)
    elif name == 'abs':
        t = tf.sparse_tensor_dense_matmul(sp_a=edge,
                                          b=cur_embed)  # edge \in \R^{m, n}
        t = tf.abs(t)
        t = tf.matmul(t, W_s)  # m by 16
        t = tf.nn.tanh(t)
        t = tf.matmul(t, tf.reshape(v, [-1, 1]))  # m by 1
        sparse_attention = tf.SparseTensor(n2n.indices, tf.reshape(t, [-1]),
                                           n2n.dense_shape)
        sparse_attention = tf.sparse_softmax(sparse_attention)
    # a_{i,j} \propto leakyrelu (\mu_i V \mu_j)
    elif name == 'bilinear':
        tl = tf.sparse_tensor_dense_matmul(sp_a=left, b=cur_embed)  # m by k
        tl = tf.matmul(tl, V)
        tr = tf.sparse_tensor_dense_matmul(sp_a=right, b=cur_embed)
        t = tf.reduce_sum(tf.multiply(tl, tr), 1, keep_dims=True)
        t = tf.nn.leaky_relu(t)  # apply LeakyReLU to the bilinear scores
        sparse_attention = tf.SparseTensor(n2n.indices, tf.reshape(t, [-1]),
                                           n2n.dense_shape)
        sparse_attention = tf.sparse_softmax(sparse_attention)
    # a_{i,j} \propto v^\top tanh (W_s \mu_i + W_d \mu_j)
    elif name == 'generalized_linear':
        tl = tf.sparse_tensor_dense_matmul(sp_a=left, b=cur_embed)  # m by k
        tl = tf.matmul(tl, W_s)
        tr = tf.sparse_tensor_dense_matmul(sp_a=right, b=cur_embed)
        tr = tf.matmul(tr, W_d)
        t = tf.nn.tanh(tf.add(tl, tr))
        t = tf.matmul(t, tf.reshape(v, [-1, 1]))
        sparse_attention = tf.SparseTensor(n2n.indices, tf.reshape(t, [-1]),
                                           n2n.dense_shape)
        sparse_attention = tf.sparse_softmax(sparse_attention)
    else:
        sys.exit(-1)
    return sparse_attention
Example n. 17
    def build_sparse_matrix_softmax(self, idx_non_zero_values, X,
                                    dense_shape_A):
        A = tf.SparseTensorValue(idx_non_zero_values, tf.squeeze(X),
                                 dense_shape_A)
        A = tf.sparse_reorder(A)  # n_edges x n_edges
        A = tf.sparse_softmax(A)

        return A
Example n. 18
 def testGradient(self):
     x_shape = [2, 5, 10]
     with self.test_session(use_gpu=False):
         for dtype in [np.float32, np.float64]:
             x_np = np.random.randn(*x_shape).astype(dtype)
             x_tf, nnz = _sparsify(x_np)
             y_tf = tf.sparse_softmax(x_tf)
             err = tf.test.compute_gradient_error(x_tf.values, (nnz,), y_tf.values, (nnz,))
             self.assertLess(err, 1e-4)
Example n. 19
 def build_sparse_matrix_softmax(self, idx_non_zero_values, X, dense_shape_A):
     A = tf.SparseTensorValue(idx_non_zero_values, tf.squeeze(X), dense_shape_A)
     A = tf.sparse_reorder(A)  # n_edges x n_edges
     A = tf.sparse_softmax(A)
     
     # dropout after softmax
     #A = tf.SparseTensorValue(indices=A.indices,
     #                         values=tf.nn.dropout(A.values, self.keep_prob),
     #                         dense_shape=A.dense_shape)
     return A
Example n. 20
 def testGradient(self):
   x_shape = [2, 5, 10]
   with self.test_session(use_gpu=False):
     for dtype in [np.float32, np.float64]:
       x_np = np.random.randn(*x_shape).astype(dtype)
       x_tf, nnz = _sparsify(x_np)
       y_tf = tf.sparse_softmax(x_tf)
       err = tf.test.compute_gradient_error(x_tf.values, (nnz,), y_tf.values,
                                            (nnz,))
       self.assertLess(err, 1e-4)
Example n. 21
    def sp_attn_head(self, seq, in_sz, out_sz, adj_mat, activation, in_drop=0.0, coef_drop=0.0, residual=False,
                     layer_str="", sparse_inputs=False, reuse_scope=None):
        """ Sparse Attention Head for the GAT layer. Note: the variable scope is necessary to avoid
        variable duplication across snapshots"""

        with tf.variable_scope('struct_attn', reuse=reuse_scope):
            if sparse_inputs:
                weight_var = tf.get_variable("layer_" + str(layer_str) + "_weight_transform", shape=[in_sz, out_sz],
                                             dtype=tf.float32)
                new_temporal_weight_var = tf.get_variable("layer_" + str(layer_str) + "_new_weight_transform", shape=[out_sz, out_sz],
                                                dtype=tf.float32)
                try:
                    seq_fts = tf.expand_dims(tf.sparse_tensor_dense_matmul(seq, weight_var), axis=0)  # [N, F]
                except:
                    seq_fts = tf.expand_dims(tf.matmul(seq, new_temporal_weight_var), axis=0)  # [N, F]
            else:
                seq_fts = tf.layers.conv1d(seq, out_sz, 1, use_bias=False,
                                           name='layer_' + str(layer_str) + '_weight_transform', reuse=reuse_scope)

            # Additive self-attention.
            f_1 = tf.layers.conv1d(seq_fts, 1, 1, name='layer_' + str(layer_str) + '_a1', reuse=reuse_scope)
            f_2 = tf.layers.conv1d(seq_fts, 1, 1, name='layer_' + str(layer_str) + '_a2', reuse=reuse_scope)
            f_1 = tf.reshape(f_1, [-1, 1])  # [N, 1]
            f_2 = tf.reshape(f_2, [-1, 1])  # [N, 1]

            logits = tf.sparse_add(adj_mat * f_1, adj_mat * tf.transpose(f_2))  # adj_mat is [N, N] (sparse)

            leaky_relu = tf.SparseTensor(indices=logits.indices,
                                         values=self.leaky_relu(logits.values),
                                         dense_shape=logits.dense_shape)
            coefficients = tf.sparse_softmax(leaky_relu)  # [N, N] (sparse)

            if coef_drop != 0.0:
                coefficients = tf.SparseTensor(indices=coefficients.indices,
                                               values=tf.nn.dropout(coefficients.values, 1.0 - coef_drop),
                                               dense_shape=coefficients.dense_shape)  # [N, N] (sparse)
            if in_drop != 0.0:
                seq_fts = tf.nn.dropout(seq_fts, 1.0 - in_drop)  # [N, D]

            seq_fts = tf.squeeze(seq_fts)
            values = tf.sparse_tensor_dense_matmul(coefficients, seq_fts)
            values = tf.reshape(values, [-1, out_sz])
            values = tf.expand_dims(values, axis=0)
            ret = values  # [1, N, F]

            if residual:
                residual_wt = tf.get_variable("layer_" + str(layer_str) + "_residual_weight", shape=[in_sz, out_sz],
                                              dtype=tf.float32)
                if sparse_inputs:
                    ret = ret + tf.expand_dims(tf.sparse_tensor_dense_matmul(seq, residual_wt),
                                               axis=0)  # [N, F] * [F, D] = [N, D].
                else:
                    ret = ret + tf.layers.conv1d(seq, out_sz, 1, use_bias=False,
                                                 name='layer_' + str(layer_str) + '_residual_weight', reuse=reuse_scope)
            return activation(ret)
Example n. 22
def attention_mechanism(features, graph_adj, adj_with_self_loops_indices,
                        coefficient_dropout_prob, weight_decay, name):
    # apply a feedforward network parametrized with a weight vector to the transformed features.
    input_dim = int(features.get_shape()[1])
    a_i = tf.get_variable(f"{name}-att_i", [input_dim, 1],
                          dtype=tf.float32,
                          initializer=tf.glorot_uniform_initializer(),
                          regularizer=slim.l2_regularizer(weight_decay))
    a_j = tf.get_variable(f"{name}-att_j", [input_dim, 1],
                          dtype=tf.float32,
                          initializer=tf.glorot_uniform_initializer(),
                          regularizer=slim.l2_regularizer(weight_decay))
    tf.add_to_collection(ATTENTION_WEIGHTS, a_i)
    tf.add_to_collection(ATTENTION_WEIGHTS, a_j)

    # dims: num_nodes x input_dim, input_dim, 1 -> num_nodes x 1
    att_i = tf.matmul(features, a_i)
    att_i = tf.contrib.layers.bias_add(att_i)
    # dims: num_nodes x input_dim, input_dim, 1 -> num_nodes x 1
    att_j = tf.matmul(features, a_j)
    att_j = tf.contrib.layers.bias_add(att_j)

    # Extracts the relevant attention coefficients with respect to the 1-hop neighbours of each node
    # Method: first extract all the attention coefficients of the left nodes of each edge, then those
    # of the right nodes and add them up.
    # The result is a list of relevant attention weights ordered in the same way as the edges in the
    # sparse adjacency matrix.
    # dims: num_nodes x 1, num_edges, num_nodes x 1, num_edges -> 1 x num_edges x 1
    attention_weights_of_edges = tf.gather(att_i, adj_with_self_loops_indices[0], axis=0) + \
                                 tf.gather(att_j, adj_with_self_loops_indices[1], axis=0)
    # dims: 1 x num_edges x 1 -> num_edges
    attention_weights_of_edges = tf.squeeze(attention_weights_of_edges)

    # blow list of attention weights up into a sparse matrix. Use the coordinates from the original
    # adjacency matrix to specify which attention weight belongs to which edge.
    # Simultaneously applies the LeakyReLU as given in the paper.
    # dims: num_nodes x num_nodes, num_edges -> num_nodes x num_nodes
    attention_weight_matrix = tf.SparseTensor(
        indices=graph_adj.indices,
        values=tf.nn.leaky_relu(attention_weights_of_edges, alpha=0.2),
        dense_shape=graph_adj.dense_shape)

    # finish the attention by normalizing coefficients using softmax
    attention_coefficients = tf.sparse_softmax(attention_weight_matrix)

    # apply dropout to the attention coefficients, meaning that in every epoch each node is only
    # exposed to a sampled subset of its neighbours
    attention_coefficients = tf.cond(
        tf.cast(coefficient_dropout_prob, tf.bool),
        true_fn=(lambda: dropout_supporting_sparse_tensors(
            attention_coefficients, 1.0 - coefficient_dropout_prob)),
        false_fn=(lambda: attention_coefficients))

    return attention_coefficients
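
A small standalone sketch (TF 1.x, hypothetical toy values) of the gather step described above: per-node scores att_i and att_j are turned into one score per edge by indexing with the source and target node lists of the adjacency.

import tensorflow as tf

att_i = tf.constant([[0.1], [0.2], [0.3]])      # num_nodes x 1
att_j = tf.constant([[1.0], [2.0], [3.0]])      # num_nodes x 1
edges = tf.constant([[0, 1, 2], [1, 2, 0]])     # (source, target) index lists

edge_scores = tf.squeeze(tf.gather(att_i, edges[0], axis=0)
                         + tf.gather(att_j, edges[1], axis=0))

with tf.Session() as sess:
    print(sess.run(edge_scores))  # [2.1 3.2 1.3]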
Example n. 23
    def _init_weights(self):
        indices = np.vstack((self.graph.tocoo().row, self.graph.tocoo().col)).T

        self.values = tf.get_variable(
            'weights', shape=self.graph.tocoo().row.shape)

        weights_unnormalized = tf.SparseTensor(
            indices, self.values, [self.num_nodes, self.num_nodes])
        weights = tf.sparse_softmax(weights_unnormalized)

        return weights
Example n. 24
    def call(self, inputs):
        outputs = []
        features = inputs[0]
        rel_emb = inputs[1]
        adj = tf.SparseTensor(
            K.cast(K.squeeze(inputs[2], axis=0), dtype="int64"),
            K.ones_like(inputs[2][0, :, 0]), (self.node_size, self.node_size))
        sparse_indices = tf.squeeze(inputs[3], axis=0)
        sparse_val = tf.squeeze(inputs[4], axis=0)

        features = self.activation(features)
        outputs.append(features)

        for l in range(self.depth):
            features_list = []
            for head in range(self.attn_heads):
                attention_kernel = self.attn_kernels[l][head]
                rels_sum = tf.SparseTensor(indices=sparse_indices,
                                           values=sparse_val,
                                           dense_shape=(self.triple_size,
                                                        self.rel_size))

                rels_sum = tf.sparse_tensor_dense_matmul(rels_sum, rel_emb)
                neighs = K.gather(features, adj.indices[:, 1])
                selfs = K.gather(features, adj.indices[:, 0])

                rels_sum = tf.nn.l2_normalize(rels_sum, 1)
                bias = tf.reduce_sum(neighs * rels_sum, 1,
                                     keepdims=True) * rels_sum
                neighs = neighs - 2 * bias

                att = K.squeeze(K.dot(K.concatenate([selfs, neighs, rels_sum]),
                                      attention_kernel),
                                axis=-1)
                att = tf.SparseTensor(indices=adj.indices,
                                      values=att,
                                      dense_shape=adj.dense_shape)
                att = tf.sparse_softmax(att)

                new_features = tf.segment_sum(
                    neighs * K.expand_dims(att.values, axis=-1),
                    adj.indices[:, 0])
                features_list.append(new_features)

            if self.attn_heads_reduction == 'concat':
                features = K.concatenate(features_list)  # (N x KF')
            else:
                features = K.mean(K.stack(features_list), axis=0)

            features = self.activation(features)
            outputs.append(features)

        outputs = K.concatenate(outputs)
        return outputs
Example n. 25
 def compute_inference(self, node_features_in, sp_adj_matrix, is_training):
     adj_matrix_pred = self.edge_model.compute_inference(
         node_features_in, sp_adj_matrix, is_training)
     self.adj_matrix_pred = adj_matrix_pred
     adj_mask = get_sp_topk(adj_matrix_pred, sp_adj_matrix, self.nb_nodes,
                            self.topk)
     sp_adj_pred = tf.contrib.layers.dense_to_sparse(
         tf.multiply(adj_mask, tf.nn.leaky_relu(adj_matrix_pred)))
     sp_adj_pred = tf.sparse_softmax(sp_adj_pred)
     logits = self.node_model.compute_inference(node_features_in,
                                                sp_adj_pred, is_training)
     return logits, adj_matrix_pred
Example n. 26
    def attention(self, w_1, b_1, w_2, b_2, context_facts, current_mem, existing_facts, re_question_rnn):
        """
        Custom attention mechanism (constructing similarity measures between each fact, our
        current memory (i.e. the question vector), and the original question)
        :param context_facts: a [batch_size, maximum_sentence_count, recurrent_cell_size] tensor that contains all
                    the facts from the contexts.
        :param current_mem: a [batch_size, maximum_sentence_count, recurrent_cell_size] tensor that contains
                    the current memory. It should be the same memory for all facts for accurate results.
        :param existing_facts: a [batch_size, maximum_sentence_count, 1] tensor that acts as a binary
                    mask for which facts exist and which do not.
        :return: a [batch_size, maximum_sentence_count, 1] tensor of attention weights over the facts
        """

        with tf.variable_scope("attending") as scope:
            # attending: the metrics by which we decide what to attend to
            attending = tf.concat([context_facts, current_mem, re_question_rnn,
                                   context_facts * re_question_rnn,     # compare each fact to the question
                                   context_facts * current_mem,         # compare each fact with memory
                                   (context_facts - re_question_rnn)**2,
                                   (context_facts - current_mem)**2], 2)

            # m1:  First layer of multiplied weights for the feed-forward network.
            # We tile the weights in order to manually broadcast, since tf.matmul does not automatically broadcast batch
            # matrix multiplication (as of TensorFlow 1.2).
            m1 = tf.matmul((attending * existing_facts), tf.tile(w_1, tf.stack([tf.shape(attending)[0], 1, 1]))) \
                 * existing_facts

            # bias_1: A masked version of the first feed-forward layer's bias over only existing facts.
            bias_1 = b_1 * existing_facts

            # tnhan: First nonlinearity. In the original paper, this is a tanh nonlinearity; choosing relu was a design
            # choice intended to avoid issues with low gradient magnitude when the tanh returned values close to 1 or -1
            tnhan = tf.nn.relu(m1 + bias_1)

            # m2: Second layer of multiplied weights for the feed-forward network.
            m2 = tf.matmul(tnhan, tf.tile(w_2, tf.stack([tf.shape(attending)[0], 1, 1])))

            # bias_2: A masked version of the second feed-forward layer's bias.
            bias_2 = b_2 * existing_facts

            # norm_m2: A normalized version of the second layer of weights, which is used to help make sure the softmax
            # nonlinearity doesn't saturate.
            norm_m2 = tf.nn.l2_normalize(m2 + bias_2, -1)

            # softmaxable: A hack in order to use sparse_softmax on an otherwise dense tensor.
            # We make norm_m2 a sparse tensor, then make it dense again after the operation.
            softmax_id = tf.where(tf.not_equal(norm_m2, 0))[:,:-1]
            softmax_gather = tf.gather_nd(norm_m2[...,0], softmax_id)
            softmax_shape = tf.shape(norm_m2, out_type=tf.int64)[:-1]
            softmaxable = tf.SparseTensor(softmax_id, softmax_gather, softmax_shape)

            res = tf.expand_dims(tf.sparse_tensor_to_dense(tf.sparse_softmax(softmaxable)), -1)
            return res
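
The "sparse_softmax on an otherwise dense tensor" hack used above reduces to the following minimal sketch (TF 1.x, toy values): the softmax is taken only over the non-zero entries of each row, and the zeros stay zero after densifying.

import tensorflow as tf

dense = tf.constant([[0.2, 0.0, 0.4],
                     [0.0, 0.1, 0.0]])
idx = tf.where(tf.not_equal(dense, 0))
sp = tf.SparseTensor(idx, tf.gather_nd(dense, idx),
                     tf.shape(dense, out_type=tf.int64))
attn = tf.sparse_tensor_to_dense(tf.sparse_softmax(sp))

with tf.Session() as sess:
    print(sess.run(attn))
    # row 0: softmax over (0.2, 0.4) -> approx. (0.45, 0.55);
    # row 1: its single non-zero entry gets weight 1.0; the zeros remain zero.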
Example n. 27
def attention(c, mem, existing_facts):
    """
    Custom attention mechanism.
    c: A [batch_size, maximum_sentence_count, recurrent_cell_size] tensor
        that contains all the facts from the contexts.
    mem: A [batch_size, maximum_sentence_count, recurrent_cell_size] tensor that
        contains the current memory. It should be the same memory for all facts for accurate results.
    existing_facts: A [batch_size, maximum_sentence_count, 1] tensor that
        acts as a binary mask for which facts exist and which do not.

    """
    with tf.variable_scope("attending") as scope:
        # attending: The metrics by which we decide what to attend to.
        attending = tf.concat(
            [c, mem, re_q, c * re_q, c * mem, (c - re_q)**2, (c - mem)**2], 2)

        # m1: First layer of multiplied weights for the feed-forward network.
        #     We tile the weights in order to manually broadcast, since tf.matmul does not
        #     automatically broadcast batch matrix multiplication as of TensorFlow 1.2.
        m1 = tf.matmul(attending * existing_facts,
                       tf.tile(w_1, tf.stack([tf.shape(attending)[0], 1, 1
                                              ]))) * existing_facts
        # bias_1: A masked version of the first feed-forward layer's bias
        #     over only existing facts.

        bias_1 = b_1 * existing_facts

        # tnhan: First nonlinearity. In the original paper, this is a tanh nonlinearity;
        #        choosing relu was a design choice intended to avoid issues with
        #        low gradient magnitude when the tanh returned values close to 1 or -1.
        tnhan = tf.nn.relu(m1 + bias_1)

        # m2: Second layer of multiplied weights for the feed-forward network.
        #     Still tiling weights for the same reason described in m1's comments.
        m2 = tf.matmul(tnhan,
                       tf.tile(w_2, tf.stack([tf.shape(attending)[0], 1, 1])))

        # bias_2: A masked version of the second feed-forward layer's bias.
        bias_2 = b_2 * existing_facts

        # norm_m2: A normalized version of the second layer of weights, which is used
        #     to help make sure the softmax nonlinearity doesn't saturate.
        norm_m2 = tf.nn.l2_normalize(m2 + bias_2, -1)

        # softmaxable: A hack in order to use sparse_softmax on an otherwise dense tensor.
        #     We make norm_m2 a sparse tensor, then make it dense again after the operation.
        softmax_idx = tf.where(tf.not_equal(norm_m2, 0))[:, :-1]
        softmax_gather = tf.gather_nd(norm_m2[..., 0], softmax_idx)
        softmax_shape = tf.shape(norm_m2, out_type=tf.int64)[:-1]
        softmaxable = tf.SparseTensor(softmax_idx, softmax_gather,
                                      softmax_shape)
        return tf.expand_dims(
            tf.sparse_tensor_to_dense(tf.sparse_softmax(softmaxable)), -1)
Example n. 28
def sp_attn_head(seq,
                 out_sz,
                 adj_mat,
                 adj_hop1_all_mat,
                 adj_hop2_all_mat,
                 adj_hop1_neig_mat,
                 adj_hop2_neig_mat,
                 N_hop1_neig_mat,
                 N_hop2_neig_mat,
                 activation,
                 nb_nodes,
                 in_drop=0.0,
                 coef_drop=0.0,
                 residual=False):
    with tf.name_scope('sp_attn'):
        if in_drop != 0.0:
            seq = tf.nn.dropout(seq, 1.0 - in_drop)

        seq_fts = tf.layers.conv1d(seq, out_sz, 1, use_bias=False)

        # simplest self-attention possible
        ###this is the first layer of GAT
        f_1 = tf.layers.conv1d(seq_fts, 1, 1)
        f_2 = tf.layers.conv1d(seq_fts, 1, 1)

        f_1 = tf.reshape(f_1, (nb_nodes, 1))
        f_2 = tf.reshape(f_2, (nb_nodes, 1))

        f_1 = adj_mat * f_1
        f_2 = adj_mat * tf.transpose(f_2, [1, 0])

        logits = tf.sparse_add(f_1, f_2)
        lrelu = tf.SparseTensor(indices=logits.indices,
                                values=tf.nn.leaky_relu(logits.values),
                                dense_shape=logits.dense_shape)
        coefs = tf.sparse_softmax(lrelu)

        if coef_drop != 0.0:
            coefs = tf.SparseTensor(indices=coefs.indices,
                                    values=tf.nn.dropout(
                                        coefs.values, 1.0 - coef_drop),
                                    dense_shape=coefs.dense_shape)
        if in_drop != 0.0:
            seq_fts = tf.nn.dropout(seq_fts, 1.0 - in_drop)

        coefs = tf.sparse_reshape(coefs, [nb_nodes, nb_nodes])
        seq_fts = tf.squeeze(seq_fts)
        vals = tf.sparse_tensor_dense_matmul(coefs, seq_fts)
        vals = tf.expand_dims(vals, axis=0)
        vals.set_shape([1, nb_nodes, out_sz])
        ret = tf.contrib.layers.bias_add(vals)

        return activation(ret)  # activation
Example n. 29
 def loop(curr_sample, new_h):
     # weight values are the sum of entropy and kl features
     W_vals = tf.gather(col_entropies, curr_sample, axis=1) + \
         tf.gather(kl_mat, curr_sample, axis=1)
     W_shape = [self.num_nodes, self.num_nodes]
     W = tf.SparseTensor(self.indices, W_vals, W_shape)
     Wnorm = tf.sparse_softmax(W)
     # propagate labels
     new_sample = tf.sparse_tensor_dense_matmul(
         Wnorm, tf.gather(h, curr_sample, axis=1))
     # append to result
     new_h = tf.concat([new_h, [new_sample]], 0)
     return [curr_sample + 1, new_h]
Example n. 30
 def add_sparse_att_layer(self, inlayer, dual_layer):
     dual_transform = tf.reshape(
         tf.layers.conv1d(tf.expand_dims(dual_layer, 0), 1, 1), (-1, 1))
     logits = tf.reshape(
         tf.nn.embedding_lookup(dual_transform, self.r_mat.values), [-1])
     lrelu = tf.SparseTensor(indices=self.r_mat.indices,
                             values=tf.nn.leaky_relu(logits),
                             dense_shape=self.r_mat.dense_shape)
     coefs = tf.sparse_softmax(lrelu)
     vals = tf.sparse_tensor_dense_matmul(coefs, inlayer)
     if self.act_func is None:
         return vals
     else:
         return self.act_func(vals)
Example n. 31
    def evaluate(self, G_holdout, Y_holdout):
        """ Perform cross validation on the hold-out set.

    This calculates the mean absolute error.

    Parameters
    ----------
    G_holdout : tf.Tensor
       Sample metadata for the hold-out test dataset
    Y_holdout : tf.Tensor
       Dense feature table for the hold-out test dataset

    Returns
    -------
    mad : tf.Tensor
       Mean absolute deviation.  This represents the average error
       for each cell value in the matrix.
    """
        with tf.name_scope('evaluate'):

            # evaluate the accuracy
            holdout_count = tf.cast(tf.sparse_reduce_sum(Y_holdout, axis=1),
                                    dtype=tf.float32)
            obs_ids = tf.gather(Y_holdout.indices, 1, axis=1)
            samp_ids = tf.gather(Y_holdout.indices, 0, axis=1)

            g_data = tf.gather(G_holdout, samp_ids, axis=0)

            # Calculate predicted abundance
            Gpos = tf.concat([tf.ones([g_data.shape[0], 1]), g_data],
                             axis=1,
                             name='g_holdout')
            Vprime = tf.transpose(tf.gather(self.V, obs_ids, axis=1),
                                  name='V_holdout')
            # sparse matrix multiplication for positive samples
            y_pred = tf.reduce_sum(tf.multiply(Gpos, Vprime), axis=1)
            smax = tf.SparseTensorValue(indices=Y_holdout.indices,
                                        values=y_pred,
                                        dense_shape=Y_holdout.dense_shape)

            smax = tf.sparse_softmax(smax)

            holdout_count = tf.gather(holdout_count, samp_ids, axis=0)
            pred_values = tf.cast(tf.multiply(holdout_count, smax.values),
                                  tf.float32)

            Y_values = tf.cast(Y_holdout.values, tf.float32)
            mad = tf.reduce_mean(tf.squeeze(tf.abs(pred_values - Y_values)))
            return mad