def testSmallValuesShouldVanish(self):
    """Check that `thresh` removes summed entries whose magnitude is below it.

    Two thresholds are exercised against the same pair of 3x3 fixtures:
    0.21 (both small entries vanish) and 0.11 (only the .1 entry vanishes).
    """
    with self.test_session(use_gpu=False) as sess:
        sp_a = self._SparseTensor_3x3()
        sp_b = self._SparseTensor_3x3_v2()

        # sum:
        # [       2]
        # [.1      ]
        # [ 6   -.2]

        # two values should vanish: |.1| < .21, and |-.2| < .21
        sp_sum = tf.sparse_add(sp_a, sp_b, thresh=0.21)
        sum_out = sess.run(sp_sum)

        self.assertEqual(sp_sum.dense_shape.get_shape(), [2])
        self.assertAllEqual(sum_out.indices, [[0, 1], [2, 0]])
        self.assertAllEqual(sum_out.values, [2, 6])
        # Use `dense_shape` on the evaluated value as well, matching the
        # attribute queried on the symbolic tensor above.
        self.assertAllEqual(sum_out.dense_shape, [3, 3])

        # only .1 vanishes
        sp_sum = tf.sparse_add(sp_a, sp_b, thresh=0.11)
        sum_out = sess.run(sp_sum)

        self.assertEqual(sp_sum.dense_shape.get_shape(), [2])
        self.assertAllEqual(sum_out.indices, [[0, 1], [2, 0], [2, 1]])
        self.assertAllClose(sum_out.values, [2, 6, -.2])
        self.assertAllEqual(sum_out.dense_shape, [3, 3])
  def testAddSparseDense(self):
    """Sparse + dense addition must equal the NumPy sum, in either operand order.

    Runs over several value dtypes and both index dtypes.
    """
    np.random.seed(1618)  # Fixed seed keeps the random shape and values stable.
    n, m = np.random.randint(30, size=2)
    value_dtypes = [np.float32, np.float64, np.int64, np.complex64]
    index_dtypes = [np.int32, np.int64]
    for dtype in value_dtypes:
      for index_dtype in index_dtypes:
        rand_vals_np = np.random.randn(n, m).astype(dtype)
        dense_np = np.random.randn(n, m).astype(dtype)

        with self.test_session(use_gpu=False):
          sparse, _ = _sparsify(rand_vals_np, index_dtype=index_dtype)

          # sparse on the left
          forward = tf.sparse_add(sparse, tf.constant(dense_np)).eval()
          self.assertAllEqual(dense_np + rand_vals_np, forward)
          self.assertTrue(forward.dtype == dtype)

          # dense on the left: the result must not depend on operand order
          swapped = tf.sparse_add(tf.constant(dense_np), sparse).eval()
          self.assertAllEqual(dense_np + rand_vals_np, swapped)
          self.assertTrue(swapped.dtype == dtype)
Exemple #3
0
 def next_batch(self):
     '''
     Dequeue the next batch from the combined switchable queue.

     Returns the source batch, the source lengths, and the targets as a
     sparse tensor in which every stored value has been reduced by one.
     '''
     dequeued = self._queue.dequeue_many(self._model_feeder.ph_batch_size)
     source, source_lengths, target, _ = dequeued
     # Convert the dense targets back to sparse form, then add a tensor of
     # -1s with the same sparsity pattern to recover the real labels.
     labels = tf.contrib.layers.dense_to_sparse(target)
     minus_ones = -1 * tf.ones_like(labels.values)
     offsets = tf.SparseTensor(labels.indices, minus_ones, labels.dense_shape)
     real_labels = tf.sparse_add(labels, offsets)
     return source, source_lengths, real_labels
Exemple #4
0
def sp_attn_head(seq,
                 out_sz,
                 adj_mat,
                 activation,
                 nb_nodes,
                 in_drop=0.0,
                 coef_drop=0.0,
                 residual=False):
    """Build a single sparse attention head.

    Args:
        seq: input features. The reshape/squeeze steps below only work for a
            batch of size 1 (see the comment before `tf.sparse_reshape`).
        out_sz: number of output features per node.
        adj_mat: multiplied with the attention terms and fed to
            `tf.sparse_add` -- presumably a sparse [nb_nodes, nb_nodes]
            adjacency matrix; confirm against the caller.
        activation: callable applied to the final output.
        nb_nodes: number of nodes; used for the reshapes and `set_shape`.
        in_drop: dropout rate applied to `seq` and again to the transformed
            features; 0.0 disables it.
        coef_drop: dropout rate applied to the attention coefficients;
            0.0 disables it.
        residual: if True, add a skip connection from `seq` to the output
            (projected with a 1x1 convolution when the widths differ).

    Returns:
        `activation(ret)` where `ret` has static shape
        [1, nb_nodes, out_sz] plus bias (and the residual, if enabled).
    """
    with tf.name_scope('sp_attn'):
        if in_drop != 0.0:
            seq = tf.nn.dropout(seq, 1.0 - in_drop)

        seq_fts = tf.layers.conv1d(seq, out_sz, 1, use_bias=False)

        # simplest self-attention possible
        f_1 = tf.layers.conv1d(seq_fts, 1, 1)
        f_2 = tf.layers.conv1d(seq_fts, 1, 1)

        f_1 = tf.reshape(f_1, (nb_nodes, 1))
        f_2 = tf.reshape(f_2, (nb_nodes, 1))

        f_1 = adj_mat * f_1
        f_2 = adj_mat * tf.transpose(f_2, [1, 0])

        logits = tf.sparse_add(f_1, f_2)
        lrelu = tf.SparseTensor(indices=logits.indices,
                                values=tf.nn.leaky_relu(logits.values),
                                dense_shape=logits.dense_shape)
        coefs = tf.sparse_softmax(lrelu)

        if coef_drop != 0.0:
            coefs = tf.SparseTensor(indices=coefs.indices,
                                    values=tf.nn.dropout(
                                        coefs.values, 1.0 - coef_drop),
                                    dense_shape=coefs.dense_shape)
        if in_drop != 0.0:
            seq_fts = tf.nn.dropout(seq_fts, 1.0 - in_drop)

        # As tf.sparse_tensor_dense_matmul expects its arguments to have rank-2,
        # here we make an assumption that our input is of batch size 1, and reshape appropriately.
        # The method will fail in all other cases!
        coefs = tf.sparse_reshape(coefs, [nb_nodes, nb_nodes])
        seq_fts = tf.squeeze(seq_fts)
        vals = tf.sparse_tensor_dense_matmul(coefs, seq_fts)
        vals = tf.expand_dims(vals, axis=0)
        vals.set_shape([1, nb_nodes, out_sz])
        ret = tf.contrib.layers.bias_add(vals)

        # residual connection
        if residual:
            if seq.shape[-1] != ret.shape[-1]:
                # Widths differ: project the input before adding it.
                ret = ret + conv1d(seq, ret.shape[-1], 1)  # activation
            else:
                # Widths match: add the input directly. The result must be
                # stored in `ret`; assigning it to any other name would drop
                # the skip connection because only `ret` is returned.
                ret = ret + seq

        return activation(ret)  # activation
    def testAddSparseDense(self):
        """Verify sparse/dense addition against NumPy for each dtype pairing.

        Both operand orders are evaluated and must give the same dense result
        with the value dtype preserved.
        """
        np.random.seed(1618)  # Deterministic shapes and values.
        n, m = np.random.randint(30, size=2)

        def verify(result, reference, expected_dtype):
            # Shared checks for one evaluated sum.
            self.assertAllEqual(reference, result)
            self.assertTrue(result.dtype == expected_dtype)

        for dtype in [np.float32, np.float64, np.int64, np.complex64]:
            for index_dtype in [np.int32, np.int64]:
                rand_vals_np = np.random.randn(n, m).astype(dtype)
                dense_np = np.random.randn(n, m).astype(dtype)

                with self.test_session(use_gpu=False):
                    sparse, _ = _sparsify(rand_vals_np,
                                          index_dtype=index_dtype)

                    sparse_first = tf.sparse_add(
                        sparse, tf.constant(dense_np)).eval()
                    verify(sparse_first, dense_np + rand_vals_np, dtype)

                    # Commutativity: dense operand first.
                    dense_first = tf.sparse_add(
                        tf.constant(dense_np), sparse).eval()
                    verify(dense_first, dense_np + rand_vals_np, dtype)
Exemple #6
0
    def setupQ(self, init):
        """Create one factor per (core i, output j, input k) triple.

        The first and last cores (and every core when ``self.r == 1``) are
        plain ``[r, 1]`` variables. Every other core is an ``r x r`` matrix
        obtained by a Cayley transform of a skew-symmetric matrix whose strict
        upper triangle is the trainable variable.

        :param init: initializer forwarded to ``tf.get_variable``.
        :return: list of tensors in ``i, j, k`` loop order.
        """
        rank = self.r

        # A skew-symmetric r x r matrix has only "r choose 2" free parameters.
        n_free = int(rank * (rank - 1) / 2)

        # (row, col) positions of the upper triangle without the diagonal.
        triu_positions = [list(pair)
                          for pair in zip(*np.triu_indices(rank, k=1))]
        indices = tf.constant(triu_positions, dtype=tf.int64)

        last_core = self.d - 1
        Q = []
        for i in range(self.d):
            for j in range(self.n_out[i]):
                for k in range(self.n_in[i]):
                    vname = self._name + ''.join(
                        str(idx).zfill(4) for idx in (i, j, k))

                    if i in (0, last_core) or rank == 1:
                        # Vector for first and last cores of TT.
                        Q.append(tf.get_variable(vname,
                                                 shape=[rank, 1],
                                                 initializer=init))
                        continue

                    # Sparse representation of the skew-symmetric matrix.
                    params = tf.get_variable(vname,
                                             shape=[n_free, 1],
                                             initializer=init)

                    # Scatter the parameters into the upper triangle, then
                    # densify by adding to a zero matrix.
                    upper_sparse = tf.SparseTensor(indices=indices,
                                                   values=tf.squeeze(params),
                                                   dense_shape=[rank, rank])
                    upper = tf.sparse_add(upper_sparse,
                                          tf.zeros(upper_sparse.dense_shape))

                    # Skew-symmetric matrix.
                    skew = upper - tf.transpose(upper)

                    # Cayley transform to Orthogonal SO(r). The original
                    # author's notes list pinv, a truncated series, lstsq and
                    # expm as slower alternatives that were tried.
                    eye = tf.eye(rank)
                    numerator = eye - skew
                    inverse = tf.matrix_inverse(eye + skew)
                    Q.append(tf.matmul(numerator, inverse))
        return Q
Exemple #7
0
    def sp_attn_head(self, seq, in_sz, out_sz, adj_mat, activation, in_drop=0.0, coef_drop=0.0, residual=False,
                     layer_str="", sparse_inputs=False, reuse_scope=None):
        """ Sparse Attention Head for the GAT layer. Note: the variable scope is necessary to avoid
        variable duplication across snapshots.

        Args:
            seq: input features; a sparse tensor when ``sparse_inputs`` is set
                (a dense tensor is also tolerated, see the fallback below).
            in_sz: input feature width, used for the weight shapes.
            out_sz: output feature width.
            adj_mat: sparse [N, N] adjacency matrix (per the inline comment).
            activation: callable applied to the result.
            in_drop: dropout rate on the transformed features; 0.0 disables it.
            coef_drop: dropout rate on the attention coefficients; 0.0 disables it.
            residual: if True, add a learned projection of ``seq`` to the output.
            layer_str: label embedded in the variable/layer names.
            sparse_inputs: selects the matmul-based transform instead of conv1d.
            reuse_scope: forwarded as ``reuse`` to the variable scope and layers.

        Returns:
            ``activation(ret)`` with ``ret`` of shape [1, N, out_sz].
        """

        with tf.variable_scope('struct_attn', reuse=reuse_scope):
            if sparse_inputs:
                weight_var = tf.get_variable("layer_" + str(layer_str) + "_weight_transform", shape=[in_sz, out_sz],
                                             dtype=tf.float32)
                new_temporal_weight_var = tf.get_variable("layer_" + str(layer_str) + "_new_weight_transform", shape=[out_sz, out_sz],
                                                dtype=tf.float32)
                try:
                    seq_fts = tf.expand_dims(tf.sparse_tensor_dense_matmul(seq, weight_var), axis=0)  # [N, F]
                except TypeError:
                    # `seq` is not a SparseTensor: fall back to a dense matmul
                    # with the [out_sz, out_sz] weight. Only TypeError is
                    # caught so that unrelated failures (and interrupts) are
                    # no longer swallowed by a bare `except`.
                    seq_fts = tf.expand_dims(tf.matmul(seq, new_temporal_weight_var), axis=0)  # [N, F]
            else:
                seq_fts = tf.layers.conv1d(seq, out_sz, 1, use_bias=False,
                                           name='layer_' + str(layer_str) + '_weight_transform', reuse=reuse_scope)

            # Additive self-attention.
            f_1 = tf.layers.conv1d(seq_fts, 1, 1, name='layer_' + str(layer_str) + '_a1', reuse=reuse_scope)
            f_2 = tf.layers.conv1d(seq_fts, 1, 1, name='layer_' + str(layer_str) + '_a2', reuse=reuse_scope)
            f_1 = tf.reshape(f_1, [-1, 1])  # [N, 1]
            f_2 = tf.reshape(f_2, [-1, 1])  # [N, 1]

            logits = tf.sparse_add(adj_mat * f_1, adj_mat * tf.transpose(f_2))  # adj_mat is [N, N] (sparse)

            leaky_relu = tf.SparseTensor(indices=logits.indices,
                                         values=self.leaky_relu(logits.values),
                                         dense_shape=logits.dense_shape)
            coefficients = tf.sparse_softmax(leaky_relu)  # [N, N] (sparse)

            if coef_drop != 0.0:
                coefficients = tf.SparseTensor(indices=coefficients.indices,
                                               values=tf.nn.dropout(coefficients.values, 1.0 - coef_drop),
                                               dense_shape=coefficients.dense_shape)  # [N, N] (sparse)
            if in_drop != 0.0:
                seq_fts = tf.nn.dropout(seq_fts, 1.0 - in_drop)  # [N, D]

            seq_fts = tf.squeeze(seq_fts)
            values = tf.sparse_tensor_dense_matmul(coefficients, seq_fts)
            values = tf.reshape(values, [-1, out_sz])
            values = tf.expand_dims(values, axis=0)
            ret = values  # [1, N, F]

            if residual:
                residual_wt = tf.get_variable("layer_" + str(layer_str) + "_residual_weight", shape=[in_sz, out_sz],
                                              dtype=tf.float32)
                if sparse_inputs:
                    ret = ret + tf.expand_dims(tf.sparse_tensor_dense_matmul(seq, residual_wt),
                                               axis=0)  # [N, F] * [F, D] = [N, D].
                else:
                    ret = ret + tf.layers.conv1d(seq, out_sz, 1, use_bias=False,
                                                 name='layer_' + str(layer_str) + '_residual_weight', reuse=reuse_scope)
            return activation(ret)
Exemple #8
0
    def _call(self):
        """Apply dropout, the graph convolution and the optional bias.

        Returns:
            ``self.act(output)`` normally. When ``self.relu_flag == False``
            the tuple ``(output, return_without_w1)`` is returned instead,
            where ``return_without_w1`` is the last support multiplied with
            the features passed through an identity matrix. That second
            element is ``None`` when the product is never built
            (``use_theta`` or ``featureless`` mode, or no supports).
        """
        x = self.input

        # dropout
        if self.sparse_inputs:
            x = sparse_dropout(x, 1 - self.dropout, self.num_features_nonzero)
        else:
            x = tf.nn.dropout(x, 1 - self.dropout)

        # convolve
        supports = list()
        H = None
        # Pre-bind so the code after the loop never hits an unbound name.
        return_without_w1 = None
        for i in range(len(self.support)):
            # Only the non-theta, non-featureless branch produces this value.
            pre_sup_2 = None
            if self.use_theta:
                if H is not None:
                    H = tf.sparse_add(
                        H, self.support[i] * self.vars['theta_' + str(i)])
                else:
                    H = self.support[i] * self.vars['theta_' + str(i)]
            else:
                if not self.featureless:
                    pre_sup = dot(x,
                                  self.vars['weights_' + str(i)],
                                  sparse=self.sparse_inputs)
                    # Same features, but with an identity matrix in place of
                    # the learned weights.
                    pre_sup_2 = dot(x,
                                    tf.eye(self.input_dim),
                                    sparse=self.sparse_inputs)
                else:
                    pre_sup = self.vars['weights_' + str(i)]
                support = dot(self.support[i], pre_sup, sparse=True)
                supports.append(support)

            # Previously computed unconditionally, which raised NameError in
            # the branches that never define `pre_sup_2`.
            if pre_sup_2 is not None:
                return_without_w1 = tf.sparse_tensor_dense_matmul(
                    self.support[i], pre_sup_2)

        if self.use_theta:
            output = dot(H,
                         dot(x, self.vars['weight'],
                             sparse=self.sparse_inputs),
                         sparse=True)
        else:
            output = tf.add_n(supports)

        # bias
        if self.bias:
            output += self.vars['bias']
        print('relu_flag', self.relu_flag)

        # Comparison kept as `== False` on purpose: switching to `not` would
        # also match None and other falsy values.
        if self.relu_flag == False:
            return output, return_without_w1

        return self.act(output)
def sparseGating(inputs_, gates=2):
    """Keep the top-`gates` entries of `inputs_` and set the rest to -inf.

    Args:
        inputs_: dense tensor of scores; `top_k` is taken over its last axis.
        gates: number of entries to keep per slice.

    Returns:
        The output of a `Lambda` layer applied to `inputs_` whose function
        ignores its argument and yields the gated tensor. In that tensor,
        positions where the scattered top-k value is <= 0 (unselected
        positions, and selected values that are not positive) are -inf; the
        remaining positions carry their top-k value.
    """
    # A single top_k call supplies both outputs, so indices and values are
    # guaranteed to correspond and the op is not built twice.
    top = tf.math.top_k(inputs_, gates, sorted=False)
    indi = tf.cast(top.indices, dtype=tf.int64)
    v = top.values

    # NOTE(review): `slices_to_dims` is defined elsewhere; presumably it
    # expands the per-row indices into full coordinates -- confirm.
    sparse_indices = slices_to_dims(indi)
    sparse = tf.SparseTensor(indices=sparse_indices,
                             values=tf.reshape(v, [-1]),
                             dense_shape=tf.cast(tf.shape(inputs_),
                                                 dtype=tf.int64))
    # Dense tensor holding the top-k values at their positions, zero elsewhere.
    c = tf.zeros_like(inputs_)
    d = tf.sparse_add(c, sparse)
    z = tf.ones_like(inputs_) * -np.inf
    mask = tf.less_equal(d, tf.zeros_like(d))
    # -inf * 1 stays -inf (masked); -inf * 0 is NaN (kept), zeroed just below.
    new_tensor = tf.multiply(z, tf.cast(mask, dtype=tf.float32))

    g = tf.where(tf.is_nan(new_tensor), tf.zeros_like(new_tensor), new_tensor)
    g = tf.sparse_add(g, sparse)
    b = Lambda(lambda a: g)(inputs_)
    return b
    def testAddSelf(self):
        """Add the 3x3 fixture to itself for every operand-type combination.

        Each operand is taken both as a SparseTensorValue and as a
        SparseTensor; the sum must have the expected indices, values and
        dense shape in all four pairings.
        """
        expected_indices = [[0, 1], [1, 0], [2, 0], [2, 1]]
        expected_values = [2, 4, 6, 8]
        with self.test_session(use_gpu=False) as sess:
            left_operands = (self._SparseTensorValue_3x3(),
                             self._SparseTensor_3x3())
            for sp_a in left_operands:
                right_operands = (self._SparseTensorValue_3x3(),
                                  self._SparseTensor_3x3())
                for sp_b in right_operands:
                    sp_sum = tf.sparse_add(sp_a, sp_b)
                    result = sess.run(sp_sum)

                    self.assertEqual(sp_sum.dense_shape.get_shape(), [2])
                    self.assertAllEqual(result.indices, expected_indices)
                    self.assertAllEqual(result.values, expected_values)
                    self.assertAllEqual(result.dense_shape, [3, 3])
Exemple #11
0
def sp_attn_head(seq,
                 out_sz,
                 adj_mat,
                 adj_hop1_all_mat,
                 adj_hop2_all_mat,
                 adj_hop1_neig_mat,
                 adj_hop2_neig_mat,
                 N_hop1_neig_mat,
                 N_hop2_neig_mat,
                 activation,
                 nb_nodes,
                 in_drop=0.0,
                 coef_drop=0.0,
                 residual=False):
    """Build a sparse attention head (first GAT layer).

    Only `seq`, `out_sz`, `adj_mat`, `activation`, `nb_nodes`, `in_drop` and
    `coef_drop` are used by this body; the hop matrices and `residual` are
    accepted but not referenced.

    Returns `activation` applied to a tensor whose static shape is set to
    [1, nb_nodes, out_sz] before the bias is added.
    """
    with tf.name_scope('sp_attn'):
        keep_in = 1.0 - in_drop
        use_in_drop = in_drop != 0.0

        if use_in_drop:
            seq = tf.nn.dropout(seq, keep_in)

        features = tf.layers.conv1d(seq, out_sz, 1, use_bias=False)

        # Two scalar projections per node: the simplest self-attention.
        src_score = tf.layers.conv1d(features, 1, 1)
        dst_score = tf.layers.conv1d(features, 1, 1)

        src_score = tf.reshape(src_score, (nb_nodes, 1))
        dst_score = tf.reshape(dst_score, (nb_nodes, 1))

        # Restrict both score terms to the entries present in adj_mat.
        src_term = adj_mat * src_score
        dst_term = adj_mat * tf.transpose(dst_score, [1, 0])

        logits = tf.sparse_add(src_term, dst_term)
        activated = tf.SparseTensor(indices=logits.indices,
                                    values=tf.nn.leaky_relu(logits.values),
                                    dense_shape=logits.dense_shape)
        coefs = tf.sparse_softmax(activated)

        if coef_drop != 0.0:
            dropped_values = tf.nn.dropout(coefs.values, 1.0 - coef_drop)
            coefs = tf.SparseTensor(indices=coefs.indices,
                                    values=dropped_values,
                                    dense_shape=coefs.dense_shape)
        if use_in_drop:
            features = tf.nn.dropout(features, keep_in)

        # Rank-2 operands are needed for the sparse-dense matmul.
        coefs = tf.sparse_reshape(coefs, [nb_nodes, nb_nodes])
        features = tf.squeeze(features)
        aggregated = tf.sparse_tensor_dense_matmul(coefs, features)
        aggregated = tf.expand_dims(aggregated, axis=0)
        aggregated.set_shape([1, nb_nodes, out_sz])
        biased = tf.contrib.layers.bias_add(aggregated)

        return activation(biased)
  def testAddSelfAndNegation(self):
    """Adding the fixture to its negation (thresh 0.1) must leave no entries."""
    with self.test_session(use_gpu=False) as sess:
      sp_a = self._SparseTensor_3x3()
      sp_b = self._SparseTensor_3x3(negate=True)

      sp_sum = tf.sparse_add(sp_a, sp_b, 0.1)
      sum_out = sess.run(sp_sum)

      self.assertEqual(sp_sum.dense_shape.get_shape(), [2])
      self.assertAllEqual(sum_out.indices, np.empty([0, 2]))
      self.assertAllEqual(sum_out.values, [])
      # Read `dense_shape` from the evaluated value, matching the attribute
      # used on the symbolic tensor above.
      self.assertAllEqual(sum_out.dense_shape, [3, 3])
Exemple #13
0
    def build(self):
        """Build the graph: edge strengths, transition matrix and page rank.

        Creates placeholders for the features, the adjacency matrix and the
        source vertex and stores them on ``self``. In ``'training'`` mode it
        also creates the ``vertices`` / ``destinations`` placeholders and
        ``self.loss``; otherwise the rank vector is stored in ``self.result``.
        """
        features = tf.placeholder(dtype=tf.float32,
                                  shape=[None, self.config.num_features])
        w = tf.Variable(tf.ones((self.config.num_features, 1)))
        adj = tf.placeholder(
            dtype=tf.float32,
            shape=[self.config.num_vertices, self.config.num_vertices])
        source = tf.placeholder(dtype=tf.int32, shape=[1])

        strengths = self.logistic_edge_strength_function(
            w, features) + self.config.small_epsilon
        # One strength per positive entry of `adj`, in tf.where order.
        A = tf.SparseTensor(tf.where(
            tf.greater(adj, tf.constant(0, dtype=tf.float32))),
                            strengths[:, 0],
                            dense_shape=tf.shape(adj, out_type=tf.int64))
        # hack for bug in tensorflow because sparse_tensor_to_dense() does not have gradient
        A = tf.sparse_add(tf.zeros(tf.cast(A.dense_shape, tf.int32)), A)

        if self.hybrid_weights:
            row_sum = tf.reduce_sum(adj, axis=1)
            col_sum = tf.reduce_sum(adj, axis=0)
            # Use the float literal 0.5: `1 / 2` evaluates to 0 under
            # Python 2 integer division, which would zero these weights.
            W = 0.5 * (tf.matmul(A, (adj / tf.reshape(col_sum, (1, -1)))) +
                       tf.matmul((adj / tf.reshape(row_sum, (-1, 1))), A))
            W2 = 0.5 * (A + W)
            Q_prim = self.get_stochastic_transition_matrix(W2)
        else:
            Q_prim = self.get_stochastic_transition_matrix(A)

        Q = self.get_transition_matrix(Q_prim, source, self.config.alpha)
        p = self.iterative_page_rank(Q, self.config.epsilon,
                                     self.config.max_iter)

        if self.mode == 'training':
            vertices = tf.placeholder(dtype=tf.int32, shape=[None])
            destinations = tf.placeholder(dtype=tf.int32, shape=[None])
            # vertices minus (destinations union source)
            l_set = tf.sets.set_difference(
                tf.expand_dims(vertices, axis=0),
                tf.sets.set_union(tf.expand_dims(destinations, axis=0),
                                  tf.expand_dims(source, axis=0)))
            l_set = tf.sparse_tensor_to_dense(l_set)[0]
            diff = self.get_differences(p, l_set, destinations)
            # Squared-weight penalty plus the margin loss on the differences.
            loss = tf.reduce_sum(tf.square(w)) + self.loss_function(
                diff, self.config.margin_loss)
            # loss = tf.nn.relu(diff)
            self.loss = tf.reduce_sum(loss)
            self.vertices = vertices
            self.destinations = destinations
        else:
            self.result = p

        self.features = features
        self.adj = adj
        self.source = source
Exemple #14
0
 def next_batch(self):
     '''
     Pull the next batch out of the combined switchable queue.

     The dense targets are turned back into a sparse tensor and each stored
     value is decremented by one to obtain the real labels. Returns
     (source, source_lengths, sparse labels).
     '''
     source, source_lengths, target, _ = self._queue.dequeue_many(
         self._model_feeder.ph_batch_size)

     sparse_target = tf.contrib.layers.dense_to_sparse(target)
     # A sparse tensor of -1s sharing the targets' indices and shape.
     decrement = tf.SparseTensor(
         sparse_target.indices,
         -1 * tf.ones_like(sparse_target.values),
         sparse_target.dense_shape)
     labels = tf.sparse_add(sparse_target, decrement)
     return source, source_lengths, labels
    def testAddSelfAndNegation(self):
        """Sum of the fixture and its negation, thresholded at 0.1, is empty."""
        with self.test_session(use_gpu=False) as sess:
            original = self._SparseTensor_3x3()
            negated = self._SparseTensor_3x3(negate=True)

            sp_sum = tf.sparse_add(original, negated, 0.1)
            evaluated = sess.run(sp_sum)

            # The shape vector is rank 1 with two elements.
            self.assertEqual(sp_sum.dense_shape.get_shape(), [2])
            # No index/value pairs survive, but the dense shape is kept.
            no_indices = np.empty([0, 2])
            self.assertAllEqual(evaluated.indices, no_indices)
            self.assertAllEqual(evaluated.values, [])
            self.assertAllEqual(evaluated.dense_shape, [3, 3])
    def testSparseTensorDenseAddGradients(self):
        """Numerically check gradients of sparse + dense w.r.t. both inputs."""
        np.random.seed(1618)  # Fixed seed for reproducibility.
        n, m = np.random.randint(30, size=2)
        dense_shape = (n, m)
        rand_vals_np = np.random.randn(n, m).astype(np.float32)
        dense_np = np.random.randn(n, m).astype(np.float32)

        with self.test_session(use_gpu=False):
            sparse, nnz = _sparsify(rand_vals_np)
            dense = tf.constant(dense_np, dtype=tf.float32)
            total = tf.sparse_add(sparse, dense)

            inputs = [sparse.values, dense]
            input_shapes = [(nnz,), dense_shape]
            err = tf.test.compute_gradient_error(inputs, input_shapes,
                                                 total, dense_shape)
            self.assertLess(err, 1e-3)
Exemple #17
0
  def testAddSelf(self):
    """Add the 3x3 fixture to itself for each SparseTensorValue/SparseTensor mix."""
    with self.test_session(use_gpu=False) as sess:
      for sp_a in (self._SparseTensorValue_3x3(), self._SparseTensor_3x3()):
        for sp_b in (self._SparseTensorValue_3x3(), self._SparseTensor_3x3()):
          sp_sum = tf.sparse_add(sp_a, sp_b)

          sum_out = sess.run(sp_sum)

          self.assertEqual(sp_sum.dense_shape.get_shape(), [2])
          self.assertAllEqual(
              sum_out.indices, [[0, 1], [1, 0], [2, 0], [2, 1]])
          self.assertAllEqual(sum_out.values, [2, 4, 6, 8])
          # Read `dense_shape` from the evaluated value, matching the
          # attribute used on the symbolic tensor above.
          self.assertAllEqual(sum_out.dense_shape, [3, 3])
Exemple #18
0
    def _apply_sparse(self, grad, var):
        """
        Build the update ops for one variable from a sparse gradient.

        The first- and second-moment slots ("m" and "v") are decayed in place
        and then receive the gradient contribution only at ``grad.indices``.

        :param tf.IndexedSlices grad:
        :param tf.Variable var:
        :return: group of update operations
        :rtype: tf.Operation
        """
        # Cast all hyper-parameter tensors to the variable's base dtype.
        beta2_power = tf.cast(self._beta2_power, var.dtype.base_dtype)
        lr_t = tf.cast(self._lr_t, var.dtype.base_dtype)
        beta1_t = tf.cast(self._beta1_t, var.dtype.base_dtype)
        beta2_t = tf.cast(self._beta2_t, var.dtype.base_dtype)
        epsilon_t = tf.cast(self._epsilon_t, var.dtype.base_dtype)
        mu_t = tf.cast(self._mu_t, var.dtype.base_dtype)
        mu_t_next = tf.cast(self._mu_t_next, var.dtype.base_dtype)
        mu_prod_t_next = tf.cast(self._mu_prod_t_next, var.dtype.base_dtype)
        mu_prod_t_next2 = tf.cast(self._mu_prod_t_next2, var.dtype.base_dtype)

        m_prev = self.get_slot(var, "m")
        v_prev = self.get_slot(var, "v")

        # called m_t in paper
        # Decay the whole slot first, then scatter-add the new gradient term.
        m = beta1_t * m_prev
        m = tf.assign(m_prev, m, use_locking=self._use_locking)
        m = tf.scatter_add(m,
                           grad.indices, (1 - beta1_t) * grad.values,
                           use_locking=self._use_locking)
        m_update = m
        m_ = m / (
            1 - mu_prod_t_next2
        )  # bias correction (with momentum schedule (include the next t+1))

        # called n_t in paper
        # Same decay-then-scatter pattern, using the squared gradient values.
        v = beta2_t * v_prev
        v = tf.assign(v_prev, v, use_locking=self._use_locking)
        v = tf.scatter_add(v,
                           grad.indices,
                           (1 - beta2_t) * (grad.values * grad.values),
                           use_locking=self._use_locking)
        v_update = v
        v_ = v / (1 - beta2_power)

        # Combine the bias-corrected momentum with the scaled current gradient.
        # NOTE(review): neither argument here is a SparseTensor (a dense
        # tensor and an IndexedSlices); tf.sparse_add is documented for
        # SparseTensor/Tensor operands -- confirm this call is accepted.
        m__ = tf.sparse_add(
            mu_t_next * m_,
            tf.IndexedSlices((1 - mu_t) * grad.values / (1 - mu_prod_t_next),
                             grad.indices, grad.dense_shape))

        step = lr_t * m__ / (tf.sqrt(v_) + epsilon_t)
        var_update = tf.assign_sub(var, step, use_locking=self._use_locking)

        return tf.group(var_update, m_update, v_update)
Exemple #19
0
 def _weights_jac_a(
         self,
         X,
         loc,
         scale,
 ):
     """Compute the weighting term built from `X`, `loc` and `scale`.

     Evaluates
     ``(-digamma(loc*scale) + digamma((1-loc)*scale) + log(X/(1-X)))
     * scale * loc * (1-loc)``.

     `X` may be a dense tensor or a `tf.SparseTensor`; a sparse `X` is first
     densified by adding it to zeros shaped like `loc`.
     """
     one_minus_loc = 1 - loc
     if isinstance(X, tf.SparseTensor):
         # Densify once and reuse the dense logit expression below, instead
         # of calling the `__div__` dunder and mixing the `tf.sparse_add`
         # and `tf.sparse.add` spellings.
         x_dense = tf.sparse_add(tf.zeros_like(loc), X)
     else:
         x_dense = X
     const1 = tf.log(x_dense / (1 - x_dense))
     const2 = - tf.digamma(loc*scale) + tf.digamma(one_minus_loc*scale) + const1
     const = const2 * scale * loc * one_minus_loc
     return const
    def testGradients(self):
        """Numerically check sparse + sparse gradients for several shapes."""
        np.random.seed(1618)  # Fixed seed for reproducibility.
        # Same (n, m) visiting order as nesting n in [10, 31] over m in [4, 17].
        shapes = [(10, 4), (10, 17), (31, 4), (31, 17)]
        with self.test_session(use_gpu=False):
            for n, m in shapes:
                sp_a, nnz_a = self._randomTensor([n, m], np.float32)
                sp_b, nnz_b = self._randomTensor([n, m], np.float32)
                sp_sum = tf.sparse_add(sp_a, sp_b)
                # The output size is only known after evaluating the sum.
                nnz_sum = len(sp_sum.values.eval())

                inputs = [sp_a.values, sp_b.values]
                input_shapes = [(nnz_a, ), (nnz_b, )]
                err = tf.test.compute_gradient_error(
                    inputs, input_shapes, sp_sum.values, (nnz_sum, ))
                self.assertLess(err, 1e-3)
    def testSparseTensorDenseAddGradients(self):
        """Gradient check for adding a sparse tensor to a dense constant.

        The numeric and analytic gradients with respect to the sparse values
        and the dense operand must agree to within 1e-3.
        """
        np.random.seed(1618)  # Make the random shape and data repeatable.
        n, m = np.random.randint(30, size=2)
        sparse_source = np.random.randn(n, m).astype(np.float32)
        dense_source = np.random.randn(n, m).astype(np.float32)

        with self.test_session(use_gpu=False):
            sparse, nnz = _sparsify(sparse_source)
            dense = tf.constant(dense_source, dtype=tf.float32)
            s = tf.sparse_add(sparse, dense)

            err = tf.test.compute_gradient_error(
                [sparse.values, dense],
                [(nnz, ), (n, m)],
                s,
                (n, m))
            self.assertLess(err, 1e-3)
  def testGradients(self):
    """Gradient check of sparse + sparse addition over a grid of shapes."""
    np.random.seed(1618)  # Make the random tensors repeatable.
    row_counts = [10, 31]
    col_counts = [4, 17]
    with self.test_session(use_gpu=False):
      for n in row_counts:
        for m in col_counts:
          shape = [n, m]
          sp_a, nnz_a = self._randomTensor(shape, np.float32)
          sp_b, nnz_b = self._randomTensor(shape, np.float32)
          sp_sum = tf.sparse_add(sp_a, sp_b)
          # Evaluate once to learn how many entries the sum contains.
          summed_values = sp_sum.values.eval()
          nnz_sum = len(summed_values)

          err = tf.test.compute_gradient_error(
              [sp_a.values, sp_b.values],
              [(nnz_a,), (nnz_b,)],
              sp_sum.values,
              (nnz_sum,))
          self.assertLess(err, 1e-3)
Exemple #23
0
    def _gen_negsample(self):
        """Build the masked logits and draw negative samples from them.

        Sets ``self.all_logits_masked`` and ``self.negsamples``. Unless
        ``self.reduced`` is set, the entries listed in ``self.i_pos`` get
        -inf added to their temperature-scaled logits before sampling.
        """
        self.model._create_loss()

        # Sparse indicator holding 1.0 at every position listed in i_pos.
        n_pos = tf.shape(self.i_pos)[0]
        pos_values = tf.ones([n_pos], dtype=tf.float32)
        n_users = tf.shape(self.user_input, out_type=tf.int64)[0]
        user_i_pos = tf.SparseTensor(indices=self.i_pos,
                                     values=pos_values,
                                     dense_shape=[n_users, self.num_items])

        if self.reduced:
            # for reduced sampling
            masked = self.model.sampled_logits / self.temperature
        else:
            scaled = self.model.all_logits / self.temperature
            masked = tf.sparse_add(scaled, user_i_pos*(-np.inf))
        self.all_logits_masked = masked

        samples = tf.multinomial(self.all_logits_masked, self.num_neg)
        self.negsamples = tf.reshape(samples, [-1, 1])
Exemple #24
0
    def _call(self):
        """Apply dropout, the graph convolution, the optional bias and ``self.act``.

        With ``self.use_theta`` the supports are combined into a single
        matrix ``H`` (each scaled by its ``theta_i`` variable) and applied to
        ``x @ weight``. Otherwise each support is applied to its own weighted
        features and the results are summed.
        """
        x = self.input

        # dropout
        if self.sparse_inputs:
            x = sparse_dropout(x, 1 - self.dropout, self.num_features_nonzero)
        else:
            x = tf.nn.dropout(x, 1 - self.dropout)

        # convolve
        supports = list()
        H = None
        for i, support_mat in enumerate(self.support):
            if self.use_theta:
                scaled = support_mat * self.vars['theta_' + str(i)]
                # Identity test instead of `!= None`: no reliance on how the
                # tensor type implements comparison operators.
                if H is not None:
                    H = tf.sparse_add(H, scaled)
                else:
                    H = scaled
            else:
                if not self.featureless:
                    pre_sup = dot(x,
                                  self.vars['weights_' + str(i)],
                                  sparse=self.sparse_inputs)
                else:
                    pre_sup = self.vars['weights_' + str(i)]
                support = dot(support_mat, pre_sup, sparse=True)
                supports.append(support)

        if self.use_theta:
            output = dot(H,
                         dot(x, self.vars['weight'],
                             sparse=self.sparse_inputs),
                         sparse=True)
        else:
            output = tf.add_n(supports)

        # output = tf.layers.batch_normalization(output,
        #                               axis=1,
        #                               center=True,
        #                               scale=True,
        #                               training=True)

        # bias
        if self.bias:
            output += self.vars['bias']

        return self.act(output)
Exemple #25
0
    def _loss_helper(self, tvt):
        """Assemble the total loss for the given split.

        The total is the weight-decay loss over all layer variables, plus
        every task loss from ``self._task_loss(tvt)``, plus (when
        ``ec.graph_loss == '1st'``) the first-order graph loss. When ``tvt``
        is ``'train'``, a scalar summary is emitted for each component and
        for the total.

        :param tvt: split label; only the value ``'train'`` is tested here.
        :return: the summed loss.
        """
        if logging_enabled == True:
            print("- Entered model::Model::_loss_helper Private Method")

        rtn = 0

        # Weight decay loss.
        # Accumulate over all variables so the summary below reports the
        # full weight-decay loss rather than only the last variable's term.
        wdl = 0
        for layer in self.layers:
            for var in layer.vars.values():
                wdl += self.weight_decay * tf.nn.l2_loss(var)
        rtn += wdl
        if tvt == 'train':
            tf.compat.v1.summary.scalar('weight_decay_loss', wdl)

        task_loss_dict = self._task_loss(tvt)
        for loss_label, loss in task_loss_dict.items():
            rtn += loss
            if tvt == 'train':
                tf.compat.v1.summary.scalar(loss_label, loss)

        if ec.graph_loss == '1st':
            node_emb_list = self._get_last_gcn_layer_outputs(tvt)
            laplacian_list = self._get_laplacians_for_graph_loss(tvt)
            gl = 0

            for i, node_emb_mat in enumerate(node_emb_list):
                # gli = 2 * tf.trace(
                #     dot(tf.transpose(
                #         dot(laplacian_list[i], node_emb_mat, sparse=True)),
                #         node_emb_mat))
                # gl += gli
                # Square root of the summed squared difference between the
                # embedding Gram matrix and laplacian_list[i][0].
                mat = tf.matmul(node_emb_mat, tf.transpose(node_emb_mat))
                gl += tf.sqrt(
                    tf.reduce_sum(
                        tf.square(tf.sparse_add(-mat, laplacian_list[i][0]))))

            gl /= ec.batch_size
            gl *= ec.graph_loss_alpha
            rtn += gl

            if tvt == 'train':
                tf.compat.v1.summary.scalar('1st_order_graph_loss', gl)

        if tvt == 'train':
            tf.compat.v1.summary.scalar('total_loss', rtn)

        return rtn
    def build(self, input_shape):
        """Create the layer variables and the transformed filter ``self.W``.

        ``input_shape`` is read at positions 1 and 2 for the number of subsets
        and channels. Trainable weights exist only at the subset indices in
        ``self.coef_indices``; they are scattered into a dense
        [n_subsets, n_channels, n_filters] tensor and passed through
        ``self.fr.fft``.
        """
        self.n_subsets = input_shape[1].value
        self.n_channels = input_shape[2].value
        self.n_vertices = int(np.log2(self.n_subsets))

        self.ft = FourierTransform(self.n_vertices,
                                   self.model,
                                   dtype=self.dtype)
        # Model 5 reuses the forward transform; all others get a separate one.
        if self.model == 5:
            self.fr = self.ft
        else:
            self.fr = FourierTransform(self.n_vertices, 1, dtype=self.dtype)

        # Subset index 0 followed by the powers of two below n_subsets.
        powers_of_two = 2**np.arange(self.n_vertices)
        self.coef_indices = np.concatenate(
            [np.zeros(1, dtype=np.int32), powers_of_two], axis=0)

        with tf.variable_scope(self.name, reuse=self._reuse):
            w_shape = [
                self.coef_indices.shape[0], self.n_channels, self.n_filters
            ]
            self.w = self.add_variable('w',
                                       shape=w_shape,
                                       dtype=self.dtype,
                                       initializer=self.kernel_initializer,
                                       trainable=True)
            if self.use_bias:
                bias_init = tf.constant_initializer(
                    np.ones((1, 1, self.n_filters)) * 0.01)
                self.bias = self.add_variable('bias',
                                              shape=[1, 1, self.n_filters],
                                              dtype=self.dtype,
                                              trainable=True,
                                              initializer=bias_init)

        # Every (subset, channel, filter) coordinate that holds a weight.
        coords = itertools.product(self.coef_indices,
                                   np.arange(self.n_channels),
                                   np.arange(self.n_filters))
        indices = np.asarray(list(coords))
        flat_weights = tf.reshape(self.w, [-1])
        W_sparse = tf.SparseTensor(
            indices, flat_weights,
            [self.n_subsets, self.n_channels, self.n_filters])
        # Densify by adding the sparse weights to a zero tensor.
        W_dense = tf.sparse_add(
            tf.zeros(W_sparse.dense_shape, dtype=self.dtype), W_sparse)
        W_dense = tf.transpose(W_dense, [1, 0, 2])
        self.W = self.fr.fft(W_dense)  # [n_channels, n_subsets, n_filters]
        self.W = tf.transpose(self.W, [1, 0, 2])
        self.built = True
Exemple #27
0
    def attn_head(self,
                  input,
                  output_sz,
                  num_nodes,
                  adj,
                  attn_drop,
                  f_drop,
                  activate=tf.nn.elu):
        """Apply one sparse attention head over a single graph.

        Node features are projected with a 1x1 convolution, two scalar scores
        per node are combined along the non-zero entries of ``adj``, passed
        through leaky-ReLU and a sparse softmax, and the resulting
        coefficients aggregate the projected features.

        Args:
            input: Rank-3 feature tensor. The reshapes below require the
                leading (batch) dimension to be 1.
            output_sz: Number of output features per node.
            num_nodes: Number of nodes in the graph.
            adj: Sparse adjacency of shape ``[num_nodes, num_nodes]``; it must
                be a ``tf.SparseTensor`` because the masked scores are read
                through ``.indices`` / ``.values``.
            attn_drop: Dropout rate for the attention coefficients
                (``0.0`` disables it).
            f_drop: Dropout rate for the features (``0.0`` disables it).
            activate: Activation applied to the biased output.

        Returns:
            ``activate`` applied to a ``[1, num_nodes, output_sz]`` tensor.
        """
        with tf.name_scope('attn'):
            if f_drop != 0.0:
                input = tf.nn.dropout(input, 1.0 - f_drop)

            combined = tf.layers.conv1d(input, output_sz, 1, use_bias=False)

            # Two independent scalar scores per node.
            f_1 = tf.layers.conv1d(combined, 1, 1)
            f_2 = tf.layers.conv1d(combined, 1, 1)

            f_1 = tf.reshape(f_1, (num_nodes, 1))
            f_2 = tf.reshape(f_2, (num_nodes, 1))

            # Broadcasting against the sparse adjacency keeps only the scores
            # on existing edges: f_1 varies by row, f_2 by column.
            f_1 = adj * f_1
            f_2 = adj * tf.transpose(f_2, [1, 0])

            output = tf.sparse_add(f_1, f_2)
            output = tf.SparseTensor(indices=output.indices,
                                     values=tf.nn.leaky_relu(output.values),
                                     dense_shape=output.dense_shape)

            coefs = tf.sparse_softmax(output)

            if attn_drop != 0.0:
                coefs = tf.SparseTensor(indices=coefs.indices,
                                        values=tf.nn.dropout(
                                            coefs.values, 1.0 - attn_drop),
                                        dense_shape=coefs.dense_shape)
            if f_drop != 0.0:
                combined = tf.nn.dropout(combined, 1.0 - f_drop)

            coefs = tf.sparse_reshape(coefs, [num_nodes, num_nodes])
            # Drop only the batch axis. An unqualified squeeze would also
            # remove the node or feature axis when num_nodes == 1 or
            # output_sz == 1 and break the matmul below.
            combined = tf.squeeze(combined, axis=0)
            vals = tf.sparse_tensor_dense_matmul(coefs, combined)
            vals = tf.expand_dims(vals, axis=0)
            vals.set_shape([1, num_nodes, output_sz])
            ret = tf.contrib.layers.bias_add(vals)

        return activate(ret)
    def getDecoderOutput(self,
                         output,
                         lstm_cell_size,
                         token_vocab_size,
                         out_weights,
                         alpha_sentinel,
                         encoder_input_sequence,
                         batch_size,
                         vocab_size,
                         context,
                         use_context_for_out=config.use_context_for_out):
        """Mix the vocabulary softmax with pointer weights over source tokens.

        The softmax over the projected decoder output is scaled by the
        sentinel weight, and the attention weights ``alpha`` are then added at
        the vocabulary ids found in ``encoder_input_sequence``.

        Args:
            output: Decoder output for the current step, (N, cell_size).
            lstm_cell_size: Unused here.
            token_vocab_size: Unused here.
            out_weights: Tuple ``(w_out, b_out, w_context_out,
                b_context_out)``.
            alpha_sentinel: Tuple ``(alpha, sentinel_weight)`` with ``alpha``
                of shape (N, encoder_length) and ``sentinel_weight`` of
                shape (N,).
            encoder_input_sequence: Source token ids, (N, encoder_length).
            batch_size: Batch size N.
            vocab_size: Width of the returned distribution.
            context: Context vector, used only when ``use_context_for_out``.
            use_context_for_out: Whether to add the projected context to the
                logits.

        Returns:
            An (N, vocab_size) tensor of probabilities (not logits).
        """
        w_out, b_out, w_context_out, b_context_out = out_weights
        alpha, sentinel_weight = alpha_sentinel  # sentinel_weight: (N,)

        logits = tf.matmul(output, w_out) + b_out  # (N, vocab_size)
        if use_context_for_out:
            logits = logits + (tf.matmul(context, w_context_out) +
                               b_context_out)

        vocab_dist = tf.nn.softmax(logits)
        gate = tf.expand_dims(sentinel_weight, 1)  # (N, 1)
        generator_probs = vocab_dist * gate  # g * rnnprob(w)

        # Pair every source position with its batch row so that alpha can be
        # placed at (row, token_id) in vocabulary space.
        row_ids = tf.expand_dims(tf.range(batch_size), 1)
        encoder_length = tf.shape(encoder_input_sequence)[1]
        row_ids = tf.tile(row_ids, [1, encoder_length])  # (N, encoder_length)
        pointer_indices = tf.stack([row_ids, encoder_input_sequence],
                                   axis=2)  # (N, encoder_length, 2)
        pointer_indices = tf.reshape(pointer_indices,
                                     [-1, 2])  # (N * encoder_length, 2)
        pointer_indices = tf.cast(pointer_indices, tf.int64)

        # alpha is used as-is: per the original author's note its sum is
        # already (1 - g), so no extra scaling by the sentinel is applied.
        pointer_values = tf.reshape(alpha, [-1])  # (N * encoder_length,)
        pointer_probs = tf.SparseTensor(
            indices=pointer_indices,
            values=pointer_values,
            dense_shape=np.array([batch_size, vocab_size], dtype=np.int64))

        # Note: these are probabilities. Use sparse cross entropy with logits
        # only after further processing.
        return tf.sparse_add(generator_probs, pointer_probs)
def Test_Gradient():
    """Print the gradient of three sparse-to-dense conversions w.r.t. values.

    The same (5, 7) sparse tensor is densified with
    ``tf.sparse_tensor_to_dense``, ``tf.scatter_nd`` and ``tf.sparse_add``
    onto zeros; each result is summed and its gradient with respect to the
    ``values`` placeholder is printed.

    Outputs recorded by the original author:
      dense_tensor1 -> None
      dense_tensor2 -> tf.Tensor 'gradients_1/ScatterNd_grad/GatherNd:0'
      dense_tensor3 -> tf.Tensor
                       'gradients_2/SparseTensorDenseAdd_grad/GatherNd:0'
    """
    indices = tf.placeholder(tf.int64, (None, 2))
    values = tf.placeholder(tf.float32, (None, ))
    sparse_tensor = tf.SparseTensor(indices, values, (5, 7))

    densified = [
        tf.sparse_tensor_to_dense(sparse_tensor),
        tf.scatter_nd(indices, values, shape=[5, 7]),
        tf.sparse_add(tf.zeros((5, 7)), sparse_tensor),
    ]
    totals = [tf.reduce_sum(dense) for dense in densified]

    labels = ('dense_tensor1', 'dense_tensor2', 'dense_tensor3')
    for label, total in zip(labels, totals):
        print(label, tf.gradients(total, values))
Exemple #30
0
    def __call__(self, x):
        """Encode ``x`` into a multivariate normal over the latent series.

        ``self.net`` output is split along its (transposed) second axis into
        a mean part (first ``z_size`` rows) and a part that parameterises the
        main diagonal and first super-diagonal of a precision factor. That
        factor (plus identity) is inverted by a triangular solve and its
        transpose is used as ``scale_tril``.

        Args:
            x: Input passed to ``self.net``. The network output must have a
                static batch size and time length.

        Returns:
            A ``tfd.MultivariateNormalTriL`` distribution.
        """

        def _swap_last_two(rank):
            # Permutation that exchanges the two trailing axes.
            return list(range(rank - 2)) + [rank - 1, rank - 2]

        mapped = self.net(x)

        static_shape = mapped.shape.as_list()
        batch_size = static_shape[0]
        time_length = static_shape[1]

        # Obtain mean and precision matrix components
        mapped_transposed = tf.transpose(mapped,
                                         perm=_swap_last_two(
                                             len(static_shape)))
        mapped_mean = mapped_transposed[:, :self.z_size]
        mapped_covar = mapped_transposed[:, self.z_size:]

        # tf.nn.sigmoid provides more stable performance on Physionet dataset
        if self.data_type == 'physionet':
            positive = tf.nn.sigmoid
        else:
            positive = tf.nn.softplus
        mapped_covar = positive(mapped_covar)

        # NOTE(review): this reshape presumes the network emits 3 * z_size
        # channels (z_size means + 2 * z_size band entries) - confirm.
        mapped_reshaped = tf.reshape(
            mapped_covar, [batch_size, self.z_size, 2 * time_length])

        # Coordinates of the band entries: for each (batch, latent) block,
        # the time_length diagonal cells (t, t) followed by the
        # time_length - 1 super-diagonal cells (t, t + 1).
        band_len = 2 * time_length - 1
        num_blocks = batch_size * self.z_size
        dense_shape = [batch_size, self.z_size, time_length, time_length]
        batch_idx = np.repeat(np.arange(batch_size), self.z_size * band_len)
        latent_idx = np.tile(np.repeat(np.arange(self.z_size), band_len),
                             batch_size)
        row_idx = np.tile(
            np.concatenate(
                [np.arange(time_length),
                 np.arange(time_length - 1)]), num_blocks)
        col_idx = np.tile(
            np.concatenate(
                [np.arange(time_length),
                 np.arange(1, time_length)]), num_blocks)
        idxs_all = np.stack([batch_idx, latent_idx, row_idx, col_idx], axis=1)

        # ~10x faster on CPU than on GPU
        with tf.device('/cpu:0'):
            # Obtain covariance matrix from precision one
            # The last of the 2 * time_length entries is unused.
            band_values = tf.reshape(mapped_reshaped[:, :, :-1], [-1])
            prec_sparse = tf.sparse.SparseTensor(indices=idxs_all,
                                                 values=band_values,
                                                 dense_shape=dense_shape)
            prec_sparse = tf.sparse.reorder(prec_sparse)
            prec_tril = tf.sparse_add(
                tf.zeros(prec_sparse.dense_shape, dtype=tf.float32),
                prec_sparse)
            prec_dims = prec_tril.shape.as_list()
            eye = tf.eye(num_rows=prec_dims[-1], batch_shape=prec_dims[:-2])
            prec_tril = prec_tril + eye
            cov_tril = tf.linalg.triangular_solve(matrix=prec_tril,
                                                  rhs=eye,
                                                  lower=False)
            # Replace non-finite results of the solve with zeros.
            cov_tril = tf.where(tf.math.is_finite(cov_tril), cov_tril,
                                tf.zeros_like(cov_tril))

        # The solve used an upper-triangular matrix; transpose to obtain the
        # lower-triangular factor expected by MultivariateNormalTriL.
        cov_tril_lower = tf.transpose(cov_tril,
                                      perm=_swap_last_two(
                                          len(cov_tril.shape)))
        return tfd.MultivariateNormalTriL(loc=mapped_mean,
                                          scale_tril=cov_tril_lower)
Exemple #31
0
    def graph_attention_layer2(self, A, M, v, layer):
        """Compute sparse attention coefficients for one layer.

        Two per-node scores ``M @ v[0]`` and ``M @ v[1]`` are masked by ``A``
        (row-wise and column-wise respectively), summed, squashed with a
        sigmoid and normalised with a sparse softmax.

        Args:
            A: Sparse adjacency; the masked sum is read through
                ``.indices`` / ``.values``, so it must be a
                ``tf.SparseTensor``.
            M: Dense matrix multiplied with the attention vectors.
            v: Indexable holding the two attention weight tensors.
            layer: Layer identifier used in the variable scope name.

        Returns:
            A ``tf.SparseTensor`` of attention coefficients.
        """

        def _with_values(template, new_values):
            # Same sparsity pattern as `template`, different values.
            return tf.SparseTensor(indices=template.indices,
                                   values=new_values,
                                   dense_shape=template.dense_shape)

        with tf.variable_scope("layer_%s" % layer):
            row_scores = A * tf.matmul(M, v[0])
            col_scores = A * tf.transpose(tf.matmul(M, v[1]), [1, 0])
            logits = tf.sparse_add(row_scores, col_scores)

            unnormalized_attentions = _with_values(
                logits, tf.nn.sigmoid(logits.values))
            normalized = tf.sparse_softmax(unnormalized_attentions)

            return _with_values(normalized, normalized.values)
Exemple #32
0
    def setupQ(self, init):
        """Create the per-core variables and the tensors derived from them.

        For every core ``i`` in ``range(self.d)`` and every slice ``j`` in
        ``range(self.n[i])`` one variable is created:

        * first core, last core, or ``self.r == 1``: an ``[r, 1]`` vector that
          is used directly;
        * otherwise: ``r * (r - 1) / 2`` free parameters that fill the strict
          upper triangle of a matrix ``T``; the skew-symmetric ``S = T - T^T``
          is mapped to ``(I - S)(I + S)^-1`` (Cayley transform).

        The created variables are stored in ``self.vs``.

        Args:
            init: Initializer passed to ``tf.get_variable``.

        Returns:
            List with one tensor per ``(i, j)`` pair, in iteration order.
        """
        # only need R choose 2 parameters
        sparseshape = int(self.r * (self.r - 1) / 2)
        print(sparseshape)

        # (row, col) coordinates of the strict upper triangle, row-major.
        indices = list(zip(*np.triu_indices(self.r, k=1)))
        indices = tf.constant([list(i) for i in indices], dtype=tf.int64)

        Q = []
        self.vs = []
        for i in range(0, self.d):
            for j in range(0, self.n[i]):
                # NOTE(review): plain digit concatenation is ambiguous once
                # i or j reaches 10 (e.g. (1, 11) and (11, 1) both give
                # "111"); left unchanged to keep existing variable names.
                vname = self._name + str(i) + str(j)
                if i == 0 or i == self.d - 1 or self.r == 1:
                    # Vector for first and last cores of TT
                    myvar = tf.get_variable(vname,
                                            shape=[self.r, 1],
                                            initializer=init)
                    tmp = myvar
                else:
                    # sparse representation for skew symm matrix
                    myvar = tf.get_variable(vname,
                                            shape=[sparseshape, 1],
                                            initializer=init)

                    # dense rep
                    # Squeeze only the trailing axis: SparseTensor values must
                    # stay rank 1, and an unqualified squeeze collapses the
                    # [1, 1] variable of the r == 2 case to a scalar.
                    striu = tf.SparseTensor(indices=indices,
                                            values=tf.squeeze(myvar, axis=1),
                                            dense_shape=[self.r, self.r])
                    triu = tf.sparse_add(striu, tf.zeros(striu.dense_shape))

                    # skew symmetric
                    sksym = triu - tf.transpose(triu)

                    # Cayley transform to Orthogonal SO(r)
                    I = tf.eye(self.r)
                    tmp = tf.matmul(I - sksym, tf.matrix_inverse(I + sksym))

                Q.append(tmp)
                self.vs.append(myvar)
        return Q
Exemple #33
0
def fc_layer(x, output_num, activation='none', dropout=None, name='fc',
             input_dim=47236):
    """Fully connected layer for inputs that are mostly zeros.

    Args:
        x: Dense 2-D input of shape ``[batch, input_dim]``. It is multiplied
            with ``a_is_sparse=True``, which is only a hint to the kernel.
        output_num: Number of output units.
        activation: ``'relu'``, ``'sigmoid'`` or ``'none'``. Any other value
            is treated like ``'none'``.
        dropout: If not ``None``, forwarded as the second argument of
            ``tf.nn.dropout``.
        name: Name of the weight variable.
        input_dim: Number of input features. Defaults to the previously
            hard-coded 47236.

    Returns:
        The layer output tensor of shape ``[batch, output_num]``.
    """
    weight = tf.get_variable(
        name, [input_dim, output_num],
        initializer=tf.random_normal_initializer(stddev=0.01))
    bias = tf.Variable(tf.zeros(output_num))

    y = tf.sparse_matmul(x, weight, a_is_sparse=True)
    # Both operands are dense here; tf.sparse_add rejects that with a
    # TypeError ("At least one input should be SparseTensor"), so a plain
    # broadcasting add is used for the bias.
    y = tf.add(y, bias)

    if activation == 'relu':
        y = tf.nn.relu(y)
    elif activation == 'sigmoid':
        y = tf.nn.sigmoid(y)

    if dropout is not None:
        y = tf.nn.dropout(y, dropout)
    return y
Exemple #34
0
    def __init__(self, sess, input_dim, emb_dim, decoder_num_steps):
        """Build the sparse-input encoder, unrolled decoder and training op.

        The input is a sparse matrix fed through three placeholders. It is
        encoded by a single linear map, decoded by ``decoder_num_steps``
        unrolled update steps that reuse the encoder weight, and trained with
        plain gradient descent on a squared reconstruction loss.

        Args:
            sess: TensorFlow session, stored on the instance.
            input_dim: Number of input features (columns of the sparse input).
            emb_dim: Size of the encoding.
            decoder_num_steps: Number of unrolled decoder steps.
        """
        self.sess = sess
        self.input_dim = input_dim
        self.emb_dim = emb_dim
        self.decoder_num_steps = decoder_num_steps
        # define the input as a SparseTensor
        self.indices_x = tf.placeholder("int64", [None, 2])
        self.values_x = tf.placeholder("float", [None])
        self.dense_shape_x = tf.placeholder("int64", [2])
        self.input_x = tf.SparseTensor(indices=self.indices_x,
                                       values=self.values_x,
                                       dense_shape=self.dense_shape_x)
        self.encoder_weight = tf.Variable(
            tf.truncated_normal([self.input_dim, self.emb_dim],
                                stddev=1.0 / np.sqrt(self.input_dim)))
        # encode the input
        self.encode = tf.sparse_tensor_dense_matmul(self.input_x,
                                                    self.encoder_weight)
        # decode by simulating decoder_num_steps projected subgradient updates
        self.step_size = tf.Variable(1.0)

        def decode_subgrad(x, W, num_steps, step_size):
            """
            Simulates several steps of subgradient descent of an l1-min:
            x+ = x + step_size*(W^TW-I)sign(x)

            Step i uses step_size / (i + 1) and is followed by batch
            normalization; the final result is passed through a ReLU.
            """
            x = tf.matmul(x, W, transpose_b=True)
            # `range` instead of the Python-2-only `xrange`, so the graph can
            # also be built on Python 3.
            for i in range(num_steps):
                x = x + (
                    tf.matmul(tf.matmul(tf.sign(x), W), W, transpose_b=True) -
                    tf.sign(x)) * (step_size / (i + 1))
                x = tf.layers.batch_normalization(x, axis=1)
            return tf.nn.relu(x)

        self.pred = decode_subgrad(self.encode, self.encoder_weight,
                                   self.decoder_num_steps, self.step_size)
        # define the squared loss
        # Mean over all entries, rescaled by input_dim.
        self.sq_loss = tf.reduce_mean(
            tf.pow(tf.sparse_add(self.input_x, -self.pred),
                   2)) * self.input_dim
        self.learning_rate = tf.placeholder("float", [])
        self.sq_optim = tf.train.GradientDescentOptimizer(
            self.learning_rate).minimize(self.sq_loss)
Exemple #35
0
def _s2d_add_vs_sparse_add(sparsity, n, m, num_iters=50):
  """Time densify-then-add against tf.sparse_add on an n x m problem.

  Args:
    sparsity: Threshold forwarded to `_sparsify` as `thresh`.
    n: Number of rows.
    m: Number of columns.
    num_iters: Number of timed runs per variant.

  Returns:
    A pair `(s2d_ms, sparse_add_ms)` of per-iteration latencies in
    milliseconds.
  """
  np.random.seed(1618)

  with tf.Session(graph=tf.Graph()) as sess:
    sparse_source = np.random.rand(n, m).astype(np.float32)
    sp_t, _ = _sparsify(sparse_source, thresh=sparsity, index_dtype=np.int32)
    dense_vals = np.random.rand(n, m).astype(np.float32)

    densify_then_add = tf.add(
        tf.sparse_tensor_to_dense(sp_t), tf.constant(dense_vals))
    direct_add = tf.sparse_add(sp_t, tf.constant(dense_vals))

    def _seconds(fetch, repeats):
      # Total wall time of `repeats` session runs of `fetch`.
      return timeit.timeit(lambda: sess.run(fetch), number=repeats)

    # Warm-up runs; their timings are discarded.
    _seconds(densify_then_add, 3)
    _seconds(direct_add, 3)

    s2d_total = _seconds(densify_then_add, num_iters)
    sa_total = _seconds(direct_add, num_iters)

  # per-iter latency; secs to millis
  return tuple(total * 1e3 / num_iters for total in (s2d_total, sa_total))
Exemple #36
0
def get_loss_vat(inputs, predictions, mask, is_train, model, placeholders,
                 predictions_var_scope):
    """Computes the virtual adversarial loss for the provided inputs.

    An adversarial perturbation is generated for `inputs`, the model is
    re-evaluated on the perturbed inputs with its variables reused, and the
    masked KL divergence between the original (gradient-stopped) logits and
    the perturbed logits is averaged over the selected samples.

    Args:
        inputs: A batch of input features, where the batch is the first
            dimension.
        predictions: The logits predicted by a model on the provided inputs.
        mask: A tensor of booleans specifying which samples to apply the
            virtual adversarial loss to.
        is_train: A boolean placeholder specifying if this is a training or
            testing setting.
        model: The model that generated the logits.
        placeholders: Placeholders for model encodings.
        predictions_var_scope: Variable scope for obtaining the predictions.

    Returns:
        A scalar tensor: the summed loss divided by the number of selected
        samples.
    """
    sample_weights = tf.cast(mask, dtype=tf.float32)
    perturbation = generate_virtual_adversarial_perturbation(
        inputs,
        predictions,
        model,
        placeholders,
        sample_weights,
        predictions_var_scope,
        is_train=is_train)

    # The clean logits act as a fixed target.
    clean_logits = tf.stop_gradient(predictions)
    perturbed_inputs = tf.sparse_add(inputs, perturbation)

    # Re-enter the prediction scope with reuse so the perturbed pass shares
    # the model's existing variables.
    with tf.variable_scope(predictions_var_scope,
                           auxiliary_name_scope=False,
                           reuse=True):
        perturbed_encoding, _, _ = model.get_encoding_and_params(
            inputs=perturbed_inputs,
            is_train=is_train,
            update_batch_stats=False,
            **placeholders)
        perturbed_logits, _, _ = model.get_predictions_and_params(
            encoding=perturbed_encoding, is_train=is_train, **placeholders)

    num_selected = tf.reduce_sum(sample_weights)
    per_sample_loss = kl_divergence_with_logit(clean_logits, perturbed_logits,
                                               sample_weights)
    return tf.reduce_sum(per_sample_loss) / num_selected
Exemple #37
0
    def build_predictor(self, recall_at):
        """Create the evaluation ops that rank all items for each user.

        Two rankings are produced from the user/item embedding product: a
        "cold" one from the raw scores and a "warm" one from the scores with
        the sparse ``eval_trainR`` placeholder added on top. Only the top-k
        indices are kept, with k taken from the last entry of ``recall_at``.

        Args:
            recall_at: Sequence of cut-offs; its last element is used as k.
        """
        self.eval_trainR = tf.sparse_placeholder(dtype=tf.float32,
                                                 shape=[None, None],
                                                 name='trainR_sparse')

        with tf.variable_scope("eval"):
            cold_scores = tf.matmul(self.U_embedding,
                                    self.V_embedding,
                                    transpose_b=True,
                                    name='pred_all_items')
            warm_scores = tf.sparse_add(cold_scores, self.eval_trainR)

            # top_k returns (values, indices); only the indices are stored.
            self.eval_preds_cold = tf.nn.top_k(cold_scores,
                                               k=recall_at[-1],
                                               sorted=True,
                                               name='topK_net_cold')[1]
            self.eval_preds_warm = tf.nn.top_k(warm_scores,
                                               k=recall_at[-1],
                                               sorted=True,
                                               name='topK_net_warm')[1]
Exemple #38
0
def ir_Block(X, S1, S2, config):
    """Build a value-iteration block driven by sparse transition matrices.

    Per action channel, a forward matrix ``P`` and a feedback matrix ``P_fb``
    are assembled as weighted sums of the sparse adjacency pieces returned by
    ``adjecent_sparse``. A reward map ``r`` is obtained from ``X`` with two
    convolutions, then ``q = P r + P_fb v`` and ``v = max_channel q`` are
    iterated. Finally the values at the positions given by ``S1`` / ``S2``
    are gathered and turned into logits.

    Args:
        X: Input tensor passed to ``conv2d_flipkernel``.
        S1: State coordinates; flattened and cast to int32.
        S2: State coordinates; flattened and cast to int32.
        config: Object providing ``ka``, ``k``, ``t``, ``ch_i``, ``ch_h``,
            ``ch_q``, ``statebatchsize``, ``imsize``, ``fixed`` and ``v``.

    Returns:
        Tuple ``(logits, output)`` where ``output`` is ``softmax(logits)``.
    """
    ka   = config.ka
    k    = config.k    # Number of value iterations performed
    t    = config.t
    ch_i = config.ch_i # Channels in input layer
    ch_h = config.ch_h # Channels in initial hidden layer
    ch_q = config.ch_q # Channels in q layer (~actions)
    state_batch_size = config.statebatchsize # k+1 state inputs for each channel
    img_s = config.imsize
    P = []
    P_fb = []

    # reference direction
    # NOTE(review): eight fixed angles - presumably ka == 8; confirm.
    theta_init = np.array([np.pi*3.0/4.0, np.pi/2.0, np.pi/4.0, np.pi, 0.0, np.pi*5.0/4.0, np.pi*3.0/2.0, np.pi*7.0/4.0], dtype=np.float32)

    # Per channel: ka directional weights plus one extra weight at index ka.
    wi = [tf.Variable(np.random.randn(ka+1) * 0.01, dtype=tf.float32) for i in range(ch_q)]
    # Reference angles are either the fixed constants above or trainable.
    if config.fixed is True:
        thetai = [theta_init for i in range(ch_q)]
        theta_fb = [theta_init for i in range(ch_q)]
    else:
        thetai = [tf.Variable(np.random.random(ka) * 2*np.pi, dtype=tf.float32) for i in range(ch_q)]
        theta_fb = [tf.Variable(np.random.random(ka) * 2*np.pi, dtype=tf.float32) for i in range(ch_q)]

    # coefficients in paper eq. 5 (section 6.3) on forward path
    coeff = []
    for i in range(ch_q):
        coeff_tmp = []
        theta = 0.0
        # One coefficient per direction, sampled in steps of pi/4.
        for j in range(ka):
            coeff_tmp.append(tf.reduce_sum(wi[i][:ka]*tf.cast(tf.pow(tf.div(1.0+tf.cos(theta - thetai[i]), 2.0), t), dtype=tf.float32)) + wi[i][ka])
            theta += np.pi/4.0
        # The extra weight alone is placed at position 4 of the list.
        # NOTE(review): presumably the centre of a 3x3 neighbourhood - confirm
        # against the ordering used by adjecent_sparse.
        coeff_tmp.insert(4, wi[i][ka])
        coeff.append(tf.stack(coeff_tmp))

    # coefficients in paper eq. 5 (section 6.3) on feedback path
    w_fb = [tf.Variable(np.random.randn(ka+1) * 0.01, dtype=tf.float32) for i in range(ch_q)]
    coeff_fb = []
    for i in range(ch_q):
        coeff_tmp = []
        theta = 0.0
        for j in range(ka):
            # NOTE(review): this uses thetai, not theta_fb; theta_fb is
            # created above but never read anywhere in this function.
            # Possibly a copy-paste slip - confirm before changing, since it
            # alters the model.
            coeff_tmp.append(tf.reduce_sum(w_fb[i][:ka]*tf.cast(tf.pow(tf.div(1.0+tf.cos(theta - thetai[i]), 2.0), t), dtype=tf.float32)) + w_fb[i][ka])
            theta += np.pi/4.0
        coeff_tmp.insert(4, w_fb[i][ka])
        coeff_fb.append(tf.stack(coeff_tmp))
    # Each adj_M[i] is used below as an (indices, values) pair.
    adj_M = adjecent_sparse(config.imsize, config.imsize)

    # obtain P (transition) and P_fb (transition for feedback channel)
    for j in range(ch_q):
        # Start from the sum of the first two weighted pieces, then fold in
        # the remaining ones one sparse_add at a time.
        tmp_p = tf.sparse_add(tf.cast(tf.SparseTensor(adj_M[0][0], adj_M[0][1]*coeff[j][0], [img_s*img_s, img_s*img_s]), tf.float32),
            tf.cast(tf.SparseTensor(adj_M[1][0], adj_M[1][1]*coeff[j][1], [img_s*img_s, img_s*img_s]), tf.float32))
        tmp_p_fb = tf.sparse_add(tf.cast(tf.SparseTensor(adj_M[0][0], adj_M[0][1]*coeff_fb[j][0], [img_s*img_s, img_s*img_s]), tf.float32),
            tf.cast(tf.SparseTensor(adj_M[1][0], adj_M[1][1]*coeff_fb[j][1], [img_s*img_s, img_s*img_s]), tf.float32))
        for i in range(2, len(adj_M)):
            tmp_p = tf.sparse_add(tmp_p, tf.cast(tf.SparseTensor(adj_M[i][0], adj_M[i][1]*coeff[j][i], [img_s*img_s, img_s*img_s]), tf.float32))
            tmp_p_fb = tf.sparse_add(tmp_p_fb, tf.cast(tf.SparseTensor(adj_M[i][0], adj_M[i][1]*coeff_fb[j][i], [img_s*img_s, img_s*img_s]), tf.float32))
        P.append(tmp_p)
        P_fb.append(tmp_p_fb)

    bias  = tf.Variable(np.random.randn(1, 1, 1, ch_h)    * 0.01, dtype=tf.float32)
    # weights from inputs to q layer (~reward in Bellman equation)
    w0    = tf.Variable(np.random.randn(3, 3, ch_i, ch_h) * 0.01, dtype=tf.float32)
    w1    = tf.Variable(np.random.randn(1, 1, ch_h, 1)    * 0.01, dtype=tf.float32)

    # feedback weights from v layer into q layer (~transition probabilities in Bellman equation)
    # only used when config.v is False
    w_o   = tf.Variable(np.random.randn(ch_q, 8)          * 0.01, dtype=tf.float32)

    # initial conv layer over image+reward prior
    h = conv2d_flipkernel(X, w0, name="h0") + bias

    r = conv2d_flipkernel(h, w1, name="r")
    r = tf.reshape(r, [-1, img_s * img_s, 1])
    # Flattened reward map, one row per sample.
    r_ = tf.reshape(r, [-1, img_s * img_s])

    # Initial q: forward transition applied to the reward only.
    q = []
    for i in range(ch_q):
        tmp = tf.transpose(tf.sparse_tensor_dense_matmul(P[i], tf.transpose(r_)))
        q.append(tmp)
    q = tf.transpose(tf.stack(q), [1,2,0])
    v = tf.reduce_max(q, axis=2, keep_dims=True, name="v")
    v_ = tf.reshape(v, [-1, img_s * img_s])

    # k - 1 iterations of q = P r + P_fb v, v = max over channels.
    # NOTE(review): the inner loop reuses the name `i`; the outer iteration
    # count is unaffected because it is driven by range().
    for i in range(0, k-1):
        q1, q2 = [], []
        for i in range(ch_q):
            q1.append(tf.transpose(tf.sparse_tensor_dense_matmul(P[i], tf.transpose(r_))))
            q2.append(tf.transpose(tf.sparse_tensor_dense_matmul(P_fb[i], tf.transpose(v_))))
        q1 = tf.transpose(tf.stack(q1), [1,2,0])
        q2 = tf.transpose(tf.stack(q2), [1,2,0])
        q = q1+q2
        v = tf.reduce_max(q, axis=2, keep_dims=True, name="v")
        v_ = tf.reshape(v, [-1, img_s * img_s])

    # do one last convolution
    q1, q2 = [], []
    for i in range(ch_q):
        q1.append(tf.transpose(tf.sparse_tensor_dense_matmul(P[i], tf.transpose(r_))))
        q2.append(tf.transpose(tf.sparse_tensor_dense_matmul(P_fb[i], tf.transpose(v_))))
    q1 = tf.transpose(tf.stack(q1), [1,2,0])
    q2 = tf.transpose(tf.stack(q2), [1,2,0])
    q = q1+q2
    q = tf.reshape(q, [-1, img_s, img_s, ch_q])
    # CHANGE TO THEANO ORDERING
    # Since we are selecting over channels, it becomes easier to work with
    # the tensor when it is in NCHW format vs NHWC
    q = tf.transpose(q, perm=[0, 3, 1, 2])
    if config.v is True:
        v = tf.reshape(v, [-1, img_s, img_s, 1])
        v = tf.transpose(v, perm=[0, 3, 1, 2])

    # Select the conv-net channels at the state position (S1,S2).
    # This intuitively corresponds to each channel representing an action, and the convnet the Q function.
    # The tricky thing is we want to select the same (S1,S2) position *for each* channel and for each sample
    # TODO: performance can be improved here by substituting expensive
    #       transpose calls with better indexing for gather_nd
    bs = tf.shape(q)[0]
    # Sample index repeated state_batch_size times, flattened.
    rprn = tf.reshape(tf.tile(tf.reshape(tf.range(bs), [-1, 1]), [1, state_batch_size]), [-1])
    ins1 = tf.cast(tf.reshape(S1, [-1]), tf.int32)
    ins2 = tf.cast(tf.reshape(S2, [-1]), tf.int32)
    # Rows of (s1, s2, sample) used to index q transposed to [H, W, N, C].
    idx_in = tf.transpose(tf.stack([ins1, ins2, rprn]), [1, 0])

    if config.v is True:
        # Logits come from the values extracted around each state.
        v_out = tf.transpose(extract_circle(rprn, ins1, ins2, v), [1,0,2])
        v_out = tf.squeeze(v_out)
        logits = v_out
    else:
        # Logits come from the gathered q values projected by w_o.
        q_out = tf.gather_nd(tf.transpose(q, [2, 3, 0, 1]), idx_in, name="q_out")
        logits = tf.matmul(q_out, w_o)
    
    # softmax output weights
    output = tf.nn.softmax(logits, name="output")
    return logits, output