def testSmallValuesShouldVanish(self):
    """Check that `thresh` removes small-magnitude entries from a sparse sum.

    The sum of the two 3x3 fixtures is expected to be::

        [       2      ]
        [ .1           ]
        [  6   -.2     ]

    With ``thresh=0.21`` both ``.1`` and ``-.2`` must vanish; with
    ``thresh=0.11`` only ``.1`` does.
    """
    with self.test_session(use_gpu=False) as sess:
        sp_a = self._SparseTensor_3x3()
        sp_b = self._SparseTensor_3x3_v2()

        # Two values should vanish: |.1| < .21, and |-.2| < .21.
        sp_sum = tf.sparse_add(sp_a, sp_b, thresh=0.21)
        sum_out = sess.run(sp_sum)

        self.assertEqual(sp_sum.dense_shape.get_shape(), [2])
        self.assertAllEqual(sum_out.indices, [[0, 1], [2, 0]])
        self.assertAllEqual(sum_out.values, [2, 6])
        # Read the evaluated shape through `dense_shape`, the same attribute
        # name used on the symbolic tensor above and in the sibling tests.
        self.assertAllEqual(sum_out.dense_shape, [3, 3])

        # Only .1 vanishes.
        sp_sum = tf.sparse_add(sp_a, sp_b, thresh=0.11)
        sum_out = sess.run(sp_sum)

        self.assertEqual(sp_sum.dense_shape.get_shape(), [2])
        self.assertAllEqual(sum_out.indices, [[0, 1], [2, 0], [2, 1]])
        self.assertAllClose(sum_out.values, [2, 6, -.2])
        self.assertAllEqual(sum_out.dense_shape, [3, 3])
def testAddSparseDense(self):
    """Check `tf.sparse_add` of a sparse and a dense operand.

    Runs for several value dtypes and both index dtypes, in both operand
    orders, and compares the result and its dtype against the NumPy sum.
    """
    np.random.seed(1618)  # Make it reproducible.
    n, m = np.random.randint(30, size=2)

    value_dtypes = [np.float32, np.float64, np.int64, np.complex64]
    index_dtypes = [np.int32, np.int64]

    for dtype in value_dtypes:
        for index_dtype in index_dtypes:
            rand_vals_np = np.random.randn(n, m).astype(dtype)
            dense_np = np.random.randn(n, m).astype(dtype)

            with self.test_session(use_gpu=False):
                sparse, _ = _sparsify(rand_vals_np, index_dtype=index_dtype)

                def check_sum(result):
                    self.assertAllEqual(dense_np + rand_vals_np, result)
                    self.assertTrue(result.dtype == dtype)

                check_sum(tf.sparse_add(sparse, tf.constant(dense_np)).eval())
                # Commutativity: swapping the operands must not matter.
                check_sum(tf.sparse_add(tf.constant(dense_np), sparse).eval())
def next_batch(self):
    '''
    Dequeue the next batch from the combined switchable queue.

    Returns the source batch, the source lengths and the targets as a sparse
    tensor whose stored values have been decremented by one. The target
    lengths are dequeued as well but are not returned.
    '''
    batch_size = self._model_feeder.ph_batch_size
    source, source_lengths, target, _target_lengths = self._queue.dequeue_many(batch_size)

    # Back to sparse; adding a sparse tensor of -1s at the same indices
    # subtracts one from every stored label to get the real labels.
    labels = tf.contrib.layers.dense_to_sparse(target)
    minus_ones = tf.SparseTensor(labels.indices,
                                 -1 * tf.ones_like(labels.values),
                                 labels.dense_shape)
    real_labels = tf.sparse_add(labels, minus_ones)
    return source, source_lengths, real_labels
def sp_attn_head(seq, out_sz, adj_mat, activation, nb_nodes, in_drop=0.0, coef_drop=0.0, residual=False):
    """Sparse single-head graph attention layer.

    Args:
        seq: input features fed to a width-1 ``conv1d``; the code below
            assumes a batch of size 1 (see the reshape note).
        out_sz: number of output features per node.
        adj_mat: sparse adjacency structure; it is multiplied element-wise
            with the per-node attention terms to form the logits.
        activation: callable applied to the final output.
        nb_nodes: number of nodes, used for the explicit reshapes.
        in_drop: dropout rate applied to the input and transformed features.
        coef_drop: dropout rate applied to the attention coefficients.
        residual: when true, add a residual connection from ``seq``.

    Returns:
        ``activation(ret)`` where ``ret`` has static shape
        ``[1, nb_nodes, out_sz]``.
    """
    with tf.name_scope('sp_attn'):
        if in_drop != 0.0:
            seq = tf.nn.dropout(seq, 1.0 - in_drop)

        seq_fts = tf.layers.conv1d(seq, out_sz, 1, use_bias=False)

        # simplest self-attention possible
        f_1 = tf.layers.conv1d(seq_fts, 1, 1)
        f_2 = tf.layers.conv1d(seq_fts, 1, 1)

        f_1 = tf.reshape(f_1, (nb_nodes, 1))
        f_2 = tf.reshape(f_2, (nb_nodes, 1))

        # Broadcasting against the sparse adjacency keeps only edge entries.
        f_1 = adj_mat * f_1
        f_2 = adj_mat * tf.transpose(f_2, [1, 0])

        logits = tf.sparse_add(f_1, f_2)
        lrelu = tf.SparseTensor(indices=logits.indices,
                                values=tf.nn.leaky_relu(logits.values),
                                dense_shape=logits.dense_shape)
        coefs = tf.sparse_softmax(lrelu)

        if coef_drop != 0.0:
            coefs = tf.SparseTensor(indices=coefs.indices,
                                    values=tf.nn.dropout(coefs.values, 1.0 - coef_drop),
                                    dense_shape=coefs.dense_shape)
        if in_drop != 0.0:
            seq_fts = tf.nn.dropout(seq_fts, 1.0 - in_drop)

        # As tf.sparse_tensor_dense_matmul expects its arguments to have rank-2,
        # here we make an assumption that our input is of batch size 1, and reshape appropriately.
        # The method will fail in all other cases!
        coefs = tf.sparse_reshape(coefs, [nb_nodes, nb_nodes])
        seq_fts = tf.squeeze(seq_fts)
        vals = tf.sparse_tensor_dense_matmul(coefs, seq_fts)
        vals = tf.expand_dims(vals, axis=0)
        vals.set_shape([1, nb_nodes, out_sz])
        ret = tf.contrib.layers.bias_add(vals)

        # residual connection
        if residual:
            if seq.shape[-1] != ret.shape[-1]:
                # Project the input to the output width before adding.
                ret = ret + conv1d(seq, ret.shape[-1], 1)  # activation
            else:
                # Widths already match: add the input directly. The result
                # must land in `ret`, which is what gets returned; assigning
                # it to `seq_fts` silently dropped the residual.
                ret = ret + seq

        return activation(ret)  # activation
def setupQ(self, init):
    """Create the per-core variables and return the list of core factors.

    For the first core, the last core, or whenever ``self.r == 1``, the
    factor is an ``[r, 1]`` variable used as is. Otherwise a variable with
    r-choose-2 entries fills the strict upper triangle of an ``r x r``
    matrix, which is made skew-symmetric and mapped through the Cayley
    transform ``(I - A) (I + A)^-1``.

    Args:
        init: initializer handed to every ``tf.get_variable`` call.

    Returns:
        A list holding one tensor per ``(i, j, k)`` triple, in loop order.
    """
    rank = self.r
    # Only R choose 2 parameters are needed for a skew-symmetric matrix.
    n_free = int(rank * (rank - 1) / 2)

    # (row, col) positions of the upper triangle without the diagonal.
    upper_pairs = [list(pair) for pair in zip(*np.triu_indices(rank, k=1))]
    indices = tf.constant(upper_pairs, dtype=tf.int64)

    Q = []
    for i in range(self.d):
        for j in range(self.n_out[i]):
            for k in range(self.n_in[i]):
                vname = self._name + str(i).zfill(4) + str(j).zfill(4) + str(k).zfill(4)

                if i == 0 or i == self.d - 1 or rank == 1:
                    # Vector for first and last cores of TT.
                    Q.append(tf.get_variable(vname, shape=[rank, 1], initializer=init))
                    continue

                # Sparse representation of the skew-symmetric matrix.
                free_params = tf.get_variable(vname, shape=[n_free, 1], initializer=init)
                upper_sparse = tf.SparseTensor(indices=indices,
                                               values=tf.squeeze(free_params),
                                               dense_shape=[rank, rank])
                # Adding to a dense zero tensor yields the dense representation.
                upper = tf.sparse_add(upper_sparse, tf.zeros(upper_sparse.dense_shape))

                skew = upper - tf.transpose(upper)

                # Cayley transform to Orthogonal SO(r).
                identity = tf.eye(rank)
                Q.append(tf.matmul(identity - skew, tf.matrix_inverse(identity + skew)))
    return Q
def sp_attn_head(self, seq, in_sz, out_sz, adj_mat, activation, in_drop=0.0, coef_drop=0.0, residual=False,
                 layer_str="", sparse_inputs=False, reuse_scope=None):
    """ Sparse Attention Head for the GAT layer.

    Note: the variable scope is necessary to avoid variable duplication across snapshots.

    Args:
        seq: node features; a sparse tensor is expected when
            ``sparse_inputs`` is true (a dense fallback is attempted).
        in_sz: input feature width, used for the weight shapes.
        out_sz: output feature width.
        adj_mat: sparse adjacency, annotated ``[N, N]`` in the code.
        activation: callable applied to the final output.
        in_drop: dropout rate on the transformed features.
        coef_drop: dropout rate on the attention coefficients.
        residual: when true, add a learned projection of ``seq``.
        layer_str: tag embedded in every variable/layer name.
        sparse_inputs: selects the sparse-matmul feature transform.
        reuse_scope: forwarded as ``reuse`` to the scope and the layers.

    Returns:
        ``activation(ret)``, with ``ret`` annotated as ``[1, N, F]``.
    """
    with tf.variable_scope('struct_attn', reuse=reuse_scope):
        if sparse_inputs:
            weight_var = tf.get_variable("layer_" + str(layer_str) + "_weight_transform",
                                         shape=[in_sz, out_sz], dtype=tf.float32)
            new_temporal_weight_var = tf.get_variable("layer_" + str(layer_str) + "_new_weight_transform",
                                                      shape=[out_sz, out_sz], dtype=tf.float32)
            try:
                seq_fts = tf.expand_dims(tf.sparse_tensor_dense_matmul(seq, weight_var), axis=0)  # [N, F]
            except Exception:
                # Best-effort fallback when `seq` cannot go through the sparse
                # matmul: treat it as dense and use the second weight matrix.
                # `Exception` (not a bare except) so that KeyboardInterrupt
                # and SystemExit are no longer swallowed here.
                seq_fts = tf.expand_dims(tf.matmul(seq, new_temporal_weight_var), axis=0)  # [N, F]
        else:
            seq_fts = tf.layers.conv1d(seq, out_sz, 1, use_bias=False,
                                       name='layer_' + str(layer_str) + '_weight_transform', reuse=reuse_scope)

        # Additive self-attention.
        f_1 = tf.layers.conv1d(seq_fts, 1, 1, name='layer_' + str(layer_str) + '_a1', reuse=reuse_scope)
        f_2 = tf.layers.conv1d(seq_fts, 1, 1, name='layer_' + str(layer_str) + '_a2', reuse=reuse_scope)
        f_1 = tf.reshape(f_1, [-1, 1])  # [N, 1]
        f_2 = tf.reshape(f_2, [-1, 1])  # [N, 1]

        logits = tf.sparse_add(adj_mat * f_1, adj_mat * tf.transpose(f_2))  # adj_mat is [N, N] (sparse)

        leaky_relu = tf.SparseTensor(indices=logits.indices,
                                     values=self.leaky_relu(logits.values),
                                     dense_shape=logits.dense_shape)
        coefficients = tf.sparse_softmax(leaky_relu)  # [N, N] (sparse)

        if coef_drop != 0.0:
            coefficients = tf.SparseTensor(indices=coefficients.indices,
                                           values=tf.nn.dropout(coefficients.values, 1.0 - coef_drop),
                                           dense_shape=coefficients.dense_shape)  # [N, N] (sparse)
        if in_drop != 0.0:
            seq_fts = tf.nn.dropout(seq_fts, 1.0 - in_drop)  # [N, D]

        seq_fts = tf.squeeze(seq_fts)
        values = tf.sparse_tensor_dense_matmul(coefficients, seq_fts)
        values = tf.reshape(values, [-1, out_sz])
        values = tf.expand_dims(values, axis=0)
        ret = values  # [1, N, F]

        if residual:
            residual_wt = tf.get_variable("layer_" + str(layer_str) + "_residual_weight",
                                          shape=[in_sz, out_sz], dtype=tf.float32)
            if sparse_inputs:
                ret = ret + tf.expand_dims(tf.sparse_tensor_dense_matmul(seq, residual_wt),
                                           axis=0)  # [N, F] * [F, D] = [N, D].
            else:
                ret = ret + tf.layers.conv1d(seq, out_sz, 1, use_bias=False,
                                             name='layer_' + str(layer_str) + '_residual_weight',
                                             reuse=reuse_scope)
        return activation(ret)
def _call(self):
    """Apply dropout, the graph convolution and the optional bias.

    With ``self.use_theta`` the supports are scaled by their ``theta_i``
    variables and summed into one sparse operator ``H`` that is applied to
    ``x @ weight``. Otherwise each support is applied to ``x @ weights_i``
    and the results are summed.

    Returns:
        ``(output, return_without_w1)`` when ``self.relu_flag == False``,
        otherwise ``self.act(output)``.

    NOTE(review): ``return_without_w1`` is only assigned on the non-theta,
    non-featureless path, so the tuple return appears to rely on that
    configuration -- confirm against the callers.
    """
    x = self.input

    # dropout
    if self.sparse_inputs:
        x = sparse_dropout(x, 1 - self.dropout, self.num_features_nonzero)
    else:
        x = tf.nn.dropout(x, 1 - self.dropout)

    # convolve
    supports = list()
    H = None
    for i in range(len(self.support)):
        if self.use_theta:
            # Identity check: `!= None` would go through any comparison
            # operator overloaded by the tensor type.
            if H is not None:
                H = tf.sparse_add(
                    H, self.support[i] * self.vars['theta_' + str(i)])
            else:
                H = self.support[i] * self.vars['theta_' + str(i)]
        else:
            if not self.featureless:
                pre_sup = dot(x, self.vars['weights_' + str(i)],
                              sparse=self.sparse_inputs)
                # Same product with an identity matrix in place of the weights.
                pre_sup_2 = dot(x, tf.eye(self.input_dim),
                                sparse=self.sparse_inputs)
            else:
                pre_sup = self.vars['weights_' + str(i)]
            support = dot(self.support[i], pre_sup, sparse=True)
            supports.append(support)
            return_without_w1 = tf.sparse_tensor_dense_matmul(
                self.support[i], pre_sup_2)

    if self.use_theta:
        output = dot(H,
                     dot(x, self.vars['weight'], sparse=self.sparse_inputs),
                     sparse=True)
    else:
        output = tf.add_n(supports)

    # bias
    if self.bias:
        output += self.vars['bias']

    print('relu_flag', self.relu_flag)
    # Kept as an equality test so that values such as None still take the
    # activation path exactly as before.
    if self.relu_flag == False:
        return output, return_without_w1
    return self.act(output)
def sparseGating(inputs_, gates=2):
    """Keep the top-`gates` entries of `inputs_` and set the rest to -inf.

    The selected values are scattered into a dense tensor. Every position
    whose scattered value is <= 0 (unselected positions, and selected ones
    with a non-positive value) ends up as -inf; the others carry their
    original value.

    Args:
        inputs_: dense float tensor; ``top_k`` runs over its last axis.
        gates: number of entries to keep.

    Returns:
        The output of a ``Lambda`` layer that yields the gated tensor.
    """
    # One top_k op provides both indices and values, so the two are
    # guaranteed to describe the same selection.
    top = tf.math.top_k(inputs_, gates, sorted=False)
    indi = tf.cast(top.indices, dtype=tf.int64)
    v = top.values

    sparse_indices = slices_to_dims(indi)
    sparse = tf.SparseTensor(indices=sparse_indices,
                             values=tf.reshape(v, [-1]),
                             dense_shape=tf.cast(tf.shape(inputs_), dtype=tf.int64))

    # Dense tensor that is zero everywhere except at the selected entries.
    c = tf.zeros_like(inputs_)
    d = tf.sparse_add(c, sparse)

    z = tf.ones_like(inputs_) * -np.inf
    mask = tf.less_equal(d, tf.zeros_like(d))
    # -inf * 1 stays -inf where the mask is set; -inf * 0 gives NaN
    # elsewhere, which is turned into 0 right below.
    new_tensor = tf.multiply(z, tf.cast(mask, dtype=tf.float32))
    g = tf.where(tf.is_nan(new_tensor), tf.zeros_like(new_tensor), new_tensor)

    g = tf.sparse_add(g, sparse)
    b = Lambda(lambda a: g)(inputs_)
    return b
def testAddSelf(self):
    """Add the 3x3 fixture to itself for every operand-type combination.

    Both the value form and the tensor form of the fixture are used on
    either side; each sum must have the same indices, values and shape.
    """
    expected_indices = [[0, 1], [1, 0], [2, 0], [2, 1]]
    expected_values = [2, 4, 6, 8]
    expected_shape = [3, 3]

    with self.test_session(use_gpu=False) as sess:
        for lhs in (self._SparseTensorValue_3x3(), self._SparseTensor_3x3()):
            for rhs in (self._SparseTensorValue_3x3(), self._SparseTensor_3x3()):
                total = tf.sparse_add(lhs, rhs)
                evaluated = sess.run(total)

                self.assertEqual(total.dense_shape.get_shape(), [2])
                self.assertAllEqual(evaluated.indices, expected_indices)
                self.assertAllEqual(evaluated.values, expected_values)
                self.assertAllEqual(evaluated.dense_shape, expected_shape)
def sp_attn_head(seq, out_sz, adj_mat, adj_hop1_all_mat, adj_hop2_all_mat, adj_hop1_neig_mat, adj_hop2_neig_mat,
                 N_hop1_neig_mat, N_hop2_neig_mat, activation, nb_nodes, in_drop=0.0, coef_drop=0.0,
                 residual=False):
    """Sparse single-head graph attention layer (first GAT layer).

    Only ``seq``, ``out_sz``, ``adj_mat``, ``activation``, ``nb_nodes``,
    ``in_drop`` and ``coef_drop`` are read; the hop/neighbour matrices and
    ``residual`` are accepted but unused in this body.

    Returns:
        ``activation(ret)`` where ``ret`` has static shape
        ``[1, nb_nodes, out_sz]``.
    """
    with tf.name_scope('sp_attn'):
        features_in = seq
        if in_drop != 0.0:
            features_in = tf.nn.dropout(features_in, 1.0 - in_drop)

        transformed = tf.layers.conv1d(features_in, out_sz, 1, use_bias=False)

        # Simplest self-attention possible: one scalar per node and side.
        self_term = tf.reshape(tf.layers.conv1d(transformed, 1, 1), (nb_nodes, 1))
        neigh_term = tf.reshape(tf.layers.conv1d(transformed, 1, 1), (nb_nodes, 1))

        # Multiplying with the sparse adjacency keeps edge entries only.
        edge_self = adj_mat * self_term
        edge_neigh = adj_mat * tf.transpose(neigh_term, [1, 0])

        logits = tf.sparse_add(edge_self, edge_neigh)
        activated = tf.SparseTensor(indices=logits.indices,
                                    values=tf.nn.leaky_relu(logits.values),
                                    dense_shape=logits.dense_shape)
        coefs = tf.sparse_softmax(activated)

        if coef_drop != 0.0:
            dropped_values = tf.nn.dropout(coefs.values, 1.0 - coef_drop)
            coefs = tf.SparseTensor(indices=coefs.indices,
                                    values=dropped_values,
                                    dense_shape=coefs.dense_shape)
        if in_drop != 0.0:
            transformed = tf.nn.dropout(transformed, 1.0 - in_drop)

        # Rank-2 operands are needed for the sparse/dense matmul.
        coefs = tf.sparse_reshape(coefs, [nb_nodes, nb_nodes])
        transformed = tf.squeeze(transformed)
        aggregated = tf.sparse_tensor_dense_matmul(coefs, transformed)
        aggregated = tf.expand_dims(aggregated, axis=0)
        aggregated.set_shape([1, nb_nodes, out_sz])

        ret = tf.contrib.layers.bias_add(aggregated)
        return activation(ret)  # activation
def testAddSelfAndNegation(self):
    """Adding the fixture to its negation with a threshold leaves no entries.

    The sum is computed with a threshold of ``0.1`` and must have empty
    indices and values while keeping the 3x3 dense shape.
    """
    with self.test_session(use_gpu=False) as sess:
        sp_a = self._SparseTensor_3x3()
        sp_b = self._SparseTensor_3x3(negate=True)

        sp_sum = tf.sparse_add(sp_a, sp_b, 0.1)
        sum_out = sess.run(sp_sum)

        self.assertEqual(sp_sum.dense_shape.get_shape(), [2])
        self.assertAllEqual(sum_out.indices, np.empty([0, 2]))
        self.assertAllEqual(sum_out.values, [])
        # Read the evaluated shape through `dense_shape`, matching the
        # attribute used on the symbolic tensor above.
        self.assertAllEqual(sum_out.dense_shape, [3, 3])
def build(self):
    """Build the graph: edge strengths, transition matrix, PageRank and loss.

    Creates placeholders for the features, the adjacency matrix and the
    source vertex, turns the edge strengths into a dense matrix ``A`` laid
    out on the positive entries of ``adj``, derives the transition matrix
    and runs the iterative PageRank. In ``'training'`` mode it also builds
    the loss and stores ``self.loss``, ``self.vertices`` and
    ``self.destinations``; otherwise it stores ``self.result``.
    """
    features = tf.placeholder(dtype=tf.float32,
                              shape=[None, self.config.num_features])
    w = tf.Variable(tf.ones((self.config.num_features, 1)))
    adj = tf.placeholder(
        dtype=tf.float32,
        shape=[self.config.num_vertices, self.config.num_vertices])
    source = tf.placeholder(dtype=tf.int32, shape=[1])

    strengths = self.logistic_edge_strength_function(
        w, features) + self.config.small_epsilon

    # One strength value per positive entry of `adj`.
    A = tf.SparseTensor(
        tf.where(tf.greater(adj, tf.constant(0, dtype=tf.float32))),
        strengths[:, 0],
        dense_shape=tf.shape(adj, out_type=tf.int64))
    # hack for bug in tensorflow because sparse_tensor_to_dense() does not have gradient
    A = tf.sparse_add(tf.zeros(tf.cast(A.dense_shape, tf.int32)), A)

    if self.hybrid_weights:
        row_sum = tf.reduce_sum(adj, axis=1)
        col_sum = tf.reduce_sum(adj, axis=0)
        # 0.5 instead of `1 / 2`: the latter is 0 under Python 2 integer
        # division and would zero out the whole term.
        W = 0.5 * (tf.matmul(A, (adj / tf.reshape(col_sum, (1, -1)))) +
                   tf.matmul((adj / tf.reshape(row_sum, (-1, 1))), A))
        W2 = 0.5 * (A + W)
        Q_prim = self.get_stochastic_transition_matrix(W2)
    else:
        Q_prim = self.get_stochastic_transition_matrix(A)

    Q = self.get_transition_matrix(Q_prim, source, self.config.alpha)
    p = self.iterative_page_rank(Q, self.config.epsilon,
                                 self.config.max_iter)

    if self.mode == 'training':
        vertices = tf.placeholder(dtype=tf.int32, shape=[None])
        destinations = tf.placeholder(dtype=tf.int32, shape=[None])
        # Vertices that are neither destinations nor the source.
        l_set = tf.sets.set_difference(
            tf.expand_dims(vertices, axis=0),
            tf.sets.set_union(tf.expand_dims(destinations, axis=0),
                              tf.expand_dims(source, axis=0)))
        l_set = tf.sparse_tensor_to_dense(l_set)[0]
        diff = self.get_differences(p, l_set, destinations)
        loss = tf.reduce_sum(tf.square(w)) + self.loss_function(
            diff, self.config.margin_loss)
        self.loss = tf.reduce_sum(loss)
        self.vertices = vertices
        self.destinations = destinations
    else:
        self.result = p

    self.features = features
    self.adj = adj
    self.source = source
def next_batch(self):
    '''
    Dequeue the next batch from the combined switchable queue.

    Returns a triple of the source batch, the source lengths and the sparse
    targets with one subtracted from every stored value. Target lengths
    are dequeued too but are not part of the return value.
    '''
    dequeued = self._queue.dequeue_many(self._model_feeder.ph_batch_size)
    source, source_lengths, target, _target_lengths = dequeued

    # Back to sparse, then add -1 at every stored index to get the real labels.
    sparse_labels = tf.contrib.layers.dense_to_sparse(target)
    offsets = tf.SparseTensor(sparse_labels.indices,
                              -1 * tf.ones_like(sparse_labels.values),
                              sparse_labels.dense_shape)
    return source, source_lengths, tf.sparse_add(sparse_labels, offsets)
def testAddSelfAndNegation(self):
    """Sum the 3x3 fixture with its negation using a threshold of 0.1.

    The evaluated sum must have no indices and no values, and keep the
    3x3 dense shape.
    """
    with self.test_session(use_gpu=False) as sess:
        positive = self._SparseTensor_3x3()
        negative = self._SparseTensor_3x3(negate=True)

        total = tf.sparse_add(positive, negative, 0.1)
        evaluated = sess.run(total)

        self.assertEqual(total.dense_shape.get_shape(), [2])
        self.assertAllEqual(evaluated.indices, np.empty([0, 2]))
        self.assertAllEqual(evaluated.values, [])
        self.assertAllEqual(evaluated.dense_shape, [3, 3])
def testSparseTensorDenseAddGradients(self):
    """Numerically check the gradients of sparse + dense addition.

    The gradient error with respect to the sparse values and the dense
    operand must stay below 1e-3.
    """
    np.random.seed(1618)  # Make it reproducible.
    n, m = np.random.randint(30, size=2)
    rand_vals_np = np.random.randn(n, m).astype(np.float32)
    dense_np = np.random.randn(n, m).astype(np.float32)

    with self.test_session(use_gpu=False):
        sparse, nnz = _sparsify(rand_vals_np)
        dense = tf.constant(dense_np, dtype=tf.float32)
        total = tf.sparse_add(sparse, dense)

        inputs = [sparse.values, dense]
        input_shapes = [(nnz,), (n, m)]
        err = tf.test.compute_gradient_error(inputs, input_shapes,
                                             total, (n, m))
        self.assertLess(err, 1e-3)
def testAddSelf(self):
    """Add the 3x3 fixture to itself for every operand-type combination.

    Both the value form and the tensor form of the fixture are used on
    either side of the addition.
    """
    with self.test_session(use_gpu=False) as sess:
        for sp_a in (self._SparseTensorValue_3x3(), self._SparseTensor_3x3()):
            for sp_b in (self._SparseTensorValue_3x3(), self._SparseTensor_3x3()):
                sp_sum = tf.sparse_add(sp_a, sp_b)
                sum_out = sess.run(sp_sum)

                self.assertEqual(sp_sum.dense_shape.get_shape(), [2])
                self.assertAllEqual(
                    sum_out.indices, [[0, 1], [1, 0], [2, 0], [2, 1]])
                self.assertAllEqual(sum_out.values, [2, 4, 6, 8])
                # Read the evaluated shape through `dense_shape`, matching
                # the attribute used on the symbolic tensor above.
                self.assertAllEqual(sum_out.dense_shape, [3, 3])
def _apply_sparse(self, grad, var):
    """
    Sparse update step: updates the "m" and "v" slots at the gradient's
    indices, combines the bias-corrected moments with the momentum schedule
    and subtracts the resulting step from ``var``.

    :param tf.IndexedSlices grad:
    :param tf.Variable var:
    :return: group of update operations
    :rtype: tf.Operation
    """
    beta2_power = tf.cast(self._beta2_power, var.dtype.base_dtype)
    lr_t = tf.cast(self._lr_t, var.dtype.base_dtype)
    beta1_t = tf.cast(self._beta1_t, var.dtype.base_dtype)
    beta2_t = tf.cast(self._beta2_t, var.dtype.base_dtype)
    epsilon_t = tf.cast(self._epsilon_t, var.dtype.base_dtype)
    mu_t = tf.cast(self._mu_t, var.dtype.base_dtype)
    mu_t_next = tf.cast(self._mu_t_next, var.dtype.base_dtype)
    mu_prod_t_next = tf.cast(self._mu_prod_t_next, var.dtype.base_dtype)
    mu_prod_t_next2 = tf.cast(self._mu_prod_t_next2, var.dtype.base_dtype)

    m_prev = self.get_slot(var, "m")
    v_prev = self.get_slot(var, "v")

    # called m_t in paper
    # Decay the whole slot first, then add the new gradient only at the
    # rows the sparse gradient touches.
    m = beta1_t * m_prev
    m = tf.assign(m_prev, m, use_locking=self._use_locking)
    m = tf.scatter_add(m, grad.indices, (1 - beta1_t) * grad.values,
                       use_locking=self._use_locking)
    m_update = m
    # bias correction (with momentum schedule (include the next t+1))
    m_ = m / (1 - mu_prod_t_next2)

    # called n_t in paper
    v = beta2_t * v_prev
    v = tf.assign(v_prev, v, use_locking=self._use_locking)
    v = tf.scatter_add(v, grad.indices,
                       (1 - beta2_t) * (grad.values * grad.values),
                       use_locking=self._use_locking)
    v_update = v
    v_ = v / (1 - beta2_power)

    # tf.sparse_add needs at least one SparseTensor operand; a dense tensor
    # plus tf.IndexedSlices raises TypeError. Scatter the scaled gradient
    # rows into a dense tensor shaped like `var` (duplicate indices are
    # summed) and add it densely instead.
    grad_term = tf.scatter_nd(
        tf.expand_dims(grad.indices, axis=-1),
        (1 - mu_t) * grad.values / (1 - mu_prod_t_next),
        tf.shape(var, out_type=grad.indices.dtype))
    m__ = mu_t_next * m_ + grad_term

    step = lr_t * m__ / (tf.sqrt(v_) + epsilon_t)
    var_update = tf.assign_sub(var, step, use_locking=self._use_locking)
    return tf.group(var_update, m_update, v_update)
def _weights_jac_a(
        self,
        X,
        loc,
        scale,
):
    """Compute the weighting term built from `X`, `loc` and `scale`.

    The result is
    ``(digamma((1 - loc) * scale) - digamma(loc * scale) + log(X / (1 - X)))
    * scale * loc * (1 - loc)``.

    Args:
        X: dense tensor or ``tf.SparseTensor``; a sparse input is first
            combined with dense tensors shaped like ``loc``.
        loc: dense tensor.
        scale: dense tensor or scalar multiplied with ``loc``.

    Returns:
        The dense tensor described above.
    """
    one_minus_loc = 1 - loc
    if isinstance(X, tf.SparseTensor):
        # Adding the sparse X to dense operands gives dense X and X - 1.
        x_dense = tf.sparse_add(tf.zeros_like(loc), X)
        x_minus_one = tf.sparse.add(X, -tf.ones_like(loc))
        # Plain `/` instead of calling the `__div__` dunder directly.
        const1 = tf.log(x_dense / -x_minus_one)
    else:
        const1 = tf.log(X / (1 - X))
    const2 = - tf.digamma(loc * scale) + tf.digamma(one_minus_loc * scale) + const1
    const = const2 * scale * loc * one_minus_loc
    return const
def testGradients(self):
    """Numerically check the gradients of sparse + sparse addition.

    Runs over several ``(n, m)`` shapes; the gradient error with respect
    to both operands' values must stay below 1e-3.
    """
    np.random.seed(1618)  # Make it reproducible.
    row_counts = [10, 31]
    col_counts = [4, 17]
    with self.test_session(use_gpu=False):
        for n in row_counts:
            for m in col_counts:
                sp_a, nnz_a = self._randomTensor([n, m], np.float32)
                sp_b, nnz_b = self._randomTensor([n, m], np.float32)
                total = tf.sparse_add(sp_a, sp_b)
                # The size of the result is only known after evaluation.
                nnz_total = len(total.values.eval())

                inputs = [sp_a.values, sp_b.values]
                input_shapes = [(nnz_a, ), (nnz_b, )]
                err = tf.test.compute_gradient_error(
                    inputs, input_shapes, total.values, (nnz_total, ))
                self.assertLess(err, 1e-3)
def testSparseTensorDenseAddGradients(self):
    """Gradient check for adding a sparse tensor to a dense one.

    Asserts that the numerical gradient error over the sparse values and
    the dense operand is below 1e-3.
    """
    np.random.seed(1618)  # Make it reproducible.
    n, m = np.random.randint(30, size=2)
    sparse_source = np.random.randn(n, m).astype(np.float32)
    dense_source = np.random.randn(n, m).astype(np.float32)

    with self.test_session(use_gpu=False):
        sparse, nnz = _sparsify(sparse_source)
        dense = tf.constant(dense_source, dtype=tf.float32)
        result = tf.sparse_add(sparse, dense)

        out_shape = (n, m)
        err = tf.test.compute_gradient_error(
            [sparse.values, dense], [(nnz, ), out_shape], result, out_shape)
        self.assertLess(err, 1e-3)
def testGradients(self):
    """Gradient check for the sum of two random sparse tensors.

    For each tested shape, the numerical gradient error over both
    operands' values must be below 1e-3.
    """
    np.random.seed(1618)  # Make it reproducible.
    with self.test_session(use_gpu=False):
        for n in [10, 31]:
            for m in [4, 17]:
                shape = [n, m]
                lhs, lhs_nnz = self._randomTensor(shape, np.float32)
                rhs, rhs_nnz = self._randomTensor(shape, np.float32)

                sp_sum = tf.sparse_add(lhs, rhs)
                # Evaluate once to learn how many values the sum holds.
                sum_nnz = len(sp_sum.values.eval())

                err = tf.test.compute_gradient_error(
                    [lhs.values, rhs.values],
                    [(lhs_nnz,), (rhs_nnz,)],
                    sp_sum.values,
                    (sum_nnz,))
                self.assertLess(err, 1e-3)
def _gen_negsample(self):
    """Build the op that samples negative items for each user.

    Creates the model loss, masks the positions listed in ``self.i_pos``
    with ``-inf`` (full sampling only) and draws ``self.num_neg`` samples
    per row with ``tf.multinomial``. Stores ``self.all_logits_masked`` and
    ``self.negsamples`` (reshaped to ``[-1, 1]``).
    """
    self.model._create_loss()

    # Sparse indicator with a 1.0 at every position in `self.i_pos`.
    positive_ones = tf.ones([tf.shape(self.i_pos)[0]], dtype=tf.float32)
    n_users = tf.shape(self.user_input, out_type=tf.int64)[0]
    user_i_pos = tf.SparseTensor(indices=self.i_pos,
                                 values=positive_ones,
                                 dense_shape=[n_users, self.num_items])

    if self.reduced:
        # for reduced sampling
        self.all_logits_masked = self.model.sampled_logits / self.temperature
    else:
        # Adding -inf at the indicated positions masks them in the logits.
        scaled_logits = self.model.all_logits / self.temperature
        self.all_logits_masked = tf.sparse_add(scaled_logits, user_i_pos * (-np.inf))

    samples = tf.multinomial(self.all_logits_masked, self.num_neg)
    self.negsamples = tf.reshape(samples, [-1, 1])
def _call(self):
    """Apply dropout, the graph convolution, the optional bias and activation.

    With ``self.use_theta`` the supports are scaled by their ``theta_i``
    variables and summed into one sparse operator ``H`` that is applied to
    ``x @ weight``. Otherwise each support is applied to ``x @ weights_i``
    (or to ``weights_i`` directly when featureless) and the results are
    summed.

    Returns:
        ``self.act(output)``.
    """
    x = self.input

    # dropout
    if self.sparse_inputs:
        x = sparse_dropout(x, 1 - self.dropout, self.num_features_nonzero)
    else:
        x = tf.nn.dropout(x, 1 - self.dropout)

    # convolve
    supports = list()
    H = None
    for i in range(len(self.support)):
        if self.use_theta:
            # Identity check: `!= None` would go through any comparison
            # operator overloaded by the tensor type.
            if H is not None:
                H = tf.sparse_add(
                    H, self.support[i] * self.vars['theta_' + str(i)])
            else:
                H = self.support[i] * self.vars['theta_' + str(i)]
        else:
            if not self.featureless:
                pre_sup = dot(x, self.vars['weights_' + str(i)],
                              sparse=self.sparse_inputs)
            else:
                pre_sup = self.vars['weights_' + str(i)]
            support = dot(self.support[i], pre_sup, sparse=True)
            supports.append(support)

    if self.use_theta:
        output = dot(H,
                     dot(x, self.vars['weight'], sparse=self.sparse_inputs),
                     sparse=True)
    else:
        output = tf.add_n(supports)

    # bias
    if self.bias:
        output += self.vars['bias']

    return self.act(output)
def _loss_helper(self, tvt):
    """Assemble the total loss: weight decay, task losses and graph loss.

    Args:
        tvt: mode tag; scalar summaries are only emitted when it equals
            ``'train'``. It is also forwarded to the helper methods.

    Returns:
        The sum of the weight-decay loss over all layer variables, every
        task loss and, when ``ec.graph_loss == '1st'``, the scaled
        first-order graph loss.
    """
    if logging_enabled == True:
        print("- Entered model::Model::_loss_helper Private Method")
    rtn = 0

    # Weight decay loss.
    # Accumulate over every variable so that both the total loss and the
    # 'weight_decay_loss' summary cover all of them, not only the last one.
    wdl = 0
    for layer in self.layers:
        for var in layer.vars.values():
            wdl += self.weight_decay * tf.nn.l2_loss(var)
    rtn += wdl
    if tvt == 'train':
        tf.compat.v1.summary.scalar('weight_decay_loss', wdl)

    task_loss_dict = self._task_loss(tvt)
    for loss_label, loss in task_loss_dict.items():
        rtn += loss
        if tvt == 'train':
            tf.compat.v1.summary.scalar(loss_label, loss)

    if ec.graph_loss == '1st':
        node_emb_list = self._get_last_gcn_layer_outputs(tvt)
        laplacian_list = self._get_laplacians_for_graph_loss(tvt)
        gl = 0
        for i, node_emb_mat in enumerate(node_emb_list):
            # Root of the summed squared difference between the first
            # Laplacian entry for graph i and the embedding Gram matrix.
            mat = tf.matmul(node_emb_mat, tf.transpose(node_emb_mat))
            gl += tf.sqrt(
                tf.reduce_sum(
                    tf.square(tf.sparse_add(-mat, laplacian_list[i][0]))))
        gl /= ec.batch_size
        gl *= ec.graph_loss_alpha
        rtn += gl
        if tvt == 'train':
            tf.compat.v1.summary.scalar('1st_order_graph_loss', gl)

    if tvt == 'train':
        tf.compat.v1.summary.scalar('total_loss', rtn)
    return rtn
def build(self, input_shape):
    """Create the layer's variables and its transformed kernel ``self.W``.

    Reads the subset and channel counts from ``input_shape[1]`` and
    ``input_shape[2]``, sets up the Fourier transform helpers, creates the
    kernel ``w`` (and the bias when enabled), places the kernel at the rows
    listed in ``self.coef_indices`` of an otherwise zero tensor and passes
    it through ``self.fr.fft``.
    """
    self.n_subsets = input_shape[1].value
    self.n_channels = input_shape[2].value
    self.n_vertices = int(np.log2(self.n_subsets))

    self.ft = FourierTransform(self.n_vertices, self.model, dtype=self.dtype)
    # Model 5 shares one transform; otherwise a model-1 transform is used.
    self.fr = self.ft if self.model == 5 else FourierTransform(
        self.n_vertices, 1, dtype=self.dtype)

    # Index 0 followed by the powers of two below 2 ** n_vertices.
    leading_zero = np.zeros(1, dtype=np.int32)
    powers_of_two = 2**np.arange(self.n_vertices)
    self.coef_indices = np.concatenate([leading_zero, powers_of_two], axis=0)

    with tf.variable_scope(self.name, reuse=self._reuse) as scope:
        kernel_shape = [self.coef_indices.shape[0], self.n_channels, self.n_filters]
        self.w = self.add_variable('w',
                                   shape=kernel_shape,
                                   dtype=self.dtype,
                                   initializer=self.kernel_initializer,
                                   trainable=True)
        if self.use_bias:
            bias_init = tf.constant_initializer(
                np.ones((1, 1, self.n_filters)) * 0.01)
            self.bias = self.add_variable('bias',
                                          shape=[1, 1, self.n_filters],
                                          dtype=self.dtype,
                                          trainable=True,
                                          initializer=bias_init)

        # Every (subset, channel, filter) position covered by the kernel.
        positions = itertools.product(self.coef_indices,
                                      np.arange(self.n_channels),
                                      np.arange(self.n_filters))
        indices = np.asarray(list(positions))
        values = tf.reshape(self.w, [-1])

        W_sparse = tf.SparseTensor(
            indices, values,
            [self.n_subsets, self.n_channels, self.n_filters])
        # Adding to a dense zero tensor yields the dense kernel.
        dense_zeros = tf.zeros(W_sparse.dense_shape, dtype=self.dtype)
        W_sdom = tf.sparse_add(dense_zeros, W_sparse)

        # Swap the first two axes around the transform and back again.
        W_sdom = tf.transpose(W_sdom, [1, 0, 2])
        self.W = self.fr.fft(W_sdom)
        self.W = tf.transpose(self.W, [1, 0, 2])
    self.built = True
def attn_head(self, input, output_sz, num_nodes, adj, attn_drop, f_drop, activate=tf.nn.elu):
    """Sparse single-head graph attention layer.

    Args:
        input: node features fed to a width-1 ``conv1d``.
        output_sz: number of output features per node.
        num_nodes: number of nodes, used for the explicit reshapes.
        adj: sparse adjacency multiplied with the attention terms.
        attn_drop: dropout rate on the attention coefficients.
        f_drop: dropout rate on the input and transformed features.
        activate: callable applied to the final output.

    Returns:
        ``activate(ret)`` where ``ret`` has static shape
        ``[1, num_nodes, output_sz]``.
    """
    with tf.name_scope('attn'):
        features = input
        if f_drop != 0.0:
            features = tf.nn.dropout(features, 1.0 - f_drop)

        projected = tf.layers.conv1d(features, output_sz, 1, use_bias=False)

        # One scalar attention term per node for each side of an edge.
        left = tf.reshape(tf.layers.conv1d(projected, 1, 1), (num_nodes, 1))
        right = tf.reshape(tf.layers.conv1d(projected, 1, 1), (num_nodes, 1))

        left_edges = adj * left
        right_edges = adj * tf.transpose(right, [1, 0])

        logits = tf.sparse_add(left_edges, right_edges)
        logits = tf.SparseTensor(indices=logits.indices,
                                 values=tf.nn.leaky_relu(logits.values),
                                 dense_shape=logits.dense_shape)
        coefs = tf.sparse_softmax(logits)

        if attn_drop != 0.0:
            kept = tf.nn.dropout(coefs.values, 1.0 - attn_drop)
            coefs = tf.SparseTensor(indices=coefs.indices,
                                    values=kept,
                                    dense_shape=coefs.dense_shape)
        if f_drop != 0.0:
            projected = tf.nn.dropout(projected, 1.0 - f_drop)

        # Rank-2 operands are needed for the sparse/dense matmul.
        coefs = tf.sparse_reshape(coefs, [num_nodes, num_nodes])
        projected = tf.squeeze(projected)
        aggregated = tf.sparse_tensor_dense_matmul(coefs, projected)
        aggregated = tf.expand_dims(aggregated, axis=0)
        aggregated.set_shape([1, num_nodes, output_sz])

        ret = tf.contrib.layers.bias_add(aggregated)
        return activate(ret)
def getDecoderOutput(self, output, lstm_cell_size, token_vocab_size, out_weights, alpha_sentinel,
                     encoder_input_sequence, batch_size, vocab_size, context,
                     use_context_for_out=config.use_context_for_out):
    """Combine the vocabulary distribution with pointer probabilities.

    The softmax over the projected decoder output is scaled by the sentinel
    weight; the attention weights ``alpha`` are then added at the vocabulary
    ids found in ``encoder_input_sequence``.

    ``lstm_cell_size`` and ``token_vocab_size`` are accepted but not read.

    Returns:
        A ``(N, vocab_size)`` tensor, per the original shape comments.
        Note: these are probabilities. Use sparse cross entropy with logits
        only after processing.
    """
    w_out, b_out, w_context_out, b_context_out = out_weights
    alpha, sentinel_weight = alpha_sentinel  # sentinel_weight: N,

    logits = tf.matmul(output, w_out) + b_out  # (N,vocab_size)
    if use_context_for_out:
        logits += (tf.matmul(context, w_context_out) + b_context_out)  # (N,vocab_size)

    vocab_probs = tf.nn.softmax(logits)
    gate = tf.expand_dims(sentinel_weight, 1)  # N,1
    gated_probs = vocab_probs * gate  # g * rnnprob(w)

    # Pair every encoder token id with the row (batch index) it belongs to.
    batch_ids = tf.expand_dims(tf.range(batch_size), 1)
    encoder_length = tf.shape(encoder_input_sequence)[1]
    batch_ids = tf.tile(batch_ids, [1, encoder_length])  # batch_size, encoder_length
    pairs = tf.stack([batch_ids, encoder_input_sequence], axis=2)  # batch_size, encoder_length, 2
    flat_pairs = tf.cast(tf.reshape(pairs, [-1, 2]), tf.int64)  # batch_size * encoder_length, 2

    # The sum of alpha is already (1 - g), so no rescaling is applied.
    flat_alpha = tf.reshape(alpha, [-1])  # batch_size * encoder_length

    pointer_probs = tf.SparseTensor(
        indices=flat_pairs,
        values=flat_alpha,
        dense_shape=np.array([batch_size, vocab_size], dtype=np.int64))
    return tf.sparse_add(gated_probs, pointer_probs)
def Test_Gradient():
    """Print the gradients of three ways of densifying a sparse tensor.

    Compares ``tf.sparse_tensor_to_dense``, ``tf.scatter_nd`` and
    ``tf.sparse_add`` with a zero tensor; for each, the gradient of the
    summed dense tensor with respect to the sparse values is printed.
    """
    indices = tf.placeholder(tf.int64, (None, 2))
    values = tf.placeholder(tf.float32, (None, ))
    sparse_tensor = tf.SparseTensor(indices, values, (5, 7))

    dense_variants = [
        tf.sparse_tensor_to_dense(sparse_tensor),
        tf.scatter_nd(indices, values, shape=[5, 7]),
        tf.sparse_add(tf.zeros((5, 7)), sparse_tensor),
    ]
    totals = [tf.reduce_sum(dense) for dense in dense_variants]

    # Outputs noted in the original code:
    #   dense_tensor1 -> None
    #   dense_tensor2 -> tf.Tensor 'gradients_1/ScatterNd_grad/GatherNd:0'
    #   dense_tensor3 -> tf.Tensor 'gradients_2/SparseTensorDenseAdd_grad/GatherNd:0'
    labels = ('dense_tensor1', 'dense_tensor2', 'dense_tensor3')
    for label, total in zip(labels, totals):
        print(label, tf.gradients(total, values))
def __call__(self, x):
    """Encode `x` into a multivariate normal distribution.

    `self.net(x)` is split into a mean part and a covariance-parameter part.
    The covariance parameters fill the main diagonal and first superdiagonal
    of a batched precision factor, which is inverted with a triangular solve
    and used (transposed) as `scale_tril`.

    Returns:
        A `tfd.MultivariateNormalTriL` built from the mean part and the
        transposed inverse factor.
    """
    mapped = self.net(x)

    # assumes the first two static dims of `mapped` are batch and time -- TODO confirm
    batch_size = mapped.shape.as_list()[0]
    time_length = mapped.shape.as_list()[1]

    # Obtain mean and precision matrix components
    num_dim = len(mapped.shape.as_list())
    # Permutation that swaps the last two axes.
    perm = list(range(num_dim - 2)) + [num_dim - 1, num_dim - 2]
    mapped_transposed = tf.transpose(mapped, perm=perm)
    mapped_mean = mapped_transposed[:, :self.z_size]
    mapped_covar = mapped_transposed[:, self.z_size:]

    # tf.nn.sigmoid provides more stable performance on Physionet dataset
    if self.data_type == 'physionet':
        mapped_covar = tf.nn.sigmoid(mapped_covar)
    else:
        mapped_covar = tf.nn.softplus(mapped_covar)

    mapped_reshaped = tf.reshape(mapped_covar, [batch_size, self.z_size, 2*time_length])

    dense_shape = [batch_size, self.z_size, time_length, time_length]
    # Each (batch, latent) pair gets 2*time_length-1 entries: time_length
    # on the main diagonal followed by time_length-1 on the superdiagonal.
    idxs_1 = np.repeat(np.arange(batch_size), self.z_size*(2*time_length-1))
    idxs_2 = np.tile(np.repeat(np.arange(self.z_size), (2*time_length-1)), batch_size)
    # Row indices: t for the diagonal, then t for the (t, t+1) entries.
    idxs_3 = np.tile(np.concatenate([np.arange(time_length), np.arange(time_length-1)]), batch_size*self.z_size)
    # Column indices: t for the diagonal, then t+1 for the superdiagonal.
    idxs_4 = np.tile(np.concatenate([np.arange(time_length), np.arange(1,time_length)]), batch_size*self.z_size)
    idxs_all = np.stack([idxs_1, idxs_2, idxs_3, idxs_4], axis=1)

    # ~10x times faster on CPU then on GPU
    with tf.device('/cpu:0'):
        # Obtain covariance matrix from precision one
        # The last of the 2*time_length values per row is dropped so that
        # exactly 2*time_length-1 values match the indices above.
        mapped_values = tf.reshape(mapped_reshaped[:, :, :-1], [-1])
        prec_sparse = tf.sparse.SparseTensor(indices=idxs_all, values=mapped_values, dense_shape=dense_shape)
        prec_sparse = tf.sparse.reorder(prec_sparse)
        # Adding to a dense zero tensor yields the dense representation.
        prec_tril = tf.sparse_add(tf.zeros(prec_sparse.dense_shape, dtype=tf.float32), prec_sparse)
        eye = tf.eye(num_rows=prec_tril.shape.as_list()[-1], batch_shape=prec_tril.shape.as_list()[:-2])
        prec_tril = prec_tril + eye
        # Solving against the identity with lower=False inverts the
        # upper-triangular factor.
        cov_tril = tf.linalg.triangular_solve(matrix=prec_tril, rhs=eye, lower=False)
        # Non-finite entries of the inverse are replaced by zeros.
        cov_tril = tf.where(tf.math.is_finite(cov_tril), cov_tril, tf.zeros_like(cov_tril))

    num_dim = len(cov_tril.shape)
    perm = list(range(num_dim - 2)) + [num_dim - 1, num_dim - 2]
    # Transposing the upper-triangular inverse gives a lower-triangular factor.
    cov_tril_lower = tf.transpose(cov_tril, perm=perm)
    z_dist = tfd.MultivariateNormalTriL(loc=mapped_mean, scale_tril=cov_tril_lower)
    return z_dist
def graph_attention_layer2(self, A, M, v, layer):
    """Compute sparse attention coefficients over the edges in `A`.

    Args:
        A: sparse matrix multiplied element-wise with the attention terms.
        M: dense matrix projected by ``v[0]`` and ``v[1]``.
        v: pair of projection matrices.
        layer: value formatted into the variable scope name.

    Returns:
        A ``tf.SparseTensor`` holding the sparse softmax of the
        sigmoid-activated logits.
    """
    with tf.variable_scope("layer_%s" % layer):
        # One attention term per side of an edge, restricted to A's entries.
        first_term = A * tf.matmul(M, v[0])
        second_term = A * tf.transpose(tf.matmul(M, v[1]), [1, 0])
        logits = tf.sparse_add(first_term, second_term)

        squashed = tf.SparseTensor(indices=logits.indices,
                                   values=tf.nn.sigmoid(logits.values),
                                   dense_shape=logits.dense_shape)
        normalized = tf.sparse_softmax(squashed)

        # Re-wrap the result in a fresh SparseTensor, as the original did.
        return tf.SparseTensor(indices=normalized.indices,
                               values=normalized.values,
                               dense_shape=normalized.dense_shape)
def setupQ(self, init):
    """Create the per-core variables and return the list of core factors.

    For the first core, the last core, or whenever ``self.r == 1``, the
    factor is an ``[r, 1]`` variable used as is. Otherwise a variable with
    r-choose-2 entries fills the strict upper triangle of an ``r x r``
    matrix, which is made skew-symmetric and mapped through the Cayley
    transform ``(I - A) (I + A)^-1``. Every created variable is also
    recorded in ``self.vs``.

    Args:
        init: initializer handed to every ``tf.get_variable`` call.

    Returns:
        A list holding one tensor per ``(i, j)`` pair, in loop order.
    """
    rank = self.r
    # Only R choose 2 parameters are needed.
    n_free = int(rank * (rank - 1) / 2)
    print(n_free)

    # (row, col) positions of the upper triangle without the diagonal.
    upper_pairs = [list(pair) for pair in zip(*np.triu_indices(rank, k=1))]
    indices = tf.constant(upper_pairs, dtype=tf.int64)

    Q = []
    self.vs = []
    for i in range(self.d):
        for j in range(self.n[i]):
            vname = self._name + str(i) + str(j)
            if i == 0 or i == self.d - 1 or rank == 1:
                # Vector for first and last cores of TT.
                myvar = tf.get_variable(vname, shape=[rank, 1], initializer=init)
                factor = myvar
            else:
                # Sparse representation of the skew-symmetric matrix.
                myvar = tf.get_variable(vname, shape=[n_free, 1], initializer=init)
                upper_sparse = tf.SparseTensor(indices=indices,
                                               values=tf.squeeze(myvar),
                                               dense_shape=[rank, rank])
                # Adding to a dense zero tensor yields the dense representation.
                upper = tf.sparse_add(upper_sparse, tf.zeros(upper_sparse.dense_shape))
                skew = upper - tf.transpose(upper)

                # Cayley transform to Orthogonal SO(r).
                identity = tf.eye(rank)
                factor = tf.matmul(identity - skew, tf.matrix_inverse(identity + skew))
            Q.append(factor)
            self.vs.append(myvar)
    return Q
def fc_layer(x, output_num, activation='none', dropout=None, name='fc', input_num=47236):
    """Fully connected layer for sparse-valued dense input.

    Args:
        x: dense input tensor, multiplied with ``a_is_sparse=True``.
        output_num: number of output units.
        activation: ``'relu'``, ``'sigmoid'`` or ``'none'``; any other value
            leaves the output unchanged.
        dropout: when not ``None``, passed as the second argument of
            ``tf.nn.dropout``.
        name: name of the weight variable.
        input_num: input feature width; defaults to the previously
            hard-coded 47236.

    Returns:
        The dense output tensor.
    """
    weight = tf.get_variable(
        name, [input_num, output_num],
        initializer=tf.random_normal_initializer(stddev=0.01))
    bias = tf.Variable(tf.zeros(output_num))

    y = tf.sparse_matmul(x, weight, a_is_sparse=True)
    # tf.sparse_matmul returns a dense tensor, and tf.sparse_add rejects two
    # dense operands, so the bias is added with a plain dense addition.
    y = y + bias

    if activation == 'relu':
        y = tf.nn.relu(y)
    elif activation == 'sigmoid':
        y = tf.nn.sigmoid(y)

    if dropout is not None:
        y = tf.nn.dropout(y, dropout)
    return y
def __init__(self, sess, input_dim, emb_dim, decoder_num_steps):
    """Build the encoder/decoder graph and its squared-loss optimizer.

    Args:
        sess: session object, stored on the instance.
        input_dim: width of the sparse input.
        emb_dim: width of the encoding.
        decoder_num_steps: number of simulated subgradient steps in the
            decoder.
    """
    self.sess = sess
    self.input_dim = input_dim
    self.emb_dim = emb_dim
    self.decoder_num_steps = decoder_num_steps

    # define the input as a SparseTensor
    self.indices_x = tf.placeholder("int64", [None, 2])
    self.values_x = tf.placeholder("float", [None])
    self.dense_shape_x = tf.placeholder("int64", [2])
    self.input_x = tf.SparseTensor(indices=self.indices_x,
                                   values=self.values_x,
                                   dense_shape=self.dense_shape_x)

    self.encoder_weight = tf.Variable(
        tf.truncated_normal([self.input_dim, self.emb_dim],
                            stddev=1.0 / np.sqrt(self.input_dim)))

    # encode the input
    self.encode = tf.sparse_tensor_dense_matmul(self.input_x,
                                                self.encoder_weight)

    # decode by simulating decoder_num_steps projected subgradient updates
    self.step_size = tf.Variable(1.0)

    def decode_subgrad(x, W, num_steps, step_size):
        """
        Simulates several steps of subgradient descent of an l1-min:
        x+ = x + step_size*(W^TW-I)sign(x)
        """
        x = tf.matmul(x, W, transpose_b=True)
        # `range` instead of `xrange`, which does not exist on Python 3.
        for i in range(num_steps):
            # The step size decays as step_size / (i + 1).
            x = x + (
                tf.matmul(tf.matmul(tf.sign(x), W), W, transpose_b=True) -
                tf.sign(x)) * (step_size / (i + 1))
            x = tf.layers.batch_normalization(x, axis=1)
        return tf.nn.relu(x)

    self.pred = decode_subgrad(self.encode, self.encoder_weight,
                               self.decoder_num_steps, self.step_size)

    # define the squared loss
    self.sq_loss = tf.reduce_mean(
        tf.pow(tf.sparse_add(self.input_x, -self.pred), 2)) * self.input_dim

    self.learning_rate = tf.placeholder("float", [])
    self.sq_optim = tf.train.GradientDescentOptimizer(
        self.learning_rate).minimize(self.sq_loss)
def _s2d_add_vs_sparse_add(sparsity, n, m, num_iters=50):
    """Time densify-then-add against `tf.sparse_add` on an n x m problem.

    Args:
        sparsity: Threshold forwarded to `_sparsify` as `thresh`.
        n: Number of rows of the random operands.
        m: Number of columns of the random operands.
        num_iters: Number of timed runs per variant.

    Returns:
        A pair (dense-path latency, sparse_add latency), both in
        milliseconds per iteration.
    """
    np.random.seed(1618)

    with tf.Session(graph=tf.Graph()) as sess:

        def _time_op(op, repetitions):
            # Wall-clock seconds for `repetitions` evaluations of `op`.
            return timeit.timeit(lambda: sess.run(op), number=repetitions)

        sparse_source = np.random.rand(n, m).astype(np.float32)
        sparse_operand, unused_nnz = _sparsify(
            sparse_source, thresh=sparsity, index_dtype=np.int32)
        dense_source = np.random.rand(n, m).astype(np.float32)

        dense_path = tf.add(tf.sparse_tensor_to_dense(sparse_operand),
                            tf.constant(dense_source))
        sparse_path = tf.sparse_add(sparse_operand,
                                    tf.constant(dense_source))

        # Three untimed warm-up runs of each op before measuring.
        _time_op(dense_path, 3)
        _time_op(sparse_path, 3)

        dense_seconds = _time_op(dense_path, num_iters)
        sparse_seconds = _time_op(sparse_path, num_iters)

    # per-iter latency; secs to millis
    dense_ms = dense_seconds * 1e3 / num_iters
    sparse_ms = sparse_seconds * 1e3 / num_iters
    return dense_ms, sparse_ms
def get_loss_vat(inputs, predictions, mask, is_train, model, placeholders,
                 predictions_var_scope):
    """Builds the virtual adversarial loss for a batch of inputs.

    Args:
      inputs: A batch of input features, where the batch is the first
        dimension.
      predictions: The logits predicted by a model on the provided inputs.
      mask: A tensor of booleans specifying which samples to apply the
        virtual adversarial loss to.
      is_train: A boolean placeholder specifying if this is a training or
        testing setting.
      model: The model that generated the logits.
      placeholders: Placeholders for model encodings.
      predictions_var_scope: Variable scope for obtaining the predictions.

    Returns:
      The summed masked KL divergence between the clean and the perturbed
      predictions, divided by the sum of the mask cast to float.
    """
    sample_weights = tf.cast(mask, dtype=tf.float32)

    # The perturbation is generated from the raw predictions, before the
    # gradient is stopped on them.
    perturbation = generate_virtual_adversarial_perturbation(
        inputs, predictions, model, placeholders, sample_weights,
        predictions_var_scope, is_train=is_train)

    # Gradients are not propagated through the clean logits.
    clean_logits = tf.stop_gradient(predictions)

    perturbed_inputs = tf.sparse_add(inputs, perturbation)

    # Re-run the model on the perturbed inputs, reusing its variables.
    with tf.variable_scope(predictions_var_scope,
                           auxiliary_name_scope=False,
                           reuse=True):
        perturbed_encoding, _, _ = model.get_encoding_and_params(
            inputs=perturbed_inputs,
            is_train=is_train,
            update_batch_stats=False,
            **placeholders)
        perturbed_logits, _, _ = model.get_predictions_and_params(
            encoding=perturbed_encoding,
            is_train=is_train,
            **placeholders)

    num_selected = tf.reduce_sum(sample_weights)
    per_sample_kl = kl_divergence_with_logit(
        clean_logits, perturbed_logits, sample_weights)
    return tf.reduce_sum(per_sample_kl) / num_selected
def build_predictor(self, recall_at):
    """Create the top-k prediction ops used for evaluation.

    Sets `self.eval_trainR` (a float32 sparse placeholder),
    `self.eval_preds_cold` (top-k indices of U_embedding * V_embedding^T)
    and `self.eval_preds_warm` (top-k indices of the same product after
    `self.eval_trainR` has been added to it).

    Args:
        recall_at: Sequence of cut-offs; its last element is used as k.
    """
    self.eval_trainR = tf.sparse_placeholder(
        dtype=tf.float32, shape=[None, None], name='trainR_sparse')

    with tf.variable_scope("eval"):
        cold_scores = tf.matmul(
            self.U_embedding, self.V_embedding,
            transpose_b=True, name='pred_all_items')
        # dense + sparse yields a dense score matrix
        warm_scores = tf.sparse_add(cold_scores, self.eval_trainR)

        top_k = recall_at[-1]
        # Only the indices of the top-k entries are kept.
        _, cold_indices = tf.nn.top_k(
            cold_scores, k=top_k, sorted=True, name='topK_net_cold')
        self.eval_preds_cold = cold_indices
        _, warm_indices = tf.nn.top_k(
            warm_scores, k=top_k, sorted=True, name='topK_net_warm')
        self.eval_preds_warm = warm_indices
def _ir_direction_coeffs(weights, thetas, ka, t):
    """Return the stacked ka + 1 kernel coefficients of one channel.

    For each of `ka` query angles (0, pi/4, 2*pi/4, ...) the coefficient is
    sum(weights[:ka] * ((1 + cos(angle - thetas)) / 2) ** t) + weights[ka].
    The bare offset weights[ka] is then inserted at position 4.
    """
    coeffs = []
    theta = 0.0
    for _ in range(ka):
        kernel = tf.cast(
            tf.pow(tf.div(1.0 + tf.cos(theta - thetas), 2.0), t),
            dtype=tf.float32)
        coeffs.append(tf.reduce_sum(weights[:ka] * kernel) + weights[ka])
        theta += np.pi / 4.0
    # presumably index 4 is the centre cell of a 3x3 neighbourhood -- confirm
    # against adjecent_sparse.
    coeffs.insert(4, weights[ka])
    return tf.stack(coeffs)


def _ir_transition(adj_M, coeff, size):
    """Sum the adjacency patterns in `adj_M`, each scaled by its coefficient."""
    shape = [size, size]
    total = None
    for idx, adj in enumerate(adj_M):
        # adj[0] holds the indices and adj[1] the values of one pattern.
        term = tf.cast(
            tf.SparseTensor(adj[0], adj[1] * coeff[idx], shape), tf.float32)
        total = term if total is None else tf.sparse_add(total, term)
    return total


def _ir_propagate(transitions, values):
    """Apply every sparse transition to `values`; result is [batch, cells, channels]."""
    values_t = tf.transpose(values)
    per_channel = [
        tf.transpose(tf.sparse_tensor_dense_matmul(p, values_t))
        for p in transitions
    ]
    return tf.transpose(tf.stack(per_channel), [1, 2, 0])


def ir_Block(X, S1, S2, config):
    """Build a value-iteration block with sparse, direction-based transitions.

    Args:
        X: Input fed to the first convolution (image + reward prior).
        S1: First coordinate of the queried state positions.
        S2: Second coordinate of the queried state positions.
        config: Object providing ka, k, t, ch_i, ch_h, ch_q, statebatchsize,
            imsize, fixed and v.

    Returns:
        A pair (logits, output) where output is the softmax of logits.
    """
    ka = config.ka
    k = config.k    # Number of value iterations performed
    t = config.t
    ch_i = config.ch_i  # Channels in input layer
    ch_h = config.ch_h  # Channels in initial hidden layer
    ch_q = config.ch_q  # Channels in q layer (~actions)
    state_batch_size = config.statebatchsize
    img_s = config.imsize
    num_cells = img_s * img_s

    # reference direction
    theta_init = np.array([np.pi*3.0/4.0, np.pi/2.0, np.pi/4.0, np.pi, 0.0,
                           np.pi*5.0/4.0, np.pi*3.0/2.0, np.pi*7.0/4.0],
                          dtype=np.float32)

    # NOTE: variables are created (and np.random is consumed) in the same
    # order as before, so auto-generated variable names and seeded initial
    # values are unchanged.
    wi = [tf.Variable(np.random.randn(ka+1) * 0.01, dtype=tf.float32)
          for _ in range(ch_q)]
    if config.fixed is True:
        # theta_init has 8 entries, so this mode presumably requires ka == 8.
        thetai = [theta_init for _ in range(ch_q)]
        theta_fb = [theta_init for _ in range(ch_q)]
    else:
        thetai = [tf.Variable(np.random.random(ka) * 2*np.pi, dtype=tf.float32)
                  for _ in range(ch_q)]
        theta_fb = [tf.Variable(np.random.random(ka) * 2*np.pi, dtype=tf.float32)
                    for _ in range(ch_q)]
    w_fb = [tf.Variable(np.random.randn(ka+1) * 0.01, dtype=tf.float32)
            for _ in range(ch_q)]

    # coefficients in paper eq. 5 (section 6.3) on forward path
    coeff = [_ir_direction_coeffs(wi[i], thetai[i], ka, t)
             for i in range(ch_q)]
    # coefficients in paper eq. 5 (section 6.3) on feedback path.
    # These use theta_fb: the previous code read thetai here, which left the
    # theta_fb directions created above completely unused.
    coeff_fb = [_ir_direction_coeffs(w_fb[i], theta_fb[i], ka, t)
                for i in range(ch_q)]

    adj_M = adjecent_sparse(config.imsize, config.imsize)

    # obtain P (transition) and P_fb (transition for feedback channel)
    P = [_ir_transition(adj_M, coeff[j], num_cells) for j in range(ch_q)]
    P_fb = [_ir_transition(adj_M, coeff_fb[j], num_cells) for j in range(ch_q)]

    bias = tf.Variable(np.random.randn(1, 1, 1, ch_h) * 0.01, dtype=tf.float32)
    # weights from inputs to q layer (~reward in Bellman equation)
    w0 = tf.Variable(np.random.randn(3, 3, ch_i, ch_h) * 0.01, dtype=tf.float32)
    w1 = tf.Variable(np.random.randn(1, 1, ch_h, 1) * 0.01, dtype=tf.float32)
    # feedback weights from v layer into q layer (~transition probabilities in Bellman equation)
    # only used when config.v is False
    w_o = tf.Variable(np.random.randn(ch_q, 8) * 0.01, dtype=tf.float32)

    # initial conv layer over image+reward prior
    h = conv2d_flipkernel(X, w0, name="h0") + bias
    r = conv2d_flipkernel(h, w1, name="r")
    r_ = tf.reshape(r, [-1, num_cells])

    # The reward contribution does not depend on v, so it is built once and
    # reused by every iteration instead of being rebuilt each time.
    q_reward = _ir_propagate(P, r_)
    v = tf.reduce_max(q_reward, axis=2, keep_dims=True, name="v")
    v_ = tf.reshape(v, [-1, num_cells])

    for _ in range(k - 1):
        q = q_reward + _ir_propagate(P_fb, v_)
        v = tf.reduce_max(q, axis=2, keep_dims=True, name="v")
        v_ = tf.reshape(v, [-1, num_cells])

    # do one last convolution
    q = q_reward + _ir_propagate(P_fb, v_)
    q = tf.reshape(q, [-1, img_s, img_s, ch_q])

    # CHANGE TO THEANO ORDERING
    # Since we are selecting over channels, it becomes easier to work with
    # the tensor when it is in NCHW format vs NHWC
    q = tf.transpose(q, perm=[0, 3, 1, 2])

    # Select the conv-net channels at the state position (S1,S2).
    # This intuitively corresponds to each channel representing an action, and the convnet the Q function.
    # The tricky thing is we want to select the same (S1,S2) position *for each* channel and for each sample
    # TODO: performance can be improved here by substituting expensive
    #       transpose calls with better indexing for gather_nd
    bs = tf.shape(q)[0]
    # Batch index of every queried state: each sample index repeated
    # state_batch_size times.
    rprn = tf.reshape(
        tf.tile(tf.reshape(tf.range(bs), [-1, 1]), [1, state_batch_size]),
        [-1])
    ins1 = tf.cast(tf.reshape(S1, [-1]), tf.int32)
    ins2 = tf.cast(tf.reshape(S2, [-1]), tf.int32)

    if config.v is True:
        v = tf.reshape(v, [-1, img_s, img_s, 1])
        v = tf.transpose(v, perm=[0, 3, 1, 2])
        v_out = tf.transpose(extract_circle(rprn, ins1, ins2, v), [1, 0, 2])
        v_out = tf.squeeze(v_out)
        logits = v_out
    else:
        idx_in = tf.transpose(tf.stack([ins1, ins2, rprn]), [1, 0])
        q_out = tf.gather_nd(tf.transpose(q, [2, 3, 0, 1]), idx_in, name="q_out")
        logits = tf.matmul(q_out, w_o)

    # softmax output weights
    output = tf.nn.softmax(logits, name="output")
    return logits, output