def chebyshev5(self, x, L, Fout, K, regularization=False):
    N, M, Fin = x.get_shape()
    N, M, Fin = int(N), int(M), int(Fin)
    # Rescale Laplacian and store as a TF sparse tensor. Copy to not modify the shared L.
    L = scipy.sparse.csr_matrix(L)
    L = graph.rescale_L(L, lmax=2)
    L = L.tocoo()
    indices = np.column_stack((L.row, L.col))
    L = tf.SparseTensor(indices, L.data, L.shape)
    L = tf.sparse_reorder(L)
    # Transform to Chebyshev basis.
    x0 = tf.transpose(x, perm=[1, 2, 0])  # M x Fin x N
    x0 = tf.reshape(x0, [M, Fin*N])  # M x Fin*N
    x = tf.expand_dims(x0, 0)  # 1 x M x Fin*N
    def concat(x, x_):
        x_ = tf.expand_dims(x_, 0)  # 1 x M x Fin*N
        return tf.concat(0, [x, x_])  # K x M x Fin*N
    if K > 1:
        x1 = tf.sparse_tensor_dense_matmul(L, x0)
        x = concat(x, x1)
    for k in range(2, K):
        x2 = 2 * tf.sparse_tensor_dense_matmul(L, x1) - x0  # M x Fin*N
        x = concat(x, x2)
        x0, x1 = x1, x2
    x = tf.reshape(x, [K, M, Fin, N])  # K x M x Fin x N
    x = tf.transpose(x, perm=[3, 1, 2, 0])  # N x M x Fin x K
    x = tf.reshape(x, [N*M, Fin*K])  # N*M x Fin*K
    # Filter: Fin*Fout filters of order K, i.e. one filterbank per feature pair.
    W = self._weight_variable([Fin*K, Fout], regularization=regularization)
    x = tf.matmul(x, W)  # N*M x Fout
    return tf.reshape(x, [N, M, Fout])  # N x M x Fout
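# A minimal NumPy/SciPy sketch of the recurrence implemented above, independent
# of the TF graph: T_0(L~)x = x, T_1(L~)x = L~x, T_k = 2 L~ T_{k-1} - T_{k-2},
# with L~ = 2L/lmax - I (the rescaling that `graph.rescale_L` is assumed to do).
import numpy as np
import scipy.sparse

def chebyshev_basis(L, x, K, lmax=2.0):
    """Stack T_0(L~)x .. T_{K-1}(L~)x along a new leading axis.

    L: scipy.sparse matrix (M x M); x: dense array (M x F).
    """
    L = scipy.sparse.csr_matrix(L)
    L_tilde = (2.0 / lmax) * L - scipy.sparse.identity(L.shape[0], format='csr')
    xs = [x]                                        # T_0(L~) x = x
    if K > 1:
        xs.append(L_tilde @ x)                      # T_1(L~) x = L~ x
    for _ in range(2, K):
        xs.append(2 * (L_tilde @ xs[-1]) - xs[-2])  # Chebyshev recurrence
    return np.stack(xs)                             # K x M x F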
def _inference(self, x, dropout):
    with tf.name_scope('gconv1'):
        N, M = x.get_shape()  # N: number of samples, M: number of features
        M = int(M)
        # Transform to Chebyshev basis.
        xt0 = tf.transpose(x)  # M x N
        xt = tf.expand_dims(xt0, 0)  # 1 x M x N
        def concat(xt, x):
            x = tf.expand_dims(x, 0)  # 1 x M x N
            return tf.concat([xt, x], axis=0)  # K x M x N
        if self.K > 1:
            xt1 = tf.sparse_tensor_dense_matmul(self.L, xt0)
            xt = concat(xt, xt1)
        for k in range(2, self.K):
            xt2 = 2 * tf.sparse_tensor_dense_matmul(self.L, xt1) - xt0  # M x N
            xt = concat(xt, xt2)
            xt0, xt1 = xt1, xt2
        xt = tf.transpose(xt)  # N x M x K
        xt = tf.reshape(xt, [-1, self.K])  # NM x K
        # Filter
        W = self._weight_variable([self.K, self.F])
        y = tf.matmul(xt, W)  # NM x F
        y = tf.reshape(y, [-1, M, self.F])  # N x M x F
        # Bias and non-linearity
        # b = self._bias_variable([1, 1, self.F])
        b = self._bias_variable([1, M, self.F])
        y += b  # N x M x F
        y = tf.nn.relu(y)
    with tf.name_scope('fc1'):
        W = self._weight_variable([self.F*M, NCLASSES])
        b = self._bias_variable([NCLASSES])
        y = tf.reshape(y, [-1, self.F*M])
        y = tf.matmul(y, W) + b
    return y
def _build_fm(self):
    """Construct the factorization machine part of the model.

    This is a traditional second-order FM module.

    Returns:
        obj: prediction score made by the factorization machine.
    """
    with tf.variable_scope("fm_part") as scope:
        x = tf.SparseTensor(
            self.iterator.fm_feat_indices,
            self.iterator.fm_feat_values,
            self.iterator.fm_feat_shape,
        )
        xx = tf.SparseTensor(
            self.iterator.fm_feat_indices,
            tf.pow(self.iterator.fm_feat_values, 2),
            self.iterator.fm_feat_shape,
        )
        fm_output = 0.5 * tf.reduce_sum(
            tf.pow(tf.sparse_tensor_dense_matmul(x, self.embedding), 2)
            - tf.sparse_tensor_dense_matmul(xx, tf.pow(self.embedding, 2)),
            1,
            keep_dims=True,
        )
    return fm_output
def _call(self, inputs):
    x = inputs
    x = dropout_sparse(x, 1 - self.dropout, self.features_nonzero)
    x = tf.sparse_tensor_dense_matmul(x, self.vars['weights'])
    x = tf.sparse_tensor_dense_matmul(self.adj, x)
    outputs = self.act(x)
    return outputs
def _inference(self, x, dropout):
    with tf.name_scope('gconv1'):
        N, M = x.get_shape()  # N: number of samples, M: number of features
        M = int(M)
        # Filter
        W = self._weight_variable([self.K, self.F])
        def filter(xt, k):
            xt = tf.transpose(xt)  # N x M
            xt = tf.reshape(xt, [-1, 1])  # NM x 1
            w = tf.slice(W, [k, 0], [1, -1])  # 1 x F
            y = tf.matmul(xt, w)  # NM x F
            return tf.reshape(y, [-1, M, self.F])  # N x M x F
        xt0 = tf.transpose(x)  # M x N
        y = filter(xt0, 0)
        if self.K > 1:
            xt1 = tf.sparse_tensor_dense_matmul(self.L, xt0)
            y += filter(xt1, 1)
        for k in range(2, self.K):
            xt2 = 2 * tf.sparse_tensor_dense_matmul(self.L, xt1) - xt0  # M x N
            y += filter(xt2, k)
            xt0, xt1 = xt1, xt2
        # Bias and non-linearity
        # b = self._bias_variable([1, 1, self.F])
        b = self._bias_variable([1, M, self.F])
        y += b  # N x M x F
        y = tf.nn.relu(y)
    with tf.name_scope('fc1'):
        W = self._weight_variable([self.F*M, NCLASSES])
        b = self._bias_variable([NCLASSES])
        y = tf.reshape(y, [-1, self.F*M])
        y = tf.matmul(y, W) + b
    return y
def _call(self, inputs):
    x = inputs
    # if self.dropout > 0:
    x = dropout_sparse(x, 1 - self.dropout, self.features_nonzero, dtype=self.dtype)
    x = tf.sparse_tensor_dense_matmul(tf.cast(x, tf.float32),
                                      tf.cast(self.vars['weights'], tf.float32))
    x = tf.sparse_tensor_dense_matmul(tf.cast(self.adj, tf.float32),
                                      tf.cast(x, tf.float32))
    outputs = tf.cast(self.act(x), self.dtype)
    return outputs
def build_model(self):
    dense_masker01 = tf.sparse_tensor_to_dense(self.mask)
    dense_masker02 = tf.sparse_tensor_to_dense(self.mask1)
    dense_masker03 = tf.sparse_tensor_to_dense(self.mask2)

    with tf.name_scope('encoding'):
        encoding = tf.add(tf.sparse_tensor_dense_matmul(self.X, self.W), self.b, name='raw_values')
        encoded_values = self.enc_func(encoding, name='encoded_values') - self.enc_func(self.b)
        encoding1 = tf.add(tf.sparse_tensor_dense_matmul(self.X1, self.W), self.b, name='raw_values1')
        encoded_values1 = self.enc_func(encoding1, name='encoded_values1') - self.enc_func(self.b)
        encoding2 = tf.add(tf.sparse_tensor_dense_matmul(self.X2, self.W), self.b, name='raw_values2')
        encoded_values2 = self.enc_func(encoding2, name='encoded_values2') - self.enc_func(self.b)

    with tf.name_scope('decoding'):
        decoding = tf.nn.xw_plus_b(encoded_values, self.W_prime, self.b_prime)
        decoded_values = self.dec_func(decoding, name='decoded_values')
        decoding1 = tf.nn.xw_plus_b(encoded_values1, self.W_prime, self.b_prime)
        decoded_values1 = self.dec_func(decoding1, name='decoded_values1')
        decoding2 = tf.nn.xw_plus_b(encoded_values2, self.W_prime, self.b_prime)
        decoded_values2 = self.dec_func(decoding2, name='decoded_values2')
        masked_decoded_values = tf.multiply(dense_masker01, decoded_values)

    with tf.name_scope('training_process'):
        diff01 = tf.squared_difference(tf.sparse_tensor_to_dense(self.Y), decoded_values)
        diff02 = tf.squared_difference(tf.sparse_tensor_to_dense(self.Y1), decoded_values1)
        diff03 = tf.squared_difference(tf.sparse_tensor_to_dense(self.Y2), decoded_values2)
        L_R = tf.reduce_sum(tf.multiply(dense_masker01, diff01)) \
            + tf.reduce_sum(tf.multiply(dense_masker02, diff02)) \
            + tf.reduce_sum(tf.multiply(dense_masker03, diff03))
        L_T = tf.reduce_sum(tf.log(1 + tf.exp(
            tf.reduce_sum(tf.multiply(encoded_values, encoded_values2), 1)
            - tf.reduce_sum(tf.multiply(encoded_values, encoded_values1), 1))))
        error = L_R + self.alpha_enc * L_T
        reg = 0
        for param in self.params.items():
            reg += tf.nn.l2_loss(param[1]) * self.lambda_w
        loss = error + reg

    model_params = [p for p in self.params.values()]
    train_step = self._optimize(loss, model_params)

    tf.summary.scalar('error', error)
    tf.summary.scalar('loss', loss)
    for param in self.params.items():
        tf.summary.histogram(param[0], param[1])
    merged_summary = tf.summary.merge_all()

    return encoded_values, decoded_values, masked_decoded_values, error, loss, train_step, merged_summary
def _build_fm(self, hparams):
    with tf.variable_scope("fm_part") as scope:
        x = tf.SparseTensor(self.iterator.fm_feat_indices,
                            self.iterator.fm_feat_values,
                            self.iterator.fm_feat_shape)
        xx = tf.SparseTensor(self.iterator.fm_feat_indices,
                             tf.pow(self.iterator.fm_feat_values, 2),
                             self.iterator.fm_feat_shape)
        fm_output = 0.5 * tf.reduce_sum(
            tf.pow(tf.sparse_tensor_dense_matmul(x, self.embedding), 2) -
            tf.sparse_tensor_dense_matmul(xx, tf.pow(self.embedding, 2)),
            1,
            keep_dims=True)
    return fm_output
def __init__(self, field_sizes=None, embed_size=10, filter_sizes=None, layer_acts=None,
             drop_out=None, init_path=None, opt_algo='gd', learning_rate=1e-2,
             random_seed=None):
    Model.__init__(self)
    init_vars = []
    num_inputs = len(field_sizes)
    for i in range(num_inputs):
        init_vars.append(('embed_%d' % i, [field_sizes[i], embed_size], 'xavier', dtype))
    init_vars.append(('f1', [embed_size, filter_sizes[0], 1, 2], 'xavier', dtype))
    init_vars.append(('f2', [embed_size, filter_sizes[1], 2, 2], 'xavier', dtype))
    init_vars.append(('w1', [2 * 3 * embed_size, 1], 'xavier', dtype))
    init_vars.append(('b1', [1], 'zero', dtype))
    self.graph = tf.Graph()
    with self.graph.as_default():
        if random_seed is not None:
            tf.set_random_seed(random_seed)
        self.X = [tf.sparse_placeholder(dtype) for i in range(num_inputs)]
        self.y = tf.placeholder(dtype)
        self.keep_prob_train = 1 - np.array(drop_out)
        self.keep_prob_test = np.ones_like(drop_out)
        self.layer_keeps = tf.placeholder(dtype)
        self.vars = utils.init_var_map(init_vars, init_path)
        w0 = [self.vars['embed_%d' % i] for i in range(num_inputs)]
        xw = tf.concat([tf.sparse_tensor_dense_matmul(self.X[i], w0[i])
                        for i in range(num_inputs)], 1)
        l = xw
        l = tf.transpose(tf.reshape(l, [-1, num_inputs, embed_size, 1]), [0, 2, 1, 3])
        f1 = self.vars['f1']
        l = tf.nn.conv2d(l, f1, [1, 1, 1, 1], 'SAME')
        l = tf.transpose(
            utils.max_pool_4d(
                tf.transpose(l, [0, 1, 3, 2]),
                int(num_inputs / 2)),
            [0, 1, 3, 2])
        f2 = self.vars['f2']
        l = tf.nn.conv2d(l, f2, [1, 1, 1, 1], 'SAME')
        l = tf.transpose(
            utils.max_pool_4d(
                tf.transpose(l, [0, 1, 3, 2]), 3),
            [0, 1, 3, 2])
        l = tf.nn.dropout(
            utils.activate(
                tf.reshape(l, [-1, embed_size * 3 * 2]),
                layer_acts[0]),
            self.layer_keeps[0])
        w1 = self.vars['w1']
        b1 = self.vars['b1']
        l = tf.matmul(l, w1) + b1
        l = tf.squeeze(l)
        self.y_prob = tf.sigmoid(l)
        self.loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(logits=l, labels=self.y))
        self.optimizer = utils.get_optimizer(opt_algo, learning_rate, self.loss)
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        self.sess = tf.Session(config=config)
        tf.global_variables_initializer().run(session=self.sess)
def _build_linear(self):
    """Construct the linear part of the model.

    This is a linear regression.

    Returns:
        obj: prediction score made by linear regression.
    """
    with tf.variable_scope("linear_part", initializer=self.initializer) as scope:
        w = tf.get_variable(
            name="w", shape=[self.hparams.FEATURE_COUNT, 1], dtype=tf.float32
        )
        b = tf.get_variable(
            name="b",
            shape=[1],
            dtype=tf.float32,
            initializer=tf.zeros_initializer(),
        )
        x = tf.SparseTensor(
            self.iterator.fm_feat_indices,
            self.iterator.fm_feat_values,
            self.iterator.fm_feat_shape,
        )
        linear_output = tf.add(tf.sparse_tensor_dense_matmul(x, w), b)
        self.layer_params.append(w)
        self.layer_params.append(b)
        tf.summary.histogram("linear_part/w", w)
        tf.summary.histogram("linear_part/b", b)
    return linear_output
def __init__(self, input_dim=None, output_dim=1, init_path=None, opt_algo='gd',
             learning_rate=1e-2, l2_weight=0, random_seed=None):
    Model.__init__(self)
    init_vars = [('w', [input_dim, output_dim], 'xavier', dtype),
                 ('b', [output_dim], 'zero', dtype)]
    self.graph = tf.Graph()
    with self.graph.as_default():
        if random_seed is not None:
            tf.set_random_seed(random_seed)
        self.X = tf.sparse_placeholder(dtype)
        self.y = tf.placeholder(dtype)
        self.vars = utils.init_var_map(init_vars, init_path)  # initialize the variables w and b
        w = self.vars['w']
        b = self.vars['b']
        xw = tf.sparse_tensor_dense_matmul(self.X, w)
        logits = tf.reshape(xw + b, [-1])
        self.y_prob = tf.sigmoid(logits)
        self.loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(labels=self.y, logits=logits)) + \
            l2_weight * tf.nn.l2_loss(xw)
        self.optimizer = utils.get_optimizer(opt_algo, learning_rate, self.loss)
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        self.sess = tf.Session(config=config)
        tf.global_variables_initializer().run(session=self.sess)
def dot(x, y, sparse=False):
    """Wrapper for tf.matmul (sparse vs dense)."""
    if sparse:
        res = tf.sparse_tensor_dense_matmul(x, y)
    else:
        res = tf.matmul(x, y)
    return res
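# Hypothetical usage of the `dot` wrapper above: the same call site handles a
# tf.SparseTensor (sparse=True) or a dense tf.Tensor (sparse=False).
import tensorflow as tf

x_sp = tf.SparseTensor(indices=[[0, 0], [1, 2]], values=[1.0, 2.0], dense_shape=[2, 3])
w = tf.ones([3, 4])
y_sparse = dot(x_sp, w, sparse=True)   # dispatches to tf.sparse_tensor_dense_matmul
y_dense = dot(tf.ones([2, 3]), w)      # dispatches to tf.matmul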
def build_model(self):
    dense_masker = tf.sparse_tensor_to_dense(self.mask)

    with tf.name_scope('encoding'):
        encoding = tf.add(tf.sparse_tensor_dense_matmul(self.X, self.W), self.b, name='raw_values')
        encoded_values = self.enc_func(encoding, name='encoded_values')

    with tf.name_scope('decoding'):
        decoding = tf.nn.xw_plus_b(encoded_values, self.W_prime, self.b_prime)
        decoded_values = self.dec_func(decoding, name='decoded_values')
        masked_decoded_values = tf.multiply(dense_masker, decoded_values)

    with tf.name_scope('training_process'):
        diff = tf.squared_difference(tf.sparse_tensor_to_dense(self.Y, default_value=0), decoded_values)
        error = tf.reduce_sum(tf.multiply(dense_masker, diff))
        reg = 0
        for param in self.params.items():
            reg += tf.nn.l2_loss(param[1]) * self.lambda_w
        loss = error + reg

    model_params = [p for p in self.params.values()]
    train_step = self._optimize(loss, model_params)

    tf.summary.scalar('error', error)
    tf.summary.scalar('loss', loss)
    for param in self.params.items():
        tf.summary.histogram(param[0], param[1])
    # tf.summary.histogram('predictions', decoded_values)
    merged_summary = tf.summary.merge_all()

    return encoded_values, decoded_values, masked_decoded_values, error, loss, train_step, merged_summary
def _call(self, inputs):
    x = inputs
    x = tf.nn.dropout(x, 1 - self.dropout)
    x = tf.matmul(x, self.vars['weights'])
    x = tf.sparse_tensor_dense_matmul(self.adj, x)
    outputs = self.act(x)
    return outputs
def __call__(self, inputs, states, scope=None):
    with tf.variable_scope(scope or type(self).__name__):
        # This is the mode-3 matricization of W :)
        big_tensor = random_sparse_tensor(
            [self._num_units, self._num_inputs * self._num_units],
            self.sparsity, name='W_3')
        u = tf.get_variable('U', [self._num_units, self._num_units])
        v = tf.get_variable('V', [self._num_units, self._num_inputs])
        b = tf.get_variable('b', [self._num_units],
                            initializer=tf.constant_initializer(0.0))
        # Make and flatten the outer product;
        # have to do this with some unfortunate reshaping.
        outer_prod = tf.matmul(
            tf.reshape(states, [-1, self._num_units, 1]),
            tf.reshape(inputs, [-1, 1, self._num_inputs]))
        outer_prod = tf.reshape(
            outer_prod,
            [-1, self._num_units * self._num_inputs])
        tensor_prod = tf.sparse_tensor_dense_matmul(
            big_tensor, outer_prod, adjoint_b=True)
        tensor_prod = tf.transpose(tensor_prod)
        hidden_act = tf.matmul(states, u)
        input_act = tf.matmul(inputs, v)
        linears = tensor_prod + hidden_act
        linears += input_act
        linears += b
        output = self._nonlinearity(linears)
        return output, output
def affine_loss(output, M, weight):
    loss_affine = 0.0
    output_t = output / 255.
    for Vc in tf.unstack(output_t, axis=-1):
        Vc_ravel = tf.reshape(tf.transpose(Vc), [-1])
        loss_affine += tf.matmul(
            tf.expand_dims(Vc_ravel, 0),
            tf.sparse_tensor_dense_matmul(M, tf.expand_dims(Vc_ravel, -1)))
    return loss_affine * weight
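# What the loop above accumulates, per channel c, is the scalar quadratic form
# v_c^T M v_c, where v_c is the column-major flattened channel and M is the
# sparse (matting-Laplacian-style) matrix. An illustrative NumPy check of the
# same quantity:
import numpy as np
import scipy.sparse

M_np = scipy.sparse.random(16, 16, density=0.2, format='csr')
v = np.random.rand(16)
quad_form = v @ (M_np @ v)  # scalar v^T M v, matching one unstacked channel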
def tensor_mul(lin_op, value_map):
    a = tensor(lin_op.data, value_map)
    b = tensor(lin_op.args[0], value_map)
    if is_sparse(a):
        return tf.sparse_tensor_dense_matmul(a, b)
    elif is_scalar(a) or is_scalar(b):
        return tf.mul(a, b)
    else:
        return tf.matmul(a, b)
def __init__(self, field_sizes=None, embed_size=10, layer_sizes=None, layer_acts=None,
             drop_out=None, embed_l2=None, layer_l2=None, init_path=None, opt_algo='gd',
             learning_rate=1e-2, random_seed=None):
    Model.__init__(self)
    init_vars = []
    num_inputs = len(field_sizes)
    print('num_inputs:{0}\t\tlayer_size:{1}'.format(num_inputs, layer_sizes))
    for i in range(num_inputs):
        # Initialize an embedding vector of length `embed_size` for each feature value.
        init_vars.append(('embed_%d' % i, [field_sizes[i], embed_size], 'xavier', dtype))
    # Each feature is embedded as a 10-dim vector; with 16 fields in total that is
    # 160 inputs, so the network is [160, 500, 1].
    node_in = num_inputs * embed_size
    for i in range(len(layer_sizes)):
        init_vars.append(('w%d' % i, [node_in, layer_sizes[i]], 'xavier', dtype))
        init_vars.append(('b%d' % i, [layer_sizes[i]], 'zero', dtype))
        node_in = layer_sizes[i]
    print('init_vars:', init_vars)
    self.graph = tf.Graph()
    with self.graph.as_default():
        if random_seed is not None:
            tf.set_random_seed(random_seed)
        self.X = [tf.sparse_placeholder(dtype) for i in range(num_inputs)]
        self.y = tf.placeholder(dtype)
        self.keep_prob_train = 1 - np.array(drop_out)
        self.keep_prob_test = np.ones_like(drop_out)
        self.layer_keeps = tf.placeholder(dtype)
        self.vars = utils.init_var_map(init_vars, init_path)
        w0 = [self.vars['embed_%d' % i] for i in range(num_inputs)]
        # Concatenate the latent vectors of all fields to form the 160-dim network input.
        xw = tf.concat([tf.sparse_tensor_dense_matmul(self.X[i], w0[i])
                        for i in range(num_inputs)], 1)
        l = xw
        for i in range(len(layer_sizes)):
            wi = self.vars['w%d' % i]
            bi = self.vars['b%d' % i]
            print('hidden layer {0}: l.shape, wi.shape, bi.shape'.format(i),
                  l.shape, wi.shape, bi.shape)
            l = tf.nn.dropout(
                utils.activate(
                    tf.matmul(l, wi) + bi,
                    layer_acts[i]),
                self.layer_keeps[i])
        l = tf.squeeze(l)  # remove all size-1 dimensions from the tensor
        self.y_prob = tf.sigmoid(l)
        self.loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(logits=l, labels=self.y))
        if layer_l2 is not None:
            self.loss += embed_l2 * tf.nn.l2_loss(xw)
            for i in range(len(layer_sizes)):
                wi = self.vars['w%d' % i]
                self.loss += layer_l2[i] * tf.nn.l2_loss(wi)
        self.optimizer = utils.get_optimizer(opt_algo, learning_rate, self.loss)
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        self.sess = tf.Session(config=config)
        tf.global_variables_initializer().run(session=self.sess)
def adjoint_tensor_mul(lin_op, value):
    a = tensor(lin_op.data)
    b = value
    if is_sparse(a):
        c = tf.sparse_tensor_dense_matmul(a, b, adjoint_a=True)
    elif is_scalar(a) or is_scalar(b):
        c = tf.mul(tf.transpose(a), b)
    else:
        c = tf.matmul(a, b, transpose_a=True)
    return adjoint_tensor(lin_op.args[0], c)
def __init__(self, config):
    self._weights_indices = tf.placeholder(tf.int64)
    self._weights_values = tf.placeholder(config.data_type)
    self._weights_shape = tf.placeholder(tf.int64)
    self._b = tf.placeholder(config.data_type)
    self._initial_x = tf.placeholder(config.data_type)

    weights = tf.SparseTensor(self.weights_indices,
                              self.weights_values,
                              self.weights_shape)
    x = self.initial_x
    for i in range(config.num_iterations):
        # Jacobi iteration
        x = self.b - tf.sparse_tensor_dense_matmul(weights, x)
    self._final_x = x
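# The fixed point of the sweep x <- b - W x above solves (I + W) x = b, and the
# iteration converges when the spectral radius of W is below 1. A small NumPy
# check of the same update rule (illustrative values only):
import numpy as np

W = np.array([[0.0, 0.1], [0.2, 0.0]])  # off-diagonal part, rho(W) < 1
b = np.array([[1.0], [2.0]])
x = np.zeros_like(b)
for _ in range(50):
    x = b - W @ x                        # same update as the TF loop
assert np.allclose((np.eye(2) + W) @ x, b)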
def _testGradients(self, adjoint_a, adjoint_b, name, np_dtype, use_gpu=False):
    n, k, m = np.random.randint(1, 10, size=3)
    sp_t = self._randomTensor([n, k], np_dtype, adjoint=adjoint_a, sparse=True)
    dense_t = self._randomTensor([k, m], np_dtype, adjoint=adjoint_b)
    matmul = tf.sparse_tensor_dense_matmul(
        sp_t, dense_t, adjoint_a=adjoint_a, adjoint_b=adjoint_b, name=name)
    with self.test_session(use_gpu=use_gpu):
        dense_t_shape = [m, k] if adjoint_b else [k, m]
        err = tf.test.compute_gradient_error(dense_t, dense_t_shape, matmul, [n, m])
        print("%s gradient err = %s" % (name, err))
        self.assertLess(err, 1e-3)
def testShapeInference(self):
    x = np.random.rand(10, 10)
    x[np.abs(x) < 0.5] = 0  # Make it sparse
    y = np.random.randn(10, 20)
    x_indices = np.vstack(np.where(x)).astype(np.int64).T
    x_values = x[np.where(x)]
    x_shape = x.shape

    x_st = tf.SparseTensor(x_indices, x_values, x_shape)
    result = tf.sparse_tensor_dense_matmul(x_st, y)
    self.assertEqual(result.get_shape(), (10, 20))

    x_shape_unknown = tf.placeholder(dtype=tf.int64, shape=None)
    x_st_shape_unknown = tf.SparseTensor(x_indices, x_values, x_shape_unknown)
    result_left_shape_unknown = tf.sparse_tensor_dense_matmul(
        x_st_shape_unknown, y)
    self.assertEqual(
        result_left_shape_unknown.get_shape().as_list(), [None, 20])

    x_shape_inconsistent = [10, 15]
    x_st_shape_inconsistent = tf.SparseTensor(
        x_indices, x_values, x_shape_inconsistent)
    with self.assertRaisesRegexp(ValueError, "Dimensions must be equal"):
        tf.sparse_tensor_dense_matmul(x_st_shape_inconsistent, y)
def __init__(self, input_dim=None, output_dim=1, factor_order=10, init_path=None,
             opt_algo='gd', learning_rate=1e-2, l2_w=0, l2_v=0, random_seed=None):
    Model.__init__(self)
    init_vars = [('w', [input_dim, output_dim], 'xavier', dtype),
                 ('v', [input_dim, factor_order], 'xavier', dtype),
                 ('b', [output_dim], 'zero', dtype)]
    self.graph = tf.Graph()
    with self.graph.as_default():
        if random_seed is not None:
            tf.set_random_seed(random_seed)
        self.X = tf.sparse_placeholder(dtype)
        self.y = tf.placeholder(dtype)
        self.vars = utils.init_var_map(init_vars, init_path)
        w = self.vars['w']
        v = self.vars['v']
        b = self.vars['b']
        X_square = tf.SparseTensor(self.X.indices,
                                   tf.square(self.X.values),
                                   tf.to_int64(tf.shape(self.X)))
        xv = tf.square(tf.sparse_tensor_dense_matmul(self.X, v))
        p = 0.5 * tf.reshape(
            tf.reduce_sum(xv - tf.sparse_tensor_dense_matmul(X_square, tf.square(v)), 1),
            [-1, output_dim])
        xw = tf.sparse_tensor_dense_matmul(self.X, w)
        logits = tf.reshape(xw + b + p, [-1])
        self.y_prob = tf.sigmoid(logits)
        self.loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=self.y)) + \
            l2_w * tf.nn.l2_loss(xw) + \
            l2_v * tf.nn.l2_loss(xv)
        self.optimizer = utils.get_optimizer(opt_algo, learning_rate, self.loss)
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        self.sess = tf.Session(config=config)
        tf.global_variables_initializer().run(session=self.sess)
def project_biases(tf_features, n_features):
    """
    Projects the biases from the feature space to calculate the bias per actor.
    :param tf_features: tf.SparseTensor of feature values
    :param n_features: number of features
    :return: a tuple of (feature bias variables, projected per-actor biases)
    """
    tf_feature_biases = tf.Variable(tf.zeros([n_features, 1]))

    # The reduce_sum performs a rank reduction.
    tf_projected_biases = tf.reduce_sum(
        tf.sparse_tensor_dense_matmul(tf_features, tf_feature_biases),
        axis=1
    )
    return tf_feature_biases, tf_projected_biases
def _build_linear(self, hparams):
    with tf.variable_scope("linear_part", initializer=self.initializer) as scope:
        w_linear = tf.get_variable(name='w',
                                   shape=[hparams.FEATURE_COUNT, 1],
                                   dtype=tf.float32)
        b_linear = tf.get_variable(name='b',
                                   shape=[1],
                                   dtype=tf.float32,
                                   initializer=tf.zeros_initializer())
        x = tf.SparseTensor(self.iterator.fm_feat_indices,
                            self.iterator.fm_feat_values,
                            self.iterator.fm_feat_shape)
        linear_output = tf.add(tf.sparse_tensor_dense_matmul(x, w_linear), b_linear)
        self.layer_params.append(w_linear)
        self.layer_params.append(b_linear)
        tf.summary.histogram("linear_part/w", w_linear)
        tf.summary.histogram("linear_part/b", b_linear)
    return linear_output
def create_theta_offset(self, weight_basis, shape, dtype, name=None):
    assert isinstance(weight_basis, ThetaPrime), 'weight_basis should be a ThetaPrime'
    if isinstance(shape, tf.TensorShape):
        shape = shape.as_list()

    # Create projection matrix ww
    total_dim = 1
    for dim in shape:
        assert dim is not None and dim > 0, 'dimensions must be known'
        total_dim *= dim

    # Generate locations and relative scales of the non-zero elements.
    M = SRP(weight_basis.size)._make_random_matrix(weight_basis.size, total_dim)
    fm = find(M)

    # Create the sparse projection matrix from the small vv to the full theta space.
    ww0 = tf.SparseTensor(indices=np.array([fm[0], fm[1]]).T, values=fm[2],
                          dense_shape=[weight_basis.size, total_dim])
    ww = tf.cast(ww0, _convert_string_dtype(dtype))

    # Create a diagonal normalization matrix that will be filled in once all layers
    # are created, so that each row of the projection matrix (with length equal to
    # the total number of parameters in the model) can be normalized once all of
    # its elements are known. This variable holds the norms of the rows of the
    # un-normalized projection matrix.
    normalizer = tf.Variable(tf.zeros(weight_basis.size, _convert_string_dtype(dtype)),
                             trainable=False, name='%s_normalizer' % name)

    # Pre-multiply the normalizer by the low-rank parameter vector to avoid a
    # sparse-sparse matrix product, which is not well supported in TensorFlow:
    # instead of theta_full = (P*N^-1)*theta_small, where P*N^-1 is a row-normalized
    # projection matrix, compute P*(N^-1*theta_small). (N^-1*theta_small) is simply
    # an element-wise vector division.
    theta_small_norm = tf.divide(weight_basis.var_2d, normalizer)

    # Compute delta from theta_0 using the sparse projection.
    # Note: the sparse matrix must be the first argument.
    delta_theta_flat = tf.sparse_tensor_dense_matmul(ww, theta_small_norm,
                                                     adjoint_a=True, adjoint_b=True)

    # Create theta
    theta_offset = tf.reshape(delta_theta_flat, shape)
    self.basis_matrices.append(ww)
    self.basis_matrix_normalizers.append(normalizer)

    # Note: previous versions added only ww0 to _non_trainable_weights but skipped
    # normalizer. Here we more correctly return both.
    # return theta_offset, [ww0]
    return theta_offset, [ww0, normalizer]
def matmul_wrapper(A, B, optype):
    """Wrapper for handling sparse and dense versions of the matmul operation.

    Parameters
    ----------
    A : tf.Tensor
    B : tf.Tensor
    optype : str, {'dense', 'sparse'}

    Returns
    -------
    tf.Tensor
    """
    if optype == 'dense':
        return tf.matmul(A, B)
    elif optype == 'sparse':
        return tf.sparse_tensor_dense_matmul(A, B)
    else:
        raise NameError('Unknown input type in matmul_wrapper')
def nn_layer(input, input_size, output_size, name):
    with tf.name_scope('hidden' + name + '/'):
        weights_var = 1 / math.sqrt(input_size)
        weights = tf.Variable(tf.random_uniform(shape=[input_size, output_size],
                                                minval=-weights_var,
                                                maxval=weights_var),
                              name='weights')
        biases = tf.Variable(tf.zeros([output_size]), name='biases')
        preactivation = tf.add(tf.sparse_tensor_dense_matmul(input, weights),
                               biases, name='preactivation')
        activation = tf.nn.tanh(preactivation, name='activation')
        variable_summaries(weights, name='Layer' + name + '/weights')
        variable_summaries(biases, name='Layer' + name + '/biases')
        variable_summaries(preactivation, name='Layer' + name + '/preactivation')
        variable_summaries(activation, name='Layer' + name + '/activation')
        return activation
def connect_representation_graph(self, tf_features, n_components, n_features, node_name_ending):
    """
    This representation function embeds the user/item features by passing them
    through a single tanh layer.
    :param tf_features: tf.SparseTensor
        The user/item features as a SparseTensor of dimensions [n_users/items, n_features]
    :param n_components: int
        The dimensionality of the resulting representation.
    :param n_features: int
        The number of features in tf_features.
    :param node_name_ending: String
        Either 'user' or 'item'.
    :return: A tuple of (tf.Tensor, list) where the first value is the resulting
        representation in n_components dimensions and the second value is a list
        containing all tf.Variables which should be subject to regularization.
    """
    tf_tanh_weights = tf.Variable(tf.random_normal([n_features, n_components], stddev=.5),
                                  name='tanh_weights_%s' % node_name_ending)
    tf_repr = tf.nn.tanh(tf.sparse_tensor_dense_matmul(tf_features, tf_tanh_weights))

    # Return repr layer and variables
    return tf_repr, [tf_tanh_weights]
def bilinear_product_sparse(vec_a, tensor, vec_b, output_size, batch_major=True):
    """Performs a bilinear product with a sparse tensor.

    If vec_a is [I x 1] and vec_b is [K x 1], `tensor` should be the transpose
    of a mode-1 unfolding of an [I x J x K] tensor (so a [JK x I] sparse matrix).

    Args:
        vec_a: a vector
        tensor: transpose of the mode-1 unfolding of a sparse tensor
        vec_b: a vector
        output_size: the length of the output vectors; turns out to be handy.
        batch_major: whether the vectors are actually [batch_size x {I,K}]

    Returns:
        a vector with the length of the middle dimension of `tensor` when it
        is not unfolded.
    """
    # It turns out we want vec_a to be [I x B] and vec_b to be [B x K].
    if batch_major:
        vec_a = tf.transpose(vec_a)
    else:
        vec_b = tf.transpose(vec_b)
    # We can do this as a matmul, a reshape, and a batch matmul.
    temp = tf.sparse_tensor_dense_matmul(tensor, vec_a)
    # The result is [JK x batch_size].
    temp = tf.transpose(temp)
    # Make sure the reshaping is reshaping properly.
    temp = tf.reshape(temp, [vec_b.get_shape()[0].value,
                             output_size,
                             vec_b.get_shape()[1].value])
    # Now temp is [B x J x K]. Add a trailing 1 to the shape of vec_b so it is
    # [B x K x 1], then squeeze the result back to 2D, [B x J].
    return tf.squeeze(tf.matmul(temp, tf.expand_dims(vec_b, 2)), [2])
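# A sketch of how the `tensor` argument above can be built: the transpose of
# the mode-1 unfolding of a dense [I, J, K] tensor T, stored as a [J*K, I]
# tf.SparseTensor. The helper name is illustrative, not from the original code.
import numpy as np
import tensorflow as tf

def sparse_mode1_unfolding_T(T):
    I, J, K = T.shape
    M = T.reshape(I, J * K).T  # [JK, I]; C-order puts T[i, j, k] at row j*K + k
    rows, cols = np.nonzero(M)
    indices = np.stack([rows, cols], axis=1).astype(np.int64)
    return tf.SparseTensor(indices, M[rows, cols].astype(np.float32), [J * K, I])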
def build_graph(self):
    indices = [[self.data.item[item[1]], self.data.user[item[0]]]
               for item in self.data.trainingData]
    values = [item[2] for item in self.data.trainingData]
    self.i_u_matrix = tf.SparseTensor(indices=indices, values=values,
                                      dense_shape=[self.num_items, self.num_users])
    self.pos = tf.placeholder(tf.int32, name="positive_item")
    self.fnd = tf.placeholder(tf.int32, name="friend_item")
    self.neg = tf.placeholder(tf.int32, name="neg_holder")
    self.i = tf.placeholder(tf.int32, name="item_holder")

    with tf.name_scope("generator"):  # CDAE
        initializer = tf.contrib.layers.xavier_initializer()
        self.X = tf.placeholder(tf.float32, [None, self.num_users])
        self.V = tf.Variable(initializer([self.num_users, 200]))
        chosen_user_embeddings = tf.nn.embedding_lookup(self.V, self.u_idx)
        self.weights = {
            'encoder': tf.Variable(initializer([self.num_users, 200])),
            'decoder': tf.Variable(initializer([200, self.num_users])),
        }
        self.biases = {
            'encoder': tf.Variable(initializer([200])),
            'decoder': tf.Variable(initializer([self.num_users])),
        }
        self.g_params = [self.weights, self.biases, self.V]
        layer = tf.nn.sigmoid(
            tf.matmul(self.X, self.weights['encoder'])
            + self.biases['encoder'] + chosen_user_embeddings)
        self.g_output = tf.nn.sigmoid(
            tf.matmul(layer, self.weights['decoder']) + self.biases['decoder'])
        self.y_pred = tf.multiply(self.X, self.g_output)
        self.y_pred = tf.maximum(1e-6, self.y_pred)
        cross_entropy = -tf.multiply(self.X, tf.log(self.y_pred)) \
            - tf.multiply((1 - self.X), tf.log(1 - self.y_pred))
        self.reconstruction = tf.reduce_sum(cross_entropy) + self.regU * (
            tf.nn.l2_loss(self.weights['encoder'])
            + tf.nn.l2_loss(self.weights['decoder'])
            + tf.nn.l2_loss(self.biases['encoder'])
            + tf.nn.l2_loss(self.biases['decoder']))
        g_pre = tf.train.AdamOptimizer(self.lRate)
        self.g_pretrain = g_pre.minimize(self.reconstruction, var_list=self.g_params)

    with tf.variable_scope('discriminator'):
        self.item_selection = tf.get_variable(
            'item_selection',
            initializer=tf.constant_initializer(0.001),
            shape=[self.num_users, self.num_items])
        self.g_params.append(self.item_selection)
        self.d_params = [self.user_embeddings, self.item_embeddings]

        # placeholder definition
        self.u_embedding = tf.nn.embedding_lookup(self.user_embeddings, self.u_idx, name='u_e')
        self.i_embedding = tf.nn.embedding_lookup(self.item_embeddings, self.pos, name='i_e')
        self.j_embedding = tf.nn.embedding_lookup(self.item_embeddings, self.neg, name='j_e')

        # generate virtual friends by gumbel-softmax
        self.virtualFriends = self.sampling(self.g_output)  # one-hot

        # get candidate list (items)
        self.candidateItems = tf.transpose(
            tf.sparse_tensor_dense_matmul(self.i_u_matrix,
                                          tf.transpose(self.virtualFriends)))
        self.embedding_selection = tf.nn.embedding_lookup(
            self.item_selection, self.u_idx, name='e_s')
        self.virtual_items = self.sampling(
            tf.multiply(self.candidateItems, self.embedding_selection))
        self.v_i_embedding = tf.matmul(self.virtual_items, self.item_embeddings,
                                       transpose_a=False, transpose_b=False)

        y_us = tf.reduce_sum(tf.multiply(self.u_embedding, self.i_embedding), 1) \
            - tf.reduce_sum(tf.multiply(self.u_embedding, self.j_embedding), 1)
        self.d_pretrain_loss = -tf.reduce_sum(tf.log(tf.sigmoid(y_us))) + self.regU * (
            tf.nn.l2_loss(self.u_embedding)
            + tf.nn.l2_loss(self.j_embedding)
            + tf.nn.l2_loss(self.i_embedding))

        y_uf = tf.reduce_sum(tf.multiply(self.u_embedding, self.i_embedding), 1) \
            - tf.reduce_sum(tf.multiply(self.u_embedding, self.v_i_embedding), 1)
        y_fs = tf.reduce_sum(tf.multiply(self.u_embedding, self.v_i_embedding), 1) \
            - tf.reduce_sum(tf.multiply(self.u_embedding, self.j_embedding), 1)

        self.d_loss = -tf.reduce_sum(tf.log(tf.sigmoid(y_uf))) \
            - tf.reduce_sum(tf.log(tf.sigmoid(y_fs))) \
            + self.regU * (tf.nn.l2_loss(self.u_embedding)
                           + tf.nn.l2_loss(self.i_embedding)
                           + tf.nn.l2_loss(self.j_embedding))
        # self.g_loss must be defined before it is minimized below; the line was
        # commented out in the original and is re-enabled here for that reason.
        self.g_loss = 30 * tf.reduce_sum(y_uf)  # better performance

        d_pre = tf.train.AdamOptimizer(self.lRate)
        self.d_pretrain = d_pre.minimize(self.d_pretrain_loss, var_list=self.d_params)
        self.d_output = tf.reduce_sum(
            tf.multiply(self.u_embedding, self.item_embeddings), 1)

        d_opt = tf.train.AdamOptimizer(self.lRate)
        self.d_update = d_opt.minimize(self.d_loss, var_list=self.d_params)
        g_opt = tf.train.AdamOptimizer(self.lRate)
        self.g_update = g_opt.minimize(self.g_loss, var_list=self.g_params)
def __create_model(self, proximity):
    w_init = tf.contrib.layers.xavier_initializer
    # w_init = tf.random_normal_initializer(mean=0.0, stddev=0.1, seed=None)
    # w_init = tf.keras.initializers.random_normal
    sizes1 = [self.D1] + self.n_hidden1
    sizes2 = [self.D2] + self.n_hidden2
    sizes3 = [self.D3] + self.n_hidden3
    sizes4 = [self.D4] + self.n_hidden4

    # feature 1
    TRAINABLE = True
    with tf.name_scope("Train"):
        for i in range(1, len(sizes1)):
            with tf.name_scope("enc{}".format(i)):
                W = tf.get_variable(name='W1{}'.format(i), shape=[sizes1[i - 1], sizes1[i]],
                                    dtype=tf.float32, initializer=w_init(), trainable=TRAINABLE)
                b = tf.get_variable(name='b1{}'.format(i), shape=[sizes1[i]],
                                    dtype=tf.float32, initializer=w_init(), trainable=TRAINABLE)
                if i == 1:
                    encoded1 = tf.sparse_tensor_dense_matmul(self.X1, W) + b
                else:
                    encoded1 = tf.matmul(encoded1, W) + b
                encoded1 = tf.nn.relu(encoded1)
                tf.summary.histogram('Weight', W)
                tf.summary.histogram('bias', b)
                tf.summary.histogram('activations', encoded1)
    # encoded1 = tf.Print(encoded1, [encoded1], message="feature 1 encoder triggered")

    # feature 2
    TRAINABLE = True
    with tf.name_scope("Region"):
        for i in range(1, len(sizes2)):
            with tf.name_scope("enc{}".format(i)):
                W = tf.get_variable(name='W2{}'.format(i), shape=[sizes2[i - 1], sizes2[i]],
                                    dtype=tf.float32, initializer=w_init(), trainable=TRAINABLE)
                b = tf.get_variable(name='b2{}'.format(i), shape=[sizes2[i]],
                                    dtype=tf.float32, initializer=w_init(), trainable=TRAINABLE)
                if i == 1:
                    encoded2 = tf.sparse_tensor_dense_matmul(self.X2, W) + b
                else:
                    encoded2 = tf.matmul(encoded2, W) + b
                encoded2 = tf.nn.relu(encoded2)
                tf.summary.histogram('Weight', W)
                tf.summary.histogram('bias', b)
                tf.summary.histogram('activations', encoded2)
    # encoded2 = tf.Print(encoded2, [encoded2], message="feature 2 encoder triggered")

    # feature 3
    TRAINABLE = True
    with tf.name_scope("School"):
        for i in range(1, len(sizes3)):
            with tf.name_scope("enc{}".format(i)):
                W = tf.get_variable(name='W3{}'.format(i), shape=[sizes3[i - 1], sizes3[i]],
                                    dtype=tf.float32, initializer=w_init(), trainable=TRAINABLE)
                b = tf.get_variable(name='b3{}'.format(i), shape=[sizes3[i]],
                                    dtype=tf.float32, initializer=w_init(), trainable=TRAINABLE)
                if i == 1:
                    encoded3 = tf.sparse_tensor_dense_matmul(self.X3, W) + b
                else:
                    encoded3 = tf.matmul(encoded3, W) + b
                encoded3 = tf.nn.relu(encoded3)
                tf.summary.histogram('Weight', W)
                tf.summary.histogram('bias', b)
                tf.summary.histogram('activations', encoded3)
    # encoded3 = tf.Print(encoded3, [encoded3], message="feature 3 encoder triggered")

    # feature 4
    TRAINABLE = True
    with tf.name_scope("House"):
        for i in range(1, len(sizes4)):
            with tf.name_scope("enc{}".format(i)):
                W = tf.get_variable(name='W4{}'.format(i), shape=[sizes4[i - 1], sizes4[i]],
                                    dtype=tf.float32, initializer=w_init(), trainable=TRAINABLE)
                b = tf.get_variable(name='b4{}'.format(i), shape=[sizes4[i]],
                                    dtype=tf.float32, initializer=w_init(), trainable=TRAINABLE)
                if i == 1:
                    encoded4 = tf.sparse_tensor_dense_matmul(self.X4, W) + b
                else:
                    encoded4 = tf.matmul(encoded4, W) + b
                encoded4 = tf.nn.relu(encoded4)
                tf.summary.histogram('Weight', W)
                tf.summary.histogram('bias', b)
                tf.summary.histogram('activations', encoded4)
    # encoded4 = tf.Print(encoded4, [encoded4], message="feature 4 encoder triggered")

    # W_MU/SIGMA AND B_MU/SIGMA ARE SHARED BETWEEN ALL FEATURES.
    # SHAPE: THOUGH WE USE SIZES1[-1], KEEP IN MIND IT IS THE SAME FOR ALL SHAPES.
    """
    W_mu = tf.get_variable(name='W_mu', shape=[sizes1[-1], self.L],
                           dtype=tf.float32, initializer=w_init())
    b_mu = tf.get_variable(name='b_mu', shape=[self.L],
                           dtype=tf.float32, initializer=w_init())
    self.embedding1 = tf.matmul(encoded1, W_mu) + b_mu
    self.embedding2 = tf.matmul(encoded2, W_mu) + b_mu
    self.embedding3 = tf.matmul(encoded3, W_mu) + b_mu
    self.embedding4 = tf.matmul(encoded4, W_mu) + b_mu
    with tf.name_scope("shared"):
        with tf.name_scope("mu"):
            tf.summary.histogram('Weight', W_mu)
            tf.summary.histogram('bias', b_mu)
            mu_embed_activations = [self.embedding1, self.embedding2,
                                    self.embedding3, self.embedding4]
            tf.summary.histogram('activations', tf.concat(mu_embed_activations, 0))
    """
    '''
    self.embedding1 = tf.nn.sigmoid(tf.matmul(encoded1, W_mu) + b_mu) + 1 + 1e-14
    self.embedding2 = tf.nn.sigmoid(tf.matmul(encoded2, W_mu) + b_mu) + 1 + 1e-14
    self.embedding3 = tf.nn.sigmoid(tf.matmul(encoded3, W_mu) + b_mu) + 1 + 1e-14
    self.embedding4 = tf.nn.sigmoid(tf.matmul(encoded4, W_mu) + b_mu) + 1 + 1e-14
    '''
    """
    W_sigma = tf.get_variable(name='W_sigma', shape=[sizes1[-1], self.L],
                              dtype=tf.float32, initializer=w_init())
    b_sigma = tf.get_variable(name='b_sigma', shape=[self.L],
                              dtype=tf.float32, initializer=w_init())
    log_sigma1 = tf.matmul(encoded1, W_sigma) + b_sigma
    self.sigma1 = tf.nn.elu(log_sigma1) + 1 + 1e-14
    # self.sigma1 = tf.nn.sigmoid(log_sigma1) + 1 + 1e-14
    log_sigma2 = tf.matmul(encoded2, W_sigma) + b_sigma
    self.sigma2 = tf.nn.elu(log_sigma2) + 1 + 1e-14
    # self.sigma2 = tf.nn.sigmoid(log_sigma2) + 1 + 1e-14
    log_sigma3 = tf.matmul(encoded3, W_sigma) + b_sigma
    self.sigma3 = tf.nn.elu(log_sigma3) + 1 + 1e-14
    # self.sigma3 = tf.nn.sigmoid(log_sigma3) + 1 + 1e-14
    log_sigma4 = tf.matmul(encoded4, W_sigma) + b_sigma
    self.sigma4 = tf.nn.elu(log_sigma4) + 1 + 1e-14
    # self.sigma4 = tf.nn.sigmoid(log_sigma4) + 1 + 1e-14
    with tf.name_scope("shared"):
        with tf.name_scope("sigma"):
            tf.summary.histogram('Weight', W_sigma)
            tf.summary.histogram('bias', b_sigma)
            sigma_embed_activations = [self.sigma1, self.sigma2, self.sigma3, self.sigma4]
            tf.summary.histogram('activations', tf.concat(sigma_embed_activations, 0))
    """

    ############ EXPERIMENTAL FEATURES. PLEASE REMOVE IF THEY DON'T WORK ############
    W_mu1 = tf.get_variable(name='W_mu1', shape=[sizes1[-1], 40],
                            dtype=tf.float32, initializer=w_init())
    b_mu1 = tf.get_variable(name='b_mu1', shape=[40],
                            dtype=tf.float32, initializer=w_init())
    W_mu2 = tf.get_variable(name='W_mu2', shape=[40, self.L],
                            dtype=tf.float32, initializer=w_init())
    b_mu2 = tf.get_variable(name='b_mu2', shape=[self.L],
                            dtype=tf.float32, initializer=w_init())
    embedding1_t = tf.nn.relu(tf.matmul(encoded1, W_mu1) + b_mu1)
    self.embedding1 = tf.matmul(embedding1_t, W_mu2) + b_mu2
    embedding2_t = tf.nn.relu(tf.matmul(encoded2, W_mu1) + b_mu1)
    self.embedding2 = tf.matmul(embedding2_t, W_mu2) + b_mu2
    embedding3_t = tf.nn.relu(tf.matmul(encoded3, W_mu1) + b_mu1)
    self.embedding3 = tf.matmul(embedding3_t, W_mu2) + b_mu2
    embedding4_t = tf.nn.relu(tf.matmul(encoded4, W_mu1) + b_mu1)
    self.embedding4 = tf.matmul(embedding4_t, W_mu2) + b_mu2

    W_sigma1 = tf.get_variable(name='W_sigma1', shape=[sizes1[-1], 40],
                               dtype=tf.float32, initializer=w_init())
    W_sigma2 = tf.get_variable(name='W_sigma2', shape=[40, self.L],
                               dtype=tf.float32, initializer=w_init())
    b_sigma1 = tf.get_variable(name='b_sigma1', shape=[40],
                               dtype=tf.float32, initializer=w_init())
    b_sigma2 = tf.get_variable(name='b_sigma2', shape=[self.L],
                               dtype=tf.float32, initializer=w_init())
    log_sigma1t = tf.nn.relu(tf.matmul(encoded1, W_sigma1) + b_sigma1)
    log_sigma1 = tf.matmul(log_sigma1t, W_sigma2) + b_sigma2
    self.sigma1 = tf.nn.elu(log_sigma1) + 1 + 1e-14
    # self.sigma1 = tf.nn.sigmoid(log_sigma1) + 1 + 1e-14
    log_sigma2t = tf.nn.relu(tf.matmul(encoded2, W_sigma1) + b_sigma1)
    log_sigma2 = tf.matmul(log_sigma2t, W_sigma2) + b_sigma2
    self.sigma2 = tf.nn.elu(log_sigma2) + 1 + 1e-14
    # self.sigma2 = tf.nn.sigmoid(log_sigma2) + 1 + 1e-14
    log_sigma3t = tf.nn.relu(tf.matmul(encoded3, W_sigma1) + b_sigma1)
    log_sigma3 = tf.matmul(log_sigma3t, W_sigma2) + b_sigma2
    self.sigma3 = tf.nn.elu(log_sigma3) + 1 + 1e-14
    # self.sigma3 = tf.nn.sigmoid(log_sigma3) + 1 + 1e-14
    log_sigma4t = tf.nn.relu(tf.matmul(encoded4, W_sigma1) + b_sigma1)
    log_sigma4 = tf.matmul(log_sigma4t, W_sigma2) + b_sigma2
    self.sigma4 = tf.nn.elu(log_sigma4) + 1 + 1e-14
    # self.sigma4 = tf.nn.sigmoid(log_sigma4) + 1 + 1e-14
    ############ END OF EXPERIMENTAL; DELETE IF IT DOESN'T WORK ############

    if proximity == 'second-order':
        # feature 1
        for i in range(1, len(sizes1)):
            W = tf.get_variable(name='W_ctx1{}'.format(i), shape=[sizes1[i - 1], sizes1[i]],
                                dtype=tf.float32, initializer=w_init())
            b = tf.get_variable(name='b_ctx1{}'.format(i), shape=[sizes1[i]],
                                dtype=tf.float32, initializer=w_init())
            if i == 1:
                encoded1 = tf.sparse_tensor_dense_matmul(self.X1, W) + b
            else:
                encoded1 = tf.matmul(encoded1, W) + b
            encoded1 = tf.nn.relu(encoded1)
        # feature 2
        for i in range(1, len(sizes2)):
            W = tf.get_variable(name='W_ctx2{}'.format(i), shape=[sizes2[i - 1], sizes2[i]],
                                dtype=tf.float32, initializer=w_init())
            b = tf.get_variable(name='b_ctx2{}'.format(i), shape=[sizes2[i]],
                                dtype=tf.float32, initializer=w_init())
            if i == 1:
                encoded2 = tf.sparse_tensor_dense_matmul(self.X2, W) + b
            else:
                encoded2 = tf.matmul(encoded2, W) + b
            encoded2 = tf.nn.relu(encoded2)
        # feature 3
        for i in range(1, len(sizes3)):
            W = tf.get_variable(name='W_ctx3{}'.format(i), shape=[sizes3[i - 1], sizes3[i]],
                                dtype=tf.float32, initializer=w_init())
            b = tf.get_variable(name='b_ctx3{}'.format(i), shape=[sizes3[i]],
                                dtype=tf.float32, initializer=w_init())
            if i == 1:
                encoded3 = tf.sparse_tensor_dense_matmul(self.X3, W) + b
            else:
                encoded3 = tf.matmul(encoded3, W) + b
            encoded3 = tf.nn.relu(encoded3)
        # feature 4
        for i in range(1, len(sizes4)):
            W = tf.get_variable(name='W_ctx4{}'.format(i), shape=[sizes4[i - 1], sizes4[i]],
                                dtype=tf.float32, initializer=w_init())
            b = tf.get_variable(name='b_ctx4{}'.format(i), shape=[sizes4[i]],
                                dtype=tf.float32, initializer=w_init())
            if i == 1:
                encoded4 = tf.sparse_tensor_dense_matmul(self.X4, W) + b
            else:
                encoded4 = tf.matmul(encoded4, W) + b
            encoded4 = tf.nn.relu(encoded4)

        ########## USE INTERCHANGEABLY WITH THE HIGHER-DIMENSION VERSION ##########
        """
        W_mu = tf.get_variable(name='W_mu_ctx', shape=[sizes1[-1], self.L],
                               dtype=tf.float32, initializer=w_init())
        b_mu = tf.get_variable(name='b_mu_ctx', shape=[self.L],
                               dtype=tf.float32, initializer=w_init())
        self.ctx_mu1 = tf.matmul(encoded1, W_mu) + b_mu
        self.ctx_mu2 = tf.matmul(encoded2, W_mu) + b_mu
        self.ctx_mu3 = tf.matmul(encoded3, W_mu) + b_mu
        self.ctx_mu4 = tf.matmul(encoded4, W_mu) + b_mu
        '''
        self.ctx_mu1 = tf.nn.sigmoid(tf.matmul(encoded1, W_mu) + b_mu) + 1 + 1e-14
        self.ctx_mu2 = tf.nn.sigmoid(tf.matmul(encoded2, W_mu) + b_mu) + 1 + 1e-14
        self.ctx_mu3 = tf.nn.sigmoid(tf.matmul(encoded3, W_mu) + b_mu) + 1 + 1e-14
        self.ctx_mu4 = tf.nn.sigmoid(tf.matmul(encoded4, W_mu) + b_mu) + 1 + 1e-14
        '''
        W_sigma = tf.get_variable(name='W_sigma_ctx', shape=[sizes1[-1], self.L],
                                  dtype=tf.float32, initializer=w_init())
        b_sigma = tf.get_variable(name='b_sigma_ctx', shape=[self.L],
                                  dtype=tf.float32, initializer=w_init())
        log_sigma1 = tf.matmul(encoded1, W_sigma) + b_sigma
        self.ctx_sigma1 = tf.nn.elu(log_sigma1) + 1 + 1e-14
        # self.ctx_sigma1 = tf.nn.sigmoid(log_sigma1) + 1 + 1e-14
        log_sigma2 = tf.matmul(encoded2, W_sigma) + b_sigma
        self.ctx_sigma2 = tf.nn.elu(log_sigma2) + 1 + 1e-14
        # self.ctx_sigma2 = tf.nn.sigmoid(log_sigma2) + 1 + 1e-14
        log_sigma3 = tf.matmul(encoded3, W_sigma) + b_sigma
        self.ctx_sigma3 = tf.nn.elu(log_sigma3) + 1 + 1e-14
        # self.ctx_sigma3 = tf.nn.sigmoid(log_sigma3) + 1 + 1e-14
        log_sigma4 = tf.matmul(encoded4, W_sigma) + b_sigma
        self.ctx_sigma4 = tf.nn.elu(log_sigma4) + 1 + 1e-14
        # self.ctx_sigma4 = tf.nn.sigmoid(log_sigma4) + 1 + 1e-14
        """

        ########## HIGHER-DIMENSION VERSION ##########
        W_mu1 = tf.get_variable(name='W_mu_ctx1', shape=[sizes1[-1], 40],
                                dtype=tf.float32, initializer=w_init())
        b_mu1 = tf.get_variable(name='b_mu_ctx1', shape=[40],
                                dtype=tf.float32, initializer=w_init())
        W_mu2 = tf.get_variable(name='W_mu_ctx2', shape=[40, self.L],
                                dtype=tf.float32, initializer=w_init())
        b_mu2 = tf.get_variable(name='b_mu_ctx2', shape=[self.L],
                                dtype=tf.float32, initializer=w_init())
        ctx_mu1_t = tf.nn.relu(tf.matmul(encoded1, W_mu1) + b_mu1)
        self.ctx_mu1 = tf.matmul(ctx_mu1_t, W_mu2) + b_mu2
        ctx_mu2_t = tf.nn.relu(tf.matmul(encoded2, W_mu1) + b_mu1)
        self.ctx_mu2 = tf.matmul(ctx_mu2_t, W_mu2) + b_mu2
        ctx_mu3_t = tf.nn.relu(tf.matmul(encoded3, W_mu1) + b_mu1)
        self.ctx_mu3 = tf.matmul(ctx_mu3_t, W_mu2) + b_mu2
        ctx_mu4_t = tf.nn.relu(tf.matmul(encoded4, W_mu1) + b_mu1)
        self.ctx_mu4 = tf.matmul(ctx_mu4_t, W_mu2) + b_mu2

        W_sigma1 = tf.get_variable(name='W_sigma_ctx1', shape=[sizes1[-1], 40],
                                   dtype=tf.float32, initializer=w_init())
        W_sigma2 = tf.get_variable(name='W_sigma_ctx2', shape=[40, self.L],
                                   dtype=tf.float32, initializer=w_init())
        b_sigma1 = tf.get_variable(name='b_sigma_ctx1', shape=[40],
                                   dtype=tf.float32, initializer=w_init())
        b_sigma2 = tf.get_variable(name='b_sigma_ctx2', shape=[self.L],
                                   dtype=tf.float32, initializer=w_init())
        log_sigma1t = tf.nn.relu(tf.matmul(encoded1, W_sigma1) + b_sigma1)
        log_sigma1 = tf.matmul(log_sigma1t, W_sigma2) + b_sigma2
        self.ctx_sigma1 = tf.nn.elu(log_sigma1) + 1 + 1e-14
        # self.ctx_sigma1 = tf.nn.sigmoid(log_sigma1) + 1 + 1e-14
        log_sigma2t = tf.nn.relu(tf.matmul(encoded2, W_sigma1) + b_sigma1)
        log_sigma2 = tf.matmul(log_sigma2t, W_sigma2) + b_sigma2
        self.ctx_sigma2 = tf.nn.elu(log_sigma2) + 1 + 1e-14
        # self.ctx_sigma2 = tf.nn.sigmoid(log_sigma2) + 1 + 1e-14
        log_sigma3t = tf.nn.relu(tf.matmul(encoded3, W_sigma1) + b_sigma1)
        log_sigma3 = tf.matmul(log_sigma3t, W_sigma2) + b_sigma2
        self.ctx_sigma3 = tf.nn.elu(log_sigma3) + 1 + 1e-14
        # self.ctx_sigma3 = tf.nn.sigmoid(log_sigma3) + 1 + 1e-14
        log_sigma4t = tf.nn.relu(tf.matmul(encoded4, W_sigma1) + b_sigma1)
        log_sigma4 = tf.matmul(log_sigma4t, W_sigma2) + b_sigma2
        self.ctx_sigma4 = tf.nn.elu(log_sigma4) + 1 + 1e-14
        # self.ctx_sigma4 = tf.nn.sigmoid(log_sigma4) + 1 + 1e-14
def _process_input_helper(self, update_row_factors, sp_input=None, transpose_input=False):
    """Creates the graph for processing a sparse slice of input.

    Args:
        update_row_factors: if True, update the row_factors, else update the
            column factors.
        sp_input: Please refer to comments for update_row_factors and
            update_col_factors.
        transpose_input: If true, logically transpose the input.

    Returns:
        A tuple consisting of the following two elements:
        new_values: New values for the row/column factors.
        update_op: An op that assigns the newly computed values to the
            row/column factors.
    """
    assert isinstance(sp_input, ops.SparseTensor)

    if update_row_factors:
        left = self._row_factors
        right = self._col_factors_cache
        row_weights = self._row_wt_cache
        col_weights = self._col_wt_cache
        sharding_func = WALSModel._get_sharding_func(self._input_rows, self._num_row_shards)
        right_length = self._input_cols
    else:
        left = self._col_factors
        right = self._row_factors_cache
        row_weights = self._col_wt_cache
        col_weights = self._row_wt_cache
        sharding_func = WALSModel._get_sharding_func(self._input_cols, self._num_col_shards)
        right_length = self._input_rows
        transpose_input = not transpose_input

    # Note that the row indices of sp_input are based on the original full input.
    # Here we reindex the rows and give them contiguous ids starting at 0.
    # We use tf.unique to achieve this reindexing. Note that this is done so
    # that the downstream kernel can assume that the input is "dense" along the
    # row dimension.
    row_ids, col_ids = tf.split(1, 2, sp_input.indices)
    if transpose_input:
        update_indices, all_ids = tf.unique(col_ids[:, 0])
        col_ids = tf.expand_dims(tf.cast(all_ids, tf.int64), 1)
    else:
        update_indices, all_ids = tf.unique(row_ids[:, 0])
        row_ids = tf.expand_dims(tf.cast(all_ids, tf.int64), 1)

    num_rows = tf.cast(tf.shape(update_indices)[0], tf.int64)
    row_shape = tf.constant([right_length], tf.int64)
    col_shape = [num_rows]
    new_sp_indices = tf.concat(1, [row_ids, col_ids])
    new_sp_shape = (tf.concat(0, [row_shape, col_shape]) if transpose_input
                    else tf.concat(0, [col_shape, row_shape]))
    new_sp_input = tf.SparseTensor(indices=new_sp_indices,
                                   values=sp_input.values,
                                   shape=new_sp_shape)

    # Compute lhs and rhs of the normal equations.
    total_lhs = (self._unobserved_weight *
                 tf.matmul(right, right, transpose_a=True))
    if self._regularization is not None:
        total_lhs += self._regularization
    if self._row_weights is None:
        # Special case of ALS. Use a much simpler update rule.
        total_rhs = (self._unobserved_weight *
                     tf.sparse_tensor_dense_matmul(new_sp_input, right,
                                                   adjoint_a=transpose_input))
        # TODO(rmlarsen): handle transposing in tf.matrix_solve instead of
        # transposing explicitly.
        # TODO(rmlarsen): multi-thread tf.matrix_solve.
        new_left_values = tf.transpose(
            tf.matrix_solve(total_lhs, tf.transpose(total_rhs)))
    else:
        row_weights_slice = tf.gather(row_weights, update_indices)
        partial_lhs, total_rhs = wals_compute_partial_lhs_and_rhs(
            right, col_weights, self._unobserved_weight, row_weights_slice,
            new_sp_input.indices, new_sp_input.values, num_rows,
            transpose_input, name="wals_compute_partial_lhs_rhs")
        total_lhs = tf.expand_dims(total_lhs, 0) + partial_lhs
        total_rhs = tf.expand_dims(total_rhs, -1)
        new_left_values = tf.squeeze(
            tf.batch_matrix_solve(total_lhs, total_rhs), [2])

    return (new_left_values,
            self.scatter_update(left, update_indices, new_left_values, sharding_func))
def __create_model(self, proximity):
    w_init = tf.contrib.layers.xavier_initializer
    sizes = [self.D] + self.n_hidden
    for i in range(1, len(sizes)):
        W = tf.get_variable(name='W{}'.format(i), shape=[sizes[i - 1], sizes[i]],
                            dtype=tf.float32, initializer=w_init())
        b = tf.get_variable(name='b{}'.format(i), shape=[sizes[i]],
                            dtype=tf.float32, initializer=w_init())
        if i == 1:
            encoded = tf.sparse_tensor_dense_matmul(self.X, W) + b
        else:
            encoded = tf.matmul(encoded, W) + b
        encoded = tf.nn.relu(encoded)

    W_mu = tf.get_variable(name='W_mu', shape=[sizes[-1], self.L],
                           dtype=tf.float32, initializer=w_init())
    b_mu = tf.get_variable(name='b_mu', shape=[self.L],
                           dtype=tf.float32, initializer=w_init())
    self.embedding = tf.matmul(encoded, W_mu) + b_mu

    W_sigma = tf.get_variable(name='W_sigma', shape=[sizes[-1], self.L],
                              dtype=tf.float32, initializer=w_init())
    b_sigma = tf.get_variable(name='b_sigma', shape=[self.L],
                              dtype=tf.float32, initializer=w_init())
    log_sigma = tf.matmul(encoded, W_sigma) + b_sigma
    self.sigma = tf.nn.elu(log_sigma) + 1 + 1e-14

    if proximity == 'second-order':
        for i in range(1, len(sizes)):
            W = tf.get_variable(name='W_ctx{}'.format(i), shape=[sizes[i - 1], sizes[i]],
                                dtype=tf.float32, initializer=w_init())
            b = tf.get_variable(name='b_ctx{}'.format(i), shape=[sizes[i]],
                                dtype=tf.float32, initializer=w_init())
            if i == 1:
                encoded = tf.sparse_tensor_dense_matmul(self.X, W) + b
            else:
                encoded = tf.matmul(encoded, W) + b
            encoded = tf.nn.relu(encoded)

        W_mu = tf.get_variable(name='W_mu_ctx', shape=[sizes[-1], self.L],
                               dtype=tf.float32, initializer=w_init())
        b_mu = tf.get_variable(name='b_mu_ctx', shape=[self.L],
                               dtype=tf.float32, initializer=w_init())
        self.ctx_mu = tf.matmul(encoded, W_mu) + b_mu

        W_sigma = tf.get_variable(name='W_sigma_ctx', shape=[sizes[-1], self.L],
                                  dtype=tf.float32, initializer=w_init())
        b_sigma = tf.get_variable(name='b_sigma_ctx', shape=[self.L],
                                  dtype=tf.float32, initializer=w_init())
        log_sigma = tf.matmul(encoded, W_sigma) + b_sigma
        self.ctx_sigma = tf.nn.elu(log_sigma) + 1 + 1e-14
def FirstGCNLayerWithActiveFun_NoX(norm_adj_mat, W, b):
    return tf.nn.relu(tf.add(tf.sparse_tensor_dense_matmul(norm_adj_mat, W), b))
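# A sketch of how `norm_adj_mat` is typically constructed for such a layer: the
# symmetrically normalized adjacency D^-1/2 (A + I) D^-1/2 as a tf.SparseTensor.
# This is an assumption about the caller, not shown in the snippet above.
import numpy as np
import scipy.sparse
import tensorflow as tf

def normalized_adjacency(A):
    A_hat = scipy.sparse.csr_matrix(A) + scipy.sparse.identity(A.shape[0])
    d_inv_sqrt = np.power(np.asarray(A_hat.sum(1)).flatten(), -0.5)
    D_inv_sqrt = scipy.sparse.diags(d_inv_sqrt)
    L = (D_inv_sqrt @ A_hat @ D_inv_sqrt).tocoo()
    indices = np.column_stack((L.row, L.col))
    return tf.SparseTensor(indices, L.data.astype(np.float32), L.shape)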
def call(self, inputs):
    final_w = tf.transpose(
        tf.sparse_tensor_dense_matmul(self.ancestry_sparse_tensor, self.w))
    return tf.matmul(inputs, final_w) + self.b
def rotate_gconv_kernels(kernel, periodicity=2 * np.pi, diskMask=True):
    """ Rotates the set of SE2 kernels.

    Rotation of SE2 kernels involves planar rotations and a shift in
    orientation, see e.g. the left-regular representation L_g of the
    roto-translation group on SE(2) images, (Eq. 3) of the MICCAI 2018 paper.

    INPUT:
        - kernel, a tensorflow tensor with expected shape:
            [Height, Width, nbOrientations, ChannelsIN, ChannelsOUT]

    INPUT (optional):
        - periodicity, rotate in total over 2*np.pi or np.pi
        - diskMask, True or False, specifying whether or not to mask the
            kernels spatially

    OUTPUT:
        - set_of_rotated_kernels, a tensorflow tensor with dimensions:
            [nbOrientations, Height, Width, nbOrientations, ChannelsIN, ChannelsOUT]
          i.e., for each rotation angle a rotated (shift-twisted) version of
          the input kernel.
    """
    # Rotation of an SE2 kernel consists of two parts:
    # PART 1. Planar rotation
    # PART 2. A shift in theta direction

    # Unpack the shape of the input kernel
    kernelSizeH, kernelSizeW, orientations_nb, channelsIN, channelsOUT = map(
        int, kernel.shape)
    print("SE2N-SE2N BASE KERNEL SHAPE:", kernel.get_shape())  # Debug

    # PART 1 (planar rotation)
    # Flatten the baseline kernel.
    # Resulting shape: [kernelSizeH*kernelSizeW, orientations_nb*channelsIN*channelsOUT]
    kernel_flat = tf.reshape(kernel, [
        kernelSizeH * kernelSizeW, orientations_nb * channelsIN * channelsOUT
    ])

    # Generate a set of rotated kernels via rotation matrix multiplication.
    # For efficiency, the rotation matrix is implemented as a sparse matrix object.
    # Result: the non-zero indices and weights of the rotation matrix.
    idx, vals = rotation_matrix.MultiRotationOperatorMatrixSparse(
        [kernelSizeH, kernelSizeW],
        orientations_nb,
        periodicity=periodicity,
        diskMask=diskMask)

    # The corresponding sparse rotation matrix.
    # Resulting shape: [nbOrientations*kernelSizeH*kernelSizeW, kernelSizeH*kernelSizeW]
    rotOp_matrix = tf.SparseTensor(idx, vals, [
        orientations_nb * kernelSizeH * kernelSizeW, kernelSizeH * kernelSizeW
    ])

    # Matrix multiplication (each 2D plane is now rotated).
    # Resulting shape: [nbOrientations*kernelSizeH*kernelSizeW, orientations_nb*channelsIN*channelsOUT]
    kernels_planar_rotated = tf.sparse_tensor_dense_matmul(
        rotOp_matrix, kernel_flat)
    kernels_planar_rotated = tf.reshape(kernels_planar_rotated, [
        orientations_nb, kernelSizeH, kernelSizeW, orientations_nb, channelsIN,
        channelsOUT
    ])

    # PART 2 (shift in theta direction)
    set_of_rotated_kernels = [None] * orientations_nb
    for orientation in range(orientations_nb):
        # [kernelSizeH, kernelSizeW, orientations_nb, channelsIN, channelsOUT]
        kernels_temp = kernels_planar_rotated[orientation]
        # [kernelSizeH, kernelSizeW, channelsIN, channelsOUT, orientations_nb]
        kernels_temp = tf.transpose(kernels_temp, [0, 1, 3, 4, 2])
        # [kernelSizeH*kernelSizeW*channelsIN*channelsOUT, orientations_nb]
        kernels_temp = tf.reshape(kernels_temp, [
            kernelSizeH * kernelSizeW * channelsIN * channelsOUT, orientations_nb
        ])
        # Roll along the orientation axis
        roll_matrix = tf.constant(
            np.roll(np.identity(orientations_nb), orientation, axis=1),
            dtype=tf.float32)
        kernels_temp = tf.matmul(kernels_temp, roll_matrix)
        kernels_temp = tf.reshape(kernels_temp, [
            kernelSizeH, kernelSizeW, channelsIN, channelsOUT, orientations_nb
        ])  # [Nx, Ny, Nin, Nout, Ntheta]
        kernels_temp = tf.transpose(kernels_temp, [0, 1, 4, 2, 3])
        set_of_rotated_kernels[orientation] = kernels_temp

    return tf.stack(set_of_rotated_kernels)
# weight1 = tf.get_variable("weight", [TRIGRAM_D, L1_N],
#                           initializer=tf.random_uniform_initializer(
#                               minval=-l1_par_range,
#                               maxval=l1_par_range,
#                               dtype=tf.float32),
#                           partitioner=partitioner)
weight1 = tf.Variable(tf.random_uniform([TRIGRAM_D, L1_N], -l1_par_range, l1_par_range),
                      name='weight')
bias1 = tf.Variable(tf.random_uniform([L1_N], -l1_par_range, l1_par_range),
                    name='bias')
variable_summaries(weight1, 'L1_weights')
variable_summaries(bias1, 'L1_biases')

with tf.device(weight1.device):
    dense_w1 = tf.gather(weight1, dense_cols, name="L1_dense_w")

# query_l1 = tf.matmul(tf.to_float(query_batch), weight1) + bias1
query_l1 = tf.sparse_tensor_dense_matmul(query_batch, dense_w1) + bias1
# doc_l1 = tf.matmul(tf.to_float(doc_batch), weight1) + bias1
doc_l1 = tf.sparse_tensor_dense_matmul(doc_batch, dense_w1) + bias1

query_l1_out = tf.nn.relu(query_l1)
doc_l1_out = tf.nn.relu(doc_l1)

with tf.name_scope('L2'):
    # Hidden layer 2 [input: 300, output: 300]
    l2_par_range = np.sqrt(6.0 / (L1_N + L2_N))
    weight2 = tf.Variable(tf.random_uniform([L1_N, L2_N], -l2_par_range, l2_par_range),
                          name='weight')
    bias2 = tf.Variable(tf.random_uniform([L2_N], -l2_par_range, l2_par_range),
                        name='bias')
    variable_summaries(weight2, 'L2_weights')
    variable_summaries(bias2, 'L2_biases')
import tensorflow as tf

sp_ids = [[0, 0], [1, 0]]
sp_vals = [3.0, 4.0]
inp_x = tf.SparseTensor(indices=sp_ids, values=sp_vals, dense_shape=[3, 2])
inp_v = tf.Variable([[0.5, 0.6, 0.7], [1.6, 1.7, 1.8]])
inp_y = tf.constant([1., 1., 1.])

cross = tf.sparse_tensor_dense_matmul(tf.sparse_reorder(inp_x), inp_v)
# cross2 = tf.nn.embedding_lookup_sparse(inp_v, sp_ids, sp_vals)
pred = tf.reduce_sum(cross, 1)
loss = pred - inp_y
opt = tf.train.AdamOptimizer(1e-1).minimize(loss)

sess = tf.Session()
sess.run(tf.global_variables_initializer())
print(sess.run(inp_v))
print(sess.run(pred))
sess.run(opt)
print(sess.run(inp_v))
def compact_bilinear_pooling_layer(bottom1, bottom2, output_dim, sum_pool=True,
                                   rand_h_1=None, rand_s_1=None,
                                   rand_h_2=None, rand_s_2=None,
                                   seed_h_1=1, seed_s_1=3, seed_h_2=5, seed_s_2=7,
                                   sequential=True, compute_size=128):
    """
    Compute compact bilinear pooling over two bottom inputs.

    Reference:
        Yang Gao, et al. "Compact Bilinear Pooling." in Proceedings of IEEE
        Conference on Computer Vision and Pattern Recognition (2016).
        Akira Fukui, et al. "Multimodal Compact Bilinear Pooling for Visual
        Question Answering and Visual Grounding." arXiv preprint
        arXiv:1606.01847 (2016).

    Args:
        bottom1: 1st input, 4D Tensor of shape [batch_size, height, width, input_dim1].
        bottom2: 2nd input, 4D Tensor of shape [batch_size, height, width, input_dim2].
        output_dim: output dimension for compact bilinear pooling.
        sum_pool: (Optional) If True, sum the output along the height and width
            dimensions and return output of shape [batch_size, output_dim].
            Otherwise return [batch_size, height, width, output_dim].
            Default: True.
        rand_h_1: (Optional) a 1D numpy array containing indices in the interval
            `[0, output_dim)`. Automatically generated from `seed_h_1` if None.
        rand_s_1: (Optional) a 1D numpy array of 1 and -1, having the same shape
            as `rand_h_1`. Automatically generated from `seed_s_1` if None.
        rand_h_2: (Optional) a 1D numpy array containing indices in the interval
            `[0, output_dim)`. Automatically generated from `seed_h_2` if None.
        rand_s_2: (Optional) a 1D numpy array of 1 and -1, having the same shape
            as `rand_h_2`. Automatically generated from `seed_s_2` if None.
        sequential: (Optional) if True, use the sequential FFT and IFFT instead
            of tf.batch_fft or tf.batch_ifft to avoid out-of-memory (OOM) errors.
            Note: sequential FFT and IFFT are only available on GPU.
            Default: True.
        compute_size: (Optional) the maximum size of a sub-batch to be forwarded
            through FFT or IFFT at one time. A large compute_size may be faster
            but can cause OOM and FFT failure. This parameter is only effective
            when sequential == True. Default: 128.

    Returns:
        Compact bilinear pooled results of shape [batch_size, output_dim] or
        [batch_size, height, width, output_dim], depending on `sum_pool`.
    """
    # Static shapes are needed to construct the count sketch matrix.
    input_dim1 = bottom1.get_shape().as_list()[-1]
    input_dim2 = bottom2.get_shape().as_list()[-1]

    # Step 0: Generate vectors and sketch matrix for tensor count sketch.
    # This is only done once during graph construction and is fixed during each
    # operation.
    if rand_h_1 is None:
        np.random.seed(seed_h_1)
        rand_h_1 = np.random.randint(output_dim, size=input_dim1)
    if rand_s_1 is None:
        np.random.seed(seed_s_1)
        rand_s_1 = 2 * np.random.randint(2, size=input_dim1) - 1
    sparse_sketch_matrix1 = _generate_sketch_matrix(rand_h_1, rand_s_1, output_dim)
    if rand_h_2 is None:
        np.random.seed(seed_h_2)
        rand_h_2 = np.random.randint(output_dim, size=input_dim2)
    if rand_s_2 is None:
        np.random.seed(seed_s_2)
        rand_s_2 = 2 * np.random.randint(2, size=input_dim2) - 1
    sparse_sketch_matrix2 = _generate_sketch_matrix(rand_h_2, rand_s_2, output_dim)

    # Step 1: Flatten the input tensors and count sketch.
    bottom1_flat = tf.reshape(bottom1, [-1, input_dim1])
    bottom2_flat = tf.reshape(bottom2, [-1, input_dim2])

    # Essentially:
    #   sketch1 = bottom1 * sparse_sketch_matrix
    #   sketch2 = bottom2 * sparse_sketch_matrix
    # But tensorflow only supports left-multiplying a sparse matrix, so:
    #   sketch1 = (sparse_sketch_matrix.T * bottom1.T).T
    #   sketch2 = (sparse_sketch_matrix.T * bottom2.T).T
    sketch1 = tf.transpose(tf.sparse_tensor_dense_matmul(
        sparse_sketch_matrix1, bottom1_flat, adjoint_a=True, adjoint_b=True))
    sketch2 = tf.transpose(tf.sparse_tensor_dense_matmul(
        sparse_sketch_matrix2, bottom2_flat, adjoint_a=True, adjoint_b=True))

    # Step 2: FFT
    fft1 = _fft(tf.complex(real=sketch1, imag=tf.zeros_like(sketch1)),
                sequential, compute_size)
    fft2 = _fft(tf.complex(real=sketch2, imag=tf.zeros_like(sketch2)),
                sequential, compute_size)

    # Step 3: Elementwise product
    fft_product = tf.multiply(fft1, fft2)

    # Step 4: Inverse FFT and reshape back.
    # Compute the output shape dynamically: [batch_size, height, width, output_dim]
    cbp_flat = tf.real(_ifft(fft_product, sequential, compute_size))
    output_shape = tf.add(tf.multiply(tf.shape(bottom1), [1, 1, 1, 0]),
                          [0, 0, 0, output_dim])
    cbp = tf.reshape(cbp_flat, output_shape)

    # Step 5: Sum pool over spatial dimensions, if specified.
    if sum_pool:
        cbp = tf.reduce_sum(cbp, reduction_indices=[1, 2])

    return cbp
ind = tf.concat([the_range, b_2], 1) res = tf.gather_nd(a, ind) return res # Create the network, tf variables and cost function here. #x = tf.placeholder("float", [None, n_input]) #y = tf.placeholder("float", [None, n_classes]) x = tf.sparse_placeholder(tf.float64) y = tf.sparse_placeholder(tf.float64) W = tf.Variable(tf.zeros([n_input, n_classes - 1], dtype=tf.float64), dtype=tf.float64) #Matrix_Mul= tf.matmul(x,W) Matrix_Mul = tf.sparse_tensor_dense_matmul(x, W) Zeros = tf.zeros([tf.shape(x)[0], 1], tf.float64) Matrix_concat = tf.concat([Matrix_Mul, Zeros], 1) Mx = tf.expand_dims(tf.reduce_max(Matrix_concat, reduction_indices=[1]), 1) Ax = tf.add(tf.exp(-Mx), tf.expand_dims(tf.reduce_sum(tf.exp(Matrix_Mul - Mx), 1), 1)) T = tf.one_hot((n_classes - 1) * tf.ones([tf.shape(x)[0]], tf.int64), depth=n_classes, on_value=np.float64(0.0), off_value=np.float64(1.0), dtype=tf.float64) ## (y==c)*e^<x,vc> pre_temp = tf.multiply(T, tf.exp(Matrix_concat))
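The `Mx` subtraction above is the standard max trick for a numerically stable log-sum-exp; the `tf.exp(-Mx)` term is the appended all-zero logit column, i.e. exp(0 - Mx). A quick numpy illustration (mine, not from the model):

import numpy as np

z = np.array([1000.0, 1001.0])
naive = np.log(np.sum(np.exp(z)))           # overflows: exp(1000) == inf
m = np.max(z)
stable = m + np.log(np.sum(np.exp(z - m)))  # finite, == logsumexp(z)
print(naive, stable)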
def gen_rotated_filters(w, filter_type, input_layer, nr_orients_out, basis_filters=None, rot_info=None): """ Generate the rotated filters either by phase manipulation or direct rotation of planar filter. Cyclic permutation of channels is performed for kernels on the group G. Args: w: coefficients used to perform a linear combination of basis filters filter_type: either 'steerable' or 'standard' input_layer (bool): whether 1st layer convolution or not nr_orients_out: number of output filter orientations basis_filters: atomic basis filters rot_info: array to determine how to rotate filters Returns: rot_filters: rotated steerable basis filters, with cyclic permutation if not the first layer """ if filter_type == "steerable": # if using steerable filters, then rotate by phase manipulation rot_filters = [None] * nr_orients_out for orientation in range(nr_orients_out): rot_info_tmp = tf.expand_dims(rot_info[..., orientation], -1) filter_tmp = w * rot_info_tmp * basis_filters # phase manipulation rot_filters[orientation] = filter_tmp # [nr_orients_out, J, K, K, nr_orients_in, filters_in, filters_out] (M: nr frequencies, R: nr radial profile params) rot_filters = tf.stack(rot_filters) # Linear combination of basis filters # [nr_orients_out, K, K, nr_orients_in, filters_in, filters_out] rot_filters = tf.reduce_sum(rot_filters, axis=1) # Get real part of filters # [nr_orients_out, K, K, nr_orients_in, filters_in, filters_out] rot_filters = tf.math.real(rot_filters, name="filters") else: # if using regular kernels, rotate by sparse matrix multiplication # [K, K, nr_orients_in, filters_in, filters_out] filter_shape = w.get_shape().as_list() # Flatten the filter filter_flat = tf.reshape( w, [ filter_shape[0] * filter_shape[1], filter_shape[2] * filter_shape[3] * filter_shape[4], ], ) # Generate a set of rotated kernels via rotation matrix multiplication idx, vals = MultiRotationOperatorMatrixSparse( [filter_shape[0], filter_shape[1]], nr_orients_out, periodicity=2 * np.pi, diskMask=True, ) # Sparse rotation matrix rotOp_matrix = tf.SparseTensor( idx, vals, [ nr_orients_out * filter_shape[0] * filter_shape[1], filter_shape[0] * filter_shape[1], ], ) # Matrix multiplication rot_filters = tf.sparse_tensor_dense_matmul(rotOp_matrix, filter_flat) # [nr_orients_out * K * K, filters_in * filters_out] # Reshape the filters to [nr_orients_out, K, K, nr_orients_in, filters_in, filters_out] rot_filters = tf.reshape( rot_filters, [ nr_orients_out, filter_shape[0], filter_shape[1], filter_shape[2], filter_shape[3], filter_shape[4], ], ) # Do not cycle filter for input convolution f: Z2 -> G if input_layer is False: shape_list = rot_filters.get_shape().as_list() # cycle channels - [nr_orients_out, K, K, nr_orients_in, filters_in, filters_out] rot_filters = cycle_channels(rot_filters, shape_list) return rot_filters
def __init__(self, layer_sizes=None, layer_acts=None, layer_keeps=None,
             layer_l2=None, kernel_l2=None, init_path=None, opt_algo='gd',
             learning_rate=1e-2, random_seed=None):
    init_vars = []
    num_inputs = len(layer_sizes[0])
    factor_order = layer_sizes[1]
    for i in range(num_inputs):
        layer_input = layer_sizes[0][i]
        layer_output = factor_order
        init_vars.append(('w0_%d' % i, [layer_input, layer_output], 'tnormal', dtype))
        init_vars.append(('b0_%d' % i, [layer_output], 'zero', dtype))
    init_vars.append(('w1', [num_inputs * factor_order, layer_sizes[2]], 'tnormal', dtype))
    init_vars.append(('k1', [factor_order * factor_order, layer_sizes[2]], 'tnormal', dtype))
    init_vars.append(('b1', [layer_sizes[2]], 'zero', dtype))
    for i in range(2, len(layer_sizes) - 1):
        layer_input = layer_sizes[i]
        layer_output = layer_sizes[i + 1]
        init_vars.append(('w%d' % i, [layer_input, layer_output], 'tnormal', dtype))
        init_vars.append(('b%d' % i, [layer_output], 'zero', dtype))
    self.graph = tf.Graph()
    with self.graph.as_default():
        if random_seed is not None:
            tf.set_random_seed(random_seed)
        self.X = [tf.sparse_placeholder(dtype) for i in range(num_inputs)]
        self.y = tf.placeholder(dtype)
        self.vars = utils.init_var_map(init_vars, init_path)
        w0 = [self.vars['w0_%d' % i] for i in range(num_inputs)]
        b0 = [self.vars['b0_%d' % i] for i in range(num_inputs)]
        l = tf.nn.dropout(
            utils.activate(
                tf.concat([tf.sparse_tensor_dense_matmul(self.X[i], w0[i]) + b0[i]
                           for i in range(num_inputs)], 1),
                layer_acts[0]),
            layer_keeps[0])
        w1 = self.vars['w1']
        k1 = self.vars['k1']
        b1 = self.vars['b1']
        z = tf.reduce_sum(tf.reshape(l, [-1, num_inputs, factor_order]), 1)
        p = tf.reshape(
            tf.matmul(tf.reshape(z, [-1, factor_order, 1]),
                      tf.reshape(z, [-1, 1, factor_order])),
            [-1, factor_order * factor_order])
        l = tf.nn.dropout(
            utils.activate(
                tf.matmul(l, w1) + tf.matmul(p, k1) + b1,
                layer_acts[1]),
            layer_keeps[1])
        for i in range(2, len(layer_sizes) - 1):
            wi = self.vars['w%d' % i]
            bi = self.vars['b%d' % i]
            l = tf.nn.dropout(
                utils.activate(
                    tf.matmul(l, wi) + bi,
                    layer_acts[i]),
                layer_keeps[i])
        self.y_prob = tf.sigmoid(l)

        self.loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(logits=l, labels=self.y))
        if layer_l2 is not None:
            for i in range(num_inputs):
                self.loss += layer_l2[0] * tf.nn.l2_loss(w0[i])
            for i in range(1, len(layer_sizes) - 1):
                wi = self.vars['w%d' % i]
                # bi = self.vars['b%d' % i]
                self.loss += layer_l2[i] * tf.nn.l2_loss(wi)
        if kernel_l2 is not None:
            self.loss += kernel_l2 * tf.nn.l2_loss(k1)
        self.optimizer = utils.get_optimizer(opt_algo, learning_rate, self.loss)

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        self.sess = tf.Session(config=config)
        tf.global_variables_initializer().run(session=self.sess)
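A quick numpy check (illustration only) of the quadratic term above: for one example, `p` is the flattened outer product z z^T of the summed field embeddings, so `tf.matmul(l, w1) + tf.matmul(p, k1)` mixes linear and pairwise-interaction features:

import numpy as np

z = np.array([1., 2., 3.])       # factor_order = 3
p = np.outer(z, z).reshape(-1)   # matches the reshape/matmul construction of p
print(p)                         # [1. 2. 3. 2. 4. 6. 3. 6. 9.]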
#x_values=tf.placeholder(tf.float32,[None])
#x_shape=tf.placeholder(tf.int64,[2])
x = tf.sparse_placeholder(tf.float64)
y = tf.placeholder(tf.float64, [None, 1])

# parameters to train
V = tf.Variable(tf.zeros([n_features, k], dtype=tf.float64), dtype=tf.float64, name="vk")
b = tf.Variable(tf.zeros([1], dtype=tf.float64), dtype=tf.float64, name="b")
w = tf.Variable(tf.random_normal((n_features, 1), dtype=tf.float64), dtype=tf.float64, name="w")

# model logic
#x=tf.SparseTensor(x_indices,x_values,x_shape)
vx = tf.sparse_tensor_dense_matmul(x, V)
vx_sq = tf.multiply(vx, vx)
xx = tf.square(x)
vsq_xsq = tf.sparse_tensor_dense_matmul(xx, V * V)
biterm = vx_sq - vsq_xsq
# here I just add one k-to-1 full_layer; you can add as many layers as you
# like. The only difference from a plain FM implementation is this line:
preds = tf.nn.sigmoid(
    tf.sparse_tensor_dense_matmul(x, w) + 0.5 * full_layer(biterm, 1) + b)
cost = tf.reduce_mean(-y * tf.log(tf.clip_by_value(preds, 1e-10, 1.0)) -
                      (1 - y) * tf.log(tf.clip_by_value(1 - preds, 1e-10, 1.0)))
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)
threshold = tf.constant(0.5, dtype=tf.float64)
plabel = tf.cast(threshold < preds, tf.float64)  # threshold the predictions, not the labels
accuracy = tf.metrics.accuracy(y, plabel)
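The `biterm` above relies on the standard FM identity: summed over factors, (xV)^2 - (x^2)(V^2) equals twice the pairwise interaction sum, hence the 0.5 factor. A numpy sanity check (mine):

import numpy as np

rng = np.random.RandomState(0)
x = rng.rand(5)                 # one dense example, n_features = 5
V = rng.randn(5, 3)             # k = 3 latent factors

pairwise = sum(np.dot(V[i], V[j]) * x[i] * x[j]
               for i in range(5) for j in range(i + 1, 5))
biterm = ((x @ V) ** 2 - (x ** 2) @ (V ** 2)).sum()
assert np.isclose(2 * pairwise, biterm)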
def buildRecGCN(self):
    self.isSocial = tf.placeholder(tf.int32)
    self.isSocial = tf.cast(self.isSocial, tf.bool)
    self.isAttentive = tf.placeholder(tf.int32)
    self.isAttentive = tf.cast(self.isAttentive, tf.bool)
    self.sampledItems = tf.placeholder(tf.int32)
    self.d_weights = dict()
    ego_embeddings = tf.concat([self.user_embeddings, self.item_embeddings], axis=0)
    indices = [[self.data.user[item[0]], self.num_users + self.data.item[item[1]]]
               for item in self.data.trainingData]
    indices += [[self.num_users + self.data.item[item[1]], self.data.user[item[0]]]
                for item in self.data.trainingData]
    values = [float(item[2]) / sqrt(len(self.data.trainSet_u[item[0]])) /
              sqrt(len(self.data.trainSet_i[item[1]]))
              for item in self.data.trainingData] * 2
    norm_adj = tf.SparseTensor(indices=indices, values=values,
                               dense_shape=[self.num_users + self.num_items,
                                            self.num_users + self.num_items])
    initializer = tf.contrib.layers.xavier_initializer()
    all_embeddings = [ego_embeddings]
    for k in range(self.n_layers_D):
        self.d_weights['attention_m1%d' % k] = tf.Variable(
            initializer([self.embed_size, self.embed_size]), name='attention_m1%d' % k)
        self.d_weights['attention_m2%d' % k] = tf.Variable(
            initializer([self.embed_size, self.embed_size]), name='attention_m2%d' % k)
        self.d_weights['attention_v%d' % k] = tf.Variable(
            initializer([1, self.embed_size * 2]), name='attention_v%d' % k)
    vals, indexes = tf.nn.top_k(self.alternativeNeighborhood, self.K)
    for k in range(self.n_layers_D):
        new_embeddings = tf.sparse_tensor_dense_matmul(norm_adj, ego_embeddings)
        # social attention (applying attention may be a little time-consuming)
        # selectedItemEmbeddings = tf.gather(ego_embeddings[self.num_users:],self.sampledItems)
        # indexes = tf.cast(indexes,tf.float32)
        # userEmbeddings = tf.matmul(ego_embeddings[:self.num_users],self.d_weights['attention_m1%d' % k])
        # itemEmbeddings = tf.matmul(selectedItemEmbeddings, self.d_weights['attention_m2%d' % k])
        # attentionEmbeddings = tf.concat([indexes,userEmbeddings],axis=1)
        # attentionEmbeddings = tf.concat([attentionEmbeddings, itemEmbeddings], axis=1)
        #
        # def attention(embedding):
        #     alternativeNeighors,u_embedding,i_embedding = tf.split(tf.reshape(embedding,[1,self.K+2*self.embed_size]),[self.K,self.embed_size,self.embed_size],axis=1)
        #     alternativeNeighors = tf.cast(alternativeNeighors[0],tf.int32)
        #     friendsEmbedding = tf.gather(ego_embeddings[:self.num_users],alternativeNeighors)
        #     friendsEmbedding = tf.matmul(friendsEmbedding,self.d_weights['attention_m1%d' % k])
        #     i_embedding = tf.reshape(tf.concat([i_embedding] * self.K, 1), [self.K, self.embed_size])
        #     res = tf.reduce_sum(tf.multiply(self.d_weights['attention_v%d' % k],tf.sigmoid(tf.concat([friendsEmbedding + u_embedding, i_embedding],1))), 1)
        #     weights = tf.nn.softmax(res)
        #     socialEmbedding = tf.matmul(tf.reshape(weights,[1,self.K]),tf.gather(ego_embeddings[:self.num_users],alternativeNeighors))
        #     return socialEmbedding[0]
        # attentive_socialEmbeddings = tf.vectorized_map(fn=lambda em: attention(em),elems=attentionEmbeddings)
        nonattentive_socialEmbeddings = tf.matmul(
            self.alternativeNeighborhood, ego_embeddings[:self.num_users]) / self.K

        def without_attention():
            return nonattentive_socialEmbeddings

        def with_attention():
            return nonattentive_socialEmbeddings  # to use attention, this part should be modified

        def without_social():
            return new_embeddings

        def with_social(embeddings):
            socialEmbeddings = tf.cond(self.isAttentive,
                                       lambda: with_attention(),
                                       lambda: without_attention())
            return tf.concat([(embeddings[:self.num_users] + socialEmbeddings),
                              embeddings[self.num_users:]], 0)

        ego_embeddings = tf.cond(self.isSocial,
                                 lambda: with_social(new_embeddings),
                                 lambda: without_social())
        # normalize the distribution of embeddings.
        norm_embeddings = tf.math.l2_normalize(ego_embeddings, axis=1)
        all_embeddings += [norm_embeddings]
    all_embeddings = tf.reduce_sum(all_embeddings, 0)
    self.multi_user_embeddings, self.multi_item_embeddings = tf.split(
        all_embeddings, [self.num_users, self.num_items], 0)
    self.neg_idx = tf.placeholder(tf.int32, name="neg_holder")
    self.neg_item_embedding = tf.nn.embedding_lookup(self.multi_item_embeddings, self.neg_idx)
    self.u_embedding = tf.nn.embedding_lookup(self.multi_user_embeddings, self.u_idx)
    self.v_embedding = tf.nn.embedding_lookup(self.multi_item_embeddings, self.v_idx)
def generateUserEmbeddingFromSocialNeighbors(self, current_user_embedding): user_embedding_from_social_neighbors = tf.sparse_tensor_dense_matmul( self.social_neighbors_sparse_matrix, current_user_embedding) return user_embedding_from_social_neighbors
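If `social_neighbors_sparse_matrix` is row-normalized, the sparse matmul above yields, for each user, the mean embedding of that user's social neighbors. A tiny numpy illustration (mine, not from the model):

import numpy as np

A = np.array([[0., .5, .5],
              [1., 0., 0.],
              [0., 1., 0.]])   # row-normalized: user 0 follows users 1 and 2
E = np.array([[1., 0.], [0., 1.], [2., 2.]])
print(A @ E)                   # row 0 is the average of rows 1 and 2 of E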
import tensorflow as tf
from util import load_full_graph, elapse
import time
import numpy as np

d = 0.85
graph = 'web-Stanford.txt'  # or 'small_graph.txt'
graph = 'LCC.txt'
n, indices, values = load_full_graph(graph)
print("reading dataset done")
with tf.device('/device:GPU:0'):
    m = tf.SparseTensor(indices=indices, values=values, dense_shape=[n, n])
    p = tf.get_variable("pagerank", trainable=False,
                        initializer=tf.constant(1.0 / n, shape=[n, 1]))
    new_p = d * tf.sparse_tensor_dense_matmul(m, p) + (1 - d) / n
    delta = tf.abs(new_p - p) / p
    # make sure assign happens later
    with tf.control_dependencies([delta]):
        assignment = p.assign(new_p)

sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
start = time.time()
sess.run(p.initializer)
i = 0
max_iter = 1000
while i < max_iter:
    res = sess.run([delta, assignment])
    norm = np.sort(np.reshape(res[0], -1))
    norm = norm[int(n * 0.99)]  # 99th-percentile relative change
    print("iteration {}:".format(i))
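`load_full_graph` is imported from `util`, which is not shown here. A hypothetical sketch (mine) that would satisfy the PageRank update above, assuming a 0-based `src dst` edge list and returning a column-stochastic transition matrix M with M[dst, src] = 1/outdeg(src):

def load_full_graph(path):
    # Hypothetical helper: parse an edge list and build sparse COO data
    # for the column-stochastic transition matrix used by new_p = d*M@p + ...
    edges, out_deg = [], {}
    with open(path) as f:
        for line in f:
            if line.startswith('#'):
                continue
            src, dst = map(int, line.split())
            edges.append((dst, src))
            out_deg[src] = out_deg.get(src, 0) + 1
    n = 1 + max(max(i, j) for i, j in edges)
    indices = [[i, j] for i, j in edges]
    values = [1.0 / out_deg[j] for _, j in edges]
    return n, indices, values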
def initModel(self): super(SEPT, self).initModel() self.neg_idx = tf.placeholder(tf.int32, name="neg_holder") self._create_variable() self.bs_matrix = self.get_birectional_social_matrix() self.rating_mat = self.buildSparseRatingMatrix() social_mat, sharing_mat = self.get_social_related_views( self.bs_matrix, self.rating_mat) social_mat = self._convert_sp_mat_to_sp_tensor(social_mat) sharing_mat = self._convert_sp_mat_to_sp_tensor(sharing_mat) self.user_embeddings = tf.Variable(tf.truncated_normal( shape=[self.num_users, self.emb_size], stddev=0.005), name='U') / 2 self.item_embeddings = tf.Variable(tf.truncated_normal( shape=[self.num_items, self.emb_size], stddev=0.005), name='V') / 2 # initialize adjacency matrices ui_mat = self.get_adj_mat() ui_mat = self._convert_sp_mat_to_sp_tensor(ui_mat) friend_view_embeddings = self.user_embeddings sharing_view_embeddings = self.user_embeddings all_social_embeddings = [friend_view_embeddings] all_sharing_embeddings = [sharing_view_embeddings] ego_embeddings = tf.concat( [self.user_embeddings, self.item_embeddings], axis=0) all_embeddings = [ego_embeddings] aug_embeddings = ego_embeddings all_aug_embeddings = [ego_embeddings] #multi-view convolution: LightGCN structure for k in range(self.n_layers): # friend view friend_view_embeddings = tf.sparse_tensor_dense_matmul( social_mat, friend_view_embeddings) norm_embeddings = tf.math.l2_normalize(friend_view_embeddings, axis=1) all_social_embeddings += [norm_embeddings] # sharing view sharing_view_embeddings = tf.sparse_tensor_dense_matmul( sharing_mat, sharing_view_embeddings) norm_embeddings = tf.math.l2_normalize(sharing_view_embeddings, axis=1) all_sharing_embeddings += [norm_embeddings] # preference view ego_embeddings = tf.sparse_tensor_dense_matmul( ui_mat, ego_embeddings) norm_embeddings = tf.math.l2_normalize(ego_embeddings, axis=1) all_embeddings += [norm_embeddings] # unlabeled sample view aug_embeddings = tf.sparse_tensor_dense_matmul( self.sub_mat['sub_mat'], aug_embeddings) norm_embeddings = tf.math.l2_normalize(aug_embeddings, axis=1) all_aug_embeddings += [norm_embeddings] # multi-view convolution: NGCF structure # initializer = tf.contrib.layers.xavier_initializer() # self.weights = dict() # for k in range(self.n_layers): # for view in range(4): # self.weights['W_%d_1_%d' %(k,view)] = tf.Variable( # initializer([self.emb_size,self.emb_size]), name='W_%d_1_%d' %(k,view)) # self.weights['W_%d_2_%d' %(k,view)] = tf.Variable( # initializer([self.emb_size,self.emb_size]), name='W_%d_2_%d' %(k,view)) # # for k in range(self.n_layers): # #friend view # side_embeddings = tf.sparse_tensor_dense_matmul(social_mat,friend_view_embeddings) # sum_embeddings = tf.matmul(side_embeddings+friend_view_embeddings, self.weights['W_%d_1_0' % k]) # bi_embeddings = tf.multiply(friend_view_embeddings, side_embeddings) # bi_embeddings = tf.matmul(bi_embeddings, self.weights['W_%d_2_0' % k]) # friend_view_embeddings = tf.nn.leaky_relu(sum_embeddings+bi_embeddings) # norm_embeddings = tf.math.l2_normalize(friend_view_embeddings, axis=1) # all_social_embeddings += [norm_embeddings] # #sharing view # side_embeddings = tf.sparse_tensor_dense_matmul(sharing_mat,sharing_view_embeddings) # sum_embeddings = tf.matmul(side_embeddings+sharing_view_embeddings, self.weights['W_%d_1_1' % k]) # bi_embeddings = tf.multiply(sharing_view_embeddings, side_embeddings) # bi_embeddings = tf.matmul(bi_embeddings, self.weights['W_%d_2_1' % k]) # sharing_view_embeddings = tf.nn.leaky_relu(sum_embeddings+bi_embeddings) # norm_embeddings = 
tf.math.l2_normalize(sharing_view_embeddings, axis=1)
            # all_sharing_embeddings += [norm_embeddings]
            # #preference view
            # side_embeddings = tf.sparse_tensor_dense_matmul(ui_mat, ego_embeddings)
            # sum_embeddings = tf.matmul(side_embeddings+ego_embeddings, self.weights['W_%d_1_2' % k])
            # bi_embeddings = tf.multiply(ego_embeddings, side_embeddings)
            # bi_embeddings = tf.matmul(bi_embeddings, self.weights['W_%d_2_2' % k])
            # ego_embeddings = tf.nn.leaky_relu(sum_embeddings+bi_embeddings)
            # norm_embeddings = tf.math.l2_normalize(ego_embeddings, axis=1)
            # all_embeddings += [norm_embeddings]
            # # unlabeled sample view
            # side_embeddings = tf.sparse_tensor_dense_matmul(self.sub_mat['sub_mat'], aug_embeddings)
            # sum_embeddings = tf.matmul(side_embeddings+aug_embeddings, self.weights['W_%d_1_3' % k])
            # bi_embeddings = tf.multiply(aug_embeddings, side_embeddings)
            # bi_embeddings = tf.matmul(bi_embeddings, self.weights['W_%d_2_3' % k])
            # aug_embeddings = tf.nn.leaky_relu(sum_embeddings+bi_embeddings)
            # norm_embeddings = tf.math.l2_normalize(aug_embeddings, axis=1)
            # all_aug_embeddings += [norm_embeddings]

        # aggregate (sum) the layer-wise embeddings for each view
        self.friend_view_embeddings = tf.reduce_sum(all_social_embeddings, axis=0)
        self.sharing_view_embeddings = tf.reduce_sum(all_sharing_embeddings, axis=0)
        all_embeddings = tf.reduce_sum(all_embeddings, axis=0)
        self.rec_user_embeddings, self.rec_item_embeddings = tf.split(
            all_embeddings, [self.num_users, self.num_items], 0)
        aug_embeddings = tf.reduce_sum(all_aug_embeddings, axis=0)
        self.aug_user_embeddings, self.aug_item_embeddings = tf.split(
            aug_embeddings, [self.num_users, self.num_items], 0)
        # embedding look-up
        self.batch_user_emb = tf.nn.embedding_lookup(self.rec_user_embeddings, self.u_idx)
        self.batch_pos_item_emb = tf.nn.embedding_lookup(self.rec_item_embeddings, self.v_idx)
        self.batch_neg_item_emb = tf.nn.embedding_lookup(self.rec_item_embeddings, self.neg_idx)
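The `_convert_sp_mat_to_sp_tensor` helper used in `initModel` is not shown in this excerpt; a minimal sketch consistent with its usage (scipy sparse matrix to tf.SparseTensor via COO coordinates) might be:

import numpy as np
import tensorflow as tf

def _convert_sp_mat_to_sp_tensor(X):
    # scipy sparse matrix -> tf.SparseTensor through COO row/col coordinates
    coo = X.tocoo().astype(np.float32)
    indices = np.column_stack([coo.row, coo.col]).astype(np.int64)
    return tf.SparseTensor(indices, coo.data, coo.shape)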
def generateUserEmebddingFromConsumedItems(self, current_item_embedding): user_embedding_from_consumed_items = tf.sparse_tensor_dense_matmul( self.consumed_items_sparse_matrix, current_item_embedding) return user_embedding_from_consumed_items
def gcn_layer(norm_adj_mat, h, W, b, h_is_sparse=False):
    # Propagation rule A_hat @ h @ W + b; only one operand of
    # sparse_tensor_dense_matmul may be sparse, so the order of the two
    # multiplications depends on whether the layer input h is itself sparse.
    if h_is_sparse:
        return tf.add(tf.sparse_tensor_dense_matmul(
            norm_adj_mat, tf.sparse_tensor_dense_matmul(h, W)), b)
    else:
        return tf.add(tf.matmul(tf.sparse_tensor_dense_matmul(norm_adj_mat, h), W), b)
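A usage sketch (mine, with hypothetical names `A_hat`, `X`, `W0`/`W1`, `b0`/`b1`): two stacked layers where only the raw input features are sparse:

# h0 consumes the sparse features; everything after it is dense
h0 = tf.nn.relu(gcn_layer(A_hat, X, W0, b0, h_is_sparse=True))
h1 = gcn_layer(A_hat, h0, W1, b1)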
        self.loss = tf.reduce_mean(tf.pow(self.y_input - self.decoder_2, 2))
        self.optimizer = tf.train.GradientDescentOptimizer(learningRate).minimize(self.loss)
        self.init = tf.global_variables_initializer()

    def train(self, execRange=1000):
        self.sess.run(self.init)
        for curIteration in range(execRange):
            _, curLoss = self.sess.run([self.optimizer, self.loss],
                                       feed_dict={self.x_input: self.X, self.y_input: self.Y})
            if curIteration % 100 == 0:
                print("The loss at step {} is {}".format(curIteration, curLoss))

import tensorflow as tf
import numpy as np

with tf.Session() as sess:
    sparse_place = tf.sparse_placeholder(tf.float64)
    some_dense_tensor = tf.placeholder("float64", (3, 2))  # 3 rows to match the [3, 3] sparse operand
    mul_result = tf.sparse_tensor_dense_matmul(sparse_place, some_dense_tensor)
    dense_version = tf.sparse_tensor_to_dense(sparse_place)
    sess.run(dense_version,
             feed_dict={sparse_place: tf.SparseTensorValue([[0, 1], [2, 2]], [1.5, 2.9], [3, 3]),
                        some_dense_tensor: np.array([[1, 2], [3, 4], [5, 6]])})

sparse_input = tf.sparse_placeholder(dtype=tf.float32, shape=[100, 100])
coo_matrix = scipy.sparse.coo_matrix(...)
# Wrap `coo_matrix` in the `tf.SparseTensorValue` form that TensorFlow expects.
# SciPy stores the row and column coordinates as separate vectors, so we must
# stack and transpose them to make an indices matrix of the appropriate shape.
tf_coo_matrix = tf.SparseTensorValue(
    indices=np.array([coo_matrix.row, coo_matrix.col]).T,
    values=coo_matrix.data,
    dense_shape=coo_matrix.shape)
minval = minval_init, maxval = maxval_init))
Bias_2 = tf.Variable(tf.random_uniform(shape = [L_2_node],
                                       minval = minval_init,
                                       maxval = maxval_init))

## third layer ##
Weight_3 = tf.Variable(tf.random_uniform(shape = [L_2_node, OUTPUT_D],
                                         minval = minval_init,
                                         maxval = maxval_init))
Bias_3 = tf.Variable(tf.random_uniform(shape = [OUTPUT_D],
                                       minval = minval_init,
                                       maxval = maxval_init))

## 3) define the fully connected layer operations
# only the raw term-vector batches are sparse, so just the first layer uses
# sparse_tensor_dense_matmul; later layers operate on dense activations
query_1_out = tf.nn.relu( tf.sparse_tensor_dense_matmul(query_batch, Weight_1) + Bias_1 )
doc_1_out   = tf.nn.relu( tf.sparse_tensor_dense_matmul(doc_batch, Weight_1) + Bias_1 )

query_2_out = tf.nn.relu( tf.matmul(query_1_out, Weight_2) + Bias_2 )
doc_2_out   = tf.nn.relu( tf.matmul(doc_1_out, Weight_2) + Bias_2 )

query_3_out = tf.nn.relu( tf.matmul(query_2_out, Weight_3) + Bias_3 )
doc_3_out   = tf.nn.relu( tf.matmul(doc_2_out, Weight_3) + Bias_3
def SecondGCNLayerWithoutActiveFun(norm_adj_mat, h, W, b): return tf.add(tf.matmul(tf.sparse_tensor_dense_matmul(norm_adj_mat, h), W), b)
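For context, a plausible first-layer counterpart (my sketch; the actual function is not shown in this excerpt) would wrap the same propagation in a non-linearity:

def FirstGCNLayerWithActiveFun(norm_adj_mat, h, W, b):
    # Hypothetical counterpart to the function above: the same
    # A_hat @ h @ W + b propagation followed by a ReLU.
    return tf.nn.relu(tf.add(tf.matmul(tf.sparse_tensor_dense_matmul(norm_adj_mat, h), W), b))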
def build_model(_indices, _values, _values2, _shape, _y, _ind, feature_cnt, field_cnt, params): eta = tf.constant(params['eta']) _x = tf.SparseTensor(_indices, _values, _shape) # m * feature_cnt sparse tensor _xx = tf.SparseTensor(_indices, _values2, _shape) model_params = [] tmp = [] init_value = params['init_value'] dim = params['dim'] layer_sizes = params['layer_sizes'] # w_linear = tf.Variable(tf.truncated_normal([feature_cnt, 1], stddev=init_value, mean=0), name='w_linear', # dtype=tf.float32) w_linear = tf.Variable( tf.truncated_normal( [feature_cnt, 1], stddev=init_value, mean=0 ), #tf.random_uniform([feature_cnt, 1], minval=-0.05, maxval=0.05), name='w_linear', dtype=tf.float32) bias = tf.Variable(tf.truncated_normal([1], stddev=init_value, mean=0), name='bias') model_params.append(bias) model_params.append(w_linear) preds = bias # linear part preds += tf.sparse_tensor_dense_matmul(_x, w_linear, name='contr_from_linear') w_fm = tf.Variable(tf.truncated_normal([feature_cnt, dim], stddev=init_value / math.sqrt(float(dim)), mean=0), name='w_fm', dtype=tf.float32) model_params.append(w_fm) # fm order 2 interactions if params['is_use_fm_part']: preds = preds + 0.5 * tf.reduce_sum( tf.pow(tf.sparse_tensor_dense_matmul(_x, w_fm), 2) - tf.sparse_tensor_dense_matmul(_xx, tf.pow(w_fm, 2)), 1, keep_dims=True) ## deep neural network if params['is_use_dnn_part']: w_fm_nn_input = tf.reshape( tf.gather(w_fm, _ind) * tf.expand_dims(_values, 1), [-1, field_cnt * dim]) print(w_fm_nn_input.shape) # tmp.append(tf.shape(tf.expand_dims(_values, 1))) # tmp.append(tf.shape(w_fm_nn_input)) # tmp.append(tf.shape(tf.gather(w_fm, _ind) * tf.expand_dims(_values, 1))) # tmp.append(tf.shape(tf.gather(w_fm, _ind))) #w_nn_layers = [] hidden_nn_layers = [] hidden_nn_layers.append(w_fm_nn_input) last_layer_size = field_cnt * dim layer_idx = 0 w_nn_params = [] b_nn_params = [] for layer_size in layer_sizes: cur_w_nn_layer = tf.Variable(tf.truncated_normal( [last_layer_size, layer_size], stddev=init_value / math.sqrt(float(10)), mean=0), name='w_nn_layer' + str(layer_idx), dtype=tf.float32) cur_b_nn_layer = tf.Variable( tf.truncated_normal([layer_size], stddev=init_value, mean=0), name='b_nn_layer' + str(layer_idx) ) #tf.get_variable('b_nn_layer' + str(layer_idx), [layer_size], initializer=tf.constant_initializer(0.0)) cur_hidden_nn_layer = tf.nn.xw_plus_b(hidden_nn_layers[layer_idx], cur_w_nn_layer, cur_b_nn_layer) if params['activations'][layer_idx] == 'tanh': cur_hidden_nn_layer = tf.nn.tanh(cur_hidden_nn_layer) elif params['activations'][layer_idx] == 'sigmoid': cur_hidden_nn_layer = tf.nn.sigmoid(cur_hidden_nn_layer) elif params['activations'][layer_idx] == 'relu': cur_hidden_nn_layer = tf.nn.relu(cur_hidden_nn_layer) #cur_hidden_nn_layer = tf.matmul(hidden_nn_layers[layer_idx], cur_w_nn_layer) #w_nn_layers.append(cur_w_nn_layer) hidden_nn_layers.append(cur_hidden_nn_layer) layer_idx += 1 last_layer_size = layer_size model_params.append(cur_w_nn_layer) model_params.append(cur_b_nn_layer) w_nn_params.append(cur_w_nn_layer) b_nn_params.append(cur_b_nn_layer) w_nn_output = tf.Variable(tf.truncated_normal([last_layer_size, 1], stddev=init_value, mean=0), name='w_nn_output', dtype=tf.float32) nn_output = tf.matmul(hidden_nn_layers[-1], w_nn_output) model_params.append(w_nn_output) w_nn_params.append(w_nn_output) preds += nn_output if params['loss'] == 'cross_entropy_loss': # 'loss': 'log_loss' error = tf.reduce_mean( tf.nn.sigmoid_cross_entropy_with_logits( logits=tf.reshape(preds, [-1]), labels=tf.reshape(_y, [-1]))) 
    elif params['loss'] == 'square_loss':
        preds = tf.sigmoid(preds)
        error = tf.reduce_mean(tf.squared_difference(preds, _y))
    elif params['loss'] == 'log_loss':
        preds = tf.sigmoid(preds)
        error = tf.reduce_mean(tf.losses.log_loss(predictions=preds, labels=_y))

    lambda_w_linear = tf.constant(params['reg_w_linear'], name='lambda_w_linear')
    lambda_w_fm = tf.constant(params['reg_w_fm'], name='lambda_w_fm')
    lambda_w_nn = tf.constant(params['reg_w_nn'], name='lambda_w_nn')
    lambda_w_l1 = tf.constant(params['reg_w_l1'], name='lambda_w_l1')

    # l2_norm = tf.multiply(lambda_w_linear, tf.pow(bias, 2)) + tf.reduce_sum(
    #     tf.add(tf.multiply(lambda_w_linear, tf.pow(w_linear, 2)),
    #            tf.multiply(lambda_w_fm, tf.pow(w_fm, 2)))) + tf.reduce_sum(
    #     tf.multiply(lambda_w_nn, tf.pow(w_nn_output, 2)))
    # l2_norm = tf.multiply(lambda_w_linear, tf.pow(bias, 2)) \
    #           + tf.multiply(lambda_w_linear, tf.reduce_sum(tf.pow(w_linear, 2)))
    l2_norm = tf.multiply(lambda_w_linear, tf.reduce_sum(tf.pow(w_linear, 2)))
    l2_norm += tf.multiply(lambda_w_l1, tf.reduce_sum(tf.abs(w_linear)))
    if params['is_use_fm_part'] or params['is_use_dnn_part']:
        l2_norm += tf.multiply(lambda_w_fm, tf.reduce_sum(tf.pow(w_fm, 2)))
    if params['is_use_dnn_part']:
        for i in range(len(w_nn_params)):
            l2_norm += tf.multiply(lambda_w_nn, tf.reduce_sum(tf.pow(w_nn_params[i], 2)))
        for i in range(len(b_nn_params)):
            l2_norm += tf.multiply(lambda_w_nn, tf.reduce_sum(tf.pow(b_nn_params[i], 2)))
    # tmp.append(tf.shape(tf.pow(w_linear, 2)))
    # tmp.append(tf.shape(tf.pow(w_fm, 2)))
    loss = tf.add(error, l2_norm)

    if params['optimizer'] == 'adadelta':
        train_step = tf.train.AdadeltaOptimizer(eta).minimize(loss, var_list=model_params)
    elif params['optimizer'] == 'sgd':
        train_step = tf.train.GradientDescentOptimizer(
            params['learning_rate']).minimize(loss, var_list=model_params)
    elif params['optimizer'] == 'adam':
        train_step = tf.train.AdamOptimizer(
            params['learning_rate']).minimize(loss, var_list=model_params)
    elif params['optimizer'] == 'ftrl':
        train_step = tf.train.FtrlOptimizer(
            params['learning_rate']).minimize(loss, var_list=model_params)
    else:
        train_step = tf.train.GradientDescentOptimizer(
            params['learning_rate']).minimize(loss, var_list=model_params)

    tf.summary.scalar('square_error', error)
    tf.summary.scalar('loss', loss)
    tf.summary.histogram('linear_weights_hist', w_linear)
    if params['is_use_fm_part']:
        tf.summary.histogram('fm_weights_hist', w_fm)
    if params['is_use_dnn_part']:
        for idx in range(len(w_nn_params)):
            tf.summary.histogram('nn_layer' + str(idx) + '_weights', w_nn_params[idx])
    merged_summary = tf.summary.merge_all()
    return train_step, loss, error, preds, merged_summary, tmp
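For reference, these are the keys `build_model` reads from `params` (the keys come from the code above; the values here are illustrative only):

params = {
    'eta': 0.1,                       # Adadelta learning rate
    'init_value': 0.01,               # stddev scale for weight initialization
    'dim': 8,                         # FM embedding dimension
    'layer_sizes': [64, 32],          # hidden layer widths for the DNN part
    'activations': ['relu', 'relu'],  # 'tanh' / 'sigmoid' / 'relu' per layer
    'is_use_fm_part': True,
    'is_use_dnn_part': True,
    'loss': 'cross_entropy_loss',     # or 'square_loss' / 'log_loss'
    'optimizer': 'adam',              # or 'adadelta' / 'sgd' / 'ftrl'
    'learning_rate': 1e-3,
    'reg_w_linear': 1e-4,
    'reg_w_fm': 1e-4,
    'reg_w_nn': 1e-4,
    'reg_w_l1': 1e-5,
}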
def fit(self, documents, inst_labels, feat_labels, feat_tasks, model_dir,
        validation_set=None):
    """
    Fit the model: encoder and predictor

    Parameters
    ----------
    documents : a dictionary of (str, list) pairs. d[inst] = word_seq
        inst: str, instance id
        word_seq: a list of word strings
    inst_labels : dictionary. d[inst][label] = prob(label|inst)
        inst: str, instance id
        label: str, label name
    feat_labels : dictionary. d[feat][label] = prob(label|feat)
        feat: str, feature string
        label: str, label name
    feat_tasks : list. [f1, f2, ...] each element is a feature string.

    Returns
    -------
    self : object
        Returns self.
    """
    if len(inst_labels) == 0 and len(feat_labels) == 0:
        sys.stderr.write('learner::fit(): neither instance label nor feature label is provided. Nothing to fit.\n')
        return self

    # =========================================================================
    # extract input data
    # keep the metadata as member variables, so that they can be accessed from outside
    self.inst_labels = inst_labels
    self.feat_labels = feat_labels
    self.feat_tasks = feat_tasks
    # print 'documents', documents
    sys.stderr.write('learner::fit(): Extracting features ...\n')
    self.feat_idx = get_feature_idx(documents, self.max_vocab, feat_labels, feat_tasks)
    _, self.feat_doc_freq = get_feature_stats(documents, self.feat_idx)
    if self.is_sparse:
        self.inst_idx, doc = featurize_bow(documents, self.feat_idx, tf='count',
                                           idf=self.feat_doc_freq)
    elif self.dense_architecture == DenseArch.TWO_LAYER_TFIDF_EMB:
        self.inst_idx, doc = featurize_seq_sp(documents, self.feat_idx)
    else:
        self.feat_idx = dict([('PADDING_TOKEN', 0)] + [(k, v + 1) for k, v in self.feat_idx.items()])
        self.inst_idx, doc = featurize_seq(documents, self.feat_idx)
    # print 'self.inst_idx', self.inst_idx
    # print 'self.feat_idx', self.feat_idx
    # print 'doc', doc.dict, doc.shape
    # print 'doc', doc[1], doc.shape
    self.label_idx = get_label_idx(inst_labels, feat_labels)
    # print 'self.label_idx', self.label_idx
    i_target = get_i_target(self.inst_idx, self.label_idx, inst_labels)
    # print 'i_target', i_target
    feat_doc_assn = get_feat_inst_assn(self.inst_idx, self.feat_idx, feat_labels, documents)
    # print 'feat_doc_assn', feat_doc_assn.dict, feat_doc_assn.shape
    # print 'feat_doc_assn', feat_doc_assn.shape
    f_target = get_f_target(self.feat_idx, self.label_idx, feat_labels)
    # print 'feat_labels', feat_labels
    # print 'f_target', f_target
    if self.is_sparse:
        g_doc = None
        g_target = None
        # g_doc = get_g_doc_bow(doc, self.feat_idx, feat_tasks)
        # g_target = get_g_target_bow(doc, self.feat_idx, feat_tasks)
        # print 'g_doc', g_doc.dict, g_doc.shape
        pass
    else:
        g_doc = get_g_doc_seq(doc, self.feat_idx, feat_tasks)
        g_target = get_g_target_seq(doc, self.feat_idx, feat_tasks)
        # print 'g_doc', g_doc, g_doc.shape
        # print 'g_target', g_target
    if self.dense_architecture == DenseArch.ONE_LAYER:
        self.embedding_size = len(self.label_idx)

    if validation_set is not None:
        val_documents, val_labels = validation_set
        val_max_perf = -1.0

    # =========================================================================
    # computational graph for training, different from the model itself; it
    # produces the objective value.
    sys.stderr.write('learner::fit(): Building computational graph ...\n')
    # no hidden layer
    self._build_inputs()
    self._build_variables()

    objective = tf.constant(0, dtype=tf.float32)
    # build up objective: will use i_target, f_target, g_target
    if self.is_sparse:
        if len(inst_labels) > 0:
            i_logits = tf.sparse_tensor_dense_matmul(self.ph_i_doc, self.W)
            i_loss = tf.reduce_sum(
                tf.nn.softmax_cross_entropy_with_logits(labels=self.ph_i_target,
                                                        logits=i_logits))
            objective += tf.cond(self.ph_i_dotrain, lambda: i_loss, lambda: 0.)
        if len(feat_labels) > 0:
            f_logits = tf.sparse_tensor_dense_matmul(self.ph_f_doc, self.W)
            f_pred = self._feature_label_dist(f_logits, self.ph_feat_doc_assn)
            f_loss = -tf.reduce_sum(self.ph_f_target * tf.log(f_pred + 1e-10))  # cross entropy
            objective += tf.cond(self.ph_f_dotrain, lambda: f_loss, lambda: tf.zeros([1]))
    else:
        # prepend intermediate/encoding module before softmax. Now it's just
        # an embedding lookup. Can add LSTM.
        if len(inst_labels) > 0:
            i_logits = self._dense_arch(self.W_emb, self.W, self.ph_i_doc)
            i_loss = tf.reduce_sum(
                tf.nn.softmax_cross_entropy_with_logits(labels=self.ph_i_target,
                                                        logits=i_logits))
            objective += tf.cond(self.ph_i_dotrain, lambda: i_loss, lambda: 0.)
        if len(feat_labels) > 0:
            f_logits = self._dense_arch(self.W_emb, self.W, self.ph_f_doc)
            f_pred = self._feature_label_dist(f_logits, self.ph_feat_doc_assn)
            f_loss = -tf.reduce_sum(self.ph_f_target * tf.log(f_pred + 1e-10))
            objective += tf.cond(self.ph_f_dotrain, lambda: f_loss, lambda: tf.zeros([1]))
        if len(feat_tasks) > 0:
            if self.dense_architecture != DenseArch.ONE_LAYER:
                g_logits = self._dense_arch(self.W_emb, self.W_g, self.ph_g_doc,
                                            reduce_dim=2)  # [g x N x l x k] => [g x N x k]
                objective += tf.reduce_sum(
                    tf.nn.sigmoid_cross_entropy_with_logits(labels=self.ph_g_target,
                                                            logits=g_logits))
    loss_op = objective

    # =========================================================================
    # gradient pipeline
    self._opt = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
    # self._opt = tf.train.RMSPropOptimizer(learning_rate=self.learning_rate)  # seems to be better than Adam
    grads_and_vars = self._opt.compute_gradients(loss_op)
    train_op = self._opt.apply_gradients(grads_and_vars, name="train_op")

    # assign ops
    self.loss_op = loss_op
    self.train_op = train_op

    # training
    init_op = tf.global_variables_initializer()
    self.session.run(init_op)
    sys.stderr.write('learner::fit(): Performing gradient descent ...\n')

    # =========================================================================
    # perform mini-batch SGD training
    batches = prepare_batch_intervals(len(self.inst_idx), self.batch_size)
    val_perf_drop_counter = 0
    for t in range(1, self.num_epoch + 1):
        np.random.shuffle(batches)
        total_cost = 0.0
        i = 1
        for start, end in batches:
            cost = self._batch_fit(range(start, end), doc, i_target,
                                   feat_doc_assn, f_target, g_doc, g_target)
            total_cost += cost
            # print 'batch', i, i*self.batch_size, cost
            i += 1
        sys.stderr.write('epoch,' + str(t) + ',total_cost,' + str(total_cost) + "\n")
        # print 'epoch', t, 'total_cost', total_cost
        if t % self.validation_interval == 0 and validation_set is not None:
            self._refresh_model_params()
            val_max_perf, is_lower = self._eval_validation_set(
                val_documents, val_labels, val_max_perf, model_dir)
            if is_lower:
                val_perf_drop_counter += 1
            else:
                # clear counter
                val_perf_drop_counter = 0
            if val_perf_drop_counter >= STOP_AFTER_VAL_PERF_DROP_COUNT:
                break
    sys.stderr.write('epoch,' + str(t) + ',total_cost,' + str(total_cost) + "\n")

    # =========================================================================
    if validation_set is not None:
        self._refresh_model_params()
        self._eval_validation_set(val_documents, val_labels, val_max_perf, model_dir)
    else:
        # no validation set: save the final model
        self._refresh_model_params()
        self.save_model(model_dir)
    return self
def _build_model(self):
    self.graph = tf.Graph()
    with self.graph.as_default():
        tf.set_random_seed(self.random_seed)  # set random seed
        # input data
        self.features = tf.placeholder(tf.int32, shape=[None, self.n_features_total],
                                       name='input_features')
        self.labels = tf.placeholder(tf.float32, shape=[None, 1], name='labels')
        self.dropout_keep = tf.placeholder(tf.float32, name='keep_prob')
        # split the input according to field information
        self.field_values = tf.split(self.features,
                                     num_or_size_splits=self.n_features_list, axis=1)
        # the first embedding layer
        pretrain_fm_weights = self._process_fm_params()
        with tf.variable_scope('embedding_layer'):
            self.b0 = tf.Variable(tf.constant(0.0), name='bias_0')
            self.W0 = dict()
            l1 = self.b0 * tf.ones_like(self.labels)
            for i in range(self.n_fields):
                self.W0[i] = tf.Variable(pretrain_fm_weights[i], name='embedding_%d' % i)
                # the field slices are dense one-hot int32 tensors, so cast
                # them and use a dense matmul here
                l1 = tf.concat([l1, tf.matmul(tf.cast(self.field_values[i], tf.float32),
                                              self.W0[i])], axis=1)
            self.layer1 = l1  # [None, n_fields * embedding_dim + 1]
            self.layer1 = tf.nn.dropout(self.layer1, keep_prob=self.dropout_keep)
        # the second layer, which is the first fully connected layer
        with tf.variable_scope('fc_layer_1'):
            self.b1 = tf.Variable(tf.constant(0.0, shape=[1, self.hidden_units[0]]), name='bias_1')
            self.W1 = tf.Variable(tf.truncated_normal(
                shape=[self.n_fields * self.embedding_dim + 1, self.hidden_units[0]],
                mean=0.0, stddev=0.1), name='W_1')
            self.layer2 = tf.nn.relu(tf.matmul(self.layer1, self.W1) + self.b1)
        # the third layer, which is the second fully connected layer
        with tf.variable_scope('fc_layer_2'):
            self.b2 = tf.Variable(tf.constant(0.0, shape=[1, self.hidden_units[1]]), name='bias_2')
            self.W2 = tf.Variable(tf.truncated_normal(
                shape=[self.hidden_units[0], self.hidden_units[1]],
                mean=0.0, stddev=0.1), name='W_2')
            self.layer3 = tf.nn.relu(tf.matmul(self.layer2, self.W2) + self.b2)
        # the last layer, which is the output layer
        with tf.variable_scope('out_layer'):
            self.b3 = tf.Variable(tf.constant(0.0, shape=[1, 1]), name='bias_3')
            self.W3 = tf.Variable(tf.truncated_normal(
                shape=[self.hidden_units[1], 1], mean=0.0, stddev=0.1), name='W_3')
            self.fnn = tf.matmul(self.layer3, self.W3) + self.b3
        # compute loss and accuracy
        with tf.variable_scope('loss'):
            self.predictions = tf.sigmoid(self.fnn)
            self.loss_f = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(labels=self.labels, logits=self.fnn))
            total_vars = tf.trainable_variables()
            self.loss = self.loss_f + tf.add_n(
                [l2_regularizer(self.norm_coef)(v) for v in total_vars])
            self.correct = tf.equal(
                tf.cast(tf.greater(self.predictions, 0.5), tf.float32), self.labels)
            self.accuracy = tf.reduce_mean(tf.cast(self.correct, tf.float32))
        # build optimizer
        with tf.variable_scope('optimizer'):
            if self.optimizer_type.lower() == 'sgd':
                self.optimizer = tf.train.GradientDescentOptimizer(learning_rate=self.lr)
            elif self.optimizer_type.lower() == 'adam':
                self.optimizer = tf.train.AdamOptimizer(learning_rate=self.lr)
            elif self.optimizer_type.lower() == 'rmsprop':
                self.optimizer = tf.train.RMSPropOptimizer(learning_rate=self.lr,
                                                           momentum=self.momentum)
            elif self.optimizer_type.lower() == 'momentum':
                self.optimizer = tf.train.MomentumOptimizer(learning_rate=self.lr,
                                                            momentum=self.momentum)
            self.optimizer_loss = self.optimizer.minimize(self.loss)
        # create session and init
        tf_config = tf.ConfigProto()
        tf_config.gpu_options.allow_growth = True
        self.sess = tf.Session(config=tf_config)
        self.saver = tf.train.Saver()
        init = tf.global_variables_initializer()
        self.sess.run(init)
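`_process_fm_params` is called above but not shown. A plausible sketch (mine, assuming a pretrained FM latent matrix stored on the instance; `self.pretrained_fm_latent` is a hypothetical attribute):

def _process_fm_params(self):
    # Hypothetical sketch: split a pretrained FM latent matrix of shape
    # [n_features_total, embedding_dim] into per-field blocks aligned with
    # self.n_features_list, to initialize the embedding layer above.
    weights, start = [], 0
    for n in self.n_features_list:
        weights.append(self.pretrained_fm_latent[start:start + n, :])
        start += n
    return weights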
def predict(self, documents, raw=False):
    """
    Predict labels for each document

    Parameters
    ----------
    documents : a dictionary of (str, list) pairs. d[inst] = word_seq
        inst: str, instance id
        word_seq: a list of word strings
    raw : bool. raw=True outputs the raw prediction matrix; raw=False outputs
        a cooked dictionary.

    Returns
    -------
    A dictionary of predictions. d[inst][label] = prob(label|inst)
        inst: str, instance id
        label: str, label name
        prob(label|inst): float
    """
    # the computation graph for prediction is different from the one for training
    # predict_op = tf.argmax(main_task_logits, 1, name="predict_op")
    # predict_proba_op = tf.nn.softmax(main_task_logits, name="predict_proba_op")
    # predict_log_proba_op = tf.log(predict_proba_op, name="predict_log_proba_op")
    # feed_dict = {self._documents: docs}
    # return self.session.run(self.predict_op, feed_dict=feed_dict)
    # sys.stderr.write('learner::predict(): performing prediction ...\n')
    if self.is_sparse:
        inst_idx, doc = featurize_bow(documents, self.feat_idx, tf='count',
                                      df=self.feat_doc_freq)
        X = tf.sparse_placeholder(tf.float32, [None, None])
        W = tf.placeholder(tf.float32, [None, None])
        logits = tf.sparse_tensor_dense_matmul(X, W)
        predict_proba_op = tf.nn.softmax(logits, name="predict_proba_op")
        p = self.session.run(predict_proba_op,
                             feed_dict={X: tf.SparseTensorValue(
                                               indices=doc.dict.keys(),
                                               values=doc.dict.values(),
                                               dense_shape=doc.shape),
                                        W: self.param_W})
    else:
        inst_idx, doc = featurize_seq(documents, self.feat_idx)
        X = tf.placeholder(tf.int32, [None, None])
        W_emb = tf.placeholder(tf.float32, (len(self.feat_idx) + 1, self.embedding_size))
        W = tf.placeholder(tf.float32, [None, None])
        logits = self._dense_arch(W_emb, W, X)
        predict_proba_op = tf.nn.softmax(logits, name="predict_proba_op")
        p = self.session.run(predict_proba_op,
                             feed_dict={X: doc, W_emb: self.param_W_emb, W: self.param_W})
    if raw:
        return p
    pred = {}
    for inst_id, i in inst_idx.items():
        pred[inst_id] = {}
        for label_id, j in self.label_idx.items():
            pred[inst_id][label_id] = p[i, j]
    return pred
def GCNLayer(self, gcn_in, in_dim, gcn_dim, batch_size, max_nodes, max_labels,
             adj, num_layers=1, name="GCN"):
    """
    GCN Layer Implementation

    Parameters
    ----------
    gcn_in:     Input to GCN Layer
    in_dim:     Dimension of input to GCN Layer
    gcn_dim:    Hidden state dimension of GCN
    batch_size: Batch size
    max_nodes:  Maximum number of nodes in graph
    max_labels: Maximum number of edge labels
    adj:        Adjacency matrix indices
    num_layers: Number of GCN Layers
    name:       Name of the layer (used for creating variables; keep it
                different for different layers)

    Returns
    -------
    out: List of outputs of the GCN layers, with the first element being the
         input itself, i.e., [gcn_in, gcn_layer1_out, gcn_layer2_out, ...]
    """
    out = []
    out.append(gcn_in)

    for layer in range(num_layers):
        gcn_in = out[-1]  # out contains the output of all the GCN layers; initially it contains the input to the first GCN layer
        if len(out) > 1:
            in_dim = gcn_dim  # after the first iteration, in_dim = gcn_dim

        with tf.name_scope('%s-%d' % (name, layer)):
            act_sum = tf.zeros([batch_size, max_nodes, gcn_dim])
            for lbl in range(max_labels):
                with tf.variable_scope('label-%d_name-%s_layer-%d' % (lbl, name, layer)) as scope:
                    w_in = tf.get_variable('w_in', [in_dim, gcn_dim],
                                           initializer=tf.contrib.layers.xavier_initializer(),
                                           regularizer=self.regularizer)
                    b_in = tf.get_variable('b_in', [1, gcn_dim],
                                           initializer=tf.constant_initializer(0.0),
                                           regularizer=self.regularizer)
                    w_out = tf.get_variable('w_out', [in_dim, gcn_dim],
                                            initializer=tf.contrib.layers.xavier_initializer(),
                                            regularizer=self.regularizer)
                    b_out = tf.get_variable('b_out', [1, gcn_dim],
                                            initializer=tf.constant_initializer(0.0),
                                            regularizer=self.regularizer)
                    w_loop = tf.get_variable('w_loop', [in_dim, gcn_dim],
                                             initializer=tf.contrib.layers.xavier_initializer(),
                                             regularizer=self.regularizer)
                    if self.p.wGate:
                        w_gin = tf.get_variable('w_gin', [in_dim, 1],
                                                initializer=tf.contrib.layers.xavier_initializer(),
                                                regularizer=self.regularizer)
                        b_gin = tf.get_variable('b_gin', [1],
                                                initializer=tf.constant_initializer(0.0),
                                                regularizer=self.regularizer)
                        w_gout = tf.get_variable('w_gout', [in_dim, 1],
                                                 initializer=tf.contrib.layers.xavier_initializer(),
                                                 regularizer=self.regularizer)
                        b_gout = tf.get_variable('b_gout', [1],
                                                 initializer=tf.constant_initializer(0.0),
                                                 regularizer=self.regularizer)
                        w_gloop = tf.get_variable('w_gloop', [in_dim, 1],
                                                  initializer=tf.contrib.layers.
xavier_initializer(), regularizer=self.regularizer) with tf.name_scope('in_arcs-%s_name-%s_layer-%d' % (lbl, name, layer)): inp_in = tf.tensordot( gcn_in, w_in, axes=[2, 0]) + tf.expand_dims(b_in, axis=0) in_t = tf.stack([ tf.sparse_tensor_dense_matmul( adj[i][lbl], inp_in[i]) for i in range(batch_size) ]) if self.p.dropout != 1.0: in_t = tf.nn.dropout(in_t, keep_prob=self.p.dropout) if self.p.wGate: inp_gin = tf.tensordot(gcn_in, w_gin, axes=[ 2, 0 ]) + tf.expand_dims(b_gin, axis=0) in_gate = tf.stack([ tf.sparse_tensor_dense_matmul( adj[i][lbl], inp_gin[i]) for i in range(batch_size) ]) in_gsig = tf.sigmoid(in_gate) in_act = in_t * in_gsig else: in_act = in_t with tf.name_scope('out_arcs-%s_name-%s_layer-%d' % (lbl, name, layer)): inp_out = tf.tensordot(gcn_in, w_out, axes=[ 2, 0 ]) + tf.expand_dims(b_out, axis=0) out_t = tf.stack([ tf.sparse_tensor_dense_matmul( tf.sparse_transpose(adj[i][lbl]), inp_out[i]) for i in range(batch_size) ]) if self.p.dropout != 1.0: out_t = tf.nn.dropout(out_t, keep_prob=self.p.dropout) if self.p.wGate: inp_gout = tf.tensordot( gcn_in, w_gout, axes=[2, 0]) + tf.expand_dims( b_gout, axis=0) out_gate = tf.stack([ tf.sparse_tensor_dense_matmul( tf.sparse_transpose(adj[i][lbl]), inp_gout[i]) for i in range(batch_size) ]) out_gsig = tf.sigmoid(out_gate) out_act = out_t * out_gsig else: out_act = out_t with tf.name_scope('self_loop'): inp_loop = tf.tensordot(gcn_in, w_loop, axes=[2, 0]) if self.p.dropout != 1.0: inp_loop = tf.nn.dropout(inp_loop, keep_prob=self.p.dropout) if self.p.wGate: inp_gloop = tf.tensordot(gcn_in, w_gloop, axes=[2, 0]) loop_gsig = tf.sigmoid(inp_gloop) loop_act = inp_loop * loop_gsig else: loop_act = inp_loop act_sum += in_act + out_act + loop_act gcn_out = tf.nn.relu(act_sum) out.append(gcn_out) return out
def __init__(self, layer_sizes=None, layer_acts=None, layer_keeps=None, init_path=None, opt_algo='gd', learning_rate=1e-2, random_seed=None): init_vars = [] num_inputs = len(layer_sizes[0]) embedding_order = layer_sizes[1] for i in range(num_inputs): layer_input = layer_sizes[0][i] layer_output = embedding_order init_vars.append(('w0_%d' % i, [layer_input, layer_output], 'tnormal', dtype)) init_vars.append(('b0_%d' % i, [layer_output], 'zero', dtype)) init_vars.append(('f1', [embedding_order, layer_sizes[2], 1, 2], 'tnormal', dtype)) init_vars.append(('f2', [embedding_order, layer_sizes[3], 2, 2], 'tnormal', dtype)) init_vars.append(('w1', [2 * 3 * embedding_order, 1], 'tnormal', dtype)) init_vars.append(('b1', [1], 'zero', dtype)) self.graph = tf.Graph() with self.graph.as_default(): if random_seed is not None: tf.set_random_seed(random_seed) self.X = [tf.sparse_placeholder(dtype) for i in range(num_inputs)] self.y = tf.placeholder(dtype) self.vars = utils.init_var_map(init_vars, init_path) w0 = [self.vars['w0_%d' % i] for i in range(num_inputs)] b0 = [self.vars['b0_%d' % i] for i in range(num_inputs)] l = tf.nn.dropout( utils.activate( tf.concat([tf.sparse_tensor_dense_matmul(self.X[i], w0[i]) + b0[i] for i in range(num_inputs)], 1), layer_acts[0]), layer_keeps[0]) l = tf.transpose(tf.reshape(l, [-1, num_inputs, embedding_order, 1]), [0, 2, 1, 3]) f1 = self.vars['f1'] l = tf.nn.conv2d(l, f1, [1, 1, 1, 1], 'SAME') l = tf.transpose( utils.max_pool_4d( tf.transpose(l, [0, 1, 3, 2]), num_inputs / 2), [0, 1, 3, 2]) f2 = self.vars['f2'] l = tf.nn.conv2d(l, f2, [1, 1, 1, 1], 'SAME') l = tf.transpose( utils.max_pool_4d( tf.transpose(l, [0, 1, 3, 2]), 3), [0, 1, 3, 2]) l = tf.nn.dropout( utils.activate( tf.reshape(l, [-1, embedding_order * 3 * 2]), layer_acts[1]), layer_keeps[1]) w1 = self.vars['w1'] b1 = self.vars['b1'] l = tf.nn.dropout( utils.activate( tf.matmul(l, w1) + b1, layer_acts[2]), layer_keeps[2]) self.y_prob = tf.sigmoid(l) self.loss = tf.reduce_mean( tf.nn.sigmoid_cross_entropy_with_logits(logits=l, labels=self.y)) self.optimizer = utils.get_optimizer(opt_algo, learning_rate, self.loss) config = tf.ConfigProto() config.gpu_options.allow_growth = True self.sess = tf.Session(config=config) tf.global_variables_initializer().run(session=self.sess)
def _build_graph(self): """ Build a computation graph that represents the model """ rnn_inputs = self._build_input() # rnn_inputs: a list of num_step tensors, # each tensor of size (batch_size, query_embed_size). # self.rank=3 if self.rank==1: self.rnn_inputs = [tf.reshape(q, [-1, self.query_embed_size]) for q in tf.split(rnn_inputs, self.num_step, axis=1)] self.rnn_inputs_new=[a for i, a in enumerate(self.rnn_inputs) if i< self.num_step-1] #print(len(self.rnn_inputs_new)) cell = tf.contrib.rnn.LSTMCell(self.rnn_state_size) self.cell = tf.contrib.rnn.MultiRNNCell( [cell] * self.num_layer, state_is_tuple=True) init_state = self.cell.zero_state(tf.shape(self.tails)[0], tf.float32) ##### making backward cells cell_bw = tf.contrib.rnn.LSTMCell(self.rnn_state_size) self.cell_bw = tf.contrib.rnn.MultiRNNCell( [cell_bw] * self.num_layer, state_is_tuple=True) init_state_bw = self.cell_bw.zero_state(tf.shape(self.tails)[0], tf.float32) # rnn_outputs: a list of num_step tensors, # each tensor of size (batch_size, rnn_state_size). self.rnn_outputs, _, _ = tf.contrib.rnn.static_bidirectional_rnn( self.cell, self.cell_bw, self.rnn_inputs_new, initial_state_fw=init_state, initial_state_bw=init_state_bw, scope='f1') self.W_0 = tf.Variable(np.random.randn( self.rnn_state_size*2, self.num_operator + 1), dtype=tf.float32) self.b_0 = tf.Variable(np.zeros( (1, self.num_operator + 1)), dtype=tf.float32) # attention_operators: a list of num_step lists, # each inner list has num_operator tensors, # each tensor of size (batch_size, 1). # Each tensor represents the attention over an operator. self.attention_operators = [tf.split( tf.nn.softmax( tf.matmul(rnn_output, self.W_0) + self.b_0), self.num_operator + 1, axis=1) for rnn_output in self.rnn_outputs] # attention_memories: (will be) a list of num_step tensors, # each of size (batch_size, t+1), # where t is the current step (zero indexed). # Each tensor represents the attention over currently populated memory cells. # self.attention_memories = [] # memories: (will be) a tensor of size (batch_size, t+1, num_entity), # where t is the current step (zero indexed) # Then tensor represents currently populated memory cells. self.memories = tf.expand_dims( tf.one_hot( indices=self.tails, depth=self.num_entity), 1) self.database = {r: tf.sparse_placeholder( dtype=tf.float32, name="database_%d" % r) for r in xrange(self.num_operator/2)} for t in xrange(self.num_step): # memory_read: tensor of size (batch_size, num_entity) # memory_read = tf.squeeze(self.memories, squeeze_dims=[1]) memory_read = self.memories[:, -1, :] if t < self.num_step - 1: # database_results: (will be) a list of num_operator tensors, # each of size (batch_size, num_entity). database_results = [] memory_read = tf.transpose(memory_read) for r in xrange(self.num_operator/2): for op_matrix, op_attn in zip( [self.database[r], tf.sparse_transpose(self.database[r])], [self.attention_operators[t][r], self.attention_operators[t][r+self.num_operator/2]]): product = tf.sparse_tensor_dense_matmul(op_matrix, memory_read) database_results.append(tf.transpose(product) * op_attn) database_results.append(tf.transpose(memory_read) * self.attention_operators[t][-1]) added_database_results = tf.add_n(database_results) if self.norm: added_database_results /= tf.maximum(self.thr, tf.reduce_sum(added_database_results, axis=1, keep_dims=True)) if self.dropout > 0.: added_database_results = tf.nn.dropout(added_database_results, keep_prob=1.-self.dropout) # Populate a new cell in memory by concatenating. 
self.memories = tf.concat( [self.memories, tf.expand_dims(added_database_results, 1)], axis=1) else: self.predictions1 = memory_read # -------------- end ------------------- self.predictions = self.predictions1 if self.rank==3: self.rnn_inputs = [tf.reshape(q, [-1, self.query_embed_size]) for q in tf.split(rnn_inputs, self.num_step, axis=1)] self.rnn_inputs_new=[a for i, a in enumerate(self.rnn_inputs) if i< self.num_step-1] #print(len(self.rnn_inputs_new)) cell = tf.contrib.rnn.LSTMCell(self.rnn_state_size) self.cell = tf.contrib.rnn.MultiRNNCell( [cell] * self.num_layer, state_is_tuple=True) init_state = self.cell.zero_state(tf.shape(self.tails)[0], tf.float32) cell2 = tf.contrib.rnn.LSTMCell(self.rnn_state_size) self.cell2 = tf.contrib.rnn.MultiRNNCell( [cell2] * self.num_layer, state_is_tuple=True) init_state2 = self.cell2.zero_state(tf.shape(self.tails)[0], tf.float32) cell3 = tf.contrib.rnn.LSTMCell(self.rnn_state_size) self.cell3 = tf.contrib.rnn.MultiRNNCell( [cell3] * self.num_layer, state_is_tuple=True) init_state3 = self.cell3.zero_state(tf.shape(self.tails)[0], tf.float32) ##### making backward cells cell_bw = tf.contrib.rnn.LSTMCell(self.rnn_state_size) self.cell_bw = tf.contrib.rnn.MultiRNNCell( [cell_bw] * self.num_layer, state_is_tuple=True) init_state_bw = self.cell_bw.zero_state(tf.shape(self.tails)[0], tf.float32) cell2_bw = tf.contrib.rnn.LSTMCell(self.rnn_state_size) self.cell2_bw = tf.contrib.rnn.MultiRNNCell( [cell2_bw] * self.num_layer, state_is_tuple=True) init_state2_bw = self.cell2_bw.zero_state(tf.shape(self.tails)[0], tf.float32) cell3_bw = tf.contrib.rnn.LSTMCell(self.rnn_state_size) self.cell3_bw = tf.contrib.rnn.MultiRNNCell( [cell3_bw] * self.num_layer, state_is_tuple=True) init_state3_bw = self.cell3_bw.zero_state(tf.shape(self.tails)[0], tf.float32) # rnn_outputs: a list of num_step tensors, # each tensor of size (batch_size, rnn_state_size). self.rnn_outputs, _, _ = tf.contrib.rnn.static_bidirectional_rnn( self.cell, self.cell_bw, self.rnn_inputs_new, initial_state_fw=init_state, initial_state_bw=init_state_bw, scope='f1') self.rnn_outputs2, _, _= tf.contrib.rnn.static_bidirectional_rnn( self.cell2, self.cell2_bw, self.rnn_inputs_new, initial_state_fw=init_state2, initial_state_bw=init_state2_bw, scope='f2') self.rnn_outputs3, _, _= tf.contrib.rnn.static_bidirectional_rnn( self.cell3, self.cell3_bw, self.rnn_inputs_new, initial_state_fw=init_state3, initial_state_bw=init_state3_bw, scope='f3') # making a NN here with 128 hidden units and leaky relu self.W_0 = tf.Variable(np.random.randn( self.rnn_state_size*2, self.num_operator + 1), dtype=tf.float32) self.b_0 = tf.Variable(np.zeros( (1, self.num_operator + 1)), dtype=tf.float32) # attention_operators: a list of num_step lists, # each inner list has num_operator tensors, # each tensor of size (batch_size, 1). # Each tensor represents the attention over an operator. self.attention_operators = [tf.split( tf.nn.softmax( tf.matmul(rnn_output, self.W_0) + self.b_0), self.num_operator + 1, axis=1) for rnn_output in self.rnn_outputs] self.attention_operators2 = [tf.split( tf.nn.softmax( tf.matmul(rnn_output2, self.W_0) + self.b_0), self.num_operator + 1, axis=1) for rnn_output2 in self.rnn_outputs2] self.attention_operators3 = [tf.split( tf.nn.softmax( tf.matmul(rnn_output3, self.W_0) + self.b_0), self.num_operator + 1, axis=1) for rnn_output3 in self.rnn_outputs3] # attention_memories: (will be) a list of num_step tensors, # each of size (batch_size, t+1), # where t is the current step (zero indexed). 
# Each tensor represents the attention over currently populated memory cells. # self.attention_memories = [] # memories: (will be) a tensor of size (batch_size, t+1, num_entity), # where t is the current step (zero indexed) # Then tensor represents currently populated memory cells. self.memories = tf.expand_dims( tf.one_hot( indices=self.tails, depth=self.num_entity), 1) self.memories2 = tf.expand_dims( tf.one_hot( indices=self.tails, depth=self.num_entity), 1) self.memories3 = tf.expand_dims( tf.one_hot( indices=self.tails, depth=self.num_entity), 1) self.database = {r: tf.sparse_placeholder( dtype=tf.float32, name="database_%d" % r) for r in xrange(self.num_operator/2)} for t in xrange(self.num_step): # memory_read: tensor of size (batch_size, num_entity) # memory_read = tf.squeeze(self.memories, squeeze_dims=[1]) memory_read = self.memories[:, -1, :] if t < self.num_step - 1: # database_results: (will be) a list of num_operator tensors, # each of size (batch_size, num_entity). database_results = [] memory_read = tf.transpose(memory_read) for r in xrange(self.num_operator/2): for op_matrix, op_attn in zip( [self.database[r], tf.sparse_transpose(self.database[r])], [self.attention_operators[t][r], self.attention_operators[t][r+self.num_operator/2]]): product = tf.sparse_tensor_dense_matmul(op_matrix, memory_read) database_results.append(tf.transpose(product) * op_attn) database_results.append(tf.transpose(memory_read) * self.attention_operators[t][-1]) added_database_results = tf.add_n(database_results) if self.norm: added_database_results /= tf.maximum(self.thr, tf.reduce_sum(added_database_results, axis=1, keep_dims=True)) if self.dropout > 0.: added_database_results = tf.nn.dropout(added_database_results, keep_prob=1.-self.dropout) # Populate a new cell in memory by concatenating. self.memories = tf.concat( [self.memories, tf.expand_dims(added_database_results, 1)], axis=1) else: self.predictions1 = memory_read # -------------- second for ------------ for t in xrange(self.num_step): # memory_read: tensor of size (batch_size, num_entity) # memory_read = tf.squeeze(self.memories, squeeze_dims=[1]) memory_read2 = self.memories2[:, -1, :] if t < self.num_step - 1: # database_results: (will be) a list of num_operator tensors, # each of size (batch_size, num_entity). database_results = [] memory_read2 = tf.transpose(memory_read2) for r in xrange(self.num_operator / 2): for op_matrix, op_attn in zip( [self.database[r], tf.sparse_transpose(self.database[r])], [self.attention_operators2[t][r], self.attention_operators2[t][r + self.num_operator / 2]]): product = tf.sparse_tensor_dense_matmul(op_matrix, memory_read2) database_results.append(tf.transpose(product) * op_attn) database_results.append(tf.transpose(memory_read2) * self.attention_operators2[t][-1]) added_database_results = tf.add_n(database_results) if self.norm: added_database_results /= tf.maximum(self.thr, tf.reduce_sum(added_database_results, axis=1, keep_dims=True)) if self.dropout > 0.: added_database_results = tf.nn.dropout(added_database_results, keep_prob=1. - self.dropout) # Populate a new cell in memory by concatenating. 
            self.memories2 = tf.concat(
                [self.memories2,
                 tf.expand_dims(added_database_results, 1)],
                axis=1)
        else:
            self.predictions2 = memory_read2

    # -------------- third for ------------
    for t in xrange(self.num_step):
        # memory_read3: tensor of size (batch_size, num_entity)
        memory_read3 = self.memories3[:, -1, :]
        if t < self.num_step - 1:
            database_results = []
            memory_read3 = tf.transpose(memory_read3)
            for r in xrange(self.num_operator // 2):
                for op_matrix, op_attn in zip(
                        [self.database[r],
                         tf.sparse_transpose(self.database[r])],
                        [self.attention_operators3[t][r],
                         self.attention_operators3[t][r + self.num_operator // 2]]):
                    product = tf.sparse_tensor_dense_matmul(
                        op_matrix, memory_read3)
                    database_results.append(tf.transpose(product) * op_attn)
            database_results.append(
                tf.transpose(memory_read3) * self.attention_operators3[t][-1])

            added_database_results = tf.add_n(database_results)
            if self.norm:
                added_database_results /= tf.maximum(
                    self.thr, tf.reduce_sum(
                        added_database_results, axis=1, keep_dims=True))
            if self.dropout > 0.:
                added_database_results = tf.nn.dropout(
                    added_database_results, keep_prob=1. - self.dropout)

            # Populate a new cell in memory by concatenating.
            self.memories3 = tf.concat(
                [self.memories3,
                 tf.expand_dims(added_database_results, 1)],
                axis=1)
        else:
            self.predictions3 = memory_read3
    # -------------- end -------------------
    self.predictions = (self.predictions1 + self.predictions2
                        + self.predictions3)

if self.rank == 4:
    self.rnn_inputs = [tf.reshape(q, [-1, self.query_embed_size])
                       for q in tf.split(rnn_inputs, self.num_step, axis=1)]
    self.rnn_inputs_new = [a for i, a in enumerate(self.rnn_inputs)
                           if i < self.num_step - 1]

    # As above: a fresh LSTMCell per layer for every controller.
    self.cell = tf.contrib.rnn.MultiRNNCell(
        [tf.contrib.rnn.LSTMCell(self.rnn_state_size)
         for _ in xrange(self.num_layer)], state_is_tuple=True)
    init_state = self.cell.zero_state(tf.shape(self.tails)[0], tf.float32)
    self.cell2 = tf.contrib.rnn.MultiRNNCell(
        [tf.contrib.rnn.LSTMCell(self.rnn_state_size)
         for _ in xrange(self.num_layer)], state_is_tuple=True)
    init_state2 = self.cell2.zero_state(tf.shape(self.tails)[0], tf.float32)
    self.cell3 = tf.contrib.rnn.MultiRNNCell(
        [tf.contrib.rnn.LSTMCell(self.rnn_state_size)
         for _ in xrange(self.num_layer)], state_is_tuple=True)
    init_state3 = self.cell3.zero_state(tf.shape(self.tails)[0], tf.float32)
    self.cell4 = tf.contrib.rnn.MultiRNNCell(
        [tf.contrib.rnn.LSTMCell(self.rnn_state_size)
         for _ in xrange(self.num_layer)], state_is_tuple=True)
    init_state4 = self.cell4.zero_state(tf.shape(self.tails)[0], tf.float32)

    ##### making backward cells
    self.cell_bw = tf.contrib.rnn.MultiRNNCell(
        [tf.contrib.rnn.LSTMCell(self.rnn_state_size)
         for _ in xrange(self.num_layer)], state_is_tuple=True)
    init_state_bw = self.cell_bw.zero_state(tf.shape(self.tails)[0], tf.float32)
    self.cell2_bw = tf.contrib.rnn.MultiRNNCell(
        [tf.contrib.rnn.LSTMCell(self.rnn_state_size)
         for _ in xrange(self.num_layer)], state_is_tuple=True)
    init_state2_bw = self.cell2_bw.zero_state(tf.shape(self.tails)[0], tf.float32)
    self.cell3_bw = tf.contrib.rnn.MultiRNNCell(
        [tf.contrib.rnn.LSTMCell(self.rnn_state_size)
         for _ in xrange(self.num_layer)], state_is_tuple=True)
    init_state3_bw = self.cell3_bw.zero_state(tf.shape(self.tails)[0], tf.float32)
    self.cell4_bw = tf.contrib.rnn.MultiRNNCell(
        [tf.contrib.rnn.LSTMCell(self.rnn_state_size)
         for _ in xrange(self.num_layer)], state_is_tuple=True)
    init_state4_bw = self.cell4_bw.zero_state(tf.shape(self.tails)[0], tf.float32)

    # rnn_outputs: a list of num_step - 1 tensors, each of size
    # (batch_size, 2 * rnn_state_size).
    self.rnn_outputs, _, _ = tf.contrib.rnn.static_bidirectional_rnn(
        self.cell, self.cell_bw, self.rnn_inputs_new,
        initial_state_fw=init_state, initial_state_bw=init_state_bw,
        scope='f1')
    self.rnn_outputs2, _, _ = tf.contrib.rnn.static_bidirectional_rnn(
        self.cell2, self.cell2_bw, self.rnn_inputs_new,
        initial_state_fw=init_state2, initial_state_bw=init_state2_bw,
        scope='f2')
    self.rnn_outputs3, _, _ = tf.contrib.rnn.static_bidirectional_rnn(
        self.cell3, self.cell3_bw, self.rnn_inputs_new,
        initial_state_fw=init_state3, initial_state_bw=init_state3_bw,
        scope='f3')
    self.rnn_outputs4, _, _ = tf.contrib.rnn.static_bidirectional_rnn(
        self.cell4, self.cell4_bw, self.rnn_inputs_new,
        initial_state_fw=init_state4, initial_state_bw=init_state4_bw,
        scope='f4')

    # Shared linear layer from controller state to num_operator + 1
    # operator logits (the extra slot is the identity operator).
    self.W_0 = tf.Variable(np.random.randn(
        self.rnn_state_size * 2, self.num_operator + 1),
        dtype=tf.float32)
    self.b_0 = tf.Variable(np.zeros(
        (1, self.num_operator + 1)), dtype=tf.float32)

    # attention_operators: a list of num_step - 1 lists; each inner list
    # has num_operator + 1 tensors, each of size (batch_size, 1).
    # Each tensor represents the attention over one operator.
    self.attention_operators = [tf.split(
        tf.nn.softmax(tf.matmul(rnn_output, self.W_0) + self.b_0),
        self.num_operator + 1, axis=1)
        for rnn_output in self.rnn_outputs]
    self.attention_operators2 = [tf.split(
        tf.nn.softmax(tf.matmul(rnn_output2, self.W_0) + self.b_0),
        self.num_operator + 1, axis=1)
        for rnn_output2 in self.rnn_outputs2]
    self.attention_operators3 = [tf.split(
        tf.nn.softmax(tf.matmul(rnn_output3, self.W_0) + self.b_0),
        self.num_operator + 1, axis=1)
        for rnn_output3 in self.rnn_outputs3]
    self.attention_operators4 = [tf.split(
        tf.nn.softmax(tf.matmul(rnn_output4, self.W_0) + self.b_0),
        self.num_operator + 1, axis=1)
        for rnn_output4 in self.rnn_outputs4]

    # memories: (will be) a tensor of size (batch_size, t+1, num_entity),
    # where t is the current step (zero indexed); one memory per
    # controller, initialized to a one-hot over the tail entity.
    self.memories = tf.expand_dims(
        tf.one_hot(indices=self.tails, depth=self.num_entity), 1)
    self.memories2 = tf.expand_dims(
        tf.one_hot(indices=self.tails, depth=self.num_entity), 1)
    self.memories3 = tf.expand_dims(
        tf.one_hot(indices=self.tails, depth=self.num_entity), 1)
    self.memories4 = tf.expand_dims(
        tf.one_hot(indices=self.tails, depth=self.num_entity), 1)

    self.database = {r: tf.sparse_placeholder(
        dtype=tf.float32,
        name="database_%d" % r)
        for r in xrange(self.num_operator // 2)}

    for t in xrange(self.num_step):
        # memory_read: tensor of size (batch_size, num_entity)
        memory_read = self.memories[:, -1, :]
        if t < self.num_step - 1:
            # database_results: (will be) a list of num_operator + 1
            # tensors, each of size (batch_size, num_entity).
            database_results = []
            memory_read = tf.transpose(memory_read)
            for r in xrange(self.num_operator // 2):
                for op_matrix, op_attn in zip(
                        [self.database[r],
                         tf.sparse_transpose(self.database[r])],
                        [self.attention_operators[t][r],
                         self.attention_operators[t][r + self.num_operator // 2]]):
                    product = tf.sparse_tensor_dense_matmul(
                        op_matrix, memory_read)
                    database_results.append(tf.transpose(product) * op_attn)
            database_results.append(
                tf.transpose(memory_read) * self.attention_operators[t][-1])

            added_database_results = tf.add_n(database_results)
            if self.norm:
                added_database_results /= tf.maximum(
                    self.thr, tf.reduce_sum(
                        added_database_results, axis=1, keep_dims=True))
            if self.dropout > 0.:
                added_database_results = tf.nn.dropout(
                    added_database_results, keep_prob=1. - self.dropout)

            # Populate a new cell in memory by concatenating.
            self.memories = tf.concat(
                [self.memories,
                 tf.expand_dims(added_database_results, 1)],
                axis=1)
        else:
            self.predictions1 = memory_read

    # -------------- second for ------------
    for t in xrange(self.num_step):
        memory_read2 = self.memories2[:, -1, :]
        if t < self.num_step - 1:
            database_results = []
            memory_read2 = tf.transpose(memory_read2)
            for r in xrange(self.num_operator // 2):
                for op_matrix, op_attn in zip(
                        [self.database[r],
                         tf.sparse_transpose(self.database[r])],
                        [self.attention_operators2[t][r],
                         self.attention_operators2[t][r + self.num_operator // 2]]):
                    product = tf.sparse_tensor_dense_matmul(
                        op_matrix, memory_read2)
                    database_results.append(tf.transpose(product) * op_attn)
            database_results.append(
                tf.transpose(memory_read2) * self.attention_operators2[t][-1])

            added_database_results = tf.add_n(database_results)
            if self.norm:
                added_database_results /= tf.maximum(
                    self.thr, tf.reduce_sum(
                        added_database_results, axis=1, keep_dims=True))
            if self.dropout > 0.:
                added_database_results = tf.nn.dropout(
                    added_database_results, keep_prob=1. - self.dropout)

            # Populate a new cell in memory by concatenating.
            self.memories2 = tf.concat(
                [self.memories2,
                 tf.expand_dims(added_database_results, 1)],
                axis=1)
        else:
            self.predictions2 = memory_read2

    # -------------- third for ------------
    for t in xrange(self.num_step):
        memory_read3 = self.memories3[:, -1, :]
        if t < self.num_step - 1:
            database_results = []
            memory_read3 = tf.transpose(memory_read3)
            for r in xrange(self.num_operator // 2):
                for op_matrix, op_attn in zip(
                        [self.database[r],
                         tf.sparse_transpose(self.database[r])],
                        [self.attention_operators3[t][r],
                         self.attention_operators3[t][r + self.num_operator // 2]]):
                    product = tf.sparse_tensor_dense_matmul(
                        op_matrix, memory_read3)
                    database_results.append(tf.transpose(product) * op_attn)
            database_results.append(
                tf.transpose(memory_read3) * self.attention_operators3[t][-1])

            added_database_results = tf.add_n(database_results)
            if self.norm:
                added_database_results /= tf.maximum(
                    self.thr, tf.reduce_sum(
                        added_database_results, axis=1, keep_dims=True))
            if self.dropout > 0.:
                added_database_results = tf.nn.dropout(
                    added_database_results, keep_prob=1. - self.dropout)

            # Populate a new cell in memory by concatenating.
            self.memories3 = tf.concat(
                [self.memories3,
                 tf.expand_dims(added_database_results, 1)],
                axis=1)
        else:
            self.predictions3 = memory_read3

    # -------------- fourth for ------------
    for t in xrange(self.num_step):
        memory_read4 = self.memories4[:, -1, :]
        if t < self.num_step - 1:
            database_results = []
            memory_read4 = tf.transpose(memory_read4)
            for r in xrange(self.num_operator // 2):
                for op_matrix, op_attn in zip(
                        [self.database[r],
                         tf.sparse_transpose(self.database[r])],
                        [self.attention_operators4[t][r],
                         self.attention_operators4[t][r + self.num_operator // 2]]):
                    product = tf.sparse_tensor_dense_matmul(
                        op_matrix, memory_read4)
                    database_results.append(tf.transpose(product) * op_attn)
            database_results.append(
                tf.transpose(memory_read4) * self.attention_operators4[t][-1])

            added_database_results = tf.add_n(database_results)
            if self.norm:
                added_database_results /= tf.maximum(
                    self.thr, tf.reduce_sum(
                        added_database_results, axis=1, keep_dims=True))
            if self.dropout > 0.:
                added_database_results = tf.nn.dropout(
                    added_database_results, keep_prob=1. - self.dropout)

            # Populate a new cell in memory by concatenating.
            self.memories4 = tf.concat(
                [self.memories4,
                 tf.expand_dims(added_database_results, 1)],
                axis=1)
        else:
            self.predictions4 = memory_read4
    # -------------- end -------------------
    self.predictions = (self.predictions1 + self.predictions2
                        + self.predictions3 + self.predictions4)

# Cross-entropy loss; predictions are clipped at self.thr so tf.log
# never sees a zero.
self.final_loss = -tf.reduce_sum(
    self.targets * tf.log(tf.maximum(self.predictions, self.thr)), 1)

if not self.accuracy:
    self.in_top = tf.nn.in_top_k(
        predictions=self.predictions,
        targets=self.heads,
        k=self.top_k)
else:
    _, indices = tf.nn.top_k(self.predictions, self.top_k, sorted=False)
    self.in_top = tf.equal(tf.squeeze(indices), self.heads)

self.optimizer = tf.train.AdamOptimizer(self.learning_rate)
gvs = self.optimizer.compute_gradients(tf.reduce_mean(self.final_loss))
# Note: the tuple-unpacking lambda below is Python 2 only.
capped_gvs = map(
    lambda (grad, var): self._clip_if_not_None(grad, var, -5., 5.), gvs)
self.optimizer_step = self.optimizer.apply_gradients(capped_gvs)
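For reference, the per-step update that each of the loops above repeats can be written in isolation. The sketch below is a hypothetical, self-contained illustration, not the model's code: the sizes, the `attn` placeholder, and the `1e-20` floor (standing in for `self.thr`) are all assumptions. Each relation matrix contributes twice, once as itself and once transposed (the inverse relation), plus one identity slot, and the rows are renormalized the same way the loops do.

import numpy as np
import tensorflow as tf

batch_size, num_entity, num_relation = 4, 10, 3   # hypothetical sizes

# u: the current memory read, one distribution over entities per example.
u = tf.placeholder(tf.float32, [batch_size, num_entity])
# One sparse (num_entity x num_entity) matrix per relation.
ops = [tf.sparse_placeholder(tf.float32, name="op_%d" % r)
       for r in range(num_relation)]
# Attention over 2 * num_relation operators plus the identity slot,
# e.g. the output of a softmax over controller logits.
attn = tf.placeholder(tf.float32, [batch_size, 2 * num_relation + 1])

u_t = tf.transpose(u)                              # num_entity x batch_size
results = []
for r in range(num_relation):
    for i, m in enumerate([ops[r], tf.sparse_transpose(ops[r])]):
        v = tf.sparse_tensor_dense_matmul(m, u_t)  # num_entity x batch_size
        a = tf.expand_dims(attn[:, r + i * num_relation], 1)
        results.append(tf.transpose(v) * a)        # batch_size x num_entity
results.append(u * tf.expand_dims(attn[:, -1], 1))  # identity operator
u_next = tf.add_n(results)
# Renormalize rows so the memory stays (roughly) a distribution.
u_next /= tf.maximum(1e-20, tf.reduce_sum(u_next, axis=1, keep_dims=True))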
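The `database_%d` placeholders are fed at run time as sparse tensors. Below is a minimal sketch of what such a feed might look like, assuming each relation is stored as a list of (head, tail) index pairs; `db`, `pairs`, and the toy data are invented for illustration and are not the repository's actual feed code.

import numpy as np
import tensorflow as tf

num_entity = 10
db = tf.sparse_placeholder(tf.float32, name="database_0")
# Out-degree of every entity under this relation, computed with the same
# sparse-dense matmul the model uses.
out_degree = tf.sparse_tensor_dense_matmul(db, tf.ones([num_entity, 1]))

pairs = [(0, 3), (2, 5), (7, 1)]                   # made-up (head, tail) edges
feed = tf.SparseTensorValue(
    indices=np.array(pairs, dtype=np.int64),
    values=np.ones(len(pairs), dtype=np.float32),
    dense_shape=np.array([num_entity, num_entity], dtype=np.int64))

with tf.Session() as sess:
    print(sess.run(out_degree, feed_dict={db: feed}))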
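`W_0` has `2 * rnn_state_size` rows because `static_bidirectional_rnn` concatenates the forward and backward outputs at every step. A quick standalone check, with invented sizes:

import tensorflow as tf

batch, steps, in_dim, state_size = 4, 3, 5, 8      # hypothetical sizes
inputs = [tf.placeholder(tf.float32, [batch, in_dim]) for _ in range(steps)]
fw = tf.contrib.rnn.LSTMCell(state_size)
bw = tf.contrib.rnn.LSTMCell(state_size)
outputs, _, _ = tf.contrib.rnn.static_bidirectional_rnn(
    fw, bw, inputs, dtype=tf.float32, scope='demo')
print(outputs[0].get_shape())                      # (4, 16): batch x 2*state_size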
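Finally, the gradient-capping line `map(lambda (grad, var): ...)` relies on Python 2 tuple unpacking in lambdas, which is a syntax error under Python 3. A version-agnostic sketch of the same compute-clip-apply pattern, assuming `_clip_if_not_None` clips by value and passes `None` gradients through (the toy variable and loss stand in for the model's own):

import tensorflow as tf

w = tf.Variable(1.0)
loss = tf.square(w - 3.0)                          # stand-in scalar loss
optimizer = tf.train.AdamOptimizer(0.001)
gvs = optimizer.compute_gradients(loss)
capped_gvs = [
    (tf.clip_by_value(grad, -5., 5.) if grad is not None else None, var)
    for grad, var in gvs]
train_op = optimizer.apply_gradients(capped_gvs)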