def __init__(self, sess, n_nodes, args): self.sess = sess self.result_dir = args.result_dir self.dataset_name = args.dataset_name self.n_nodes = n_nodes self.n_hidden = args.n_hidden self.n_embedding = args.n_embedding self.dropout = args.dropout self.learning_rate = args.learning_rate self.max_iteration = args.max_iteration self.shape = np.array([self.n_nodes, self.n_nodes]) self.adjacency = tf.sparse_placeholder(tf.float32, shape=self.shape, name='adjacency') self.norm_adj_mat = tf.sparse_placeholder(tf.float32, shape=self.shape, name='norm_adj_mat') self.keep_prob = tf.placeholder(tf.float32) self.W_0_mu = None self.W_1_mu = None self.W_0_sigma = None self.W_1_sigma = None self.mu_np = [] self.sigma_np = [] self._build_VGAE()
def __init__(self, **hparam ): ''' vocab_size, emb_size, enc_func, dec_func, is_tied_params, lambda_w, learning_rate, type_of_opt (adadelta)rho, (adam)beta1, beta2, epsilon ''' self.vocab_size = hparam['vocab_size'] if 'vocab_size' in hparam else 100000 self.emb_size = hparam['emb_size'] if 'emb_size' in hparam else 64 self.is_tied_params = hparam['is_tied_params'] if 'is_tied_params' in hparam else False self.init_value = hparam['init_value'] if 'init_value' in hparam else 0.01 self.lambda_w = hparam['lambda_w'] if 'lambda_w' in hparam else 0.001 self.lr = hparam['learning_rate'] if 'learning_rate' in hparam else 0.001 self.opt = hparam['type_of_opt'] if 'type_of_opt' in hparam else 'adam' self.rho = hparam['rho'] if 'rho' in hparam else 0.95 self.epsilon = hparam['epsilon'] if 'epsilon' in hparam else 1e-8 self.beta1 = hparam['beta1'] if 'beta1' in hparam else 0.9 self.beta2 = hparam['beta2'] if 'beta2' in hparam else 0.999 self.enc_func = self.get_activation_func(hparam['enc_func'] if 'enc_func' in hparam else 'tanh') self.dec_func = self.get_activation_func(hparam['dec_func'] if 'dec_func' in hparam else 'tanh') self.summary_path = hparam['tf_summary_file'] if 'tf_summary_file' in hparam else 'log_tmp_path' self.saver = None self.X = tf.sparse_placeholder(tf.float32) self.Y = tf.sparse_placeholder(tf.float32) self.mask = tf.sparse_placeholder(tf.float32) self.params = {} self.W = tf.Variable( tf.truncated_normal([self.vocab_size, self.emb_size], stddev=self.init_value / math.sqrt(float(self.emb_size)), mean=0), name='encoder_W' , dtype=tf.float32 ) self.b = tf.Variable(tf.truncated_normal([self.emb_size], stddev=self.init_value * 0.001, mean=0), name='encoder_bias', dtype=tf.float32 ) self.params['W'] = self.W self.params['b'] = self.b if not self.is_tied_params: self.W_prime = tf.Variable( tf.truncated_normal([self.emb_size, self.vocab_size], stddev=self.init_value / math.sqrt(float(self.emb_size)), mean=0), name='decoder_W' , dtype=tf.float32 ) self.params['W_prime'] = self.W_prime else: self.W_prime = tf.transpose(self.W) self.b_prime = tf.Variable(tf.truncated_normal([self.vocab_size], stddev=self.init_value * 0.001, mean=0), name='decoder_W', dtype=tf.float32 ) self.params['b_prime'] = self.b_prime self.encoded_values, self.decoded_values, self.masked_decoded_values, self.error, self.loss, self.train_step, self.summary = self.build_model() self.sess = tf.Session() self.sess.run(tf.global_variables_initializer()) self.log_writer = tf.summary.FileWriter(self.summary_path, graph = self.sess.graph) self._glo_ite_counter = 0
def test_edit_distance(): graph = tf.Graph() with graph.as_default(): truth = tf.sparse_placeholder(tf.int32) hyp = tf.sparse_placeholder(tf.int32) editDist = tf.edit_distance(hyp, truth, normalize=False) with tf.Session(graph=graph) as session: truthTest = sparse_tensor_feed([[0,1,2], [0,1,2,3,4]]) hypTest = sparse_tensor_feed([[3,4,5], [0,1,2,2]]) feedDict = {truth: truthTest, hyp: hypTest} dist = session.run([editDist], feed_dict=feedDict) print(dist)
def __init__(self, input_dim=None, output_dim=1, init_path=None, opt_algo='gd', learning_rate=1e-2, l2_weight=0, random_seed=None): Model.__init__(self) init_vars = [('w', [input_dim, output_dim], 'xavier', dtype), ('b', [output_dim], 'zero', dtype)] self.graph = tf.Graph() with self.graph.as_default(): if random_seed is not None: tf.set_random_seed(random_seed) self.X = tf.sparse_placeholder(dtype) self.y = tf.placeholder(dtype) self.vars = utils.init_var_map(init_vars, init_path) # 初始化变量w, b w = self.vars['w'] b = self.vars['b'] xw = tf.sparse_tensor_dense_matmul(self.X, w) logits = tf.reshape(xw + b, [-1]) self.y_prob = tf.sigmoid(logits) self.loss = tf.reduce_mean( tf.nn.sigmoid_cross_entropy_with_logits(labels=self.y, logits=logits)) + \ l2_weight * tf.nn.l2_loss(xw) self.optimizer = utils.get_optimizer(opt_algo, learning_rate, self.loss) config = tf.ConfigProto() config.gpu_options.allow_growth = True self.sess = tf.Session(config=config) tf.global_variables_initializer().run(session=self.sess)
def __init__(self, field_sizes=None, embed_size=10, filter_sizes=None, layer_acts=None, drop_out=None, init_path=None, opt_algo='gd', learning_rate=1e-2, random_seed=None): Model.__init__(self) init_vars = [] num_inputs = len(field_sizes) for i in range(num_inputs): init_vars.append(('embed_%d' % i, [field_sizes[i], embed_size], 'xavier', dtype)) init_vars.append(('f1', [embed_size, filter_sizes[0], 1, 2], 'xavier', dtype)) init_vars.append(('f2', [embed_size, filter_sizes[1], 2, 2], 'xavier', dtype)) init_vars.append(('w1', [2 * 3 * embed_size, 1], 'xavier', dtype)) init_vars.append(('b1', [1], 'zero', dtype)) self.graph = tf.Graph() with self.graph.as_default(): if random_seed is not None: tf.set_random_seed(random_seed) self.X = [tf.sparse_placeholder(dtype) for i in range(num_inputs)] self.y = tf.placeholder(dtype) self.keep_prob_train = 1 - np.array(drop_out) self.keep_prob_test = np.ones_like(drop_out) self.layer_keeps = tf.placeholder(dtype) self.vars = utils.init_var_map(init_vars, init_path) w0 = [self.vars['embed_%d' % i] for i in range(num_inputs)] xw = tf.concat([tf.sparse_tensor_dense_matmul(self.X[i], w0[i]) for i in range(num_inputs)], 1) l = xw l = tf.transpose(tf.reshape(l, [-1, num_inputs, embed_size, 1]), [0, 2, 1, 3]) f1 = self.vars['f1'] l = tf.nn.conv2d(l, f1, [1, 1, 1, 1], 'SAME') l = tf.transpose( utils.max_pool_4d( tf.transpose(l, [0, 1, 3, 2]), int(num_inputs / 2)), [0, 1, 3, 2]) f2 = self.vars['f2'] l = tf.nn.conv2d(l, f2, [1, 1, 1, 1], 'SAME') l = tf.transpose( utils.max_pool_4d( tf.transpose(l, [0, 1, 3, 2]), 3), [0, 1, 3, 2]) l = tf.nn.dropout( utils.activate( tf.reshape(l, [-1, embed_size * 3 * 2]), layer_acts[0]), self.layer_keeps[0]) w1 = self.vars['w1'] b1 = self.vars['b1'] l = tf.matmul(l, w1) + b1 l = tf.squeeze(l) self.y_prob = tf.sigmoid(l) self.loss = tf.reduce_mean( tf.nn.sigmoid_cross_entropy_with_logits(logits=l, labels=self.y)) self.optimizer = utils.get_optimizer(opt_algo, learning_rate, self.loss) config = tf.ConfigProto() config.gpu_options.allow_growth = True self.sess = tf.Session(config=config) tf.global_variables_initializer().run(session=self.sess)
def testMatchingTensorInfoProtoMaps(self): sig1 = _make_signature({ "x": tf.placeholder(tf.int32, [2]), }, { "x": tf.placeholder(tf.int32, [2]), }) sig2 = _make_signature({ "x": tf.placeholder(tf.int32, [2]), }, { "x": tf.sparse_placeholder(tf.int64, [2]), }) self.assertTrue( tensor_info.tensor_info_proto_maps_match(sig1.inputs, sig2.inputs)) self.assertFalse( tensor_info.tensor_info_proto_maps_match(sig1.outputs, sig2.outputs)) sig3 = _make_signature({ "x": tf.placeholder(tf.int32, [None]), }, { "x": tf.placeholder(tf.int32, [2]), }) self.assertFalse( tensor_info.tensor_info_proto_maps_match(sig1.inputs, sig3.inputs)) self.assertTrue( tensor_info.tensor_info_proto_maps_match(sig1.outputs, sig3.outputs))
def __init__(self,args): super(seqMLP, self).__init__() self.args = args self.batch_size=args.batch_size self.input_data = tf.placeholder(tf.float32,[self.args.batch_size,self.args.sentence_length,self.args.word_dim],name='inputdata') self.output_data = tf.sparse_placeholder(tf.float32, name='outputdata') #[None, 114] self.dense_outputdata= tf.sparse_tensor_to_dense(self.output_data) self.keep_prob = tf.placeholder(tf.float32,name='keep_prob_NER') self.entMentIndex = tf.placeholder(tf.int32,[None,5],name='ent_mention_index') self.entCtxLeftIndex = tf.placeholder(tf.int32,[None,10],name='ent_ctxleft_index') self.entCtxRightIndex = tf.placeholder(tf.int32,[None,10],name='ent_ctxright_index') self.pos_f1 = tf.placeholder(tf.float32,[None,5,1]) self.pos_f2 = tf.placeholder(tf.float32,[None,10,1]) self.pos_f3 = tf.placeholder(tf.float32,[None,10,1]) self.figerHier = np.asarray(cPickle.load(open('data/figer/figerhierarchical.p','rb')),np.float32) #add the hierarchy features self.layers={} self.layers['fullyConnect'] = layers_lib.FullyConnection(self.args.class_size) used = tf.sign(tf.reduce_max(tf.abs(self.input_data),reduction_indices=2)) self.length = tf.cast(tf.reduce_sum(used,reduction_indices=1),tf.int32) with tf.device('/gpu:0'): self.prediction,self.loss_lm = self.cl_loss_from_embedding(self.input_data) print 'self.loss_lm:',self.loss_lm _,self.adv_loss = self.adversarial_loss() print 'self.adv_loss:',self.adv_loss self.loss = tf.add(self.loss_lm,self.adv_loss)
def add_placeholders(self): # the batch_size and max_stepsize每步都是变长的。 self.input_tensor = tf.placeholder(tf.float32, [None, None, n_input + (2 * n_input * n_context)], name='input') # 语音log filter bank or MFCC features self.text = tf.sparse_placeholder(tf.int32, name='text') # 文本 self.seq_length = tf.placeholder(tf.int32, [None], name='seq_length') # 序列长 self.keep_dropout = tf.placeholder(tf.float32)
def _run_test_als(self, use_factors_weights_cache): with self.test_session(): col_init = np.random.rand(7, 3) als_model = factorization_ops.WALSModel( 5, 7, 3, col_init=col_init, row_weights=None, col_weights=None, use_factors_weights_cache=use_factors_weights_cache) als_model.initialize_op.run() als_model.worker_init.run() als_model.row_update_prep_gramian_op.run() als_model.initialize_row_update_op.run() process_input_op = als_model.update_row_factors(self._wals_inputs)[1] process_input_op.run() row_factors1 = [x.eval() for x in als_model.row_factors] wals_model = factorization_ops.WALSModel( 5, 7, 3, col_init=col_init, row_weights=0, col_weights=0, use_factors_weights_cache=use_factors_weights_cache) wals_model.initialize_op.run() wals_model.worker_init.run() wals_model.row_update_prep_gramian_op.run() wals_model.initialize_row_update_op.run() process_input_op = wals_model.update_row_factors(self._wals_inputs)[1] process_input_op.run() row_factors2 = [x.eval() for x in wals_model.row_factors] for r1, r2 in zip(row_factors1, row_factors2): self.assertAllClose(r1, r2, atol=1e-3) # Here we test partial column updates. sp_c = np_matrix_to_tf_sparse(INPUT_MATRIX, col_slices=[2, 0], shuffle=True).eval() sp_feeder = tf.sparse_placeholder(tf.float32) feed_dict = {sp_feeder: sp_c} als_model.col_update_prep_gramian_op.run() als_model.initialize_col_update_op.run() process_input_op = als_model.update_col_factors(sp_input=sp_feeder)[1] process_input_op.run(feed_dict=feed_dict) col_factors1 = [x.eval() for x in als_model.col_factors] feed_dict = {sp_feeder: sp_c} wals_model.col_update_prep_gramian_op.run() wals_model.initialize_col_update_op.run() process_input_op = wals_model.update_col_factors(sp_input=sp_feeder)[1] process_input_op.run(feed_dict=feed_dict) col_factors2 = [x.eval() for x in wals_model.col_factors] for c1, c2 in zip(col_factors1, col_factors2): self.assertAllClose(c1, c2, rtol=5e-3, atol=1e-2)
def __init__(self, mode): self.mode = mode # image self.inputs = tf.placeholder(tf.float32, [None, FLAGS.image_height, FLAGS.image_width, FLAGS.image_channel]) # SparseTensor required by ctc_loss op self.labels = tf.sparse_placeholder(tf.int32) # 1d array of size [batch_size] # self.seq_len = tf.placeholder(tf.int32, [None]) # l2 self._extra_train_ops = []
def __init__(self, field_sizes=None, embed_size=10, layer_sizes=None, layer_acts=None, drop_out=None, embed_l2=None, layer_l2=None, init_path=None, opt_algo='gd', learning_rate=1e-2, random_seed=None): Model.__init__(self) init_vars = [] num_inputs = len(field_sizes) print('num_inputs:{0}\\t\tlayer_size:{1}'.format(num_inputs, layer_sizes)) for i in range(num_inputs): init_vars.append(('embed_%d' % i, [field_sizes[i], embed_size], 'xavier', dtype)) # 为每个特征值初始化一个长度为10的向量 node_in = num_inputs * embed_size # 将每个特征embeding 为10维的向量, 总共16个特征,所以是160个输入 网络为[160, 500, 1] for i in range(len(layer_sizes)): init_vars.append(('w%d' % i, [node_in, layer_sizes[i]], 'xavier', dtype)) init_vars.append(('b%d' % i, [layer_sizes[i]], 'zero', dtype)) node_in = layer_sizes[i] print('init_vars:', init_vars) self.graph = tf.Graph() with self.graph.as_default(): if random_seed is not None: tf.set_random_seed(random_seed) self.X = [tf.sparse_placeholder(dtype) for i in range(num_inputs)] self.y = tf.placeholder(dtype) self.keep_prob_train = 1 - np.array(drop_out) self.keep_prob_test = np.ones_like(drop_out) self.layer_keeps = tf.placeholder(dtype) self.vars = utils.init_var_map(init_vars, init_path) w0 = [self.vars['embed_%d' % i] for i in range(num_inputs)] xw = tf.concat([tf.sparse_tensor_dense_matmul(self.X[i], w0[i]) for i in range(num_inputs)], 1) # 将每个特征的隐含向量连起来,组成网络的输入,160维 l = xw for i in range(len(layer_sizes)): wi = self.vars['w%d' % i] bi = self.vars['b%d' % i] print('第{0}个隐藏层l.shape, wi.shape, bi.shape'.format(i), l.shape, wi.shape, bi.shape) l = tf.nn.dropout( utils.activate( tf.matmul(l, wi) + bi, layer_acts[i]), self.layer_keeps[i]) l = tf.squeeze(l) # 从tensor中删除所有大小是1的维度 self.y_prob = tf.sigmoid(l) self.loss = tf.reduce_mean( tf.nn.sigmoid_cross_entropy_with_logits(logits=l, labels=self.y)) if layer_l2 is not None: self.loss += embed_l2 * tf.nn.l2_loss(xw) for i in range(len(layer_sizes)): wi = self.vars['w%d' % i] self.loss += layer_l2[i] * tf.nn.l2_loss(wi) self.optimizer = utils.get_optimizer(opt_algo, learning_rate, self.loss) config = tf.ConfigProto() config.gpu_options.allow_growth = True self.sess = tf.Session(config=config) tf.global_variables_initializer().run(session=self.sess)
def get_train_model(): # Has size [batch_size, max_stepsize, num_features], but the # batch_size and max_stepsize can vary along each step inputs, features = convolutional_layers() # print features.get_shape() # inputs = tf.placeholder(tf.float32, [None, None, common.OUTPUT_SHAPE[0]]) # Here we use sparse_placeholder that will generate a # SparseTensor required by ctc_loss op. targets = tf.sparse_placeholder(tf.int32) # 1d array of size [batch_size] seq_len = tf.placeholder(tf.int32, [None]) # Defining the cell # Can be: # tf.nn.rnn_cell.RNNCell # tf.nn.rnn_cell.GRUCell # cell = tf.contrib.rnn.LSTMCell(common.num_hidden, state_is_tuple=True) # Stacking rnn cells stack = tf.contrib.rnn.MultiRNNCell([lstm_cell() for _ in range(0, common.num_layers)], state_is_tuple=True) # The second output is the last state and we will no use that outputs, _ = tf.nn.dynamic_rnn(stack, features, seq_len, dtype=tf.float32) shape = tf.shape(features) batch_s, max_timesteps = shape[0], shape[1] # Reshaping to apply the same weights over the timesteps outputs = tf.reshape(outputs, [-1, common.num_hidden]) # Truncated normal with mean 0 and stdev=0.1 # Tip: Try another initialization # see https://www.tensorflow.org/versions/r0.9/api_docs/python/contrib.layers.html#initializers W = tf.Variable(tf.truncated_normal([common.num_hidden, common.num_classes], stddev=0.1), name="W") # Zero initialization # Tip: Is tf.zeros_initializer the same? b = tf.Variable(tf.constant(0., shape=[common.num_classes]), name="b") # Doing the affine projection logits = tf.matmul(outputs, W) + b # Reshaping back to the original shape logits = tf.reshape(logits, [batch_s, -1, common.num_classes]) # Time major logits = tf.transpose(logits, (1, 0, 2)) return logits, inputs, targets, seq_len, W, b
def testBuildInputMap(self): x = tf.placeholder(tf.int32, [2]) y = tf.sparse_placeholder(tf.string, [None]) sig = _make_signature({"x": x, "y": y}, {}) input_map = tensor_info.build_input_map(sig.inputs, {"x": x, "y": y}) self.assertEquals(len(input_map), 4) self.assertEquals(input_map[x.name], x) self.assertEquals(input_map[y.indices.name], y.indices) self.assertEquals(input_map[y.values.name], y.values) self.assertEquals(input_map[y.dense_shape.name], y.dense_shape)
def _run_test_als_transposed(self, use_factors_weights_cache): with self.test_session(): col_init = np.random.rand(7, 3) als_model = factorization_ops.WALSModel( 5, 7, 3, col_init=col_init, row_weights=None, col_weights=None, use_factors_weights_cache=use_factors_weights_cache) als_model.initialize_op.run() als_model.worker_init.run() wals_model = factorization_ops.WALSModel( 5, 7, 3, col_init=col_init, row_weights=[0] * 5, col_weights=[0] * 7, use_factors_weights_cache=use_factors_weights_cache) wals_model.initialize_op.run() wals_model.worker_init.run() sp_feeder = tf.sparse_placeholder(tf.float32) # Here test partial row update with identical inputs but with transposed # input for als. sp_r_t = np_matrix_to_tf_sparse(INPUT_MATRIX, [3, 1], transpose=True).eval() sp_r = np_matrix_to_tf_sparse(INPUT_MATRIX, [3, 1]).eval() feed_dict = {sp_feeder: sp_r_t} als_model.row_update_prep_gramian_op.run() als_model.initialize_row_update_op.run() process_input_op = als_model.update_row_factors(sp_input=sp_feeder, transpose_input=True)[1] process_input_op.run(feed_dict=feed_dict) # Only updated row 1 and row 3, so only compare these rows since others # have randomly initialized values. row_factors1 = [als_model.row_factors[0].eval()[1], als_model.row_factors[0].eval()[3]] feed_dict = {sp_feeder: sp_r} wals_model.row_update_prep_gramian_op.run() wals_model.initialize_row_update_op.run() process_input_op = wals_model.update_row_factors(sp_input=sp_feeder)[1] process_input_op.run(feed_dict=feed_dict) # Only updated row 1 and row 3, so only compare these rows since others # have randomly initialized values. row_factors2 = [wals_model.row_factors[0].eval()[1], wals_model.row_factors[0].eval()[3]] for r1, r2 in zip(row_factors1, row_factors2): self.assertAllClose(r1, r2, atol=1e-3)
def testBuildOutputMap(self): x = tf.placeholder(tf.int32, [2]) y = tf.sparse_placeholder(tf.string, [None]) sig = _make_signature({}, {"x": x, "y": y}) def _get_tensor(name): return tf.get_default_graph().get_tensor_by_name(name) output_map = tensor_info.build_output_map(sig.outputs, _get_tensor) self.assertEquals(len(output_map), 2) self.assertEquals(output_map["x"], x) self.assertEquals(output_map["y"].indices, y.indices) self.assertEquals(output_map["y"].values, y.values) self.assertEquals(output_map["y"].dense_shape, y.dense_shape)
def __init__(self,args): ''' @time: 2016/12/20 @editor: wujs @function: also need to return the candidates entity mentions lstm representation ''' super(seqCNN, self).__init__() self.args = args self.batch_size=args.batch_size self.input_data = tf.placeholder(tf.float32,[self.args.batch_size,self.args.sentence_length,self.args.word_dim],name='inputdata') print 'self.input_data:',self.input_data self.output_data = tf.sparse_placeholder(tf.float32, name='outputdata') self.keep_prob = tf.placeholder(tf.float32,name='keep_prob_NER') self.pos_f1 = tf.placeholder(tf.float32,[None,5,1]) self.pos_f2 = tf.placeholder(tf.float32,[None,10,1]) self.pos_f3 = tf.placeholder(tf.float32,[None,10,1]) self.entMentIndex = tf.placeholder(tf.int32,[None,5],name='ent_mention_index') self.entCtxLeftIndex = tf.placeholder(tf.int32,[None,10],name='ent_ctxleft_index') self.entCtxRightIndex = tf.placeholder(tf.int32,[None,10],name='ent_ctxright_index') if args.datasets == 'figer': self.hier = np.asarray(cPickle.load(open('data/figer/figerhierarchical.p','rb')),np.float32) #add the hierarchy features else: self.hier = np.asarray(cPickle.load(open('data/OntoNotes/OntoNoteshierarchical.p','rb')),np.float32) self.pred_bias = tf.Variable(tf.zeros([self.args.class_size]), name="pred_bias") self.layers={} self.layers['CNN'] = layers_lib.CNN(filters=[1,2,3,4,5],word_embedding_size=self.args.word_dim+1,num_filters=5) self.layers['fullyConnect_ment'] = layers_lib.FullyConnection(self.args.class_size,name='FullyConnection_ment') # 90 is the row of type hierical self.layers['fullyConnect_ctx'] = layers_lib.FullyConnection(self.args.class_size,name='FullyConnection_ctx') #self.layers['fullyConnect_ctx'] = layers_lib.FullyConnection(np.shape(self.hier)[0],name='FullyConnection_ctx') self.dense_outputdata= tf.sparse_tensor_to_dense(self.output_data) print 'self.dense_outputdata:', self.dense_outputdata self.prediction,self.loss_lm = self.cl_loss_from_embedding(self.input_data) print 'self.loss_lm:',self.loss_lm _,self.adv_loss = self.adversarial_loss() print 'self.adv_loss:',self.adv_loss self.loss = tf.add(self.loss_lm,self.adv_loss)
def testRepr(self): sig = _make_signature({ "x": tf.placeholder(tf.string, [2]), }, { "y": tf.placeholder(tf.int32, [2]), "z": tf.sparse_placeholder(tf.float32, [2, 10]), }) outputs = tensor_info.parse_tensor_info_map(sig.outputs) self.assertEquals( repr(outputs["y"]), "<hub.ParsedTensorInfo shape=(2,) dtype=int32 is_sparse=False>") self.assertEquals( repr(outputs["z"]), "<hub.ParsedTensorInfo shape=(2, 10) dtype=float32 is_sparse=True>")
def testConvertTensors(self): a = tf.placeholder(tf.int32, [None]) protomap = _make_signature({"a": a}, {}).inputs # convert constant in0 = [1, 2, 3] output = tensor_info.convert_to_input_tensors(protomap, {"a": in0}) self.assertEquals(output["a"].dtype, a.dtype) # check sparsity in1 = tf.sparse_placeholder(tf.int32, []) with self.assertRaisesRegexp(TypeError, "dense"): tensor_info.convert_to_input_tensors(protomap, {"a": in1}) # check args mismatch with self.assertRaisesRegexp(TypeError, "missing"): tensor_info.convert_to_input_tensors(protomap, {"b": in1})
def testSparseTensors(self): square_spec = hub.create_module_spec(sparse_square_module_fn) with tf.Graph().as_default(): square = hub.Module(square_spec) v = tf.sparse_placeholder(dtype=tf.int64, name="v") y = square(v) with tf.Session().as_default(): indices = [[0, 0], [0, 1], [1, 1]] values = [10, 2, 1] shape = [2, 2] v1 = tf.SparseTensorValue(indices, values, shape) v2 = y.eval(feed_dict={v: v1}) v4 = y.eval(feed_dict={v: v2}) self.assertAllEqual(v4.indices, indices) # Unchanged. self.assertAllEqual(v4.values, [t**4 for t in values]) # Squared twice. self.assertAllEqual(v4.dense_shape, shape) # Unchanged.
def __init__(self, **hparam): ''' Constructor ''' self.alpha_enc = hparam['alpha_enc'] if 'alpha_enc' in hparam else 0.1 self.X1 = tf.sparse_placeholder(tf.float32) self.Y1 = tf.sparse_placeholder(tf.float32) self.mask1 = tf.sparse_placeholder(tf.float32) self.X2 = tf.sparse_placeholder(tf.float32) self.Y2 = tf.sparse_placeholder(tf.float32) self.mask2 = tf.sparse_placeholder(tf.float32) config.logger.info(str(hparam)) super().__init__(**hparam)
def testParsingTensorInfoProtoMaps(self): sig = _make_signature({ "x": tf.placeholder(tf.string, [2]), }, { "y": tf.placeholder(tf.int32, [2]), "z": tf.sparse_placeholder(tf.float32, [2, 10]), }) inputs = tensor_info.parse_tensor_info_map(sig.inputs) self.assertEquals(set(inputs.keys()), set(["x"])) self.assertEquals(inputs["x"].get_shape(), [2]) self.assertEquals(inputs["x"].dtype, tf.string) self.assertFalse(inputs["x"].is_sparse) outputs = tensor_info.parse_tensor_info_map(sig.outputs) self.assertEquals(set(outputs.keys()), set(["y", "z"])) self.assertEquals(outputs["y"].get_shape(), [2]) self.assertEquals(outputs["y"].dtype, tf.int32) self.assertFalse(outputs["y"].is_sparse) self.assertEquals(outputs["z"].get_shape(), [2, 10]) self.assertEquals(outputs["z"].dtype, tf.float32) self.assertTrue(outputs["z"].is_sparse)
def __init__(self, input_dim=None, output_dim=1, factor_order=10, init_path=None, opt_algo='gd', learning_rate=1e-2, l2_w=0, l2_v=0, random_seed=None): Model.__init__(self) init_vars = [('w', [input_dim, output_dim], 'xavier', dtype), ('v', [input_dim, factor_order], 'xavier', dtype), ('b', [output_dim], 'zero', dtype)] self.graph = tf.Graph() with self.graph.as_default(): if random_seed is not None: tf.set_random_seed(random_seed) self.X = tf.sparse_placeholder(dtype) self.y = tf.placeholder(dtype) self.vars = utils.init_var_map(init_vars, init_path) w = self.vars['w'] v = self.vars['v'] b = self.vars['b'] X_square = tf.SparseTensor(self.X.indices, tf.square(self.X.values), tf.to_int64(tf.shape(self.X))) xv = tf.square(tf.sparse_tensor_dense_matmul(self.X, v)) p = 0.5 * tf.reshape( tf.reduce_sum(xv - tf.sparse_tensor_dense_matmul(X_square, tf.square(v)), 1), [-1, output_dim]) xw = tf.sparse_tensor_dense_matmul(self.X, w) logits = tf.reshape(xw + b + p, [-1]) self.y_prob = tf.sigmoid(logits) self.loss = tf.reduce_mean( tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=self.y)) + \ l2_w * tf.nn.l2_loss(xw) + \ l2_v * tf.nn.l2_loss(xv) self.optimizer = utils.get_optimizer(opt_algo, learning_rate, self.loss) config = tf.ConfigProto() config.gpu_options.allow_growth = True self.sess = tf.Session(config=config) tf.global_variables_initializer().run(session=self.sess)
def build_model(self): output_layer = tf.layers.Dense( self.pea_size, kernel_initializer=tf.truncated_normal_initializer( mean=0.0, stddev=0.1)) # 全连接层 # Create placeholder self.placeholders = { 'support': tf.sparse_placeholder(tf.float32), 'features': tf.sparse_placeholder( tf.float32, shape=tf.constant( [self.max_input_seq_len, self.max_input_seq_len], dtype=tf.int64)), } self.advantage = tf.placeholder(tf.float32, shape=[self.batch_size], name="advantage") self.outputs = tf.placeholder( tf.float32, shape=[self.batch_size, self.max_output_seq_len], name="outputs") self.enc_input_weights = tf.placeholder( tf.int32, shape=[self.batch_size, self.max_input_seq_len], name="enc_input_weights") self.dec_input_weights = tf.placeholder( tf.int32, shape=[self.batch_size, self.max_output_seq_len - 1], name="dec_input_weights") self.inputs = tf.placeholder(tf.float32, shape=[ self.batch_size, self.max_input_seq_len, self.input_vec_len ], name="inputs") self.learning_rate_step = tf.placeholder('int64', None, name='learning_rate_step') self.learning_rate = tf.placeholder('float32', None, name='learning_rate') # ======================== Two Layers of GCN ========================= GraphConvolution_1 = GraphConvolution(input_dim=self.max_input_seq_len, output_dim=self.input_vec_len, placeholders=self.placeholders, act=lambda x: x, featureless=True, sparse_inputs=True) inputs = self.placeholders['features'] outputGCN = GraphConvolution_1(inputs) # ======================== encoder define ========================= # Calculate the lengths encoder_inputs = [] for _ in range(self.batch_size): encoder_inputs.append(outputGCN) self.encoder_inputs = tf.stack(encoder_inputs) enc_input_lens = tf.reduce_sum(self.enc_input_weights, axis=1) dec_input_lens = tf.reduce_sum(self.dec_input_weights, axis=1) - 1 self.max_batch_len = tf.reduce_max(enc_input_lens) # self.targets = tf.stack(self.output[1:], axis=1) enc_cell = self._create_rnn_cell() # encoder outputs and state encoder_outputs, encoder_state = tf.nn.dynamic_rnn(enc_cell, self.encoder_inputs, enc_input_lens, dtype=tf.float32) self.encoder_outputs = encoder_outputs self.encoder_state = encoder_state # Tile inputs if forward only # branch # Sized decoder cell # ======================== decoder define ========================= dec_cell_0 = self._create_rnn_cell() if self.use_attention: # branch attention_mechanism = tf.contrib.seq2seq.BahdanauAttention( num_units=self.rnn_size, memory=encoder_outputs, memory_sequence_length=enc_input_lens) dec_cell = tf.contrib.seq2seq.AttentionWrapper( cell=dec_cell_0, attention_mechanism=attention_mechanism, attention_layer_size=self.rnn_size, name='Attention_Wrapper') batch_size = self.batch_size decoder_initial_state = dec_cell.zero_state( batch_size=batch_size, dtype=tf.float32).clone(cell_state=encoder_state) ''' attention_mechanism_fw = tf.contrib.seq2seq.BahdanauAttention(num_units=self.rnn_size, memory=encoder_outputs_fw, memory_sequence_length=enc_input_lens_fw) dec_cell_fw = tf.contrib.seq2seq.AttentionWrapper(cell=dec_cell_0, attention_mechanism=attention_mechanism_fw, attention_layer_size=self.rnn_size, name='Attention_Wrapper') batch_size_fw = self.batch_size * self.beam_width decoder_initial_state_fw = dec_cell_fw.zero_state(batch_size=batch_size_fw, dtype=tf.float32).clone(cell_state=encoder_state_fw) ''' else: decoder_initial_state = encoder_state # decoder_initial_state_fw = encoder_state_fw # branch self.decoder_initial_state = decoder_initial_state # self.decoder_initial_state_fw = decoder_initial_state_fw 
##################################### forward inference ##################################### shifted_START_ID = START_ID - 2 shifted_END_ID = END_ID - 2 embedding_lookup = np.array([[float(i)] for i in range(2, self.pea_size + 2)], dtype='float32') # embedding_lookup = np.array([[2.0], # start id # [3.0], # pea 0-0 # [4.0], # pea 0-0 # [5.0], # pea 0-1 # [6.0], # pea 0-0 # [7.0], # pea 0-0 # [8.0], # pea 0-0 # [9.0], # pea 0-1 # [10.0], # pea 0-0 # [11.0], # pea 0-0 # [12.0], # pea 0-0 # [13.0], # pea 0-1 # [14.0], # pea 0-0 # [15.0], # pea 0-0 # [16.0], # pea 0-0 # [17.0], # pea 0-1 # [18.0]], # # [19.0], # [20.0], # [21.0], # [22.0], # [23.0], # [24.0], # [25.0], # [26.0], # [27.0],], # pea 1-1 # dtype='float32') self.start_tokens = tf.tile([START_ID], [self.batch_size]), # my_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(embedding=embedding_lookup, # start_tokens=tf.tile([START_ID],[self.batch_size]), # end_token=0) my_helper = tf.contrib.seq2seq.SampleEmbeddingHelper( embedding=embedding_lookup, start_tokens=tf.tile([START_ID], [self.batch_size]), end_token=self.pea_size + 1, softmax_temperature=1.0, seed=int(time.time())) my_decoder = tf.contrib.seq2seq.BasicDecoder( dec_cell, my_helper, decoder_initial_state, output_layer=output_layer # applied per timestep ) print('dynamic decode started....') # actor_outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(my_decoder, maximum_iterations=self.max_batch_len-2) actor_outputs, _, _ = tf.contrib.seq2seq.dynamic_decode( my_decoder, maximum_iterations=self.max_input_seq_len ) # maximum_iterations=self.max_batch_len+1) # actor_outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(my_decoder) self.actor_logits = actor_outputs.rnn_output self.infer_probs = tf.nn.softmax(self.actor_logits) predicted_ids = actor_outputs.sample_id # predicted_ids = actor_outputs.predicted_ids self.predicted_ids = predicted_ids #################################### backward update ##################################### self.outputs_list = tf.unstack(self.outputs, axis=1) decoder_inputs = tf.stack(self.outputs_list[:-2], axis=1) decoder_inputs = tf.reshape( decoder_inputs, [self.batch_size, self.max_input_seq_len, 1]) self.decoder_inputs = decoder_inputs ## print node # decoder_inputs = tf.Print(decoder_inputs, [self.outputs_list, decoder_inputs], "***decoder input***", summarize=100) train_helper = tf.contrib.seq2seq.TrainingHelper( decoder_inputs, dec_input_lens) # Basic Decoder train_decoder = tf.contrib.seq2seq.BasicDecoder( dec_cell, train_helper, decoder_initial_state, output_layer) # Decode train_outputs, final_context_state, _ = tf.contrib.seq2seq.dynamic_decode( train_decoder) # logits # cur_batch_max_len = tf.reduce_max(dec_input_lens) cur_batch_max_len = self.max_input_seq_len # logits = output_layer(outputs.rnn_output) train_logits = train_outputs.rnn_output self.train_predicted_ids = train_outputs.sample_id # self.predicted_ids_with_logits=tf.nn.top_k(logits) # Pad logits to the same shape as targets # train_logits = tf.concat([train_logits,tf.ones([self.batch_size, # self.max_output_seq_len-1-cur_batch_max_len, # self.pea_size])],axis=1) # targets self.targets = tf.stack(self.outputs_list[1:-1], axis=1) self.targets = tf.cast( tf.reshape(self.targets, [self.batch_size, self.max_input_seq_len]), tf.int32) # self.shifted_targets = (self.targets-2) * self.dec_input_weights self.shifted_targets = self.targets - 2 # this is negative log of chosen action self.train_logits = train_logits self.probs = tf.nn.softmax(train_logits) self.log_probs = 
tf.log(tf.nn.softmax(train_logits)) self.neg_log_prob1 = tf.nn.sparse_softmax_cross_entropy_with_logits( logits=train_logits, labels=self.shifted_targets) # self.neg_log_prob1 = tf.Print(self.neg_log_prob1, [self.shifted_targets, self.neg_log_prob1], \ # "train_logits and neg_log_prob1", summarize=100) # self.neg_log_prob2 = tf.reduce_sum(-tf.log(tf.nn.softmax(train_logits, name='act_prob')) * tf.one_hot(self.shifted_targets, self.pea_size), axis=2) self.neg_log_prob2 = tf.reduce_sum( tf.nn.softmax(train_logits, name='act_prob') * tf.one_hot(self.shifted_targets, self.pea_size), axis=2) # self.neg_log_prob2 = -tf.log(tf.nn.softmax(train_logits, name='act_prob')) * tf.one_hot(self.shifted_targets, self.pea_size+1) # self.log_probs = tf.transpose(log_probs,[1,0]) # self.neg_log_probs1 = tf.reduce_sum((self.neg_log_prob1 * tf.cast(self.dec_input_weights, dtype=tf.float32)), axis=1) self.neg_log_probs2 = tf.reduce_sum(self.neg_log_prob1, axis=1) # self.neg_log_probs2 = self.neg_log_probs1 / tf.cast(tf.reduce_sum(self.dec_input_weights, axis=1), dtype=tf.float32) # self.neg_log_probs2 = self.neg_log_probs1 mean_neg_log_probs = tf.reduce_mean(self.neg_log_probs2) mean_advantage = tf.reduce_mean(self.advantage) # self.neg_log_probs2 = tf.reduce_sum(self.neg_log_prob1, axis=1) reinforce = self.advantage * self.neg_log_probs2 self.actor_loss = tf.reduce_mean(reinforce) # self.actor_loss = tf.Print(self.actor_loss, [self.actor_loss, reinforce], \ # "no====", summarize=100) self.learning_rate_op = self.learning_rate # self.learning_rate_op = tf.maximum(1e-6, # tf.train.exponential_decay( # self.init_learning_rate, # self.learning_rate_step, # 1000, # 0.9, # staircase=True)) # optimizer = tf.train.RMSPropOptimizer(self.learning_rate_op, momentum=0.95, epsilon=0.01) optimizer = tf.train.AdamOptimizer(self.init_learning_rate) # optimizer = tf.train.AdamOptimizer(learning_rate=self.init_learning_rate, beta1=0.8, beta2=0.888, epsilon=1e-08) grads_and_vars = optimizer.compute_gradients(self.actor_loss) for idx, (grad, var) in enumerate(grads_and_vars): if grad is not None: grads_and_vars[idx] = (tf.clip_by_norm(grad, self.max_gradient_norm), var) self.actor_update = optimizer.apply_gradients( grads_and_vars, global_step=self.global_step) # actor_optimizer = tf.train.AdamOptimizer(0.001) # self.actor_update = actor_optimizer.minimize(self.actor_loss) # Get all trainable variables parameters = tf.trainable_variables() # Calculate gradients # gradients = tf.gradients(self.actor_loss, parameters) # Clip gradients # clipped_gradients, _ = tf.clip_by_global_norm(gradients, self.max_gradient_norm) # Optimization # optimizer = tf.train.AdamOptimizer(learning_rate=self.init_learning_rate, beta1=0.9, beta2=0.999, epsilon=1e-08) # optimizer = tf.train.RMSPropOptimizer(self.learning_rate_op, momentum=0.95, epsilon=0.01) # Update operator # self.actor_update = optimizer.apply_gradients(zip(clipped_gradients, parameters),global_step=self.global_step) # Summarize tf.summary.scalar('loss', self.actor_loss) tf.summary.scalar('nlog_probs', mean_neg_log_probs) tf.summary.scalar('advantage', mean_advantage) # tf.summary.scalar('learning_rate', self.learning_rate_op) for p in parameters: tf.summary.histogram(p.op.name, p) # for p in gradients: # tf.summary.histogram(p.op.name,p) # Summarize operator self.summary_op = tf.summary.merge_all() # Save self.saver = tf.train.Saver(tf.global_variables()) ##################################### backward inference ##################################### '''
def __init__(self, args, sess, name="gcn"): self.input_size = args.input_size self.output_size = args.output_size self.num_supports = args.num_supports self.features_size = args.features_size self.hidden_size = args.hidden_size self.num_labels = args.output_size self.l2_rate = args.l2_rate self.sess = sess self.max_grad_norm = args.max_grad_norm self.learning_rate = tf.Variable(float(args.learning_rate), trainable=False, name="learning_rate") self.lr_decay_op = self.learning_rate.assign( tf.multiply(self.learning_rate, args.lr_decay)) self.optimizer = tf.train.AdamOptimizer(self.learning_rate) with tf.name_scope("data"): self.support = [ tf.sparse_placeholder(tf.float32) for _ in range(self.num_supports) ] self.features = tf.sparse_placeholder(tf.float32, shape=self.features_size) self.labels = tf.placeholder(tf.float32, [None, self.num_labels]) self.labels_mask = tf.placeholder(tf.int32) self.num_features_nonzero = tf.placeholder(tf.int32) self.is_train = tf.placeholder(tf.bool, name="istrain") self.dropout = tf.cond(self.is_train, lambda: args.dropout, lambda: 0.0) with tf.name_scope("gcn"): outputs, vars1 = self.graph_convolution(self.features, self.input_size, self.hidden_size, sparse_inputs=True) outputs, _ = self.graph_convolution(outputs, self.hidden_size, self.output_size, act=lambda x: x) self.outputs = outputs with tf.name_scope("loss"): self.loss = utils.masked_softmax_cross_entropy( self.outputs, self.labels, self.labels_mask) for var in vars1.values(): self.loss += self.l2_rate * tf.nn.l2_loss(var) tf.summary.scalar("loss", self.loss) with tf.name_scope("accuracy"): self.accuracy = utils.masked_accuracy(self.outputs, self.labels, self.labels_mask) with tf.name_scope("train"): self.train_op = self.optimizer.minimize(self.loss) init_op = tf.global_variables_initializer() self.sess.run(init_op) self.summary = tf.summary.merge_all() self.saver = tf.train.Saver(tf.global_variables()) self.tvars = tf.trainable_variables()
def __init__(self, field_sizes=None, embed_size=10, filter_sizes=None, layer_acts=None, drop_out=None, init_path=None, opt_algo='gd', learning_rate=1e-2, random_seed=None): Model.__init__(self) init_vars = [] num_inputs = len(field_sizes) for i in range(num_inputs): init_vars.append(('embed_%d' % i, [field_sizes[i], embed_size], 'xavier', dtype)) init_vars.append(('f1', [embed_size, filter_sizes[0], 1, 2], 'xavier', dtype)) init_vars.append(('f2', [embed_size, filter_sizes[1], 2, 2], 'xavier', dtype)) init_vars.append(('w1', [2 * 3 * embed_size, 1], 'xavier', dtype)) init_vars.append(('b1', [1], 'zero', dtype)) print('init_vars: ', init_vars) self.graph = tf.Graph() with self.graph.as_default(): if random_seed is not None: tf.set_random_seed(random_seed) self.X = [tf.sparse_placeholder(dtype) for i in range(num_inputs)] self.y = tf.placeholder(dtype) self.keep_prob_train = 1 - np.array(drop_out) self.keep_prob_test = np.ones_like(drop_out) self.layer_keeps = tf.placeholder(dtype) self.vars = utils.init_var_map(init_vars, init_path) w0 = [self.vars['embed_%d' % i] for i in range(num_inputs)] xw = tf.concat([ tf.sparse_tensor_dense_matmul(self.X[i], w0[i]) for i in range(num_inputs) ], 1) l = xw l = tf.transpose(tf.reshape(l, [-1, num_inputs, embed_size, 1]), [0, 2, 1, 3]) # 变为 16 x 10 矩阵 f1 = self.vars['f1'] l = tf.nn.conv2d(l, f1, [1, 1, 1, 1], 'SAME') l = tf.transpose( utils.max_pool_4d(tf.transpose(l, [0, 1, 3, 2]), int(num_inputs / 2)), [0, 1, 3, 2]) f2 = self.vars['f2'] l = tf.nn.conv2d(l, f2, [1, 1, 1, 1], 'SAME') l = tf.transpose( utils.max_pool_4d(tf.transpose(l, [0, 1, 3, 2]), 3), [0, 1, 3, 2]) l = tf.nn.dropout( utils.activate(tf.reshape(l, [-1, embed_size * 3 * 2]), layer_acts[0]), self.layer_keeps[0]) w1 = self.vars['w1'] b1 = self.vars['b1'] l = tf.matmul(l, w1) + b1 l = tf.squeeze(l) self.y_prob = tf.sigmoid(l) self.loss = tf.reduce_mean( tf.nn.sigmoid_cross_entropy_with_logits(logits=l, labels=self.y)) self.optimizer = utils.get_optimizer(opt_algo, learning_rate, self.loss) config = tf.ConfigProto() config.gpu_options.allow_growth = True self.sess = tf.Session(config=config) tf.global_variables_initializer().run(session=self.sess)
val_inputs, val_targets, val_seq_len = train_inputs, train_targets, \ train_seq_len # THE MAIN CODE! graph = tf.Graph() with graph.as_default(): # e.g: log filter bank or MFCC features # Has size [batch_size, max_stepsize, num_features], but the # batch_size and max_stepsize can vary along each step inputs = tf.placeholder(tf.float32, [None, None, num_features]) # Here we use sparse_placeholder that will generate a # SparseTensor required by ctc_loss op. targets = tf.sparse_placeholder(tf.int32) # 1d array of size [batch_size] seq_len = tf.placeholder(tf.int32, [None]) # Defining the cell # Can be: # tf.nn.rnn_cell.RNNCell # tf.nn.rnn_cell.GRUCell cell = tf.nn.rnn_cell.LSTMCell(num_hidden, state_is_tuple=True) # Stacking rnn cells stack = tf.nn.rnn_cell.MultiRNNCell([cell] * num_layers, state_is_tuple=True) # The second output is the last state and we will no use that
def __init__(self, corpus=None, vocab_size=10, h_layers=[8, 4], act=tf.nn.relu, dropout=0.0, learning_rate=1e-3, pos_sample_size=512, embedding_size_w=128, embedding_size_d=2, n_neg_samples=64, window_size=8, window_batch_size=128, friendly_print=False): """Geo-Vec model as described in the report model section.""" self.corpus = corpus self.vocab_size = vocab_size self.h_layers = h_layers self.act = act self.dropout = dropout self.learning_rate = learning_rate self.pos_sample_size = pos_sample_size self.embedding_size_w = embedding_size_w self.embedding_size_d = embedding_size_d self.n_neg_samples = n_neg_samples self.window_size = window_size self.window_batch_size = window_batch_size # use for plotting self._loss_vals, self._acc_vals = [], [] # placeholders # s = [self.vocab_size, self.vocab_size] self.placeholders = { 'A_o': tf.sparse_placeholder(tf.float32), 'L_o': tf.sparse_placeholder(tf.float32), 'A_i': tf.sparse_placeholder(tf.float32), 'L_i': tf.sparse_placeholder(tf.float32), 'idx_i': tf.placeholder(tf.int64), 'idx_o': tf.placeholder(tf.int64), 'val_i': tf.placeholder(tf.float32), 'val_o': tf.placeholder(tf.float32), 'train_dataset': tf.placeholder(tf.int32), 'train_labels': tf.placeholder(tf.int32), 'dropout': tf.placeholder_with_default(0., shape=()) } # model self.aux_losses = None dummy = sp2tf(ss.eye(self.vocab_size)) self.init_model(x=dummy) self.samples = None self.current_sample = 0 # saver self.saver = tf.train.Saver() # optimizer self.init_optimizer() # sess self.trained = 0 # self.sess = tf.Session(graph=self.graph) self.sess = tf.Session() self.sess.run(tf.global_variables_initializer())
def build_train_net_with_multiple_LSTM(data_shape, class_count, net_builder, number_of_last_layers, manipulator=None, train_only_last_layers=False, train_last_n=None, train_first_n=None): keep_prob = tf.placeholder(tf.float32, name='keep_prob') learning_rate = tf.placeholder(tf.float32, name='learning_rate') variable_summaries(learning_rate) train_phase = tf.placeholder(tf.bool, name='train_phase') input_data = tf.placeholder(tf.uint8, shape=data_shape, name='input_data') targets_all = [] for last_layer_index in range(number_of_last_layers): targets_all.append( tf.sparse_placeholder(tf.int32, name='TARGETS_{}'.format(last_layer_index))) seq_len_all = [] for last_layer_index in range(number_of_last_layers): seq_len_all.append( tf.placeholder(tf.int32, [None], name='seq_len_{}'.format(last_layer_index))) if manipulator is None: net = tf.cast(input_data, tf.float32) else: net = manipulator().transform(input_data, train_phase=train_phase) transformed_data = net decoded_all, logits_all, logits_t_all, log_prob_all, mask_all = net_builder( class_count, net, train_phase, keep_prob, seq_len_all, number_of_last_layers) ler_all = [] for decoded, targets in zip(decoded_all, targets_all): ler_all.append(tf.edit_distance(tf.cast(decoded[0], tf.int32), targets)) trn_loss_all = [] trn_loss_reduce_all = [] print(len(targets_all), len(logits_t_all), len(seq_len_all)) for i, (targets, logits_t, seq_len) in enumerate(zip(targets_all, logits_t_all, seq_len_all)): trn_loss_all.append( tf.nn.ctc_loss(targets, logits_t, seq_len, ctc_merge_repeated=True)) trn_loss_reduce_all.append( tf.reduce_mean(trn_loss_all[-1], name='trn_loss')) trn_loss = tf.add_n(trn_loss_reduce_all) update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) with tf.control_dependencies(update_ops): layers_to_train = [] if train_only_last_layers or train_last_n is not None or train_first_n is not None: layer_names = [] for v in tf.trainable_variables(): layer_name = v.name.split('/')[0] if layer_name not in layer_names and "batch" not in layer_name: print(layer_name) layer_names.append(layer_name) if layer_name not in layer_names and "batch" in layer_name: print(layer_name) print("LAYER NAMES: {}".format(layer_names)) if train_last_n is not None: layers_to_train += layer_names[-train_last_n - number_of_last_layers * 2:-number_of_last_layers * 2] if train_first_n is not None: layers_to_train += layer_names[:train_first_n] if train_only_last_layers or train_last_n is not None or train_first_n is not None: layers_to_train += layer_names[-number_of_last_layers * 2:] variables_to_train = [] for layer in layers_to_train: variables_to_train += tf.get_collection( tf.GraphKeys.TRAINABLE_VARIABLES, layer) print("LAYERS TO TRAIN: {}".format(layers_to_train)) print("VARIABLES TO TRAIN: {}".format(variables_to_train)) grads_and_vars = tf.train.AdamOptimizer( learning_rate).compute_gradients(trn_loss, var_list=variables_to_train) optimizer = tf.train.AdamOptimizer(learning_rate).apply_gradients( grads_and_vars) else: grads_and_vars = tf.train.AdamOptimizer( learning_rate).compute_gradients(trn_loss) optimizer = tf.train.AdamOptimizer(learning_rate).apply_gradients( grads_and_vars) init = tf.global_variables_initializer() saver = tf.train.Saver(max_to_keep=None) return (init, saver, keep_prob, learning_rate, train_phase, input_data, transformed_data, targets_all, seq_len_all, logits_all, logits_t_all, decoded_all, log_prob_all, trn_loss_all, trn_loss, ler_all, mask_all, optimizer, grads_and_vars)
val_G_u, val_G_v, val_A, val_B, val_u_features_side, val_v_features_side, val_u_indices, val_v_indices = \ split_tr_val_te(G_u, G_v, A, B, u_features_side, v_features_side, val_u_indices, val_v_indices) test_G_u, test_G_v, test_A, test_B, test_u_features_side, test_v_features_side, test_u_indices, test_v_indices = \ split_tr_val_te(G_u, G_v, A, B, u_features_side, v_features_side, test_u_indices, test_v_indices) u_features, v_features = sparse_to_tuple(u_features), sparse_to_tuple(v_features) A_full, B_full = sparse_to_tuple(A), sparse_to_tuple(B) G_u_full, G_v_full = sparse_to_tuple(G_u), sparse_to_tuple(G_v) # 创建模型输入 placeholders = { "u_features": tf.sparse_placeholder(tf.float32, shape=u_features[2]), "v_features": tf.sparse_placeholder(tf.float32, shape=v_features[2]), "u_features_nonzero": tf.placeholder(tf.int32, shape=()), "v_features_nonzero": tf.placeholder(tf.int32, shape=()), "u_features_side": tf.placeholder(tf.float32, shape=(None, u_features_side.shape[1])), "v_features_side": tf.placeholder(tf.float32, shape=(None, u_features_side.shape[1])), 'u_indices': tf.placeholder(tf.int32, shape=(None,)), 'v_indices': tf.placeholder(tf.int32, shape=(None,)), 'class_values': tf.placeholder(tf.float32, shape=class_values.shape), "labels": tf.placeholder(tf.int32, shape=(None,)), 'dropout': tf.placeholder_with_default(0., shape=()), 'dropout2': tf.placeholder_with_default(0., shape=()),
def build_train_net(data_shape, class_count, net_builder, loss="ctc", manipulator=None, logits_regularization=0): keep_prob = tf.placeholder(tf.float32, name='keep_prob') learning_rate = tf.placeholder(tf.float32, name='learning_rate') variable_summaries(learning_rate) train_phase = tf.placeholder(tf.bool, name='train_phase') input_data = tf.placeholder(tf.uint8, shape=data_shape, name='input_data') if loss == "sce": targets = tf.placeholder(tf.int32, name='TARGETS') str_targets = tf.sparse_placeholder(tf.int32, name='str_targets') elif loss == "ctc": targets = tf.sparse_placeholder(tf.int32, name='TARGETS') seq_len = tf.placeholder(tf.int32, [None], name='seq_len') if manipulator is None: net = tf.cast(input_data, tf.float32) else: net = manipulator().transform(input_data, train_phase=train_phase) transformed_data = net decoded, logits, logits_t, log_prob = net_builder(class_count, net, train_phase, keep_prob, seq_len) # Inaccuracy: label error rate if loss == "sce": char_preds = tf.argmax(logits, axis=2, output_type=tf.int32) ler = tf.reduce_mean(tf.cast(tf.not_equal(char_preds, targets), tf.float32), name='label_error_rate') ler_chars = tf.reduce_mean(tf.edit_distance( tf.cast(decoded[0], tf.int32), str_targets), name='str_label_error_rate') weights = tf.cast(tf.not_equal(targets, class_count - 1), tf.float32) trn_loss = tf.losses.sparse_softmax_cross_entropy(labels=targets, logits=logits, weights=weights) # trn_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=targets, logits=logits, name="sparse_softmax_cross_entropy") elif loss == "ctc": ler = tf.reduce_mean(tf.edit_distance(tf.cast(decoded[0], tf.int32), targets), name='label_error_rate') trn_loss = tf.nn.ctc_loss(targets, logits_t, seq_len, ctc_merge_repeated=True) logits_loss = tf.reduce_mean(tf.reduce_sum(tf.pow(logits, 2), 1)) trn_loss = tf.reduce_sum([ tf.reduce_mean(trn_loss, name='trn_loss'), tf.multiply(logits_loss, logits_regularization) ]) update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) with tf.control_dependencies(update_ops): optimizer = tf.train.AdamOptimizer(learning_rate).minimize(trn_loss) init = tf.global_variables_initializer() saver = tf.train.Saver(max_to_keep=5, keep_checkpoint_every_n_hours=0.33) if loss == "sce": return (init, saver, keep_prob, learning_rate, train_phase, input_data, transformed_data, targets, seq_len, logits, decoded, log_prob, trn_loss, ler, optimizer, str_targets, ler_chars) elif loss == "ctc": return (init, saver, keep_prob, learning_rate, train_phase, input_data, transformed_data, targets, seq_len, logits, decoded, log_prob, trn_loss, ler, optimizer)
def __init__(self, field_sizes=None, embed_size=10, layer_sizes=None, layer_acts=None, drop_out=None, embed_l2=None, layer_l2=None, init_path=None, opt_algo='gd', learning_rate=1e-2, random_seed=None): Model.__init__(self) init_vars = [] num_inputs = len(field_sizes) print('num_inputs:{0}\\t\tlayer_size:{1}'.format( num_inputs, layer_sizes)) for i in range(num_inputs): init_vars.append(('embed_%d' % i, [field_sizes[i], embed_size], 'xavier', dtype)) # 为每个特征值初始化一个长度为10的向量 node_in = num_inputs * embed_size # 将每个特征embeding 为10维的向量, 总共16个特征,所以是160个输入 网络为[160, 500, 1] for i in range(len(layer_sizes)): init_vars.append(('w%d' % i, [node_in, layer_sizes[i]], 'xavier', dtype)) init_vars.append(('b%d' % i, [layer_sizes[i]], 'zero', dtype)) node_in = layer_sizes[i] print('init_vars:', init_vars) self.graph = tf.Graph() with self.graph.as_default(): if random_seed is not None: tf.set_random_seed(random_seed) self.X = [tf.sparse_placeholder(dtype) for i in range(num_inputs)] self.y = tf.placeholder(dtype) self.keep_prob_train = 1 - np.array(drop_out) self.keep_prob_test = np.ones_like(drop_out) self.layer_keeps = tf.placeholder(dtype) self.vars = utils.init_var_map(init_vars, init_path) w0 = [self.vars['embed_%d' % i] for i in range(num_inputs)] xw = tf.concat([ tf.sparse_tensor_dense_matmul(self.X[i], w0[i]) for i in range(num_inputs) ], 1) # 将每个特征的隐含向量连起来,组成网络的输入,160维 l = xw for i in range(len(layer_sizes)): wi = self.vars['w%d' % i] bi = self.vars['b%d' % i] print('第{0}个隐藏层l.shape, wi.shape, bi.shape'.format(i), l.shape, wi.shape, bi.shape) l = tf.nn.dropout( utils.activate(tf.matmul(l, wi) + bi, layer_acts[i]), self.layer_keeps[i]) l = tf.squeeze(l) # 从tensor中删除所有大小是1的维度 self.y_prob = tf.sigmoid(l) self.loss = tf.reduce_mean( tf.nn.sigmoid_cross_entropy_with_logits(logits=l, labels=self.y)) if layer_l2 is not None: self.loss += embed_l2 * tf.nn.l2_loss(xw) for i in range(len(layer_sizes)): wi = self.vars['w%d' % i] self.loss += layer_l2[i] * tf.nn.l2_loss(wi) self.optimizer = utils.get_optimizer(opt_algo, learning_rate, self.loss) config = tf.ConfigProto() config.gpu_options.allow_growth = True self.sess = tf.Session(config=config) tf.global_variables_initializer().run(session=self.sess)
num_supports = 1 model_func = GCN elif FLAGS.model == 'gcn_cheby': support = chebyshev_polynomials(adj, FLAGS.max_degree) num_supports = 1 + FLAGS.max_degree model_func = GCN elif FLAGS.model == 'dense': support = [preprocess_adj(adj)] # Not used num_supports = 1 model_func = MLP else: raise ValueError('Invalid argument for model: ' + str(FLAGS.model)) # Define placeholders placeholders = { 'support': [tf.sparse_placeholder(tf.float32) for _ in range(num_supports)], 'features': tf.sparse_placeholder(tf.float32, shape=tf.constant(features0[2], dtype=tf.int64)), 'labels': tf.placeholder(tf.float32, shape=(None, y_train1.shape[1])), 'labels_mask': tf.placeholder(tf.int32), 'dropout': tf.placeholder_with_default(0., shape=()), 'num_features_nonzero': tf.placeholder(tf.int32) # helper variable for sparse dropout } # Create model model = model_func(placeholders, input_dim=features0[2][1], logging=True) # Initialize session config = tf.ConfigProto() config.gpu_options.allow_growth = True
def train(self): images, labels_seq, _, seq_lens = data_tool.get_data_from_TFrecord(is_training=self.IS_TRAINING) ## tf.nn.ctc_loss()中Labels需要传入sparseTensor。 # 从TFrecord获取label sparse的indices,然后values已知都是1,shape已知=[chars_max_num,num_classes],可以直接得出sparse labels_sparse = tf.sparse_placeholder(dtype=tf.int32, name='labels_sparse') global_steps = tf.Variable(0, trainable=False, name="global_steps", dtype=tf.int32) ## inference cnn_ouput = inference.cnn_network(images) logistics = inference.bi_lstmm_network(cnn_ouput) # md_lstm_output = inference.mdlstm_network(cnn_ouput) # logistics = inference.fc_network(md_lstm_output) # ctc_loss cost = self.compute_cost(labels_sparse=labels_sparse, inputs=logistics, sequence_length=seq_lens) # 正则化 if self.IS_REGULARIZER: # L2正则化--2/2--创建变量时,tensorflow会将变量加入集合 tf.GraphKeys.REGULARIZATOIN_LOSSES, regularization_cost = tf.add_n(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)) cost = cost + regularization_cost if self.IS_TRAINING: ## 如果正在测试集上验证就不进行反向传播 # 指数衰减学习率 learning_rate = tf.train.exponential_decay(self.START_LEARNING_RATE, global_steps, self.DECAY_STEPS, self.DECAY_RATE, staircase=self.IS_STAIRCASE) # optimizer optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost, global_step=global_steps) # 通过beamSearch获取预测结果 pred_sparse = self.get_prediction(inputs=logistics, sequence_length=seq_lens) prediction = tf.sparse_tensor_to_dense(tf.cast(pred_sparse, tf.int32)) # 计算预测与真实之间的距离 distance = self.compute_distance(pred_sparse, labels_sparse) # Saver if self.IS_NEED_SAVE and (not os.path.exists(self.MODEL_PATH)): os.makedirs(self.MODEL_PATH) saver = tf.train.Saver() # Tensorboard merged = tf.summary.merge_all() # for test code labels = tf.sparse_tensor_to_dense(labels_sparse) with tf.Session(config=config) as sess: print("Train init......") if self.IS_TRAINING else print("Test init......") sess.run(tf.global_variables_initializer()) sess.run(tf.local_variables_initializer()) ## 有队列迭代读取数据就要初始化本地变量 ckpt = tf.train.get_checkpoint_state(self.MODEL_PATH) # Saver if self.IS_NEED_SAVE and ckpt and ckpt.model_checkpoint_path: # 注意Saver保存时设置的路径,路径不对读取不到。 print("Saver-读取checkpoint,path=", ckpt.model_checkpoint_path) saver.restore(sess, ckpt.model_checkpoint_path) start = global_steps.eval() print("Saver-已成功读取Saver保存点,start from step:", start) # Tensorboard writer = tf.summary.FileWriter(self.LOG_PATH, sess.graph) # 队列化输入 coord = tf.train.Coordinator() # tf.train.start_queue_runners会把graph里的所有队列run起来,并返回管理队列的对应的子线程 threads = tf.train.start_queue_runners(coord=coord, sess=sess) try: loop_batch_times = 1 epoch = 1 print("Train start......total_epochs:", self.EPOCHS, " ,start epoch:", epoch) if self.IS_TRAINING else print("Test start......total_epochs:", self.EPOCHS, " ,start epoch:", epoch) while not coord.should_stop(): start = time.time() ## 从TFrecord中获取到label的sequence(长度定长为最大长度,不足padding。是数字序列,数字是字符在字典中的位置。) ## sequence转为sparse,然后通过feed_dict传入placeholder images_, labels_seq_ = sess.run([images, labels_seq]) label_sparse_feed = data_tool.labels_sequence_to_sparse(labels_seq_) feed_dict = {labels_sparse: label_sparse_feed} if self.IS_TRAINING: ## 在训练集 if self.IS_REGULARIZER: ## 开启了正则化就获取regularization_cost learning_rate_,cost_, prediction_, regularization_cost_, _, global_steps_, distance_, merged_ = sess.run( [learning_rate,cost, prediction, regularization_cost, optimizer, global_steps, distance, merged], feed_dict=feed_dict) print_regular_cost_str = " ,regular_cost=%d" % (regularization_cost_) else: ## 没开启正则化就不获取regularization_cost 
learning_rate_,cost_, prediction_, _, global_steps_, distance_, merged_ = sess.run( [learning_rate,cost, prediction, optimizer, global_steps, distance, merged], feed_dict=feed_dict) print_regular_cost_str = "" print("In Training...times:%04d" % (loop_batch_times), " ,global_step:%06d" % (global_steps_), " ,cost=", cost_, print_regular_cost_str, " ,distance=", distance_," ,learning_rate=",learning_rate_) else: ## 在测试集不进行反向optimizer cost_, prediction_, global_steps_, distance_, merged_ = sess.run( [cost, prediction, global_steps, distance, merged], feed_dict=feed_dict) print("In testing...times:%04d" % (loop_batch_times), " ,global_step:%06d" % (global_steps_), " ,cost=", cost_, " ,distance=", distance_) ## Console show more if (global_steps_ % 1 == 0): for i in range(0, labels_seq_.shape[0], 100): labels_seq_1 = labels_seq_[i] prediction_1 = prediction_[i] image_1 = images_[i] label_str = data_tool.label_sequence_to_string(labels_seq_1) pred_str = data_tool.label_sequence_to_string(prediction_1) if self.IS_TRAINING: print("In training...times:%04d" % (loop_batch_times), "Show more, ,label=", label_str, " ,prediction=", pred_str) ## for test data_tool.show_one_sample_image(image_1, label_str) else: print("In testing...times:%04d" % (loop_batch_times), "Show more, ,label=", label_str, " ,prediction=", pred_str) ## for test data_tool.show_one_sample_image(image_1, label_str) ## Saver & Tensorboard if (global_steps_ % 3 == 0): # Tensorborad writer.add_summary(merged_, global_step=global_steps_) # Saver保存点 if self.IS_NEED_SAVE: saver.save(sess, self.MODEL_PATH + "/model.ckpt", global_step=global_steps_) print("Model has be saved on global_step=", global_steps_, " ,trained_times=", loop_batch_times) loop_batch_times += 1 except tf.errors.OutOfRangeError: print("Train done......") if self.IS_TRAINING else print("Test done......") if self.IS_NEED_SAVE: saver.save(sess, self.MODEL_PATH + "/model.ckpt") print("Final model has be saved!") coord.request_stop() finally: coord.request_stop() # 等待所有线程退出 coord.join(threads)
def main(rank1): # adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask = load_data(FLAGS.dataset) adj, features, y_train, y_val, y_test, y_vocab, train_mask, val_mask, test_mask, vocab_mask, _, _ = load_corpus( FLAGS.dataset) train_index = np.where(train_mask)[0] # [10183] adj_train = adj[train_index, :][:, train_index] # [10183, 10183] train_mask = train_mask[train_index] # [61603] -> [10183] y_train = y_train[train_index] # [61603,20] -> [10183,20] ##modify vocab_index = np.where(vocab_mask)[0] # [42757] y_vocab = y_vocab[vocab_index] # [42757,20] tmp_index = list(train_index) + list(vocab_index) # [52940] train_index = tmp_index # modify # adj_train = adj[train_index, :][:, tmp_index] # [10183,52940] # adj_train_vocab = adj[tmp_index, :][:, tmp_index] # [52940,52940] adj_train = adj[tmp_index, :][:, tmp_index] # [52940,52940] #### # y_train = y_train + y_vocab # [10183]+[42757] print('y_vocab type', type(y_vocab)) # y_train = np.vstack(y_train, y_vocab) y_train = np.concatenate([y_train, y_vocab], axis=0) # along rows ## val_index = np.where(val_mask)[0] # [1131] y_val = y_val[val_index] # [61603,20] -> [1131,20] test_index = np.where(test_mask)[0] # [7532] y_test = y_test[test_index] # [61603,20] -> [7532,20] # train_val_index = np.concatenate([train_index, val_index],axis=0) # 10183+1131 = 11000 # train_test_index = np.concatenate([train_index, test_index],axis=0) # 10183+7532 = 12000 ##modify train_val_index = np.concatenate([train_index, val_index], axis=0) # 52940+1131 train_test_index = np.concatenate([train_index, test_index], axis=0) # 52940+7532 ## numNode_train = adj_train.shape[0] # 10183 # 52940 # print("numNode", numNode) # if FLAGS.model == 'gcn_mix': if FLAGS.model == 'appnp': normADJ_train = nontuple_preprocess_adj(adj_train) # [52940,52940] # normADJ = nontuple_preprocess_adj(adj) normADJ_val = nontuple_preprocess_adj( adj[train_val_index, :][:, train_val_index]) # [53000,53000] normADJ_test = nontuple_preprocess_adj( adj[train_test_index, :][:, train_test_index]) # [54000,54000] num_supports = 2 model_func = APPNP else: raise ValueError('Invalid argument for model: ' + str(FLAGS.model)) # Some preprocessing features = nontuple_preprocess_features( features).todense() # [61603, 61603] train_features = normADJ_train.dot( features[train_index]) # [52940,52940]*[52940,61603]->[52940, 61603] val_features = normADJ_val.dot( features[train_val_index] ) # [53000,53000]*[53000,61603]->[53000,61603] test_features = normADJ_test.dot( features[train_test_index] ) # [54000,54000]*[54000,61603]->[54000,61603] nonzero_feature_number = len(np.nonzero(features)[0]) nonzero_feature_number_train = len(np.nonzero(train_features)[0]) # Define placeholders placeholders = { 'support': tf.sparse_placeholder(tf.float32), 'AXfeatures': tf.placeholder(tf.float32, shape=(None, features.shape[1])), 'labels': tf.placeholder(tf.float32, shape=(None, y_train.shape[1])), 'dropout': tf.placeholder_with_default(0., shape=()), 'num_features_nonzero': tf.placeholder(tf.int32) # helper variable for sparse dropout } # Create model model = model_func(placeholders, input_dim=features.shape[-1], logging=True) # Initialize session sess = tf.Session() # Define model evaluation function def evaluate(features, support, labels, placeholders): t_test = time.time() feed_dict_val = construct_feeddict_forMixlayers( features, support, labels, placeholders) outs_val = sess.run([model.loss, model.accuracy], feed_dict=feed_dict_val) return outs_val[0], outs_val[1], (time.time() - t_test) # Init variables
sess.run(tf.global_variables_initializer()) saver = tf.train.Saver() cost_val = [] p0 = column_prop(normADJ_train) # testSupport = [sparse_to_tuple(normADJ), sparse_to_tuple(normADJ)] valSupport = sparse_to_tuple( normADJ_val[len(train_index):, :]) # [52940:,:] testSupport = sparse_to_tuple( normADJ_test[len(train_index):, :]) #[52940:,:] t = time.time() maxACC = 0.0 # Train model for epoch in range(FLAGS.epochs): t1 = time.time() n = 0 for batch in iterate_minibatches_listinputs([normADJ_train, y_train], batchsize=256, shuffle=True): [normADJ_batch, y_train_batch] = batch p1 = column_prop(normADJ_batch) if rank1 is None: support1 = sparse_to_tuple(normADJ_batch) features_inputs = train_features else: q1 = np.random.choice(np.arange(numNode_train), rank1, replace=False, p=p1) # top layer support1 = sparse_to_tuple(normADJ_batch[:, q1].dot( sp.diags(1.0 / (p1[q1] * rank1)))) features_inputs = train_features[ q1, :] # selected nodes for approximation # Construct feed dictionary feed_dict = construct_feeddict_forMixlayers( features_inputs, support1, y_train_batch, placeholders) #[600,61603] [batch,600] # X1W1 [600,61603][61603,200]->[600,200] # A(X1W1)W2 [batch,600][600,200][200,20]->[batch,20] feed_dict.update({placeholders['dropout']: FLAGS.dropout}) # Training step outs = sess.run([model.opt_op, model.loss, model.accuracy], feed_dict=feed_dict) n = n + 1 # Validation cost, acc, duration = evaluate(val_features, valSupport, y_val, placeholders) cost_val.append(cost) print("Epoch:", '%04d' % (epoch + 1), "train_loss=", "{:.5f}".format(outs[1]), "train_acc=", "{:.5f}".format(outs[2]), "val_loss=", "{:.5f}".format(cost), "val_acc=", "{:.5f}".format(acc), "time=", "{:.5f}".format(time.time() - t1)) # if epoch > 50 and acc>maxACC: # maxACC = acc # save_path = saver.save(sess, "tmp/tmp_MixModel.ckpt") # Print results # print("Epoch:", '%04d' % (epoch + 1), "train_loss=", "{:.5f}".format(outs[1]), # "train_acc=", "{:.5f}".format(outs[2]), "val_loss=", "{:.5f}".format(cost), # "val_acc=", "{:.5f}".format(acc), "time per batch=", "{:.5f}".format((time.time() - t1)/n)) if epoch > FLAGS.early_stopping and np.mean(cost_val[-2:]) > np.mean( cost_val[-(FLAGS.early_stopping + 1):-1]): # print("Early stopping...") break train_duration = time.time() - t # Testing # if os.path.exists("tmp/pubmed_MixModel.ckpt"): # saver.restore(sess, "tmp/pubmed_MixModel.ckpt") test_cost, test_acc, test_duration = evaluate(test_features, testSupport, y_test, placeholders) print("rank1 = {}".format(rank1), "cost=", "{:.5f}".format(test_cost), "accuracy=", "{:.5f}".format(test_acc), "training time per epoch=", "{:.5f}".format(train_duration / (epoch + 1)), "test time=", "{:.5f}".format(test_duration))
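# The script above converts scipy sparse matrices with sparse_to_tuple before they reach the
# tf.sparse_placeholder in `placeholders['support']`. A minimal sketch of that conversion,
# assuming the (coords, values, shape) convention common to these GCN codebases; the helper
# actually imported by this script is not shown here.
import numpy as np
import scipy.sparse as sp

def sparse_to_tuple_sketch(mx):
    mx = mx.tocoo()  # COO format gives aligned row/col/data arrays
    coords = np.vstack((mx.row, mx.col)).transpose()
    return coords, mx.data, np.array(mx.shape, dtype=np.int64)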
biases = process.preprocess_adj(adj) nnz = len(biases[1]) else: adj = adj.todense() adj = adj[np.newaxis] biases = process.adj_to_bias(adj, [nb_nodes], nhood=1) with tf.Graph().as_default(): with tf.name_scope('input'): ftr_in = tf.placeholder(dtype=tf.float32, shape=(batch_size, nb_nodes, ft_size)) if sparse: #bias_idx = tf.placeholder(tf.int64) #bias_val = tf.placeholder(tf.float32) #bias_shape = tf.placeholder(tf.int64) bias_in = tf.sparse_placeholder(dtype=tf.float32) else: bias_in = tf.placeholder(dtype=tf.float32, shape=(batch_size, nb_nodes, nb_nodes)) lbl_in = tf.placeholder(dtype=tf.int32, shape=(batch_size, nb_nodes, nb_classes)) msk_in = tf.placeholder(dtype=tf.int32, shape=(batch_size, nb_nodes)) attn_drop = tf.placeholder(dtype=tf.float32, shape=()) ffd_drop = tf.placeholder(dtype=tf.float32, shape=()) is_train = tf.placeholder(dtype=tf.bool, shape=()) logits = model.inference(ftr_in, nb_classes, nb_nodes, is_train, attn_drop,
flags.DEFINE_integer('gpu', -1, 'Which gpu to use') flags.DEFINE_integer('seeded', 1, 'Set numpy random seed') np.set_printoptions(suppress=True, precision=3) if FLAGS.seeded: np.random.seed(1) A_orig, A, X = load_siemens() features = X[0] num_features = 245 + 1 features_nonzero = 245 * 2 placeholders = { 'features': tf.sparse_placeholder(tf.float32), 'adj': tf.sparse_placeholder(tf.float32), 'adj_orig': tf.sparse_placeholder(tf.float32), 'dropout': tf.placeholder_with_default(0., shape=()), } num_nodes = 245 model = GCNModelSiemens(placeholders, num_features, num_nodes, features_nonzero) with tf.name_scope('optimizer'): opt = OptimizerSiemens(preds=model.reconstructions, labels=tf.reshape( tf.sparse_tensor_to_dense( placeholders['adj_orig'],
def _build_graph(self): """ Build a computation graph that represents the model """ rnn_inputs = self._build_input() # rnn_inputs: a list of num_step tensors, # each tensor of size (batch_size, query_embed_size). self.rnn_inputs = [ tf.reshape(q, [-1, self.query_embed_size]) for q in tf.split(rnn_inputs, self.num_step, axis=1) ] self.rnn_inputs_new = [ a for i, a in enumerate(self.rnn_inputs) if i < self.num_step - 1 ] #print(len(self.rnn_inputs_new)) self.cells = [] init_states = [] self.cells_bw = [] init_states_bw = [] for i in range(self.rank): cell = tf.contrib.rnn.LSTMCell(self.rnn_state_size) self.cells.append( tf.contrib.rnn.MultiRNNCell([cell] * self.num_layer, state_is_tuple=True)) init_states.append(self.cells[i].zero_state( tf.shape(self.tails)[0], tf.float32)) ##### making backward cells cell_bw = tf.contrib.rnn.LSTMCell(self.rnn_state_size) self.cells_bw.append( tf.contrib.rnn.MultiRNNCell([cell_bw] * self.num_layer, state_is_tuple=True)) init_states_bw.append(self.cells_bw[i].zero_state( tf.shape(self.tails)[0], tf.float32)) self.rnn_outputs_list = [] for i in range(self.rank): # rnn_outputs: a list of num_step tensors, # each tensor of size (batch_size, rnn_state_size). rnn_outputs, _, _ = tf.contrib.rnn.static_bidirectional_rnn( self.cells[i], self.cells_bw[i], self.rnn_inputs_new, initial_state_fw=init_states[i], initial_state_bw=init_states_bw[i], scope='f_' + str(i)) self.rnn_outputs_list.append(rnn_outputs) # linear layer mapping the bidirectional RNN outputs to attention logits over the operators self.W_0 = tf.Variable(np.random.randn(self.rnn_state_size * 2, self.num_operator + 1), dtype=tf.float32) self.b_0 = tf.Variable(np.zeros((1, self.num_operator + 1)), dtype=tf.float32) # attention_operators: a list of num_step lists, # each inner list has num_operator tensors, # each tensor of size (batch_size, 1). # Each tensor represents the attention over an operator. self.attention_operators_list = [] self.memories_list = [] for i in range(self.rank): self.attention_operators_list.append([ tf.split( tf.nn.softmax(tf.matmul(rnn_output, self.W_0) + self.b_0), self.num_operator + 1, axis=1) for rnn_output in self.rnn_outputs_list[i] ]) # memories: (will be) a tensor of size (batch_size, t+1, num_entity), # where t is the current step (zero indexed). # The tensor represents the currently populated memory cells. self.memories_list.append( tf.expand_dims( tf.one_hot(indices=self.tails, depth=self.num_entity), 1)) self.database = { r: tf.sparse_placeholder(dtype=tf.float32, name="database_%d" % r) for r in range(self.num_operator // 2) } # Get predictions self.predictions = 0.0 for i_rank in range(self.rank): for t in range(self.num_step): # memory_read: tensor of size (batch_size, num_entity) # memory_read = tf.squeeze(self.memories, squeeze_dims=[1]) memory_read = self.memories_list[i_rank][:, -1, :] if t < self.num_step - 1: # database_results: (will be) a list of num_operator tensors, # each of size (batch_size, num_entity).
database_results = [] memory_read = tf.transpose(memory_read) for r in range(self.num_operator // 2): for op_matrix, op_attn in zip([ self.database[r], tf.sparse_transpose(self.database[r]) ], [ self.attention_operators_list[i_rank][t][r], self.attention_operators_list[i_rank][t][ r + self.num_operator // 2] ]): product = tf.sparse_tensor_dense_matmul( op_matrix, memory_read) database_results.append( tf.transpose(product) * op_attn) database_results.append( tf.transpose(memory_read) * self.attention_operators_list[i_rank][t][-1]) added_database_results = tf.add_n(database_results) if self.norm: added_database_results /= tf.maximum( self.thr, tf.reduce_sum(added_database_results, axis=1, keep_dims=True)) if self.dropout > 0.: added_database_results = tf.nn.dropout( added_database_results, keep_prob=1. - self.dropout) # Populate a new cell in memory by concatenating. self.memories_list[i_rank] = tf.concat([ self.memories_list[i_rank], tf.expand_dims(added_database_results, 1) ], axis=1) else: self.predictions += memory_read print(self.rank) self.final_loss = -tf.reduce_sum( self.targets * tf.log(tf.maximum(self.predictions, self.thr)), 1) if not self.accuracy: self.in_top = tf.nn.in_top_k(predictions=self.predictions, targets=self.heads, k=self.top_k) else: _, indices = tf.nn.top_k(self.predictions, self.top_k, sorted=False) self.in_top = tf.equal(tf.squeeze(indices), self.heads) self.optimizer = tf.train.AdamOptimizer(self.learning_rate) gvs = self.optimizer.compute_gradients(tf.reduce_mean(self.final_loss)) # capped_gvs = list(map( # lambda grad, var: self._clip_if_not_None(grad, var, -5., 5.), gvs) ) capped_gvs = list( map(lambda t: self._clip_if_not_None(t[0], t[1], -5., 5.), gvs)) self.optimizer_step = self.optimizer.apply_gradients(capped_gvs)
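# Each memory-update step above mixes relation operators: every operator M_r (and its
# transpose) propagates the current memory vector with tf.sparse_tensor_dense_matmul,
# weighted by its softmax attention, plus an attended identity (no-op) term. A toy dense
# numpy sketch of one such step for a single example; the matrix and attention values
# below are made up for illustration.
import numpy as np

num_entity = 4
memory = np.zeros(num_entity); memory[0] = 1.0   # one-hot tail entity, as with tf.one_hot above
M_r = np.eye(num_entity, k=1)                    # made-up dense stand-in for one sparse relation operator
attn_op, attn_id = 0.7, 0.3                      # one softmax attention split over {operator, identity}
new_memory = attn_op * M_r.dot(memory) + attn_id * memory  # analogue of sum_r attn_r*(M_r @ m) + attn_last*m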
def test_process_input_transposed(self): with self.test_session(): sp_feeder = tf.sparse_placeholder(tf.float32) wals_model = factorization_ops.WALSModel(5, 7, 3, num_row_shards=2, num_col_shards=3, regularization=0.01, unobserved_weight=0.1, col_init=self.col_init, row_weights=self.row_wts, col_weights=self.col_wts) wals_model.initialize_op.run() wals_model.worker_init.run() # Split input into multiple SparseTensors with scattered rows. # Here the inputs are transposed. But the same constraints as described in # the previous non-transposed test case apply to these inputs (before they # are transposed). sp_r0_t = np_matrix_to_tf_sparse(INPUT_MATRIX, [0, 3], transpose=True).eval() sp_r1_t = np_matrix_to_tf_sparse(INPUT_MATRIX, [4, 1], shuffle=True, transpose=True).eval() sp_r2_t = np_matrix_to_tf_sparse(INPUT_MATRIX, [2], transpose=True).eval() sp_r3_t = sp_r1_t input_scattered_rows = [sp_r0_t, sp_r1_t, sp_r2_t, sp_r3_t] # Test updating row factors. # Here we feed in scattered rows of the input. # Note that the suffixes of the placeholders follow the lexicographical order # of the test case names and then the order of the lines on which they appear. wals_model.initialize_row_update_op.run() process_input_op = wals_model.update_row_factors(sp_input=sp_feeder, transpose_input=True)[1] for inp in input_scattered_rows: feed_dict = {sp_feeder: inp} process_input_op.run(feed_dict=feed_dict) row_factors = [x.eval() for x in wals_model.row_factors] self.assertAllClose(row_factors[0], self._row_factors_0, atol=1e-3) self.assertAllClose(row_factors[1], self._row_factors_1, atol=1e-3) # Split input into multiple SparseTensors with scattered columns. # Here the inputs are transposed. But the same constraints as described in # the previous non-transposed test case apply to these inputs (before they # are transposed). sp_c0_t = np_matrix_to_tf_sparse(INPUT_MATRIX, col_slices=[0, 1], transpose=True).eval() sp_c1_t = np_matrix_to_tf_sparse(INPUT_MATRIX, col_slices=[4, 2], transpose=True).eval() sp_c2_t = np_matrix_to_tf_sparse(INPUT_MATRIX, col_slices=[5], transpose=True, shuffle=True).eval() sp_c3_t = np_matrix_to_tf_sparse(INPUT_MATRIX, col_slices=[3, 6], transpose=True).eval() sp_c4_t = sp_c2_t input_scattered_cols = [sp_c0_t, sp_c1_t, sp_c2_t, sp_c3_t, sp_c4_t] # Test updating column factors. # Here we feed in scattered columns of the input. wals_model.initialize_col_update_op.run() process_input_op = wals_model.update_col_factors(sp_input=sp_feeder, transpose_input=True)[1] for inp in input_scattered_cols: feed_dict = {sp_feeder: inp} process_input_op.run(feed_dict=feed_dict) col_factors = [x.eval() for x in wals_model.col_factors] self.assertAllClose(col_factors[0], self._col_factors_0, atol=1e-3) self.assertAllClose(col_factors[1], self._col_factors_1, atol=1e-3) self.assertAllClose(col_factors[2], self._col_factors_2, atol=1e-3)
def train_gcn(features, adj_train, args, graph_type): model_str = args.model # Store original adjacency matrix (without diagonal entries) for later adj_orig = adj_train adj_orig = adj_orig - sp.dia_matrix((adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape) adj_orig.eliminate_zeros() adj = adj_train # Some preprocessing adj_norm = preprocess_graph(adj) # Define placeholders placeholders = { 'features': tf.sparse_placeholder(tf.float64), 'adj': tf.sparse_placeholder(tf.float64), 'adj_orig': tf.sparse_placeholder(tf.float64), 'dropout': tf.placeholder_with_default(0., shape=()) } num_nodes = adj.shape[0] features = sparse_to_tuple(features.tocoo()) num_features = features[2][1] features_nonzero = features[1].shape[0] # Create model model = None if model_str == 'gcn_ae': model = GCNModelAE(placeholders, num_features, features_nonzero, args.hidden1, args.hidden2) elif model_str == 'gcn_vae': model = GCNModelVAE(placeholders, num_features, num_nodes, features_nonzero, args.hidden1, args.hidden2) # Optimizer with tf.name_scope('optimizer'): if model_str == 'gcn_ae': opt = OptimizerAE(preds=model.reconstructions, labels=tf.reshape(tf.sparse_tensor_to_dense(placeholders['adj_orig'], validate_indices=False), [-1]), pos_weight=1, norm=1, lr=args.lr) elif model_str == 'gcn_vae': opt = OptimizerVAE(preds=model.reconstructions, labels=tf.reshape(tf.sparse_tensor_to_dense(placeholders['adj_orig'], validate_indices=False), [-1]), model=model, num_nodes=num_nodes, pos_weight=1, norm=1, lr=args.lr) # Initialize session sess = tf.Session() sess.run(tf.global_variables_initializer()) adj_label = adj_train + sp.eye(adj_train.shape[0]) adj_label = sparse_to_tuple(adj_label) # Train model # use different epochs for ppi and similarity network if graph_type == "sequence_similarity": epochs = args.epochs_simi else: epochs = args.epochs_ppi for epoch in range(epochs): t = time.time() # Construct feed dictionary feed_dict = construct_feed_dict(adj_norm, adj_label, features, placeholders) feed_dict.update({placeholders['dropout']: args.dropout}) # Run single weight update outs = sess.run([opt.opt_op, opt.cost], feed_dict=feed_dict) if epoch % 10 == 0: print("Epoch:", '%04d' % (epoch+1), "train_loss=", "{:.5f}".format(outs[1])) print("Optimization Finished!") #return embedding for each protein emb = sess.run(model.z_mean,feed_dict=feed_dict) return emb
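# train_gcn above relies on preprocess_graph for the normalized adjacency fed to the GCN
# layers. A minimal sketch of the usual symmetric renormalization D^{-1/2}(A+I)D^{-1/2},
# assuming the same sparse-tuple output convention as sparse_to_tuple; the helper actually
# imported by this script may differ in details.
import numpy as np
import scipy.sparse as sp

def preprocess_graph_sketch(adj):
    adj_ = adj + sp.eye(adj.shape[0])                    # add self-connections
    rowsum = np.array(adj_.sum(1)).flatten()
    d_inv_sqrt = sp.diags(np.power(rowsum, -0.5))        # rowsum >= 1 thanks to the self-loops
    adj_norm = d_inv_sqrt.dot(adj_).dot(d_inv_sqrt).tocoo()
    coords = np.vstack((adj_norm.row, adj_norm.col)).transpose()
    return coords, adj_norm.data, np.array(adj_norm.shape, dtype=np.int64)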
num_classes = data.voca_size+1 batch_size = 8 learning_rate = 0.01 num_epochs=201 num_examples = 16 graph= tf.Graph() with graph.as_default(): # MFCCs are used as features inputs= tf.placeholder(tf.float32, [None, None, num_features], name='inpu') inputs_reshaped= tf.expand_dims(inputs, 3) #inputs_reshaped = tf.transpose(inputs_reshaped, [0, 2, 1,3]) # Sparse placeholder for the CTC targets targets= tf.sparse_placeholder(tf.int32, name='targ') #seq_len= [batch_size] seq_len= tf.placeholder(tf.int32, [None], name='seqlen') # network definition # convolution block 1 conv1= tf.layers.conv2d(inputs_reshaped, 64, kernel_size=(3,3), activation=tf.nn.relu, padding='SAME') norm1= tf.layers.batch_normalization(inputs= conv1) pool1 = tf.layers.max_pooling2d(inputs=norm1, pool_size=(2,2), strides=(1,2), padding='SAME') # convolution block 2 conv2 = tf.layers.conv2d(pool1, 128, kernel_size=[3, 3], activation=tf.nn.relu, padding='SAME') norm2 = tf.layers.batch_normalization(inputs=conv2) pool2 = tf.layers.max_pooling2d(inputs=norm2, pool_size=[3, 3], strides=(1, 3), padding='SAME')
"""Attach a lot of summaries to a Tensor.""" with tf.name_scope('summaries'): mean = tf.reduce_mean(var) tf.scalar_summary('mean/' + name, mean) with tf.name_scope('stddev'): stddev = tf.sqrt(tf.reduce_sum(tf.square(var - mean))) tf.scalar_summary('sttdev/' + name, stddev) tf.scalar_summary('max/' + name, tf.reduce_max(var)) tf.scalar_summary('min/' + name, tf.reduce_min(var)) tf.histogram_summary(name, var) with tf.name_scope('input'): # Shape [BS, TRIGRAM_D]. query_batch = tf.sparse_placeholder(tf.float32, shape=query_in_shape, name='QueryBatch') # Shape [BS, TRIGRAM_D] doc_batch = tf.sparse_placeholder(tf.float32, shape=doc_in_shape, name='DocBatch') with tf.name_scope('L1'): l1_par_range = np.sqrt(6.0 / (TRIGRAM_D + L1_N)) weight1 = tf.Variable( tf.random_uniform([TRIGRAM_D, L1_N], -l1_par_range, l1_par_range)) bias1 = tf.Variable(tf.random_uniform([L1_N], -l1_par_range, l1_par_range)) variable_summaries(weight1, 'L1_weights') variable_summaries(bias1, 'L1_biases') # query_l1 = tf.matmul(tf.to_float(query_batch),weight1)+bias1
def main(rank1): # config = tf.ConfigProto(device_count={"CPU": 4}, # limit to num_cpu_core CPU usage # inter_op_parallelism_threads = 1, # intra_op_parallelism_threads = 4, # log_device_placement=False) adj, features, y_train, y_val, y_test, train_index, val_index, test_index = loadRedditFromNPZ( "data/") adj = adj + adj.T y_train = transferLabel2Onehot(y_train, 41) y_val = transferLabel2Onehot(y_val, 41) y_test = transferLabel2Onehot(y_test, 41) features = sp.lil_matrix(features) adj_train = adj[train_index, :][:, train_index] numNode_train = adj_train.shape[0] # print("numNode", numNode) if FLAGS.model == 'gcn_mix': normADJ_train = nontuple_preprocess_adj(adj_train) normADJ = nontuple_preprocess_adj(adj) # normADJ_val = nontuple_preprocess_adj(adj_val) # normADJ_test = nontuple_preprocess_adj(adj_test) model_func = GCN_APPRO_Mix else: raise ValueError('Invalid argument for model: ' + str(FLAGS.model)) # Some preprocessing features = nontuple_preprocess_features(features).todense() train_features = normADJ_train.dot(features[train_index]) features = normADJ.dot(features) nonzero_feature_number = len(np.nonzero(features)[0]) nonzero_feature_number_train = len(np.nonzero(train_features)[0]) # Define placeholders placeholders = { 'support': tf.sparse_placeholder(tf.float32), 'AXfeatures': tf.placeholder(tf.float32, shape=(None, features.shape[1])), 'labels': tf.placeholder(tf.float32, shape=(None, y_train.shape[1])), 'dropout': tf.placeholder_with_default(0., shape=()), 'num_features_nonzero': tf.placeholder(tf.int32) # helper variable for sparse dropout } # Create model model = model_func(placeholders, input_dim=features.shape[-1], logging=True) # Initialize session sess = tf.Session() saver = tf.train.Saver() # Define model evaluation function def evaluate(features, support, labels, placeholders): t_test = time.time() feed_dict_val = construct_feeddict_forMixlayers( features, support, labels, placeholders) outs_val = sess.run([model.loss, model.accuracy], feed_dict=feed_dict_val) return outs_val[0], outs_val[1], (time.time() - t_test) # Init variables sess.run(tf.global_variables_initializer()) cost_val = [] # testSupport = [sparse_to_tuple(normADJ), sparse_to_tuple(normADJ)] valSupport = sparse_to_tuple(normADJ[val_index, :]) testSupport = sparse_to_tuple(normADJ[test_index, :]) t = time.time() maxACC = 0.0 # Train model for epoch in range(FLAGS.epochs): t1 = time.time() n = 0 for batch in iterate_minibatches_listinputs([normADJ_train, y_train], batchsize=256, shuffle=True): [normADJ_batch, y_train_batch] = batch if rank1 is None: support1 = sparse_to_tuple(normADJ_batch) features_inputs = train_features else: distr = np.nonzero(np.sum(normADJ_batch, axis=0))[1] if rank1 > len(distr): q1 = distr else: q1 = np.random.choice(distr, rank1, replace=False) # top layer # q1 = np.random.choice(np.arange(numNode_train), rank1) # top layer support1 = sparse_to_tuple(normADJ_batch[:, q1] * numNode_train / len(q1)) features_inputs = train_features[ q1, :] # selected nodes for approximation # Construct feed dictionary feed_dict = construct_feeddict_forMixlayers( features_inputs, support1, y_train_batch, placeholders) feed_dict.update({placeholders['dropout']: FLAGS.dropout}) # Training step outs = sess.run([model.opt_op, model.loss, model.accuracy], feed_dict=feed_dict) n = n + 1 # Validation cost, acc, duration = evaluate(features, valSupport, y_val, placeholders) cost_val.append(cost) if epoch > 50 and acc > maxACC: maxACC = acc save_path = saver.save(sess, "tmp/tmp_MixModel_uniform.ckpt") # Print 
print("Epoch:", '%04d' % (epoch + 1), "train_loss=", "{:.5f}".format(outs[1]), "train_acc=", "{:.5f}".format(outs[2]), "val_loss=", "{:.5f}".format(cost), "val_acc=", "{:.5f}".format(acc), "time per batch=", "{:.5f}".format( (time.time() - t1) / n)) if epoch > FLAGS.early_stopping and np.mean(cost_val[-2:]) > np.mean( cost_val[-(FLAGS.early_stopping + 1):-1]): # print("Early stopping...") break train_duration = time.time() - t # Testing if os.path.exists("tmp/tmp_MixModel_uniform.ckpt"): saver.restore(sess, "tmp/tmp_MixModel_uniform.ckpt") test_cost, test_acc, test_duration = evaluate(features, testSupport, y_test, placeholders) print("rank1 = {}".format(rank1), "cost=", "{:.5f}".format(test_cost), "accuracy=", "{:.5f}".format(test_acc), "training time=", "{:.5f}".format(train_duration), "epoch = {}".format(epoch + 1), "test time=", "{:.5f}".format(test_duration))
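# The rank1 branch above samples neighbor columns and rescales the sampled sub-matrix by
# numNode_train / len(q1), so the downstream product stays an unbiased estimate of the full
# propagation. A toy numpy check of that estimator with uniform sampling; the sizes here
# are made up for illustration.
import numpy as np

N, rank1 = 1000, 100
A = np.random.rand(8, N)                     # stands in for one normalized adjacency batch
H = np.random.rand(N, 16)                    # stands in for the node feature matrix
q = np.random.choice(N, rank1, replace=False)
estimate = A[:, q].dot(H[q, :]) * N / rank1  # rescaling keeps E[estimate] = A.dot(H)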
features[0] = np.random.standard_normal(features[0].shape) if sparse: biases = [process.preprocess_adj_hete(a) for a in adj] # transposed here else: biases = [] for a in adj: a = a.todense() a = a[np.newaxis] with tf.Graph().as_default(): with tf.name_scope('input'): ftr_in = [tf.placeholder(dtype=tf.float32, shape=(batch_size, nb, ft)) for nb, ft in zip(nb_nodes, ft_size)] if sparse: bias_in = [tf.sparse_placeholder(dtype=tf.float32) for _ in biases] else: bias_in = None lbl_in = [tf.placeholder(dtype=tf.int32, shape=(batch_size, nb_nodes[target_node[i]], nb_classes[i])) for i in range(len(nb_classes))] msk_in = [tf.placeholder(dtype=tf.int32, shape=(batch_size, nb_nodes[target_node[i]])) for i in range(len(nb_classes))] attn_drop = tf.placeholder(dtype=tf.float32, shape=()) ffd_drop = tf.placeholder(dtype=tf.float32, shape=()) is_train = tf.placeholder(dtype=tf.bool, shape=()) logits = model.inference(ftr_in, nb_classes, nb_nodes, is_train, attn_drop, ffd_drop, target_nodes=target_node, bias_mat=bias_in, adj_type=adj_type, hid_units=hid_units, n_heads=n_heads, residual=residual, activation=nonlinearity) with tf.name_scope('loss_acc'): loss, accuracy, acc_name, acc_full_name = [], [], [], []
def train_model(): dataset = 'own_layer_all' # Define model evaluation function # test_cost, test_acc, pred, labels, test_duration def evaluate(features, support, labels, mask, placeholders): t_test = time.time() feed_dict_val = construct_feed_dict(features, support, labels, mask, placeholders) outs_val = sess.run( [model.loss, model.accuracy, model.pred, model.labels], feed_dict=feed_dict_val) return outs_val[0], outs_val[1], outs_val[2], outs_val[3], ( time.time() - t_test) # Set random seed seed = random.randint(1, 200) np.random.seed(seed) tf.set_random_seed(seed) # Settings os.environ["CUDA_VISIBLE_DEVICES"] = "" flags = tf.app.flags FLAGS = flags.FLAGS lst = list(FLAGS._flags().keys()) for key in lst: FLAGS.__delattr__(key) # 'cora', 'citeseer', 'pubmed' flags.DEFINE_string('dataset', dataset, 'Dataset string.') # 'gcn', 'gcn_cheby', 'dense' flags.DEFINE_string('model', 'gcn_multi', 'Model string.') flags.DEFINE_float('learning_rate', 0.02, 'Initial learning rate.') flags.DEFINE_integer('epochs', 200, 'Number of epochs to train.') flags.DEFINE_integer('hidden1', 200, 'Number of units in hidden layer 1.') flags.DEFINE_float('dropout', 0.5, 'Dropout rate (1 - keep probability).') flags.DEFINE_float('weight_decay', 0, 'Weight for L2 loss on embedding matrix.') # 5e-4 flags.DEFINE_integer('early_stopping', 10, 'Tolerance for early stopping (# of epochs).') flags.DEFINE_integer('max_degree', 3, 'Maximum Chebyshev polynomial degree.') # Load data adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask, train_size, test_size = load_corpus( FLAGS.dataset) print(adj) # print(adj[0], adj[1]) f = open('data/' + dataset + '_y_test.txt', 'w') for i in range(len(y_test)): f.write(str(y_test[i]) + '\n') f.close() features = sp.identity(features.shape[0]) # featureless: identity matrix print(adj.shape) print(features.shape) # Some preprocessing features = preprocess_features(features) if FLAGS.model == 'gcn': support = [preprocess_adj(adj)] num_supports = 1 model_func = GCN elif FLAGS.model == 'gcn_cheby': support = chebyshev_polynomials(adj, FLAGS.max_degree) num_supports = 1 + FLAGS.max_degree model_func = GCN elif FLAGS.model == 'dense': support = [preprocess_adj(adj)] # Not used num_supports = 1 model_func = MLP elif FLAGS.model == 'gcn_multi': support = [preprocess_adj(adj)] # Not used num_supports = 1 model_func = GCNM else: raise ValueError('Invalid argument for model: ' + str(FLAGS.model)) # Define placeholders placeholders = { 'support': [tf.sparse_placeholder(tf.float32) for _ in range(num_supports)], 'features': tf.sparse_placeholder(tf.float32, shape=tf.constant(features[2], dtype=tf.int64)), 'labels': tf.placeholder(tf.float32, shape=(None, y_train.shape[1])), 'labels_mask': tf.placeholder(tf.int32), 'dropout': tf.placeholder_with_default(0., shape=()), # helper variable for sparse dropout 'num_features_nonzero': tf.placeholder(tf.int32) } # Create model print(features[2][1]) model = model_func(placeholders, input_dim=features[2][1], logging=True) # Initialize session session_conf = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True)) sess = tf.Session(config=session_conf) # Init variables sess.run(tf.global_variables_initializer()) cost_val = [] # Train model for epoch in range(FLAGS.epochs): aaa = model.pred bbb = placeholders['labels'] print(bbb) print(str(aaa)) t = time.time() # Construct feed dictionary feed_dict = construct_feed_dict(features, support, y_train, train_mask, placeholders) feed_dict.update({placeholders['dropout']: FLAGS.dropout}) # Training step outs =
sess.run([ model.opt_op, model.loss, model.accuracy, model.layers[0].embedding ], feed_dict=feed_dict) # Validation #print(y_val[1100]) cost, acc, pred, labels, duration = evaluate(features, support, y_val, val_mask, placeholders) cost_val.append(cost) print("Epoch:", '%04d' % (epoch + 1), "train_loss=", "{:.5f}".format(outs[1]), "train_acc=", "{:.5f}".format(outs[2]), "val_loss=", "{:.5f}".format(cost), "val_acc=", "{:.5f}".format(acc), "time=", "{:.5f}".format(time.time() - t)) if epoch > FLAGS.early_stopping and cost_val[-1] > np.mean( cost_val[-(FLAGS.early_stopping + 1):-1]): print("Early stopping...") break print("Optimization Finished!") # Testing test_cost, test_acc, pred, labels, test_duration = evaluate( features, support, y_test, test_mask, placeholders) print("Test set results:", "cost=", "{:.5f}".format(test_cost), "accuracy=", "{:.5f}".format(test_acc), "time=", "{:.5f}".format(test_duration)) test_pred = [] test_labels = [] print(len(test_mask)) for i in range(len(test_mask)): # print(test_mask[i]) if test_mask[i]: test_pred.append(pred[i]) test_labels.append(labels[i]) print(test_labels) test_pred_sort0 = np.arange(292 * 10).reshape(292, 10) test_labels_sort0 = np.arange(292 * 10).reshape(292, 10) test_labels_sort_mask0 = [0 for i in range(292)] #test_pred_sort = [0 for i in range(292)] #test_labels_sort = [0 for i in range(292)] test_idx_reorder = parse_index_file("data/{}.test.index".format(dataset)) print(test_idx_reorder) for i in range(len(test_idx_reorder)): idx = test_idx_reorder[i] - 1168 test_pred_sort0[idx] = test_pred[i][:] test_labels_sort0[idx] = test_labels[i][:] test_labels_sort_mask0[idx] = 1 test_pred_sort = [] test_labels_sort = [] for i in range(292): if test_labels_sort_mask0[i] == 1: test_pred_sort.append(test_pred_sort0[i]) test_labels_sort.append(test_labels_sort0[i]) print(test_labels_sort) strlabel = 'result/label/' + dataset + '.npy' strpred = 'result/pred/' + dataset + '.npy' np.save(strlabel, test_labels_sort) np.save(strpred, test_pred_sort) ''' print("Test Precision, Recall and F1-Score...") print(metrics.classification_report(test_labels, test_pred, digits=4)) print("Macro average Test Precision, Recall and F1-Score...") print(metrics.precision_recall_fscore_support(test_labels, test_pred, average='macro')) print("Micro average Test Precision, Recall and F1-Score...") print(metrics.precision_recall_fscore_support(test_labels, test_pred, average='micro')) ''' # doc and word embeddings print('embeddings:') word_embeddings = outs[3][train_size:adj.shape[0] - test_size] train_doc_embeddings = outs[3][:train_size] # include val docs test_doc_embeddings = outs[3][adj.shape[0] - test_size:] print(len(word_embeddings), len(train_doc_embeddings), len(test_doc_embeddings)) print(word_embeddings) f = open('data/corpus/' + dataset + '_vocab.txt', 'r') words = f.readlines() f.close() vocab_size = len(words) word_vectors = [] for i in range(vocab_size): word = words[i].strip() word_vector = word_embeddings[i] word_vector_str = ' '.join([str(x) for x in word_vector]) word_vectors.append(word + ' ' + word_vector_str) word_embeddings_str = '\n'.join(word_vectors) f = open('data/' + dataset + '_word_vectors.txt', 'w') f.write(word_embeddings_str) f.close() doc_vectors = [] doc_id = 0 for i in range(train_size): doc_vector = train_doc_embeddings[i] doc_vector_str = ' '.join([str(x) for x in doc_vector]) doc_vectors.append('doc_' + str(doc_id) + ' ' + doc_vector_str) doc_id += 1 for i in range(test_size): doc_vector = test_doc_embeddings[i] doc_vector_str = ' 
'.join([str(x) for x in doc_vector]) doc_vectors.append('doc_' + str(doc_id) + ' ' + doc_vector_str) doc_id += 1 doc_embeddings_str = '\n'.join(doc_vectors) f = open('data/' + dataset + '_doc_vectors.txt', 'w') f.write(doc_embeddings_str) f.close()
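# train_model above reorders the test predictions with parse_index_file, which reads
# "data/{dataset}.test.index". A minimal sketch of such a reader, assuming one integer
# document index per line as in the common text-GCN preprocessing; the imported helper
# itself is not shown in this snippet.
def parse_index_file_sketch(filename):
    index = []
    for line in open(filename):
        index.append(int(line.strip()))
    return index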
def __init__(self, field_sizes=None, embed_size=10, layer_sizes=None, layer_acts=None, drop_out=None, embed_l2=None, layer_l2=None, init_path=None, opt_algo='gd', learning_rate=1e-2, random_seed=None, layer_norm=True): Model.__init__(self) init_vars = [] num_inputs = len(field_sizes) for i in range(num_inputs): init_vars.append(('embed_%d' % i, [field_sizes[i], embed_size], 'xavier', dtype)) node_in = num_inputs * embed_size + embed_size * embed_size for i in range(len(layer_sizes)): init_vars.append(('w%d' % i, [node_in, layer_sizes[i]], 'xavier', dtype)) init_vars.append(('b%d' % i, [layer_sizes[i]], 'zero', dtype)) node_in = layer_sizes[i] self.graph = tf.Graph() with self.graph.as_default(): if random_seed is not None: tf.set_random_seed(random_seed) self.X = [tf.sparse_placeholder(dtype) for i in range(num_inputs)] self.y = tf.placeholder(dtype) self.keep_prob_train = 1 - np.array(drop_out) self.keep_prob_test = np.ones_like(drop_out) self.layer_keeps = tf.placeholder(dtype) self.vars = utils.init_var_map(init_vars, init_path) w0 = [self.vars['embed_%d' % i] for i in range(num_inputs)] xw = tf.concat([tf.sparse_tensor_dense_matmul(self.X[i], w0[i]) for i in range(num_inputs)], 1) z = tf.reduce_sum(tf.reshape(xw, [-1, num_inputs, embed_size]), 1) op = tf.reshape( tf.matmul(tf.reshape(z, [-1, embed_size, 1]), tf.reshape(z, [-1, 1, embed_size])), [-1, embed_size * embed_size]) if layer_norm: # x_mean, x_var = tf.nn.moments(xw, [1], keep_dims=True) # xw = (xw - x_mean) / tf.sqrt(x_var) # x_g = tf.Variable(tf.ones([num_inputs * embed_size]), name='x_g') # x_b = tf.Variable(tf.zeros([num_inputs * embed_size]), name='x_b') # x_g = tf.Print(x_g, [x_g[:10], x_b]) # xw = xw * x_g + x_b p_mean, p_var = tf.nn.moments(op, [1], keep_dims=True) op = (op - p_mean) / tf.sqrt(p_var) p_g = tf.Variable(tf.ones([embed_size**2]), name='p_g') p_b = tf.Variable(tf.zeros([embed_size**2]), name='p_b') # p_g = tf.Print(p_g, [p_g[:10], p_b]) op = op * p_g + p_b l = tf.concat([xw, op], 1) for i in range(len(layer_sizes)): wi = self.vars['w%d' % i] bi = self.vars['b%d' % i] l = tf.nn.dropout( utils.activate( tf.matmul(l, wi) + bi, layer_acts[i]), self.layer_keeps[i]) l = tf.squeeze(l) self.y_prob = tf.sigmoid(l) self.loss = tf.reduce_mean( tf.nn.sigmoid_cross_entropy_with_logits(logits=l, labels=self.y)) if layer_l2 is not None: self.loss += embed_l2 * tf.nn.l2_loss(tf.concat(w0, 0)) for i in range(len(layer_sizes)): wi = self.vars['w%d' % i] self.loss += layer_l2[i] * tf.nn.l2_loss(wi) self.optimizer = utils.get_optimizer(opt_algo, learning_rate, self.loss) config = tf.ConfigProto() config.gpu_options.allow_growth = True self.sess = tf.Session(config=config) tf.global_variables_initializer().run(session=self.sess)
padding="same", activation=None, name='C_L_5') #Layer5.shape = [batch_size,time_steps,5] pred_class = tf.reshape(Layer5, [-1, 5]) out = tf.transpose(Layer5, (1, 0, 2)) #out.shape = [time_steps,batch_size,5] return out, pred_class #------------------------------- network ------------------------------ with tf.name_scope('inputs'): x = tf.placeholder(tf.float32, shape=[None, time_steps, 1], name="inputX") y = tf.sparse_placeholder(tf.int32, name="inputY") seq_length = tf.placeholder(tf.int32, [None]) pred, pred_class = Classifor(x, training=True, reuse=False) update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) with tf.name_scope('loss_'): loss = tf.reduce_mean( tf.nn.ctc_loss(labels=y, inputs=pred, sequence_length=seq_length)) with tf.name_scope('train'): with tf.control_dependencies(update_ops): training = tf.train.AdamOptimizer(learning_rate=Lr).minimize(loss) with tf.name_scope('distance'):
def __init__(self,n_time,dt,tau_m,tau_reset,n_input,n_hidden,n_output,sigma=1.,b0=1): ''' This tensorflow model is supposed to represent any feedforward network of integrate-and-fire neurons. The corresponding spiking model should have a membrane time constant of tau_m and a reset modeled with a self connection decreasing the voltage from threshold (0) to reset (-b) at each spike. The attributes spike_x, spike_y, and spike_z are the placeholders that will be given the spike trains measured in NEST or on the chip. :param n_time: number of time steps :param dt: time step in arbitrary unit (ex. ms) :param tau_m: membrane time constant in a. unit :param tau_reset: reset time constant in a. unit :param n_input: number of input neurons :param n_hidden: number of hidden neurons :param n_output: number of output neurons :param sigma: variance of the random weights at initialization :param b0: size of the bias at initialization ''' # Define the placeholders; their values are taken from the data self.spike_x = tf.sparse_placeholder(tf.float32, [None, 1, n_time, n_input]) self.spike_y = tf.sparse_placeholder(tf.float32, [None, 1, n_time, n_hidden]) self.spike_z = tf.sparse_placeholder(tf.float32, [None, 1, n_time, n_output]) self.out_ = tf.placeholder(tf.float32, [None, n_output]) # We will need to filter with some PSP tt = np.arange(n_time) * dt filter_m = np.exp(-tt/tau_m) filter_m = filter_m.reshape(1,n_time) filter_reset = np.exp(-tt/tau_reset) filter_reset = filter_reset.reshape(1,n_time) # Model parameters self.W_hid = tf.Variable(tf.random_normal([n_input, n_hidden],stddev=sigma * 1./n_input)) self.b_hid = tf.Variable(tf.ones([n_hidden])) self.W_out = tf.Variable(tf.random_normal([n_hidden, n_output],stddev=sigma * 1./n_hidden)) self.b_out = tf.Variable(tf.ones([n_output])) # Build the model of IF neuron with exponential PSP and Exponential Reset # NOTE: tf.nn.conv2d also expects strides and padding arguments; this filtering step is still unfinished psp_x = tf.nn.conv2d(self.spike_x,filter_m) reset_y = tf.nn.conv2d(self.spike_y,filter_reset) ## TODO: Debug this, the product for reset should be done with some kind of outer product self.V_y = tf.matmul(psp_x,self.W_hid) - (1 + reset_y) * self.b_hid # This is the funny part, by defining this the derivative of the error wrt W_hid and W_out are valid # In particular this is equal to spike_y when V_y is 0 at spike time # But it is still differentiable wrt W_hid and has the right derivative self.differentiable_spike_y = self.spike_y * (self.V_y +1) self.psp_y = tf.nn.conv2d(self.differentiable_spike_y,filter_m) self.reset_z = tf.nn.conv2d(self.spike_z,filter_reset) ## TODO: Debug this, the product for reset should be done with some kind of outer product self.V_z = tf.matmul(self.psp_y, self.W_out) - (1 + self.reset_z) * self.b_out self.differentiable_spike_z = self.spike_z * (self.V_z +1) self.z = tf.reduce_sum(self.differentiable_spike_z,reduction_indices=(1,2)) # loss function self.out = tf.nn.softmax(self.z) self.loss = tf.reduce_mean(-tf.reduce_sum(self.out_ * tf.log(self.out), reduction_indices=[1]))
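# The unfinished tf.nn.conv2d calls above are meant to turn spike trains into PSP traces by
# filtering them with a causal exponential kernel. A toy numpy sketch of that filtering for
# a single neuron, under the same exp(-t/tau) kernel assumption; this is an illustration of
# the intended computation, not the fixed TF graph.
import numpy as np

n_time, dt, tau_m = 100, 1.0, 20.0
spikes = np.zeros(n_time); spikes[[10, 40, 45]] = 1.0
kernel = np.exp(-np.arange(n_time) * dt / tau_m)
psp = np.convolve(spikes, kernel)[:n_time]  # causal: each spike adds a decaying trace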
def test_process_input(self): with self.test_session(): sp_feeder = tf.sparse_placeholder(tf.float32) wals_model = factorization_ops.WALSModel(5, 7, 3, num_row_shards=2, num_col_shards=3, regularization=0.01, unobserved_weight=0.1, col_init=self.col_init, row_weights=self.row_wts, col_weights=self.col_wts) wals_model.initialize_op.run() wals_model.worker_init.run() # Split input into multiple sparse tensors with scattered rows. Note that # this split can be different than the factor sharding and the inputs can # consist of non-consecutive rows. Each row needs to include all non-zero # elements in that row. sp_r0 = np_matrix_to_tf_sparse(INPUT_MATRIX, [0, 2]).eval() sp_r1 = np_matrix_to_tf_sparse(INPUT_MATRIX, [1, 4], shuffle=True).eval() sp_r2 = np_matrix_to_tf_sparse(INPUT_MATRIX, [3], shuffle=True).eval() input_scattered_rows = [sp_r0, sp_r1, sp_r2] # Test updating row factors. # Here we feed in scattered rows of the input. wals_model.initialize_row_update_op.run() process_input_op = wals_model.update_row_factors(sp_input=sp_feeder, transpose_input=False)[1] for inp in input_scattered_rows: feed_dict = {sp_feeder: inp} process_input_op.run(feed_dict=feed_dict) row_factors = [x.eval() for x in wals_model.row_factors] self.assertAllClose(row_factors[0], self._row_factors_0, atol=1e-3) self.assertAllClose(row_factors[1], self._row_factors_1, atol=1e-3) # Split input into multiple sparse tensors with scattered columns. Note # that here the elements in the sparse tensors are not ordered and also # do not need to consist of consecutive columns. However, each column # needs to include all non-zero elements in that column. sp_c0 = np_matrix_to_tf_sparse(INPUT_MATRIX, col_slices=[2, 0]).eval() sp_c1 = np_matrix_to_tf_sparse(INPUT_MATRIX, col_slices=[5, 3, 1], shuffle=True).eval() sp_c2 = np_matrix_to_tf_sparse(INPUT_MATRIX, col_slices=[4, 6]).eval() sp_c3 = np_matrix_to_tf_sparse(INPUT_MATRIX, col_slices=[3, 6], shuffle=True).eval() input_scattered_cols = [sp_c0, sp_c1, sp_c2, sp_c3] # Test updating column factors. # Here we feed in scattered columns of the input. wals_model.initialize_col_update_op.run() process_input_op = wals_model.update_col_factors(sp_input=sp_feeder, transpose_input=False)[1] for inp in input_scattered_cols: feed_dict = {sp_feeder: inp} process_input_op.run(feed_dict=feed_dict) col_factors = [x.eval() for x in wals_model.col_factors] self.assertAllClose(col_factors[0], self._col_factors_0, atol=1e-3) self.assertAllClose(col_factors[1], self._col_factors_1, atol=1e-3) self.assertAllClose(col_factors[2], self._col_factors_2, atol=1e-3)
def construct_graph(self): with tf.Graph().as_default(): self.run_ops = [] self.X = tf.placeholder(tf.float32, [None, None, self.input_dim], name='feature') self.Y = tf.sparse_placeholder(tf.int32, name="labels") self.seq_len = tf.placeholder(tf.int32, [None], name='seq_len') self.learning_rate_var = tf.Variable(float( self.nnet_conf.learning_rate), trainable=False, name='learning_rate') if self.use_sgd: optimizer = tf.train.GradientDescentOptimizer( self.learning_rate_var) else: optimizer = tf.train.AdamOptimizer( learning_rate=self.learning_rate_var, beta1=0.9, beta2=0.999, epsilon=1e-08) for i in range(self.num_threads): with tf.device("/gpu:%d" % i): initializer = tf.random_uniform_initializer( -self.nnet_conf.init_scale, self.nnet_conf.init_scale) model = LSTM_Model(self.nnet_conf) mean_loss, ctc_loss, label_error_rate, decoded, softval = model.loss( self.X, self.Y, self.seq_len) if self.use_sgd and self.use_normal: tvars = tf.trainable_variables() grads, _ = tf.clip_by_global_norm( tf.gradients(mean_loss, tvars), self.nnet_conf.grad_clip) train_op = optimizer.apply_gradients( zip(grads, tvars), global_step=tf.contrib.framework. get_or_create_global_step()) else: train_op = optimizer.minimize(mean_loss) run_op = { 'train_op': train_op, 'mean_loss': mean_loss, 'ctc_loss': ctc_loss, 'label_error_rate': label_error_rate } # 'decoded':decoded, # 'softval':softval} self.run_ops.append(run_op) tf.get_variable_scope().reuse_variables() gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.95) self.sess = tf.Session(config=tf.ConfigProto( intra_op_parallelism_threads=self.num_threads, allow_soft_placement=True, log_device_placement=False, gpu_options=gpu_options)) init = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer()) tmp_variables = tf.trainable_variables() self.saver = tf.train.Saver(tmp_variables, max_to_keep=100) #self.saver = tf.train.Saver(max_to_keep=100, sharded = True) if self.restore_training: self.sess.run(init) ckpt = tf.train.get_checkpoint_state(self.checkpoint_dir) if ckpt and ckpt.model_checkpoint_path: logging.info("restore training") self.saver.restore(self.sess, ckpt.model_checkpoint_path) self.num_batch_total = self.get_num( ckpt.model_checkpoint_path) if self.print_trainable_variables == True: print_trainable_variables( self.sess, ckpt.model_checkpoint_path + '.txt') sys.exit(0) logging.info('model:' + ckpt.model_checkpoint_path) logging.info('restore learn_rate:' + str(self.sess.run(self.learning_rate_var))) else: logging.info('No checkpoint file found') self.sess.run(init) logging.info('init learn_rate:' + str(self.sess.run(self.learning_rate_var))) else: self.sess.run(init) self.total_variables = np.sum([ np.prod(v.get_shape().as_list()) for v in tf.trainable_variables() ]) logging.info('total parameters : %d' % self.total_variables)
def __init__(self, w_in, w_out, sense_dim, embedding_dim, batch_size, context_window, learning_rate, bi_w_in): max_context_length = 2 * context_window + 1 eval_mode = tf.placeholder(tf.bool, shape=[]) self.eval_mode = eval_mode self.bi_info = tf.sparse_placeholder(tf.int32) bi_info = tf.sparse_to_dense(self.bi_info.indices, self.bi_info.dense_shape, self.bi_info.values) self.lengths = tf.placeholder(tf.int32, [context_window * 2 + batch_size]) # add self here so we can feed it outside this class context_indices = tf.placeholder( tf.int32, [context_window * 2 + batch_size, max_context_length]) sense_indices = tf.placeholder( tf.int32, [(context_window * 2 + batch_size) * sense_dim]) self.context_indices = context_indices self.sense_indices = sense_indices major_weight = tf.placeholder(tf.float32) reg_weight = tf.placeholder(tf.float32) self.major_weight = major_weight self.reg_weight = reg_weight embedded_context = self.dense_lookup(w_in, context_indices) bi_embedded_context = self.sparse_lookup(bi_w_in, bi_info, self.lengths) # Combine bilingual contextual information embedded_context = tf.cond( eval_mode, lambda: tf.identity(embedded_context), lambda: tf.add(major_weight * embedded_context, (1 - major_weight) * bi_embedded_context)) # [(context_window*2+batch_size), sense_dim, embedding_dim] embedded_word_output = tf.nn.embedding_lookup( w_out, context_indices[:, context_window]) # shape = [(context_window*2+batch_size), sense_dim, 1] sense_score = tf.matmul(embedded_word_output, embedded_context) # [(context_window*2+batch_size), sense_dim] sense_score = tf.squeeze(sense_score) # [context_window*2+batch_size] sense_greedy = tf.argmax(sense_score, 1) self.sense_greedy = sense_greedy target_sense_sampled_indices = tf.placeholder(tf.int32, [batch_size]) self.target_sense_sampled_indices = target_sense_sampled_indices # [batch_size] reward_prob = tf.placeholder(tf.float32, [batch_size], name='reward_logit') self.reward_prob = reward_prob # [(context_window*2+batch_size), sense_dim] sense_prob = tf.nn.softmax(sense_score) self.sense_prob = sense_prob entropy = -tf.multiply(tf.log(sense_prob + 1e-8), sense_prob) entropy = tf.reduce_sum(entropy) * reg_weight # [(context_window*2+batch_size)* sense_dim] sense_score = tf.reshape( sense_score, [(context_window * 2 + batch_size) * sense_dim]) # [batch_size] sense_selected_logit_input = tf.gather(sense_score, target_sense_sampled_indices) # [batch_size, sense_dim] cost = tf.reduce_mean( tf.nn.sigmoid_cross_entropy_with_logits( logits=sense_selected_logit_input, labels=reward_prob)) cost += entropy self.print_cost = cost self.print_ent = entropy optimizer = tf.train.GradientDescentOptimizer(learning_rate) self.update = optimizer.minimize(cost)
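# target_sense_sampled_indices above gathers from sense_score after it is flattened to
# length (context_window*2 + batch_size) * sense_dim, so each entry must encode
# row * sense_dim + chosen_sense. A tiny illustrative helper for that arithmetic;
# row_offset (where the batch words sit among the rows) is an assumption that depends
# on how the batch is packed.
import numpy as np

def flat_sense_indices(sampled_senses, sense_dim, row_offset):
    # sampled_senses: one chosen sense id per batch word
    rows = np.arange(len(sampled_senses)) + row_offset
    return (rows * sense_dim + np.asarray(sampled_senses)).astype(np.int32)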
def test_compatibility(args): args = namedtuple("Args", args.keys())(*args.values()) load_from = args.load_from config_file = load_from + '/results.json' log_file = load_from + '/log.json' test_filename = args.filename + '.json' with open(config_file) as f: config = json.load(f) with open(log_file) as f: log = json.load(f) # Dataloader DATASET = config['dataset'] if DATASET == 'polyvore' or DATASET == 'custom': # load dataset project_dir = os.path.dirname(os.path.abspath(__file__)) if DATASET == 'polyvore': dl = DataLoaderPolyvore(os.path.join(project_dir, 'data', DATASET, 'dataset')) else : dl = DataLoaderCustom(os.path.join(project_dir, 'data', DATASET, 'dataset')) orig_train_features, adj_train, train_labels, train_r_indices, train_c_indices = dl.get_phase('train') full_train_adj = dl.train_adj orig_val_features, adj_val, val_labels, val_r_indices, val_c_indices = dl.get_phase('valid') orig_test_features, adj_test, test_labels, test_r_indices, test_c_indices = dl.get_phase('test') # orig_all_features, adj_all, all_labels, all_r_indices, all_c_indices = dl.get_phase('all') full_test_adj = dl.test_adj dl.setup_test_compatibility(filename=test_filename, resampled=args.resampled) else: raise NotImplementedError('A data loader for dataset {} does not exist'.format(DATASET)) NUMCLASSES = 2 BN_AS_TRAIN = False ADJ_SELF_CONNECTIONS = True def norm_adj(adj_to_norm): return normalize_nonsym_adj(adj_to_norm) train_features, mean, std = dl.normalize_features(orig_train_features, get_moments=True) val_features = dl.normalize_features(orig_val_features, mean=mean, std=std) test_features = dl.normalize_features(orig_test_features, mean=mean, std=std) # all_features = dl.normalize_features(orig_all_features, mean=mean, std=std) train_support = get_degree_supports(adj_train, config['degree'], adj_self_con=ADJ_SELF_CONNECTIONS) val_support = get_degree_supports(adj_val, config['degree'], adj_self_con=ADJ_SELF_CONNECTIONS) test_support = get_degree_supports(adj_test, config['degree'], adj_self_con=ADJ_SELF_CONNECTIONS) for i in range(1, len(train_support)): train_support[i] = norm_adj(train_support[i]) val_support[i] = norm_adj(val_support[i]) test_support[i] = norm_adj(test_support[i]) num_support = len(train_support) placeholders = { 'row_indices': tf.placeholder(tf.int32, shape=(None,)), 'col_indices': tf.placeholder(tf.int32, shape=(None,)), 'dropout': tf.placeholder_with_default(0., shape=()), 'weight_decay': tf.placeholder_with_default(0., shape=()), 'is_train': tf.placeholder_with_default(True, shape=()), 'support': [tf.sparse_placeholder(tf.float32, shape=(None, None)) for sup in range(num_support)], 'node_features': tf.placeholder(tf.float32, shape=(None, None)), 'labels': tf.placeholder(tf.float32, shape=(None,)) } model = CompatibilityGAE(placeholders, input_dim=train_features.shape[1], num_classes=NUMCLASSES, num_support=num_support, hidden=config['hidden'], learning_rate=config['learning_rate'], logging=True, batch_norm=config['batch_norm']) # Construct feed dicts for train, val and test phases train_feed_dict = construct_feed_dict(placeholders, train_features, train_support, train_labels, train_r_indices, train_c_indices, config['dropout']) val_feed_dict = construct_feed_dict(placeholders, val_features, val_support, val_labels, val_r_indices, val_c_indices, 0., is_train=BN_AS_TRAIN) test_feed_dict = construct_feed_dict(placeholders, test_features, test_support, test_labels, test_r_indices, test_c_indices, 0., is_train=BN_AS_TRAIN) # Add ops to save and restore all the variables. 
saver = tf.train.Saver() def eval(): # use this as a control value, if the model is ok, the value will be the same as in log val_avg_loss, val_acc, conf, pred = sess.run([model.loss, model.accuracy, model.confmat, model.predict()], feed_dict=val_feed_dict) print("val_loss=", "{:.5f}".format(val_avg_loss), "val_acc=", "{:.5f}".format(val_acc)) with tf.Session() as sess: saver.restore(sess, os.path.join(load_from, 'best_epoch.ckpt')) count = 0 preds = [] labels = [] # evaluate the model for accuracy prediction eval() prob_act = tf.nn.sigmoid K = args.k for outfit in dl.comp_outfits: before_item = time.time() items, score = outfit num_new = test_features.shape[0] new_adj = sp.csr_matrix((num_new, num_new)) # no connections if args.k > 0: # add edges to the adj matrix available_adj = dl.test_adj.copy() available_adj = available_adj.tolil() i = 0 for idx_from in items[:-1]: for idx_to in items[i+1:]: # remove outfit edges, they won't be expanded available_adj[idx_to, idx_from] = 0 available_adj[idx_from, idx_to] = 0 i += 1 available_adj = available_adj.tocsr() available_adj.eliminate_zeros() if args.subset: # use only a subset (of size 3) of the outfit items = np.random.choice(items, 3) new_features = test_features # predict edges between the items query_r = [] query_c = [] i = 0 item_indexes = items for idx_from in item_indexes[:-1]: for idx_to in item_indexes[i+1:]: query_r.append(idx_from) query_c.append(idx_to) i += 1 if args.k > 0: G = Graph(available_adj) nodes_to_expand = np.unique(items) for node in nodes_to_expand: edges = G.run_K_BFS(node, K) for edge in edges: u, v = edge new_adj[u, v] = 1 new_adj[v, u] = 1 query_r = np.array(query_r) query_c = np.array(query_c) new_adj = new_adj.tocsr() new_support = get_degree_supports(new_adj, config['degree'], adj_self_con=ADJ_SELF_CONNECTIONS, verbose=False) for i in range(1, len(new_support)): new_support[i] = norm_adj(new_support[i]) new_support = [sparse_to_tuple(sup) for sup in new_support] new_feed_dict = construct_feed_dict(placeholders, new_features, new_support, [], query_r, query_c, 0., is_train=BN_AS_TRAIN) pred = sess.run(prob_act(model.outputs), feed_dict=new_feed_dict) # if pred != pred: # print(new_features, new_support, query_r, query_c) # break predicted_score = pred.mean() print("[{}] Mean scores between outfit: {:.4f}, label: {}".format(count, predicted_score, score)) # TODO: remove this print print("Total Elapsed: {:.4f}".format(time.time() - before_item)) count += 1 preds.append(predicted_score) labels.append(score) preds = np.array(preds) labels = np.array(labels) AUC = compute_auc(preds, labels) # use this as a control value, if the model is ok, the value will be the same as in log eval() print('The AUC compat score is: {}'.format(AUC)) print('Best val score saved in log: {}'.format(config['best_val_score'])) print('Last val score saved in log: {}'.format(log['val']['acc'][-1])) print("mean positive prediction: {}".format(preds[labels.astype(bool)].mean())) print("mean negative prediction: {}".format(preds[np.logical_not(labels.astype(bool))].mean()))
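# The script above summarizes the outfit scores with compute_auc. A minimal sketch of such
# a helper on top of scikit-learn, assuming binary labels and real-valued scores; the
# project's own implementation is not shown here and may differ.
from sklearn.metrics import roc_auc_score

def compute_auc_sketch(preds, labels):
    return roc_auc_score(labels, preds)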
def __init__(self, field_sizes=None, embed_size=10, layer_sizes=None, layer_acts=None,
             drop_out=None, embed_l2=None, layer_l2=None, init_path=None,
             opt_algo='gd', learning_rate=1e-2, random_seed=None):
    Model.__init__(self)
    init_vars = []
    num_inputs = len(field_sizes)
    for i in range(num_inputs):
        init_vars.append(('embed_%d' % i, [field_sizes[i], embed_size], 'xavier', dtype))
    num_pairs = int(num_inputs * (num_inputs - 1) / 2)
    node_in = num_inputs * embed_size + num_pairs
    # node_in = num_inputs * (embed_size + num_inputs)
    for i in range(len(layer_sizes)):
        init_vars.append(('w%d' % i, [node_in, layer_sizes[i]], 'xavier', dtype))
        init_vars.append(('b%d' % i, [layer_sizes[i]], 'zero', dtype))
        node_in = layer_sizes[i]
    self.graph = tf.Graph()
    with self.graph.as_default():
        if random_seed is not None:
            tf.set_random_seed(random_seed)
        self.X = [tf.sparse_placeholder(dtype) for i in range(num_inputs)]
        self.y = tf.placeholder(dtype)
        self.keep_prob_train = 1 - np.array(drop_out)
        self.keep_prob_test = np.ones_like(drop_out)
        self.layer_keeps = tf.placeholder(dtype)
        self.vars = utils.init_var_map(init_vars, init_path)
        w0 = [self.vars['embed_%d' % i] for i in range(num_inputs)]
        xw = tf.concat([tf.sparse_tensor_dense_matmul(self.X[i], w0[i])
                        for i in range(num_inputs)], 1)
        xw3d = tf.reshape(xw, [-1, num_inputs, embed_size])

        row = []
        col = []
        for i in range(num_inputs - 1):
            for j in range(i + 1, num_inputs):
                row.append(i)
                col.append(j)

        # batch * pair * k
        p = tf.transpose(
            # pair * batch * k
            tf.gather(
                # num * batch * k
                tf.transpose(xw3d, [1, 0, 2]),
                row),
            [1, 0, 2])
        # batch * pair * k
        q = tf.transpose(
            tf.gather(
                tf.transpose(xw3d, [1, 0, 2]),
                col),
            [1, 0, 2])
        p = tf.reshape(p, [-1, num_pairs, embed_size])
        q = tf.reshape(q, [-1, num_pairs, embed_size])
        # inner product of every feature pair: batch * pair
        ip = tf.reshape(tf.reduce_sum(p * q, [-1]), [-1, num_pairs])
        # simple but redundant alternative:
        # batch * n * 1 * k, batch * 1 * n * k
        # ip = tf.reshape(
        #     tf.reduce_sum(
        #         tf.expand_dims(xw3d, 2) *
        #         tf.expand_dims(xw3d, 1),
        #         3),
        #     [-1, num_inputs**2])
        l = tf.concat([xw, ip], 1)

        for i in range(len(layer_sizes)):
            wi = self.vars['w%d' % i]
            bi = self.vars['b%d' % i]
            l = tf.nn.dropout(
                utils.activate(tf.matmul(l, wi) + bi, layer_acts[i]),
                self.layer_keeps[i])

        l = tf.squeeze(l)
        self.y_prob = tf.sigmoid(l)

        self.loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(logits=l, labels=self.y))
        if layer_l2 is not None:
            self.loss += embed_l2 * tf.nn.l2_loss(xw)
            for i in range(len(layer_sizes)):
                wi = self.vars['w%d' % i]
                self.loss += layer_l2[i] * tf.nn.l2_loss(wi)
        self.optimizer = utils.get_optimizer(opt_algo, learning_rate, self.loss)

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        self.sess = tf.Session(config=config)
        tf.global_variables_initializer().run(session=self.sess)
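A hedged sketch of a single training step for the model above. The placeholders and keep-prob arrays come straight from the constructor; the helper name and the idea of passing one tf.SparseTensorValue per field are assumptions about the surrounding training loop, which is not shown here.

    # Sketch only: X_fields is one tf.SparseTensorValue per input field.
    def train_step(model, X_fields, y):
        feed_dict = dict(zip(model.X, X_fields))
        feed_dict[model.y] = y
        # Use the training keep probabilities (dropout on).
        feed_dict[model.layer_keeps] = model.keep_prob_train
        _, loss = model.sess.run([model.optimizer, model.loss], feed_dict=feed_dict)
        return loss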
street_suffs = [random.choice(street_types) for i in range(n)]
zips = [random.choice(rand_zips) for i in range(n)]
full_streets = [str(x) + ' ' + y + ' ' + z for x, y, z in zip(numbers, streets, street_suffs)]
reference_data = [list(x) for x in zip(full_streets, zips)]

# Generate test dataset with some typos
typo_streets = [create_typo(x) for x in streets]
typo_full_streets = [str(x) + ' ' + y + ' ' + z for x, y, z in zip(numbers, typo_streets, street_suffs)]
test_data = [list(x) for x in zip(typo_full_streets, zips)]

# Now we can perform address matching
# Create graph
sess = tf.Session()

# Placeholders
test_address = tf.sparse_placeholder(dtype=tf.string)
test_zip = tf.placeholder(shape=[None, 1], dtype=tf.float32)
ref_address = tf.sparse_placeholder(dtype=tf.string)
ref_zip = tf.placeholder(shape=[None, n], dtype=tf.float32)

# Declare zip-code distance for a test zip and reference set
zip_dist = tf.square(tf.subtract(ref_zip, test_zip))

# Declare edit distance for address
address_dist = tf.edit_distance(test_address, ref_address, normalize=True)

# Create similarity scores
zip_max = tf.gather(tf.squeeze(zip_dist), tf.argmax(zip_dist, 1))
zip_min = tf.gather(tf.squeeze(zip_dist), tf.argmin(zip_dist, 1))
zip_sim = tf.div(tf.subtract(zip_max, zip_dist), tf.subtract(zip_max, zip_min))
address_sim = tf.subtract(1., address_dist)
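The snippet stops after computing the two per-field similarities. One plausible way to finish the matcher is a weighted combination followed by an argmax over the reference set; the 50/50 weighting and the top-match lookup below are assumptions, not part of the original code.

    # Sketch only: combine zip and address similarity, then pick the best
    # reference entry. address_weight is an assumed hyperparameter.
    address_weight = 0.5
    zip_weight = 1. - address_weight
    weighted_sim = tf.add(tf.transpose(tf.multiply(address_weight, address_sim)),
                          tf.multiply(zip_weight, zip_sim))
    top_match_index = tf.argmax(weighted_sim, 1)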
def __init__(self, vocab_size, positional_embeddings=False, beam_width=1, alignment_history=False):
    """Initialize global variables and compute graph."""
    # vocabulary parameters
    self.beam_width = beam_width
    self.attention_mode = 0
    self.vocab_size = vocab_size
    self.learning_rate = tf.placeholder(tf.float32)

    # input image
    self.input_image = tf.placeholder(tf.float32, shape=(None, 46, None, 1), name='img_data')
    self.batch_size = tf.shape(self.input_image)[0]

    # attention part placeholders
    self.att_label = tf.placeholder(tf.int32, shape=[None, None], name='att_label')
    self.att_train_length = tf.placeholder(tf.int32, shape=[None], name='att_train_length')

    # ctc part placeholders
    self.ctc_label = tf.sparse_placeholder(tf.int32, name='ctc_label')
    self.ctc_feature_length = tf.placeholder(tf.int32, shape=[None], name='ctc_feature_length')
    self.max_dec_iteration = tf.placeholder(tf.int32, shape=[1])

    self.enc_lstm_dim = 256
    self.dec_lstm_dim = 512
    self.embedding_size = 512
    self.ctc_loss_weights = 0.2
    self.att_loss_weights = 1 - self.ctc_loss_weights
    self.wd = 0.00002
    self.momentum = 0.9

    self.embedding = tf.get_variable("embedding", [self.vocab_size, self.embedding_size])
    # NOTE: `mode` is not defined in this scope; it is presumably a
    # module-level train/eval flag expected by convnet_layers.
    self.cnn_out, self.sequence_len = convnet_layers(self.input_image, self.ctc_feature_length, mode)
    self.enc_outputs = rnn_layers(self.cnn_out, self.sequence_len, self.enc_lstm_dim)

    attention_weights_depth = 2 * self.enc_lstm_dim
    attention_layer_size = 2 * self.enc_lstm_dim
    attention_states = tf.reshape(self.enc_outputs, [self.batch_size, -1, 2 * self.enc_lstm_dim])
    attention_states_tiled = tile_batch(attention_states, self.beam_width)  # for beam search
    attention_mechanism = BahdanauAttention(attention_weights_depth, attention_states_tiled)
    dec_lstm_cell = tf.nn.rnn_cell.LSTMCell(self.dec_lstm_dim)
    self.cell = AttentionWrapper(cell=dec_lstm_cell,
                                 attention_mechanism=attention_mechanism,
                                 attention_layer_size=attention_layer_size,
                                 alignment_history=alignment_history)
    self.setup_decoder()
    self.final_outputs, self.final_state, _ = dynamic_decode(
        self.decoder, maximum_iterations=self.max_dec_iteration[0] - 1)
    self.ctc_loss_branch()
    self.finalize_model()
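Feeding `ctc_label` requires a tf.SparseTensorValue rather than a dense array. A common conversion helper looks like the sketch below; it is an assumption about the surrounding input pipeline, which is not shown in this snippet.

    # Sketch only: pack a batch of variable-length label sequences into the
    # (indices, values, dense_shape) triple a sparse placeholder expects.
    import numpy as np
    import tensorflow as tf

    def to_sparse_tensor_value(sequences):
        indices, values = [], []
        for b, seq in enumerate(sequences):
            for t, v in enumerate(seq):
                indices.append([b, t])
                values.append(v)
        dense_shape = [len(sequences), max(len(s) for s in sequences)]
        return tf.SparseTensorValue(np.array(indices, dtype=np.int64),
                                    np.array(values, dtype=np.int32),
                                    np.array(dense_shape, dtype=np.int64))

    # e.g. feed_dict = {model.ctc_label: to_sparse_tensor_value([[3, 4, 5], [0, 1]])}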
import tensorflow as tf
from tensorflow.python.framework import graph_util
from constants import c

model_folder = 'Path/to/your/model/folder'
num_features = c.LSTM.FEATURES
num_hidden = c.LSTM.HIDDEN
batch_size = 1
num_layers = 1
num_classes = 28

# Construct the graph. For detailed comments, please see lstm_ctc.py
inputs = tf.placeholder(tf.float32, [batch_size, None, num_features], name='InputData')
targets = tf.sparse_placeholder(tf.int32, name='LabelData')
seq_len = tf.placeholder(tf.int32, [None], name='SeqLen')

# Build one LSTMCell per layer: reusing a single cell object across layers
# (the old `[cell] * num_layers` idiom) would share weights between layers
# and breaks for num_layers > 1.
stack = tf.contrib.rnn.MultiRNNCell(
    [tf.contrib.rnn.LSTMCell(num_hidden) for _ in range(num_layers)],
    state_is_tuple=True)
outputs, _ = tf.nn.dynamic_rnn(stack, inputs, seq_len, dtype=tf.float32, time_major=False)

shape = tf.shape(inputs)
batch_s, max_time_steps = shape[0], shape[1]

outputs = tf.reshape(outputs, [-1, num_hidden])
W = tf.Variable(tf.truncated_normal([num_hidden, num_classes], stddev=0.1))
b = tf.Variable(tf.constant(0., shape=[num_classes]))
logits = tf.matmul(outputs, W) + b
logits = tf.reshape(logits, [batch_s, -1, num_classes])
# CTC ops expect time-major input: [max_time, batch_size, num_classes]
logits = tf.transpose(logits, (1, 0, 2))
decoded, _ = tf.nn.ctc_greedy_decoder(logits, seq_len)
y = tf.sparse_to_dense(decoded[0].indices, decoded[0].dense_shape, decoded[0].values)
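Given the graph_util import and the model_folder path, this script appears to lead up to freezing the graph. A hedged sketch of that step follows; the checkpoint lookup and output file name are assumptions, and the output node is taken from the `y` tensor built above.

    # Sketch only: restore the latest checkpoint from model_folder, fold the
    # variables into constants, and write a frozen GraphDef to disk.
    with tf.Session() as sess:
        saver = tf.train.Saver()
        saver.restore(sess, tf.train.latest_checkpoint(model_folder))
        frozen = graph_util.convert_variables_to_constants(
            sess, sess.graph_def, [y.op.name])
        with tf.gfile.GFile(model_folder + '/frozen.pb', 'wb') as f:
            f.write(frozen.SerializeToString())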
def fit_model(adj, val_edges, val_edges_false, test_edges, test_edges_false, model_name):
    # Lists to collect average results
    mean_roc = []
    mean_ap = []
    mean_time = []

    print(f'Loading data... n: {adj.shape[0]}, m: {adj.nnz // 2}')

    # The entire training process is repeated FLAGS.nb_run times
    for i in range(FLAGS.nb_run):
        # Start computation of running times
        t_start = time.time()

        # Preprocessing and initialization
        if FLAGS.verbose:
            print("Preprocessing and Initializing...")
        # Compute number of nodes
        num_nodes = adj.shape[0]
        # If features are not used, replace feature matrix by identity matrix
        if not FLAGS.features:
            features = sp.identity(adj.shape[0])
        # Preprocessing on node features
        features = sparse_to_tuple(features)
        num_features = features[2][1]
        features_nonzero = features[1].shape[0]

        # Define placeholders
        placeholders = {
            'features': tf.sparse_placeholder(tf.float32),
            'adj': tf.sparse_placeholder(tf.float32),
            'adj_orig': tf.sparse_placeholder(tf.float32),
            'dropout': tf.placeholder_with_default(0., shape=())
        }

        # Create model
        model = None
        model_name = model_name.lower()
        if model_name == 'gcn_ae':
            # Standard Graph Autoencoder
            model = GCNModelAE(placeholders, num_features, features_nonzero)
        elif model_name == 'gcn_vae':
            # Standard Graph Variational Autoencoder
            model = GCNModelVAE(placeholders, num_features, num_nodes, features_nonzero)
        elif model_name == 'source_target_gcn_ae':
            # Source-Target Graph Autoencoder
            if FLAGS.dimension % 2 != 0:
                raise ValueError('Dimension must be even for Source-Target models')
            model = SourceTargetGCNModelAE(placeholders, num_features, features_nonzero)
        elif model_name == 'source_target_gcn_vae':
            # Source-Target Graph Variational Autoencoder
            if FLAGS.dimension % 2 != 0:
                raise ValueError('Dimension must be even for Source-Target models')
            model = SourceTargetGCNModelVAE(placeholders, num_features, num_nodes, features_nonzero)
        elif model_name == 'gravity_gcn_ae':
            # Gravity-Inspired Graph Autoencoder
            model = GravityGCNModelAE(placeholders, num_features, features_nonzero)
        elif model_name == 'gravity_gcn_vae':
            # Gravity-Inspired Graph Variational Autoencoder
            model = GravityGCNModelVAE(placeholders, num_features, num_nodes, features_nonzero)
        else:
            raise ValueError('Undefined model!')

        # Optimizer (see tkipf/gae original GAE repository for details)
        pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum()
        norm = adj.shape[0] * adj.shape[0] / float(
            (adj.shape[0] * adj.shape[0] - adj.sum()) * 2)
        with tf.name_scope('optimizer'):
            # Optimizer for Non-Variational Autoencoders
            if model_name in ('gcn_ae', 'source_target_gcn_ae', 'gravity_gcn_ae'):
                opt = OptimizerAE(preds=model.reconstructions,
                                  labels=tf.reshape(
                                      tf.sparse_tensor_to_dense(placeholders['adj_orig'],
                                                                validate_indices=False), [-1]),
                                  pos_weight=pos_weight,
                                  norm=norm)
            # Optimizer for Variational Autoencoders
            elif model_name in ('gcn_vae', 'source_target_gcn_vae', 'gravity_gcn_vae'):
                opt = OptimizerVAE(preds=model.reconstructions,
                                   labels=tf.reshape(
                                       tf.sparse_tensor_to_dense(placeholders['adj_orig'],
                                                                 validate_indices=False), [-1]),
                                   model=model,
                                   num_nodes=num_nodes,
                                   pos_weight=pos_weight,
                                   norm=norm)

        # Normalization and preprocessing on adjacency matrix
        adj_norm = preprocess_graph(adj)
        adj_label = sparse_to_tuple(adj + sp.eye(adj.shape[0]))

        # Initialize TF session
        sess = tf.Session()
        sess.run(tf.global_variables_initializer())

        # Model training
        print(f"Training {model_name}...")
        t = time.time()
        print_every = 50
        for epoch in range(FLAGS.epochs + 1):
            # Construct feed dictionary
            feed_dict = construct_feed_dict(adj_norm, adj_label, features, placeholders)
            feed_dict.update({placeholders['dropout']: FLAGS.dropout})
            # Weight update
            outs = sess.run([opt.opt_op, opt.cost, opt.accuracy], feed_dict=feed_dict)
            # Compute average loss
            avg_cost = outs[1]
            if epoch > 0 and epoch % print_every == 0 and FLAGS.verbose:
                # Display epoch information
                print("Epoch:", '%04d' % (epoch + 1),
                      "train_loss=", "{:.5f}".format(avg_cost),
                      "time=", "{:.5f}".format(time.time() - t))
                # Validation (implemented for Task 1 only)
                if FLAGS.validation and FLAGS.task == 'link_prediction':
                    feed_dict.update({placeholders['dropout']: 0})
                    emb = sess.run(model.z_mean, feed_dict=feed_dict)
                    feed_dict.update({placeholders['dropout']: FLAGS.dropout})
                    val_roc, val_ap = compute_scores(val_edges, val_edges_false, emb)
                    print("val_roc=", "{:.5f}".format(val_roc),
                          "val_ap=", "{:.5f}".format(val_ap))

        # Flag to compute Graph AE/VAE training time
        t_model = time.time()

        # Get embedding from model
        emb = sess.run(model.z_mean, feed_dict=feed_dict)

        # Test model
        print("Testing model...")
        if FLAGS.task == 'link_prediction':
            # Compute ROC and AP scores on test sets
            roc_score, ap_score = compute_scores(test_edges, test_edges_false, emb)
            # Append to list of scores over all runs
            mean_roc.append(roc_score)
            mean_ap.append(ap_score)

        # Close the TensorFlow session and free up resources
        sess.close()

    prob_mat = get_prob_mat_from_emb(emb)
    return prob_mat
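get_prob_mat_from_emb is not defined in this snippet. Under the standard GAE inner-product decoder it could look like the sketch below; this is an assumption (the gravity and source-target variants decode asymmetrically, so they would need a different formula).

    # Sketch only: sigmoid(z_i . z_j) for every node pair, the standard
    # symmetric GAE decoder.
    import numpy as np

    def get_prob_mat_from_emb(emb):
        logits = emb @ emb.T                 # pairwise dot products
        return 1. / (1. + np.exp(-logits))   # sigmoid -> edge probabilities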
def gae_scores(
        adj_sparse,
        train_test_split,
        features_matrix=None,
        LEARNING_RATE=0.01,
        EPOCHS=200,
        HIDDEN1_DIM=32,
        HIDDEN2_DIM=16,
        DROPOUT=0,
        edge_score_mode="dot-product",
        verbose=1,
        dtype=tf.float32):
    # Unpack train-test split
    adj_train, train_edges, train_edges_false, val_edges, val_edges_false, \
        test_edges, test_edges_false = train_test_split

    if verbose >= 1:
        print('GAE preprocessing...')

    start_time = time.time()

    # Train on CPU (hide GPU) due to memory constraints
    os.environ['CUDA_VISIBLE_DEVICES'] = ""

    # Convert features from normal matrix --> sparse matrix --> tuple
    # features_tuple contains: (list of matrix coordinates, list of values, matrix dimensions)
    if features_matrix is None:
        x = sp.lil_matrix(np.identity(adj_sparse.shape[0]))
    else:
        x = sp.lil_matrix(features_matrix)
    features_tuple = sparse_to_tuple(x)
    features_shape = features_tuple[2]

    # Get graph attributes (to feed into model)
    num_nodes = adj_sparse.shape[0]  # number of nodes in adjacency matrix
    num_features = features_shape[1]  # number of features (columns of features matrix)
    features_nonzero = features_tuple[1].shape[0]  # number of non-zero entries (length of values list)

    # Store original adjacency matrix (without diagonal entries) for later
    adj_orig = deepcopy(adj_sparse)
    adj_orig = adj_orig - sp.dia_matrix((adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape)
    adj_orig.eliminate_zeros()

    # Normalize adjacency matrix
    adj_norm = preprocess_graph(adj_train)

    # Add in diagonals
    adj_label = adj_train + sp.eye(adj_train.shape[0])
    adj_label = sparse_to_tuple(adj_label)

    # Define placeholders
    placeholders = {
        # TODO: try making these dense from the get-go
        'features': tf.sparse_placeholder(tf.float16),
        'adj': tf.sparse_placeholder(tf.float16),
        'adj_orig': tf.sparse_placeholder(tf.float16),
        'dropout': tf.placeholder_with_default(0., shape=())
    }

    # How much to weigh positive examples (true edges) in the cost function.
    # Weigh less-frequent classes higher, to prevent model output bias:
    # pos_weight = (num. negative samples) / (num. positive samples)
    pos_weight = float(adj_sparse.shape[0] * adj_sparse.shape[0] - adj_sparse.sum()) / adj_sparse.sum()

    # Normalize (scale) average weighted cost
    norm = adj_sparse.shape[0] * adj_sparse.shape[0] / float(
        (adj_sparse.shape[0] * adj_sparse.shape[0] - adj_sparse.sum()) * 2)

    if verbose >= 1:
        print('Initializing GAE model...')

    # Create VAE model
    model = GCNModelVAE(placeholders, num_features, num_nodes, features_nonzero,
                        HIDDEN1_DIM, HIDDEN2_DIM, dtype=dtype, flatten_output=False)

    opt = OptimizerVAE(preds=model.reconstructions,
                       labels=tf.sparse_tensor_to_dense(placeholders['adj_orig'],
                                                        validate_indices=False),
                       model=model,
                       num_nodes=num_nodes,
                       pos_weight=pos_weight,
                       norm=norm,
                       learning_rate=LEARNING_RATE,
                       dtype=tf.float16)

    cost_val = []
    acc_val = []
    val_roc_score = []
    prev_embs = []

    # Initialize session
    sess = tf.Session()

    if verbose >= 1:
        # Print total number of trainable parameters
        total_parameters = 0
        for variable in tf.trainable_variables():
            # shape is an array of tf.Dimension
            shape = variable.get_shape()
            print("Variable shape: ", shape)
            variable_parameters = 1
            for dim in shape:
                print("Current dimension: ", dim)
                variable_parameters *= dim.value
            print("Variable params: ", variable_parameters)
            total_parameters += variable_parameters
            print('')
        print("TOTAL TRAINABLE PARAMS: ", total_parameters)
        print('Initializing TF variables...')

    sess.run(tf.global_variables_initializer())

    if verbose >= 1:
        print('Starting GAE training!')
    # Train model
    for epoch in range(EPOCHS):
        t = time.time()

        # Construct feed dictionary
        feed_dict = construct_feed_dict(adj_norm, adj_label, features_tuple, placeholders)
        feed_dict.update({placeholders['dropout']: DROPOUT})

        # Run single weight update
        outs = sess.run([opt.opt_op, opt.cost, opt.accuracy], feed_dict=feed_dict)

        # Compute average loss
        avg_cost = outs[1]
        avg_accuracy = outs[2]

        # Evaluate predictions
        feed_dict.update({placeholders['dropout']: 0})
        gae_emb = sess.run(model.z_mean, feed_dict=feed_dict)
        prev_embs.append(gae_emb)
        gae_score_matrix = np.dot(gae_emb, gae_emb.T)

        roc_curr, ap_curr = get_roc_score(val_edges, val_edges_false, gae_score_matrix, apply_sigmoid=True)
        val_roc_score.append(roc_curr)

        # Print results for this epoch
        if verbose == 2:
            print("Epoch:", '%04d' % (epoch + 1),
                  "train_loss=", "{:.5f}".format(avg_cost),
                  "train_acc=", "{:.5f}".format(avg_accuracy),
                  "val_roc=", "{:.5f}".format(val_roc_score[-1]),
                  "val_ap=", "{:.5f}".format(ap_curr),
                  "time=", "{:.5f}".format(time.time() - t))

    if verbose == 2:
        print("Optimization Finished!")

    # Compute final embedding
    feed_dict.update({placeholders['dropout']: 0})
    gae_emb = sess.run(model.z_mean, feed_dict=feed_dict)

    # Dot product edge scores (default)
    if edge_score_mode == "dot-product":
        gae_score_matrix = np.dot(gae_emb, gae_emb.T)

        runtime = time.time() - start_time

        # Calculate final scores
        gae_val_roc, gae_val_ap = get_roc_score(val_edges, val_edges_false, gae_score_matrix)
        gae_test_roc, gae_test_ap = get_roc_score(test_edges, test_edges_false, gae_score_matrix)

    # Take bootstrapped edge embeddings (via Hadamard product)
    elif edge_score_mode == "edge-emb":
        def get_edge_embeddings(edge_list):
            embs = []
            for edge in edge_list:
                node1, node2 = edge[0], edge[1]
                emb1 = gae_emb[node1]
                emb2 = gae_emb[node2]
                edge_emb = np.multiply(emb1, emb2)
                embs.append(edge_emb)
            return np.array(embs)

        # Train-set edge embeddings
        pos_train_edge_embs = get_edge_embeddings(train_edges)
        neg_train_edge_embs = get_edge_embeddings(train_edges_false)
        train_edge_embs = np.concatenate([pos_train_edge_embs, neg_train_edge_embs])

        # Create train-set edge labels: 1 = real edge, 0 = false edge
        train_edge_labels = np.concatenate([np.ones(len(train_edges)), np.zeros(len(train_edges_false))])

        # Val-set edge embeddings, labels
        if len(val_edges) > 0 and len(val_edges_false) > 0:
            pos_val_edge_embs = get_edge_embeddings(val_edges)
            neg_val_edge_embs = get_edge_embeddings(val_edges_false)
            val_edge_embs = np.concatenate([pos_val_edge_embs, neg_val_edge_embs])
            val_edge_labels = np.concatenate([np.ones(len(val_edges)), np.zeros(len(val_edges_false))])

        # Test-set edge embeddings, labels
        pos_test_edge_embs = get_edge_embeddings(test_edges)
        neg_test_edge_embs = get_edge_embeddings(test_edges_false)
        test_edge_embs = np.concatenate([pos_test_edge_embs, neg_test_edge_embs])
        # Create test-set edge labels: 1 = real edge, 0 = false edge
        test_edge_labels = np.concatenate([np.ones(len(test_edges)),
                                           np.zeros(len(test_edges_false))])

        # Train logistic regression classifier on train-set edge embeddings
        edge_classifier = LogisticRegression(random_state=0)
        edge_classifier.fit(train_edge_embs, train_edge_labels)

        # Predicted edge scores: probability of being of class "1" (real edge)
        if len(val_edges) > 0 and len(val_edges_false) > 0:
            val_preds = edge_classifier.predict_proba(val_edge_embs)[:, 1]
        test_preds = edge_classifier.predict_proba(test_edge_embs)[:, 1]

        runtime = time.time() - start_time

        # Calculate scores
        if len(val_edges) > 0 and len(val_edges_false) > 0:
            gae_val_roc = roc_auc_score(val_edge_labels, val_preds)
            gae_val_ap = average_precision_score(val_edge_labels, val_preds)
        else:
            gae_val_roc = None
            gae_val_ap = None

        gae_test_roc = roc_auc_score(test_edge_labels, test_preds)
        gae_test_ap = average_precision_score(test_edge_labels, test_preds)

    # Record scores
    gae_scores = {}
    gae_scores['test_roc'] = gae_test_roc
    gae_scores['test_ap'] = gae_test_ap
    gae_scores['val_roc'] = gae_val_roc
    gae_scores['val_ap'] = gae_val_ap
    gae_scores['val_roc_per_epoch'] = val_roc_score
    gae_scores['runtime'] = runtime
    return gae_scores
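A hedged usage sketch for gae_scores. train_test_split is the 7-tuple unpacked at the top of the function; the mask_test_edges helper and its keyword arguments are assumptions about the surrounding pipeline, which is not shown here.

    # Sketch only: adj_sparse is a scipy.sparse adjacency of the full graph.
    train_test_split = mask_test_edges(adj_sparse, test_frac=0.1, val_frac=0.05)
    scores = gae_scores(adj_sparse, train_test_split,
                        edge_score_mode="edge-emb", verbose=1)
    print(scores['test_roc'], scores['test_ap'])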
flags.DEFINE_integer('feat_dim', 963, 'Number of units in feature layer.')
flags.DEFINE_integer('coord_dim', 3, 'Number of units in output layer.')
flags.DEFINE_float('weight_decay', 5e-6, 'Weight for L2 loss.')

# Define placeholders (dict) and model
num_blocks = 3
num_supports = 2
placeholders = {
    'features': tf.placeholder(tf.float32, shape=(None, 3)),
    'img_inp': tf.placeholder(tf.float32, shape=(224, 224, 3)),
    'labels': tf.placeholder(tf.float32, shape=(None, 6)),
    'support1': [tf.sparse_placeholder(tf.float32) for _ in range(num_supports)],
    'support2': [tf.sparse_placeholder(tf.float32) for _ in range(num_supports)],
    'support3': [tf.sparse_placeholder(tf.float32) for _ in range(num_supports)],
    'faces': [tf.placeholder(tf.int32, shape=(None, 4)) for _ in range(num_blocks)],
    'edges': [tf.placeholder(tf.int32, shape=(None, 2)) for _ in range(num_blocks)],
    'lape_idx': [tf.placeholder(tf.int32, shape=(None, 10)) for _ in range(num_blocks)],  # for laplace term
    'pool_idx': [tf.placeholder(tf.int32, shape=(None, 2)) for _ in range(num_blocks - 1)],  # for unpooling
    # The snippet is truncated at 'dropout'; by analogy with the other
    # placeholder dicts in this document, a scalar default is presumably used:
    'dropout': tf.placeholder_with_default(0., shape=())
}
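Several snippets in this document call sparse_to_tuple to turn a scipy matrix into the (coords, values, shape) triple that tf.sparse_placeholder accepts in a feed dict. The implementation below is the common convention (as in tkipf/gae); it is a sketch assumed from that convention, not code from this file.

    # Sketch only: convert a scipy.sparse matrix to the tuple form a
    # sparse placeholder can be fed with.
    import numpy as np
    import scipy.sparse as sp

    def sparse_to_tuple(sparse_mx):
        if not sp.isspmatrix_coo(sparse_mx):
            sparse_mx = sparse_mx.tocoo()
        coords = np.vstack((sparse_mx.row, sparse_mx.col)).transpose()
        values = sparse_mx.data
        shape = sparse_mx.shape
        return coords, values, shape

    # e.g. feed_dict = {placeholders['support1'][0]: sparse_to_tuple(support_matrix)}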