model = MemN2N_KV(batch_size=batch_size,
                  vocab_size=vocab_size,
                  query_size=sentence_size,
                  story_size=sentence_size,
                  memory_key_size=memory_size,
                  feature_size=FLAGS.feature_size,
                  memory_value_size=memory_size,
                  embedding_size=FLAGS.embedding_size,
                  hops=FLAGS.hops,
                  reader=FLAGS.reader,
                  l2_lambda=FLAGS.l2_lambda)

# Clip gradients, add noise, and zero the gradient rows of nil (padding) embeddings.
grads_and_vars = optimizer.compute_gradients(model.loss_op)
grads_and_vars = [(tf.clip_by_norm(g, FLAGS.max_grad_norm), v)
                  for g, v in grads_and_vars if g is not None]
grads_and_vars = [(add_gradient_noise(g), v) for g, v in grads_and_vars]

nil_grads_and_vars = []
for g, v in grads_and_vars:
    if v.name in model._nil_vars:
        nil_grads_and_vars.append((zero_nil_slot(g), v))
    else:
        nil_grads_and_vars.append((g, v))

train_op = optimizer.apply_gradients(nil_grads_and_vars,
                                     name="train_op",
                                     global_step=global_step)
sess.run(tf.global_variables_initializer())
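# The snippets in this section call two helpers, add_gradient_noise and zero_nil_slot,
# that are not defined here. Below is a minimal sketch of what they usually look like
# in Key-Value MemN2N TensorFlow code (assuming `import tensorflow as tf`, TF 1.x);
# this is an assumed implementation, not taken from this repository.
def add_gradient_noise(t, stddev=1e-3, name=None):
    # Add centred Gaussian noise to a gradient tensor.
    with tf.name_scope(name, "add_gradient_noise", [t, stddev]):
        t = tf.convert_to_tensor(t, name="t")
        gn = tf.random_normal(tf.shape(t), stddev=stddev)
        return tf.add(t, gn)

def zero_nil_slot(t, name=None):
    # Zero the first row of a gradient so the nil (padding) embedding is never updated.
    with tf.name_scope(name, "zero_nil_slot", [t]):
        t = tf.convert_to_tensor(t, name="t")
        s = tf.shape(t)[1]
        z = tf.zeros(tf.stack([1, s]))
        return tf.concat([z, tf.slice(t, [1, 0], [-1, -1])], 0)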
with tf.Session() as sess:
    global_step = tf.Variable(0, name="global_step", trainable=False)

    # Decay the learning rate (x0.96 every 2000 steps).
    starter_learning_rate = FLAGS.learning_rate
    learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step,
                                               2000, 0.96, staircase=True)
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate, epsilon=FLAGS.epsilon)

    model = MemN2N_KV(batch_size=batch_size,
                      vocab_size=vocab_size,
                      note_size=sentence_size,
                      doc_size=sentence_size,
                      memory_key_size=memory_size,
                      feature_size=FLAGS.feature_size,
                      memory_value_size=memory_size,
                      embedding_size=FLAGS.embedding_size,
                      hops=FLAGS.hops,
                      reader=FLAGS.reader,
                      l2_lambda=FLAGS.l2_lambda)

    # Clip gradients, add noise, and zero the gradient rows of nil (padding) embeddings.
    grads_and_vars = optimizer.compute_gradients(model.loss_op)
    grads_and_vars = [(tf.clip_by_norm(g, FLAGS.max_grad_norm), v)
                      for g, v in grads_and_vars if g is not None]
    grads_and_vars = [(add_gradient_noise(g), v) for g, v in grads_and_vars]

    nil_grads_and_vars = []
    for g, v in grads_and_vars:
        if v.name in model._nil_vars:
            nil_grads_and_vars.append((zero_nil_slot(g), v))
        else:
            nil_grads_and_vars.append((g, v))

    # Apply the nil-adjusted gradients built above.
    train_op = optimizer.apply_gradients(nil_grads_and_vars,
                                         name="train_op",
                                         global_step=global_step)
    sess.run(tf.global_variables_initializer())

    for i in range(1, FLAGS.epochs + 1):
        np.random.shuffle(batches)
        total_cost = 0.0
        for start, end in batches:
            s = trainS[start:end]
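            # Sketch only: the original snippet is truncated after slicing `s`. A typical
            # per-batch update, assuming placeholder attributes analogous to those used in
            # testBatchSize below (the note/doc variant's names may differ), would be:
            q = trainQ[start:end]
            a = trainA[start:end]
            feed_dict = {model._memory_key: s,
                         model._memory_value: s,
                         model._query: q,
                         model._labels: a,
                         model.keep_prob: FLAGS.keep_prob}
            _, loss = sess.run([train_op, model.loss_op], feed_dict)
            total_cost += loss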
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate, epsilon=FLAGS.epsilon)
#optimizer = tf.train.AdagradOptimizer(learning_rate)

best_kappa_so_far = 0.0

with tf.Session(config=session_conf) as sess:
    model = MemN2N_KV(batch_size, vocab_size, max_sent_size, max_sent_size,
                      memory_size, memory_size, embedding_size, len(score_range),
                      feature_size, hops, reader, l2_lambda)

    # Clip gradients and add a small amount of noise before applying them.
    grads_and_vars = optimizer.compute_gradients(
        model.loss_op, aggregation_method=tf.AggregationMethod.EXPERIMENTAL_TREE)
    grads_and_vars = [(tf.clip_by_norm(g, FLAGS.max_grad_norm), v)
                      for g, v in grads_and_vars if g is not None]
    grads_and_vars = [(add_gradient_noise(g, 1e-3), v) for g, v in grads_and_vars]

    # test point
    #nil_grads_and_vars = []
    #for g, v in grads_and_vars:
    #    if v.name in model._nil_vars:
    #        nil_grads_and_vars.append((zero_nil_slot(g), v))
    #    else:
    #        nil_grads_and_vars.append((g, v))

    train_op = optimizer.apply_gradients(grads_and_vars,
                                         name="train_op",
                                         global_step=global_step)

    # Initialize variables, feeding the pre-trained word2vec embeddings.
    sess.run(tf.global_variables_initializer(),
             feed_dict={model.w_placeholder: word2vec})
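# Sketch (assumed, not from this code): how the `word2vec` matrix fed into
# model.w_placeholder above can be assembled. `word_idx` (word -> row index) and
# `pretrained` (word -> embedding vector) are hypothetical names used for illustration.
import numpy as np

word2vec = np.random.uniform(-0.1, 0.1,
                             (vocab_size, embedding_size)).astype(np.float32)
for word, idx in word_idx.items():
    if word in pretrained:
        word2vec[idx] = pretrained[word]
word2vec[0] = 0.0  # keep the nil/padding row at zero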
# decay learning rate
starter_learning_rate = FLAGS.learning_rate
learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step,
                                           3000, 0.96, staircase=True)

# test point
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate, epsilon=FLAGS.epsilon)
#optimizer = tf.train.AdagradOptimizer(learning_rate)

best_kappa_so_far = 0.0

with tf.Session(config=session_conf) as sess:
    model = MemN2N_KV(batch_size, vocab_size, max_sent_size, max_sent_size,
                      memory_size, memory_size, embedding_size, len(score_range),
                      feature_size, hops, reader, l2_lambda)

    grads_and_vars = optimizer.compute_gradients(
        model.loss_op, aggregation_method=tf.AggregationMethod.EXPERIMENTAL_TREE)
    grads_and_vars = [(tf.clip_by_norm(g, FLAGS.max_grad_norm), v)
                      for g, v in grads_and_vars if g is not None]
    grads_and_vars = [(add_gradient_noise(g, 1e-3), v) for g, v in grads_and_vars]

    # test point
    #nil_grads_and_vars = []
    #for g, v in grads_and_vars:
    #    if v.name in model._nil_vars:
    #        nil_grads_and_vars.append((zero_nil_slot(g), v))
    #    else:
    #        nil_grads_and_vars.append((g, v))

    train_op = optimizer.apply_gradients(grads_and_vars,
                                         name="train_op",
                                         global_step=global_step)

    sess.run(tf.global_variables_initializer(),
             feed_dict={model.w_placeholder: word2vec})

    saver = tf.train.Saver(tf.global_variables())

    def train_step(m, e, s, ma):
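# Sketch (assumed, not from this code): `best_kappa_so_far` above suggests model selection
# by quadratic weighted kappa on a held-out split. One common way to track it uses
# sklearn's cohen_kappa_score; `dev_labels`, `dev_preds`, and the checkpoint path are
# hypothetical names used for illustration.
from sklearn.metrics import cohen_kappa_score

kappa = cohen_kappa_score(dev_labels, dev_preds, weights='quadratic')
if kappa > best_kappa_so_far:
    best_kappa_so_far = kappa
    saver.save(sess, './checkpoints/model.ckpt', global_step=global_step)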
def testBatchSize():
    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        global_step = tf.Variable(0, name="global_step", trainable=False)

        # decay learning rate
        starter_learning_rate = FLAGS.learning_rate
        learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step,
                                                   20000, 0.96, staircase=True)
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate, epsilon=FLAGS.epsilon)

        # Session config: soft placement, a 70% per-process GPU-memory cap, growth enabled.
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.7)
        config = tf.ConfigProto(allow_soft_placement=True, gpu_options=gpu_options)
        config.gpu_options.allow_growth = True

        with tf.Session(config=config) as sess:
            model = MemN2N_KV(batch_size=batch_size,
                              vocab_size=vocab_size,
                              query_size=sentence_size,
                              story_size=sentence_size,
                              memory_key_size=memory_size,
                              feature_size=FLAGS.feature_size,
                              memory_value_size=memory_size,
                              embedding_size=FLAGS.embedding_size,
                              hops=FLAGS.hops,
                              reader=FLAGS.reader,
                              l2_lambda=FLAGS.l2_lambda)

            # Clip gradients, add noise, and zero the gradient rows of nil (padding) embeddings.
            grads_and_vars = optimizer.compute_gradients(model.loss_op)
            grads_and_vars = [(tf.clip_by_norm(g, FLAGS.max_grad_norm), v)
                              for g, v in grads_and_vars if g is not None]
            grads_and_vars = [(add_gradient_noise(g), v) for g, v in grads_and_vars]

            nil_grads_and_vars = []
            for g, v in grads_and_vars:
                if v.name in model._nil_vars:
                    nil_grads_and_vars.append((zero_nil_slot(g), v))
                else:
                    nil_grads_and_vars.append((g, v))

            train_op = optimizer.apply_gradients(nil_grads_and_vars,
                                                 name="train_op",
                                                 global_step=global_step)
            sess.run(tf.global_variables_initializer())

            def train_step(s, q, a):
                # One optimization step; the stories serve as both memory keys and values.
                feed_dict = {
                    model._memory_value: s,
                    model._query: q,
                    model._memory_key: s,
                    model._labels: a,
                    model.keep_prob: FLAGS.keep_prob
                }
                _, step, predict_op = sess.run(
                    [train_op, global_step, model.predict_op], feed_dict)
                return predict_op

            def test_step(s, q):
                # Inference only: dropout disabled (keep_prob = 1).
                feed_dict = {
                    model._query: q,
                    model._memory_key: s,
                    model._memory_value: s,
                    model.keep_prob: 1
                }
                preds = sess.run(model.predict_op, feed_dict)
                return preds

            for t in range(1, FLAGS.epochs + 1):
                np.random.shuffle(batches)
                train_preds = []
                #for start in range(0, n_train, batch_size):
                for start, end in batches:
                    #end = start + batch_size
                    s = trainS[start:end]
                    q = trainQ[start:end]
                    a = trainA[start:end]
                    predict_op = train_step(s, q, a)
                    train_preds += list(predict_op)

                train_preds = test_step(trainS, trainQ)
                train_acc = metrics.accuracy_score(train_labels, train_preds)
                trainaccuracy.append(train_acc)
                # print('-----------------------')
                # print('Epoch', t)
                # print('Training Accuracy: {0:.2f}'.format(train_acc))
                # print('-----------------------')

                val_preds = test_step(valS, valQ)
                val_acc = metrics.accuracy_score(np.array(val_preds), val_labels)
                validationaccuracy.append(val_acc)
                # print(val_preds)
                # print('-----------------------')
                # print('Epoch', t)
                # print('Validation Accuracy:', val_acc)
                # print('-----------------------')

            # test on train dataset
            train_preds = test_step(trainS, trainQ)
            train_acc = metrics.accuracy_score(train_labels, train_preds)
            train_acc = '{0:.2f}'.format(train_acc)

            # eval dataset
            val_preds = test_step(valS, valQ)
            val_acc = metrics.accuracy_score(val_labels, val_preds)
            val_acc = '{0:.2f}'.format(val_acc)
            # batchACCURACY.append(val_acc)

            # testing dataset
            test_preds = test_step(testS, testQ)
            test_acc = metrics.accuracy_score(test_labels, test_preds)
            test_acc = '{0:.2f}'.format(test_acc)
            print("Testing Accuracy: {}".format(test_acc))
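# Sketch (assumed, not from this code): the (start, end) pairs iterated over as `batches`
# in the snippets above are commonly built from the training-set size like this.
batches = zip(range(0, n_train - batch_size, batch_size),
              range(batch_size, n_train, batch_size))
batches = [(start, end) for start, end in batches]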