def experiment_fn(run_config, params):
    data = Data(FLAGS)
    data.initialize_word_vectors()
    model = Seq2seq(data.vocab_size, FLAGS, data.embeddings_mat)
    estimator = tf.estimator.Estimator(
        model_fn=model.make_graph,
        # model_dir=FLAGS.model_dir,
        config=run_config,
        params=FLAGS)

    train_input_fn, train_feed_fn = data.make_input_fn('train')
    eval_input_fn, eval_feed_fn = data.make_input_fn('test')

    print_vars = [
        'source',
        'predict',
        # 'decoder_output',
        # 'actual',
    ]
    print_inputs = tf.train.LoggingTensorHook(
        print_vars,
        every_n_iter=FLAGS.print_every,
        formatter=data.get_formatter(['source', 'predict']))

    experiment = tf.contrib.learn.Experiment(
        estimator=estimator,
        train_input_fn=train_input_fn,
        eval_input_fn=eval_input_fn,
        train_steps=FLAGS.iterations,
        min_eval_frequency=FLAGS.print_every,
        train_monitors=[tf.train.FeedFnHook(train_feed_fn), print_inputs],
        eval_hooks=[tf.train.FeedFnHook(eval_feed_fn)],
        eval_steps=10)
    return experiment
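# A sketch of how experiment_fn can be driven end to end with the TF 1.x
# contrib learn_runner. This call site is an assumption (it is not part of
# this file), and the RunConfig arguments are illustrative only.
def run_experiment():
    from tensorflow.contrib.learn import learn_runner
    run_config = tf.contrib.learn.RunConfig(model_dir=FLAGS.model_dir)
    # learn_runner passes (run_config, hparams) into experiment_fn and then
    # executes the requested schedule on the returned Experiment.
    learn_runner.run(experiment_fn=experiment_fn,
                     run_config=run_config,
                     schedule='train_and_evaluate')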
def main(args):
    # tf.logging._logger.setLevel(logging.INFO)
    tf.logging.set_verbosity(logging.INFO)

    data = Data(FLAGS)
    model = Seq2seq(data.vocab_size, FLAGS)
    input_fn, feed_fn = data.make_input_fn()

    # Log the source, target, and predicted sequences every print_every steps.
    print_inputs = tf.train.LoggingTensorHook(
        ['source', 'target', 'predict'],
        every_n_iter=FLAGS.print_every,
        formatter=data.get_formatter(['source', 'target', 'predict']))

    estimator = tf.estimator.Estimator(
        model_fn=model.make_graph,
        model_dir=FLAGS.model_dir)  # , params=FLAGS)
    estimator.train(
        input_fn=input_fn,
        hooks=[tf.train.FeedFnHook(feed_fn), print_inputs],
        steps=FLAGS.iterations)
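# Standard TF 1.x entry point (a sketch; assumes this module is executed
# directly as a script). tf.app.run parses flags and then calls main(argv).
if __name__ == "__main__":
    tf.app.run(main)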
def trainWithPreviousKnowledge(datasetPath, datasetSize, transferMethod=None,
                               transferVocabularyPath=None,
                               sourceCheckpointPath=None, suffix=""):
    percentages = [0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
    checkpoint_filename = "checkpointsAdaptiveWithPrev" + suffix
    size = datasetSize
    epoch = 5
    test_source = datasetPath + "test_source.txt"
    test_target = datasetPath + "test_target.txt"
    resultPath = datasetPath + "AdaptiveWithPrev"

    if transferMethod is None:
        vocabulary = datasetPath + "v.txt"
    else:
        vocabulary = transferVocabularyPath

    data = Data(FLAGS, "", "", "", "", vocabulary)
    model = Seq2seq(data.vocab_size, FLAGS, transferMethod, sourceCheckpointPath)

    # Train on growing fractions of the dataset, evaluating after each round.
    for i in percentages:
        source_filename = (datasetPath + "without-pool/" +
                           format(i, '.1f') + "_source.txt")
        target_filename = (datasetPath + "without-pool/" +
                           format(i, '.1f') + "_target.txt")
        data = Data(FLAGS, source_filename, target_filename, test_source,
                    test_target, vocabulary)
        iterations = int(round(size * i * epoch / FLAGS.batch_size))
        # iterations = 1

        input_fn, feed_fn = data.make_input_fn()
        test_fn = data.make_test_fn()
        print_inputs = tf.train.LoggingTensorHook(
            ['source', 'target', 'predict'],
            every_n_iter=FLAGS.print_every,
            formatter=data.get_formatter(['source', 'target', 'predict']))

        estimator = tf.estimator.Estimator(model_fn=model.make_graph,
                                           model_dir=checkpoint_filename,
                                           params=FLAGS)

        print("Training with {:.0%} of the dataset.".format(i))
        estimator.train(input_fn=input_fn,
                        hooks=[tf.train.FeedFnHook(feed_fn), print_inputs],
                        steps=iterations)

        # After the first round, restore from this run's own checkpoints
        # rather than re-loading the transfer source checkpoint.
        model.setLoadParameters(False)

        test_paraphrases = list(estimator.predict(test_fn))
        data.builtTranslationCorpus(test_paraphrases)
        scr = evaluate(data.reference_corpus, data.translation_corpus)
        print(i, scr)
        saveResult(i, scr, resultPath)
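# saveResult is called above and below but is not defined in this file. A
# minimal sketch of a plausible implementation; the filename suffix and the
# "fraction<TAB>score" line format are hypothetical, and the real helper may
# differ.
def saveResult(fraction, score, resultPath):
    # Append one result line per evaluation so repeated runs accumulate.
    with open(resultPath + "_results.txt", "a") as f:
        f.write("{}\t{}\n".format(fraction, score))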
def supervisedLearning(datasetPath, datasetSize, transferMethod=None,
                       transferVocabularyPath=None, sourceCheckpointPath=None,
                       suffix=""):
    test_source = datasetPath + "test_source.txt"
    test_target = datasetPath + "test_target.txt"
    train_source = datasetPath + "train_source.txt"
    train_target = datasetPath + "train_target.txt"
    resultPath = datasetPath + "SL"

    if transferMethod is None:
        vocabulary = datasetPath + "quora_msr_vocabulary.txt"
    else:
        vocabulary = transferVocabularyPath

    data = Data(FLAGS, train_source, train_target, test_source, test_target,
                vocabulary)
    model = Seq2seq(data.vocab_size, FLAGS, transferMethod, sourceCheckpointPath)

    size = datasetSize
    epoch = 5
    # iterations = int(round(size * epoch / FLAGS.batch_size))
    iterations = 4

    # var_list = checkpoint_utils.list_variables('checkpoints')
    # for v in var_list: print(v)

    input_fn, feed_fn = data.make_input_fn()
    print_inputs = tf.train.LoggingTensorHook(
        ['source', 'target', 'predict'],
        every_n_iter=FLAGS.print_every,
        formatter=data.get_formatter(['source', 'target', 'predict']))

    # Training is currently disabled; the estimator below only restores an
    # existing checkpoint and runs inference on the test set.
    # estimator = tf.estimator.Estimator(model_fn=model.make_graph,
    #                                    model_dir="checkpointsQuoraMSR")
    # estimator.train(input_fn=input_fn,
    #                 hooks=[tf.train.FeedFnHook(feed_fn), print_inputs],
    #                 steps=iterations)
    # modelInfer = Seq2seq(data.vocab_size, FLAGS, transferMethod,
    #                      sourceCheckpointPath, False, inferGraph=1)
    estimator = tf.estimator.Estimator(
        model_fn=model.make_graph,
        model_dir="data/cps/checkpointsQuoraMSRinit")
    model.setLoadParameters(False)

    test_fn = data.make_test_fn()
    test_paraphrases = list(estimator.predict(test_fn))
    data.builtTranslationCorpus(test_paraphrases)
    scr = evaluate(data.reference_corpus, data.translation_corpus)
    print(data.translation_corpus)
    print(scr)
    saveResult(100, scr, resultPath)
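# evaluate() is used above but defined elsewhere in the project. A minimal
# sketch, assuming it scores corpus-level BLEU with NLTK; the metric choice,
# smoothing method, and the exact shape produced by Data are assumptions.
def evaluate(reference_corpus, translation_corpus):
    from nltk.translate.bleu_score import corpus_bleu, SmoothingFunction
    # reference_corpus: one list of reference token lists per sentence;
    # translation_corpus: one hypothesis token list per sentence.
    smoother = SmoothingFunction().method1
    return corpus_bleu(reference_corpus, translation_corpus,
                       smoothing_function=smoother)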