def experiment_fn(run_config, params):
    data = Data(FLAGS)
    data.initialize_word_vectors()

    model = Seq2seq(data.vocab_size, FLAGS, data.embeddings_mat)
    estimator = tf.estimator.Estimator(
        model_fn=model.make_graph,
        #                                        model_dir=FLAGS.model_dir,
        config=run_config,
        params=FLAGS)

    train_input_fn, train_feed_fn = data.make_input_fn('train')
    eval_input_fn, eval_feed_fn = data.make_input_fn('test')

    print_vars = [
        'source', 'predict'
        # 'decoder_output',
        # 'actual'
    ]
    print_inputs = tf.train.LoggingTensorHook(print_vars,
                                              every_n_iter=FLAGS.print_every,
                                              formatter=data.get_formatter(
                                                  ['source', 'predict']))

    experiment = tf.contrib.learn.Experiment(
        estimator=estimator,
        train_input_fn=train_input_fn,
        eval_input_fn=eval_input_fn,
        train_steps=FLAGS.iterations,
        min_eval_frequency=FLAGS.print_every,
        train_monitors=[tf.train.FeedFnHook(train_feed_fn), print_inputs],
        eval_hooks=[tf.train.FeedFnHook(eval_feed_fn)],
        eval_steps=10)
    return experiment
def main(args):
    # tf.logging._logger.setLevel(logging.INFO)
    tf.logging.set_verbosity(logging.INFO)

    data = Data(FLAGS)
    model = Seq2seq(data.vocab_size, FLAGS)

    input_fn, feed_fn = data.make_input_fn()
    print_inputs = tf.train.LoggingTensorHook(
        ['source', 'target', 'predict'],
        every_n_iter=FLAGS.print_every,
        formatter=data.get_formatter(['source', 'target', 'predict']))

    estimator = tf.estimator.Estimator(
        model_fn=model.make_graph, model_dir=FLAGS.model_dir)  #, params=FLAGS)
    estimator.train(input_fn=input_fn,
                    hooks=[tf.train.FeedFnHook(feed_fn), print_inputs],
                    steps=FLAGS.iterations)
예제 #3
0
def trainWithPreviousKnowledge(datasetPath,
                               datasetSize,
                               transferMethod=None,
                               transferVocabularyPath=None,
                               sourceCheckpointPath=None,
                               suffix=""):
    percentages = [0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
    checkpoint_filename = "checkpointsAdaptiveWithPrev" + suffix
    size = datasetSize
    epoch = 5
    test_source = datasetPath + "test_source.txt"
    test_target = datasetPath + "test_target.txt"
    resultPath = datasetPath + "AdaptiveWithPrev"

    if (transferMethod is None):
        vocabulary = datasetPath + "v.txt"
    else:
        vocabulary = transferVocabularyPath

    data = Data(FLAGS, "", "", "", "", vocabulary)
    model = Seq2seq(data.vocab_size, FLAGS, transferMethod,
                    sourceCheckpointPath)

    for i in percentages:
        source_filename = datasetPath + "without-pool/" + format(
            i, '.1f') + "_source" + ".txt"
        target_filename = datasetPath + "without-pool/" + format(
            i, '.1f') + "_target" + ".txt"
        data = Data(FLAGS, source_filename, target_filename, test_source,
                    test_target, vocabulary)
        iterations = int(round(size * i * epoch / FLAGS.batch_size))
        # iterations = 1

        input_fn, feed_fn = data.make_input_fn()
        test_fn = data.make_test_fn()
        print_inputs = tf.train.LoggingTensorHook(
            ['source', 'target', 'predict'],
            every_n_iter=FLAGS.print_every,
            formatter=data.get_formatter(['source', 'target', 'predict']))

        estimator = tf.estimator.Estimator(model_fn=model.make_graph,
                                           model_dir=checkpoint_filename,
                                           params=FLAGS)
        print("Training with " + format(i, '.2f') + " percent of the dataset.")
        estimator.train(input_fn=input_fn,
                        hooks=[tf.train.FeedFnHook(feed_fn), print_inputs],
                        steps=iterations)

        model.setLoadParameters(False)

        # test_paraphrases = list(estimator.predict(test_fn))

        a = estimator.predict(test_fn)

        test_paraphrases = []

        # a = list(a)

        # for i in a:
        #     print(i.shape)

        # print(len(a))

        for j in a:
            # j = i[:, 0]
            test_paraphrases.append(j)

        data.builtTranslationCorpus(test_paraphrases)
        scr = evaluate(data.reference_corpus, data.translation_corpus)
        print(i, scr)
        saveResult(i, scr, resultPath)
예제 #4
0
def supervisedLearning(datasetPath,
                       datasetSize,
                       transferMethod=None,
                       transferVocabularyPath=None,
                       sourceCheckpointPath=None,
                       suffix=""):

    test_source = datasetPath + "test_source.txt"
    test_target = datasetPath + "test_target.txt"
    train_source = datasetPath + "train_source.txt"
    train_target = datasetPath + "train_target.txt"
    resultPath = datasetPath + "SL"

    if (transferMethod is None):
        vocabulary = datasetPath + "quora_msr_vocabulary.txt"
    else:
        vocabulary = transferVocabularyPath

    data = Data(FLAGS, train_source, train_target, test_source, test_target,
                vocabulary)
    model = Seq2seq(data.vocab_size, FLAGS, transferMethod,
                    sourceCheckpointPath)
    size = datasetSize
    epoch = 5
    # iterations = int(round(size * epoch / FLAGS.batch_size))
    iterations = 4

    # var_list = checkpoint_utils.list_variables('checkpoints')
    # for v in var_list: print(v)

    input_fn, feed_fn = data.make_input_fn()
    print_inputs = tf.train.LoggingTensorHook(
        ['source', 'target', 'predict'],
        every_n_iter=FLAGS.print_every,
        formatter=data.get_formatter(['source', 'target', 'predict']))

    # estimator = tf.estimator.Estimator(model_fn = model.make_graph, model_dir="checkpointsQuoraMSR")
    # estimator.train(input_fn=input_fn, hooks=[tf.train.FeedFnHook(feed_fn), print_inputs], steps=iterations)

    # modelInfer = Seq2seq(data.vocab_size, FLAGS, transferMethod, sourceCheckpointPath, False, inferGraph = 1)
    estimator = tf.estimator.Estimator(
        model_fn=model.make_graph,
        model_dir="data/cps/checkpointsQuoraMSRinit")
    model.setLoadParameters(False)

    test_fn = data.make_test_fn()

    a = estimator.predict(test_fn)

    test_paraphrases = []

    # a = list(a)

    # for i in a:
    #     print(i.shape)recent   # print(len(a))

    for j in a:
        # j = i[:, 0]
        test_paraphrases.append(j)

    data.builtTranslationCorpus(test_paraphrases)
    scr = evaluate(data.reference_corpus, data.translation_corpus)
    print(data.translation_corpus)
    print(scr)
    saveResult(100, scr, resultPath)