Example #1
def main(_):
    """
    Start either train or eval. Note hardcoded parts of path for training and eval data
    """
    hps = LM.get_default_hparams().parse(FLAGS.hpconfig)
    hps._set("num_gpus", FLAGS.num_gpus)
    print('*****HYPER PARAMETERS*****')
    print(hps)
    print('**************************')

    print_debug('training DataSetDir=%s, LogDir=%s' %
                (FLAGS.datadir, FLAGS.logdir))

    #vocab = Vocabulary.from_file(os.path.join(FLAGS.datadir, "1b_word_vocab.txt"))
    vocab = Vocabulary.from_file(os.path.join(FLAGS.datadir, "vocabulary.txt"))
    FLAGS.mode = "train"
    for i in range(10):
        print("Iteration ", i, " phase: ", FLAGS.mode)
        if FLAGS.mode == "train":
            # hps.batch_size = 256
            # original 1B-word training data: "training-monolingual.tokenized.shuffled/*"
            dataset = Dataset(vocab,
                              os.path.join(FLAGS.datadir, "ptb.train.txt"))

            trainlogdir = os.path.join(FLAGS.logdir, "train")
            print_debug('train log dir=%s' % trainlogdir)

            run_train(dataset, hps, trainlogdir, ps_device="/gpu:0")
            print_debug('Finished run_train')
        elif FLAGS.mode.startswith("eval"):
            print_debug('eval mode')

            # if FLAGS.mode.startswith("eval_train"):
            #     data_dir = os.path.join(FLAGS.datadir, "training-monolingual.tokenized.shuffled/*")
            # elif FLAGS.mode.startswith("eval_full"):
            #     data_dir = os.path.join(FLAGS.datadir, "heldout-monolingual.tokenized.shuffled/*")
            # else:
            #     data_dir = os.path.join(FLAGS.datadir, "heldout-monolingual.tokenized.shuffled/news.en.heldout-00000-of-00050")
            dataset = Dataset(vocab,
                              os.path.join(FLAGS.datadir, "ptb.test.txt"),
                              deterministic=True)
            run_eval(dataset, hps, FLAGS.logdir, FLAGS.mode, FLAGS.eval_steps)
            print_debug('Finished run_eval')

        if FLAGS.mode == "train":
            FLAGS.mode = "eval_full"
        else:
            FLAGS.mode = "train"
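The FLAGS referenced in these examples (datadir, logdir, hpconfig, num_gpus, mode, eval_steps) are defined outside the snippets. A minimal sketch of how a TensorFlow 1.x script of this kind typically declares them and hands control to main; the defaults and help strings below are placeholders, not the original definitions:

import tensorflow as tf

# Hypothetical flag declarations; names mirror those used in the examples.
tf.flags.DEFINE_string("datadir", None, "Directory containing training/eval data.")
tf.flags.DEFINE_string("logdir", "/tmp/lm_logs", "Directory for checkpoints and logs.")
tf.flags.DEFINE_string("hpconfig", "", "Comma-separated hyperparameter overrides.")
tf.flags.DEFINE_string("mode", "train", "One of: train, eval_train, eval_full.")
tf.flags.DEFINE_integer("num_gpus", 1, "Number of GPUs to use.")
tf.flags.DEFINE_integer("eval_steps", 70, "Number of evaluation batches.")
FLAGS = tf.flags.FLAGS

if __name__ == "__main__":
    tf.app.run()  # parses flags, then calls main(_) with the remaining argv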
Example #2
def main(_):
    hps = Sentiment.get_default_hparams().parse(FLAGS.hpconfig)
    vocab = Vocabulary.from_file(os.path.join(FLAGS.data_dir, "sent.vocab.freq.dict"))

    if FLAGS.mode == "train":
        dataset = Dataset(os.path.join(FLAGS.data_dir, "train.sent_data.txt"), vocab)
        run_train(dataset,                   ## dataset
                  hps,                       ## configuration
                  FLAGS.log_dir + "/train")  ## logging dir
    elif FLAGS.mode.startswith("eval"):
        dataset = Dataset(os.path.join(FLAGS.data_dir, "test.sent_data.txt"), vocab)
        run_eval(dataset,        ## dataset
                 hps,            ## configuration
                 FLAGS.log_dir)  ## logging dir
Example #3
def main(_):
    hps = LM.get_default_hparams().parse(FLAGS.hpconfig)
    hps.num_gpus = FLAGS.num_gpus

    vocab = Vocabulary.from_file("1b_word_vocab.txt")

    if FLAGS.mode == "train":
        hps.batch_size = 256
        dataset = Dataset(vocab, FLAGS.datadir + "/training-monolingual.tokenized.shuffled/*")
        run_train(dataset, hps, FLAGS.logdir + "/train", ps_device="/gpu:0")
    elif FLAGS.mode.startswith("eval_"):
        if FLAGS.mode.startswith("eval_train"):
            data_dir = FLAGS.datadir + "/training-monolingual.tokenized.shuffled/*"
        else:
            data_dir = FLAGS.datadir + "/heldout-monolingual.tokenized.shuffled/news.en.heldout-00000-of-00050"
        dataset = Dataset(vocab, data_dir, deterministic=True)
        run_eval(dataset, hps, FLAGS.logdir, FLAGS.mode, FLAGS.eval_steps)
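get_default_hparams().parse(FLAGS.hpconfig) merges command-line overrides into the default hyperparameters. A minimal sketch of that parse behaviour, assuming the object is a tf.contrib.training.HParams instance; the placeholder field names below are illustrative and may differ from what LM.get_default_hparams() actually defines:

from tensorflow.contrib.training import HParams

# Placeholder defaults standing in for LM.get_default_hparams().
hps = HParams(batch_size=128, num_steps=20, num_gpus=1)

# An hpconfig string such as "batch_size=256,num_steps=35" overrides
# individual fields while leaving the rest at their defaults.
hps = hps.parse("batch_size=256,num_steps=35")
print(hps.batch_size, hps.num_steps, hps.num_gpus)  # 256 35 1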
Example #4
def main(_):
    """
    Start either train or eval. Note hardcoded parts of path for training and eval data
    """
    hps = LM.get_default_hparams().parse(FLAGS.hpconfig)
    hps._set("num_gpus", FLAGS.num_gpus)
    print('*****HYPER PARAMETERS*****')
    print(hps)
    print('**************************')

    vocab = Vocabulary.from_file(
        os.path.join(FLAGS.datadir, "1b_word_vocab.txt"))

    if FLAGS.mode == "train":
        #hps.batch_size = 256
        dataset = Dataset(
            vocab,
            os.path.join(FLAGS.datadir,
                         "training-monolingual.tokenized.shuffled/*"))
        run_train(dataset,
                  hps,
                  os.path.join(FLAGS.logdir, "train"),
                  ps_device="/gpu:0")
    elif FLAGS.mode.startswith("eval_"):
        if FLAGS.mode.startswith("eval_train"):
            data_dir = os.path.join(
                FLAGS.datadir, "training-monolingual.tokenized.shuffled/*")
        elif FLAGS.mode.startswith("eval_full"):
            data_dir = os.path.join(
                FLAGS.datadir,
                "heldout-monolingual.tokenized.shuffled/news.en.heldout-00000-of-00050"
            )
        else:
            data_dir = os.path.join(
                FLAGS.datadir,
                "heldout-monolingual.tokenized.shuffled/news.en.heldout-00000-of-00050"
            )
        dataset = Dataset(vocab, data_dir, deterministic=True)
        run_eval(dataset, hps, FLAGS.logdir, FLAGS.mode, FLAGS.eval_steps)
    elif FLAGS.mode.startswith("infer"):
        data_dir = os.path.join(
            FLAGS.datadir,
            "heldout-monolingual.tokenized.shuffled/news.en.heldout-00000-of-00050"
        )
        dataset = Dataset(vocab, data_dir, deterministic=True)
        run_infer(dataset, hps, FLAGS.logdir, FLAGS.mode, vocab)
Example #5
def main(_):
    hps = LM.get_default_hparams().parse(FLAGS.hpconfig)
    hps.num_gpus = FLAGS.num_gpus
    
    vocab = Vocabulary.from_file(FLAGS.datadir + "/lm_vocab.txt", hps.vocab_size)

    if FLAGS.mode == "train":
        hps.batch_size = 256  # reset batchsize
        dataset = Dataset(vocab, FLAGS.datadir + "/train/*")
        run_train(dataset, hps, FLAGS.logdir + "/train", ps_device="/gpu:0")
    elif FLAGS.mode.startswith("eval_"):
        if FLAGS.mode.startswith("eval_train"):
            data_dir = FLAGS.datadir + "/train/*"
        elif FLAGS.mode.startswith("eval_test"):
            data_dir = FLAGS.datadir + "/heldout/*"
        print("data_dir:",data_dir)
        dataset = Dataset(vocab, data_dir, deterministic=True)
        run_eval(dataset, hps, FLAGS.logdir, FLAGS.mode, FLAGS.eval_steps)
    elif FLAGS.mode.startswith("predict_next"):
        data_dir = "data/news.en.heldout-00001-of-00050"
        dataset = Dataset(vocab, data_dir)
        predict_next(dataset, hps, FLAGS.logdir, FLAGS.mode, FLAGS.eval_steps, vocab)