Esempio n. 1
0
def main(_):
    """
    Start either train or eval. Note hardcoded parts of path for training and eval data
    """
    hps = LM.get_default_hparams().parse(FLAGS.hpconfig)
    hps._set("num_gpus", FLAGS.num_gpus)
    print('*****HYPER PARAMETERS*****')
    print(hps)
    print('**************************')

    vocab = Vocabulary.from_file(os.path.join(FLAGS.datadir, "vocabulary.txt"))

    if FLAGS.mode == "train":
        #hps.batch_size = 256
        dataset = Dataset(vocab, os.path.join(FLAGS.datadir, "train.txt"))
        run_train(dataset,
                  hps,
                  os.path.join(FLAGS.logdir, "train"),
                  ps_device="/gpu:0")
    elif FLAGS.mode.startswith("eval"):
        data_dir = os.path.join(FLAGS.datadir, "eval.txt")
        #predict_model = prediction.Model('/dir/ckpt',os.path.join(FLAGS.datadir, "vocabulary.txt"), hps)

        dataset = Dataset(vocab, data_dir, deterministic=True)
        prefix_words = "<brk>".split()
        predict_model = predict.Model(hps, FLAGS.logdir, FLAGS.datadir)
        print('start input')
        out = predict_model.predictnextkwords(prefix_words, FLAGS.num_sen)
        for row in out:
            print(' '.join(row) + "\n")
        print("len_out: " + str(len(out)))
Esempio n. 2
0
def main(_):
    """
    Start either train or eval. Note hardcoded parts of path for training and eval data
    """
    hps = LM.get_default_hparams().parse(FLAGS.hpconfig)
    hps._set("num_gpus", FLAGS.num_gpus)
    print('*****HYPER PARAMETERS*****')
    print(hps)
    print('**************************')

    print_debug('our training DataSetDir=%s  , LogDir=%s' %
                (FLAGS.datadir, FLAGS.logdir))

    #vocab = Vocabulary.from_file(os.path.join(FLAGS.datadir, "1b_word_vocab.txt"))
    vocab = Vocabulary.from_file(os.path.join(FLAGS.datadir, "vocabulary.txt"))
    FLAGS.mode = "train"
    for i in range(10):
        print("Iteration ", i, " phase: ", FLAGS.mode)
        if FLAGS.mode == "train":
            #hps.batch_size = 256
            # dataset = Dataset(vocab, os.path.join(FLAGS.datadir,
            #                                       "training-monolingual.tokenized.shuffled/*"))
            dataset = Dataset(vocab,
                              os.path.join(FLAGS.datadir, "ptb.train.txt"))

            trainlogdir = (
                FLAGS.logdir + str("/") + "train"
            )  #(FLAGS.logdir+str("\\")+"train")#os.path.join(FLAGS.logdir, "train")
            print_debug('train log dir=%s' % (trainlogdir))

            run_train(dataset, hps, trainlogdir, ps_device="/gpu:0")
            print_debug('Finished run_train !!!!!!!!!!!')
        elif FLAGS.mode.startswith("eval"):
            print_debug('eval mode')

            # if FLAGS.mode.startswith("eval_train"):
            #     data_dir = os.path.join(FLAGS.datadir, "training-monolingual.tokenized.shuffled/*")
            # elif FLAGS.mode.startswith("eval_full"):
            #     data_dir = os.path.join(FLAGS.datadir, "heldout-monolingual.tokenized.shuffled/*")
            # else:
            #     data_dir = os.path.join(FLAGS.datadir, "heldout-monolingual.tokenized.shuffled/news.en.heldout-00000-of-00050")
            dataset = Dataset(vocab,
                              os.path.join(FLAGS.datadir, "ptb.test.txt"),
                              deterministic=True)
            run_eval(dataset, hps, FLAGS.logdir, FLAGS.mode, FLAGS.eval_steps)
            print_debug('Finished run_eval !!!!!!!!!!!')

        if FLAGS.mode == "train":
            FLAGS.mode = "eval_full"
        else:
            FLAGS.mode = "train"
Esempio n. 3
0
def main(_):
    hps   = Sentiment.get_default_hparams().parse(FLAGS.hpconfig)
    vocab = Vocabulary.from_file( os.path.join(FLAGS.data_dir, "sent.vocab.freq.dict"))

    if FLAGS.mode == "train":
        dataset = Dataset(os.path.join(FLAGS.data_dir, "train.sent_data.txt"), vocab)
        run_train(dataset,                  ## dataset
                  hps,                      ## configurations
                  FLAGS.log_dir + "/train") ## loging dir
    elif FLAGS.mode.startswith("eval"):
        dataset = Dataset(os.path.join(FLAGS.data_dir, "test.sent_data.txt"), vocab)
        run_eval(dataset,                  ## dataset
                  hps,                      ## configurations
                  FLAGS.log_dir) ## loging dir
Esempio n. 4
0
def main(_):
    hps = LM.get_default_hparams().parse(FLAGS.hpconfig)
    hps.num_gpus = FLAGS.num_gpus

    vocab = Vocabulary.from_file("1b_word_vocab.txt")

    if FLAGS.mode == "train":
        hps.batch_size = 256
        dataset = Dataset(vocab, FLAGS.datadir + "/training-monolingual.tokenized.shuffled/*")
        run_train(dataset, hps, FLAGS.logdir + "/train", ps_device="/gpu:0")
    elif FLAGS.mode.startswith("eval_"):
        if FLAGS.mode.startswith("eval_train"):
            data_dir = FLAGS.datadir + "/training-monolingual.tokenized.shuffled/*"
        else:
            data_dir = FLAGS.datadir + "/heldout-monolingual.tokenized.shuffled/news.en.heldout-00000-of-00050"
        dataset = Dataset(vocab, data_dir, deterministic=True)
        run_eval(dataset, hps, FLAGS.logdir, FLAGS.mode, FLAGS.eval_steps)
Esempio n. 5
0
def main(_):
    """
    Start either train or eval. Note hardcoded parts of path for training and eval data
    """
    hps = LM.get_default_hparams().parse(FLAGS.hpconfig)
    hps._set("num_gpus", FLAGS.num_gpus)
    print('*****HYPER PARAMETERS*****')
    print(hps)
    print('**************************')

    vocab = Vocabulary.from_file(
        os.path.join(FLAGS.datadir, "1b_word_vocab.txt"))

    if FLAGS.mode == "train":
        #hps.batch_size = 256
        dataset = Dataset(
            vocab,
            os.path.join(FLAGS.datadir,
                         "training-monolingual.tokenized.shuffled/*"))
        run_train(dataset,
                  hps,
                  os.path.join(FLAGS.logdir, "train"),
                  ps_device="/gpu:0")
    elif FLAGS.mode.startswith("eval_"):
        if FLAGS.mode.startswith("eval_train"):
            data_dir = os.path.join(
                FLAGS.datadir, "training-monolingual.tokenized.shuffled/*")
        elif FLAGS.mode.startswith("eval_full"):
            data_dir = os.path.join(
                FLAGS.datadir,
                "heldout-monolingual.tokenized.shuffled/news.en.heldout-00000-of-00050"
            )
        else:
            data_dir = os.path.join(
                FLAGS.datadir,
                "heldout-monolingual.tokenized.shuffled/news.en.heldout-00000-of-00050"
            )
        dataset = Dataset(vocab, data_dir, deterministic=True)
        run_eval(dataset, hps, FLAGS.logdir, FLAGS.mode, FLAGS.eval_steps)
    elif FLAGS.mode.startswith("infer"):
        data_dir = os.path.join(
            FLAGS.datadir,
            "heldout-monolingual.tokenized.shuffled/news.en.heldout-00000-of-00050"
        )
        dataset = Dataset(vocab, data_dir, deterministic=True)
        run_infer(dataset, hps, FLAGS.logdir, FLAGS.mode, vocab)
Esempio n. 6
0
def main(_):
    hvd.init()
    hps = LM.get_default_hparams().parse(FLAGS.hpconfig)
    hps.num_gpus = FLAGS.num_gpus

    vocab = Vocabulary.from_file(FLAGS.vocab)
    hps.vocab_size = vocab.num_tokens

    config = tf.ConfigProto()
    config.gpu_options.visible_device_list = str(hvd.local_rank())
    os.environ["CUDA_VISIBLE_DEVICES"] = str(hvd.local_rank())

    if FLAGS.logdir is None:
        FLAGS.logdir = os.path.join('/tmp',
                                    'lm-run-{}'.format(int(time.time())))
        print('logdir: {}'.format(FLAGS.logdir))
    hps.batch_size = 256
    dataset = Dataset(vocab, FLAGS.datadir)
    run_train(dataset,
              hps,
              FLAGS.logdir + '/train',
              ps_device='/gpu:' + str(hvd.local_rank()))
def main(_):
    hps = LM.get_default_hparams().parse(FLAGS.hpconfig)
    hps.num_gpus = FLAGS.num_gpus
    
    vocab = Vocabulary.from_file(FLAGS.datadir + "/lm_vocab.txt", hps.vocab_size)

    if FLAGS.mode == "train":
        hps.batch_size = 256  # reset batchsize
        dataset = Dataset(vocab, FLAGS.datadir + "/train/*")
        run_train(dataset, hps, FLAGS.logdir + "/train", ps_device="/gpu:0")
    elif FLAGS.mode.startswith("eval_"):
        if FLAGS.mode.startswith("eval_train"):
            data_dir = FLAGS.datadir + "/train/*"
        elif FLAGS.mode.startswith("eval_test"):
            data_dir = FLAGS.datadir + "/heldout/*"
        print("data_dir:",data_dir)
        dataset = Dataset(vocab, data_dir, deterministic=True)
        run_eval(dataset, hps, FLAGS.logdir, FLAGS.mode, FLAGS.eval_steps)
    elif  FLAGS.mode.startswith("predict_next"):
        data_dir = "data/news.en.heldout-00001-of-00050"
        dataset = Dataset(vocab, data_dir)
        predict_next(dataset, hps, FLAGS.logdir, FLAGS.mode, FLAGS.eval_steps,vocab) 
Esempio n. 8
0
    print("INDEX: %s" % task_index)

cluster = tf.train.ClusterSpec(cluster_spec)
server = tf.train.Server(cluster, job_name=role, task_index=task_index)
if role == "ps":
    server.join()
else:
    ps_device = '/job:ps/task:0'
    """
    Start either train or eval. Note hardcoded parts of path for training and eval data
    """
    hps = LM.get_default_hparams().parse(FLAGS.hpconfig)
    hps._set("num_gpus", FLAGS.num_gpus)
    print('*****HYPER PARAMETERS*****')
    print(hps)
    print('**************************')

    vocab = Vocabulary.from_file(
        os.path.join(FLAGS.datadir, "1b_word_vocab.txt"))

    if FLAGS.mode == "train":
        #hps.batch_size = 256
        dataset = Dataset(
            vocab,
            os.path.join(FLAGS.datadir,
                         "training-monolingual.tokenized.shuffled/*"))
        run_train(dataset,
                  hps,
                  os.path.join(FLAGS.logdir, "train"),
                  ps_device=ps_device)