def main(_): """ Start either train or eval. Note hardcoded parts of path for training and eval data """ hps = LM.get_default_hparams().parse(FLAGS.hpconfig) hps._set("num_gpus", FLAGS.num_gpus) print('*****HYPER PARAMETERS*****') print(hps) print('**************************') vocab = Vocabulary.from_file(os.path.join(FLAGS.datadir, "vocabulary.txt")) if FLAGS.mode == "train": #hps.batch_size = 256 dataset = Dataset(vocab, os.path.join(FLAGS.datadir, "train.txt")) run_train(dataset, hps, os.path.join(FLAGS.logdir, "train"), ps_device="/gpu:0") elif FLAGS.mode.startswith("eval"): data_dir = os.path.join(FLAGS.datadir, "eval.txt") #predict_model = prediction.Model('/dir/ckpt',os.path.join(FLAGS.datadir, "vocabulary.txt"), hps) dataset = Dataset(vocab, data_dir, deterministic=True) prefix_words = "<brk>".split() predict_model = predict.Model(hps, FLAGS.logdir, FLAGS.datadir) print('start input') out = predict_model.predictnextkwords(prefix_words, FLAGS.num_sen) for row in out: print(' '.join(row) + "\n") print("len_out: " + str(len(out)))
def main(_):
    hps = LM.get_default_hparams().parse(FLAGS.hpconfig)
    hps.num_gpus = FLAGS.num_gpus

    vocab = Vocabulary.from_file("small_voca.txt")  # ("1b_word_vocab.txt")

    if FLAGS.mode == "train":
        hps.batch_size = 256
        dataset = Dataset(
            vocab, FLAGS.datadir + "/training-monolingual.tokenized.shuffled/*")
        run_train(dataset, hps, FLAGS.logdir + "/train", ps_device="/gpu:0")
    elif FLAGS.mode.startswith("eval_"):
        hps = LM.get_default_hparams().parse(FLAGS.hpconfig)
        hps.num_gpus = FLAGS.num_gpus
        if FLAGS.mode.startswith("eval_train"):
            data_dir = FLAGS.datadir + "/training-monolingual.tokenized.shuffled/*"
        else:
            data_dir = FLAGS.datadir + "/heldout-monolingual.tokenized.shuffled/news.en.heldout-00000-of-00050"
        dataset = Dataset(vocab, data_dir, deterministic=True)
        run_eval(dataset, hps, FLAGS.logdir, FLAGS.mode, FLAGS.eval_steps)
def main(_): """ Start either train or eval. Note hardcoded parts of path for training and eval data """ hps = LM.get_default_hparams().parse(FLAGS.hpconfig) hps._set("num_gpus", FLAGS.num_gpus) print('*****HYPER PARAMETERS*****') print(hps) print('**************************') print_debug('our training DataSetDir=%s , LogDir=%s' % (FLAGS.datadir, FLAGS.logdir)) #vocab = Vocabulary.from_file(os.path.join(FLAGS.datadir, "1b_word_vocab.txt")) vocab = Vocabulary.from_file(os.path.join(FLAGS.datadir, "vocabulary.txt")) FLAGS.mode = "train" for i in range(10): print("Iteration ", i, " phase: ", FLAGS.mode) if FLAGS.mode == "train": #hps.batch_size = 256 # dataset = Dataset(vocab, os.path.join(FLAGS.datadir, # "training-monolingual.tokenized.shuffled/*")) dataset = Dataset(vocab, os.path.join(FLAGS.datadir, "ptb.train.txt")) trainlogdir = ( FLAGS.logdir + str("/") + "train" ) #(FLAGS.logdir+str("\\")+"train")#os.path.join(FLAGS.logdir, "train") print_debug('train log dir=%s' % (trainlogdir)) run_train(dataset, hps, trainlogdir, ps_device="/gpu:0") print_debug('Finished run_train !!!!!!!!!!!') elif FLAGS.mode.startswith("eval"): print_debug('eval mode') # if FLAGS.mode.startswith("eval_train"): # data_dir = os.path.join(FLAGS.datadir, "training-monolingual.tokenized.shuffled/*") # elif FLAGS.mode.startswith("eval_full"): # data_dir = os.path.join(FLAGS.datadir, "heldout-monolingual.tokenized.shuffled/*") # else: # data_dir = os.path.join(FLAGS.datadir, "heldout-monolingual.tokenized.shuffled/news.en.heldout-00000-of-00050") dataset = Dataset(vocab, os.path.join(FLAGS.datadir, "ptb.test.txt"), deterministic=True) run_eval(dataset, hps, FLAGS.logdir, FLAGS.mode, FLAGS.eval_steps) print_debug('Finished run_eval !!!!!!!!!!!') if FLAGS.mode == "train": FLAGS.mode = "eval_full" else: FLAGS.mode = "train"
def main(_): """ Start either train or eval. Note hardcoded parts of path for training and eval data """ hps = LM.get_default_hparams().parse(FLAGS.hpconfig) hps._set("num_gpus", FLAGS.num_gpus) print('*****HYPER PARAMETERS*****') print(hps) print('**************************') vocab = Vocabulary.from_file( os.path.join(FLAGS.datadir, "1b_word_vocab.txt")) if FLAGS.mode == "train": #hps.batch_size = 256 dataset = Dataset( vocab, os.path.join(FLAGS.datadir, "training-monolingual.tokenized.shuffled/*")) run_train(dataset, hps, os.path.join(FLAGS.logdir, "train"), ps_device="/gpu:0") elif FLAGS.mode.startswith("eval_"): if FLAGS.mode.startswith("eval_train"): data_dir = os.path.join( FLAGS.datadir, "training-monolingual.tokenized.shuffled/*") elif FLAGS.mode.startswith("eval_full"): data_dir = os.path.join( FLAGS.datadir, "heldout-monolingual.tokenized.shuffled/news.en.heldout-00000-of-00050" ) else: data_dir = os.path.join( FLAGS.datadir, "heldout-monolingual.tokenized.shuffled/news.en.heldout-00000-of-00050" ) dataset = Dataset(vocab, data_dir, deterministic=True) run_eval(dataset, hps, FLAGS.logdir, FLAGS.mode, FLAGS.eval_steps) elif FLAGS.mode.startswith("infer"): data_dir = os.path.join( FLAGS.datadir, "heldout-monolingual.tokenized.shuffled/news.en.heldout-00000-of-00050" ) dataset = Dataset(vocab, data_dir, deterministic=True) run_infer(dataset, hps, FLAGS.logdir, FLAGS.mode, vocab)
def run():
    parser = argparse.ArgumentParser()
    arg = parser.add_argument
    hps = LM.get_default_hparams().parse(
        'num_steps=20,num_shards=8,num_layers=2,emb_size=12,projected_size=12,'
        'state_size=80,num_sampled=0,batch_size=1,vocab_size=102')
    hps._set("num_gpus", 1)
    #arg('model')
    #arg('vocab')
    arg('--port', type=int, default=8000)
    arg('--host', default='localhost')
    arg('--debug', action='store_true')
    args = parser.parse_args()

    global model
    #model = Model(args.model, args.vocab, hps)
    model = Model('/Users/ruiyangwang/Desktop/f-lm/logs/test/train/model.ckpt-0',
                  '/Users/ruiyangwang/Desktop/examples/word_language_model/data/penn/vocabulary.txt',
                  hps)
    app.run(port=args.port, host=args.host, debug=args.debug)
def run():
    parser = argparse.ArgumentParser()
    arg = parser.add_argument
    hps = LM.get_default_hparams().parse(
        'num_steps=20,num_shards=6,num_layers=2,learning_rate=0.2,max_grad_norm=1,'
        'keep_prob=0.9,emb_size=1024,projected_size=1024,state_size=8192,'
        'num_sampled=8192,batch_size=512,vocab_size=11859,num_of_groups=4')
    hps._set("num_gpus", 1)
    #arg('model')
    #arg('vocab')
    arg('--port', type=int, default=8000)
    arg('--host', default='localhost')
    arg('--debug', action='store_true')
    args = parser.parse_args()

    global model
    #model = Model(args.model, args.vocab, hps)
    model = Model('/Users/ruiyangwang/Desktop/model/model.ckpt-44260',
                  '/Users/ruiyangwang/Desktop/vocabulary2.txt', hps)
    app.run(port=args.port, host=args.host, debug=args.debug)
def main(_):
    hvd.init()
    hps = LM.get_default_hparams().parse(FLAGS.hpconfig)
    hps.num_gpus = FLAGS.num_gpus

    vocab = Vocabulary.from_file(FLAGS.vocab)
    hps.vocab_size = vocab.num_tokens

    config = tf.ConfigProto()
    config.gpu_options.visible_device_list = str(hvd.local_rank())
    os.environ["CUDA_VISIBLE_DEVICES"] = str(hvd.local_rank())

    if FLAGS.logdir is None:
        FLAGS.logdir = os.path.join('/tmp', 'lm-run-{}'.format(int(time.time())))
    print('logdir: {}'.format(FLAGS.logdir))

    hps.batch_size = 256
    dataset = Dataset(vocab, FLAGS.datadir)
    run_train(dataset, hps, FLAGS.logdir + '/train',
              ps_device='/gpu:' + str(hvd.local_rank()))
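# The Horovod variant above pins each process to a single GPU via
# hvd.local_rank(). Assuming the script is saved as train_hvd.py (a
# hypothetical name), it would typically be launched with one process per GPU,
# for example:
#   horovodrun -np 4 python train_hvd.py --datadir=... --vocab=... --hpconfig=...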
def main(_):
    hps = LM.get_default_hparams().parse(FLAGS.hpconfig)
    hps.num_gpus = FLAGS.num_gpus

    vocab = Vocabulary.from_file(FLAGS.datadir + "/lm_vocab.txt", hps.vocab_size)

    if FLAGS.mode == "train":
        hps.batch_size = 256  # reset batch size
        dataset = Dataset(vocab, FLAGS.datadir + "/train/*")
        run_train(dataset, hps, FLAGS.logdir + "/train", ps_device="/gpu:0")
    elif FLAGS.mode.startswith("eval_"):
        # NOTE: data_dir is only assigned for eval_train / eval_test;
        # any other eval_* mode would hit a NameError below.
        if FLAGS.mode.startswith("eval_train"):
            data_dir = FLAGS.datadir + "/train/*"
        elif FLAGS.mode.startswith("eval_test"):
            data_dir = FLAGS.datadir + "/heldout/*"
        print("data_dir:", data_dir)
        dataset = Dataset(vocab, data_dir, deterministic=True)
        run_eval(dataset, hps, FLAGS.logdir, FLAGS.mode, FLAGS.eval_steps)
    elif FLAGS.mode.startswith("predict_next"):
        data_dir = "data/news.en.heldout-00001-of-00050"
        dataset = Dataset(vocab, data_dir)
        predict_next(dataset, hps, FLAGS.logdir, FLAGS.mode, FLAGS.eval_steps, vocab)
import json
import numpy as np
import time
import tensorflow as tf

from data_utils import Vocabulary, Dataset
from language_model import LM
from common import CheckpointLoader

BATCH_SIZE = 1
NUM_TIMESTEPS = 1
MAX_WORD_LEN = 50

UPLOAD_FOLDER = '/data/ngramTest/uploads'
UPLOAD_FOLDER = './'

hps = LM.get_default_hparams()
vocab = Vocabulary.from_file("1b_word_vocab.txt")

with tf.variable_scope("model"):
    hps.num_sampled = 0  # Always using full softmax at evaluation; otherwise it runs out of memory.
    hps.keep_prob = 1.0
    hps.num_gpus = 1
    model = LM(hps, "predict_next", "/cpu:0")

if hps.average_params:
    print("Averaging parameters for evaluation.")
    saver = tf.train.Saver(model.avg_dict)
else:
    saver = tf.train.Saver()

# Use only 4 threads for the evaluation.
config = tf.ConfigProto(allow_soft_placement=True,
print("You're not in the cluster spec! exiting!") exit(-1) else: print("ROLE: %s" % role) print("INDEX: %s" % task_index) cluster = tf.train.ClusterSpec(cluster_spec) server = tf.train.Server(cluster, job_name=role, task_index=task_index) if role == "ps": server.join() else: ps_device = '/job:ps/task:0' """ Start either train or eval. Note hardcoded parts of path for training and eval data """ hps = LM.get_default_hparams().parse(FLAGS.hpconfig) hps._set("num_gpus", FLAGS.num_gpus) print('*****HYPER PARAMETERS*****') print(hps) print('**************************') vocab = Vocabulary.from_file( os.path.join(FLAGS.datadir, "1b_word_vocab.txt")) if FLAGS.mode == "train": #hps.batch_size = 256 dataset = Dataset( vocab, os.path.join(FLAGS.datadir, "training-monolingual.tokenized.shuffled/*")) run_train(dataset,