def main(_):
    vocab = reader.get_vocab(FLAGS.vocab)
    test_ids, test_meta = reader.make_test(PDPATH('/test_data/' + FLAGS.test), vocab)
    model_path = PDPATH('/trained_models/') + FLAGS.model
    config = load_configs(model_path)

    with tf.Graph().as_default() as graph:
        with tf.Session() as session:
            test_input = TestData(config=config,
                                  test_data=test_ids,
                                  test_meta=test_meta,
                                  vocab=vocab,
                                  name="TestInput")
            with tf.variable_scope("Model"):
                mtest = Basic_LSTM_Model(is_training=False, config=config, input_=test_input)

            saver = tf.train.Saver(write_version=tf.train.SaverDef.V2)
            saved_files = os.listdir(model_path)
            for file in saved_files:
                if '.meta' in file:
                    # Restore weights from the checkpoint whose .meta file was found.
                    ckpt = file.split(sep='.')[0]
                    saver.restore(session, os.path.join(model_path, ckpt))
                    continue

            np.set_printoptions(precision=4, suppress=False, linewidth=100)
            b = run_test(session=session, model=mtest, model_input=test_input)
            print(b)
            # Normalize each row so the outputs sum to 1 (a distribution over the vocabulary).
            b = b / np.sum(b, axis=1).reshape([-1, 1])
            print(b)
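# Invocation sketch (illustrative; the script's filename is not shown here and the flag values
# are hypothetical examples — the flags themselves are the ones this main() actually reads):
#   python <this_script>.py --vocab=ptb.voc --test=coffee.txt --model=model_1
# where --model names a directory under /trained_models/ that holds the saved checkpoint and config.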
def main():
    proceed = True
    usrdir = input('[FFBP Reader] Provide user directory (if any), or press \'enter\' to use default directory: ')
    usrdir = usrdir.strip()
    while proceed:
        path = input('[FFBP Reader] Enter name of log directory OR corresponding index: ')
        try:
            # Numeric input is treated as a log index, e.g. '3' -> .../logs/FFBPlog_3/snap.pkl
            ID = int(path)
            path = PDPATH('/FFBP{}/logs/FFBPlog_{}/snap.pkl'.format('/' + usrdir if len(usrdir) else '', path))
        except ValueError:
            # Otherwise treat the input as the log directory's name, e.g. 'FFBPlog_3'
            ID = int(path.split(sep='_')[-1])
            path = PDPATH('/FFBP{}/logs/'.format('/' + usrdir if len(usrdir) else '') + path + '/snap.pkl')
        # Opening the snapshot first verifies that it exists before handing the path to NetworkData.
        with open(path, 'rb'):
            reader = NetworkData(path)
            code.interact(local=locals())
        print('[FFBP Reader] Would you like to proceed?')
        prompt = input("[y/n] -> ")
        if prompt == 'n':
            proceed = False
def reader_demo():
    file = PDPATH('/train_data/ptb_word_data/train.txt')

    print('Step 1. Convert raw corpus into a long list:')
    L = _read_words(file)
    print('Length = {}'.format(len(L)))

    print('\nStep 2. Build vocab (assign strings to IDs):')
    V = _build_mini_vocab(file, True)
    for i, w in enumerate(V):
        print(i, w)
        if i > 20:
            break
def main():
    import reader as reader
    from trainer import Configs
    from PDPATH import PDPATH

    ptb_vocab = get_vocab('ptb.voc')
    raw_test_data = reader.make_test(PDPATH('/RNN/test_data/coffee.txt'), ptb_vocab)
    test_input = TestData(config=Configs(),
                          test_data=raw_test_data,
                          vocab=ptb_vocab,
                          name="TestInput")
def sandbox():
    def f(filename, sorted_words_only=False):
        # Long list of word sequences separated by <eos>
        data = _read_words(filename)
        # Stores tallies of unique words in data, e.g. {'<unk>': 4794, 'the': 4529, '<eos>': 3761}
        counter = collections.Counter(data)
        return counter

    file = PDPATH('/train_data/ptb_word_data/train.txt')
    d = f(file)
    items = ['the', 'dog', 'dogs', 'boy', 'boys', 'is', 'are', 'has', 'have', 'was', 'were']
    for i in items:
        print(i, d[i])
def tag_corpus():
    file = PDPATH('/train_data/ptb_word_data/train.txt')
    s2id = rd._build_big_vocab(file)
    V = Vocab(s2id)
    tags = ['JJ']  # POS tags to collect (adjectives only)
    words_by_tags = {t: [] for t in tags}
    adjectives = []
    for k, (id, f, pos) in s2id.items():
        if pos in tags:
            words_by_tags[pos].append(k)
            adjectives.append(k)
            # Append each adjective to the 'ptb_adjs' file as a comma-separated entry.
            with open('ptb_adjs', mode='a') as out:
                out.write(',' + k)
    print(len(adjectives))
def __init__(self, path=''):
    self.sess_index = 0
    self.logs_path = PDPATH() + '/logs'
    self.trained_path = PDPATH() + '/trained_models'
    self.may_be_make_dir(self.logs_path)
    self.may_be_make_dir(self.trained_path)
def make_vocab():
    file = PDPATH('/train_data/tiny_data/train.txt')
    s2id = _build_mini_vocab(file)
    V = Vocab(s2id)
    with open(PDPATH('/vocabs/tiny.voc'), 'wb') as out:
        pickle.dump(V, out)
def get_vocab(filename):
    # Unpickle a Vocab stored under /vocabs/
    f = PDPATH('/vocabs/{}'.format(filename))
    with open(f, 'rb') as pkl:
        v = pickle.load(pkl)
    return v
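# Usage sketch (illustrative only): make_vocab() above pickles a Vocab to /vocabs/tiny.voc, and
# get_vocab() reloads it by filename, e.g. tiny_vocab = get_vocab('tiny.voc'); this is how the
# test scripts obtain the vocabulary object passed to TestData.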
def main(_):
    config = Configs(batch_size=20,
                     hidden_size=1500,
                     init_scale=0.04,
                     keep_prob=.35,
                     learning_rate=1.0,
                     lr_decay=1 / 1.15,
                     max_epoch=14,
                     max_grad_norm=10,
                     max_max_epoch=55,
                     model=FLAGS.arch.lower(),  # Set of available models: 'LSTM', 'RNN', 'SRN'
                     num_layers=1,
                     num_steps=35,
                     vocab_size=10000)
    eval_config = config.clone()
    eval_config.batch_size = 1
    eval_config.num_steps = 1

    if FLAGS.train_data:
        path = PDPATH('/train_data/' + FLAGS.train_data)
    else:
        print('Provide path to training data, e.g: train.py --train_data=\'path\'')
        return

    logger = Logger()
    raw_data = reader.raw_data(path)
    train_data, valid_data, test_data, _ = raw_data

    with tf.Graph().as_default():
        initializer = tf.random_uniform_initializer(-config.init_scale, config.init_scale, seed=None)

        with tf.name_scope("Train"):
            train_input = InputData(config=config, data=train_data, name="TrainInput")
            with tf.variable_scope("Model", reuse=None, initializer=initializer):
                m = get_model(config.model, is_training=True, config=config, input_=train_input)
                print(m)
            tf.summary.scalar("Training Loss", m.cost)
            tf.summary.scalar("Learning Rate", m.lr)

        with tf.name_scope("Valid"):
            valid_input = InputData(config=config, data=valid_data, name="ValidInput")
            with tf.variable_scope("Model", reuse=True, initializer=initializer):
                mvalid = get_model(config.model, is_training=False, config=config, input_=valid_input)
            tf.summary.scalar("Validation Loss", mvalid.cost)

        with tf.name_scope("Test"):
            test_input = InputData(config=eval_config, data=test_data, name="TestInput")
            with tf.variable_scope("Model", reuse=True, initializer=initializer):
                mtest = get_model(config.model, is_training=False, config=eval_config, input_=test_input)

        logger.make_child_i(logger.logs_path, 'RNNlog')
        saver = tf.train.Saver(var_list=tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='Model'),
                               sharded=False,
                               write_version=tf.train.SaverDef.V2)
        sv = tf.train.Supervisor(logdir=logger.logs_child_path, saver=saver)

        train_log = []
        valid_log = []
        out = []

        # Session runs here
        # Set up session configs
        sess_config = tf.ConfigProto(log_device_placement=False)
        sess_config.gpu_options.allow_growth = True

        # Start session context manager by calling tf.train.Supervisor's managed_session
        with sv.managed_session(config=sess_config) as session:
            print('Starting on: {} (GMT)'.format(str(datetime.datetime.today())))
            print(banner(s='begin'))
            start = time.time()
            if FLAGS.prog:
                printProgress(0, config.max_max_epoch, 'Training', 'Complete', barLength=60)

            for i in range(config.max_max_epoch):
                fin = i + 1
                valid_perplexity, _ = run_epoch(session, mvalid)
                valid_log.append(valid_perplexity)
                # Early stopping: halt as soon as validation perplexity increases.
                if len(valid_log) >= 2:
                    if valid_log[-1] > valid_log[-2]:
                        elapsed = time.time() - start
                        break

                lr_decay = config.lr_decay ** max(i + 1 - config.max_epoch, 1)
                m.assign_lr(session, config.learning_rate * lr_decay)

                train_perplexity, _ = run_epoch(session, m, eval_op=m.train_op)
                train_log.append(train_perplexity)

                output_density = 10
                output_frequency = config.max_max_epoch // output_density
                if config.max_max_epoch >= output_frequency:
                    if (i % output_frequency == 0) or i == config.max_max_epoch - 1:
                        print_(i, train_perplexity, valid_perplexity)
                else:
                    print_(i, train_perplexity, valid_perplexity)

                if i == config.max_max_epoch - 1:
                    elapsed = time.time() - start
                if FLAGS.prog:
                    printProgress(i + 1, config.max_max_epoch, 'Training', 'Complete', barLength=60)

            test_perplexity, outputs = run_epoch(session, mtest)
            print('\nStopped training on epoch {}:'.format(fin))
            print(" Train PPL = {:.4f}\n Valid PPL = {:.4f}\n Test PPL = {:.4f}".format(
                train_perplexity, valid_perplexity, test_perplexity))
            print(' Stopped {} (GMT)'.format(str(datetime.datetime.today())))
            m, s = divmod(elapsed, 60)
            h, m = divmod(m, 60)
            print(' Elapsed time {}:{}:{}'.format(int(h), int(m), int(s)))

            if FLAGS.save_as:
                if FLAGS.name:
                    save_to = logger.make_child(logger.trained_path, FLAGS.name)
                else:
                    save_to = logger.make_child_i(logger.trained_path, 'model')
                spath = save_to + '/' + FLAGS.save_as
                print("\nSaving model to {}.".format(spath))
                saver.save(session, spath, global_step=sv.global_step)
                save_config(config, filename=spath)
                save_plot('Learning curves from {}'.format(FLAGS.save_as), save_to, train_log, valid_log)
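# Invocation sketch (flag values are hypothetical examples; the script itself only documents --train_data):
#   python train.py --train_data=ptb_word_data --arch=LSTM --prog=True --save_as=lstm_model --name=lstm_run
# --arch selects among the models listed above ('LSTM', 'RNN', 'SRN'), --prog toggles the progress bar,
# and --save_as / --name control where the trained checkpoint, config, and learning-curve plot are written.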