Example #1
def main(_):
    vocab = reader.get_vocab(FLAGS.vocab)
    test_ids, test_meta = reader.make_test(PDPATH('/test_data/'+FLAGS.test), vocab)
    model_path = PDPATH('/trained_models/') + FLAGS.model
    config = load_configs(model_path)


    with tf.Graph().as_default() as graph:
        with tf.Session() as session:
            test_input = TestData(config = config,
                                  test_data = test_ids,
                                  test_meta = test_meta,
                                  vocab=vocab,
                                  name="TestInput")

            with tf.variable_scope("Model"):
                mtest = Basic_LSTM_Model(is_training=False, config=config, input_=test_input)

            saver = tf.train.Saver(write_version=tf.train.SaverDef.V2)
            # Restore weights from the checkpoint whose '.meta' file lives in model_path.
            saved_files = os.listdir(model_path)
            for file in saved_files:
                if file.endswith('.meta'):
                    ckpt = file[:-len('.meta')]
                    saver.restore(session, os.path.join(model_path, ckpt))
                    break


            np.set_printoptions(precision=4, suppress=False, linewidth=100)
            b = run_test(session=session, model=mtest, model_input=test_input)
            print(b)
            # Normalize each row to a probability distribution before re-printing.
            b = b / np.sum(b, axis=1).reshape([-1, 1])
            print(b)
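A note on the restore loop above: TF 1.x can locate the newest checkpoint directly, which avoids scanning the directory for '.meta' files by hand. A minimal sketch, assuming the same model_path layout:

ckpt = tf.train.latest_checkpoint(model_path)
if ckpt is not None:
    saver.restore(session, ckpt)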
Example #2
def main():
    proceed = True
    usrdir = input('[FFBP Reader] Provide user directory (if any), or press \'enter\' to use default directory: ')
    usrdir = usrdir.strip()
    while proceed:
        path = input('[FFBP Reader] Enter name of log directory OR corresponding index: ')
        try:
            # A bare integer is treated as a log-directory index.
            int(path)
            path = PDPATH('/FFBP{}/logs/FFBPlog_{}/snap.pkl'.format('/' + usrdir if len(usrdir) else '', path))
        except ValueError:
            # Otherwise the input is the log-directory name itself.
            path = PDPATH('/FFBP{}/logs/'.format('/' + usrdir if len(usrdir) else '') + path + '/snap.pkl')
        # NetworkData loads the snapshot itself; open() here just fails fast if it is missing.
        with open(path, 'rb'):
            reader = NetworkData(path)
        code.interact(local=locals())

        print('[FFBP Reader] Would you like to proceed?')
        prompt = input("[y/n] -> ")
        if prompt == 'n':
            proceed = False
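The code.interact call above drops into a REPL with the freshly loaded reader in scope; a minimal standalone sketch of that pattern, using only the standard library:

import code

reader = NetworkData(path)  # loaded as above
code.interact(banner='[FFBP Reader] Interactive session (Ctrl-D to exit)',
              local={'reader': reader})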
Example #3
def reader_demo():
    file = PDPATH('/train_data/ptb_word_data/train.txt')
    print('Step 1. Convert raw corpus into a long list:')
    L = _read_words(file)
    print('Length = {}'.format(len(L)))

    print('\nStep 2. Build vocab (assign strings to IDs):')
    V = _build_mini_vocab(file, True)
    for i, w in enumerate(V):
        print(i, w)
        if i > 20:
            break
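_build_mini_vocab itself is not shown here; a minimal sketch of the conventional frequency-sorted string-to-ID construction it presumably performs (an assumption, built from the Counter pattern in Example #5):

import collections

def build_vocab_sketch(words):
    # Most frequent word gets ID 0; ties broken alphabetically.
    counter = collections.Counter(words)
    pairs = sorted(counter.items(), key=lambda x: (-x[1], x[0]))
    return {word: i for i, (word, _) in enumerate(pairs)}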
Example #4
def main():
    import reader
    from trainer import Configs
    from PDPATH import PDPATH

    ptb_vocab = get_vocab('ptb.voc')
    raw_test_data = reader.make_test(PDPATH('/RNN/test_data/coffee.txt'), ptb_vocab)

    test_input = TestData(config=Configs(),
                          test_data=raw_test_data,
                          vocab=ptb_vocab,
                          name="TestInput")
Example #5
def sandbox():
    def f(filename, sorted_words_only=False):
        # Long list of word sequences separated by <eos>
        data = _read_words(filename)
        # Stores tallies of unique words in data, e.g. {'<unk>': 4794, 'the': 4529, '<eos>': 3761}
        counter = collections.Counter(data)
        return counter
    file = PDPATH('/train_data/ptb_word_data/train.txt')
    d = f(file)

    items = ['the', 'dog', 'dogs', 'boy', 'boys', 'is', 'are', 'has', 'have', 'was', 'were']
    for i in items:
        print(i, d[i])
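Counter also ranks words directly, so the same tallies from the sandbox above can be inspected without a hand-picked item list:

for word, count in d.most_common(10):
    print(word, count)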
Example #6
def tag_corpus():
    file = PDPATH('/train_data/ptb_word_data/train.txt')
    s2id = rd._build_big_vocab(file)
    V = Vocab(s2id)
    # POS tags to collect; s2id maps each word to an (id, frequency, POS-tag) triple.
    tags = ['JJ']
    words_by_tags = {t: [] for t in tags}
    adjectives = []

    with open('ptb_adjs', mode='a') as out:
        for word, (wid, freq, pos) in s2id.items():
            if pos in tags:
                adjectives.append(word)
                out.write(',' + word)
    print(len(adjectives))
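If more tags are wanted, the words_by_tags dict above could collect them all in one pass; a sketch, assuming the same (id, frequency, POS) vocabulary layout:

tags = ['JJ', 'NN', 'VB']
words_by_tags = {t: [] for t in tags}
for word, (wid, freq, pos) in s2id.items():
    if pos in words_by_tags:
        words_by_tags[pos].append(word)
print({t: len(ws) for t, ws in words_by_tags.items()})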
Example #7
def __init__(self, path=''):
    self.sess_index = 0
    self.logs_path = PDPATH() + '/logs'
    self.trained_path = PDPATH() + '/trained_models'
    self.may_be_make_dir(self.logs_path)
    self.may_be_make_dir(self.trained_path)
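may_be_make_dir is not shown; a plausible minimal implementation (an assumption based on the name, shown here as a standalone function) is:

import os

def may_be_make_dir(path):
    # Create the directory if it does not already exist.
    os.makedirs(path, exist_ok=True)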
Example #8
def make_vocab():
    file = PDPATH('/train_data/tiny_data/train.txt')
    s2id = _build_mini_vocab(file)
    V = Vocab(s2id)
    with open(PDPATH('/vocabs/tiny.voc'), 'wb') as f:
        pickle.dump(V, f)
Example #9
def get_vocab(filename):
    # Unpickle a Vocab previously saved under /vocabs (see make_vocab above).
    f = PDPATH('/vocabs/{}'.format(filename))
    with open(f, 'rb') as pkl:
        v = pickle.load(pkl)
    return v
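make_vocab (Example #8) and get_vocab form a pickle round-trip; a minimal usage sketch, assuming a vocab was previously saved as 'tiny.voc':

make_vocab()               # writes /vocabs/tiny.voc
V = get_vocab('tiny.voc')  # reads it back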
Example #10
def main(_):

    config = Configs(
        batch_size=20,
        hidden_size=1500,
        init_scale=0.04,
        keep_prob=.35,
        learning_rate=1.0,
        lr_decay=1 / 1.15,
        max_epoch=14,
        max_grad_norm=10,
        max_max_epoch=55,
        model=FLAGS.arch.lower(),  # Available models: 'LSTM', 'RNN', 'SRN'
        num_layers=1,
        num_steps=35,
        vocab_size=10000)
    # Evaluation feeds one token at a time: batch_size = num_steps = 1.
    eval_config = config.clone()
    eval_config.batch_size = 1
    eval_config.num_steps = 1

    if FLAGS.train_data:
        path = PDPATH('/train_data/' + FLAGS.train_data)
    else:
        print('Provide path to training data, e.g: train.py --train_data=\'path\'')
        return

    logger = Logger()

    raw_data = reader.raw_data(path)
    train_data, valid_data, test_data, _ = raw_data

    with tf.Graph().as_default():
        initializer = tf.random_uniform_initializer(-config.init_scale,
                                                    config.init_scale,
                                                    seed=None)
        with tf.name_scope("Train"):
            train_input = InputData(config=config,
                                    data=train_data,
                                    name="TrainInput")
            with tf.variable_scope("Model",
                                   reuse=None,
                                   initializer=initializer):
                m = get_model(config.model,
                              is_training=True,
                              config=config,
                              input_=train_input)
            print(m)
            tf.summary.scalar("Training Loss", m.cost)
            tf.summary.scalar("Learning Rate", m.lr)

        with tf.name_scope("Valid"):
            valid_input = InputData(config=config,
                                    data=valid_data,
                                    name="ValidInput")
            with tf.variable_scope("Model",
                                   reuse=True,
                                   initializer=initializer):
                mvalid = get_model(config.model,
                                   is_training=False,
                                   config=config,
                                   input_=valid_input)
            tf.summary.scalar("Validation Loss", mvalid.cost)

        with tf.name_scope("Test"):
            test_input = InputData(config=eval_config,
                                   data=test_data,
                                   name="TestInput")
            with tf.variable_scope("Model",
                                   reuse=True,
                                   initializer=initializer):
                mtest = get_model(config.model,
                                  is_training=False,
                                  config=eval_config,
                                  input_=test_input)

        logger.make_child_i(logger.logs_path, 'RNNlog')
        saver = tf.train.Saver(var_list=tf.get_collection(
            tf.GraphKeys.GLOBAL_VARIABLES, scope='Model'),
                               sharded=False,
                               write_version=tf.train.SaverDef.V2)
        sv = tf.train.Supervisor(logdir=logger.logs_child_path, saver=saver)
        train_log = []
        valid_log = []
        out = []

        # Session runs here
        # Setup session configs
        sess_config = tf.ConfigProto(log_device_placement=False)
        sess_config.gpu_options.allow_growth = True
        # Start session context manager by calling to tf.train.Supervisor's managed_session
        with sv.managed_session(config=sess_config) as session:
            # datetime.today() is local time; utcnow() matches the '(GMT)' label.
            print('Starting on: {} (GMT)'.format(
                str(datetime.datetime.utcnow())))
            print(banner(s='begin'))
            start = time.time()
            if FLAGS.prog:
                printProgress(0,
                              config.max_max_epoch,
                              'Training',
                              'Complete',
                              barLength=60)
            for i in range(config.max_max_epoch):
                fin = i + 1
                valid_perplexity, _ = run_epoch(session, mvalid)
                valid_log.append(valid_perplexity)
                # Early stopping: halt as soon as validation perplexity rises.
                if len(valid_log) >= 2:
                    if valid_log[-1] > valid_log[-2]:
                        elapsed = time.time() - start
                        break

                lr_decay = config.lr_decay**max(i + 1 - config.max_epoch, 1)
                m.assign_lr(session, config.learning_rate * lr_decay)

                train_perplexity, _ = run_epoch(session, m, eval_op=m.train_op)
                train_log.append(train_perplexity)

                # Print at most output_density progress lines over the run; the
                # output_frequency > 0 guard avoids a modulo-by-zero when
                # max_max_epoch < output_density.
                output_density = 10
                output_frequency = config.max_max_epoch // output_density
                if output_frequency > 0:
                    if (i % output_frequency == 0) or i == config.max_max_epoch - 1:
                        print_(i, train_perplexity, valid_perplexity)
                else:
                    print_(i, train_perplexity, valid_perplexity)

                if i == config.max_max_epoch - 1:
                    elapsed = time.time() - start

                if FLAGS.prog:
                    printProgress(i + 1,
                                  config.max_max_epoch,
                                  'Training',
                                  'Complete',
                                  barLength=60)

            test_perplexity, outputs = run_epoch(session, mtest)
            print('\nStopped training on epoch {}:'.format(fin))
            print(
                "    Train PPL = {:.4f}\n    Valid PPL = {:.4f}\n    Test PPL  = {:.4f}"
                .format(train_perplexity, valid_perplexity, test_perplexity))
            print('    Stopped {} (GMT)'.format(
                str(datetime.datetime.utcnow())))
            # Use fresh names so the model variable m is not shadowed.
            mins, secs = divmod(elapsed, 60)
            hrs, mins = divmod(mins, 60)
            print('    Elapsed time {}:{}:{}'.format(int(hrs), int(mins), int(secs)))

            if FLAGS.save_as:
                if FLAGS.name:
                    save_to = logger.make_child(logger.trained_path,
                                                FLAGS.name)
                else:
                    save_to = logger.make_child_i(logger.trained_path, 'model')

                spath = save_to + '/' + FLAGS.save_as
                print("\nSaving model to {}.".format(spath))
                saver.save(session, spath, global_step=sv.global_step)
                save_config(config, filename=spath)
                save_plot('Learning curves from {}'.format(FLAGS.save_as),
                          save_to, train_log, valid_log)
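For reference, the learning-rate schedule in the training loop above decays geometrically, and because of the max(..., 1) floor the first decay step already applies at epoch 0. A standalone sketch of the same arithmetic:

learning_rate, lr_decay, max_epoch = 1.0, 1 / 1.15, 14
for i in range(20):
    # Constant lr * decay^1 through epoch max_epoch; the exponent grows afterwards.
    lr = learning_rate * lr_decay ** max(i + 1 - max_epoch, 1)
    print('epoch {:2d}: lr = {:.4f}'.format(i, lr))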