Example #1
def main(unused_args):
    ''' Generates data from a trained model (fun!) '''
    
    if not FLAGS.load_model:
        print('--load_model is required')
        return -1
  
    with tf.Graph().as_default(), tf.Session() as session:
        
        # load parameters of the model
        with tf.variable_scope("params"):
            num_layers_var = tf.Variable(0, name='num_layers')
            hidden_size_var = tf.Variable(0, name='hidden_size')
            vocab_size_var = tf.Variable(0, name='vocab_size')
            tf.train.Saver([num_layers_var, hidden_size_var, vocab_size_var]).restore(session, FLAGS.load_model)
            # size the vocab variable from the vocab_size restored above,
            # then restore the vocabulary contents in a second pass
            vocab_var = tf.Variable([0] * vocab_size_var.eval(), name='vocab')
            tf.train.Saver([vocab_var]).restore(session, FLAGS.load_model)
            
            # np.asscalar converts the np.int32 values to plain Python ints
            FLAGS.num_layers = np.asscalar(num_layers_var.eval())
            FLAGS.hidden_size = np.asscalar(hidden_size_var.eval())
            
            vocab = Vocab.from_array(vocab_var.eval())
            
            print('Loaded model from file', FLAGS.load_model)
            print('\tnum_layers:', FLAGS.num_layers)
            print('\thidden_size:', FLAGS.hidden_size)
            print('\tvocab_size:', vocab.size)
        
        # build the inference graph; its weights are restored below
        with tf.variable_scope("model", reuse=None):
            m = graph.inference_graph(vocab.size, FLAGS.num_layers, FLAGS.hidden_size)
          
        tf.train.Saver().restore(session, FLAGS.load_model)
        
        # prime the sampler with uniform logits: the first character is drawn
        # uniformly from the vocabulary
        logits = np.ones((vocab.size,))
        state = session.run(m.initial_state)
        for _ in range(FLAGS.sample_size):
            # temperature-scaled softmax; subtracting the max keeps exp() from overflowing
            logits = logits / FLAGS.temperature
            prob = np.exp(logits - np.max(logits))
            prob /= np.sum(prob)
            prob = prob.ravel()
            ix = np.random.choice(range(len(prob)), p=prob)

            print(vocab.decode(ix), end='')

            # feed the sampled character back in to obtain logits for the next one
            logits, state = session.run([m.logits, m.final_state],
                                        {m.input_data: np.array([[ix]]),
                                         m.initial_state: state})
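
The sampling loop above implements temperature-scaled softmax sampling: dividing the logits by a temperature below 1.0 sharpens the distribution toward the most likely character, while a temperature above 1.0 flattens it toward uniform. A minimal self-contained NumPy sketch of the same idea (the sample_ix name and the toy logits are illustrative, not part of the example above):

import numpy as np

def sample_ix(logits, temperature=1.0):
    # temperature-scaled softmax; subtracting the max keeps exp() from overflowing
    scaled = np.asarray(logits, dtype=np.float64).ravel() / temperature
    prob = np.exp(scaled - np.max(scaled))
    prob /= prob.sum()
    return np.random.choice(len(prob), p=prob)

logits = [1.0, 2.0, 4.0, 2.0, 1.0]
print(sample_ix(logits, temperature=0.5))   # almost always index 2, the argmax
print(sample_ix(logits, temperature=10.0))  # close to uniform over all 5 indices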
Example #2
def main(unused_args):
    ''' Trains model from data '''

    if not FLAGS.input_data:
        raise ValueError("Must set --input_data to the filename of input dataset")

    if not FLAGS.train_dir:
        raise ValueError("Must set --train_dir to the directory where training files will be saved")

    if not os.path.exists(FLAGS.train_dir):
        os.mkdir(FLAGS.train_dir)

    with tf.Graph().as_default(), tf.Session() as session:

        # To make tf.train.Saver write parameters as part of the saved file,
        # add params to the graph as variables (hackish? - MK)
        with tf.variable_scope("params", reuse=None):
            num_layers_var = tf.Variable(FLAGS.num_layers, trainable=False, name='num_layers')
            hidden_size_var = tf.Variable(FLAGS.hidden_size, trainable=False, name='hidden_size')
        
            # If a pre-trained model is loaded from file, use its vocabulary and
            # NN geometry. Otherwise, compute the vocabulary and use the
            # command-line params for num_layers and hidden_size.
            if FLAGS.load_model:
                vocab_size_var = tf.Variable(0, trainable=False, name='vocab_size')
                tf.train.Saver([num_layers_var, hidden_size_var, vocab_size_var]).restore(session, FLAGS.load_model)
                vocab_var = tf.Variable([0] * vocab_size_var.eval(), trainable=False, name='vocab')
                tf.train.Saver([vocab_var]).restore(session, FLAGS.load_model)
    
                FLAGS.num_layers = np.asscalar(num_layers_var.eval())  # np.asscalar converts np.int32 to a plain Python int
                FLAGS.hidden_size = np.asscalar(hidden_size_var.eval())
                
                vocab = Vocab.from_array(vocab_var.eval())
                train_data, valid_data, test_data, vocab = reader.read_datasets(
                    FLAGS.input_data, FLAGS.train_fraction, FLAGS.valid_fraction,
                    vocab=vocab)
            else:
                train_data, valid_data, test_data, vocab = reader.read_datasets(
                    FLAGS.input_data, FLAGS.train_fraction, FLAGS.valid_fraction,
                    vocab_size=FLAGS.vocab_size)
                vocab_size_var = tf.Variable(vocab.size, trainable=False, name='vocab_size')
                vocab_var = tf.Variable(vocab.to_array(), trainable=False, name='vocab')

        # build the training graph
        initializer = tf.random_uniform_initializer(-FLAGS.init_scale, FLAGS.init_scale)
        with tf.variable_scope("model", initializer=initializer):
            m = graph.inference_graph(vocab.size, FLAGS.num_layers, FLAGS.hidden_size, FLAGS.batch_size, FLAGS.num_steps, FLAGS.dropout_rate)
            m.update(graph.cost_graph(m.logits, FLAGS.batch_size, FLAGS.num_steps, vocab.size))
            m.update(graph.training_graph(m.cost, FLAGS.grad_clip))

        # create saver before creating more graph nodes, so that we do not save any vars defined below      
        saver = tf.train.Saver(max_to_keep=50)

        # build the graph for validation and testing; reuse=True makes it share
        # its parameters with the training graph above
        with tf.variable_scope("model", reuse=True):
            mvalid = graph.inference_graph(vocab.size, FLAGS.num_layers, FLAGS.hidden_size, FLAGS.batch_size, FLAGS.num_steps)
            mvalid.update(graph.cost_graph(mvalid.logits, FLAGS.batch_size, FLAGS.num_steps, vocab.size))

        if FLAGS.load_model:
            # the checkpoint itself is restored further down, after
            # tf.initialize_all_variables() has run
            print('Loaded model from', FLAGS.load_model)
        else:
            print('Created model')
        
        print('\tnum_layers:', FLAGS.num_layers)
        print('\thidden_size:', FLAGS.hidden_size)
        print('\tvocab_size:', vocab.size)
        print()
        print('Training parameters')
        print('\tbatch_size:', FLAGS.batch_size)
        print('\tnum_steps:', FLAGS.num_steps)
        print('\tlearning_rate:', FLAGS.learning_rate)
        print('\tbeta1:', FLAGS.beta1)
        print('\tbeta2:', FLAGS.beta2)
        print()
        print('Datasets')
        print('\ttraining dataset size:', len(train_data))
        print('\tvalidation dataset size:', len(valid_data))
        print('\ttest dataset size:', len(test_data))
        print()
        
        # create two summaries: training cost and validation cost
        summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, graph=session.graph)
        summary_train = summary_graph('Training cost', ema_decay=0.95)
        summary_valid = summary_graph('Validation cost')
        
        # initialize all variables, including those just created by summary_graph();
        # the restore must come after this, or the initializers would overwrite
        # the loaded weights
        tf.initialize_all_variables().run()

        if FLAGS.load_model:
            saver.restore(session, FLAGS.load_model)

        session.run([
            tf.assign(m.lr, FLAGS.learning_rate),
            tf.assign(m.beta1, FLAGS.beta1),
            tf.assign(m.beta2, FLAGS.beta2),
        ])

        state = session.run(m.initial_state)
        # total number of batches across all epochs
        iterations = len(train_data) // FLAGS.batch_size // FLAGS.num_steps * FLAGS.max_epochs
        for i, (x, y) in enumerate(reader.next_batch(train_data, FLAGS.batch_size, FLAGS.num_steps)):
            if i >= iterations:
                break
        
            start_time = time.time()
        
            cost, state, _ = session.run([m.cost, m.final_state, m.train_op], {
                    m.input_data: x,
                    m.targets: y,
                    m.initial_state: state
            })
        
            epoch = float(i) / (len(train_data) // FLAGS.batch_size // FLAGS.num_steps)
            time_elapsed = time.time() - start_time
            print('%d/%d (epoch %.3f), train_loss = %6.8f, time/batch = %.4fs' % (i+1, iterations, epoch, cost, time_elapsed))
            
            session.run([summary_train.update], {summary_train.x: cost})
        
            if (i+1) % FLAGS.eval_val_every == 0 or i == iterations-1:
                # evaluate loss on validation data
                cost = run_test(session, mvalid, valid_data, FLAGS.batch_size, FLAGS.num_steps)
                print("validation cost = %6.8f" % cost)
                save_as = '%s/epoch%.2f_%.4f.model' % (FLAGS.train_dir, epoch, cost)
                saver.save(session, save_as)

                # write out summary events
                buffer, = session.run([summary_train.summary])
                summary_writer.add_summary(buffer, i)
                
                session.run([summary_valid.update], {summary_valid.x: cost})
                buffer, = session.run([summary_valid.summary])
                summary_writer.add_summary(buffer, i)
                
                summary_writer.flush()
        
        if len(test_data) > FLAGS.batch_size * FLAGS.num_steps:
            cost = run_test(session, mvalid, test_data, FLAGS.batch_size, FLAGS.num_steps)
            print("Test cost: %.3f" % test_loss)