Example #1
def run_epoch(sess, model, data):
    epoch_size = ((len(data) // model.batch_size) - 1) // model.num_steps
    start_time = time.time()

    # accumulated counts
    costs = 0.0
    iters = 0

    # initial RNN state
    state = model.initial_state.eval()

    for step, (x, y) in enumerate(ptb_reader.ptb_iterator(data, model.batch_size, model.num_steps)):
        cost, state, _ = sess.run([model.cost, model.final_state, model.train_op], feed_dict={
            model.input_data: x,
            model.targets: y,
            model.initial_state: state
        })
        costs += cost
        iters += model.num_steps

        perplexity = np.exp(costs / iters)

        # stop early after 100 steps
        if step > 0 and step % 100 == 0:
            break

    return (costs / iters), perplexity
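Note: the run_epoch variants in these examples all report perplexity the same way, as the exponential of the accumulated cost divided by the accumulated number of time steps. A minimal sketch of just that step, assuming (as in the TensorFlow PTB tutorial) that each batch cost is the cross-entropy summed over num_steps and averaged over the batch:

import numpy as np

def running_perplexity(costs, iters):
    # costs: accumulated batch costs (each summed over time steps)
    # iters: accumulated number of time steps (num_steps per processed batch)
    return np.exp(costs / iters)

For instance, an accumulated cost of 460.5 over 100 time steps gives np.exp(4.605), a perplexity of roughly 100.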
Example #2
def test(test_data, verbose=0):

    model = load_model('weights/my_model.h5')

    #     model = get_model()
    #     model.load_weights('my_model_weights.h5')

    acc = 0.0
    siz = 0
    perplexity = []
    for step, (x, y) in enumerate(
            ptb_reader.ptb_iterator(test_data, dataset_size, num_steps)):

        x1, y1 = one_hot(x, y[:, -1])

        output = model.predict(x1, verbose=verbose)
        score, accuracy = model.evaluate(x1, y1, verbose=1, batch_size=10)

        perplexity.append(np.power(accuracy, 2))

        siz += 1

        print('')
        print('Step: ', step + 1, end='')
        print(', Test accuracy:', accuracy)

        acc += accuracy

    print('Average Accuracy: ', acc / siz)

    return np.mean(perplexity)
Example #3
def run_epoch(sess, model, data, verbose=False):
    epoch_size = ((len(data) // model.batch_size) - 1) // model.num_steps
    start_time = time.time()

    # accumulated counts
    costs = 0.0
    iters = 0

    # initial RNN state
    state = model.initial_state.eval()

    for step, (x, y) in enumerate(
            ptb_reader.ptb_iterator(data, model.batch_size, model.num_steps)):
        cost, state, _ = sess.run(
            [model.cost, model.final_state, model.train_op],
            feed_dict={
                model.input_data: x,
                model.targets: y,
                model.initial_state: state
            })
        costs += cost
        iters += model.num_steps

        perplexity = np.exp(costs / iters)

        if verbose and step % 10 == 0:
            progress = (step / epoch_size) * 100
            wps = iters * model.batch_size / (time.time() - start_time)
            print("%.1f%% Perplexity: %.3f (Cost: %.3f) Speed: %.0f wps" %
                  (progress, perplexity, cost, wps))

    return (costs / iters), perplexity
Example #4
def run_epoch(sess, model, data, verbose=False):
    epoch_size = ((len(data) // model.batch_size) - 1) // model.num_steps
    start_time = time.time()

    # accumulated counts
    costs = 0.0
    iters = 0

    # initial RNN state
    state = model.initial_state.eval()

    for step, (x, y) in enumerate(ptb_reader.ptb_iterator(data, model.batch_size, model.num_steps)):
        cost, state, _ = sess.run([model.cost, model.final_state, model.train_op], feed_dict={
            model.input_data: x,
            model.targets: y,
            model.initial_state: state
        })
        costs += cost
        iters += model.num_steps

        perplexity = np.exp(costs / iters)

        if verbose and step % 10 == 0:
            progress = (step / epoch_size) * 100
            wps = iters * model.batch_size / (time.time() - start_time)
            print("%.1f%% Perplexity: %.3f (Cost: %.3f) Speed: %.0f wps" % (progress, perplexity, cost, wps))

    return (costs / iters), perplexity
Example #5
def train(train_data, verbose=0, model=None):
    if model is None:
        model = get_model()

    if (verbose > 0):
        print('Train...')

    for step, (x, y) in enumerate(
            ptb_reader.ptb_iterator(train_data, dataset_size, num_steps)):
        #         x1 = np.zeros((dataset_size, num_steps, feat_len))
        #         for i in range(x1.shape[0]):
        #             for j in range(x1.shape[1]):
        #                 x1[i,j,x[i,j]] = 1

        #         y1 = y[:,-1]

        #         y1 = np.zeros((dataset_size, feat_len))
        #         for i in range(y1.shape[0]):
        #             y1[i,y[i,-1]] = 1

        x1, y1 = one_hot(x, y[:, -1])

        model.fit(x1, y1, epochs=10, verbose=verbose, batch_size=10)

        if (step % 100 == 0 and verbose > 0):
            print(step + 1, end=' ')
        # stop after training on the first batch only
        break

    if (not os.path.isdir('weights')):
        os.mkdir('weights')

    model.save('weights/my_model.h5')
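Note: Examples 2 and 5 both call a module-level one_hot helper that is not shown. A minimal sketch of what it presumably does, reconstructed from the commented-out loops above (feat_len is assumed to be the module-level vocabulary size):

import numpy as np

def one_hot(x, labels):
    # x: int array of word ids, shape (batch, num_steps)
    # labels: int array of target word ids, shape (batch,)
    # feat_len: assumed module-level vocabulary size
    x1 = np.zeros((x.shape[0], x.shape[1], feat_len))
    for i in range(x.shape[0]):
        for j in range(x.shape[1]):
            x1[i, j, x[i, j]] = 1
    y1 = np.zeros((labels.shape[0], feat_len))
    for i in range(labels.shape[0]):
        y1[i, labels[i]] = 1
    return x1, y1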
Example #6
def run_epoch(session, model, data, eval_op=None, verbose=False):
    epoch_size = ((len(data) // model.batch_size) - 1) // model.seq_length
    start_time = time.time()
    costs = 0.0
    iters = 0
    state = session.run(model.initial_state)

    for step, (x, y) in enumerate(
            reader.ptb_iterator(data, model.batch_size, model.seq_length)):
        fetches = [model.cost, model.final_state, eval_op]
        feed_dict = {}
        feed_dict[model.input_data] = x
        feed_dict[model.targets] = y

        for i, (z, z_mean, z_log_sigma_sq) in enumerate(model.initial_state):
            feed_dict[z] = state[i].z
            feed_dict[z_mean] = state[i].z_mean
            feed_dict[z_log_sigma_sq] = state[i].z_log_sigma_sq

        cost, state, _ = session.run(fetches, feed_dict)
        costs += cost
        iters += model.seq_length

        if verbose and step % (epoch_size // 10) == 10:
            print('Progress: %.3f; Perplexity: %.3f; Speed: %.0f wps' %
                  (step * 1.0 / epoch_size, np.exp(costs / iters),
                   iters * model.batch_size / (time.time() - start_time)))

    return np.exp(costs / iters)
Example #7
def run_epoch(sess, model, data):
    epoch_size = ((len(data) // model.batch_size) - 1) // model.num_steps
    saver = tf.train.Saver()
    # initialize the RNN state
    state = sess.run(model.initial_state)
    total_cost = 0
    iterations = 0
    for step, (x, y) in enumerate(
            ptb_reader.ptb_iterator(data, model.batch_size, model.num_steps)):
        cost, state, _ = sess.run(
            [model.cost, model.final_state, model.train_op],
            feed_dict={
                model.input_data: x,
                model.targets: y,
                model.initial_state: state
            })
        total_cost += cost
        iterations += model.num_steps
        perplexity = np.exp(total_cost / iterations)
        if step % 100 == 0:
            progress = (step * 1.0 / epoch_size) * 100
            print("%.1f%% Perplexity: %.3f (Cost: %.3f) " %
                  (progress, perplexity, cost))
    save_path = saver.save(sess, "./saved_model_rnn/lstm-model.ckpt")
    return (total_cost / iterations), perplexity
Example #8
def train(data):

	# saver = tf.train.import_meta_graph('weights/ptb_lstm_model.meta')
	# saver.restore(sess, 'weights/ptb_lstm_model')

	epoch_size = ((len(data) // batch_size) - 1) // lstm_steps 

	loss1 = 0.0
	iters = 0
	test_perplexity_prev = 10000000.0
	tf.global_variables_initializer().run()
	for ep in range(epochs):
		for step,(x,y) in enumerate(ptb_reader.ptb_iterator(data,batch_size,lstm_steps)):
			loss_temp,_ = sess.run([loss,train_step],feed_dict={inputs:x,targets:y})
			loss1 += loss_temp
			iters += lstm_steps
			perplexity = np.exp(loss1/iters)

			# if step%10==0:
			progress = (step/float(epoch_size))*100.0
			print("%d %.1f%% Perplexity: %.3f (Loss: %.3f)" % (ep,progress, perplexity,loss1/iters))
		# saver.save(sess,'weights/ptb_lstm_model')
		# print 'Trained model saved'
		# test_perplexity = test(test_data)
		# print("Test Perplexity: %.3f" % test_perplexity)
		loss_test = 0.0
		iters_test = 0
		for step,(x,y) in enumerate(ptb_reader.ptb_iterator(test_data,batch_size,lstm_steps)):
			print(step)
			loss_temp = sess.run(loss,feed_dict={inputs:x,targets:y})
			loss_test += loss_temp
			iters_test += lstm_steps
		test_perplexity = np.exp(loss_test/iters_test)
		if test_perplexity>test_perplexity_prev:
			break
		print("Test Perplexity: %.3f" % test_perplexity)
		test_perplexity_prev = test_perplexity

	saver = tf.train.Saver()
	saver.save(sess, 'weights/ptb_lstm_model')
	print('Trained model saved')
	return perplexity,loss1/iters
Example #9
def run_epoch(session, m, data, eval_op, verbose=False):
    """Runs the model on the given data"""
    epoch_size = ((len(data) // m.batch_size) - 1) // m.num_steps
    # epoch_size here equals the number of iterations per epoch
    start_time = time.time()
    costs = 0.0
    iters = 0
    #print("state one:")
    #print(m.initial_state[0])
    #print("state two")
    #state = m.initial_state[1]
    #print(state)
    #print("the whole state")
    state = m._initial_state.eval()
    #print(tf.shape(m._initial_state))
    #print(m.initial_state)

    #print("2222222222222222")
    # print(state[0])
    # print(data)
    # print(len(data))
    for step, (x, y) in enumerate(
            ptb_reader.ptb_iterator(data, m.batch_size, m.num_steps)):
        #print("y!!!!!!!!!!!!!!!!!!!!!!!")
        #print(step)
        #print(x)
        #print(y)
        cost, state, inputs, output, outputs, __ = session.run(
            [m.cost, m.final_state, m.inputs, m.output, m.outputs, eval_op], {
                m.input_data: x,
                m.targets: y,
                m._initial_state: state
            })
        #print("13333232323!!!")
        #print(tf.shape(y).dims)
        #print(tf.shape(output))
        costs += cost
        iters += m.num_steps
        if verbose and step % (epoch_size // 10) == 10:
            print("%.3f perplexity: %.3f speed: %.0f wps" %
                  (step * 1.0 / epoch_size, np.exp(costs / iters),
                   iters * m.batch_size / (time.time() - start_time)))

    tvars = tf.trainable_variables()
    print("printing all traiinable vairable for time steps", m.num_steps)
    for tvar in tvars:
        print(tvar.name, tvar.initialized_value())
    return np.exp(costs / iters)
Example #10
def run_epoch(session, model, data, train_op, output_log):
    total_costs = 0.0
    iters = 0
    state = session.run(model.initial_state)
    for step, (x, y) in enumerate(ptb_reader.ptb_iterator(data, model.batch_size,
                                                          model.num_steps)):
        cost, state, _ = session.run([model.cost, model.final_state, train_op],
                                     {model.input_data: x, model.targets: y,
                                      model.initial_state: state})
        total_costs += cost
        iters += model.num_steps

        if output_log and step % 100 == 0:
            print('After %d steps, perplexity is %.3f' %
                  (step, np.exp(total_costs / iters)))

    return np.exp(total_costs / iters)
Example #11
def test(data):

	saver = tf.train.import_meta_graph('weights/ptb_lstm_model.meta')
	saver.restore(sess, 'weights/ptb_lstm_model')
	epoch_size = ((len(data) // batch_size) - 1) // lstm_steps

	loss1 = 0.0
	iters = 0
	# tf.global_variables_initializer().run()
	for step,(x,y) in enumerate(ptb_reader.ptb_iterator(data,batch_size,lstm_steps)):
		# print step
		loss_temp = sess.run(loss,feed_dict={inputs:x,targets:y})
		loss1 += loss_temp
		iters += lstm_steps
		perplexity = np.exp(loss1/iters)

	return perplexity
Example #12
def test_epoch(sess, model, data):
    saver = tf.train.Saver()
    saver.restore(sess, "./saved_model_rnn/lstm-model.ckpt")
    state = sess.run(model.initial_state)
    total_cost = 0
    iterations = 0
    epoch_size = ((len(data) // model.batch_size) - 1) // model.num_steps
    for step, (x, y) in enumerate(
            ptb_reader.ptb_iterator(data, model.batch_size, model.num_steps)):
        cost, state = sess.run([model.cost, model.final_state],
                               feed_dict={
                                   model.input_data: x,
                                   model.targets: y,
                                   model.initial_state: state
                               })
        total_cost += cost
        iterations += model.num_steps
        perplexity = np.exp(total_cost / iterations)
    return (total_cost / iterations), perplexity
Example #13
def run_epoch(session, m, data, eval_op, verbose=False, vocabulary=None):
    """
    :param session for computation
    :param m model object
    :param data input data
    :param eval_op
    :param verbose
    :param vocabulary
    Runs the model on the given data."""
    epoch_size = ((len(data) // m.batch_size) - 1) // m.num_steps
    start_time = time.time()
    costs = 0.0
    iters = 0
    state = m.initial_state.eval()
    for step, (x, y) in enumerate(
            ptb_reader.ptb_iterator(data, m.batch_size, m.num_steps)):
        cost, state, probs, logits, _ = session.run(
            [m.cost, m.final_state, m.probabilities, m.logits, eval_op], {
                m.input_data: x,
                m.targets: y,
                m.initial_state: state
            })
        costs += cost
        iters += m.num_steps

        if verbose and step % (epoch_size // 10) == 10:
            print("%.3f perplexity: %.3f speed: %.0f wps" %
                  (step * 1.0 / epoch_size, np.exp(costs / iters),
                   iters * m.batch_size / (time.time() - start_time)))
            chosen_word = np.argmax(probs, 1)
            print("Probabilities shape: %s, Logits shape: %s" %
                  (probs.shape, logits.shape))
            print(chosen_word)
            if vocabulary is not None:
                next_word_id = chosen_word[-1]
                for word_, word_id_ in vocabulary.items():
                    if word_id_ == next_word_id:
                        print(word_)

            print("Batch size: %s, Num steps: %s" %
                  (m.batch_size, m.num_steps))

    return np.exp(costs / iters)
Example #14
def run_epoch(session, m, data, eval_op, verbose=False):
    """Runs the model on the given data."""
    epoch_size = ((len(data) // m.batch_size) - 1) // m.num_steps
    start_time = time.time()
    costs = 0.0
    iters = 0
    state = m.initial_state.eval()
    for step, (x, y) in enumerate(
            reader.ptb_iterator(data, m.batch_size, m.num_steps)):
        cost, state, _ = session.run([m.cost, m.final_state, eval_op], {
            m.input_data: x,
            m.targets: y,
            m.initial_state: state
        })
        costs += cost
        iters += m.num_steps

        if verbose and step % (epoch_size // 10) == 10:
            print("%.3f perplexity: %.3f speed: %.0f wps" %
                  (step * 1.0 / epoch_size, np.exp(costs / iters),
                   iters * m.batch_size / (time.time() - start_time)))

    return np.exp(costs / iters)
Example #15
def runepoch(sess, data, modeldict, fetches, epoch_no, verbose):
    lr_decay = decay**max(epoch_no - 4, 0.0)
    sess.run(tf.assign(modeldict['lr'], learning_rate * lr_decay))
    state = sess.run(modeldict['initial_state'])
    losses = 0.0
    itercnt = 0

    if verbose: print('Running New Epoch')
    for curr, (x, y) in enumerate(
            ptb_reader.ptb_iterator(data, flags.batchsize, flags.numsteps)):
        feed_dict = {
            modeldict['X']: x,
            modeldict['Y']: y,
            modeldict['initial_state']: state
        }
        vals = sess.run(fetches, feed_dict)
        losses += vals['loss'] * flags.numsteps
        state = vals['final_state']
        itercnt += flags.numsteps
        if curr % 100 == 0 and verbose:
            print('Curr: ', curr, ' | Perplexity: ', np.exp(losses / itercnt))

    if verbose: print('Epoch Complete')
    return np.exp(losses / itercnt)
Example #16
import ptb_reader as pr

source = "C:\\ptb\\ptb\\data"
train_data, valid_data, test_data, word_to_id, id_to_word = pr.ptb_raw_data(
    source)

for step, (x, y) in enumerate(pr.ptb_iterator(train_data, 40, 20)):
    print("y!!!!!!!!!!!!!!!!!!!!!!!")
    print(step)
    print(x)
    print(y)
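Note: every example here iterates over ptb_iterator from the PTB reader module. Assuming it matches the classic TensorFlow PTB tutorial reader, this is roughly what it yields (a minimal NumPy sketch, not the module's actual code):

import numpy as np

def ptb_iterator(raw_data, batch_size, num_steps):
    # Reshape the word-id stream into batch_size rows, then yield (x, y)
    # pairs of shape [batch_size, num_steps] where y is x shifted one
    # position to the right.
    raw_data = np.array(raw_data, dtype=np.int32)
    batch_len = len(raw_data) // batch_size
    data = raw_data[:batch_size * batch_len].reshape(batch_size, batch_len)
    epoch_size = (batch_len - 1) // num_steps
    for i in range(epoch_size):
        x = data[:, i * num_steps:(i + 1) * num_steps]
        y = data[:, i * num_steps + 1:(i + 1) * num_steps + 1]
        yield x, y

This also explains the epoch_size = ((len(data) // batch_size) - 1) // num_steps formula used throughout the run_epoch examples: it is simply the number of (x, y) pairs the iterator produces.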
Example #17
def _main(_):
    # Data
    batch_size = config.batch_size
    memory_size = config.memory_size
    terminating_learning_rate = config.terminating_learning_rate
    data = prepare_data(FLAGS.data_path)
    vocab_size = data["vocab_size"]
    print('vocab_size = {}'.format(vocab_size))

    inputs = tf.placeholder(tf.int32, [None, memory_size], name="inputs")
    targets = tf.placeholder(tf.int32, [None], name="targets")

    # Model architecture
    initializer = tf.random_normal_initializer(stddev=config.initialize_stddev)
    with tf.variable_scope("model", initializer=initializer):
        memnet = tx.modules.MemNetRNNLike(raw_memory_dim=vocab_size,
                                          hparams=config.memnet)
        queries = tf.fill([tf.shape(inputs)[0], config.dim],
                          config.query_constant)
        logits = memnet(inputs, queries)

    # Losses & train ops
    mle_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=targets,
                                                              logits=logits)
    mle_loss = tf.reduce_sum(mle_loss)

    # Use global_step to pass epoch, for lr decay
    lr = config.opt["optimizer"]["kwargs"]["learning_rate"]
    learning_rate = tf.placeholder(tf.float32, [], name="learning_rate")
    global_step = tf.Variable(0, dtype=tf.int32, name="global_step")
    increment_global_step = tf.assign_add(global_step, 1)
    train_op = tx.core.get_train_op(mle_loss,
                                    learning_rate=learning_rate,
                                    global_step=global_step,
                                    increment_global_step=False,
                                    hparams=config.opt)

    def _run_epoch(sess, data_iter, epoch, is_train=False):
        loss = 0.
        iters = 0

        fetches = {"mle_loss": mle_loss}
        if is_train:
            fetches["train_op"] = train_op

        mode = (tf.estimator.ModeKeys.TRAIN
                if is_train else tf.estimator.ModeKeys.EVAL)

        for _, (x, y) in enumerate(data_iter):
            batch_size = x.shape[0]
            feed_dict = {
                inputs: x,
                targets: y,
                learning_rate: lr,
                tx.global_mode(): mode,
            }

            rets = sess.run(fetches, feed_dict)
            loss += rets["mle_loss"]
            iters += batch_size

        ppl = np.exp(loss / iters)
        return ppl

    saver = tf.train.Saver()

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        sess.run(tf.tables_initializer())

        try:
            saver.restore(sess, "ckpt/model.ckpt")
            print('restored checkpoint.')
        except:
            print('restore checkpoint failed.')

        last_valid_ppl = None
        heuristic_lr_decay = (hasattr(config, 'heuristic_lr_decay')
                              and config.heuristic_lr_decay)
        while True:
            if lr < terminating_learning_rate:
                break

            epoch = sess.run(global_step)
            if epoch >= config.num_epochs:
                print('Too many epochs!')
                break

            print('epoch: {} learning_rate: {:.6f}'.format(epoch, lr))

            # Train
            train_data_iter = ptb_iterator(data["train_text_id"], batch_size,
                                           memory_size)
            train_ppl = _run_epoch(sess, train_data_iter, epoch, is_train=True)
            print("Train Perplexity: {:.3f}".format(train_ppl))
            sess.run(increment_global_step)

            # checkpoint
            if epoch % 5 == 0:
                try:
                    saver.save(sess, "ckpt/model.ckpt")
                    print("saved checkpoint.")
                except:
                    print("save checkpoint failed.")

            # Valid
            valid_data_iter = ptb_iterator(data["valid_text_id"], batch_size,
                                           memory_size)
            valid_ppl = _run_epoch(sess, valid_data_iter, epoch)
            print("Valid Perplexity: {:.3f}".format(valid_ppl))

            # Learning rate decay
            if last_valid_ppl:
                if heuristic_lr_decay:
                    if valid_ppl > last_valid_ppl * config.heuristic_threshold:
                        lr /= 1. + (valid_ppl / last_valid_ppl \
                                    - config.heuristic_threshold) \
                                   * config.heuristic_rate
                    last_valid_ppl = last_valid_ppl \
                                     * (1 - config.heuristic_smooth_rate) \
                                     + valid_ppl * config.heuristic_smooth_rate
                else:
                    if valid_ppl > last_valid_ppl:
                        lr /= config.learning_rate_anneal_factor
                    last_valid_ppl = valid_ppl
            else:
                last_valid_ppl = valid_ppl
            print("last_valid_ppl: {:.6f}".format(last_valid_ppl))

        epoch = sess.run(global_step)
        print('Terminate after epoch ', epoch)

        # Test
        test_data_iter = ptb_iterator(data["test_text_id"], 1, memory_size)
        test_ppl = _run_epoch(sess, test_data_iter, 0)
        print("Test Perplexity: {:.3f}".format(test_ppl))
Example #18
def _main(_):
    # Data
    batch_size = config.batch_size
    num_steps = config.num_steps
    data = prepare_data(FLAGS.data_path)
    vocab_size = data["vocab_size"]

    inputs = tf.placeholder(tf.int32, [batch_size, num_steps])
    targets = tf.placeholder(tf.int32, [batch_size, num_steps])

    # Model architecture
    initializer = tf.random_uniform_initializer(-config.init_scale,
                                                config.init_scale)
    with tf.variable_scope("model", initializer=initializer):
        embedder = tx.modules.WordEmbedder(vocab_size=vocab_size,
                                           hparams=config.emb)
        emb_inputs = embedder(inputs)
        if config.keep_prob < 1:
            emb_inputs = tf.nn.dropout(
                emb_inputs, tx.utils.switch_dropout(config.keep_prob))

        decoder = tx.modules.BasicRNNDecoder(vocab_size=vocab_size,
                                             hparams={"rnn_cell": config.cell})
        initial_state = decoder.zero_state(batch_size, tf.float32)
        outputs, final_state, seq_lengths = decoder(
            decoding_strategy="train_greedy",
            impute_finished=True,
            inputs=emb_inputs,
            sequence_length=[num_steps] * batch_size,
            initial_state=initial_state)

    # Losses & train ops
    mle_loss = tx.losses.sequence_sparse_softmax_cross_entropy(
        labels=targets, logits=outputs.logits, sequence_length=seq_lengths)

    # Use global_step to pass epoch, for lr decay
    global_step = tf.placeholder(tf.int32)
    train_op = tx.core.get_train_op(mle_loss,
                                    global_step=global_step,
                                    increment_global_step=False,
                                    hparams=config.opt)

    def _run_epoch(sess, data_iter, epoch, is_train=False, verbose=False):
        start_time = time.time()
        loss = 0.
        iters = 0
        state = sess.run(initial_state)

        fetches = {
            "mle_loss": mle_loss,
            "final_state": final_state,
        }
        if is_train:
            fetches["train_op"] = train_op
            epoch_size = (len(data["train_text_id"]) // batch_size - 1)\
                // num_steps

        mode = (tf.estimator.ModeKeys.TRAIN
                if is_train else tf.estimator.ModeKeys.EVAL)

        for step, (x, y) in enumerate(data_iter):
            feed_dict = {
                inputs: x,
                targets: y,
                global_step: epoch,
                tx.global_mode(): mode,
            }
            for i, (c, h) in enumerate(initial_state):
                feed_dict[c] = state[i].c
                feed_dict[h] = state[i].h

            rets = sess.run(fetches, feed_dict)
            loss += rets["mle_loss"]
            state = rets["final_state"]
            iters += num_steps

            ppl = np.exp(loss / iters)
            if verbose and is_train and step % (epoch_size // 10) == 10:
                print("%.3f perplexity: %.3f speed: %.0f wps" %
                      ((step + 1) * 1.0 / epoch_size, ppl, iters * batch_size /
                       (time.time() - start_time)))

        ppl = np.exp(loss / iters)
        return ppl

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        sess.run(tf.tables_initializer())

        for epoch in range(config.num_epochs):
            # Train
            train_data_iter = ptb_iterator(data["train_text_id"],
                                           config.batch_size, num_steps)
            train_ppl = _run_epoch(sess,
                                   train_data_iter,
                                   epoch,
                                   is_train=True,
                                   verbose=True)
            print("Epoch: %d Train Perplexity: %.3f" % (epoch, train_ppl))
            # Valid
            valid_data_iter = ptb_iterator(data["valid_text_id"],
                                           config.batch_size, num_steps)
            valid_ppl = _run_epoch(sess, valid_data_iter, epoch)
            print("Epoch: %d Valid Perplexity: %.3f" % (epoch, valid_ppl))
        # Test
        test_data_iter = ptb_iterator(data["test_text_id"], batch_size,
                                      num_steps)
        test_ppl = _run_epoch(sess, test_data_iter, 0)
        print("Test Perplexity: %.3f" % (test_ppl))
Example #19
def _main(_):
    # Data
    tf.logging.set_verbosity(tf.logging.INFO)

    # 1. initialize Horovod
    hvd.init()

    batch_size = config.batch_size
    num_steps = config.num_steps
    data = prepare_data(FLAGS.data_path)
    vocab_size = data["vocab_size"]

    inputs = tf.placeholder(tf.int32, [None, num_steps], name='inputs')
    targets = tf.placeholder(tf.int32, [None, num_steps], name='targets')

    # Model architecture
    initializer = tf.random_uniform_initializer(-config.init_scale,
                                                config.init_scale)
    with tf.variable_scope("model", initializer=initializer):
        embedder = tx.modules.WordEmbedder(vocab_size=vocab_size,
                                           hparams=config.emb)
        emb_inputs = embedder(inputs)
        if config.keep_prob < 1:
            emb_inputs = tf.nn.dropout(
                emb_inputs, tx.utils.switch_dropout(config.keep_prob))

        decoder = tx.modules.BasicRNNDecoder(vocab_size=vocab_size,
                                             hparams={"rnn_cell": config.cell})

        # In distributed training this _batch_size equals
        # batch_size // hvd.size(), because the mini-batch is split
        # across multiple GPUs.

        _batch_size = tf.shape(inputs)[0]
        initial_state = decoder.zero_state(_batch_size, tf.float32)
        seq_length = tf.broadcast_to([num_steps], (_batch_size, ))
        outputs, final_state, seq_lengths = decoder(
            decoding_strategy="train_greedy",
            impute_finished=True,
            inputs=emb_inputs,
            sequence_length=seq_length,
            initial_state=initial_state)
    # Losses & train ops
    mle_loss = tx.losses.sequence_sparse_softmax_cross_entropy(
        labels=targets, logits=outputs.logits, sequence_length=seq_lengths)

    # Use global_step to pass epoch, for lr decay
    global_step = tf.placeholder(tf.int32)

    opt = tx.core.get_optimizer(global_step=global_step, hparams=config.opt)

    # 2. wrap the optimizer
    opt = hvd.DistributedOptimizer(opt)

    train_op = tx.core.get_train_op(loss=mle_loss,
                                    optimizer=opt,
                                    global_step=global_step,
                                    learning_rate=None,
                                    increment_global_step=False,
                                    hparams=config.opt)

    def _run_epoch(sess, data_iter, epoch, is_train=False, verbose=False):
        start_time = time.time()
        loss = 0.
        iters = 0

        fetches = {
            "mle_loss": mle_loss,
            "final_state": final_state,
        }
        if is_train:
            fetches["train_op"] = train_op
            epoch_size = (len(data["train_text_id"]) // batch_size - 1)\
                // num_steps

        mode = (tf.estimator.ModeKeys.TRAIN
                if is_train else tf.estimator.ModeKeys.EVAL)

        for step, (x, y) in enumerate(data_iter):
            if step == 0:
                state = sess.run(initial_state, feed_dict={inputs: x})

            feed_dict = {
                inputs: x,
                targets: y,
                global_step: epoch,
                tx.global_mode(): mode,
            }
            for i, (c, h) in enumerate(initial_state):
                feed_dict[c] = state[i].c
                feed_dict[h] = state[i].h

            rets = sess.run(fetches, feed_dict)
            loss += rets["mle_loss"]
            state = rets["final_state"]
            iters += num_steps

            ppl = np.exp(loss / iters)
            if verbose and is_train and hvd.rank() == 0 \
                and (step + 1) % (epoch_size // 10) == 0:
                tf.logging.info(
                    "%.3f perplexity: %.3f speed: %.0f wps" %
                    ((step + 1) * 1.0 / epoch_size, ppl, iters * batch_size /
                     (time.time() - start_time)))
        _elapsed_time = time.time() - start_time
        tf.logging.info("epoch time elapsed: %f" % (_elapsed_time))
        ppl = np.exp(loss / iters)
        return ppl, _elapsed_time

    # 3. broadcast global variables from the rank-0 process
    bcast = hvd.broadcast_global_variables(0)

    # 4. set visible GPU
    session_config = tf.ConfigProto()
    session_config.gpu_options.visible_device_list = str(hvd.local_rank())

    with tf.Session(config=session_config) as sess:
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        sess.run(tf.tables_initializer())

        # 5. run the broadcast_global_variables node before training
        bcast.run()

        _times = []
        for epoch in range(config.num_epochs):
            # Train
            train_data_iter = ptb_iterator(data["train_text_id"],
                                           config.batch_size,
                                           num_steps,
                                           is_train=True)
            train_ppl, train_time = _run_epoch(sess,
                                               train_data_iter,
                                               epoch,
                                               is_train=True,
                                               verbose=True)
            _times.append(train_time)
            tf.logging.info("Epoch: %d Train Perplexity: %.3f" %
                            (epoch, train_ppl))
            # Valid in the main process
            if hvd.rank() == 0:
                valid_data_iter = ptb_iterator(data["valid_text_id"],
                                               config.batch_size, num_steps)
                valid_ppl, _ = _run_epoch(sess, valid_data_iter, epoch)
                tf.logging.info("Epoch: %d Valid Perplexity: %.3f" %
                                (epoch, valid_ppl))

        tf.logging.info('train times: %s' % (_times))
        tf.logging.info('average train time/epoch %f' %
                        np.mean(np.array(_times)))
        # Test in the main process
        if hvd.rank() == 0:
            test_data_iter = ptb_iterator(data["test_text_id"], batch_size,
                                          num_steps)
            test_ppl, _ = _run_epoch(sess, test_data_iter, 0)
            tf.logging.info("Test Perplexity: %.3f" % (test_ppl))