Example #1
def run_test():
    # test batch generation
    print('download and read data')
    filename = maybe_download('text8.zip', 31344016)
    # Read data
    text = read_data(filename)
    # create datasets
    valid_size = 1000
    valid_text = text[:valid_size]
    train_text = text[valid_size:]
    # train_size = len(train_text)
    # create batch generators
    train_batches = BatchGenerator(train_text, BATCH_SIZE, MIN_CHARS_IN_BATCH, MAX_CHARS_IN_BATCH, reverse_encoder_input=True)
    valid_batches = BatchGenerator(valid_text, 1, MIN_CHARS_IN_BATCH, MAX_CHARS_IN_BATCH)

    # print(BatchGenerator.characters(train_batches.next()[0]))
    def show_batch(batch_generator):
        # Pull one batch, print both sides, and check the decoder weights.
        e_bs, d_bs, dw_bs = batch_generator.next()
        print(BatchGenerator.batches2string(e_bs))
        print(BatchGenerator.batches2string(d_bs))
        BatchGenerator.verify_weights(d_bs, dw_bs)

    print('test main batch generator')
    show_batch(train_batches)
    show_batch(train_batches)
    show_batch(valid_batches)
    show_batch(valid_batches)

    print('test random english generator')
    random_batch = RandomWordsBatchGenerator(2, MIN_CHARS_IN_BATCH, MAX_CHARS_IN_BATCH,
                                             reverse_encoder_input=False)
    for _ in range(10):
        show_batch(random_batch)

    print('test random string gen with padding')
    random_str_batch = ReverseStringBatchGenerator(1, 8, 8, reverse_encoder_input=False)
    show_batch(random_str_batch)
    random_str_batch = ReverseStringBatchGenerator(2, 8, 16, reverse_encoder_input=False)
    show_batch(random_str_batch)
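The test above assumes a generator interface: next() returns a tuple (encoder_batches, decoder_batches, decoder_weights), and the class exposes the static helpers batches2string and verify_weights. Below is a minimal toy sketch of that interface for the reverse-string task; all names, shapes, and the plain-string encoding are assumptions, not the project's actual implementation.

import random
import string


class ToyReverseStringBatchGenerator(object):
    """Toy stand-in for the generator interface exercised above (assumed).

    next() returns (encoder_inputs, decoder_inputs, decoder_weights);
    the weights mask out padding positions in the decoder targets.
    """

    PAD = ' '

    def __init__(self, batch_size, min_chars, max_chars, reverse_encoder_input=False):
        self._batch_size = batch_size
        self._min_chars = min_chars
        self._max_chars = max_chars
        self._reverse = reverse_encoder_input

    def next(self):
        encoder, decoder, weights = [], [], []
        for _ in range(self._batch_size):
            n = random.randint(self._min_chars, self._max_chars)
            word = ''.join(random.choice(string.ascii_lowercase) for _ in range(n))
            padded = word.ljust(self._max_chars, self.PAD)
            encoder.append(padded[::-1] if self._reverse else padded)
            # The toy task is string reversal, so the target is the word reversed.
            target = word[::-1].ljust(self._max_chars, self.PAD)
            decoder.append(target)
            weights.append([0.0 if c == self.PAD else 1.0 for c in target])
        return encoder, decoder, weights

    @staticmethod
    def batches2string(batches):
        # The real helper decodes one-hot batches; these strings are already readable.
        return list(batches)

    @staticmethod
    def verify_weights(decoder_batches, weight_batches):
        # Every non-padding character must carry weight 1.0, every pad 0.0.
        for target, w in zip(decoder_batches, weight_batches):
            assert all((wi == 0.0) == (c == ToyReverseStringBatchGenerator.PAD)
                       for c, wi in zip(target, w))


gen = ToyReverseStringBatchGenerator(2, 4, 8)
e_bs, d_bs, dw_bs = gen.next()
print(ToyReverseStringBatchGenerator.batches2string(e_bs))
print(ToyReverseStringBatchGenerator.batches2string(d_bs))
ToyReverseStringBatchGenerator.verify_weights(d_bs, dw_bs)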
Example #2
def train():
    graph = tf.Graph()
    (inputs, labels, sequence_lengths, optimizer, loss,
     predictions, summary_op, saver) = model(graph)
    train_writer = tf.train.SummaryWriter('tensorboard/train', graph)
    test_writer = tf.train.SummaryWriter('tensorboard/test', graph)

    with tf.Session(graph=graph) as sess:
        sess.run(tf.initialize_all_variables())
        print("Training...")
        train_batches = BatchGenerator(settings.BATCH_SIZE)
        test_batches = BatchGenerator(3000, settings.SEQUENCE_LIMIT)
        test_inputs, test_labels, test_sequence_lengths = test_batches.next()

        for step in xrange(settings.EPOCH):
            if step % 1000 != 1:
                train_inputs, train_labels, train_sequence_lengths = train_batches.next()
                feed_dict = {
                    inputs: train_inputs,
                    labels: train_labels,
                    sequence_lengths: train_sequence_lengths
                }
                _, train_loss, train_predictions, summary = sess.run(
                    [optimizer, loss, predictions, summary_op],
                    feed_dict=feed_dict)
                train_writer.add_summary(summary, step)
                train_writer.flush()
            else:
                test_feed_dict = {
                    inputs: test_inputs,
                    labels: test_labels,
                    sequence_lengths: test_sequence_lengths
                }
                test_loss, summary = sess.run([loss, summary_op],
                                              feed_dict=test_feed_dict)
                test_writer.add_summary(summary, step)
                test_writer.flush()

            if step % 1000 == 0:
                print('-----------Step %d:-------------' % step)
                print('Training set:')
                print('  Loss       : ', train_loss)
                print('  Input      : ', train_inputs[0])
                print('  Label      : ', utils.onehot2sticker(train_labels))
                print('  Prediction : ',
                      utils.onehot2sticker(train_predictions))

            if step % 10000 == 0:
                # Save the variables to disk.
                save_path = saver.save(sess,
                                       "checkpoints/" + settings.CKPT_NAME)
                print("Model saved in file: %s" % save_path)
Example #3
def train(num_layers, units_per_layer):
    print('download and read data')
    filename = maybe_download('text8.zip', 31344016)

    with tf.Session(graph=tf.Graph()) as validation_session:
        validation_model = ReverseSeqValidationSummaryModel(validation_session.graph)
        validation_session.run(tf.initialize_all_variables())

        with tf.Session(graph=tf.Graph()) as sess:
            # Create model.
            print("Creating %d layers of %d units." % (num_layers, units_per_layer))
            model = create_model(sess, num_layers, units_per_layer, False, DECODER_FEED_PREVIOUS)

            # Read data
            text = read_data(filename)
            # create datasets
            valid_size = 10000
            valid_text = text[:valid_size]
            train_text = text[valid_size:]
            # create batch generators
            validation_batch = BatchGenerator(valid_text, 1, MIN_CHARS_IN_BATCH, MAX_CHARS_IN_BATCH,
                                              reverse_encoder_input=REVERSE_ENCODER_INPUT)
            if TRAIN_BATCH_TYPE == UseTrainBatchType.use_english_words:
                train_batch = BatchGenerator(train_text, BATCH_SIZE, MIN_CHARS_IN_BATCH, MAX_CHARS_IN_BATCH,
                                             reverse_encoder_input=REVERSE_ENCODER_INPUT)
            elif TRAIN_BATCH_TYPE == UseTrainBatchType.use_random_train_words:
                train_batch = RandomWordsBatchGenerator(BATCH_SIZE, MIN_CHARS_IN_BATCH, MAX_CHARS_IN_BATCH,
                                                        reverse_encoder_input=REVERSE_ENCODER_INPUT)
            else:
                train_batch = ReverseStringBatchGenerator(BATCH_SIZE, MIN_CHARS_IN_BATCH, MAX_CHARS_IN_BATCH,
                                                          reverse_encoder_input=REVERSE_ENCODER_INPUT)
                validation_batch = ReverseStringBatchGenerator(1, MIN_CHARS_IN_BATCH, MAX_CHARS_IN_BATCH,
                                                               reverse_encoder_input=REVERSE_ENCODER_INPUT)

            # This is the training loop.
            step_time, loss = 0.0, 0.0
            current_step = model.global_step.eval() + 1
            print('starting from step %i' % current_step)
            previous_losses = []
            enc_state = model.initial_enc_state.eval()
            run_data_dir = run_data_directory(num_layers, units_per_layer)
            while True:
                # Get a batch and make a step.
                start_time = time.time()
                encoder_inputs, decoder_inputs, decoder_weights = train_batch.next()
                _, step_loss, enc_state = model.step(sess, current_step, encoder_inputs, decoder_inputs, decoder_weights,
                                                     enc_state, DROPOUT_PROB, False)
                step_time += (time.time() - start_time) / STEPS_PER_CHECKPOINT
                loss += step_loss / STEPS_PER_CHECKPOINT
                current_step += 1
                # Once in a while, save a checkpoint, print statistics, and run evals.
                if current_step % STEPS_PER_CHECKPOINT == 0:
                    # Print statistics for the previous epoch.
                    perplexity = math.exp(loss) if loss < 300 else float('inf')
                    print("global step %d learning rate %.4f step-time %.2f loss %.3f perplexity "
                          "%.2f" % (model.global_step.eval(), model.learning_rate.eval(),
                                    step_time, loss, perplexity))
                    # Decrease the learning rate if no improvement was seen over the last 3 checkpoints.
                    if len(previous_losses) > 2 and loss > max(previous_losses[-3:]):
                        sess.run(model.learning_rate_decay_op)
                    previous_losses.append(loss)
                    # Save checkpoint and zero timer and loss.
                    checkpoint_path = os.path.join(run_data_dir, 'state')
                    model.saver.save(sess, checkpoint_path, global_step=model.global_step)
                    step_time, loss = 0.0, 0.0
                    # Run evals on validation set and print their perplexity.
                    val_perp = validate_sentence(sess, model, validation_batch, enc_state, current_step)
                    summary_str = validation_model.merged_validation.eval(
                        {validation_model.validation_perp: min(val_perp, 500)},
                        validation_session)
                    model.summ_writer.add_summary(summary_str, current_step)
                    sys.stdout.flush()
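The checkpoint block above decays the learning rate whenever the averaged loss fails to improve on the worst of the last three checkpoint losses. Isolated, that schedule looks like the sketch below; the 0.99 decay factor and the loss values are made up, since the real factor is defined by model.learning_rate_decay_op elsewhere in the project.

def maybe_decay(previous_losses, loss, learning_rate, decay_factor=0.99):
    # Decay only when the new loss beats none of the last three checkpoints.
    if len(previous_losses) > 2 and loss > max(previous_losses[-3:]):
        learning_rate *= decay_factor
    previous_losses.append(loss)
    return learning_rate


lr, history = 0.5, []
for checkpoint_loss in [4.0, 3.5, 3.2, 3.4, 3.6, 3.0]:
    lr = maybe_decay(history, checkpoint_loss, lr)
    print('loss %.1f -> learning rate %.4f' % (checkpoint_loss, lr))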
Example #4
best_val_loss = np.inf
best_val_acc = 0.
best_val_auc = 0.
best_epoch = 0
best_acc_epoch = 0
wait = 0
total_time = 0.

print('Training...')

for epoch in range(NB_EPOCH):

    t = time.time()

    (adj_train, train_labels, train_u_indices, train_v_indices,
     val_labels, val_u_indices, val_v_indices) = batch_gen.next()

    # global normalization
    support = []
    support_t = []
    adj_train_int = sp.csr_matrix(adj_train, dtype=np.int32)

    for i in range(NUMCLASSES):
        # build individual binary rating matrices (supports) for each rating
        support_unnormalized = sp.csr_matrix(adj_train_int == i + 1,
                                             dtype=np.float32)

        if support_unnormalized.nnz == 0 and DATASET != 'yahoo_music':
            # The yahoo_music dataset split does not contain every rating type
            # in the training set, which produces empty adjacency matrices for
            # those ratings; for any other dataset an empty support is an error.
            sys.exit(
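The loop above slices the integer rating matrix into one binary adjacency matrix ("support") per rating value. A self-contained sketch of that step on a toy matrix; the matrix values and NUMCLASSES = 5 are assumptions for illustration.

import numpy as np
import scipy.sparse as sp

NUMCLASSES = 5  # rating levels 1..5 (assumed)

# Toy user-by-item rating matrix; 0 means "no rating observed".
adj_train = np.array([[5, 0, 3],
                      [0, 1, 0],
                      [4, 0, 5]])
adj_train_int = sp.csr_matrix(adj_train, dtype=np.int32)

support = []
for i in range(NUMCLASSES):
    # Binary matrix that is 1 exactly where the rating equals i + 1.
    support_unnormalized = sp.csr_matrix(adj_train_int == i + 1, dtype=np.float32)
    if support_unnormalized.nnz == 0:
        print('rating %d never appears in this split' % (i + 1))
    support.append(support_unnormalized)

print([s.nnz for s in support])  # -> [1, 0, 1, 1, 2]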