Beispiel #1
0
def test(args):
    """Evaluate a saved character-level RNN on the test text.

    Builds the network described by ``args``, loads the weights stored at
    ``args.weights_fpath + 'weights.pickle'`` and prints the loss of every
    test batch followed by the average loss over all evaluated batches.

    Args:
        args: namespace-like object providing ``rnn``, ``mode``,
            ``weights_fpath``, ``text_fpath``, ``grad_clipping``,
            ``num_hidden``, ``batch_size`` and ``train_seq_length``.
            ``text_fpath`` and ``weights_fpath`` are concatenated directly
            with the file names, so they presumably need to end with a path
            separator.
    """
    rnn = args.rnn  # type of RNN
    mode = args.mode  # quantization method

    weights_fpath = args.weights_fpath  # prefix of the stored weights file
    text_fpath = args.text_fpath  # prefix of the input text file
    grad_clipping = args.grad_clipping
    num_hidden = args.num_hidden
    batch_size = args.batch_size
    # sequence length the network was trained with
    train_seq_length = args.train_seq_length

    text_test, vocab_test = utils.utils.parse(text_fpath + 'test.txt')

    # NOTE(review): the vocabulary is derived from the test file alone; the
    # loaded weights presumably expect the training vocabulary -- confirm that
    # both files yield the same vocabulary.
    # print(x) with a single argument behaves the same on Python 2 and 3.
    print(vocab_test)

    # encode each character in the vocabulary as an integer
    encoder = LabelEncoder()
    encoder.fit(list(vocab_test))
    vocab_size = len(vocab_test)

    layers = char_rnn.build_model(
        (None, train_seq_length, vocab_size),  # input_shape
        num_hidden,
        vocab_size,
        grad_clipping,
        rnn,
        mode)

    char_rnn.load_weights(layers['l_out'], weights_fpath + 'weights.pickle')

    print('compiling theano function for testing')
    # the testing flow reuses the validation function
    test_char_rnn = theano_funcs.create_vali_func(layers)

    try:
        test_losses = []
        seq_iter_test = utils.utils.sequences(text_test, batch_size,
                                              train_seq_length, vocab_size,
                                              encoder)
        print("Start testing flow:")
        for i, (X_test, y_test) in tqdm(enumerate(seq_iter_test), leave=False):
            # skip entries for which the iterator yielded no batch
            if X_test is None or y_test is None:
                continue
            loss = test_char_rnn(X_test, y_test)
            test_losses.append(loss)
            print(' loss = %.6f' % (loss))
        print("Testing flow finished")

        # np.mean([]) would emit a RuntimeWarning; report nan explicitly.
        if test_losses:
            mean_loss = np.mean(test_losses)
        else:
            mean_loss = float('nan')
        print('Test set average loss = %.6f' % (mean_loss))

    except KeyboardInterrupt:
        print('caught ctrl-c, stopping testing')
Beispiel #2
0
def generate_samples(weights_fpath='cv/weights.pickle',
                     text_fpath='data/parsed.txt',
                     rnn='GRU',
                     mode='ternary',
                     grad_clipping=1.,
                     num_hidden=128,
                     train_seq_length=10,
                     sample_seq_length=500):
    """Interactively sample text from a saved character-level RNN.

    Rebuilds the model, loads its weights and then repeatedly prompts for a
    seed phrase, printing the text generated from it. The loop ends on
    Ctrl-C. The defaults reproduce the previously hard-coded configuration.

    Args:
        weights_fpath: file from which the model weights are loaded.
        text_fpath: text file used to rebuild the vocabulary.
        rnn: RNN type passed to ``char_rnn.build_model``.
        mode: quantization mode passed to ``char_rnn.build_model``.
        grad_clipping: gradient clipping value passed to the model builder.
        num_hidden: number of hidden units passed to the model builder.
        train_seq_length: sequence length of the model input; also the
            minimum accepted length of the seed phrase.
        sample_seq_length: sampling length passed to ``utils.utils.sample``.
    """
    # raw_input only exists on Python 2; fall back to input on Python 3.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    text, vocab = utils.utils.parse(text_fpath)

    # need to build the same encoder as during training, could pickle
    encoder = LabelEncoder()
    encoder.fit(list(vocab))
    vocab_size = len(vocab)

    layers = char_rnn.build_model(
        (None, train_seq_length, vocab_size),  # input_shape
        num_hidden,
        vocab_size,
        grad_clipping,
        rnn,
        mode)

    # load the model
    print('loading model weights from %s' % (weights_fpath))
    char_rnn.load_weights(layers['l_out'], weights_fpath)
    print('loading model done!')

    print('compiling theano function for sampling')
    sample = theano_funcs.create_sample_func(layers)

    try:
        while True:
            # prompt the user for a phrase to initialize the sampling
            phrase = read_line('start a phrase of at least %d chars:\n' %
                               (train_seq_length))
            if len(phrase) < train_seq_length:
                print('len(phrase) = %d, need len(phrase) >= %d' %
                      (len(phrase), train_seq_length))
                continue
            generated_phrase = utils.utils.sample(sample, phrase,
                                                  train_seq_length,
                                                  sample_seq_length,
                                                  vocab_size, encoder)
            print('%s\n' % (generated_phrase))
    except KeyboardInterrupt:
        print('caught ctrl-c')
    print('done')
Beispiel #3
0
def test_create_train_func():
    """Smoke-test ``create_train_func`` by running it on one dummy batch."""
    import numpy as np
    from char_rnn import build_model

    vocab_size = 64
    n_batch, n_steps = 16, 32

    # Batch and sequence dimensions of the input shape are left unspecified.
    net = build_model((None, None, vocab_size), 128, vocab_size, 10.)
    train_func = create_train_func(net)

    # Along the last axis only index 0 is set, at every position.
    inputs = np.zeros((n_batch, n_steps, vocab_size), dtype=np.float32)
    inputs[..., 0] = 1.
    # One random integer target per batch entry.
    targets = np.random.randint(0, vocab_size, n_batch).astype(np.int32)

    print('testing train_func')
    batch_loss = train_func(inputs, targets)
    print('loss = %.6f' % (batch_loss))
    print('done')
Beispiel #4
0
def train(args):
    """Train a character-level RNN and periodically sample from it.

    Runs ``args.max_epochs`` epochs over the text at ``args.text_fpath``.
    Whenever the mean training loss of an epoch improves on the best value
    seen so far, the parameters are written to
    ``args.weights_fpath + 'rnn_paremeter.npz'``. After training finishes or
    is interrupted with Ctrl-C, the current parameters are saved to
    ``args.weights_fpath + 'weights.pickle'``.

    Args:
        args: namespace-like object providing ``rnn``, ``mode``,
            ``weights_fpath``, ``text_fpath``, ``max_epochs``, ``lr``,
            ``lr_decay``, ``lr_decay_after``, ``grad_clipping``,
            ``num_hidden``, ``batch_size``, ``sample_every``,
            ``train_seq_length``, ``sample_seq_length`` and ``load_model``.
            ``weights_fpath`` is concatenated directly with the file names,
            so it presumably needs to end with a path separator.
    """
    rnn = args.rnn  # type of RNN
    mode = args.mode

    weights_fpath = args.weights_fpath  # prefix of the weight files
    text_fpath = args.text_fpath  # path to the input file
    max_epochs = args.max_epochs
    lr = args.lr
    lr_decay = args.lr_decay
    lr_decay_after = args.lr_decay_after
    grad_clipping = args.grad_clipping  # TODO(ML): need to be modified
    num_hidden = args.num_hidden
    batch_size = args.batch_size
    sample_every = args.sample_every  # sample every n batches
    # sequence length during training, number of chars to draw for sampling
    train_seq_length = args.train_seq_length
    sample_seq_length = args.sample_seq_length
    load_model = args.load_model

    text, vocab = utils.utils.parse(text_fpath)

    # encode each character in the vocabulary as an integer
    encoder = LabelEncoder()
    encoder.fit(list(vocab))
    vocab_size = len(vocab)

    layers = char_rnn.build_model(
        (None, train_seq_length, vocab_size),  # input_shape
        num_hidden,
        vocab_size,
        grad_clipping,
        rnn,
        mode)

    # optionally load a pre-trained model
    if load_model:
        print('loading model weights from %s' %
              (weights_fpath + 'weights.pickle'))
        char_rnn.load_weights(layers['l_out'],
                              weights_fpath + 'weights.pickle')

    # phrases to use during sampling
    phrases = ['First Citizen:']

    print('compiling theano function for training')
    train_char_rnn = theano_funcs.create_train_func(layers, rnn)
    print('theano function for training built')

    print('compiling theano function for sampling')
    sample = theano_funcs.create_sample_func(layers)
    print('theano funciton for sampling built')

    # Start from infinity so the first finite epoch loss always counts as an
    # improvement; a fixed threshold would suppress checkpoints for models
    # whose loss never drops below it.
    best_loss = float('inf')
    best_epoch = 1

    print('Start Training')
    try:
        for epoch in range(1, 1 + max_epochs):
            print('epoch %d' % (epoch))
            if epoch >= lr_decay_after:
                lr = lr * lr_decay
            # sample from the model and update the weights
            train_losses = []
            seq_iter = utils.utils.sequences(text, batch_size,
                                             train_seq_length, vocab_size,
                                             encoder)
            for i, (X, y) in tqdm(enumerate(seq_iter), leave=False):
                if X is not None and y is not None:
                    loss = train_char_rnn(X, y, lr)
                    train_losses.append(loss)
                    print(' loss = %.6f' % (loss))

                # continuously sample from the model
                if ((i + 1) % sample_every) == 0:
                    # np.mean([]) would emit a RuntimeWarning; use nan.
                    if train_losses:
                        running_loss = np.mean(train_losses)
                    else:
                        running_loss = float('nan')
                    print(' loss = %.6f' % (running_loss))
                    phrase = np.random.choice(phrases)
                    generated_phrase = utils.utils.sample(
                        sample, phrase, train_seq_length, sample_seq_length,
                        vocab_size, encoder)
                    print('%s%s' % (phrase, generated_phrase))

            # Compute the epoch mean once and reuse it below.
            if train_losses:
                epoch_loss = np.mean(train_losses)
            else:
                epoch_loss = float('nan')

            if epoch_loss < best_loss:
                print('saving weight to %s in npz format' % (weights_fpath))
                # NOTE: the misspelled file name is kept on purpose so that
                # existing consumers of this file keep working.
                np.savez(weights_fpath + 'rnn_paremeter.npz',
                         *lasagne.layers.get_all_param_values(layers['l_out']))
                best_loss = epoch_loss
                best_epoch = epoch
            print("  LR:                            " + str(lr))
            print("  training loss:                 " + str(epoch_loss))
            print("  best epoch:                    " + str(best_epoch))
            print("  best training loss:            " + str(best_loss))

    except KeyboardInterrupt:
        print('caught ctrl-c, stopping training')

    # write the current weights to disk in pickle format so the model can be
    # tried out later (these are the latest weights, not the best epoch's)
    print('saving weights to %s' % (weights_fpath + 'weights.pickle'))
    weights = lasagne.layers.get_all_param_values(layers['l_out'])
    char_rnn.save_weights(weights, weights_fpath + 'weights.pickle')
    print('done')