def test(args):
    """Evaluate a stored character-level RNN on the test split.

    The test text is read from ``args.text_fpath + 'test.txt'``, the network
    is rebuilt with the hyper-parameters found on ``args``, and the weights
    are loaded from ``args.weights_fpath + 'weights.pickle'``. The loss of
    every batch is printed, followed by the mean loss over all batches.

    Args:
        args: Namespace-like object providing ``rnn`` (type of RNN),
            ``mode`` (quantization method), ``weights_fpath``,
            ``text_fpath``, ``grad_clipping``, ``num_hidden``,
            ``batch_size`` and ``train_seq_length``.

    Returns:
        None. Results are only printed.
    """
    rnn = args.rnn  # type of RNN
    mode = args.mode  # quantization method
    weights_fpath = args.weights_fpath  # prefix of the stored weights file
    text_fpath = args.text_fpath  # prefix of the input text files
    grad_clipping = args.grad_clipping
    num_hidden = args.num_hidden
    batch_size = args.batch_size
    # sequence length the network was built with during training
    train_seq_length = args.train_seq_length

    text_test, vocab_test = utils.utils.parse(text_fpath + 'test.txt')
    # NOTE(review): the vocabulary is derived from the test text alone; it
    # presumably has to match the vocabulary used during training for the
    # loaded weights to be meaningful -- TODO confirm / add a check.
    print(vocab_test)

    # encode each character in the vocabulary as an integer
    encoder = LabelEncoder()
    encoder.fit(list(vocab_test))
    vocab_size = len(vocab_test)

    # build the model and restore its parameters
    layers = char_rnn.build_model(
        (None, train_seq_length, vocab_size),  # input_shape
        num_hidden, vocab_size, grad_clipping, rnn, mode)
    char_rnn.load_weights(layers['l_out'], weights_fpath + 'weights.pickle')

    print('compiling theano function for testing')
    # the testing flow reuses the validation function
    test_char_rnn = theano_funcs.create_vali_func(layers)

    try:
        test_losses = []
        seq_iter_test = utils.utils.sequences(
            text_test, batch_size, train_seq_length, vocab_size, encoder)
        print("Start testing flow:")
        for _, (X_test, y_test) in tqdm(enumerate(seq_iter_test), leave=False):
            # skip batches for which the iterator yields no data
            if X_test is None or y_test is None:
                continue
            loss = test_char_rnn(X_test, y_test)
            test_losses.append(loss)
            print(' loss = %.6f' % (loss))
        print("Testing flow finished")
        if test_losses:
            print('Test set average loss = %.6f' % (np.mean(test_losses)))
        else:
            # np.mean([]) would only yield nan plus a RuntimeWarning
            print('Test set average loss unavailable: no batches evaluated')
    except KeyboardInterrupt:
        print('caught ctrl-c, stopping testing')
def generate_samples(weights_fpath='cv/weights.pickle',
                     text_fpath='data/parsed.txt',
                     rnn='GRU',
                     mode='ternary',
                     grad_clipping=1.,
                     num_hidden=128,
                     train_seq_length=10,
                     sample_seq_length=500):
    """Interactively sample text from a stored character-level RNN.

    The vocabulary is rebuilt from ``text_fpath``, the network is constructed
    and its weights are loaded from ``weights_fpath``. The user is then
    prompted repeatedly for a seed phrase, and the generated continuation is
    printed. The loop ends on Ctrl-C or when standard input is exhausted.

    Args:
        weights_fpath: Full path of the weights file to load.
        text_fpath: Text file used to rebuild the vocabulary; per the
            original comment this is the training data.
        rnn: Type of RNN passed to ``char_rnn.build_model``.
        mode: Mode passed to ``char_rnn.build_model``.
        grad_clipping: Gradient clipping value passed to the model builder.
        num_hidden: Number of hidden units passed to the model builder.
        train_seq_length: Sequence length the network is built with; seed
            phrases must contain at least this many characters.
        sample_seq_length: Passed to ``utils.utils.sample``; presumably the
            number of characters to generate -- TODO confirm.

    Returns:
        None. Generated text is only printed.
    """
    # raw_input only exists on Python 2; fall back to input on Python 3
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    text, vocab = utils.utils.parse(text_fpath)

    # need to build the same encoder as during training, could pickle
    encoder = LabelEncoder()
    encoder.fit(list(vocab))
    vocab_size = len(vocab)

    layers = char_rnn.build_model(
        (None, train_seq_length, vocab_size),  # input_shape
        num_hidden, vocab_size, grad_clipping, rnn, mode)

    # load the model
    print('loading model weights from %s' % (weights_fpath))
    char_rnn.load_weights(layers['l_out'], weights_fpath)
    print('loading model done!')

    print('compiling theano function for sampling')
    sample = theano_funcs.create_sample_func(layers)

    try:
        while True:
            # prompt the user for a phrase to initialize the sampling
            phrase = read_line('start a phrase of at least %d chars:\n'
                               % (train_seq_length))
            if len(phrase) < train_seq_length:
                print('len(phrase) = %d, need len(phrase) >= %d'
                      % (len(phrase), train_seq_length))
                continue
            generated_phrase = utils.utils.sample(
                sample, phrase, train_seq_length, sample_seq_length,
                vocab_size, encoder)
            print('%s\n' % (generated_phrase))
    except KeyboardInterrupt:
        print('caught ctrl-c')
    except EOFError:
        # stdin was closed (e.g. piped input ran out); stop without traceback
        pass
    print('done')
def train(args):
    """Train a character-level RNN and store its weights.

    The text at ``args.text_fpath`` is parsed into a vocabulary, the network
    is built (optionally initialised from
    ``args.weights_fpath + 'weights.pickle'``) and trained for
    ``args.max_epochs`` epochs. Every ``args.sample_every`` batches a sample
    is drawn from the model and printed. Whenever the mean training loss of
    an epoch improves on the best value so far, the parameters are written to
    ``args.weights_fpath + 'rnn_paremeter.npz'``. When training ends, or is
    interrupted with Ctrl-C, the current parameters are written to
    ``args.weights_fpath + 'weights.pickle'``.

    Args:
        args: Namespace-like object providing ``rnn``, ``mode``,
            ``weights_fpath``, ``text_fpath``, ``max_epochs``, ``lr``,
            ``lr_decay``, ``lr_decay_after``, ``grad_clipping``,
            ``num_hidden``, ``batch_size``, ``sample_every``,
            ``train_seq_length``, ``sample_seq_length`` and ``load_model``.

    Returns:
        None. Progress is printed and weights are written to disk.
    """
    rnn = args.rnn  # type of RNN
    mode = args.mode
    weights_fpath = args.weights_fpath  # prefix of the weight files
    text_fpath = args.text_fpath  # path to the input file
    max_epochs = args.max_epochs
    lr = args.lr
    lr_decay = args.lr_decay
    lr_decay_after = args.lr_decay_after
    grad_clipping = args.grad_clipping
    num_hidden = args.num_hidden
    batch_size = args.batch_size
    sample_every = args.sample_every  # sample every n batches
    # sequence length during training, number of chars to draw for sampling
    train_seq_length = args.train_seq_length
    sample_seq_length = args.sample_seq_length
    load_model = args.load_model

    text, vocab = utils.utils.parse(text_fpath)

    # encode each character in the vocabulary as an integer
    encoder = LabelEncoder()
    encoder.fit(list(vocab))
    vocab_size = len(vocab)

    layers = char_rnn.build_model(
        (None, train_seq_length, vocab_size),  # input_shape
        num_hidden, vocab_size, grad_clipping, rnn, mode)

    # optionally load a pre-trained model
    if load_model:
        print('loading model weights from %s'
              % (weights_fpath + 'weights.pickle'))
        char_rnn.load_weights(layers['l_out'],
                              weights_fpath + 'weights.pickle')

    # phrases to use during sampling
    phrases = ['First Citizen:']

    print('compiling theano function for training')
    train_char_rnn = theano_funcs.create_train_func(layers, rnn)
    print('theano function for training built')

    print('compiling theano function for sampling')
    sample = theano_funcs.create_sample_func(layers)
    print('theano funciton for sampling built')

    # start from +inf so the first finite epoch loss always produces a
    # checkpoint, whatever the scale of the loss
    best_loss = float('inf')
    best_epoch = 1
    print('Start Training')
    try:
        for epoch in range(1, 1 + max_epochs):
            print('epoch %d' % (epoch))
            # multiplicative decay, applied on every epoch from
            # lr_decay_after onwards
            if epoch >= lr_decay_after:
                lr = lr * lr_decay

            # sample from the model and update the weights
            train_losses = []
            seq_iter = utils.utils.sequences(
                text, batch_size, train_seq_length, vocab_size, encoder)
            for i, (X, y) in tqdm(enumerate(seq_iter), leave=False):
                if X is not None and y is not None:
                    loss = train_char_rnn(X, y, lr)
                    train_losses.append(loss)
                    print(' loss = %.6f' % (loss))

                # continuously sample from the model
                if ((i + 1) % sample_every) == 0:
                    print(' loss = %.6f' % (np.mean(train_losses)))
                    phrase = np.random.choice(phrases)
                    generated_phrase = utils.utils.sample(
                        sample, phrase, train_seq_length, sample_seq_length,
                        vocab_size, encoder)
                    print('%s%s' % (phrase, generated_phrase))

            # mean loss of this epoch; nan (never "better") if no batch ran
            if train_losses:
                epoch_loss = np.mean(train_losses)
            else:
                epoch_loss = float('nan')

            if epoch_loss < best_loss:
                print('saving weight to %s in npz format' % (weights_fpath))
                np.savez(weights_fpath + 'rnn_paremeter.npz',
                         *lasagne.layers.get_all_param_values(layers['l_out']))
                best_loss = epoch_loss
                best_epoch = epoch

            print(" LR: " + str(lr))
            print(" training loss: " + str(epoch_loss))
            print(" best epoch: " + str(best_epoch))
            print(" best training loss: " + str(best_loss))
    except KeyboardInterrupt:
        print('caught ctrl-c, stopping training')

    # write the weights to disk in pickle format so the model can be tried
    # out; this also runs after an interrupted training run
    print('saving weights to %s' % (weights_fpath + 'weights.pickle'))
    weights = lasagne.layers.get_all_param_values(layers['l_out'])
    char_rnn.save_weights(weights, weights_fpath + 'weights.pickle')
    print('done')