def validate_sentence(session, model, validation_batch, encoder_state, current_step):
    encoder_inputs, single_decoder_inputs, decoder_weights = validation_batch.next()
    print(BatchGenerator.batches2string(encoder_inputs))
    print(BatchGenerator.batches2string(single_decoder_inputs))
    # replicate to full batch size so we get multiple results against the whole state
    encoder_inputs = [np.repeat(x, BATCH_SIZE, axis=0) for x in encoder_inputs]
    decoder_inputs = [np.repeat(x, BATCH_SIZE, axis=0) for x in single_decoder_inputs]
    decoder_weights = [np.repeat(x, BATCH_SIZE, axis=0) for x in decoder_weights]
    # _, eval_loss, prediction = model.step(sess, current_step - 1, encoder_inputs, decoder_inputs,
    #                                       decoder_weights, enc_state[-1:], 1.0, True)
    _, eval_loss, prediction = model.step(session, current_step - 1, encoder_inputs, decoder_inputs,
                                          decoder_weights, encoder_state, 1.0, True)
    # split into a 'number of steps' list, then average across the replicated batch
    reshaped = np.reshape(prediction, (prediction.shape[0] // BATCH_SIZE, BATCH_SIZE, prediction.shape[1]))
    averaged = np.mean(reshaped, axis=1)
    # now roll as in the single-batch case
    rolled = np.rollaxis(np.asarray([averaged]), 1, 0)
    splitted = np.vsplit(rolled, rolled.shape[0])
    squeezed = [np.squeeze(e, 0) for e in splitted]
    print(BatchGenerator.batches2string(squeezed))
    # compute character-to-character perplexity
    val_perp = float(np.exp(BatchGenerator.logprob(np.concatenate(squeezed),
                                                   np.concatenate(single_decoder_inputs[1:]))))
    print('--validation perp.: %.2f' % val_perp)
    return val_perp
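
# Illustrative sketch (not part of the original file): shows, on dummy numpy data, how
# validate_sentence collapses the BATCH_SIZE-replicated predictions back to a single
# sequence. The shapes are assumptions: input of shape (steps * batch_size, vocab),
# output a list of `steps` arrays of shape (1, vocab), as expected by batches2string.
def _demo_average_replicated_prediction(steps=3, batch_size=4, vocab=5):
    prediction = np.random.rand(steps * batch_size, vocab)
    reshaped = np.reshape(prediction, (steps, batch_size, vocab))
    averaged = np.mean(reshaped, axis=1)                # (steps, vocab): mean over replicas
    rolled = np.rollaxis(np.asarray([averaged]), 1, 0)  # (steps, 1, vocab)
    squeezed = [np.squeeze(e, 0) for e in np.vsplit(rolled, rolled.shape[0])]
    assert len(squeezed) == steps and squeezed[0].shape == (1, vocab)
    return squeezed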
def run_test():
    # test batch generation
    print('download and read data')
    filename = maybe_download('text8.zip', 31344016)
    # Read data
    text = read_data(filename)
    # create datasets
    valid_size = 1000
    valid_text = text[:valid_size]
    train_text = text[valid_size:]
    # train_size = len(train_text)
    # create batch generators
    train_batches = BatchGenerator(train_text, BATCH_SIZE, MIN_CHARS_IN_BATCH, MAX_CHARS_IN_BATCH,
                                   reverse_encoder_input=True)
    valid_batches = BatchGenerator(valid_text, 1, MIN_CHARS_IN_BATCH, MAX_CHARS_IN_BATCH)
    # print(BatchGenerator.characters(train_batches.next()[0]))

    print('test main batch generator')
    e_bs, d_bs, dw_bs = train_batches.next()
    print(BatchGenerator.batches2string(e_bs))
    print(BatchGenerator.batches2string(d_bs))
    BatchGenerator.verify_weights(d_bs, dw_bs)

    e_bs, d_bs, dw_bs = train_batches.next()
    print(BatchGenerator.batches2string(e_bs))
    print(BatchGenerator.batches2string(d_bs))
    BatchGenerator.verify_weights(d_bs, dw_bs)

    e_bs, d_bs, dw_bs = valid_batches.next()
    print(BatchGenerator.batches2string(e_bs))
    print(BatchGenerator.batches2string(d_bs))
    BatchGenerator.verify_weights(d_bs, dw_bs)

    e_bs, d_bs, dw_bs = valid_batches.next()
    print(BatchGenerator.batches2string(e_bs))
    print(BatchGenerator.batches2string(d_bs))
    BatchGenerator.verify_weights(d_bs, dw_bs)

    print('test random english generator')
    random_batch = RandomWordsBatchGenerator(2, MIN_CHARS_IN_BATCH, MAX_CHARS_IN_BATCH,
                                             reverse_encoder_input=False)
    for _ in range(10):
        e_bs, d_bs, dw_bs = random_batch.next()
        print(BatchGenerator.batches2string(e_bs))
        print(BatchGenerator.batches2string(d_bs))
        BatchGenerator.verify_weights(d_bs, dw_bs)

    print('test random string gen with padding')
    random_str_batch = ReverseStringBatchGenerator(1, 8, 8, reverse_encoder_input=False)
    e_bs, d_bs, dw_bs = random_str_batch.next()
    print(BatchGenerator.batches2string(e_bs))
    print(BatchGenerator.batches2string(d_bs))
    BatchGenerator.verify_weights(d_bs, dw_bs)

    random_str_batch = ReverseStringBatchGenerator(2, 8, 16, reverse_encoder_input=False)
    e_bs, d_bs, dw_bs = random_str_batch.next()
    print(BatchGenerator.batches2string(e_bs))
    print(BatchGenerator.batches2string(d_bs))
    BatchGenerator.verify_weights(d_bs, dw_bs)
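
# Illustrative sketch (not part of the original file): a minimal entry point, assuming the
# module is meant to be executed directly to exercise the batch generators; if run_test()
# is already invoked elsewhere in the project, this guard is redundant.
if __name__ == '__main__':
    run_test()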