train_loader = datagen(train_encoder_batch, train_decoder_batch,
                       train_target_batch)
val_loader = datagen(val_encoder_batch, val_decoder_batch,
                     val_target_batch)

model.fit_generator(train_loader,
                    steps_per_epoch=train_steps,
                    epochs=1, verbose=1,
                    validation_data=val_loader,
                    validation_steps=val_steps)

# On epoch end - decode a batch of misspelled tokens from the
# validation set to visualize speller performance.
nb_tokens = 5
input_tokens, target_tokens, decoded_tokens = decode_sequences(
    val_encoder, val_target, input_ctable, target_ctable,
    maxlen, reverse, encoder_model, decoder_model, nb_tokens,
    sample_mode=sample_mode, random=True)

print('-')
print('Input tokens:  ', input_tokens)
print('Decoded tokens:', decoded_tokens)
print('Target tokens: ', target_tokens)
print('-')

# Save the model at end of each epoch.
model_file = '_'.join(['seq2seq', 'epoch', str(epoch + 1)]) + '.h5'
save_dir = 'checkpoints'
if not os.path.exists(save_dir):
    os.makedirs(save_dir)
save_path = os.path.join(save_dir, model_file)
print('Saving full model to {:s}'.format(save_path))
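For these calls to work, `datagen` only needs to be an infinite generator that yields batches in the `([encoder_input, decoder_input], decoder_target)` shape that `fit_generator` expects; Keras stops reading after `steps_per_epoch` (or `validation_steps`) batches per epoch. A minimal sketch, assuming the three arguments are matching iterables of already-vectorized batch arrays:

def datagen(encoder_batches, decoder_batches, target_batches):
    # Hypothetical sketch: cycle over the pre-built batches forever,
    # yielding ([encoder_input, decoder_input], decoder_target) tuples.
    while True:
        for enc, dec, tgt in zip(encoder_batches, decoder_batches,
                                 target_batches):
            yield [enc, dec], tgt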
test_word_set = list(filter(None, set(tokenize(test_text))))

train_max_word_len = max([len(token) for token in word_set]) + 2
test_max_word_len = max([len(token) for token in test_word_set]) + 2

train_enc_tokens, train_dec_tokens, _ = prepare_word_tokens(
    word_set, train_max_word_len, ERR_RATE)
test_enc_tokens, test_dec_tokens, test_target_tokens = prepare_word_tokens(
    test_word_set, test_max_word_len, ERR_RATE)

# Build the character sets from the training tokens so the one-hot
# encoders match the vocabulary the model was trained on.
enc_charset = set(' '.join(train_enc_tokens))
dec_charset = set(' '.join(train_dec_tokens))
enc_oh = OneHotEncoder(enc_charset)
dec_oh = OneHotEncoder(dec_charset)

token_count = len(test_enc_tokens)
right_guess_counter = 0
counter = 0

for enc_token, dec_token, target_token in zip(
        test_enc_tokens, test_dec_tokens, test_target_tokens):
    _, decoded_token = decode_sequences(
        [enc_token], enc_oh, dec_oh, train_max_word_len,
        encoder, decoder, 1, False)

    # print(f'Decoded: {decoded_token[0]} | Target: {target_token.rstrip("*")}')
    if decoded_token[0] == target_token.rstrip('*'):
        right_guess_counter += 1

    counter += 1
    if counter % 10 == 0:
        print(f'{((counter / token_count) * 100):.3f}% finished')

print(f'Accuracy: {right_guess_counter / token_count}')
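The `ERR_RATE` passed to `prepare_word_tokens` controls how aggressively tokens are corrupted before they reach the encoder. As a rough illustration of the kind of noise injection involved (the project's actual implementation may differ), a hypothetical helper could corrupt a token by random substitution, deletion, insertion, or transposition:

import random
import string

def add_spelling_error(token, error_rate):
    # Hypothetical sketch of noise injection; not the project's code.
    if len(token) < 3 or random.random() > error_rate:
        return token
    i = random.randrange(len(token))
    op = random.choice(['substitute', 'delete', 'insert', 'transpose'])
    if op == 'substitute':
        return token[:i] + random.choice(string.ascii_lowercase) + token[i + 1:]
    if op == 'delete':
        return token[:i] + token[i + 1:]
    if op == 'insert':
        return token[:i] + random.choice(string.ascii_lowercase) + token[i:]
    # Transpose: swap two adjacent characters.
    i = min(i, len(token) - 2)
    return token[:i] + token[i + 1] + token[i] + token[i + 2:]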
    maxlen, error_rate=error_rate, shuffle=False)

input_chars = set(' '.join(train_encoder))
target_chars = set(' '.join(train_decoder))
input_ctable = CharacterTable(input_chars)
target_ctable = CharacterTable(target_chars)

encoder_model, decoder_model = restore_model(model_path, hidden_size)

input_tokens, target_tokens, decoded_tokens = decode_sequences(
    misspelled_tokens, target_tokens, input_ctable, target_ctable,
    maxlen, reverse, encoder_model, decoder_model, nb_tokens,
    sample_mode=sample_mode, random=False)

print('-')
print('Input sentence:  ', ' '.join(input_tokens))
print('-')
print('Decoded sentence:', ' '.join(decoded_tokens))
print('-')
print('Target sentence: ', ' '.join(target_tokens))
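Under the hood, `decode_sequences` with greedy sampling typically runs the encoder once per token and then feeds the decoder its own predictions back one character at a time until an end marker appears. The sketch below illustrates that loop; the one-hot helpers, the state handling, and the '\t'/'*' start and end markers are assumptions rather than the project's exact interface:

import numpy as np

def greedy_decode_token(token, encode_onehot, idx_to_char,
                        encoder_model, decoder_model, maxlen,
                        start_char='\t', end_char='*'):
    # Illustrative sketch: encode_onehot(text, length) is assumed to
    # return a (length, vocab) one-hot array, and idx_to_char maps
    # vocabulary indices back to characters.
    # Run the encoder once to obtain the initial decoder states.
    states = encoder_model.predict(encode_onehot(token, maxlen)[np.newaxis])
    if not isinstance(states, list):
        states = [states]

    decoded = ''
    target_seq = encode_onehot(start_char, 1)[np.newaxis]
    for _ in range(maxlen):
        outputs = decoder_model.predict([target_seq] + states)
        char_probs, states = outputs[0], list(outputs[1:])
        char = idx_to_char[int(np.argmax(char_probs[0, -1]))]
        if char == end_char:
            break
        decoded += char
        # Feed the prediction back in as the next decoder input.
        target_seq = encode_onehot(char, 1)[np.newaxis]
    return decoded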
DATA_DIR = './data'


if __name__ == '__main__':
    # Prepare model.
    encoder, decoder = load_s2s_model(
        'test-no_reverse-hs-512_err-0.8_bs-256_e-30_drop-0.2.h5', HIDDEN_SIZE)

    text = load_text(DATA_DIR)
    word_set = list(filter(None, set(tokenize(text))))
    max_word_len = max([len(token) for token in word_set]) + 2

    train_enc_tokens, train_dec_tokens, _ = prepare_word_tokens(
        word_set, max_word_len, ERR_RATE)

    enc_charset = set(' '.join(train_enc_tokens))
    dec_charset = set(' '.join(train_dec_tokens))
    enc_oh = OneHotEncoder(enc_charset)
    dec_oh = OneHotEncoder(dec_charset)

    # Input decoding loop.
    while True:
        sentence = input('\nEnter sentence to decode:\n')
        tokens = list(filter(None, tokenize(sentence)))
        nb_of_tokens = len(tokens)

        prepared_tokens = [get_padded_token(token, max_word_len)
                           for token in tokens]

        input_tokens, decoded_tokens = decode_sequences(
            prepared_tokens, enc_oh, dec_oh, max_word_len,
            encoder, decoder, nb_of_tokens, False)

        print('Decoded sentence:', ' '.join(decoded_tokens))
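`get_padded_token` is only needed here because the model was trained on fixed-length token windows. A hypothetical version, assuming '*' is the end-of-token/padding character implied by the `rstrip('*')` in the evaluation loop:

def get_padded_token(token, max_word_len, end_char='*'):
    # Hypothetical sketch: pad the token with the end marker out to the
    # fixed window length. The real helper may also prepend a start
    # marker, which would explain the '+ 2' added to max_word_len above.
    return token + end_char * (max_word_len - len(token))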