import pickle

from keras.callbacks import ReduceLROnPlateau, ModelCheckpoint, EarlyStopping

# Vocab, Tokenizer, Preprocessor and AutoEncoder are project-local classes;
# their imports are omitted here as in the original script.

if __name__ == '__main__':
    vocab_file = '../vocab/vocab'
    tokenizer_file = '../tokenizer/src_tokenizer'

    # Build the vocabulary and tokenizer, then persist the tokenizer to disk.
    vocab = Vocab(vocab_file, 100000)
    tokenizer = Tokenizer(vocab)
    with open(tokenizer_file, mode='wb') as file:
        pickle.dump(tokenizer, file)

    max_sequence_len = 10
    batch_size = 4
    p = Preprocessor(batch_size, 'data/sentences.txt', tokenizer, max_sequence_len)

    embedding_dim = 50
    hidden_dim = 100
    ae = AutoEncoder(max_sequence_len, vocab.NumIds(), embedding_dim, hidden_dim)
    ae.build_models()

    # Training callbacks: halve the learning rate when validation loss plateaus,
    # checkpoint every epoch, and stop early once improvement stalls.
    # `model_weights` (the checkpoint path) is presumably defined elsewhere in the project;
    # `epsilon` was renamed `min_delta` in later Keras releases.
    reducelr_cb = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10,
                                    verbose=1, mode='auto', epsilon=0.0001,
                                    cooldown=0, min_lr=1e-20)
    checkpoint_cb = ModelCheckpoint(model_weights, period=1)
    earlystopping_cb = EarlyStopping(min_delta=0.0001, patience=10)
    callbacks_list = [reducelr_cb, checkpoint_cb, earlystopping_cb]

    x = p.get_data()[:5000]
    print(len(x))
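    # A minimal sketch of how the callbacks above might be wired into training,
    # assuming (hypothetically) that AutoEncoder exposes its compiled Keras model
    # as `ae.model` and that the autoencoder reconstructs its own input; the epoch
    # count is a placeholder, not a value from the original script.
    ae.model.fit(x, x,
                 batch_size=batch_size,
                 epochs=50,
                 validation_split=0.1,
                 callbacks=callbacks_list)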
# print('-' * 30, 'Starting', '-' * 30)
vocab_file = '../vocab/vocab'
tokenizer_file = '../tokenizer/src_tokenizer'

# Build the vocabulary and tokenizer, then persist the tokenizer to disk.
vocab = Vocab(vocab_file, 100000)
tokenizer = Tokenizer(vocab)
with open(tokenizer_file, mode='wb') as file:
    pickle.dump(tokenizer, file)

max_sequence_len = 100
p = Preprocessor(1, 'data/sentences.txt', tokenizer, max_sequence_len)
data = p.get_data()[:5000]
print('-' * 30, 'Loaded data', '-' * 30)

# Seq2seq model: one encoder RNN and one decoder RNN over the same vocabulary.
hidden_size = 256
encoder1 = EncoderRNN(vocab.NumIds(), hidden_size)
decoder1 = DecoderRNN(hidden_size, vocab.NumIds(), 1)

if use_cuda:
    encoder1 = encoder1.cuda()
    decoder1 = decoder1.cuda()

trainEpochs(encoder1, decoder1, 5000, p, print_every=100)

######################################################################
# evaluateRandomly(encoder1, decoder1)

######################################################################
# Visualizing Attention
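# A minimal sketch of inspecting decoder attention with matplotlib, assuming
# (hypothetically) an attention-based decoder and an `evaluate(encoder, decoder,
# sentence)` helper that returns the decoded tokens together with an
# (output_len, input_len) attention matrix; the plain DecoderRNN trained above
# does not expose such weights, and the input sentence is a placeholder.
import matplotlib.pyplot as plt

output_words, attentions = evaluate(encoder1, decoder1, 'an example source sentence')
plt.matshow(attentions.cpu().numpy())  # rows: output steps, columns: input tokens
plt.show()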