if use_cuda:
    encoder1 = encoder1.cuda()
    attn_decoder1 = attn_decoder1.cuda()

if os.path.exists("encoder.pt") and os.path.exists("decoder.pt") and not TRAIN:
    print("Found saved models")
    encoder_state = torch.load('encoder.pt')
    decoder_state = torch.load('decoder.pt')
    encoder1.load_state_dict(encoder_state)
    attn_decoder1.load_state_dict(decoder_state)
else:
    trainIters(encoder1, attn_decoder1, TRAIN_ITER, print_every=50)
    torch.save(encoder1.state_dict(), "encoder.pt")
    torch.save(attn_decoder1.state_dict(), "decoder.pt")

######################################################################
# evaluateRandomly(encoder1, attn_decoder1)

######################################################################
# Visualizing Attention
# ---------------------
#
# A useful property of the attention mechanism is its highly interpretable
# outputs. Because it is used to weight specific encoder outputs of the
# input sequence, we can imagine looking where the network is focused most
# at each time step.
#
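######################################################################
# The sketch below shows one way to plot those attention weights with
# matplotlib. It assumes an ``evaluate(encoder, decoder, sentence)``
# helper (similar to the one behind ``evaluateRandomly`` above) that
# returns the decoded words together with an
# (output_length x input_length) attention matrix as a torch tensor;
# that helper name and return signature are assumptions, not something
# defined in this script.

import matplotlib.pyplot as plt


def show_attention(input_sentence, output_words, attentions):
    # attentions: (output_len, input_len) matrix of decoder attention
    # weights; assumed to be a torch tensor
    input_tokens = input_sentence.split(' ') + ['<EOS>']
    fig = plt.figure()
    ax = fig.add_subplot(111)
    cax = ax.matshow(attentions.cpu().numpy(), cmap='bone')
    fig.colorbar(cax)

    # label columns with input tokens and rows with generated tokens
    ax.set_xticks(range(len(input_tokens)))
    ax.set_xticklabels(input_tokens, rotation=90)
    ax.set_yticks(range(len(output_words)))
    ax.set_yticklabels(output_words)
    plt.show()


# Example usage, assuming the hypothetical ``evaluate`` helper exists:
# output_words, attentions = evaluate(encoder1, attn_decoder1, input_sentence)
# show_attention(input_sentence, output_words, attentions)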
#showPlot(plot_losses, plot_losses_test)

######################################################################
# Training
# =======================

hidden_size = 200
encoder1 = EncoderRNN(input_lang.n_words, hidden_size).to(device)
attn_decoder1 = AttnDecoderRNN(hidden_size, output_lang.n_words, MAX_LENGTH,
                               dropout_p=0.1).to(device)

torch.save(input_lang, args.save_path + '/input_lang')
torch.save(output_lang, args.save_path + '/output_lang')
torch.save(test_set, args.save_path + '/test_set')

print(args.print_every)
trainIters(encoder1, attn_decoder1, args.n_iters, args.print_every,
           args.plot_every, save_every=args.save_every)

torch.save(encoder1.state_dict(), args.save_path + '/encoder')
torch.save(attn_decoder1.state_dict(), args.save_path + '/decoder')
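######################################################################
# To reuse the trained model later, the saved vocabularies and state
# dicts can be restored along these lines. This is a minimal sketch
# only: it assumes the same ``EncoderRNN`` / ``AttnDecoderRNN``
# definitions, ``hidden_size``, ``MAX_LENGTH``, ``device`` and
# ``args.save_path`` as above.

input_lang = torch.load(args.save_path + '/input_lang')
output_lang = torch.load(args.save_path + '/output_lang')

# rebuild the modules with the same shapes, then load the trained weights
encoder1 = EncoderRNN(input_lang.n_words, hidden_size).to(device)
attn_decoder1 = AttnDecoderRNN(hidden_size, output_lang.n_words, MAX_LENGTH,
                               dropout_p=0.1).to(device)
encoder1.load_state_dict(torch.load(args.save_path + '/encoder',
                                    map_location=device))
attn_decoder1.load_state_dict(torch.load(args.save_path + '/decoder',
                                         map_location=device))

# switch to evaluation mode (disables dropout) before running inference
encoder1.eval()
attn_decoder1.eval()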