# Move the models to the GPU before loading/training; state dicts saved from
# a CUDA run expect parameters that already live on the device.
if use_cuda:
    encoder1 = encoder1.cuda()
    attn_decoder1 = attn_decoder1.cuda()

# Reuse previously trained weights unless training was explicitly requested
# via the TRAIN flag; otherwise train from scratch and persist the result.
if os.path.exists("encoder.pt") and os.path.exists("decoder.pt") and not TRAIN:
    print("Found saved models")
    # map_location lets checkpoints saved on a GPU machine load on a
    # CPU-only machine instead of raising a deserialization error.
    device_map = None if use_cuda else 'cpu'
    encoder_state = torch.load('encoder.pt', map_location=device_map)
    decoder_state = torch.load('decoder.pt', map_location=device_map)
    encoder1.load_state_dict(encoder_state)
    attn_decoder1.load_state_dict(decoder_state)
else:
    trainIters(encoder1, attn_decoder1, TRAIN_ITER, print_every=50)
    # Save only freshly trained weights; re-saving state that was just
    # loaded from these same files is redundant I/O.
    torch.save(encoder1.state_dict(), "encoder.pt")
    torch.save(attn_decoder1.state_dict(), "decoder.pt")

######################################################################
#

# Qualitative sanity check — presumably translates randomly sampled inputs
# and prints the outputs (evaluateRandomly is defined elsewhere in the file).
evaluateRandomly(encoder1, attn_decoder1)

######################################################################
# Visualizing Attention
# ---------------------
#
# A useful property of the attention mechanism is its highly interpretable
# outputs. Because it is used to weight specific encoder outputs of the
# input sequence, we can imagine looking where the network is focused most
# at each time step.
#
# Example #2
# 0


    #showPlot(plot_losses, plot_losses_test)




######################################################################
# Training
# =======================
# Width of the RNN hidden state, shared by encoder and decoder.
hidden_size = 200

encoder1 = EncoderRNN(input_lang.n_words, hidden_size).to(device)
attn_decoder1 = AttnDecoderRNN(hidden_size, output_lang.n_words, MAX_LENGTH, dropout_p=0.1).to(device)

# Persist the vocabularies and the held-out set next to the model weights so
# a later evaluation run can reload them without rebuilding the dataset.
# os.path.join is used instead of '+'-concatenation so paths stay correct
# regardless of whether args.save_path carries a trailing separator.
torch.save(input_lang, os.path.join(args.save_path, 'input_lang'))
torch.save(output_lang, os.path.join(args.save_path, 'output_lang'))
torch.save(test_set, os.path.join(args.save_path, 'test_set'))

print(args.print_every)
trainIters(encoder1, attn_decoder1, args.n_iters, args.print_every, args.plot_every, save_every=args.save_every)

# Weights are saved as state dicts (not whole modules) — the standard,
# architecture-portable serialization form.
torch.save(encoder1.state_dict(), os.path.join(args.save_path, 'encoder'))
torch.save(attn_decoder1.state_dict(), os.path.join(args.save_path, 'decoder'))