# number of input character types (character vocabulary size)
char_vocab = len(string.printable)
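# string.printable contains 100 characters in CPython 3 (digits, ASCII letters,
# punctuation, and whitespace), so char_vocab is 100 here.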

# number of output classes = size of the label vocabulary
numOutputClass = len(labelCorpus.dictionary)

# Initialize models and start training

decoder = CharCNN(char_vocab, args.hidden_size, numOutputClass)
decoder_optimizer = torch.optim.Adam(decoder.parameters(),
                                     lr=args.learning_rate)
criterion = nn.CrossEntropyLoss()
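# CrossEntropyLoss applies log-softmax internally, so the model should output raw
# class scores (logits) of shape (batch, numOutputClass) and the targets should be
# integer class indices.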

if args.cuda:
    decoder.cuda()
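    # .cuda() moves the module's parameters and buffers to the GPU in place; the
    # input and target tensors must be placed on the same device before the forward pass.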

start = time.time()
all_losses = []
loss_avg = 0

try:
    print("Training for %d epochs..." % args.n_epochs)
    for epoch in tqdm(range(1, args.n_epochs + 1)):
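        # train and random_training_set are defined earlier in the script; presumably
        # random_training_set samples one random batch of args.batch_size lines as
        # input/target tensors and train runs a single optimization step on that batch,
        # returning its loss.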
        loss = train(*random_training_set(args.batch_size, linesInTrain))
        loss_avg += loss

        if epoch % args.print_every == 0:
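            # evaluate is likewise defined earlier; it is assumed to run the model over
            # linesInValid without gradient updates and return an average validation loss.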
            val_loss = evaluate(
                args.batch_size, linesInValid
            )  # check performance on the validation set