# imports assumed by this section (SummaryWriter may instead come from
# tensorboardX in older setups)
import os
import string
import time

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR
from torch.utils.tensorboard import SummaryWriter

# number of input character types
char_vocab = len(string.printable)

# number of output classes = label vocabulary size
num_output_classes = len(labelCorpus.dictionary)
print('Number of classes: {}'.format(num_output_classes))

# Initialize models and start training

encoder = CharCNN(char_vocab, args.hidden_size)

decoder = DecoderRNN(args.hidden_size, num_output_classes)

encoder_optimizer = torch.optim.Adam(encoder.parameters(),
                                     lr=args.learning_rate)
decoder_optimizer = torch.optim.Adam(decoder.parameters(),
                                     lr=args.learning_rate)
criterion = nn.CrossEntropyLoss()

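# move the loss and both networks onto the GPU when CUDA is enabled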
if args.cuda:
    criterion.cuda()
    encoder.cuda()
    decoder.cuda()

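# bookkeeping for wall-clock timing and loss tracking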
start = time.time()
all_losses = []
loss_avg = 0

try:
    dataloader = load_datasets(args)
    best_error = float('inf')  # best validation error observed so far
    early_stop = 0             # early-stopping counter

    if not os.path.exists(args.savedir):
        os.makedirs(args.savedir)

    model_name = os.path.join(args.savedir, 'best.pt')
    train_begin = time.time()
    print('train begin', '-' * 50)
    print()
    print()
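    # classifier trained below (70 = size of the character alphabet this
    # CharCNN variant expects as input)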
    model = CharCNN(70, args.dropout)
    if args.optimizer == 'SGD':
        optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
        # halve the learning rate every 3 epochs
        scheduler = StepLR(optimizer, step_size=3, gamma=0.5)
    elif args.optimizer == 'Adam':
        optimizer = optim.Adam(model.parameters(), lr=0.0001)
    else:
        print('Unsupported optimizer: {}'.format(args.optimizer))
        exit(1)

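    # TensorBoard writer; event files are written to ./log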
    writer = SummaryWriter('log')

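    # main loop: one full pass over the training data per epoch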
    for epoch in range(args.epochs):
        epoch_begin = time.time()
        print('training epoch {}/{}'.format(epoch + 1, args.epochs))
        train_loss = train_model(dataloader['train_dataloader'], model,
                                 criterion, optimizer)