# Model/loss/optimizer setup (reconstructed from a whitespace-collapsed
# paste; formatting restored). NOTE(review): this chunk began mid-call --
# `leakyRelu=True)` closed a CRNN constructor call that lies outside this
# view; it is preserved below as a comment:
#     ... leakyRelu=True)
print(net)

# Count the total number of trainable values from the state dict.
params = net.state_dict()
params_shape = []
for k, v in params.items():
    # print(k, v.numpy().shape, reduce(mul, v.numpy().shape))
    params_shape.append(reduce(mul, v.numpy().shape))
params_total = sum(params_shape)
print('params_total:', params_total)

if opt.finetune:
    # Resume from a saved state dict.
    print('Loading model from', opt.modeldir + opt.modelname)
    net.load_state_dict(torch.load(opt.modeldir + opt.modelname))
else:
    print('create new model')
    net.apply(weights_init)

if opt.ngpu > 1:
    # print("Let's use", torch.cuda.device_count(), "GPUs!")
    net = nn.DataParallel(net, device_ids=range(opt.ngpu))
net.cuda()

criterion = CTCLoss().cuda()

# Optimizer selection: Adadelta / RMSprop / Adam, chosen by CLI flags.
if opt.adadelta:
    optimizer = optim.Adadelta(net.parameters(), lr=opt.lr)  # , weight_decay=1e-8)
elif opt.rms:
    optimizer = optim.RMSprop(net.parameters(), lr=opt.lr)
else:
    # NOTE(review): the source was truncated mid-call here
    # (`optim.Adam(net.parameters(), lr=opt.lr,`). Standard CRNN training
    # scripts pass betas=(opt.beta1, 0.999) -- confirm against the full file.
    optimizer = optim.Adam(net.parameters(), lr=opt.lr)
# CRNN model / loss / optimizer setup (reconstructed from a
# whitespace-collapsed paste; formatting restored, logic unchanged).

nc = 1  # single input channel (grayscale images)
nclass = len(option.alphabet) + 1  # +1 for the CTC blank symbol
crnn = CRNN(nc, nclass, option.nh)
crnn = crnn.cuda()


def weight_init(module):
    """Initialize Conv weights from N(0, 0.02) and BatchNorm weights from
    N(1, 0.02) with zero bias (DCGAN-style init, matched by class name)."""
    class_name = module.__class__.__name__
    if class_name.find('Conv') != -1:
        module.weight.data.normal_(0, 0.02)
    if class_name.find('BatchNorm') != -1:
        module.weight.data.normal_(1, 0.02)
        module.bias.data.fill_(0)


crnn.apply(weight_init)

# zero_infinity=True zeroes losses (and their grads) that become infinite,
# e.g. when a target sequence is longer than the input sequence.
loss_function = CTCLoss(zero_infinity=True)
loss_function = loss_function.cuda()
optimizer = Adadelta(crnn.parameters())
converter = Converter(option.alphabet)

print_every = 100
total_loss = 0.0


def validation():
    """Evaluate the model over the validation dataloader.

    NOTE(review): truncated in this source chunk -- the loop body (loss
    accumulation, accuracy counting, any return) is missing from this view.
    """
    print('start validation...')
    crnn.eval()
    total_loss = 0.0
    n_correct = 0
    for i, (input, label) in enumerate(validationset_dataloader):
        ...  # body missing in this chunk
def main():
    """Train a CRNN with CTC loss; configuration is read from conf/train.yml.

    Runs the training loop for ``n_iter`` epochs over ``trainset``,
    validating every ``val_interval`` steps and checkpointing the whole
    model every ``save_interval`` steps into ``model_dir``.
    """
    conf_file = "conf/train.yml"
    with open(conf_file, 'r') as f:
        # Fix: yaml.load() without an explicit Loader is deprecated and can
        # execute arbitrary constructors; safe_load suffices for a plain config.
        args = edict(yaml.safe_load(f))
    train_root = args.train_root
    test_root = args.test_root
    batch_size = args.batch_size
    max_len = args.max_len
    img_h = args.img_h
    img_w = args.img_w
    n_hidden = args.n_hidden
    n_iter = args.n_iter
    lr = args.lr
    cuda = args.cuda
    val_interval = args.val_interval
    save_interval = args.save_interval
    model_dir = args.model_dir
    debug_level = args.debug_level
    experiment = args.experiment
    n_channel = args.n_channel
    n_class = args.n_class
    beta = args.beta

    # Reusable CPU-side staging buffers, refilled by loadData() each batch.
    # Fix: the width dimension used img_h twice -- presumably a typo for
    # img_w (harmless if loadData resizes, but misleading either way).
    image = torch.FloatTensor(batch_size, n_channel, img_h, img_w)
    text = torch.IntTensor(batch_size * max_len)
    length = torch.IntTensor(batch_size)

    # Levels: 50 critical / 40 error / 30 warning / 20 info / 10 debug
    logging.getLogger().setLevel(debug_level)

    crnn = CRNN(img_h, n_channel, n_class, n_hidden).cuda()
    crnn.apply(weights_init)
    criterion = CTCLoss().cuda()
    optimizer = optim.RMSprop(crnn.parameters(), lr=lr)
    # optimizer = optim.Adam(crnn.parameters(), lr=lr,
    #                        betas=(beta, 0.999))

    trainset = train_set(train_root, batch_size, img_h, img_w, n_class)
    valset = train_set(test_root, batch_size, img_h, img_w, n_class)

    cur_iter = 0
    for ITER in range(n_iter):
        for train_img, train_label, train_lengths, batch_label in iter(trainset):
            for p in crnn.parameters():
                p.requires_grad = True
            crnn.train()
            if train_img is None:
                # Dataset signals exhaustion with a None batch.
                break
            cur_iter += 1
            loadData(image, train_img)
            loadData(text, train_label)
            loadData(length, train_lengths)
            preds = crnn(train_img.cuda())
            # preds = F.softmax(preds, dim=2)
            # Every sample in the batch shares the full time dimension.
            preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
            cost = criterion(preds, text, preds_size, length) / batch_size
            crnn.zero_grad()
            cost.backward()
            optimizer.step()
            # Fix: .cpu().detach().numpy()[0] raises on 0-dim results with
            # modern torch/numpy; .item() handles both 0-dim and size-1 tensors.
            print("training-iter {} cost {}".format(ITER, cost.item()))
            if cur_iter % val_interval == 0:
                val(crnn, valset, criterion, n_class)
            if cur_iter % save_interval == 0:
                model_file = os.path.join(model_dir,
                                          "crnn_iter{}.pth".format(ITER))
                print("saving in file {}".format(model_file))
                with open(model_file, 'wb') as f:
                    torch.save(crnn, f)
def test_train(self):
    """End-to-end training smoke test with hard-coded hyperparameters.

    Mirrors main(): builds a CRNN, trains with CTC loss over a small
    dataset, validating every ``val_interval`` steps and checkpointing
    every ``save_interval`` steps.
    """
    # test_root = "data/ocr_dataset_val"
    # train_root = "data/ocr_dataset"
    train_root = "data/ocr_dataset_train_400_10/"
    test_root = "data/ocr_dataset_train_50_10_val/"
    batch_size = 20
    max_len = 15
    img_h, img_w = 32, 150
    n_hidden = 512
    n_iter = 400
    lr = 0.00005
    cuda = True
    val_interval = 250
    save_interval = 1000
    model_dir = "models"
    debug_level = 20
    experiment = "experiment"
    n_channel = 3
    n_class = 11
    beta = 0.5

    # Reusable CPU-side staging buffers, refilled by loadData() each batch.
    # Fix: the width dimension used img_h twice -- presumably a typo for
    # img_w (harmless if loadData resizes, but misleading either way).
    image = torch.FloatTensor(batch_size, n_channel, img_h, img_w)
    text = torch.IntTensor(batch_size * max_len)
    length = torch.IntTensor(batch_size)

    # Levels: 50 critical / 40 error / 30 warning / 20 info / 10 debug
    logging.getLogger().setLevel(debug_level)

    crnn = CRNN(img_h, n_channel, n_class, n_hidden).cuda()
    crnn.apply(weights_init)
    criterion = CTCLoss().cuda()
    optimizer = optim.RMSprop(crnn.parameters(), lr=lr)
    # optimizer = optim.Adam(crnn.parameters(), lr=lr,
    #                        betas=(beta, 0.999))

    trainset = train_set(train_root, batch_size, img_h, img_w, n_class)
    valset = train_set(test_root, batch_size, img_h, img_w, n_class)

    cur_iter = 0
    for ITER in range(n_iter):
        for train_img, train_label, train_lengths, batch_label in iter(
                trainset):
            for p in crnn.parameters():
                p.requires_grad = True
            crnn.train()
            if train_img is None:
                # Dataset signals exhaustion with a None batch.
                break
            cur_iter += 1
            loadData(image, train_img)
            loadData(text, train_label)
            loadData(length, train_lengths)
            preds = crnn(train_img.cuda())
            # preds = F.softmax(preds, dim=2)
            # Every sample in the batch shares the full time dimension.
            preds_size = Variable(
                torch.IntTensor([preds.size(0)] * batch_size))
            cost = criterion(preds, text, preds_size, length) / batch_size
            crnn.zero_grad()
            cost.backward()
            optimizer.step()
            # Fix: .cpu().detach().numpy()[0] raises on 0-dim results with
            # modern torch/numpy; .item() handles both 0-dim and size-1 tensors.
            print("training-iter {} cost {}".format(ITER, cost.item()))
            if cur_iter % val_interval == 0:
                val(crnn, valset, criterion, n_class)
            if cur_iter % save_interval == 0:
                model_file = os.path.join(model_dir,
                                          "crnn_iter{}.pth".format(ITER))
                print("saving in file {}".format(model_file))
                with open(model_file, 'wb') as f:
                    torch.save(crnn, f)