def training():
    """Run the full training schedule: params.niter epochs over train_loader.

    Relies on module-level globals: crnn, criterion, optimizer, train_loader,
    test_dataset, loss_avg, params, trainBatch and val.
    """
    for epoch in range(params.niter):
        batch_iter = iter(train_loader)
        num_batches = len(train_loader)
        print(num_batches)
        step = 0
        while step < num_batches:
            # Re-enable gradients (validation may have frozen them) and
            # switch the network back to training mode.
            for weight in crnn.parameters():
                weight.requires_grad = True
            crnn.train()
            loss_avg.add(trainBatch(crnn, criterion, optimizer, batch_iter))
            step += 1
            if step % params.displayInterval == 0:
                # Report the running average loss, then restart the averager.
                print('[%d/%d][%d/%d] Loss: %f' %
                      (epoch, params.niter, step, num_batches, loss_avg.val()))
                loss_avg.reset()
            if step % params.valInterval == 0:
                val(crnn, test_dataset, criterion)
        # Checkpoint every params.saveInterval epochs.
        if (epoch + 1) % params.saveInterval == 0:
            torch.save(
                crnn.state_dict(),
                '{0}/crnn_Rec_done_{1}_{2}.pth'.format(params.experiment,
                                                       epoch, step))
def training():
    """Train for params.niter epochs, saving a checkpoint whenever validation
    accuracy improves on the best value seen so far.

    Relies on module-level globals: crnn, criterion, optimizer, train_loader,
    test_dataset, loss_avg, params, trainBatch and val.
    """
    best_acc = 0
    for epoch in range(params.niter):
        data_iter = iter(train_loader)
        n_batches = len(train_loader)
        print(n_batches)
        batch_idx = 0
        while batch_idx < n_batches:
            for p in crnn.parameters():
                p.requires_grad = True
            crnn.train()
            loss_avg.add(trainBatch(crnn, criterion, optimizer, data_iter))
            batch_idx += 1
            if batch_idx % params.displayInterval == 0:
                print('[%d/%d][%d/%d] Loss: %f' %
                      (epoch, params.niter, batch_idx, n_batches,
                       loss_avg.val()))
                loss_avg.reset()
            if batch_idx % params.valInterval == 0:
                accuracy = val(crnn, test_dataset, criterion)
                if accuracy > best_acc:
                    # New best model: persist weights tagged with
                    # epoch / step / accuracy, then raise the bar.
                    torch.save(
                        crnn.state_dict(),
                        '{0}/rnn_no_IO_{1}_{2}_{3}.pth'.format(
                            params.experiment, epoch, batch_idx, accuracy))
                    best_acc = accuracy
def train(crnn, train_loader, criterion, iteration):
    # One training pass of the CRNN with CTC loss, capped at 100 batches.
    # Uses module-level globals: args, utils, dataset, converter, optimizer,
    # loss_avg, params, np, torch. `iteration` is only used for progress output.
    for p in crnn.parameters():
        p.requires_grad = True
    crnn.train()
    for i_batch, (image, index) in enumerate(train_loader):
        if args.cuda:
            image = image.cuda()
            # NOTE(review): re-binds criterion to a CUDA copy on every batch;
            # harmless but redundant — could be hoisted out of the loop.
            criterion = criterion.cuda()
        label = utils.get_batch_label(dataset, index)
        preds = crnn(image)
        batch_size = image.size(0)
        index = np.array(index.data.numpy())
        text, length = converter.encode(label)
        # CTC wants the (constant) output time-length replicated per sample.
        preds_size = torch.IntTensor([preds.size(0)] * batch_size)
        # print(preds.shape, text.shape, preds_size.shape, length.shape)
        # torch.Size([41, 16, 6736]) torch.Size([160]) torch.Size([16]) torch.Size([16])
        cost = criterion(preds, text, preds_size, length) / batch_size
        crnn.zero_grad()
        cost.backward()
        # NOTE(review): `optimizer` is a module-level global, not a parameter.
        optimizer.step()
        loss_avg.add(cost)
        if i_batch == 100:
            # NOTE(review): hard cap at 100 batches per call — looks like
            # leftover debug/smoke-test code; confirm before shipping.
            break
        if (i_batch + 1) % params.displayInterval == 0:
            print('[%d/%d][%d/%d] Loss: %f' %
                  (iteration, params.niter, i_batch, len(train_loader),
                   loss_avg.val()))
            loss_avg.reset()
def training(start):
    """Resume training from epoch `start` and run until params.niter.

    Saves a checkpoint whenever validation accuracy beats the running best
    (initialised to 0.982470). Relies on module-level globals: crnn, criterion,
    optimizer, train_loader, test_dataset, loss_avg, params, trainBatch, val.
    """
    best = 0.982470
    for epoch in range(start, params.niter):
        feed = iter(train_loader)
        total = len(train_loader)
        print(total)
        done = 0
        while done < total:
            for w in crnn.parameters():
                w.requires_grad = True
            crnn.train()
            loss_avg.add(trainBatch(crnn, criterion, optimizer, feed))
            done += 1
            if done % params.displayInterval == 0:
                print('[%d/%d][%d/%d] Loss: %f' %
                      (epoch, params.niter, done, total, loss_avg.val()))
                loss_avg.reset()
            if done % params.valInterval == 0:
                acc = val(crnn, test_dataset, criterion)
                if acc > best:
                    # Validation improved: record the new best and checkpoint.
                    best = acc
                    print('save model ..........')
                    # ti = time.strftime('%Y-%m-%d', time.localtime(time.time()))
                    torch.save(
                        crnn.state_dict(),
                        '{0}/best_model_{1}_{2}.pth'.format(
                            params.experiment, epoch, done))
def training(crnn, train_loader, criterion, optimizer):
    """Standard CRNN epoch loop: display/validate at fixed step intervals and
    checkpoint every params.saveInterval epochs.

    Relies on module-level globals: params, loss_avg, test_dataset,
    trainBatch, val, torch.
    """
    for epoch in range(params.niter):
        batches = iter(train_loader)
        count = len(train_loader)
        print("total number", count)
        step = 0
        while step < count:
            # for p in crnn.parameters():
            #     p.requires_grad = True
            # Training phase.
            crnn.train()
            loss_avg.add(trainBatch(crnn, criterion, optimizer, batches))
            step += 1
            if step % params.displayInterval == 0:
                print('[%d/%d][%d/%d] Loss: %f' %
                      (epoch, params.niter, step, count, loss_avg.val()))
                loss_avg.reset()
            if step % params.valInterval == 0:
                val(crnn, test_dataset, criterion)
        # Save the model every params.saveInterval epochs.
        if (epoch + 1) % params.saveInterval == 0:
            torch.save(
                crnn.state_dict(),
                '{0}/crnn_Rec_done_{1}_{2}.pth'.format(params.experiment,
                                                       epoch, step))
def training():
    """Train for params.nEpochs epochs; checkpoint every params.saveEpoch
    epochs and run one final validation pass after the last epoch.

    Relies on module-level globals: crnn, criterion, optimizer, train_loader,
    test_dataset, loss_avg, params, trainBatch, val.
    """
    for ep in range(params.nEpochs):
        loader_iter = iter(train_loader)
        n = len(train_loader)
        k = 0
        while k < n:
            for p in crnn.parameters():
                p.requires_grad = True
            crnn.train()
            loss_avg.add(trainBatch(crnn, criterion, optimizer, loader_iter))
            k += 1
            if k % params.displayInterval == 0:
                print('[%d/%d][%d/%d] Loss: %f' %
                      (ep, params.nEpochs, k, n, loss_avg.val()))
                loss_avg.reset()
            if k % params.valInterval == 0:
                val(crnn, test_dataset, criterion)
        if (ep + 1) % params.saveEpoch == 0:
            torch.save(
                crnn.state_dict(),
                '{0}/crnn_Rec_done_{1}_{2}.pth'.format(params.experiment,
                                                       ep, k))
            print('Saved model params in dir {}'.format(params.experiment))
    # Final validation once all epochs are finished.
    val(crnn, test_dataset, criterion)
def training():
    # Training driver. NOTE(review): the val(...) + `return` pair at the top
    # of the while body exits on the first iteration and makes everything
    # after it unreachable — this turns the function into "validate once and
    # exit". Presumably a temporary evaluation switch; confirm before use.
    # Uses module-level globals: params, crnn, criterion, optimizer, logger,
    # train_loader, test_dataset, test_dataset1..3, loss_avg, log_dir,
    # trainBatch, val, torch.
    for total_steps in range(params.niter):
        train_iter = iter(train_loader)
        i = 0
        logger.info('length of train_data: %d' % (len(train_loader)))
        while i < len(train_loader):
            for p in crnn.parameters():
                p.requires_grad = True
            crnn.train()
            val(crnn, test_dataset1, test_dataset2, test_dataset3,
                total_steps, i, criterion)
            return
            # --- unreachable below this point ---
            cost = trainBatch(crnn, criterion, optimizer, train_iter)
            loss_avg.add(cost)
            i += 1
            if i % params.displayInterval == 0:
                logger.info('[%d/%d][%d/%d] Loss: %f' %
                            (total_steps, params.niter, i, len(train_loader),
                             loss_avg.val()))
                loss_avg.reset()
                val(crnn, test_dataset, total_steps, i, criterion)
        if (total_steps + 1) % params.saveInterval == 0:
            string = "model save to {0}crnn_Rec_done_epoch_{1}.pth".format(
                log_dir, total_steps)
            logger.info(string)
            torch.save(
                crnn.state_dict(),
                '{0}crnn_Rec_done_epoch_{1}.pth'.format(log_dir, total_steps))
def training():
    # Training driver with mmcv progress bar, tensorboard logging and an
    # adaptive display interval. NOTE(review): the val(...) + `return` pair
    # near the top of the while body exits on the first iteration, leaving
    # the rest of the loop unreachable — apparently a temporary
    # "evaluate only" switch; confirm before shipping.
    # Uses module-level globals: test_params, crnn, criterion, optimizer,
    # logger, writer, train_loader, test_dataset, loss_avg, log_dir,
    # trainBatch, val, mmcv, torch, time, sys.
    for total_steps in range(test_params.niter):
        train_iter = iter(train_loader)
        i = 0
        logger.info('length of train_data: %d' % (len(train_loader)))
        eval_time = 0.0
        prog_bar = mmcv.ProgressBar(test_params.displayInterval)
        while i < len(train_loader):
            torch.cuda.empty_cache()
            for p in crnn.parameters():
                p.requires_grad = True
            crnn.train()
            val(crnn, test_dataset, criterion, total_steps, i)
            return
            # --- unreachable below this point ---
            start = time.time()
            cost = trainBatch(crnn, criterion, optimizer, train_iter)
            eval_time += time.time() - start
            loss_avg.add(cost)
            i += 1
            prog_bar.update()
            if i % test_params.tbInterval == 0:
                print("\n>>>> Tensorboard Log")
                writer.add_scalar(
                    'train/loss', loss_avg.val(),
                    int(i + total_steps * len(train_loader)))  # record to tb
            if i % test_params.displayInterval == 0:
                # Blank out the progress-bar line before logging.
                sys.stdout.write("\r%100s\r" % ' ')
                sys.stdout.flush()
                logger.info('[%d/%d][%d/%d] Loss: %f, Cost: %.4fs per batch' %
                            (total_steps, test_params.niter, i,
                             len(train_loader), loss_avg.val(),
                             eval_time / i))
                # Widen the display interval when batches are fast.
                if eval_time / i < 0.2:
                    test_params.displayInterval = 1000
                elif eval_time / i < 0.5:
                    test_params.displayInterval = 400
                elif eval_time / i < 1.0:
                    test_params.displayInterval = 200
                prog_bar = mmcv.ProgressBar(
                    test_params.displayInterval)  # new interval
                loss_avg.reset()
                val(crnn, test_dataset, criterion, total_steps, i)
                torch.cuda.empty_cache()
        if (total_steps + 1) % test_params.saveInterval == 0:
            string = "model save to {0}crnn_Rec_done_epoch_{1}.pth".format(
                log_dir, total_steps)
            logger.info(string)
            torch.save(
                crnn.state_dict(),
                '{0}crnn_Rec_done_epoch_{1}.pth'.format(log_dir, total_steps))
def trainBatch():
    """Run one optimisation step on the next batch from the global iterator.

    Uses module-level globals: crnn, train_iter, device, criterion, optimizer.
    Returns the scalar CTC cost tensor for the batch.
    """
    crnn.train()
    # Bug fix: `train_iter.next()` is the Python 2 iterator protocol and was
    # removed in Python 3 — use the builtin next() instead.
    data = next(train_iter)
    image, text, length, _ = data
    image = image.to(device)
    image.requires_grad_()
    batch_size = image.size(0)
    preds = crnn(image)
    # CTC needs the (constant) output time-length replicated per sample.
    preds_size = torch.IntTensor([preds.size(0)] * batch_size)
    cost = criterion(preds, text, preds_size, length)
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()
    return cost
def trainBatch():
    """One training step with logit clamping and an occasional top-k debug
    print.

    Uses module-level globals: crnn, train_iter, device, criterion, optimizer.
    Returns the scalar CTC cost tensor for the batch.
    """
    crnn.train()
    # Bug fix: `train_iter.next()` is the Python 2 iterator protocol and was
    # removed in Python 3 — use the builtin next() instead.
    data = next(train_iter)
    image, text, length, _ = data
    image = image.to(device)
    image.requires_grad_()
    batch_size = image.size(0)
    preds = crnn(image)
    # Clamp very negative logits to keep log_softmax / CTC numerically stable.
    preds = torch.clamp(preds, min=-50.0)
    if random.random() < 0.01:
        # ~1% of steps: peek at the top-3 logits of one time-step/sample.
        print(preds[5, 0, :].topk(3))
    preds_size = torch.IntTensor([preds.size(0)] * batch_size)
    cost = criterion(preds.log_softmax(2), text, preds_size, length)
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()
    return cost
def training():
    """Curriculum training: switch to progressively larger datasets as the
    epoch count grows (small before epoch 50, medium before 80, large after).

    Relies on module-level globals: crnn, criterion, optimizer, loss_avg,
    params, trainBatch, val, the s_/m_/l_ loaders and test sets, torch.
    """
    for epoch in range(params.niter):
        # Pick the loader/test set for this stage of the curriculum.
        if epoch < 50:
            loader, testset = s_train_loader, s_test_dataset
        elif epoch < 80:
            loader, testset = m_train_loader, m_test_dataset
        else:
            loader, testset = l_train_loader, l_test_dataset
        # train_loader = train_loader
        # test_dataset = test_dataset
        # if epoch < 60:
        #     optimizer = optim.RMSprop(crnn.parameters(), lr=params.lr)
        # else:
        #     optimizer = optim.Adam(crnn.parameters(), lr=params.lr,
        #                            betas=(params.beta1, 0.999))
        batch_iter = iter(loader)
        n_batches = len(loader)
        print(n_batches)
        step = 0
        while step < n_batches:
            for p in crnn.parameters():
                p.requires_grad = True
            crnn.train()
            loss_avg.add(trainBatch(crnn, criterion, optimizer, batch_iter))
            step += 1
            if step % params.displayInterval == 0:
                print('[%d/%d][%d/%d] Loss: %f' %
                      (epoch, params.niter, step, n_batches, loss_avg.val()))
                loss_avg.reset()
            if step % params.valInterval == 0:
                val(crnn, testset, criterion)
        if (epoch + 1) % params.saveInterval == 0:
            torch.save(
                crnn.state_dict(),
                '{0}/crnn_Rec_done_{1}_{2}.pth'.format(params.experiment,
                                                       epoch, step))
def main(crnn, train_loader, val_loader, criterion, optimizer):
    """Outer training driver: alternate train/validate per epoch and
    checkpoint when validation accuracy beats params.best_accuracy.

    Uses module-level globals: args, params, train, val, torch.
    """
    if args.cuda:
        crnn.cuda()
    Iteration = 0
    while Iteration < params.niter:
        train(crnn, train_loader, criterion, Iteration)
        # max_i cuts down the time spent testing; to validate on the whole
        # test set, set it to len(val_loader).
        accuracy = val(crnn, val_loader, criterion, Iteration, max_i=1000)
        # Re-enable gradients after validation and return to train mode.
        for p in crnn.parameters():
            p.requires_grad = True
        crnn.train()
        if accuracy > params.best_accuracy:
            # Bug fix: the original used an undefined name `total_steps` in
            # the checkpoint path, raising NameError on the first save; the
            # epoch counter in this function is `Iteration`.
            torch.save(
                crnn.state_dict(),
                '{0}/crnn_Rec_done_{1}_{2}.pth'.format(params.experiment,
                                                       Iteration, accuracy))
            torch.save(crnn.state_dict(),
                       '{0}/crnn_best.pth'.format(params.experiment))
            # NOTE(review): params.best_accuracy is never raised after a save,
            # so every epoch above the initial threshold re-saves — confirm
            # whether it should be updated to `accuracy` here.
        print("is best accuracy: {0}".format(accuracy > params.best_accuracy))
        Iteration += 1
def training(crnn, train_loader, criterion, optimizer):
    """Epoch loop: train batch-by-batch, report/validate on fixed step
    intervals, and checkpoint every params.saveInterval epochs.

    Relies on module-level globals: params, loss_avg, train_batch, val, torch.
    """
    for epoch in range(params.niter):
        source = iter(train_loader)
        total = len(train_loader)
        batch_no = 0
        while batch_no < total:
            for weight in crnn.parameters():
                weight.requires_grad = True
            crnn.train()
            loss_avg.add(train_batch(crnn, criterion, optimizer, source))
            batch_no += 1
            if batch_no % params.displayInterval == 0:
                print('[%d/%d][%d/%d] Loss: %f' %
                      (epoch, params.niter, batch_no, total, loss_avg.val()))
                loss_avg.reset()
            if batch_no % params.valInterval == 0:
                val(crnn, criterion)
        if epoch % params.saveInterval == 0:
            save_name = '{0}/crnn_Rec_done_{1}_{2}.pth'.format(
                params.experiment, epoch, batch_no)
            torch.save(crnn.state_dict(), save_name)
            print('%s saved' % save_name)
def train(crnn, train_loader, criterion, optimizer, valid_loader, epoch=0):
    """Run one epoch of CRNN training over `train_loader`.

    Uses module-level globals: utils, converter, opt, torch.
    `valid_loader` is accepted for interface compatibility but unused here.
    `epoch` (new parameter, default 0, backward-compatible) is the epoch index
    shown in the progress line — the original referenced an undefined global
    `epoch` and would raise NameError at the first display interval.
    """
    for p in crnn.parameters():
        p.requires_grad = True
    crnn.train()
    train_iter = iter(train_loader)
    # loss averager
    loss_avg = utils.averager()
    for i in range(len(train_loader)):
        # Bug fix: `train_iter.next()` was removed in Python 3 — use next().
        data = next(train_iter)
        _, images, texts = data
        batch_size = images.size(0)
        t, l = converter.encode(texts)
        images = images.cuda()
        preds = crnn(images)
        # CTC needs the (constant) output time-length replicated per sample.
        preds_size = torch.IntTensor([preds.size(0)] * batch_size)
        cost = criterion(preds, t, preds_size, l) / batch_size
        crnn.zero_grad()
        cost.backward()
        optimizer.step()
        loss_avg.add(cost)
        if (i + 1) % opt.displayInterval == 0:
            print('[%d/%d][%d/%d] Loss: %f' %
                  (epoch, opt.nepoch, i, len(train_loader), loss_avg.val()))
            loss_avg.reset()
# sim_preds = converter.beam_decode(preds.data) cer_loss = utils.cer_loss(sim_preds, cpu_texts, ignore_case=False) return cost, cer_loss, batch_size for epoch in range(opt.nepoch): train_iter = iter(train_loader) i = 0 train_ctc = 0 train_cer = 0 while i < len(train_loader): for p in crnn.parameters(): p.requires_grad = True crnn.train() cost, cer_loss, batch_size = trainBatch(crnn, criterion, optimizer) train_cer += cer_loss train_ctc += cost * batch_size loss_avg.add(cost) i += 1 if i % opt.displayInterval == 0: print('[%d/%d][%d/%d] Loss: %f' % (epoch, opt.nepoch, i, len(train_loader), loss_avg.val())) loss_avg.reset() if opt.valid_result and i % opt.valInterval == 0: val(crnn, criterion)
def main(arg):
    """End-to-end CRNN training entry point: build datasets/loaders,
    initialise the model, then run the epoch/step loop with periodic
    testing and checkpointing.

    Uses module-level globals: dataset, DataLoader, CTCLoss, utils, CRNN,
    Variable, optim, test, torch.
    """
    print(arg)
    train_dataset = dataset.lmdbDataset(
        path=arg.train_root,
        # transform=dataset.resizeNormalize((imgW,imgH)),
    )
    test_dataset = dataset.lmdbDataset(
        path=arg.test_root,
        # transform=dataset.resizeNormalize((arg.imgW,arg.imgH)),
    )
    # Sanity probes: touch the test set once so LMDB problems surface early.
    d = test_dataset.__getitem__(0)
    l = test_dataset.__len__()
    train_loader = DataLoader(train_dataset,
                              num_workers=arg.num_workers,
                              batch_size=arg.batch_size,
                              collate_fn=dataset.alignCollate(
                                  imgH=arg.imgH,
                                  imgW=arg.imgW,
                                  keep_ratio=arg.keep_ratio),
                              shuffle=True,
                              drop_last=True)
    criterion = CTCLoss()
    converter = utils.Converter(arg.num_class)
    # nclass = num_class + 1 to reserve an index for the CTC blank label.
    crnn = CRNN(imgH=arg.imgH, nc=3, nclass=arg.num_class + 1, nh=256)

    # custom weights initialization called on crnn
    def weights_init(m):
        classname = m.__class__.__name__
        if classname.find('Conv') != -1:
            m.weight.data.normal_(0.0, 0.02)
        elif classname.find('BatchNorm') != -1:
            m.weight.data.normal_(1.0, 0.02)
            m.bias.data.fill_(0)

    crnn.apply(weights_init)
    print(crnn)
    # Pre-allocated tensors refilled in-place each batch via utils.loadData.
    image = torch.FloatTensor(arg.batch_size, 3, arg.imgH, arg.imgW)
    text = torch.IntTensor(arg.batch_size * 5)
    length = torch.IntTensor(arg.batch_size)
    image = Variable(image)
    text = Variable(text)
    length = Variable(length)
    # loss averager
    loss_avg = utils.averager()
    # setup optimizer
    if arg.opt == 'adam':
        optimizer = optim.Adam(crnn.parameters(), 0.01, betas=(0.5, 0.999))
    elif arg.opt == 'adadelta':
        optimizer = optim.Adadelta(crnn.parameters())
    else:
        optimizer = optim.RMSprop(crnn.parameters(), 0.01)
    for epoch in range(arg.n_epoch):
        train_iter = iter(train_loader)
        i = 0
        while i < len(train_loader):
            for p in crnn.parameters():
                p.requires_grad = True
            crnn.train()
            # Bug fix: `train_iter.next()` is the Python 2 iterator protocol
            # and was removed in Python 3 — use the builtin next() instead.
            data = next(train_iter)
            cpu_images, cpu_texts = data
            batch_size = cpu_images.size(0)
            utils.loadData(image, cpu_images)
            text_labels, l = converter.encode(cpu_texts)
            utils.loadData(text, text_labels)
            utils.loadData(length, l)
            preds = crnn(image)
            preds_size = Variable(
                torch.IntTensor([preds.size(0)] * batch_size))
            cost = criterion(preds, text, preds_size, length) / batch_size
            crnn.zero_grad()
            cost.backward()
            optimizer.step()
            loss_avg.add(cost)
            i += 1
            if i % arg.displayInterval == 0:
                print('[%d/%d][%d/%d] Loss: %f' %
                      (epoch, arg.n_epoch, i, len(train_loader),
                       loss_avg.val()))
                loss_avg.reset()
            if i % arg.testInterval == 0:
                test(arg, crnn, test_dataset, criterion, image, text, length)
            # do checkpointing
            if i % arg.saveInterval == 0:
                name = '{0}/netCRNN_{1}_{2}_{3}_{4}.pth'.format(
                    arg.model_dir, arg.num_class, arg.type, epoch, i)
                torch.save(crnn.state_dict(), name)
                print('model saved at ', name)
    # Final save after all epochs.
    torch.save(
        crnn.state_dict(),
        '{0}/netCRNN_{1}_{2}.pth'.format(arg.model_dir, arg.num_class,
                                         arg.type))
    # NOTE(review): truncated chunk — the lines below up to `return cost` are
    # the tail of a trainBatch()-style function whose `def` line is outside
    # this view (image/text/length/batch_size are defined there), and the
    # final `if i % opt.saveInterval == 0:` is cut off before its body.
    preds = crnn(image)
    preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
    cost = criterion(preds, text, preds_size, length) / batch_size
    crnn.zero_grad()
    cost.backward()
    optimizer.step()
    return cost


# Top-level epoch loop driving trainBatch with periodic display/validation.
# Uses module-level globals: opt, crnn, criterion, optimizer, train_loader,
# test_dataset, loss_avg, trainBatch, val.
for epoch in range(opt.niter):
    train_iter = iter(train_loader)
    i = 0
    while i < len(train_loader):
        for p in crnn.parameters():
            p.requires_grad = True
        crnn.train()
        cost = trainBatch(crnn, criterion, optimizer)
        loss_avg.add(cost)
        i += 1
        if i % opt.displayInterval == 0:
            print('[%d/%d][%d/%d] Loss: %f' %
                  (epoch, opt.niter, i, len(train_loader), loss_avg.val()))
            loss_avg.reset()
        if i % opt.valInterval == 0:
            val(crnn, test_dataset, criterion)
        # do checkpointing
        if i % opt.saveInterval == 0:
def training():
    # Training driver with mmcv progress bar, tensorboard logging, adaptive
    # display interval, and per-epoch validation/checkpointing. An earlier
    # error-tolerant try/except wrapper around the batch step is retained
    # below as a triple-quoted string for reference.
    # Uses module-level globals: params, crnn, criterion, optimizer, logger,
    # writer, train_loader, test_dataset, loss_avg, log_dir, trainBatch, val,
    # mmcv, torch, time, sys.
    for total_steps in range(params.niter):
        train_iter = iter(train_loader)
        i = 0
        logger.info('length of train_data: %d' % (len(train_loader)))
        eval_time = 0.0
        prog_bar = mmcv.ProgressBar(params.displayInterval)
        while i < len(train_loader):
            i += 1
            runtime_error = False
            crnn.train()
            # NOTE(review): loss_avg is reset every batch here, so the value
            # logged below is effectively the last batch's cost, not a running
            # average — confirm intent.
            loss_avg.reset()
            start = time.time()
            cost = trainBatch(crnn, criterion, optimizer, train_iter)
            eval_time += time.time() - start
            loss_avg.add(cost.cpu())
            prog_bar.update()
            '''
            try:
                i += 1
                #crnn.cuda()
                crnn.train()
                loss_avg.reset()
                start = time.time()
                cost = trainBatch(crnn, criterion, optimizer, train_iter)
                eval_time += time.time()-start
                loss_avg.add(cost.cpu())
                prog_bar.update()
                runtime_error = False
            except RuntimeError as e:
                logger.error(e)
                runtime_error = True
            except ConnectionRefusedError as e:
                logger.error(e)
                runtime_error = True
            finally:
                if runtime_error:
                    logger.error("Warning: Some error happen")
                    gc.collect()
                    torch.cuda.empty_cache()
            '''
            if i % params.tbInterval == 0 and not runtime_error:
                print("\n>>>> Tensorboard Log")
                writer.add_scalar('train/loss', loss_avg.val(),
                                  int(i + total_steps *
                                      len(train_loader)))  # record to tb
            if i % params.displayInterval == 0 and not runtime_error:
                # Blank out the progress-bar line before logging.
                sys.stdout.write("\r%100s\r" % ' ')
                sys.stdout.flush()
                logger.info('[%d/%d][%d/%d] Loss: %f, Cost: %.4fs per batch' %
                            (total_steps, params.niter, i, len(train_loader),
                             loss_avg.val(), eval_time / i))
                loss_avg.reset()
                # Widen the display interval when batches are fast.
                if eval_time / i < 0.2:
                    params.displayInterval = 1000
                elif eval_time / i < 0.5:
                    params.displayInterval = 400
                elif eval_time / i < 1.0:
                    params.displayInterval = 200
                prog_bar = mmcv.ProgressBar(
                    params.displayInterval)  # new interval
            # if i % params.valInterval == 0:
            #     val(crnn, test_dataset, criterion, total_steps, i)
            #     torch.cuda.empty_cache()
        # Validate once per epoch, after the batch loop.
        torch.cuda.empty_cache()
        val(crnn, test_dataset, criterion, total_steps, i)
        if (total_steps + 1) % params.saveInterval == 0:
            string = "model save to {0}crnn_Rec_done_epoch_{1}.pth".format(
                log_dir, total_steps)
            logger.info(string)
            torch.save(
                crnn.state_dict(),
                '{0}crnn_Rec_done_epoch_{1}.pth'.format(log_dir, total_steps))