def eval_detection(opts, net=None):
    """Evaluate text detection and print precision / recall / F-score.

    Runs the detector over every image listed in ``opts.eval_list`` and
    matches predicted boxes against ground truth with an IoU > 0.5 criterion.

    Args:
        opts: parsed command-line options; uses ``model``, ``cuda``,
            ``eval_list``, ``segm_thresh`` and ``debug``.
        net: optional pre-built network. When ``None`` a fresh
            ``OctShuffleMLT`` is created and loaded from ``opts.model``.
    """
    if net is None:  # identity check, not '== None'
        net = OctShuffleMLT(attention=True)
        net_utils.load_net(opts.model, net)
        if opts.cuda:
            net.cuda()

    images, gt_boxes = load_annotation(opts.eval_list)

    true_positives = 0
    false_positives = 0
    false_negatives = 0

    for i in range(images.shape[0]):
        image = np.expand_dims(images[i], axis=0)
        image_boxes_gt = np.array(gt_boxes[i])

        im_data = net_utils.np_to_variable(image, is_cuda=opts.cuda).permute(0, 3, 1, 2)
        seg_pred, rboxs, angle_pred, features = net(im_data)

        # (C, H, W) -> (H, W, C) for the rbox channels
        rbox = rboxs[0].data.cpu()[0].numpy()
        rbox = rbox.swapaxes(0, 1)
        rbox = rbox.swapaxes(1, 2)

        angle_pred = angle_pred[0].data.cpu()[0].numpy()

        segm = seg_pred[0].data.cpu()[0].numpy()
        segm = segm.squeeze(0)

        boxes = get_boxes(segm, rbox, angle_pred, opts.segm_thresh)

        if opts.debug:
            print(boxes.shape)
            print(image_boxes_gt.shape)
            print("============")

        # Count every prediction as FP and every gt box as FN up front;
        # each IoU match below converts one pair into a TP.
        false_positives += boxes.shape[0]
        false_negatives += image_boxes_gt.shape[0]

        for box in boxes:
            b = box[0:8].reshape(4, -1)
            poly = Polygon.Polygon(b)
            for box_gt in image_boxes_gt:
                b_gt = box_gt[0:8].reshape(4, -1)
                poly_gt = Polygon.Polygon(b_gt)
                # BUGFIX: the original swapped the operators — in the Polygon
                # library '|' is union and '&' is intersection, so the old
                # code computed (union+1)/(intersection+1)-1, an inverted
                # measure. Use the standard IoU with a zero-union guard.
                intersection = poly_gt & poly
                union = poly_gt | poly
                union_area = union.area()
                iou = intersection.area() / union_area if union_area > 0 else 0.0
                if iou > 0.5:
                    true_positives += 1
                    false_negatives -= 1
                    false_positives -= 1
                    # remove the matched gt box so it cannot match twice
                    image_boxes_gt = np.array([bgt for bgt in image_boxes_gt
                                               if not np.array_equal(bgt, box_gt)])
                    break

    print("tp: {} fp: {} fn: {}".format(true_positives, false_positives, false_negatives))
    # Guard against zero denominators when nothing was detected / annotated.
    detected = true_positives + false_positives
    annotated = true_positives + false_negatives
    precision = true_positives / detected if detected else 0.0
    recall = true_positives / annotated if annotated else 0.0
    f_score = 2 * precision * recall / (precision + recall) if (precision + recall) else 0.0
    print("PRECISION: {} \t RECALL: {} \t F SCORE: {}".format(precision, recall, f_score))
def main(opts): # alphabet = '0123456789.' nclass = len(alphabet) + 1 model_name = 'E2E-CRNN' net = OwnModel(attention=True, nclass=nclass) print("Using {0}".format(model_name)) if opts.cuda: net.cuda() learning_rate = opts.base_lr optimizer = torch.optim.Adam(net.parameters(), lr=opts.base_lr, weight_decay=weight_decay) optimizer = optim.Adam(net.parameters(), lr=opts.base_lr, betas=(0.5, 0.999)) step_start = 0 ### 第一种:只修改conv11的维度 # model_dict = net.state_dict() # if os.path.exists(opts.model): # print('loading pretrained model from %s' % opts.model) # pretrained_model = OwnModel(attention=True, nclass=12) # pretrained_model.load_state_dict(torch.load(opts.model)['state_dict']) # pretrained_dict = pretrained_model.state_dict() # # pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict and 'rnn' not in k and 'conv11' not in k} # model_dict.update(pretrained_dict) # net.load_state_dict(model_dict) if os.path.exists(opts.model): print('loading model from %s' % args.model) step_start, learning_rate = net_utils.load_net(args.model, net, optimizer) ## ICDAR2015数据集 e2edata = E2Edataset(train_list=opts.train_list) e2edataloader = torch.utils.data.DataLoader(e2edata, batch_size=opts.batch_size, shuffle=True, collate_fn=E2Ecollate, num_workers=4) net.train() converter = strLabelConverter(alphabet) ctc_loss = CTCLoss() for step in range(step_start, opts.max_iters): for index, date in enumerate(e2edataloader): im_data, gtso, lbso = date im_data = im_data.cuda() try: loss= process_crnn(im_data, gtso, lbso, net, ctc_loss, converter, training=True) net.zero_grad() # optimizer.zero_grad() loss.backward() optimizer.step() except: import sys, traceback traceback.print_exc(file=sys.stdout) pass if index % disp_interval == 0:
def main(opts):
    """Evaluate a CRNN digit recognizer over the whole dataset.

    Loads weights from ``opts.model`` when present, runs every sample
    through ``process_crnn`` in eval mode and prints sample-level accuracy.
    """
    alphabet = '0123456789.'
    nclass = len(alphabet) + 1  # +1 for the CTC blank symbol
    model_name = 'crnn'
    net = CRNN(nclass)
    print("Using {0}".format(model_name))
    if opts.cuda:
        net.cuda()

    learning_rate = opts.base_lr
    # Optimizer is only built so load_net can restore its state alongside net.
    optimizer = torch.optim.Adam(net.parameters(), lr=opts.base_lr, weight_decay=weight_decay)
    if os.path.exists(opts.model):
        # BUGFIX: the original read the module-level global 'args' here;
        # use the 'opts' argument this function was actually given.
        print('loading model from %s' % opts.model)
        step_start, learning_rate = net_utils.load_net(opts.model, net, optimizer)

    ## dataset
    converter = strLabelConverter(alphabet)
    dataset = ImgDataset(
        root='/home/yangna/deepblue/OCR/mech_demo2/dataset/imgs/image',
        csv_root='/home/yangna/deepblue/OCR/mech_demo2/dataset/imgs/train_list.txt',
        transform=None,
        target_transform=converter.encode
    )
    ocrdataloader = torch.utils.data.DataLoader(
        dataset, batch_size=1, shuffle=False, collate_fn=own_collate
    )

    num_count = 0
    net = net.eval()
    ctc_loss = CTCLoss()

    # Initialize the iterator up front instead of bootstrapping it through a
    # bare except on the first next() call; restart only on StopIteration.
    data_iter = iter(ocrdataloader)
    for step in range(len(dataset)):
        try:
            data = next(data_iter)
        except StopIteration:
            data_iter = iter(ocrdataloader)
            data = next(data_iter)

        im_data, gt_boxes, text = data
        im_data = im_data.cuda()
        try:
            res = process_crnn(im_data, gt_boxes, text, net, ctc_loss, converter, training=False)
            pred, target = res
            if pred == target[0]:
                num_count += 1
        except Exception:
            # best-effort evaluation: report the failure and keep going
            import sys, traceback
            traceback.print_exc(file=sys.stdout)

    print('correct/total:%d/%d' % (num_count, len(dataset)))
parser = argparse.ArgumentParser() parser.add_argument('-cuda', type=int, default=1) parser.add_argument('-model', default='./backup/E2E-MLT_280000.h5') # parser.add_argument('-model', default='./weights/e2e-mlt.h5') parser.add_argument('-segm_thresh', default=0.5) parser.add_argument('-test_folder', default=r'/home/yangna/deepblue/OCR/data/ICDAR2015/ch4_test_images/*.jpg') parser.add_argument('-output', default='./data/ICDAR2015') font2 = ImageFont.truetype("Arial-Unicode-Regular.ttf", 18) args = parser.parse_args() # net = ModelResNetSep2(attention=True, nclass=len(alphabet)+1) net = ModelResNetSep2(attention=True, nclass=len(alphabet)+1) net_utils.load_net(args.model, net) net = net.eval() converter = strLabelConverter(alphabet) if args.cuda: print('Using cuda ...') net = net.cuda() imagelist = glob.glob(args.test_folder) with torch.no_grad(): for path in imagelist: # path = '/home/yangna/deepblue/OCR/data/ICDAR2015/ch4_test_images/img_405.jpg' im = cv2.imread(path) im_resized, (ratio_h, ratio_w) = resize_image(im, scale_up=False)
def main(opts): # alphabet = '0123456789.' nclass = len(alphabet) + 1 model_name = 'E2E-CRNN' net = OwnModel(attention=True, nclass=nclass) print("Using {0}".format(model_name)) if opts.cuda: net.cuda() learning_rate = opts.base_lr optimizer = torch.optim.Adam(net.parameters(), lr=opts.base_lr, weight_decay=weight_decay) optimizer = optim.Adam(net.parameters(), lr=opts.base_lr, betas=(0.5, 0.999)) step_start = 0 ### 第一种:只修改conv11的维度 # model_dict = net.state_dict() # if os.path.exists(opts.model): # print('loading pretrained model from %s' % opts.model) # pretrained_model = OwnModel(attention=True, nclass=12) # pretrained_model.load_state_dict(torch.load(opts.model)['state_dict']) # pretrained_dict = pretrained_model.state_dict() # pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict and 'rnn' not in k and 'conv11' not in k} # model_dict.update(pretrained_dict) # net.load_state_dict(model_dict) if os.path.exists(opts.model): print('loading model from %s' % args.model) step_start, learning_rate = net_utils.load_net(args.model, net, optimizer) ## 数据集 e2edata = E2Edataset(train_list=opts.train_list) e2edataloader = torch.utils.data.DataLoader(e2edata, batch_size=opts.batch_size, shuffle=False, collate_fn=E2Ecollate, num_workers=4) # 电表数据集 # converter = strLabelConverter(alphabet) # dataset = ImgDataset( # root='/home/yangna/deepblue/OCR/mech_demo2/dataset/imgs/image', # csv_root='/home/yangna/deepblue/OCR/mech_demo2/dataset/imgs/train_list.txt', # transform=None, # target_transform=converter.encode # ) # ocrdataloader = torch.utils.data.DataLoader( # dataset, batch_size=opts.batch_size, shuffle=True, collate_fn=own_collate # ) net.eval() num_count = 0 converter = strLabelConverter(alphabet) ctc_loss = CTCLoss() for index, date in enumerate(e2edataloader): im_data, gtso, lbso = date im_data = im_data.cuda() try: with torch.no_grad(): res = process_crnn(im_data, gtso, lbso, net, ctc_loss, converter, training=False) pred, target = res target = 
''.join(target) if pred == target: num_count += 1 except:
def main(opts):
    """Train the ICCV OCR model with CTC loss.

    Resumes from ``opts.model`` when the checkpoint exists, then runs an
    effectively unbounded training loop, checkpointing and testing every
    ``batch_per_epoch`` steps.
    """
    # pairs = c1, c2, label
    model_name = 'ICCV_OCR'
    net = OCRModel()
    if opts.cuda:
        net.cuda()
    optimizer = torch.optim.Adam(net.parameters(), lr=base_lr)
    step_start = 0
    if os.path.exists(opts.model):
        # BUGFIX: was 'args.model' (module-level global); use the function arg
        print('loading model from %s' % opts.model)
        step_start, learning_rate = net_utils.load_net(opts.model, net)
    else:
        learning_rate = base_lr

    print('train')
    net.train()
    # test(net)

    ctc_loss = CTCLoss(blank=0).cuda()
    data_generator = ocr_gen.get_batch(num_workers=opts.num_readers,
                                       batch_size=opts.batch_size,
                                       train_list=opts.train_list, in_train=True)

    train_loss = 0
    cnt = 0
    tq = tqdm(range(step_start, 10000000))
    for step in tq:
        # batch
        images, labels, label_length = next(data_generator)
        im_data = net_utils.np_to_variable(images, is_cuda=opts.cuda,
                                           volatile=False).permute(0, 3, 1, 2)
        labels_pred = net(im_data)

        '''
        acts: Tensor of (seqLength x batch x outputDim) containing output from network
        labels: 1 dimensional Tensor containing all the targets of the batch in one sequence
        act_lens: Tensor of size (batch) containing size of each output sequence from the network
        act_lens: Tensor of (batch) containing label length of each example
        '''
        torch.backends.cudnn.deterministic = True
        probs_sizes = Variable(
            torch.IntTensor([(labels_pred.permute(2, 0, 1).size()[0])] *
                            (labels_pred.permute(2, 0, 1).size()[1]))).long()
        label_sizes = Variable(
            torch.IntTensor(torch.from_numpy(np.array(label_length)).int())).long()
        labels = Variable(
            torch.IntTensor(torch.from_numpy(np.array(labels)).int())).long()

        optimizer.zero_grad()
        # probs = nn.functional.log_softmax(labels_pred, dim=94)
        labels_pred = labels_pred.permute(2, 0, 1)
        loss = ctc_loss(labels_pred, labels, probs_sizes, label_sizes) / opts.batch_size  # change 1.9.
        # BUGFIX: '== np.inf' missed -inf and nan; skip all non-finite losses.
        if not np.isfinite(loss.item()):
            continue
        # BUGFIX: backward() was commented out, so optimizer.step() had no
        # gradients to apply and the weights were never updated.
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        cnt += 1
        tq.set_description('epoch %d[%d], loss: %.3f, lr: %.5f ' %
                           (step / batch_per_epoch, step, train_loss / cnt, learning_rate))

        if step > step_start and (step % batch_per_epoch == 0):
            # periodic checkpoint + quick test
            save_name = os.path.join(opts.save_path, '{}_{}.h5'.format(model_name, step))
            state = {
                'step': step,
                'learning_rate': learning_rate,
                'state_dict': net.state_dict(),
                'optimizer': optimizer.state_dict()
            }
            torch.save(state, save_name)
            print('save model: {}'.format(save_name))
            test(net)
def main(opts): nclass = len(alphabet) + 1 model_name = 'E2E-MLT' net = OwnModel(attention=True, nclass=nclass) print("Using {0}".format(model_name)) if opts.cuda: net.cuda() learning_rate = opts.base_lr optimizer = torch.optim.Adam(net.parameters(), lr=opts.base_lr, weight_decay=weight_decay) ### 第一种:只修改conv11的维度 # model_dict = net.state_dict() # if os.path.exists(opts.model): # # 载入预训练模型 # print('loading pretrained model from %s' % opts.model) # # pretrained_model = OwnModel(attention=True, nclass=7325) # pretrained_model = ModelResNetSep2(attention=True, nclass=7500) # pretrained_model.load_state_dict(torch.load(opts.model)['state_dict']) # pretrained_dict = pretrained_model.state_dict() # # pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict and 'conv11' not in k and 'rnn' not in k} # # 2. overwrite entries in the existing state dict # model_dict.update(pretrained_dict) # # 3. load the new state dict # net.load_state_dict(model_dict) ### 第二种:直接接着前面训练 if os.path.exists(opts.model): print('loading model from %s' % args.model) step_start, learning_rate = net_utils.load_net(args.model, net, optimizer) ### step_start = 0 net.train() converter = strLabelConverter(alphabet) ctc_loss = CTCLoss() e2edata = E2Edataset(train_list=opts.train_list) e2edataloader = torch.utils.data.DataLoader(e2edata, batch_size=4, shuffle=True, collate_fn=E2Ecollate) train_loss = 0 bbox_loss, seg_loss, angle_loss = 0., 0., 0. 
cnt = 0 ctc_loss_val = 0 ctc_loss_val2 = 0 box_loss_val = 0 gt_g_target = 0 gt_g_proc = 0 for step in range(step_start, opts.max_iters): loss = 0 # batch images, image_fns, score_maps, geo_maps, training_masks, gtso, lbso, gt_idxs = next(data_generator) im_data = net_utils.np_to_variable(images.transpose(0, 3, 1, 2), is_cuda=opts.cuda) # im_data = torch.from_numpy(images).type(torch.FloatTensor).permute(0, 3, 1, 2).cuda() # permute(0,3,1,2)和cuda的先后顺序有影响 start = timeit.timeit() try: seg_pred, roi_pred, angle_pred, features = net(im_data) except: import sys, traceback traceback.print_exc(file=sys.stdout) continue end = timeit.timeit() # for EAST loss smaps_var = net_utils.np_to_variable(score_maps, is_cuda=opts.cuda) training_mask_var = net_utils.np_to_variable(training_masks, is_cuda=opts.cuda) angle_gt = net_utils.np_to_variable(geo_maps[:, :, :, 4], is_cuda=opts.cuda) geo_gt = net_utils.np_to_variable(geo_maps[:, :, :, [0, 1, 2, 3]], is_cuda=opts.cuda) try: loss = net.loss(seg_pred, smaps_var, training_mask_var, angle_pred, angle_gt, roi_pred, geo_gt) except: import sys, traceback traceback.print_exc(file=sys.stdout) continue bbox_loss += net.box_loss_value.data.cpu().numpy() seg_loss += net.segm_loss_value.data.cpu().numpy() angle_loss += net.angle_loss_value.data.cpu().numpy() train_loss += loss.data.cpu().numpy() try: # 10000步之前都是用文字的标注区域训练的 if step > 10000 or True: #this is just extra augumentation step ... 
in early stage just slows down training # ctcl, gt_target , gt_proc = process_boxes(images, im_data, seg_pred[0], roi_pred[0], angle_pred[0], score_maps, gt_idxs, gtso, lbso, features, net, ctc_loss, opts, converter, debug=opts.debug) ctcl= process_crnn(im_data, gtso, lbso, net, ctc_loss, converter, training=True) gt_target = 1 gt_proc = 1 ctc_loss_val += ctcl.data.cpu().numpy()[0] loss = ctcl gt_g_target = gt_target gt_g_proc = gt_proc train_loss += ctcl.item() # -训练ocr识别部分的时候,采用一个data_generater生成 # imageso, labels, label_length = next(dg_ocr) # 其中应该有对倾斜文本的矫正 # im_data_ocr = net_utils.np_to_variable(imageso, is_cuda=opts.cuda).permute(0, 3, 1, 2) # features = net.forward_features(im_data_ocr) # labels_pred = net.forward_ocr(features) # probs_sizes = torch.IntTensor( [(labels_pred.permute(2,0,1).size()[0])] * (labels_pred.permute(2,0,1).size()[1]) ) # label_sizes = torch.IntTensor( torch.from_numpy(np.array(label_length)).int() ) # labels = torch.IntTensor( torch.from_numpy(np.array(labels)).int() ) # loss_ocr = ctc_loss(labels_pred.permute(2,0,1), labels, probs_sizes, label_sizes) / im_data_ocr.size(0) * 0.5 # loss_ocr.backward() # ctc_loss_val2 += loss_ocr.item() net.zero_grad() optimizer.zero_grad() loss.backward() optimizer.step() except:
def main(opts):
    """Train the ModelResNetSep_final OCR branch with CTC loss.

    Optionally resumes weights from ``opts.model``, cycles the learning rate
    with CyclicLR, logs averaged losses to <save_path>/loss_ocr.txt every
    ``disp_interval`` steps, and once per epoch evaluates CER/WER on the
    validation set and writes a checkpoint.
    """
    model_name = 'E2E-MLT'
    net = ModelResNetSep_final(attention=True)
    acc = []  # NOTE(review): never appended to in this function
    ctc_loss = nn.CTCLoss()
    if opts.cuda:
        net.cuda()
        ctc_loss.cuda()
    optimizer = torch.optim.Adam(net.parameters(), lr=base_lr, weight_decay=weight_decay)
    scheduler = torch.optim.lr_scheduler.CyclicLR(optimizer, base_lr=0.0005, max_lr=0.001,
                                                  step_size_up=3000, cycle_momentum=False)
    step_start = 0
    if os.path.exists(opts.model):
        # NOTE(review): reads the module-level global 'args', not the 'opts'
        # parameter — confirm this is intentional.
        print('loading model from %s' % args.model)
        step_start, learning_rate = net_utils.load_net(args.model, net, optimizer)
    else:
        learning_rate = base_lr
    # Force every param group back to base_lr regardless of the checkpoint.
    for param_group in optimizer.param_groups:
        param_group['lr'] = base_lr
        learning_rate = param_group['lr']
        print(param_group['lr'])
    # NOTE(review): unconditionally discards the step counter restored above.
    step_start = 0
    net.train()

    #acc_test = test(net, codec, opts, list_file=opts.valid_list, norm_height=opts.norm_height)
    #acc.append([0, acc_test])
    # ctc_loss = CTCLoss()
    ctc_loss = nn.CTCLoss()
    data_generator = ocr_gen.get_batch(num_workers=opts.num_readers,
                                       batch_size=opts.batch_size,
                                       train_list=opts.train_list, in_train=True,
                                       norm_height=opts.norm_height, rgb = True, normalize= True)
    val_dataset = ocrDataset(root=opts.valid_list, norm_height=opts.norm_height , in_train=False,
                             is_crnn=False)
    val_generator = torch.utils.data.DataLoader(val_dataset, batch_size=1, shuffle=False,
                                                collate_fn=alignCollate())
    # val_generator1 = torch.utils.data.DataLoader(val_dataset, batch_size=2, shuffle=False,
    #                                              collate_fn=alignCollate())

    # Running accumulators; cnt starts at 1 to avoid div-by-zero on the first
    # display interval.
    cnt = 1
    cntt = 0
    train_loss_lr = 0
    time_total = 0
    train_loss = 0
    now = time.time()
    for step in range(step_start, 300000):
        # batch
        images, labels, label_length = next(data_generator)
        im_data = net_utils.np_to_variable(images, is_cuda=opts.cuda).permute(0, 3, 1, 2)
        features = net.forward_features(im_data)
        labels_pred = net.forward_ocr(features)

        # backward
        '''
        acts: Tensor of (seqLength x batch x outputDim) containing output from network
        labels: 1 dimensional Tensor containing all the targets of the batch in one sequence
        act_lens: Tensor of size (batch) containing size of each output sequence from the network
        act_lens: Tensor of (batch) containing label length of each example
        '''
        probs_sizes = torch.IntTensor([(labels_pred.permute(2, 0, 1).size()[0])] *
                                      (labels_pred.permute(2, 0, 1).size()[1])).long()
        label_sizes = torch.IntTensor(torch.from_numpy(np.array(label_length)).int()).long()
        labels = torch.IntTensor(torch.from_numpy(np.array(labels)).int()).long()
        # CTC loss normalized by batch size
        loss = ctc_loss(labels_pred.permute(2,0,1), labels, probs_sizes, label_sizes) / im_data.size(0)  # change 1.9.
        optimizer.zero_grad()
        loss.backward()
        clipping_value = 0.5
        torch.nn.utils.clip_grad_norm_(net.parameters(),clipping_value)
        # Skip the weight update on nan/inf loss (gradients were still computed).
        if not (torch.isnan(loss) or torch.isinf(loss)):
            optimizer.step()
            scheduler.step()
        # if not np.isinf(loss.data.cpu().numpy()):
        # NOTE(review): loss is accumulated even when non-finite — confirm.
        train_loss += loss.data.cpu().numpy()  #net.bbox_loss.data.cpu().numpy()[0]
        # train_loss += loss.data.cpu().numpy()[0] #net.bbox_loss.data.cpu().numpy()[0]
        cnt += 1

        if opts.debug:
            # Greedy-decode the first sample of the batch and print it.
            dbg = labels_pred.data.cpu().numpy()
            ctc_f = dbg.swapaxes(1, 2)
            labels = ctc_f.argmax(2)
            det_text, conf, dec_s,_ = print_seq_ext(labels[0, :], codec)
            print('{0} \t'.format(det_text))

        if step % disp_interval == 0:
            # Periodic console + file logging of the averaged CTC loss.
            for param_group in optimizer.param_groups:
                learning_rate = param_group['lr']
            train_loss /= cnt
            train_loss_lr += train_loss
            cntt += 1
            time_now = time.time() - now
            time_total += time_now
            now = time.time()
            save_log = os.path.join(opts.save_path, 'loss_ocr.txt')
            f = open(save_log, 'a')
            f.write('epoch %d[%d], loss_ctc: %.3f,time: %.2f s, lr: %.5f, cnt: %d\n' % (
                step / batch_per_epoch, step, train_loss, time_now,learning_rate, cnt))
            f.close()
            print('epoch %d[%d], loss_ctc: %.3f,time: %.2f s, lr: %.5f, cnt: %d\n' % (
                step / batch_per_epoch, step, train_loss, time_now,learning_rate, cnt))
            train_loss = 0
            cnt = 1

        if step > step_start and (step % batch_per_epoch == 0):
            # Per-epoch: validate (CER/WER), checkpoint, append to note_eval.txt.
            CER, WER = eval_ocr(val_generator, net)
            # presumably eval_ocr switches the net to eval mode — restore training
            net.train()
            for param_group in optimizer.param_groups:
                learning_rate = param_group['lr']
            # print(learning_rate)
            save_name = os.path.join(opts.save_path, '{}_{}.h5'.format(model_name, step))
            state = {'step': step,
                     'learning_rate': learning_rate,
                     'state_dict': net.state_dict(),
                     'optimizer': optimizer.state_dict()}
            torch.save(state, save_name)
            print('save model: {}'.format(save_name))
            save_logg = os.path.join(opts.save_path, 'note_eval.txt')
            fe = open(save_logg, 'a')
            fe.write('time epoch [%d]: %.2f s, loss_total: %.3f, CER = %f, WER = %f\n' % (
                step / batch_per_epoch, time_total, train_loss_lr / cntt, CER, WER))
            fe.close()
            print('time epoch [%d]: %.2f s, loss_total: %.3f, CER = %f, WER = %f' % (
                step / batch_per_epoch, time_total, train_loss_lr / cntt, CER, WER))
            time_total = 0
            cntt = 0
            train_loss_lr = 0
def main(opts):
    """Train the E2E-CRNN model on the ICDAR2015 end-to-end dataset.

    Optionally resumes from ``opts.model``; iterates epoch-by-epoch over the
    E2Edataset loader, optimizing the CTC loss produced by ``process_crnn``,
    and saves a checkpoint every ``batch_per_epoch`` steps.
    """
    # alphabet = '0123456789.'
    nclass = len(alphabet) + 1  # +1 for the CTC blank symbol
    model_name = 'E2E-CRNN'
    net = OwnModel(attention=True, nclass=nclass)
    print("Using {0}".format(model_name))
    if opts.cuda:
        net.cuda()
    learning_rate = opts.base_lr
    # NOTE(review): the second assignment overrides the first optimizer and
    # silently drops weight_decay — confirm which configuration is intended.
    optimizer = torch.optim.Adam(net.parameters(), lr=opts.base_lr, weight_decay=weight_decay)
    optimizer = optim.Adam(net.parameters(), lr=opts.base_lr, betas=(0.5, 0.999))
    step_start = 0

    ### option 1: only remap the conv11 dimensions of a pretrained model
    # model_dict = net.state_dict()
    # if os.path.exists(opts.model):
    #     print('loading pretrained model from %s' % opts.model)
    #     pretrained_model = OwnModel(attention=True, nclass=12)
    #     pretrained_model.load_state_dict(torch.load(opts.model)['state_dict'])
    #     pretrained_dict = pretrained_model.state_dict()
    #
    #     pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict and 'rnn' not in k and 'conv11' not in k}
    #     model_dict.update(pretrained_dict)
    #     net.load_state_dict(model_dict)
    if os.path.exists(opts.model):
        # NOTE(review): reads the module-level global 'args', not 'opts' — verify.
        print('loading model from %s' % args.model)
        step_start, learning_rate = net_utils.load_net(args.model, net, optimizer)

    ## ICDAR2015 dataset
    e2edata = E2Edataset(train_list=opts.train_list)
    e2edataloader = torch.utils.data.DataLoader(e2edata, batch_size=opts.batch_size, shuffle=True,
                                                collate_fn=E2Ecollate, num_workers=4)

    # electricity-meter dataset (disabled)
    # converter = strLabelConverter(alphabet)
    # dataset = ImgDataset(
    #     root='/home/yangna/deepblue/OCR/mech_demo2/dataset/imgs/image',
    #     csv_root='/home/yangna/deepblue/OCR/mech_demo2/dataset/imgs/train_list.txt',
    #     transform=None,
    #     target_transform=converter.encode
    # )
    # ocrdataloader = torch.utils.data.DataLoader(
    #     dataset, batch_size=opts.batch_size, shuffle=True, collate_fn=own_collate
    # )

    net.train()
    converter = strLabelConverter(alphabet)
    ctc_loss = CTCLoss()
    for step in range(step_start, opts.max_iters):
        for index, date in enumerate(e2edataloader):
            im_data, gtso, lbso = date
            im_data = im_data.cuda()
            try:
                loss = process_crnn(im_data, gtso, lbso, net, ctc_loss, converter, training=True)
                net.zero_grad()
                # optimizer.zero_grad()
                loss.backward()
                optimizer.step()
            except:
                # best-effort: report the failed batch and keep training
                import sys, traceback
                traceback.print_exc(file=sys.stdout)
                pass
            if index % disp_interval == 0:
                try:
                    # 'loss' may be undefined if the very first batch failed
                    print('epoch:%d || step:%d || loss %.4f' % (step, index, loss))
                except:
                    import sys, traceback
                    traceback.print_exc(file=sys.stdout)
                    pass
        if step > step_start and (step % batch_per_epoch == 0):
            # periodic checkpoint
            save_name = os.path.join(opts.save_path, '{}_{}.h5'.format(model_name, step))
            state = {
                'step': step,
                'learning_rate': learning_rate,
                'state_dict': net.state_dict(),
                'optimizer': optimizer.state_dict()
            }
            torch.save(state, save_name)
            print('save model: {}'.format(save_name))
def main(opts):
    """Train a CRNN digit recognizer with CTC loss, checkpointing each epoch."""
    alphabet = '0123456789.'
    nclass = len(alphabet) + 1  # +1 for the CTC blank symbol
    model_name = 'crnn'
    net = CRNN(nclass)
    print("Using {0}".format(model_name))
    if opts.cuda:
        net.cuda()

    learning_rate = opts.base_lr
    optimizer = torch.optim.Adam(net.parameters(), lr=opts.base_lr, weight_decay=weight_decay)

    # BUGFIX: initialize step_start BEFORE loading; the original set it to 0
    # after load_net, discarding the restored step counter on resume.
    step_start = 0
    if os.path.exists(opts.model):
        # BUGFIX: use opts.model (the function argument), not the global args
        print('loading model from %s' % opts.model)
        step_start, learning_rate = net_utils.load_net(opts.model, net, optimizer)

    ## dataset
    converter = strLabelConverter(alphabet)
    dataset = ImgDataset(
        root='/home/yangna/deepblue/OCR/mech_demo2/dataset/imgs/image',
        csv_root='/home/yangna/deepblue/OCR/mech_demo2/dataset/imgs/train_list.txt',
        transform=None,
        target_transform=converter.encode
    )
    ocrdataloader = torch.utils.data.DataLoader(
        dataset, batch_size=opts.batch_size, shuffle=True, collate_fn=own_collate
    )

    net.train()
    ctc_loss = CTCLoss()

    # Initialize the iterator once; restart it when an epoch is exhausted.
    data_iter = iter(ocrdataloader)
    for step in range(step_start, opts.max_iters):
        try:
            data = next(data_iter)
        except StopIteration:
            data_iter = iter(ocrdataloader)
            data = next(data_iter)

        im_data, gt_boxes, text = data
        im_data = im_data.cuda()
        try:
            loss = process_crnn(im_data, gt_boxes, text, net, ctc_loss, converter, training=True)
            net.zero_grad()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        except Exception:
            # best-effort: report the failed batch and keep training
            import sys, traceback
            traceback.print_exc(file=sys.stdout)

        if step % disp_interval == 0:
            try:
                # 'loss' may be undefined if the very first batch failed
                print('step:%d || loss %.4f' % (step, loss))
            except Exception:
                import sys, traceback
                traceback.print_exc(file=sys.stdout)

        if step > step_start and (step % batch_per_epoch == 0):
            # periodic checkpoint
            save_name = os.path.join(opts.save_path, '{}_{}.h5'.format(model_name, step))
            state = {'step': step,
                     'learning_rate': learning_rate,
                     'state_dict': net.state_dict(),
                     'optimizer': optimizer.state_dict()}
            torch.save(state, save_name)
            print('save model: {}'.format(save_name))
def main(opts):
    """Train the OctGatedMLT OCR branch with CTC loss.

    Supports selective loading (ocr / detection / shared sub-nets) and
    selective freezing via the corresponding opts flags; logs the averaged
    loss every ``disp_interval`` steps and checkpoints once per epoch.
    """
    model_name = 'OctGatedMLT'
    net = OctMLT(attention=True)
    acc = []  # accuracy history, persisted via np.savez at each checkpoint
    if opts.cuda:
        net.cuda()
    optimizer = torch.optim.Adam(net.parameters(), lr=base_lr, weight_decay=weight_decay)
    step_start = 0
    if os.path.exists(opts.model):
        # NOTE(review): reads the module-level global 'args', not 'opts' — verify.
        print('loading model from %s' % args.model)
        step_start, learning_rate = net_utils.load_net(
            args.model, net, optimizer,
            load_ocr=opts.load_ocr, load_detection=opts.load_detection,
            load_shared=opts.load_shared, load_optimizer=opts.load_optimizer,
            reset_step=opts.load_reset_step)
    else:
        learning_rate = base_lr
    # NOTE(review): unconditionally discards any restored step counter.
    step_start = 0
    net.train()

    # Optionally freeze sub-networks so only the remaining parts train.
    if opts.freeze_shared:
        net_utils.freeze_shared(net)
    if opts.freeze_ocr:
        net_utils.freeze_ocr(net)
    if opts.freeze_detection:
        net_utils.freeze_detection(net)

    #acc_test = test(net, codec, opts, list_file=opts.valid_list, norm_height=opts.norm_height)
    #acc.append([0, acc_test])

    ctc_loss = CTCLoss()
    data_generator = ocr_gen.get_batch(num_workers=opts.num_readers,
                                       batch_size=opts.batch_size,
                                       train_list=opts.train_list, in_train=True,
                                       norm_height=opts.norm_height, rgb=True)

    train_loss = 0
    cnt = 0
    for step in range(step_start, 300000):
        # batch
        images, labels, label_length = next(data_generator)
        im_data = net_utils.np_to_variable(images, is_cuda=opts.cuda).permute(
            0, 3, 1, 2)
        features = net.forward_features(im_data)
        labels_pred = net.forward_ocr(features)

        # backward
        '''
        acts: Tensor of (seqLength x batch x outputDim) containing output from network
        labels: 1 dimensional Tensor containing all the targets of the batch in one sequence
        act_lens: Tensor of size (batch) containing size of each output sequence from the network
        act_lens: Tensor of (batch) containing label length of each example
        '''
        probs_sizes = torch.IntTensor(
            [(labels_pred.permute(2, 0, 1).size()[0])] *
            (labels_pred.permute(2, 0, 1).size()[1]))
        label_sizes = torch.IntTensor(
            torch.from_numpy(np.array(label_length)).int())
        labels = torch.IntTensor(torch.from_numpy(np.array(labels)).int())
        # CTC loss normalized by batch size
        loss = ctc_loss(labels_pred.permute(2, 0, 1), labels,
                        probs_sizes, label_sizes) / im_data.size(0)  # change 1.9.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Only accumulate finite losses into the running average.
        if not np.isinf(loss.data.cpu().numpy()):
            train_loss += loss.data.cpu().numpy()[0] if isinstance(
                loss.data.cpu().numpy(), list) else loss.data.cpu().numpy(
                )  #net.bbox_loss.data.cpu().numpy()[0]
            cnt += 1

        if opts.debug:
            # Greedy-decode the first sample of the batch and print it.
            dbg = labels_pred.data.cpu().numpy()
            ctc_f = dbg.swapaxes(1, 2)
            labels = ctc_f.argmax(2)
            det_text, conf, dec_s = print_seq_ext(labels[0, :], codec)
            print('{0} \t'.format(det_text))

        if step % disp_interval == 0:
            # periodic console logging of the averaged loss
            train_loss /= cnt
            print('epoch %d[%d], loss: %.3f, lr: %.5f ' %
                  (step / batch_per_epoch, step, train_loss, learning_rate))
            train_loss = 0
            cnt = 0

        if step > step_start and (step % batch_per_epoch == 0):
            # per-epoch checkpoint + accuracy history dump
            save_name = os.path.join(opts.save_path,
                                     '{}_{}.h5'.format(model_name, step))
            state = {
                'step': step,
                'learning_rate': learning_rate,
                'state_dict': net.state_dict(),
                'optimizer': optimizer.state_dict()
            }
            torch.save(state, save_name)
            print('save model: {}'.format(save_name))
            #acc_test, ted = test(net, codec, opts, list_file=opts.valid_list, norm_height=opts.norm_height)
            #acc.append([0, acc_test, ted])
            np.savez('train_acc_{0}'.format(model_name), acc=acc)
def main(opts):
    """Jointly train E2E-MLT detection (EAST-style losses) and CRNN OCR.

    Each step: run the detector, compute the detection loss, then (inside a
    guarded section) add the CTC loss from box-cropped text and a second CTC
    loss from a dedicated OCR feed. Logs to <save_path>/loss.txt every
    ``disp_interval`` steps; once per epoch checkpoints and runs the
    end-to-end evaluation.
    """
    model_name = 'E2E-MLT'
    # net = ModelResNetSep2(attention=True)
    net = ModelResNetSep_crnn(
        attention=True,
        multi_scale=True,
        num_classes=400,
        fixed_height=norm_height,
        net='densenet',
    )
    # net = ModelResNetSep_final(attention=True)
    print("Using {0}".format(model_name))
    ctc_loss = nn.CTCLoss()
    if opts.cuda:
        net.to(device)
        ctc_loss.to(device)
    learning_rate = opts.base_lr
    optimizer = torch.optim.Adam(net.parameters(), lr=opts.base_lr, weight_decay=weight_decay)
    # scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,mode='max', factor=0.5, patience=4, verbose=True)
    scheduler = torch.optim.lr_scheduler.CyclicLR(optimizer, base_lr=0.0006, max_lr=0.001,
                                                  step_size_up=3000, cycle_momentum=False)
    step_start = 0
    if os.path.exists(opts.model):
        # NOTE(review): reads the module-level global 'args', not 'opts' — verify.
        print('loading model from %s' % args.model)
        # net_dict = net.state_dict()
        step_start, learning_rate = net_utils.load_net(args.model, net, optimizer)
        # step_start, learning_rate = net_utils.load_net(args.model, net, None)
        # # step_start = 0
    net_utils.adjust_learning_rate(optimizer, learning_rate)
    net.train()

    # detection batches (images + score/geo maps + gt boxes/labels)
    data_generator = data_gen.get_batch(num_workers=opts.num_readers,
                                        input_size=opts.input_size, batch_size=opts.batch_size,
                                        train_list=opts.train_path, geo_type=opts.geo_type,
                                        normalize=opts.normalize)
    # dedicated OCR crops for the auxiliary CTC loss
    dg_ocr = ocr_gen.get_batch(num_workers=2,
                               batch_size=opts.ocr_batch_size,
                               train_list=opts.ocr_feed_list, in_train=True,
                               norm_height=norm_height, rgb=True,
                               normalize=opts.normalize)
    # e2edata = E2Edataset(train_list=opts.eval_path, normalize= opts.normalize)
    # e2edataloader = torch.utils.data.DataLoader(e2edata, batch_size=opts.batch_size, shuffle=True, collate_fn=E2Ecollate
    # )

    # running accumulators; cnt starts at 1 to avoid div-by-zero at first display
    train_loss = 0
    train_loss_temp = 0
    bbox_loss, seg_loss, angle_loss = 0., 0., 0.
    cnt = 1
    # ctc_loss = CTCLoss()
    ctc_loss_val = 0
    ctc_loss_val2 = 0
    ctcl = torch.tensor([0])
    box_loss_val = 0
    good_all = 0
    gt_all = 0
    train_loss_lr = 0
    cntt = 0
    time_total = 0
    now = time.time()
    for step in range(step_start, opts.max_iters):
        # scheduler.batch_step()
        # batch
        images, image_fns, score_maps, geo_maps, training_masks, gtso, lbso, gt_idxs = next(
            data_generator)
        im_data = net_utils.np_to_variable(images, is_cuda=opts.cuda).permute(
            0, 3, 1, 2)
        start = timeit.timeit()
        # cv2.imshow('img', images)
        try:
            seg_pred, roi_pred, angle_pred, features = net(im_data)
        except:
            # skip batches the forward pass cannot handle
            import sys, traceback
            traceback.print_exc(file=sys.stdout)
            continue
        end = timeit.timeit()

        # backward — EAST-style detection targets
        smaps_var = net_utils.np_to_variable(score_maps, is_cuda=opts.cuda)
        training_mask_var = net_utils.np_to_variable(training_masks, is_cuda=opts.cuda)
        angle_gt = net_utils.np_to_variable(geo_maps[:, :, :, 4], is_cuda=opts.cuda)
        geo_gt = net_utils.np_to_variable(geo_maps[:, :, :, [0, 1, 2, 3]], is_cuda=opts.cuda)
        try:
            # detection loss (segmentation + box + angle)
            loss = net.loss(seg_pred, smaps_var, training_mask_var, angle_pred,
                            angle_gt, roi_pred, geo_gt)
        except:
            import sys, traceback
            traceback.print_exc(file=sys.stdout)
            continue

        # accumulate the detection loss only when it is finite
        if not (torch.isnan(loss) or torch.isinf(loss)):
            train_loss_temp += loss.data.cpu().numpy()
        optimizer.zero_grad()
        try:
            if step > 1000 or True:  # this is just extra augumentation step ... in early stage just slows down training
                # CTC loss on text cropped from the predicted/gt boxes
                ctcl, gt_b_good, gt_b_all = process_boxes(images, im_data, seg_pred[0], roi_pred[0],
                                                          angle_pred[0], score_maps, gt_idxs, gtso,
                                                          lbso, features, net, ctc_loss, opts,
                                                          debug=opts.debug)
                loss = loss + ctcl
                gt_all += gt_b_all
                good_all += gt_b_good

                # auxiliary CTC loss on the dedicated OCR feed (0.5 weight)
                imageso, labels, label_length = next(dg_ocr)
                im_data_ocr = net_utils.np_to_variable(imageso, is_cuda=opts.cuda).permute(
                    0, 3, 1, 2)
                # features = net.forward_features(im_data_ocr)
                labels_pred = net.forward_ocr(im_data_ocr)
                probs_sizes = torch.IntTensor([(labels_pred.size()[0])] *
                                              (labels_pred.size()[1])).long()
                label_sizes = torch.IntTensor(
                    torch.from_numpy(np.array(label_length)).int()).long()
                labels = torch.IntTensor(torch.from_numpy(
                    np.array(labels)).int()).long()
                loss_ocr = ctc_loss(labels_pred, labels, probs_sizes,
                                    label_sizes) / im_data_ocr.size(0) * 0.5
                loss_ocr.backward()
                # ctc_loss_val2 += loss_ocr.item()

                loss.backward()
                clipping_value = 0.5
                torch.nn.utils.clip_grad_norm_(net.parameters(), clipping_value)
                if opts.d1:
                    print('loss_nan', torch.isnan(loss))
                    print('loss_inf', torch.isinf(loss))
                    print('lossocr_nan', torch.isnan(loss_ocr))
                    print('lossocr_inf', torch.isinf(loss_ocr))
                # only step/accumulate when every component is finite
                if not (torch.isnan(loss) or torch.isinf(loss) or
                        torch.isnan(loss_ocr) or torch.isinf(loss_ocr)):
                    bbox_loss += net.box_loss_value.data.cpu().numpy()
                    seg_loss += net.segm_loss_value.data.cpu().numpy()
                    angle_loss += net.angle_loss_value.data.cpu().numpy()
                    train_loss += train_loss_temp
                    ctc_loss_val2 += loss_ocr.item()
                    ctc_loss_val += ctcl.data.cpu().numpy()[0]
                    # train_loss += loss.data.cpu().numpy()[0] #net.bbox_loss.data.cpu().numpy()[0]
                    optimizer.step()
                    scheduler.step()
                    train_loss_temp = 0
                    cnt += 1
        except:
            # best-effort: skip the OCR half of the step on failure
            import sys, traceback
            traceback.print_exc(file=sys.stdout)
            pass

        if step % disp_interval == 0:
            if opts.debug:
                # visualize segmentation and masks on the first sample
                segm = seg_pred[0].data.cpu()[0].numpy()
                segm = segm.squeeze(0)
                cv2.imshow('segm_map', segm)
                segm_res = cv2.resize(score_maps[0], (images.shape[2], images.shape[1]))
                mask = np.argwhere(segm_res > 0)
                x_data = im_data.data.cpu().numpy()[0]
                x_data = x_data.swapaxes(0, 2)
                x_data = x_data.swapaxes(0, 1)
                if opts.normalize:
                    # undo the [-1, 1] normalization for display
                    x_data += 1
                    x_data *= 128
                x_data = np.asarray(x_data, dtype=np.uint8)
                x_data = x_data[:, :, ::-1]
                im_show = x_data
                try:
                    im_show[mask[:, 0], mask[:, 1], 1] = 255
                    im_show[mask[:, 0], mask[:, 1], 0] = 0
                    im_show[mask[:, 0], mask[:, 1], 2] = 0
                except:
                    pass
                cv2.imshow('img0', im_show)
                cv2.imshow('score_maps', score_maps[0] * 255)
                cv2.imshow('train_mask', training_masks[0] * 255)
                cv2.waitKey(10)

            # average the accumulators over successful steps and log
            train_loss /= cnt
            bbox_loss /= cnt
            seg_loss /= cnt
            angle_loss /= cnt
            ctc_loss_val /= cnt
            ctc_loss_val2 /= cnt
            box_loss_val /= cnt
            train_loss_lr += (train_loss)
            cntt += 1
            time_now = time.time() - now
            time_total += time_now
            now = time.time()
            for param_group in optimizer.param_groups:
                learning_rate = param_group['lr']
            save_log = os.path.join(opts.save_path, 'loss.txt')
            f = open(save_log, 'a')
            f.write(
                'epoch %d[%d], lr: %f, loss: %.3f, bbox_loss: %.3f, seg_loss: %.3f, ang_loss: %.3f, ctc_loss: %.3f, rec: %.5f, lv2: %.3f, time: %.2f s, cnt: %d\n'
                % (step / batch_per_epoch, step, learning_rate, train_loss,
                   bbox_loss, seg_loss, angle_loss, ctc_loss_val,
                   good_all / max(1, gt_all), ctc_loss_val2, time_now, cnt))
            f.close()
            try:
                print(
                    'epoch %d[%d], lr: %f, loss: %.3f, bbox_loss: %.3f, seg_loss: %.3f, ang_loss: %.3f, ctc_loss: %.3f, rec: %.5f, lv2: %.3f, time: %.2f s, cnt: %d\n'
                    % (step / batch_per_epoch, step, learning_rate, train_loss,
                       bbox_loss, seg_loss, angle_loss, ctc_loss_val,
                       good_all / max(1, gt_all), ctc_loss_val2, time_now, cnt))
            except:
                import sys, traceback
                traceback.print_exc(file=sys.stdout)
                pass
            # reset all accumulators for the next interval
            train_loss = 0
            bbox_loss, seg_loss, angle_loss = 0., 0., 0.
            # NOTE(review): cnt resets to 0 here but starts at 1 elsewhere —
            # a divide-by-zero is possible if no step succeeds before the
            # next display interval; confirm intended.
            cnt = 0
            ctc_loss_val = 0
            ctc_loss_val2 = 0
            good_all = 0
            gt_all = 0
            box_loss_val = 0

        # if step % valid_interval == 0:
        #     validate(opts.valid_list, net)
        if step > step_start and (step % batch_per_epoch == 0):
            # per-epoch: checkpoint, run end-to-end evaluation, log results
            for param_group in optimizer.param_groups:
                learning_rate = param_group['lr']
            print('learning_rate', learning_rate)
            save_name = os.path.join(opts.save_path,
                                     '{}_{}.h5'.format(model_name, step))
            state = {
                'step': step,
                'learning_rate': learning_rate,
                'state_dict': net.state_dict(),
                'optimizer': optimizer.state_dict()
            }
            torch.save(state, save_name)
            #evaluate
            re_tpe2e, re_tp, re_e1, precision = evaluate_e2e_crnn(
                root=args.eval_path, net=net, norm_height=norm_height,
                name_model=save_name, normalize=args.normalize,
                save_dir=args.save_path)
            # CER,WER = evaluate_crnn(e2edataloader,net)
            # scheduler.step(re_tpe2e)
            # NOTE(review): 'save_log' is only defined once a display interval
            # has run — confirm disp_interval always fires before this branch.
            f = open(save_log, 'a')
            f.write(
                'time epoch [%d]: %.2f s, loss_total: %.3f, lr:%f, re_tpe2e = %f, re_tp = %f, re_e1 = %f, precision = %f\n'
                % (step / batch_per_epoch, time_total, train_loss_lr / cntt,
                   learning_rate, re_tpe2e, re_tp, re_e1, precision))
            f.close()
            print(
                'time epoch [%d]: %.2f s, loss_total: %.3f, re_tpe2e = %f, re_tp = %f, re_e1 = %f, precision = %f'
                % (step / batch_per_epoch, time_total, train_loss_lr / cntt,
                   re_tpe2e, re_tp, re_e1, precision))
            #print('time epoch [%d]: %.2f s, loss_total: %.3f' % (step / batch_per_epoch, time_total,train_loss_lr/cntt))
            print('save model: {}'.format(save_name))
            time_total = 0
            cntt = 0
            train_loss_lr = 0
            # evaluation switches to eval mode; presumably — restore training
            net.train()
def run_model_input_image(im, show_boxes=False):
    """Run the E2E text detection + recognition model on a single image.

    Parameters
    ----------
    im : PIL.Image or array-like
        Input image; only the first 3 channels (RGB) are used.
    show_boxes : bool
        When True, pop up a window with the detected boxes drawn.

    Returns
    -------
    dict
        ``{"frame": [recognized_text, ...]}`` with every prediction
        lower-cased.

    NOTE(review): parsing CLI args and loading the network inside this
    helper reloads the model on every call — consider hoisting; kept here
    to preserve the existing interface.
    """
    predictions = {}
    parser = argparse.ArgumentParser()
    parser.add_argument('-cuda', type=int, default=1)
    parser.add_argument('-model', default='e2e-mlt-rctw.h5')
    parser.add_argument('-segm_thresh', default=0.5)
    font2 = ImageFont.truetype("Arial-Unicode-Regular.ttf", 18)
    args = parser.parse_args()

    net = ModelResNetSep2(attention=True)
    net_utils.load_net(args.model, net)
    net = net.eval()
    if args.cuda:
        print('Using cuda ...')
        net = net.cuda()

    with torch.no_grad():
        # im = Image.open(im)
        # im = im.convert('RGB')
        im = np.asarray(im)
        im = im[..., :3]  # drop alpha channel if present
        im_resized, (ratio_h, ratio_w) = resize_image(im, scale_up=False)
        # BUGFIX: `dtype=np.float` — that alias was deprecated in NumPy 1.20
        # and removed in 1.24; the builtin `float` is the float64 dtype the
        # alias resolved to.
        images = np.asarray([im_resized], dtype=float)
        # normalize pixel values into [-1, 1)
        images /= 128
        images -= 1
        im_data = net_utils.np_to_variable(images, is_cuda=args.cuda).permute(0, 3, 1, 2)
        seg_pred, rboxs, angle_pred, features = net(im_data)

        # strip batch dims and move channel axes last for box decoding
        rbox = rboxs[0].data.cpu()[0].numpy()
        rbox = rbox.swapaxes(0, 1)
        rbox = rbox.swapaxes(1, 2)
        angle_pred = angle_pred[0].data.cpu()[0].numpy()
        segm = seg_pred[0].data.cpu()[0].numpy()
        segm = segm.squeeze(0)

        draw2 = np.copy(im_resized)
        boxes = get_boxes(segm, rbox, angle_pred, args.segm_thresh)

        img = Image.fromarray(draw2)
        draw = ImageDraw.Draw(img)
        # if len(boxes) > 10:
        #     boxes = boxes[0:10]

        out_boxes = []
        prediction_i = []
        for box in boxes:
            pts = box[0:8]
            pts = pts.reshape(4, -1)
            det_text, conf, dec_s = ocr_image(net, codec, im_data, box)
            if len(det_text) == 0:
                continue
            # NOTE: the original computed draw.textsize() here but never used
            # the result; dropped (that API was removed in Pillow 10).
            center = [box[0], box[1]]
            draw.text((center[0], center[1]), det_text, fill=(0, 255, 0), font=font2)
            out_boxes.append(box)
            # det_text is one prediction
            prediction_i.append(det_text.lower())

        predictions["frame"] = prediction_i
        # show each image's boxes and output in a pop-up window
        show_image_with_boxes(img, out_boxes, show=show_boxes)
    print(predictions)
    return predictions
def main(opts):
    """Train the CRNN OCR branch (ModelResNetSep_crnn) with CTC loss.

    Relies on module-level globals: ``base_lr``, ``weight_decay``,
    ``device``, ``codec``, ``batch_per_epoch``, ``disp_interval``, plus
    project helpers (ocrDataset, alignCollate, strLabelConverter,
    net_utils, eval_ocr_crnn, print_seq_ext).

    Side effects: appends to ``<save_path>/loss_ocr.txt`` and writes
    ``OCR_<model>_<step>.h5`` checkpoints once per epoch.
    """
    train_loss = 0
    train_loss_lr = 0
    cnt = 1
    cntt = 0
    time_total = 0
    now = time.time()

    converter = strLabelConverter(codec)
    model_name = 'E2E-MLT'
    net = ModelResNetSep_crnn(
        attention=True,
        multi_scale=True,
        num_classes=400,
        fixed_height=opts.norm_height,
        net='densenet',
    )
    ctc_loss = nn.CTCLoss()
    optimizer = torch.optim.Adam(net.parameters(), lr=base_lr, weight_decay=weight_decay)
    # scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.5, patience=5, verbose=True)
    scheduler = torch.optim.lr_scheduler.CyclicLR(optimizer, base_lr=0.00007, max_lr=0.0003,
                                                  step_size_up=3000, cycle_momentum=False)
    step_start = 0
    if opts.cuda:
        net.to(device)
        ctc_loss.to(device)

    if os.path.exists(opts.model):
        # BUGFIX: the existence check used opts.model but the load/print used
        # `args.model` (an unrelated module global); use opts consistently.
        print('loading model from %s' % opts.model)
        step_start, learning_rate = net_utils.load_net(opts.model, net, optimizer)
    else:
        learning_rate = base_lr
        for param_group in optimizer.param_groups:
            param_group['lr'] = base_lr
            learning_rate = param_group['lr']
            print(param_group['lr'])
        step_start = 0

    net.train()

    data_dataset = ocrDataset(root=opts.train_list, norm_height=opts.norm_height, in_train=True)
    data_generator1 = torch.utils.data.DataLoader(data_dataset, batch_size=opts.batch_size,
                                                  shuffle=True, collate_fn=alignCollate())
    val_dataset = ocrDataset(root=opts.valid_list, norm_height=opts.norm_height, in_train=False)
    val_generator1 = torch.utils.data.DataLoader(val_dataset, batch_size=1, shuffle=False,
                                                 collate_fn=alignCollate())

    # BUGFIX: hoisted out of the display branch so the epoch-checkpoint
    # branch below can never hit an unbound `save_log`.
    save_log = os.path.join(opts.save_path, 'loss_ocr.txt')

    # BUGFIX: the original relied on a bare `except:` catching the NameError
    # of a never-initialized iterator on the first pass; initialize it
    # explicitly and catch only StopIteration to restart an exhausted epoch.
    dataloader_iterator = iter(data_generator1)

    for step in range(step_start, 300000):
        try:
            images, label = next(dataloader_iterator)
        except StopIteration:
            dataloader_iterator = iter(data_generator1)
            images, label = next(dataloader_iterator)

        labels, label_length = converter.encode(label)
        im_data = images.to(device)
        labels_pred = net.forward_ocr(im_data)

        # CTC needs per-sample input lengths (all T) and target lengths.
        probs_sizes = torch.IntTensor([(labels_pred.size()[0])] * (labels_pred.size()[1]))
        label_sizes = torch.IntTensor(torch.from_numpy(np.array(label_length)).int())
        labels = torch.IntTensor(torch.from_numpy(np.array(labels)).int())
        loss = ctc_loss(labels_pred, labels, probs_sizes, label_sizes) / im_data.size(0)

        optimizer.zero_grad()
        loss.backward()
        clipping_value = 1.0
        torch.nn.utils.clip_grad_norm_(net.parameters(), clipping_value)
        # skip the weight update when the loss exploded, but keep the LR
        # schedule advancing
        if not (torch.isnan(loss) or torch.isinf(loss)):
            optimizer.step()
        scheduler.step()
        train_loss += loss.data.cpu().numpy()
        cnt += 1

        if opts.debug:
            # greedy-decode the first sample of the batch for a quick look
            dbg = labels_pred.permute(1, 2, 0).data.cpu().numpy()
            ctc_f = dbg.swapaxes(1, 2)
            labels = ctc_f.argmax(2)
            det_text, conf, dec_s, _ = print_seq_ext(labels[0, :], codec)
            print('{0} \t'.format(det_text))

        if step % disp_interval == 0:
            for param_group in optimizer.param_groups:
                learning_rate = param_group['lr']
            train_loss /= cnt
            train_loss_lr += train_loss
            cntt += 1
            time_now = time.time() - now
            time_total += time_now
            now = time.time()
            f = open(save_log, 'a')
            f.write(
                'epoch %d[%d], loss_ctc: %.3f,time: %.2f s, lr: %.5f, cnt: %d\n'
                % (step / batch_per_epoch, step, train_loss, time_now, learning_rate, cnt))
            f.close()
            print(
                'epoch %d[%d], loss_ctc: %.3f,time: %.2f s, lr: %.5f, cnt: %d\n'
                % (step / batch_per_epoch, step, train_loss, time_now, learning_rate, cnt))
            train_loss = 0
            cnt = 1

        if step > step_start and (step % batch_per_epoch == 0):
            for param_group in optimizer.param_groups:
                learning_rate = param_group['lr']
            save_name = os.path.join(opts.save_path, 'OCR_{}_{}.h5'.format(model_name, step))
            state = {
                'step': step,
                'learning_rate': learning_rate,
                'state_dict': net.state_dict(),
                'optimizer': optimizer.state_dict()
            }
            torch.save(state, save_name)
            # evaluate on the validation split, then resume training mode
            CER, WER = eval_ocr_crnn(val_generator1, net)
            # scheduler.step(CER)
            f = open(save_log, 'a')
            f.write(
                'time epoch [%d]: %.2f s, loss_total: %.3f, CER = %f, WER = %f'
                % (step / batch_per_epoch, time_total, train_loss_lr / cntt, CER, WER))
            f.close()
            print(
                'time epoch [%d]: %.2f s, loss_total: %.3f, CER = %f, WER = %f \n'
                % (step / batch_per_epoch, time_total, train_loss_lr / cntt, CER, WER))
            print('save model: {}'.format(save_name))
            net.train()
            time_total = 0
            cntt = 0
            train_loss_lr = 0
def main(opts):
    # Train the OwnModel end-to-end text detector + CTC recognizer.
    # NOTE(review): the training loop below reads `next(data_generator)`,
    # but only `e2edataloader` is constructed here — `data_generator` must
    # come from module scope, otherwise this raises NameError at step 0;
    # confirm against the original script this file was assembled from.
    nclass = len(alphabet) + 1
    model_name = 'E2E-MLT'
    net = OwnModel(attention=True, nclass=nclass)
    print("Using {0}".format(model_name))
    if opts.cuda:
        net.cuda()
    learning_rate = opts.base_lr
    optimizer = torch.optim.Adam(net.parameters(), lr=opts.base_lr, weight_decay=weight_decay)

    ### Option 1: only adapt the conv11 dimension from a pretrained model
    # model_dict = net.state_dict()
    # if os.path.exists(opts.model):
    #     # load the pretrained model
    #     print('loading pretrained model from %s' % opts.model)
    #     # pretrained_model = OwnModel(attention=True, nclass=7325)
    #     pretrained_model = ModelResNetSep2(attention=True, nclass=7500)
    #     pretrained_model.load_state_dict(torch.load(opts.model)['state_dict'])
    #     pretrained_dict = pretrained_model.state_dict()
    #     pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict and 'conv11' not in k and 'rnn' not in k}
    #     # 2. overwrite entries in the existing state dict
    #     model_dict.update(pretrained_dict)
    #     # 3. load the new state dict
    #     net.load_state_dict(model_dict)

    ### Option 2: resume training directly from a checkpoint
    # NOTE(review): the guard tests opts.model but loads args.model (a module
    # global) — presumably the same path; verify.
    if os.path.exists(opts.model):
        print('loading model from %s' % args.model)
        step_start, learning_rate = net_utils.load_net(args.model, net, optimizer)
    ###
    step_start = 0  # overrides any step restored from the checkpoint above
    net.train()

    converter = strLabelConverter(alphabet)
    ctc_loss = CTCLoss()

    e2edata = E2Edataset(train_list=opts.train_list)
    e2edataloader = torch.utils.data.DataLoader(e2edata, batch_size=4, shuffle=True,
                                                collate_fn=E2Ecollate)

    # running sums for the periodic progress report
    train_loss = 0
    bbox_loss, seg_loss, angle_loss = 0., 0., 0.
    cnt = 0
    ctc_loss_val = 0
    ctc_loss_val2 = 0
    box_loss_val = 0
    gt_g_target = 0
    gt_g_proc = 0

    for step in range(step_start, opts.max_iters):
        loss = 0
        # batch
        images, image_fns, score_maps, geo_maps, training_masks, gtso, lbso, gt_idxs = next(data_generator)
        im_data = net_utils.np_to_variable(images.transpose(0, 3, 1, 2), is_cuda=opts.cuda)
        # im_data = torch.from_numpy(images).type(torch.FloatTensor).permute(0, 3, 1, 2).cuda()
        # (the relative order of permute(0, 3, 1, 2) and cuda() matters)
        start = timeit.timeit()
        try:
            seg_pred, roi_pred, angle_pred, features = net(im_data)
        except:
            import sys, traceback
            traceback.print_exc(file=sys.stdout)
            continue
        end = timeit.timeit()

        # targets for the EAST detection loss
        smaps_var = net_utils.np_to_variable(score_maps, is_cuda=opts.cuda)
        training_mask_var = net_utils.np_to_variable(training_masks, is_cuda=opts.cuda)
        angle_gt = net_utils.np_to_variable(geo_maps[:, :, :, 4], is_cuda=opts.cuda)
        geo_gt = net_utils.np_to_variable(geo_maps[:, :, :, [0, 1, 2, 3]], is_cuda=opts.cuda)
        try:
            loss = net.loss(seg_pred, smaps_var, training_mask_var, angle_pred, angle_gt, roi_pred, geo_gt)
        except:
            import sys, traceback
            traceback.print_exc(file=sys.stdout)
            continue

        bbox_loss += net.box_loss_value.data.cpu().numpy()
        seg_loss += net.segm_loss_value.data.cpu().numpy()
        angle_loss += net.angle_loss_value.data.cpu().numpy()
        train_loss += loss.data.cpu().numpy()

        try:
            # before step 10000 training used only the annotated text regions
            if step > 10000 or True:
                # this is just extra augumentation step ... in early stage just slows down training
                # ctcl, gt_target, gt_proc = process_boxes(images, im_data, seg_pred[0], roi_pred[0], angle_pred[0], score_maps, gt_idxs, gtso, lbso, features, net, ctc_loss, opts, converter, debug=opts.debug)
                ctcl = process_crnn(im_data, gtso, lbso, net, ctc_loss, converter, training=True)
                gt_target = 1
                gt_proc = 1
                ctc_loss_val += ctcl.data.cpu().numpy()[0]
                # NOTE: the recognition loss replaces (not adds to) the
                # detection loss for the backward pass below
                loss = ctcl
                gt_g_target = gt_target
                gt_g_proc = gt_proc
                train_loss += ctcl.item()
            # -- when training the OCR recognition branch, a dedicated data
            #    generator was used (it should include rectification of
            #    slanted text):
            # imageso, labels, label_length = next(dg_ocr)
            # im_data_ocr = net_utils.np_to_variable(imageso, is_cuda=opts.cuda).permute(0, 3, 1, 2)
            # features = net.forward_features(im_data_ocr)
            # labels_pred = net.forward_ocr(features)
            # probs_sizes = torch.IntTensor( [(labels_pred.permute(2,0,1).size()[0])] * (labels_pred.permute(2,0,1).size()[1]) )
            # label_sizes = torch.IntTensor( torch.from_numpy(np.array(label_length)).int() )
            # labels = torch.IntTensor( torch.from_numpy(np.array(labels)).int() )
            # loss_ocr = ctc_loss(labels_pred.permute(2,0,1), labels, probs_sizes, label_sizes) / im_data_ocr.size(0) * 0.5
            # loss_ocr.backward()
            # ctc_loss_val2 += loss_ocr.item()

            net.zero_grad()
            # optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        except:
            import sys, traceback
            traceback.print_exc(file=sys.stdout)
            pass

        cnt += 1
        if step % disp_interval == 0:
            if opts.debug:
                # visualize segmentation output / score maps / training mask
                segm = seg_pred[0].data.cpu()[0].numpy()
                segm = segm.squeeze(0)
                cv2.imshow('segm_map', segm)
                segm_res = cv2.resize(score_maps[0], (images.shape[2], images.shape[1]))
                mask = np.argwhere(segm_res > 0)
                x_data = im_data.data.cpu().numpy()[0]
                x_data = x_data.swapaxes(0, 2)
                x_data = x_data.swapaxes(0, 1)
                # map back to a displayable uint8 range (assumes inputs were
                # normalized to [-1, 1) upstream — TODO confirm)
                x_data += 1
                x_data *= 128
                x_data = np.asarray(x_data, dtype=np.uint8)
                x_data = x_data[:, :, ::-1]
                im_show = x_data
                try:
                    # paint the positive score-map region green
                    im_show[mask[:, 0], mask[:, 1], 1] = 255
                    im_show[mask[:, 0], mask[:, 1], 0] = 0
                    im_show[mask[:, 0], mask[:, 1], 2] = 0
                except:
                    pass
                cv2.imshow('img0', im_show)
                cv2.imshow('score_maps', score_maps[0] * 255)
                cv2.imshow('train_mask', training_masks[0] * 255)
                cv2.waitKey(10)

            # average the accumulators over the window, report, then reset
            train_loss /= cnt
            bbox_loss /= cnt
            seg_loss /= cnt
            angle_loss /= cnt
            ctc_loss_val /= cnt
            ctc_loss_val2 /= cnt
            box_loss_val /= cnt
            try:
                print('epoch %d[%d], loss: %.3f, bbox_loss: %.3f, seg_loss: %.3f, ang_loss: %.3f, ctc_loss: %.3f, gt_t/gt_proc:[%d/%d] lv2 %.3f' % (
                    step / batch_per_epoch, step, train_loss, bbox_loss, seg_loss, angle_loss, ctc_loss_val, gt_g_target, gt_g_proc, ctc_loss_val2))
            except:
                import sys, traceback
                traceback.print_exc(file=sys.stdout)
                pass
            train_loss = 0
            bbox_loss, seg_loss, angle_loss = 0., 0., 0.
            cnt = 0
            ctc_loss_val = 0
            good_all = 0
            gt_all = 0
            box_loss_val = 0

        # for save mode
        # validate(opts.valid_list, net)
        if step > step_start and (step % batch_per_epoch == 0):
            save_name = os.path.join(opts.save_path, '{}_{}.h5'.format(model_name, step))
            state = {'step': step,
                     'learning_rate': learning_rate,
                     'state_dict': net.state_dict(),
                     'optimizer': optimizer.state_dict()}
            torch.save(state, save_name)
            print('save model: {}'.format(save_name))
if __name__ == '__main__':
    # Webcam demo entry point: load the detector, then stream frames from
    # camera 0 under torch.no_grad().
    parser = argparse.ArgumentParser()
    parser.add_argument('-cuda', type=int, default=1)
    parser.add_argument('-model', default='e2e-mlt.h5')
    parser.add_argument('-segm_thresh', default=0.5)
    font2 = ImageFont.truetype("Arial-Unicode-Regular.ttf", 18)
    args = parser.parse_args()

    net = ModelResNetSep2(attention=True)
    device = 'cpu'
    if args.cuda:
        # NOTE(review): this yields 'cuda:1' for the default -cuda=1 — the
        # flag doubles as a device index; confirm device 1 is intended
        # (most setups expect 'cuda:0').
        device = 'cuda:' + str(args.cuda)
    net_utils.load_net(args.model, net, device=device)
    net = net.eval()
    if args.cuda:
        print('Using cuda ...')
        net = net.cuda()

    cap = cv2.VideoCapture(0)
    cap.set(cv2.CAP_PROP_AUTOFOCUS, 1)
    ret, im = cap.read()  # prime the stream; ret is False when no camera
    frame_no = 0
    with torch.no_grad():
        # NOTE(review): the loop body only grabs the next frame — the
        # per-frame detection/recognition code appears to have been
        # truncated when this file was assembled; confirm against the
        # original demo script.
        while ret:
            ret, im = cap.read()
def main(opts):
    """Train the OCT-E2E-MLT detector + recognizer with early stopping.

    Relies on module-level globals: ``weight_decay``, ``norm_height``,
    ``disp_interval``, ``batch_per_epoch``, plus project helpers (OctMLT,
    data_gen, ocr_gen, net_utils, process_boxes, CTCLoss).

    Side effects: writes ``<model>_<step>.h5`` checkpoints each epoch, a
    ``BEST_...`` checkpoint on early stop, and a final ``<model>.h5`` when
    training runs to completion.
    """
    model_name = 'OCT-E2E-MLT'
    net = OctMLT(attention=True)
    print("Using {0}".format(model_name))

    learning_rate = opts.base_lr
    optimizer = torch.optim.Adam(net.parameters(), lr=opts.base_lr, weight_decay=weight_decay)
    step_start = 0
    if os.path.exists(opts.model):
        # BUGFIX: existence was checked on opts.model but `args.model` (a
        # different module global) was printed/loaded; use opts consistently.
        print('loading model from %s' % opts.model)
        step_start, learning_rate = net_utils.load_net(opts.model, net)
    if opts.cuda:
        net.cuda()
    net.train()

    data_generator = data_gen.get_batch(num_workers=opts.num_readers,
                                        input_size=opts.input_size,
                                        batch_size=opts.batch_size,
                                        train_list=opts.train_list,
                                        geo_type=opts.geo_type)
    dg_ocr = ocr_gen.get_batch(num_workers=2,
                               batch_size=opts.ocr_batch_size,
                               train_list=opts.ocr_feed_list,
                               in_train=True,
                               norm_height=norm_height,
                               rgb=True)

    # running sums for the periodic progress report
    train_loss = 0
    bbox_loss, seg_loss, angle_loss = 0., 0., 0.
    cnt = 0
    ctc_loss = CTCLoss()
    ctc_loss_val = 0
    box_loss_val = 0
    good_all = 0
    gt_all = 0

    # early-stopping bookkeeping: snapshot of the best (lowest-loss) state
    best_step = step_start
    best_loss = 1000000
    best_model = net.state_dict()
    best_optimizer = optimizer.state_dict()
    best_learning_rate = learning_rate
    max_patience = 3000
    early_stop = False

    for step in range(step_start, opts.max_iters):
        # batch
        images, image_fns, score_maps, geo_maps, training_masks, gtso, lbso, gt_idxs = next(
            data_generator)
        im_data = net_utils.np_to_variable(images, is_cuda=opts.cuda).permute(0, 3, 1, 2)
        start = timeit.timeit()
        try:
            seg_pred, roi_pred, angle_pred, features = net(im_data)
        except:
            import sys, traceback
            traceback.print_exc(file=sys.stdout)
            continue
        end = timeit.timeit()

        # targets for the EAST-style detection loss
        smaps_var = net_utils.np_to_variable(score_maps, is_cuda=opts.cuda)
        training_mask_var = net_utils.np_to_variable(training_masks, is_cuda=opts.cuda)
        angle_gt = net_utils.np_to_variable(geo_maps[:, :, :, 4], is_cuda=opts.cuda)
        geo_gt = net_utils.np_to_variable(geo_maps[:, :, :, [0, 1, 2, 3]], is_cuda=opts.cuda)
        try:
            loss = net.loss(seg_pred, smaps_var, training_mask_var, angle_pred, angle_gt,
                            roi_pred, geo_gt)
        except:
            import sys, traceback
            traceback.print_exc(file=sys.stdout)
            continue

        bbox_loss += net.box_loss_value.data.cpu().numpy()
        seg_loss += net.segm_loss_value.data.cpu().numpy()
        angle_loss += net.angle_loss_value.data.cpu().numpy()
        train_loss += loss.data.cpu().numpy()

        optimizer.zero_grad()
        try:
            if step > 10000:
                # this is just extra augumentation step ... in early stage
                # it just slows down training
                ctcl, gt_b_good, gt_b_all = process_boxes(images, im_data, seg_pred[0],
                                                          roi_pred[0], angle_pred[0],
                                                          score_maps, gt_idxs, gtso, lbso,
                                                          features, net, ctc_loss, opts,
                                                          debug=opts.debug)
                ctc_loss_val += ctcl.data.cpu().numpy()[0]
                loss = loss + ctcl
                gt_all += gt_b_all
                good_all += gt_b_good
            # auxiliary OCR-only batch trained alongside the detector
            imageso, labels, label_length = next(dg_ocr)
            im_data_ocr = net_utils.np_to_variable(imageso, is_cuda=opts.cuda).permute(0, 3, 1, 2)
            features = net.forward_features(im_data_ocr)
            labels_pred = net.forward_ocr(features)
            probs_sizes = torch.IntTensor([(labels_pred.permute(2, 0, 1).size()[0])] *
                                          (labels_pred.permute(2, 0, 1).size()[1]))
            label_sizes = torch.IntTensor(torch.from_numpy(np.array(label_length)).int())
            labels = torch.IntTensor(torch.from_numpy(np.array(labels)).int())
            loss_ocr = ctc_loss(labels_pred.permute(2, 0, 1), labels, probs_sizes,
                                label_sizes) / im_data_ocr.size(0) * 0.5
            loss_ocr.backward()
            loss.backward()
            optimizer.step()
        except:
            import sys, traceback
            traceback.print_exc(file=sys.stdout)
            pass
        cnt += 1

        if step % disp_interval == 0:
            if opts.debug:
                # visualize segmentation output / score maps / training mask
                segm = seg_pred[0].data.cpu()[0].numpy()
                segm = segm.squeeze(0)
                cv2.imshow('segm_map', segm)
                segm_res = cv2.resize(score_maps[0], (images.shape[2], images.shape[1]))
                mask = np.argwhere(segm_res > 0)
                x_data = im_data.data.cpu().numpy()[0]
                x_data = x_data.swapaxes(0, 2)
                x_data = x_data.swapaxes(0, 1)
                x_data += 1
                x_data *= 128
                x_data = np.asarray(x_data, dtype=np.uint8)
                x_data = x_data[:, :, ::-1]
                im_show = x_data
                try:
                    # paint the positive score-map region green
                    im_show[mask[:, 0], mask[:, 1], 1] = 255
                    im_show[mask[:, 0], mask[:, 1], 0] = 0
                    im_show[mask[:, 0], mask[:, 1], 2] = 0
                except:
                    pass
                cv2.imshow('img0', im_show)
                cv2.imshow('score_maps', score_maps[0] * 255)
                cv2.imshow('train_mask', training_masks[0] * 255)
                cv2.waitKey(10)

            # average over the window, track the best state, report, reset
            train_loss /= cnt
            bbox_loss /= cnt
            seg_loss /= cnt
            angle_loss /= cnt
            ctc_loss_val /= cnt
            box_loss_val /= cnt

            if train_loss < best_loss:
                best_step = step
                best_model = net.state_dict()
                best_loss = train_loss
                best_learning_rate = learning_rate
                best_optimizer = optimizer.state_dict()

            # BUGFIX: the patience test was `best_step - step > max_patience`,
            # which can never be true since best_step <= step; compare the
            # other way around.  Also `opts.max_iters = step` alone cannot
            # stop a `range()` that is already being iterated, so break out
            # explicitly.
            if step - best_step > max_patience:
                print("Early stopped criteria achieved.")
                save_name = os.path.join(opts.save_path,
                                         'BEST_{}_{}.h5'.format(model_name, best_step))
                state = {
                    'step': best_step,
                    'learning_rate': best_learning_rate,
                    'state_dict': best_model,
                    'optimizer': best_optimizer
                }
                torch.save(state, save_name)
                print('save model: {}'.format(save_name))
                opts.max_iters = step
                early_stop = True
                break

            try:
                print(
                    'epoch %d[%d], loss: %.3f, bbox_loss: %.3f, seg_loss: %.3f, ang_loss: %.3f, ctc_loss: %.3f, rec: %.5f in %.3f'
                    % (step / batch_per_epoch, step, train_loss, bbox_loss, seg_loss,
                       angle_loss, ctc_loss_val, good_all / max(1, gt_all), end - start))
                print('max_memory_allocated {}'.format(torch.cuda.max_memory_allocated()))
            except:
                import sys, traceback
                traceback.print_exc(file=sys.stdout)
                pass

            train_loss = 0
            bbox_loss, seg_loss, angle_loss = 0., 0., 0.
            cnt = 0
            ctc_loss_val = 0
            good_all = 0
            gt_all = 0
            box_loss_val = 0

        # if step % valid_interval == 0:
        #     validate(opts.valid_list, net)
        if step > step_start and (step % batch_per_epoch == 0):
            save_name = os.path.join(opts.save_path, '{}_{}.h5'.format(model_name, step))
            state = {
                'step': step,
                'learning_rate': learning_rate,
                'state_dict': net.state_dict(),
                'optimizer': optimizer.state_dict(),
                'max_memory_allocated': torch.cuda.max_memory_allocated()
            }
            torch.save(state, save_name)
            print('save model: {}\tmax memory: {}'.format(save_name,
                                                          torch.cuda.max_memory_allocated()))

    if not early_stop:
        # training ran to completion — save the final state
        save_name = os.path.join(opts.save_path, '{}.h5'.format(model_name))
        state = {
            'step': step,
            'learning_rate': learning_rate,
            'state_dict': net.state_dict(),
            'optimizer': optimizer.state_dict()
        }
        torch.save(state, save_name)
        print('save model: {}'.format(save_name))
action='store_true', default=True, help='use cuda for inference') args = parser.parse_args() """ For test images in a folder """ image_list, _, _, name_list = file_utils.get_files(args.demo_folder) file_utils.rm_all_dir(dir='./result/') # clean directories for next test file_utils.mkdir(dir=[ './result/', './result/bubbles/', './result/cuts/', './result/demo/', './result/chars/' ]) # load net models = net_utils.load_net(args) # initialize and load weights spaces = [] # text recognition spacing word text_warp_items = [] # text to warp bubble image demos = [] # all demo image storage t = time.time() cnt = 0 # load data for k, image_path in enumerate(image_list): print("TEST IMAGE ({:d}/{:d}): INPUT PATH:[{:s}]".format( k + 1, len(image_list), image_path), end='\r') img = imgproc.loadImage(image_path)