result_folder = './Results'
if not os.path.isdir(result_folder):
    os.mkdir(result_folder)

if __name__ == '__main__':
    data = pd.DataFrame(columns=['image_name', 'word_bboxes', 'pred_words', 'align_text'])
    data['image_name'] = image_names

    # load net
    net = CRAFT()  # initialize

    print('Loading weights from checkpoint (' + args.trained_model + ')')
    if args.cuda:
        net.load_state_dict(test.copyStateDict(torch.load(args.trained_model)))
    else:
        net.load_state_dict(test.copyStateDict(torch.load(args.trained_model, map_location='cpu')))

    if args.cuda:
        net = net.cuda()
        net = torch.nn.DataParallel(net)
        cudnn.benchmark = False

    net.eval()

    # LinkRefiner
    refine_net = None
    if args.refine:
        from refinenet import RefineNet

        refine_net = RefineNet()
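# test.copyStateDict (used above but defined elsewhere in this codebase)
# conventionally strips the "module." prefix that torch.nn.DataParallel adds to
# parameter names, so a checkpoint saved from a parallel model also loads into
# a bare one. A minimal sketch under that assumption; `copy_state_dict_sketch`
# is illustrative only, not the project's actual function.
from collections import OrderedDict

def copy_state_dict_sketch(state_dict):
    new_state = OrderedDict()
    for name, value in state_dict.items():
        # drop the leading "module." left by DataParallel checkpoints
        new_state[name[len('module.'):] if name.startswith('module.') else name] = value
    return new_state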
def ground_truth(args):
    # initiate pretrained network
    net = CRAFT()  # initialize

    print('Loading weights from checkpoint (' + args.trained_model + ')')
    if args.cuda:
        net.load_state_dict(test.copyStateDict(torch.load(args.trained_model)))
    else:
        net.load_state_dict(test.copyStateDict(torch.load(args.trained_model, map_location='cpu')))

    if args.cuda:
        net = net.cuda()
        net = torch.nn.DataParallel(net)
        cudnn.benchmark = False

    net.eval()

    filelist, _, _ = file_utils.list_files('/home/ubuntu/Kyumin/Autotation/data/IC13/images')
    for img_name in filelist:
        # get datapath
        if 'train' in img_name:
            label_name = img_name.replace('images/train/', 'labels/train/gt_').replace('jpg', 'txt')
        else:
            label_name = img_name.replace('images/test/', 'labels/test/gt_').replace('jpg', 'txt')
        label_dir = img_name.replace('Autotation', 'craft').replace('images', 'labels').replace('.jpg', '/')
        os.makedirs(label_dir, exist_ok=True)

        image = imgproc.loadImage(img_name)

        gt_boxes = []
        gt_words = []
        with open(label_name, 'r', encoding='utf-8-sig') as f:
            lines = f.readlines()
        for line in lines:
            if 'IC13' in img_name:  # IC13
                gt_box, gt_word, _ = line.split('"')
                if 'train' in img_name:
                    x1, y1, x2, y2 = [int(a) for a in gt_box.strip().split(' ')]
                else:
                    x1, y1, x2, y2 = [int(a.strip()) for a in gt_box.split(',') if a.strip().isdigit()]
                gt_boxes.append(np.array([[x1, y1], [x2, y1], [x2, y2], [x1, y2]]))
                gt_words.append(gt_word)
            elif 'IC15' in img_name:
                gt_data = line.strip().split(',')
                gt_box = gt_data[:8]
                if len(gt_data) > 9:
                    gt_word = ','.join(gt_data[8:])
                else:
                    gt_word = gt_data[-1]
                gt_box = [int(a) for a in gt_box]
                gt_box = np.reshape(np.array(gt_box), (4, 2))
                gt_boxes.append(gt_box)
                gt_words.append(gt_word)

        score_region, score_link, conf_map = generate_gt(net, image, gt_boxes, gt_words, args)

        torch.save(score_region, label_dir + 'region.pt')
        torch.save(score_link, label_dir + 'link.pt')
        torch.save(conf_map, label_dir + 'conf.pt')
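# generate_gt() is not shown in this snippet. In the CRAFT paper, the pseudo
# ground truth of each word is weighted by a confidence score computed from the
# annotated word length l(w) and the number of characters l_c(w) the interim
# model splits the cropped word into (Eq. 1). A minimal sketch of that score,
# assuming generate_gt follows the paper; `word_confidence` is a hypothetical
# helper, not part of this codebase.
def word_confidence(word, estimated_char_count):
    l_w = len(word)                          # l(w): true character count
    diff = abs(l_w - estimated_char_count)   # |l(w) - l_c(w)|
    return (l_w - min(l_w, diff)) / l_w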
use_cuda = torch.cuda.is_available()
device = 'cuda:0' if use_cuda else 'cpu'

print('Load the synthetic data ...')
data_loader = Synth80k('D:/Datasets/SynthText')
train_loader = torch.utils.data.DataLoader(data_loader,
                                           batch_size=1,
                                           shuffle=True,
                                           num_workers=0,
                                           drop_last=True,
                                           pin_memory=True)
batch_syn = iter(train_loader)

print('Prepare the net ...')
net = CRAFT()
net.load_state_dict(copyStateDict(torch.load('./weigths/synweights/0.pth')))
net.to(device)

data_parallel = False
if torch.cuda.device_count() > 1:
    net = nn.DataParallel(net)
    data_parallel = True
cudnn.benchmark = False

print('Load the real data')
real_data = ICDAR2013(net, 'D:/Datasets/ICDAR_2013')
real_data_loader = torch.utils.data.DataLoader(real_data,
                                               batch_size=5,
                                               shuffle=True,
                                               num_workers=0,
                                               drop_last=True,
                                               pin_memory=True)
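# Hedged sketch only: the loaders above use batch_size=1 for synthetic data and
# batch_size=5 for real data, which matches the 1:5 synthetic-to-real rate the
# CRAFT paper describes for fine-tuning. A weakly-supervised loop would then
# typically draw one synthetic batch per real batch; the project's actual loop
# is not shown in this snippet.
import itertools

synthetic_batches = itertools.cycle(train_loader)  # endless synthetic batches
for real_batch in real_data_loader:
    syn_batch = next(synthetic_batches)
    # ...forward/backward on real_batch and syn_batch with the confidence-
    # weighted region/affinity loss, then optimizer.step()...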
if __name__ == '__main__':
    import ocr

    score_region = torch.load('/home/ubuntu/Kyumin/craft/data/IC13/labels/train/100/region.pt')
    score_link = torch.load('/home/ubuntu/Kyumin/craft/data/IC13/labels/train/100/link.pt')
    conf_map = torch.load('/home/ubuntu/Kyumin/craft/data/IC13/labels/train/100/conf.pt')
    image = imgproc.loadImage('/home/ubuntu/Kyumin/Autotation/data/IC13/images/train/100.jpg')
    print(score_region.shape, score_link.shape, conf_map.shape)

    # cv2.imshow('original', image)
    cv2.imshow('region', imgproc.cvt2HeatmapImg(score_region))
    cv2.imshow('link', score_link)
    cv2.imshow('conf', conf_map)

    net = CRAFT().cuda()
    net.load_state_dict(test.copyStateDict(torch.load('weights/craft_mlt_25k.pth')))
    net.eval()
    _, _, ref_text, ref_link, _ = test.test_net(net, image, ocr.argument_parser().parse_args())
    cv2.imshow('ref text', imgproc.cvt2HeatmapImg(ref_text))
    cv2.imshow('ref link', ref_link)

    cv2.waitKey(0)
    cv2.destroyAllWindows()
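# torch.save/torch.load round-trip whatever generate_gt produced; if those are
# torch tensors rather than numpy arrays, cv2.imshow above would reject them.
# A defensive conversion sketch (assumes 2-D score maps with values in [0, 1]);
# `to_displayable` is a hypothetical helper, not part of this codebase.
import numpy as np

def to_displayable(score_map):
    if torch.is_tensor(score_map):
        score_map = score_map.detach().cpu().numpy()
    return np.clip(score_map, 0.0, 1.0).astype(np.float32)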
def train(args):
    # load net
    net = CRAFT()  # initialize

    if not os.path.exists(args.trained_model):
        args.trained_model = None

    if args.trained_model is not None:
        print('Loading weights from checkpoint (' + args.trained_model + ')')
        if args.cuda:
            net.load_state_dict(test.copyStateDict(torch.load(args.trained_model)))
        else:
            net.load_state_dict(test.copyStateDict(torch.load(args.trained_model, map_location='cpu')))

    if args.cuda:
        net = net.cuda()
        net = torch.nn.DataParallel(net)
        cudnn.benchmark = False

    # # LinkRefiner
    # refine_net = None
    # if args.refine:
    #     from refinenet import RefineNet
    #
    #     refine_net = RefineNet()
    #     print('Loading weights of refiner from checkpoint (' + args.refiner_model + ')')
    #     if args.cuda:
    #         refine_net.load_state_dict(test.copyStateDict(torch.load(args.refiner_model)))
    #         refine_net = refine_net.cuda()
    #         refine_net = torch.nn.DataParallel(refine_net)
    #     else:
    #         refine_net.load_state_dict(test.copyStateDict(torch.load(args.refiner_model, map_location='cpu')))
    #
    #     args.poly = True

    criterion = craft_utils.CRAFTLoss()
    optimizer = optim.Adam(net.parameters(), args.learning_rate)

    train_data = CRAFTDataset(args)
    dataloader = DataLoader(dataset=train_data, batch_size=args.batch_size, shuffle=True)

    t0 = time.time()
    for epoch in range(args.max_epoch):
        pbar = tqdm(enumerate(dataloader), total=len(dataloader), desc=f'Epoch {epoch}')
        running_loss = 0.0
        for i, data in pbar:
            x, y_region, y_link, y_conf = data
            x = x.cuda()
            y_region = y_region.cuda()
            y_link = y_link.cuda()
            y_conf = y_conf.cuda()

            optimizer.zero_grad()

            y, feature = net(x)
            score_text = y[:, :, :, 0]
            score_link = y[:, :, :, 1]
            L = criterion(score_text, score_link, y_region, y_link, y_conf)
            L.backward()
            optimizer.step()

            running_loss += L.item()
            if i % 2000 == 1999 or i == len(dataloader) - 1:
                # average over the batches accumulated since the last reset
                pbar.set_postfix_str('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / ((i % 2000) + 1)))
                running_loss = 0.0

    # Save trained model
    torch.save(net.state_dict(), args.weight)
    print(f'training finished\n{time.time() - t0:.1f}s spent for {args.max_epoch} epochs')
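# craft_utils.CRAFTLoss is not shown in this snippet. The CRAFT objective is a
# pixel-wise MSE on the region and affinity score maps, weighted by the
# confidence map of the pseudo ground truth; below is a minimal sketch under
# that assumption (`CRAFTLossSketch` is illustrative, not the project's class).
import torch
import torch.nn as nn

class CRAFTLossSketch(nn.Module):
    def forward(self, score_text, score_link, y_region, y_link, y_conf):
        region_loss = (score_text - y_region) ** 2
        link_loss = (score_link - y_link) ** 2
        # confidence-weighted mean over all pixels of both maps
        return torch.mean(y_conf * (region_loss + link_loss))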