def test(modelpara):
    """Run CRAFT text detection over every image in ``image_list``.

    Loads detector weights from the checkpoint ``modelpara``, runs
    inference on each image, and writes visualised results into
    ``result_folder``.

    Relies on module-level globals: ``image_list``, ``args``,
    ``result_folder`` and the helpers ``imgproc`` / ``file_utils`` /
    ``test_net`` / ``copyStateDict``.
    """
    net = CRAFT()
    print('Loading weights from checkpoint {}'.format(modelpara))

    # Pick GPU when available, otherwise CPU, and map the checkpoint there.
    # Fixed: the load was commented out, so `modelpara` was never used and
    # the network ran with randomly-initialised weights.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    net.load_state_dict(copyStateDict(torch.load(modelpara, map_location=device)))
    net = net.to(device)
    net.eval()  # inference only: freeze dropout/batch-norm statistics

    t = time.time()

    # Detect text regions image by image.
    for k, image_path in enumerate(image_list):
        print("Test image {:d}/{:d}: {:s}".format(k + 1, len(image_list), image_path), end='\r')
        image = imgproc.loadImage(image_path)

        bboxes, polys, score_text = test_net(net, image, args.text_threshold,
                                             args.link_threshold, args.low_text,
                                             args.cuda, args.poly)
        print("\n bboxes = ", bboxes, "\n poly = ", polys, "\n text = ",
              score_text, "\n text.shape = ", score_text.shape)

        # Save the visualised result (image is BGR; saveResult expects RGB).
        print("save in " + result_folder)
        file_utils.saveResult(image_path, image[:, :, ::-1], polys, dirname=result_folder)

    print("elapsed time : {}s".format(time.time() - t))
class Ocr:
    """Two-stage OCR pipeline: CRAFT text detection + transformer recognition.

    Results for each document field (send/date/quote/number/header/sign) are
    accumulated in ``multiprocessing.Manager`` lists so that values written by
    the child processes spawned in :meth:`forward` are visible in the parent.
    """

    def __init__(self):
        super().__init__()
        manager = Manager()
        # One shared result list per document field.
        self.send = manager.list()
        self.date = manager.list()
        self.quote = manager.list()
        self.number = manager.list()
        self.header = manager.list()
        self.sign = manager.list()
        self.device = torch.device('cpu')

        # Load the CRAFT detector checkpoint, stripping a possible
        # DataParallel "module." prefix from the state-dict keys.
        state_dict = torch.load(
            '/home/dung/Project/Python/ocr/craft_mlt_25k.pth')
        if list(state_dict.keys())[0].startswith("module"):
            start_idx = 1
        else:
            start_idx = 0
        new_state_dict = OrderedDict()
        for k, v in state_dict.items():
            name = ".".join(k.split(".")[start_idx:])
            new_state_dict[name] = v
        self.craft = CRAFT()
        self.craft.load_state_dict(new_state_dict)
        self.craft.to(self.device)
        self.craft.eval()
        self.craft.share_memory()  # allow child processes to reuse the weights

        # Recognition-model configuration (vgg_transformer, CPU, greedy decode).
        self.config = Cfg.load_config_from_name('vgg_transformer')
        self.config[
            'weights'] = 'https://drive.google.com/uc?id=13327Y1tz1ohsm5YZMyXVMPIOjoOA0OaA'
        self.config['device'] = 'cpu'
        self.config['predictor']['beamsearch'] = False
        self.weights = '/home/dung/Documents/transformerocr.pth'
        # self.model, self.vocab = build_model(self.config)

    def predict(self, model, vocab, seq, key, idx, img):
        """Greedily decode the text in ``img`` and store it at ``seq[idx]``."""
        img = process_input(img, self.config['dataset']['image_height'],
                            self.config['dataset']['image_min_width'],
                            self.config['dataset']['image_max_width'])
        img = img.to(self.config['device'])
        with torch.no_grad():
            src = model.cnn(img)
            memory = model.transformer.forward_encoder(src)
            # Start every sequence with token 1; token 2 presumably marks
            # end-of-sequence — TODO confirm against the vocab definition.
            translated_sentence = [[1] * len(img)]
            max_length = 0
            # Decode step by step until every sequence contains token 2
            # or 128 steps have been produced.
            while max_length <= 128 and not all(
                    np.any(np.asarray(translated_sentence).T == 2, axis=1)):
                tgt_inp = torch.LongTensor(translated_sentence).to(self.device)
                output = model.transformer.forward_decoder(tgt_inp, memory)
                output = output.to('cpu')
                values, indices = torch.topk(output, 5)
                indices = indices[:, -1, 0]  # best token of the last step
                indices = indices.tolist()
                translated_sentence.append(indices)
                max_length += 1
                del output
            translated_sentence = np.asarray(translated_sentence).T
        s = translated_sentence[0].tolist()
        s = vocab.decode(s)
        seq[idx] = s
        # print(time.time() - time1)

    def process(self, craft, seq, key, sub_img):
        """Detect text boxes in ``sub_img`` with CRAFT, then recognise each
        crop via :meth:`predict`, writing the strings into ``seq``."""
        img_resized, target_ratio, size_heatmap = resize_aspect_ratio(
            sub_img, 2560, interpolation=cv2.INTER_LINEAR, mag_ratio=1.)
        ratio_h = ratio_w = 1 / target_ratio
        x = normalizeMeanVariance(img_resized)
        x = torch.from_numpy(x).permute(2, 0, 1)  # [h, w, c] -> [c, h, w]
        x = x.unsqueeze(0)  # add batch dimension
        x = x.to(self.device)
        y, feature = craft(x)
        score_text = y[0, :, :, 0].cpu().data.numpy()
        score_link = y[0, :, :, 1].cpu().data.numpy()
        boxes, polys = getDetBoxes(score_text, score_link, text_threshold=0.7,
                                   link_threshold=0.4, low_text=0.4, poly=False)
        boxes = adjustResultCoordinates(boxes, ratio_w, ratio_h)
        polys = adjustResultCoordinates(polys, ratio_w, ratio_h)
        for k in range(len(polys)):
            if polys[k] is None:
                polys[k] = boxes[k]
        result = []
        for i, box in enumerate(polys):
            poly = np.array(box).astype(np.int32).reshape((-1))
            result.append(poly)
        horizontal_list, free_list = group_text_box(result, slope_ths=0.8,
                                                    ycenter_ths=0.5,
                                                    height_ths=1, width_ths=1,
                                                    add_margin=0.1)
        # horizontal_list = [i for i in horizontal_list if i[0] > 0 and i[1] > 0]
        # Drop degenerate boxes (the horizontal filter uses a fixed 10px floor).
        min_size = 20
        if min_size:
            horizontal_list = [
                i for i in horizontal_list
                if max(i[1] - i[0], i[3] - i[2]) > 10
            ]
            free_list = [
                i for i in free_list
                if max(diff([c[0] for c in i]), diff([c[1] for c in i])) > min_size
            ]
        seq[:] = [None] * len(horizontal_list)
        model, vocab = build_model(self.config)
        model.load_state_dict(
            torch.load(self.weights, map_location=torch.device('cpu')))
        for i, ele in enumerate(horizontal_list):
            # Clamp negative coordinates to 0 before slicing.
            # (renamed the comprehension variable: it shadowed the loop index)
            ele = [0 if c < 0 else c for c in ele]
            img = sub_img[ele[2]:ele[3], ele[0]:ele[1], :]
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img = Image.fromarray(img.astype(np.uint8))
            p = threading.Thread(target=self.predict,
                                 args=(model, vocab, seq, key, i, img))
            p.start()
            p.join()
        # print(time.time() - time1)

    def forward(self, img, rs):
        """Detect and recognise the text of each named region of ``img``.

        ``rs`` maps a field name to its ``(x0, y0, x1, y1)`` crop box.
        Returns the six result lists (send, date, quote, number, header, sign).
        """
        # BUG FIX: the original elif chain passed self.date for the quote,
        # number, header and sign keys, so those results were appended to
        # `date` and the corresponding lists were returned empty.
        targets = {
            'send': self.send,
            'date': self.date,
            'quote': self.quote,
            'number': self.number,
            'header': self.header,
            'sign': self.sign,
        }
        for key, v in rs.items():
            x0, y0, x1, y1 = v
            if key in targets:
                p = mp.Process(target=self.process,
                               args=(self.craft, targets[key], key,
                                     img[y0:y1, x0:x1, :]))
                p.start()
                p.join()
        return (self.send[:], self.date[:], self.quote[:], self.number[:],
                self.header[:], self.sign[:])
# Fine-tuning setup: synthetic pre-training data plus real ICDAR 2013 data.
# NOTE(review): `device`, `copyStateDict`, `Synth80k`, `ICDAR2013`, `CRAFT`,
# `nn` and `cudnn` are defined elsewhere in this file/module.
print('Load the synthetic data ...')
data_loader = Synth80k('D:/Datasets/SynthText')
train_loader = torch.utils.data.DataLoader(data_loader,
                                           batch_size=1,
                                           shuffle=True,
                                           num_workers=0,
                                           drop_last=True,
                                           pin_memory=True)
batch_syn = iter(train_loader)  # drawn from manually, batch by batch

print('Prepare the net ...')
net = CRAFT()
# NOTE(review): the checkpoint directory is spelled "weigths" on disk —
# kept as-is since it is a runtime path.
net.load_state_dict(copyStateDict(
    torch.load('./weigths/synweights/0.pth')))
net.to(device)

# Wrap in DataParallel only when several GPUs are present; remember the
# wrapping so the raw state_dict can be saved later.
data_parallel = False
if torch.cuda.device_count() > 1:
    net = nn.DataParallel(net)
    data_parallel = True
cudnn.benchmark = False

print('Load the real data')
# The real dataset receives the network itself (presumably to generate
# pseudo ground-truth labels — confirm against ICDAR2013's implementation).
real_data = ICDAR2013(net, 'D:/Datasets/ICDAR_2013')
real_data_loader = torch.utils.data.DataLoader(real_data,
                                               batch_size=5,
                                               shuffle=True,
                                               num_workers=0,
                                               drop_last=True,
                                               pin_memory=True)
# Training setup on SynthText: output dir, data loader, loss, model, optimiser.
if not os.path.isdir(args.store_sample):
    os.system('mkdir {0}'.format(args.store_sample))
dataset = ImageLoader_synthtext(args)
assert dataset
data_loader = torch.utils.data.DataLoader(dataset,
                                          args.batch_size,
                                          num_workers=4,
                                          shuffle=True,
                                          collate_fn=collate)
device = 'cuda' if torch.cuda.is_available() else 'cpu'
criterion = torch.nn.MSELoss(reduction='mean')
criterion = criterion.to(device)
craft = CRAFT(pretrained=True)
# Optionally resume from a previous checkpoint (non-strict load).
if args.go_on != '':
    print('loading pretrained model from %s' % args.pre_model)
    craft.load_state_dict(torch.load(args.pre_model), strict=False)
craft = craft.to(device)
loss_avg = averager()
optimizer = optim.Adam(craft.parameters(), lr=args.lr)


def train_batch(data):
    # Run one optimisation step on a (img, char_label, interval_label) batch.
    # NOTE(review): this function appears truncated in this chunk — no loss
    # computation or backward pass is visible after the forward call.
    div = 10
    craft.train()
    img, char_label, interval_label = data
    img = img.to(device)
    char_label = char_label.to(device)
    interval_label = interval_label.to(device)
    img.requires_grad_()
    optimizer.zero_grad()
    preds, _ = craft(img)
# Second-stage setup: fine-tune on ICDAR 2015 using two CRAFT models
# (craft1 is kept fixed — only craft2's parameters go into the optimiser).
config={'is_training':True, 'image_path':'/home/lbh/dataset/icdar2015'}
dataset2 = ImageLoader2(config)
assert dataset2
data_loader2 = torch.utils.data.DataLoader(dataset2,
                                           args.batch_size2,
                                           num_workers=4,
                                           shuffle=True,
                                           collate_fn=collate2)
device = 'cuda' if torch.cuda.is_available() else 'cpu'
criterion = torch.nn.MSELoss(reduction='mean')
criterion = criterion.to(device)
craft1 = CRAFT(pretrained=True)
craft2 = CRAFT(pretrained=True)
# Optionally resume both models from checkpoints (non-strict load).
if args.go_on != '':
    print('loading pretrained model from %s' % args.pre_model1)
    print('loading pretrained model from %s' % args.pre_model2)
    craft1.load_state_dict(torch.load(args.pre_model1), strict=False)
    craft2.load_state_dict(torch.load(args.pre_model2), strict=False)
craft1 = craft1.to(device)
craft2 = craft2.to(device)
loss_avg = averager()
optimizer = optim.Adam(craft2.parameters(), lr=args.lr)


def train_batch1(data):
    # Run one optimisation step of craft2 on a batch.
    # NOTE(review): this function appears truncated in this chunk — it ends
    # right after the character-channel loss term is computed.
    craft2.train()
    img, char_label, interval_label = data
    img = img.to(device)
    char_label = char_label.to(device)
    interval_label = interval_label.to(device)
    img.requires_grad_()
    optimizer.zero_grad()
    preds, _ = craft2(img)
    cost_char = criterion(preds[:,:,:,0], char_label).sum()
def train(train_img_path, train_gt_path, pths_path, batch_size, lr,
          num_workers, epoch_iter, save_interval):
    """Train a CRAFT model and periodically save checkpoints.

    Args:
        train_img_path: directory of training images (used to count files).
        train_gt_path: directory of ground-truth annotations.
        pths_path: directory where checkpoints are written.
        batch_size, lr, num_workers, epoch_iter: usual training knobs.
        save_interval: save a checkpoint every this many epochs.

    Relies on module-level globals: ``args`` (weight_decay, gamma),
    ``custom_dataset``, ``Maploss``, ``CRAFT``, ``adjust_learning_rate``.
    """
    # Fixed: the variable was assigned as `filenum` but read later as
    # `file_num`, raising NameError on the first logging line.
    file_num = len(os.listdir(train_img_path))
    trainset = custom_dataset(train_img_path, train_gt_path)
    train_loader = data.DataLoader(trainset, batch_size=batch_size,
                                   shuffle=True, num_workers=num_workers,
                                   drop_last=True)
    criterion = Maploss()
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = CRAFT()
    # Track DataParallel wrapping so the raw state_dict can be saved below.
    data_parallel = False
    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)
        data_parallel = True
    model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr,
                                 weight_decay=args.weight_decay)
    scheduler = lr_scheduler.MultiStepLR(optimizer,
                                         milestones=[epoch_iter // 2],
                                         gamma=0.1)
    step_index = 0

    for epoch in range(epoch_iter):
        # Extra manual LR decay every 50 epochs, on top of the scheduler.
        if epoch % 50 == 0 and epoch != 0:
            step_index += 1
            adjust_learning_rate(optimizer, args.gamma, step_index)

        model.train()
        scheduler.step()
        epoch_loss = 0
        epoch_time = time.time()
        for i, (img, gt_score, gt_geo, ignored_map) in enumerate(train_loader):
            start_time = time.time()
            img, gt_score, gt_geo, ignored_map = img.to(device), gt_score.to(
                device), gt_geo.to(device), ignored_map.to(device)
            pred_score, pred_geo = model(img)
            loss = criterion(gt_score, pred_score, gt_geo, pred_geo, ignored_map)

            epoch_loss += loss.item()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            print('Epoch is [{}/{}], mini-batch is [{}/{}], time consumption is {:.8f}, batch_loss is {:.8f}'.format(\
                epoch+1, epoch_iter, i+1, int(file_num/batch_size), time.time()-start_time, loss.item()))

        print('epoch_loss is {:.8f}, epoch_time is {:.8f}'.format(
            epoch_loss / int(file_num / batch_size), time.time() - epoch_time))
        print(time.asctime(time.localtime(time.time())))
        print('=' * 50)

        # Fixed: the original tested `interval`, which is undefined — the
        # parameter is named `save_interval`.
        if (epoch + 1) % save_interval == 0:
            state_dict = model.module.state_dict(
            ) if data_parallel else model.state_dict()
            torch.save(
                state_dict,
                os.path.join(pths_path,
                             'model_epoch_{}.pth'.format(epoch + 1)))