def __init__(self, root, list_file_name_path, size=(512, 512), transform=None):
    """Resolve dataset paths under ``root`` and load listings and vocabularies.

    Args:
        root: Dataset root directory; every folder and JSON file used here
            is joined onto it.
        list_file_name_path: Passed with ``root`` to ``self.get_file_names``
            to obtain the sample file names.
        size: Stored as ``self.size``; not used further in this constructor.
        transform: Stored as ``self.transform``.
    """
    self.root = root
    self.list_file_name_path = list_file_name_path
    self.size = size
    self.transform = transform

    # Input folders, all direct children of the dataset root.
    self.lbl_fol = osp.join(root, 'labels')
    self.img_fol = osp.join(root, 'images')
    self.tensor_fol = osp.join(root, 'tensor_input')
    self.semantic_fol = osp.join(root, 'semantic_gt')
    self.obj_fol = osp.join(root, 'obj_gt')

    # Vocabulary / class-mapping JSON files.
    self.corpus_path = osp.join(root, 'corpus.json')
    self.target_path = osp.join(root, 'target.json')
    self.target2idx_path = osp.join(root, 'target2idx.json')

    self.file_names = self.get_file_names(self.root, self.list_file_name_path)

    def list_paths(folder, extension):
        # Same listing call for every category; only folder and suffix vary.
        return self.get_category_file_paths(
            self.root, folder, self.file_names, extension)

    self.img_lst = list_paths(self.img_fol, '.png')
    self.tensor_lst = list_paths(self.tensor_fol, '.pt')
    self.semantic_lst = list_paths(self.semantic_fol, '.png')
    self.obj_lst = list_paths(self.obj_fol, '.json')

    # Map sample position -> base name. Only the text before the first dot
    # of the file name is kept.
    self.idx2name = {
        position: osp.basename(tensor_path).split('.')[0]
        for position, tensor_path in enumerate(self.tensor_lst)
    }

    self.corpus = read_json(self.corpus_path)
    self.target = read_json(self.target_path)
    self.target2idx = read_json(self.target2idx_path)

    self.enc = OneHotEncoder(self.corpus)
    self.datagenerator = MaskGenerator()
def __init__(self, corpus_path, target_path, model_path, **kwargs):
    """Load vocabularies and model weights and build the preprocessing pipeline.

    Args:
        corpus_path: JSON file read into ``self.corpus``.
        target_path: JSON file read into ``self.target``.
        model_path: Checkpoint passed to ``torch.load``; the result is
            loaded into the model as its state dict.
        **kwargs: Must contain ``char2idx_path`` (JSON file read into
            ``self.char2idx``) and ``device`` (stored as ``self.device``;
            the model is moved to it).

    Raises:
        KeyError: If ``char2idx_path`` or ``device`` is missing from
            ``kwargs``.
    """
    self.char2idx = read_json(kwargs['char2idx_path'])
    self.corpus = read_json(corpus_path)
    self.target = read_json(target_path)

    # The model is built with one more input channel than corpus entries.
    self.model = Chargrid2D(len(self.corpus) + 1, len(self.target))

    device = kwargs['device']
    if device == 'cpu':
        # Remap the checkpoint's tensors onto the CPU while loading.
        state_dict = torch.load(model_path, map_location='cpu')
    else:
        state_dict = torch.load(model_path)
    self.model.load_state_dict(state_dict)
    self.device = device
    self.model.to(self.device)

    self.size = 512
    padded_side = self.size + 24
    self.aug = alb.Compose([
        alb.LongestMaxSize(padded_side, interpolation=0),
        alb.PadIfNeeded(padded_side, padded_side,
                        border_mode=cv2.BORDER_CONSTANT),
        alb.RandomCrop(self.size, self.size, p=0.3),
        alb.Resize(self.size, self.size, 0),
    ])
    self.enc = OneHotEncoder(self.corpus)

    # Base palette; repeated below so that there is at least one colour
    # per entry of self.target.
    palette = [
        (0, 0, 0),
        (0, 255, 0),
        (0, 0, 255),
        (0, 255, 255),
        (255, 0, 255),
        (255, 255, 0),
        (127, 255, 212),
        (69, 139, 116),
        (131, 139, 139),
        (227, 207, 87),
        (139, 125, 107),
        (138, 43, 226),
        (156, 102, 31),
        (165, 42, 42),
        (255, 64, 64),
        (255, 97, 3),
        (127, 255, 0),
        (238, 18, 137),
        (128, 128, 128),
        (34, 139, 34),
        (139, 105, 20),
        (255, 105, 180),
        (60, 179, 113),
        (139, 0, 0),
        (0, 139, 0),
        (0, 0, 139),
    ]
    repeats = len(self.target) // len(palette) + 1
    self.all_color = palette * repeats
def convert_lbl(self, lbl_fol, std_lbl_fol):
    """Convert every ``*.json`` label in ``lbl_fol`` and write it to ``std_lbl_fol``.

    For each label file the matching image is looked up in ``img_fol``
    (``.png`` first, then ``.jpg``). The label is passed through
    ``self.__convert_data`` and written to ``std_lbl_fol`` under the same
    file name. The label, image and converted-label paths are appended to
    ``self.path_lbls``, ``self.path_imgs`` and ``self.path_std_lbls``.

    Args:
        lbl_fol: Folder globbed for ``*.json`` label files.
        std_lbl_fol: Folder the converted labels are written into.

    Raises:
        FileNotFoundError: If a label or its image does not exist. The
            previous code had a bare ``exit`` here, which is a no-op
            expression, so processing silently continued with a missing
            image path.
    """
    # NOTE(review): ``img_fol`` is neither a parameter nor a local of this
    # method; it is resolved from an outer scope. Confirm that this is
    # intended (rather than e.g. an attribute of ``self``).
    all_files = glob.glob(osp.join(lbl_fol, '*.json'))
    total = len(all_files)
    for idx, lbl_path in enumerate(all_files):
        name = osp.basename(lbl_path)
        print(name)

        # Prefer a .png image, fall back to .jpg.
        img_path = osp.join(img_fol, name.replace('.json', '.png'))
        if not osp.exists(img_path):
            img_path = osp.join(img_fol, name.replace('.json', '.jpg'))

        print(
            f'Converting data......File {idx}/Total {total}....Progress: {int(idx/total*100)}%'
        )
        print(lbl_path)
        print(img_path)

        if not osp.exists(lbl_path) or not osp.exists(img_path):
            print('Image or Label is not exist')
            raise FileNotFoundError(
                f'Missing label or image: {lbl_path} / {img_path}')

        self.path_lbls.append(lbl_path)
        self.path_imgs.append(img_path)

        std_lbl_path = osp.join(std_lbl_fol, name)
        lbl_data = self.__convert_data(read_json(lbl_path))
        write_json(std_lbl_path, lbl_data)
        self.path_std_lbls.append(std_lbl_path)
def process(self, img_path, textline_path):
    """Predict a class-index map for one document.

    The image is read only for its height and width. A character-index
    grid of that size is painted from the text lines, passed through
    ``self.aug`` and ``self.enc.process``, and fed to the model.

    Args:
        img_path: Path of the document image.
        textline_path: JSON file with a list of items, each providing
            ``location`` (indices 0-3 are used as x, y, width, height)
            and ``value`` (the text of the line).

    Returns:
        NumPy array of shape ``(self.size, self.size)`` holding, per
        position, the index of the maximum along dimension 1 of the
        model's first output.

    Raises:
        FileNotFoundError: If ``cv2.imread`` cannot read ``img_path``.
    """
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(f'Cannot read image: {img_path}')
    textlines = read_json(textline_path)

    doc_h, doc_w = img.shape[0], img.shape[1]
    mask = np.zeros((doc_h, doc_w), dtype='int16')
    for item in textlines:
        w, h = item['location'][2], item['location'][3]
        # The box width is divided into len(value) + 1 equal columns and
        # one column is painted per character, at full box height.
        char_w, char_h = int(w / (len(item['value']) + 1)), int(h)
        cur_x, cur_y = int(item['location'][0]), int(item['location'][1])
        for char in item['value']:
            mask[cur_y:cur_y + char_h,
                 cur_x:cur_x + char_w] = self.get_char2idx(char)
            cur_x += char_w

    tensor = torch.from_numpy(mask)
    img = transforms.functional.to_pil_image(tensor)
    img = np.asarray(img)
    augmented = self.aug(image=img)
    img = augmented['image'].astype('int16')

    img = torch.from_numpy(img).type(torch.LongTensor)
    img = img.unsqueeze(0)
    img = self.enc.process(img)
    img = img.unsqueeze(0)
    img = img.to(self.device)

    # Inference only: skip building the autograd graph, and call the module
    # itself rather than .forward() directly.
    with torch.no_grad():
        output = self.model(img)

    pred = output[0].max(1)[1].cpu().numpy().reshape(self.size, self.size)
    return pred
def __getitem__(self, idx):
    """Load one sample and return its encoded input and semantic mask.

    Args:
        idx: Sample position; looked up in ``self.idx2name`` to get the
            base file name shared by the tensor, mask and object files.

    Returns:
        Tuple ``(img, mask, torch.tensor([]), torch.tensor([]))`` where
        ``img`` is the output of ``self.enc.process`` and ``mask`` is the
        semantic mask as an int16 tensor. The two empty tensors stand in
        for the boxes and box labels, which are computed but not returned.
    """
    name = self.idx2name[idx]
    tensor_path = osp.join(self.tensor_fol, name + '.pt')
    semantic_path = osp.join(self.semantic_fol, name + '.png')
    obj_path = osp.join(self.obj_fol, name + '.json')

    tensor = torch.load(tensor_path)
    semantic = Image.open(semantic_path)
    obj = read_json(obj_path)

    img = np.asarray(transforms.functional.to_pil_image(tensor))
    mask = np.asarray(semantic)

    # Start from the untransformed boxes so that they are defined even
    # when no transform is configured (previously a NameError).
    boxes, lbl_boxes = self.__getobjcoor__(obj)

    if self.transform:
        augmented = self.transform(image=img, mask=mask, bboxes=boxes,
                                   lbl_id=lbl_boxes)
        img = augmented['image']
        mask = augmented['mask']
        boxes = augmented['bboxes']
        lbl_boxes = augmented['lbl_id']

    # Cast on both paths so the tensors have the same dtypes with or
    # without a transform.
    img = img.astype('int16')
    mask = mask.astype('int16')

    img = torch.from_numpy(img).type(torch.LongTensor)
    mask = torch.from_numpy(mask)

    # Converted but currently not part of the returned tuple.
    boxes = torch.from_numpy(np.array(boxes)).type(torch.LongTensor)
    lbl_boxes = torch.from_numpy(np.array(lbl_boxes))

    img = img.unsqueeze(0)
    img = self.enc.process(img)

    return img, mask, torch.tensor([]), torch.tensor([])  # boxes, lbl_boxes
def __generate_object(self, img_path, lbl_path):
    """Build the character grid, class map and object list for one document.

    Args:
        img_path: Path of the document image, read with ``cv2.imread``
            flag ``0`` and used as a 2-D array.
        lbl_path: JSON file with a list of items providing ``location``
            (indices 0-3 are used as x, y, width, height), ``value``,
            ``formal_key`` and ``key_type``.

    Returns:
        Tuple ``(debug_img, tensor, gt, obj)``:
            debug_img: Array of shape ``(doc_h, doc_w * 3)`` with the
                image, the character grid and ``255 - gt`` side by side.
            tensor: The character grid as a torch tensor.
            gt: int16 array with ``self.target2idx`` values per pixel.
            obj: List of dicts with ``text``, ``box`` and ``class``.

    Raises:
        FileNotFoundError: If ``cv2.imread`` cannot read ``img_path``.
    """
    img = cv2.imread(img_path, 0)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(f'Cannot read image: {img_path}')

    doc_h, doc_w = img.shape
    mask = np.zeros((doc_h, doc_w), dtype='int16')
    gt = np.zeros((doc_h, doc_w), dtype='int16')
    obj = []

    for item in read_json(lbl_path):
        location = item['location']
        w, h = location[2], location[3]
        # The box width is divided into len(value) + 1 equal columns; each
        # character is painted over half of the box height.
        # NOTE(review): `process` paints the full box height -- confirm
        # that the difference is intended.
        char_w, char_h = int(w / (len(item['value']) + 1)), int(h) // 2
        cur_x, cur_y = int(location[0]), int(location[1])

        fm_key = item['formal_key']
        k_type = item['key_type']
        cl = 'other' if fm_key == 'other' else k_type + '_' + fm_key

        for char in item['value']:
            rows = slice(cur_y, cur_y + char_h)
            cols = slice(cur_x, cur_x + char_w)
            mask[rows, cols] = self.get_char2idx(char)
            gt[rows, cols] = self.target2idx[cl]
            cur_x += char_w

        obj.append({
            'text': item['value'],
            'box': [
                int(location[0]),
                int(location[1]),
                int(location[2]),
                int(location[3]),
            ],
            'class': cl,
        })

    tensor = torch.from_numpy(mask)

    # Three panels side by side: image | character grid | inverted class map.
    debug_img = np.zeros((doc_h, doc_w * 3))
    debug_img[:, :doc_w] = img
    debug_img[:, doc_w:doc_w * 2] = mask
    debug_img[:, doc_w * 2:doc_w * 3] = 255 - gt

    return debug_img, tensor, gt, obj
def generate_target(self, target_path='./data/target.json',
                    target2idx_path='./data/target2idx.json'):
    """Collect all classes from the converted labels and write them to disk.

    Every file in ``self.path_std_lbls`` is read. Each item's class is
    ``'other'`` when its ``formal_key`` is ``'other'``, otherwise
    ``key_type + '_' + formal_key``. New classes are added to
    ``self.target``, which is then sorted and indexed into
    ``self.target2idx``.

    Args:
        target_path: Output file for the sorted class list. Defaults to
            the previously hard-coded location.
        target2idx_path: Output file for the class -> index mapping.
            Defaults to the previously hard-coded location.
    """
    # Set mirror of self.target for constant-time membership checks.
    known = set(self.target)
    for lbl_path in self.path_std_lbls:
        for item in read_json(lbl_path):
            fm_key = item['formal_key']
            k_type = item['key_type']
            cl = 'other' if fm_key == 'other' else k_type + '_' + fm_key
            if cl not in known:
                known.add(cl)
                self.target.append(cl)

    self.target = sorted(self.target)
    for idx, target in enumerate(self.target):
        self.target2idx[target] = idx

    write_json(target_path, self.target)
    write_json(target2idx_path, self.target2idx)