def __init__(self, target_size=768, viz=False, debug=False):
    """Store the crop size and debug flags and build the Gaussian heatmap generator.

    NOTE(review): this is a stray module-level copy of
    craft_base_dataset.__init__ — it appears to be a leftover fragment.
    Behaviour kept identical to the original.
    """
    self.target_size, self.viz, self.debug = target_size, viz, debug
    self.gaussianTransformer = GaussianTransformer(imgSize=1024, region_threshold=0.35,
                                                   affinity_threshold=0.15)
class craft_base_dataset():
    """Base class for CRAFT training datasets (TensorFlow-output variant).

    Subclasses override ``load_image_gt_and_confidencemask`` and
    ``get_imagename``.  ``pull_item`` builds region/affinity Gaussian ground
    truth, applies random augmentation, and returns TF tensors
    (see ``tf.convert_to_tensor`` calls at the end of ``pull_item``).
    """

    def __init__(self, target_size=768, viz=False, debug=False):
        # target_size: side length of the square crop fed to the network.
        self.target_size = target_size
        self.viz = viz
        self.debug = debug
        # Heatmap generator used for region/affinity ground-truth rendering.
        self.gaussianTransformer = GaussianTransformer(imgSize=1024, region_threshold=0.35, affinity_threshold=0.15)
        # Running counter incremented by generate_data().
        self.count = 0

    def load_image_gt_and_confidencemask(self, index):
        """Load one sample; to be overridden by subclasses.

        :param index: dataset index
        :return: (image, character_bboxes, words, confidence_mask, confidences)
        """
        return None, None, None, None, None

    def crop_image_by_bbox(self, image, box):
        """Perspective-warp the quadrilateral ``box`` out of ``image``.

        Returns the warped crop and the 3x3 perspective matrix M.
        ``box`` is a 4-point quad ordered TL, TR, BR, BL (inferred from the
        destination corner ordering below — TODO confirm against callers).
        """
        w = (int)(np.linalg.norm(box[0] - box[1]))
        h = (int)(np.linalg.norm(box[0] - box[3]))
        width = w
        height = h
        if h > w * 1.5:
            # Tall box: treat it as rotated text and swap the output axes so the
            # crop comes out horizontal.
            width = h
            height = w
            M = cv2.getPerspectiveTransform(np.float32(box), np.float32(
                np.array([[width, 0], [width, height], [0, height], [0, 0]])))
        else:
            M = cv2.getPerspectiveTransform(np.float32(box), np.float32(
                np.array([[0, 0], [width, 0], [width, height], [0, height]])))
        warped = cv2.warpPerspective(image, M, (width, height))
        return warped, M

    def get_confidence(self, real_len, pursedo_len):
        """Confidence score in [0, 1] comparing the real character count with the
        pseudo-label character count (Eq. from the CRAFT paper's weak supervision).

        NOTE(review): divides by real_len — a word with real_len == 0 and
        pursedo_len != 0 would raise ZeroDivisionError.
        """
        if pursedo_len == 0:
            return 0.
        return (real_len - min(real_len, abs(real_len - pursedo_len))) / real_len

    def inference_pursedo_bboxes(self, net, image, word_bbox, word, viz=False):
        """Generate pseudo character boxes for a word using the interim model.

        Crops the word region, runs ``net`` on it (CUDA), watersheds the region
        score into character boxes, and falls back to an even character split
        when the resulting confidence is <= 0.5.

        :return: (bboxes in original-image coords, region_scores, confidence)
        """
        word_image, MM = self.crop_image_by_bbox(image, word_bbox)
        # BUG(review): str.replace does not take a regex — this removes literal
        # "\s" (backslash + s) substrings, NOT whitespace. Spaces in `word`
        # therefore still count toward real_char_nums.
        real_word_without_space = word.replace('\s', '')
        real_char_nums = len(real_word_without_space)
        input = word_image.copy()
        # Normalize crop height to 64 px before inference.
        scale = 64.0 / input.shape[0]
        input = cv2.resize(input, None, fx=scale, fy=scale)
        img_torch = torch.from_numpy(imgproc.normalizeMeanVariance(input, mean=(0.485, 0.456, 0.406),
                                                                   variance=(0.229, 0.224, 0.225)))
        img_torch = img_torch.permute(2, 0, 1).unsqueeze(0)
        img_torch = img_torch.type(torch.FloatTensor).cuda()
        scores, _ = net(img_torch)
        region_scores = scores[0, :, :, 0].cpu().data.numpy()
        # NOTE(review): `fmf` is computed but never used.
        fmf = np.uint8(np.clip(region_scores, 0, 1) * 255)
        bgr_region_scores = cv2.resize(region_scores, (input.shape[1], input.shape[0]))
        bgr_region_scores = cv2.cvtColor(bgr_region_scores, cv2.COLOR_GRAY2BGR)
        pursedo_bboxes = watershed(input, bgr_region_scores, False)
        # Drop degenerate watershed boxes (mean coordinate <= 2 px).
        _tmp = []
        for i in range(pursedo_bboxes.shape[0]):
            if np.mean(pursedo_bboxes[i].ravel()) > 2:
                _tmp.append(pursedo_bboxes[i])
            else:
                print("filter bboxes", pursedo_bboxes[i])
        pursedo_bboxes = np.array(_tmp, np.float32)
        if pursedo_bboxes.shape[0] > 1:
            # Sort boxes left-to-right by their first corner's x coordinate.
            index = np.argsort(pursedo_bboxes[:, 0, 0])
            pursedo_bboxes = pursedo_bboxes[index]
        confidence = self.get_confidence(real_char_nums, len(pursedo_bboxes))
        bboxes = []
        if confidence <= 0.5:
            # Low confidence: discard the watershed result and split the word
            # into equal-width character boxes instead.
            width = input.shape[1]
            height = input.shape[0]
            width_per_char = width / len(word)
            for i, char in enumerate(word):
                if char == ' ':
                    continue
                left = i * width_per_char
                right = (i + 1) * width_per_char
                bbox = np.array([[left, 0], [right, 0], [right, height], [left, height]])
                bboxes.append(bbox)
            bboxes = np.array(bboxes, np.float32)
            confidence = 0.5
        else:
            bboxes = pursedo_bboxes
        if False:
            # Dead debug-visualisation block (disabled with `if False:`).
            _tmp_bboxes = np.int32(bboxes.copy())
            _tmp_bboxes[:, :, 0] = np.clip(_tmp_bboxes[:, :, 0], 0, input.shape[1])
            _tmp_bboxes[:, :, 1] = np.clip(_tmp_bboxes[:, :, 1], 0, input.shape[0])
            for bbox in _tmp_bboxes:
                cv2.polylines(np.uint8(input), [np.reshape(bbox, (-1, 1, 2))], True, (255, 0, 0))
            region_scores_color = cv2.applyColorMap(np.uint8(region_scores), cv2.COLORMAP_JET)
            region_scores_color = cv2.resize(region_scores_color, (input.shape[1], input.shape[0]))
            target = self.gaussianTransformer.generate_region(region_scores_color.shape, [_tmp_bboxes])
            target_color = cv2.applyColorMap(target, cv2.COLORMAP_JET)
            viz_image = np.hstack([input[:, :, ::-1], region_scores_color, target_color])
            cv2.imshow("crop_image", viz_image)
            cv2.waitKey()
        # Undo the 64-px height normalisation.
        bboxes /= scale
        try:
            # Map each box back to original-image coordinates through the
            # inverse of the crop's perspective matrix.
            for j in range(len(bboxes)):
                ones = np.ones((4, 1))
                tmp = np.concatenate([bboxes[j], ones], axis=-1)
                I = np.matrix(MM).I
                ori = np.matmul(I, tmp.transpose(1, 0)).transpose(1, 0)
                bboxes[j] = ori[:, :2]
        except Exception as e:
            # BUG(review): `gt_path` is not defined in this scope — if this
            # handler ever fires it raises NameError instead of reporting `e`.
            print(e, gt_path)
        bboxes[:, :, 1] = np.clip(bboxes[:, :, 1], 0., image.shape[0] - 1)
        bboxes[:, :, 0] = np.clip(bboxes[:, :, 0], 0., image.shape[1] - 1)
        return bboxes, region_scores, confidence

    def resizeGt(self, gtmask):
        """Resize a ground-truth map to half the crop size (network output res)."""
        return cv2.resize(gtmask, (self.target_size // 2, self.target_size // 2))

    def get_imagename(self, index):
        """Return the image name for ``index``; to be overridden by subclasses."""
        return None

    def saveInput(self, imagename, image, region_scores, affinity_scores, confidence_mask):
        """Write a debug montage (image + heatmaps + confidence mask) to ./output."""
        boxes, polys = craft_utils.getDetBoxes(region_scores / 255, affinity_scores / 255, 0.7, 0.4, 0.4, False)
        # x2: detection boxes are at half resolution (see resizeGt).
        boxes = np.array(boxes, np.int32) * 2
        if len(boxes) > 0:
            # BUG(review): np.clip is not in-place — these two return values are
            # discarded, so the boxes are NOT actually clipped before drawing.
            np.clip(boxes[:, :, 0], 0, image.shape[1])
            np.clip(boxes[:, :, 1], 0, image.shape[0])
            for box in boxes:
                cv2.polylines(image, [np.reshape(box, (-1, 1, 2))], True, (0, 0, 255))
        target_gaussian_heatmap_color = imgproc.cvt2HeatmapImg(region_scores / 255)
        target_gaussian_affinity_heatmap_color = imgproc.cvt2HeatmapImg(affinity_scores / 255)
        confidence_mask_gray = imgproc.cvt2HeatmapImg(confidence_mask)
        gt_scores = np.hstack([target_gaussian_heatmap_color, target_gaussian_affinity_heatmap_color])
        confidence_mask_gray = np.hstack([np.zeros_like(confidence_mask_gray), confidence_mask_gray])
        output = np.concatenate([gt_scores, confidence_mask_gray], axis=0)
        output = np.hstack([image, output])
        outpath = os.path.join(os.path.join(os.path.dirname(__file__) + '/output'), "%s_input.jpg" % imagename)
        if not os.path.exists(os.path.dirname(outpath)):
            # NOTE(review): os.mkdir fails if the parent directory is missing;
            # os.makedirs would be safer.
            os.mkdir(os.path.dirname(outpath))
        cv2.imwrite(outpath, output)

    def saveImage(self, imagename, image, bboxes, affinity_bboxes, region_scores, affinity_scores, confidence_mask):
        """Write a debug image with character/affinity boxes and heatmaps to ./output."""
        output_image = np.uint8(image.copy())
        output_image = cv2.cvtColor(output_image, cv2.COLOR_RGB2BGR)
        if len(bboxes) > 0:
            affinity_bboxes = np.int32(affinity_bboxes)
            for i in range(affinity_bboxes.shape[0]):
                cv2.polylines(output_image, [np.reshape(affinity_bboxes[i], (-1, 1, 2))], True, (255, 0, 0))
            for i in range(len(bboxes)):
                _bboxes = np.int32(bboxes[i])
                for j in range(_bboxes.shape[0]):
                    cv2.polylines(output_image, [np.reshape(_bboxes[j], (-1, 1, 2))], True, (0, 0, 255))
        target_gaussian_heatmap_color = imgproc.cvt2HeatmapImg(region_scores / 255)
        target_gaussian_affinity_heatmap_color = imgproc.cvt2HeatmapImg(affinity_scores / 255)
        heat_map = np.concatenate([target_gaussian_heatmap_color, target_gaussian_affinity_heatmap_color], axis=1)
        confidence_mask_gray = imgproc.cvt2HeatmapImg(confidence_mask)
        output = np.concatenate([output_image, heat_map, confidence_mask_gray], axis=1)
        outpath = os.path.join(os.path.join(os.path.dirname(os.path.abspath(__file__)) + '/output'), imagename)
        if not os.path.exists(os.path.dirname(outpath)):
            os.mkdir(os.path.dirname(outpath))
        cv2.imwrite(outpath, output)

    def pull_item(self, index):
        """Build one training sample as TF tensors.

        :return: (image, region_scores, affinity_scores, confidence_mask,
                  confidences) — scores scaled to [0, 1].
        """
        image, character_bboxes, words, confidence_mask, confidences = self.load_image_gt_and_confidencemask(index)
        if len(confidences) == 0:
            confidences = 1.0
        else:
            confidences = np.array(confidences).mean()
        region_scores = np.zeros((image.shape[0], image.shape[1]), dtype=np.float32)
        affinity_scores = np.zeros((image.shape[0], image.shape[1]), dtype=np.float32)
        affinity_bboxes = []
        if len(character_bboxes) > 0:
            region_scores = self.gaussianTransformer.generate_region(region_scores.shape, character_bboxes)
            affinity_scores, affinity_bboxes = self.gaussianTransformer.generate_affinity(region_scores.shape,
                                                                                          character_bboxes,
                                                                                          words)
        if self.viz:
            self.saveImage(self.get_imagename(index), image.copy(), character_bboxes, affinity_bboxes,
                           region_scores, affinity_scores, confidence_mask)
        # Augmentation: crop first (this variant crops before flip/rotate, unlike
        # the torch variant later in this file).
        random_transforms = [image, region_scores, affinity_scores, confidence_mask]
        random_transforms = random_crop(random_transforms, (self.target_size, self.target_size), character_bboxes)
        random_transforms = random_horizontal_flip(random_transforms)
        random_transforms = random_rotate(random_transforms)
        cvimage, region_scores, affinity_scores, confidence_mask = random_transforms
        region_scores = self.resizeGt(region_scores)
        affinity_scores = self.resizeGt(affinity_scores)
        confidence_mask = self.resizeGt(confidence_mask)
        if self.viz:
            self.saveInput(self.get_imagename(index), cvimage, region_scores, affinity_scores, confidence_mask)
        image = Image.fromarray(cvimage)
        image = image.convert('RGB')
        # Color jitter (brightness/saturation) was disabled here in the original:
        # image = transforms.ColorJitter(brightness=32.0 / 255, saturation=0.5)(image)
        image = imgproc.normalizeMeanVariance(np.array(image), mean=(0.485, 0.456, 0.406),
                                              variance=(0.229, 0.224, 0.225))
        image_tensor = tf.convert_to_tensor(image, np.float32)
        region_scores_tensor = tf.convert_to_tensor(region_scores / 255, np.float32)
        affinity_scores_tensor = tf.convert_to_tensor(affinity_scores / 255, np.float32)
        confidence_mask_tensor = tf.convert_to_tensor(confidence_mask / 255, np.float32)
        return image_tensor, region_scores_tensor, affinity_scores_tensor, confidence_mask_tensor, confidences

    def generate_data(self):
        """Generator yielding samples in random_index_list order.

        NOTE(review): relies on __len__ and random_index_list being provided by
        the subclass — neither is defined in this base class.
        """
        for i in range(0, len(self)):
            index = self.random_index_list[i]
            output = self.pull_item(index)
            self.count += 1
            if i == len(self) - 1:
                # BUG(review): reassigning the loop variable has no effect — the
                # `for` statement rebinds `i` on the next iteration, and the loop
                # ends here anyway. This does NOT make the generator wrap around.
                i = 0
            yield output
def __init__(self, target_size=768, viz=False, debug=False):
    """Store the crop size and debug flags and build the Gaussian heatmap generator.

    NOTE(review): this is a stray module-level copy of the torch-variant
    craft_base_dataset.__init__ — it appears to be a leftover fragment.
    Behaviour kept identical to the original.
    """
    self.target_size, self.viz, self.debug = target_size, viz, debug
    self.gaussianTransformer = GaussianTransformer(imgSize=512)
class craft_base_dataset(data.Dataset):
    """Base class for CRAFT training datasets (PyTorch ``data.Dataset`` variant).

    Subclasses override ``load_image_gt_and_confidencemask`` and
    ``get_imagename``.  ``pull_item`` builds region/affinity Gaussian ground
    truth, applies random augmentation, and returns double-precision torch
    tensors.
    """

    def __init__(self, target_size=768, viz=False, debug=False):
        # target_size: side length of the square crop fed to the network.
        self.target_size = target_size
        self.viz = viz
        self.debug = debug
        # Heatmap generator used for region/affinity ground-truth rendering.
        self.gaussianTransformer = GaussianTransformer(imgSize=512, distanceRatio=1.7)

    def load_image_gt_and_confidencemask(self, index):
        """Load one sample; to be overridden by subclasses.

        :return: (image, character_bboxes, words, confidence_mask)
        """
        return None, None, None, None

    def crop_image_by_bbox(self, image, box):
        """Perspective-warp the quadrilateral ``box`` out of ``image``.

        Returns the warped crop and the 3x3 perspective matrix M.
        """
        w = (int)(np.linalg.norm(box[0] - box[1]))
        h = (int)(np.linalg.norm(box[0] - box[3]))
        width = w
        height = h
        if h > w * 1.5:
            # Tall box: treat it as rotated text and swap the output axes so the
            # crop comes out horizontal.
            width = h
            height = w
            M = cv2.getPerspectiveTransform(
                np.float32(box),
                np.float32(np.array([[width, 0], [width, height], [0, height], [0, 0]])))
        else:
            M = cv2.getPerspectiveTransform(
                np.float32(box),
                np.float32(np.array([[0, 0], [width, 0], [width, height], [0, height]])))
        warped = cv2.warpPerspective(image, M, (width, height))
        return warped, M

    def get_confidence(self, real_len, pursedo_len):
        """Confidence in [0, 1] comparing real vs. pseudo-label character counts.

        NOTE(review): divides by real_len — real_len == 0 with pursedo_len != 0
        would raise ZeroDivisionError.
        """
        if pursedo_len == 0:
            return 0.
        return (real_len - min(real_len, abs(real_len - pursedo_len))) / real_len

    def inference_pursedo_bboxes(self, net, img, word, viz=False):
        """Generate pseudo character boxes for a word crop using the interim model.

        Runs ``net`` on the (already cropped) word image, watersheds the region
        score into character boxes, and falls back to an even character split
        when the resulting confidence is <= 0.5.

        :return: (bboxes in word-crop coords, region_scores (uint8), confidence)
        """
        # NOTE(review): switches the model to eval mode and never restores
        # training mode — callers must re-enable net.train() themselves.
        net.eval()
        # BUG(review): str.replace does not take a regex — this removes literal
        # "\s" substrings, NOT whitespace.
        real_word_without_space = word.replace('\s', '')
        real_char_nums = len(real_word_without_space)
        input = img.copy()
        # Normalize crop height to 64 px before inference.
        scale = 64.0 / input.shape[0]
        input = cv2.resize(input, None, fx=scale, fy=scale)
        img_torch = torch.from_numpy(
            imgproc.normalizeMeanVariance(input, mean=(0.485, 0.456, 0.406),
                                          variance=(0.229, 0.224, 0.225)))
        img_torch = img_torch.permute(2, 0, 1).unsqueeze(0)
        img_torch = img_torch.type(torch.FloatTensor).cuda()
        scores, _ = net(img_torch)
        region_scores = scores[0, :, :, 0].cpu().data.numpy()
        region_scores = np.uint8(np.clip(region_scores, 0, 1) * 255)
        bgr_region_scores = cv2.cvtColor(region_scores, cv2.COLOR_GRAY2BGR)
        pursedo_bboxes = watershed2(bgr_region_scores)
        if pursedo_bboxes.shape[0] > 1:
            # Sort boxes left-to-right by their first corner's x coordinate.
            index = np.argsort(pursedo_bboxes[:, 0, 0])
            pursedo_bboxes = pursedo_bboxes[index]
        confidence = self.get_confidence(real_char_nums, len(pursedo_bboxes))
        bboxes = []
        if confidence <= 0.5:
            # Low confidence: discard the watershed result and split the word
            # into equal-width character boxes instead.
            width = input.shape[1]
            height = input.shape[0]
            width_per_char = width / len(word)
            for i, char in enumerate(word):
                if char == ' ':
                    continue
                left = i * width_per_char
                right = (i + 1) * width_per_char
                bbox = np.array([[left, 0], [right, 0], [right, height], [left, height]])
                bboxes.append(bbox)
            bboxes = np.array(bboxes, np.float32)
            confidence = 0.5
        else:
            bboxes = pursedo_bboxes
            # x2: the network's region score is at half the input resolution, so
            # watershed boxes are scaled back to input coordinates.
            # NOTE(review): placement reconstructed from a whitespace-mangled
            # original — confirm against upstream that this applies only to the
            # watershed branch.
            bboxes *= 2
        if viz:
            _tmp_bboxes = np.int32(bboxes.copy())
            for bbox in _tmp_bboxes:
                cv2.polylines(np.uint8(input), [np.reshape(bbox, (-1, 1, 2))], True, (255, 0, 0))
            region_scores_color = cv2.applyColorMap(np.uint8(region_scores), cv2.COLORMAP_JET)
            region_scores_color = cv2.resize(region_scores_color, (input.shape[1], input.shape[0]))
            viz_image = np.hstack([input[:, :, ::-1], region_scores_color])
            # cv2.imshow("crop_image", viz_image)
            # cv2.waitKey()
        # Undo the 64-px height normalisation.
        bboxes /= scale
        return bboxes, region_scores, confidence

    def resizeGt(self, gtmask):
        """Resize a ground-truth map to half the crop size (network output res)."""
        return cv2.resize(gtmask, (self.target_size // 2, self.target_size // 2))

    def get_imagename(self, index):
        """Return the image name for ``index``; to be overridden by subclasses."""
        return None

    def saveInput(self, imagename, image, region_scores, affinity_scores, confidence_mask):
        """Write a debug montage (image + heatmaps + confidence mask) to ./output."""
        target_gaussian_heatmap_color = imgproc.cvt2HeatmapImg(region_scores / 255)
        target_gaussian_affinity_heatmap_color = imgproc.cvt2HeatmapImg(
            affinity_scores / 255)
        confidence_mask_gray = imgproc.cvt2HeatmapImg(confidence_mask)
        gt_scores = np.hstack([
            target_gaussian_heatmap_color, target_gaussian_affinity_heatmap_color
        ])
        confidence_mask_gray = np.hstack(
            [np.zeros_like(confidence_mask_gray), confidence_mask_gray])
        output = np.concatenate([gt_scores, confidence_mask_gray], axis=0)
        output = np.hstack([image, output])
        outpath = os.path.join(
            os.path.join(os.path.dirname(__file__) + '/output'),
            "%s_input.jpg" % imagename)
        if not os.path.exists(os.path.dirname(outpath)):
            # NOTE(review): os.mkdir fails if the parent directory is missing;
            # os.makedirs would be safer.
            os.mkdir(os.path.dirname(outpath))
        cv2.imwrite(outpath, output)

    def saveImage(self, imagename, image, bboxes, affinity_bboxes, region_scores, affinity_scores, confidence_mask):
        """Write a debug image with character/affinity boxes and heatmaps to ./output."""
        output_image = np.uint8(image.copy())
        output_image = cv2.cvtColor(output_image, cv2.COLOR_RGB2BGR)
        if len(bboxes) > 0:
            # affinity_bboxes = affinity_bboxes * 2
            affinity_bboxes = np.int32(affinity_bboxes)
            for i in range(affinity_bboxes.shape[0]):
                cv2.polylines(output_image, [np.reshape(affinity_bboxes[i], (-1, 1, 2))], True, (255, 0, 0))
            for i in range(len(bboxes)):
                _bboxes = np.int32(bboxes[i])
                for j in range(_bboxes.shape[0]):
                    cv2.polylines(output_image, [np.reshape(_bboxes[j], (-1, 1, 2))], True, (0, 0, 255))
        target_gaussian_heatmap_color = imgproc.cvt2HeatmapImg(region_scores / 255)
        target_gaussian_affinity_heatmap_color = imgproc.cvt2HeatmapImg(
            affinity_scores / 255)
        confidence_mask_gray = imgproc.cvt2HeatmapImg(confidence_mask)
        output = np.concatenate([
            output_image, target_gaussian_heatmap_color,
            target_gaussian_affinity_heatmap_color, confidence_mask_gray
        ], axis=1)
        outpath = os.path.join(
            os.path.join(os.path.dirname(__file__) + '/output'), imagename)
        if not os.path.exists(os.path.dirname(outpath)):
            os.mkdir(os.path.dirname(outpath))
        cv2.imwrite(outpath, output)

    def pull_item(self, index):
        """Build one training sample as double-precision torch tensors.

        :return: (image CHW, region_scores, affinity_scores, confidence_mask)
                 — scores scaled to [0, 1]; note confidence_mask is NOT divided
                 by 255 here (unlike the scores), presumably because it is
                 already in [0, 1] — TODO confirm against the subclass loader.
        """
        image, character_bboxes, words, confidence_mask = self.load_image_gt_and_confidencemask(
            index)
        region_scores = np.zeros((image.shape[0], image.shape[1]), dtype=np.float32)
        affinity_scores = np.zeros((image.shape[0], image.shape[1]), dtype=np.float32)
        affinity_bboxes = []
        if len(character_bboxes) > 0:
            region_scores = self.gaussianTransformer.generate_region(
                image.shape, character_bboxes)
            affinity_scores, affinity_bboxes = self.gaussianTransformer.generate_affinity(
                image.shape, character_bboxes, words)
        if self.viz:
            self.saveImage(self.get_imagename(index), image.copy(), character_bboxes,
                           affinity_bboxes, region_scores, affinity_scores, confidence_mask)
        # Augmentation: flip/rotate first, then crop (opposite order to the TF
        # variant earlier in this file).
        random_transforms = [
            image, region_scores, affinity_scores, confidence_mask
        ]
        random_transforms = random_horizontal_flip(random_transforms)
        random_transforms = random_rotate(random_transforms)
        random_transforms = random_crop(random_transforms, (self.target_size, self.target_size))
        cvimage, region_scores, affinity_scores, confidence_mask = random_transforms
        region_scores = self.resizeGt(region_scores)
        affinity_scores = self.resizeGt(affinity_scores)
        confidence_mask = self.resizeGt(confidence_mask)
        if self.viz:
            self.saveInput(self.get_imagename(index), cvimage, region_scores,
                           affinity_scores, confidence_mask)
        image = Image.fromarray(cvimage)
        image = image.convert('RGB')
        # Random brightness/saturation jitter (active in this variant).
        image = transforms.ColorJitter(brightness=32.0 / 255, saturation=0.5)(image)
        image = imgproc.normalizeMeanVariance(np.array(image), mean=(0.485, 0.456, 0.406),
                                              variance=(0.229, 0.224, 0.225))
        image = torch.from_numpy(image).float().permute(2, 0, 1)
        region_scores_torch = torch.from_numpy(region_scores / 255).float()
        affinity_scores_torch = torch.from_numpy(affinity_scores / 255).float()
        confidence_mask_torch = torch.from_numpy(confidence_mask).float()
        return image.double(), region_scores_torch.double(
        ), affinity_scores_torch.double(), confidence_mask_torch.double()