def __getitem__(self, index):
    img_path = self.fnames[index]
    bbox = self.bboxs[index]
    texts = self.texts[index]
    texts_encoded = self.texts_encoded[index]

    # Load image.
    img = Image.open(os.path.join(self.path_img_folder, img_path))
    if img.mode != 'RGB':
        img = img.convert('RGB')

    if self.train:
        #img, boxes = random_flip(img, bbox)
        #img, boxes = random_crop(img, bbox)
        img, boxes = resize(img, bbox, (self.figsize, self.figsize))
    else:
        img, boxes = resize(img, bbox, (self.figsize, self.figsize))
        # Use the boxes returned by resize, not the original bbox.
        img, boxes = center_crop(img, boxes, (self.figsize, self.figsize))

    img = self.transform(img)
    # Scale pixel values from [0, 1] to [-1, 1].
    img = 2 * img - 1
    return texts_encoded, img, boxes
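# A one-line sketch (an aside, not from the original code): the 2 * img - 1
# above maps ToTensor's [0, 1] output to [-1, 1], matching tanh-style
# generators; invert it when visualizing samples:
# img_01 = (img + 1) / 2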
def __getitem__(self, index):
    fname = os.path.join(self.im_pth, self.fnames[index])
    img = Image.open(fname)
    if img.mode != 'RGB':
        img = img.convert('RGB')
    boxes = self.boxes[index].clone()
    size = self.size

    if self.train:
        img, boxes = random_flip(img, boxes)
        img, boxes = random_crop(img, boxes)
        img, boxes = resize(img, boxes, size)
    else:
        img, boxes = center_crop(img, boxes, size)
        img, boxes = resize(img, boxes, size)

    if self.transform is not None:
        img = self.transform(img)

    # Build a density map: each surviving box adds 1/area over its region,
    # so the map sums to (approximately) the number of counted boxes.
    dense_map = torch.zeros([1, img.size()[1], img.size()[2]], dtype=torch.float32)
    box_num = 0
    for box in boxes:
        area = (box[2] - box[0]) * (box[3] - box[1])
        if area < 100.:  # skip tiny boxes
            continue
        box_num += 1
        x1, y1, x2, y2 = (int(v) for v in box)
        try:
            dense_map[:, y1:y2, x1:x2] += 1 / area
        except Exception:
            print(fname, dense_map.size())
            print(y1, y2, x1, x2, area)
    return img, dense_map, box_num
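# A minimal sketch (not from the original code) of what the density map
# encodes: each box contributes 1/area of mass over its region, so summing
# the map recovers roughly the number of counted boxes. Data is made up.
if __name__ == '__main__':
    import torch
    dense_map = torch.zeros(1, 8, 8)
    boxes = torch.tensor([[0, 0, 4, 4], [2, 2, 8, 8]], dtype=torch.float32)
    for x1, y1, x2, y2 in boxes:
        area = (x2 - x1) * (y2 - y1)
        dense_map[:, int(y1):int(y2), int(x1):int(x2)] += 1 / area
    print(dense_map.sum())  # ~2.0: one unit of mass per box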
def __getitem__(self, idx):
    '''Load image.

    Args:
      idx: (int) image index.

    Returns:
      img: (tensor) image tensor.
      boxes: (tensor) bounding boxes.
      labels: (tensor) class labels.
    '''
    # Load image and boxes.
    fname = self.fnames[idx]
    img = Image.open(os.path.join(self.root, fname))
    if img.mode != 'RGB':
        img = img.convert('RGB')
    boxes = self.boxes[idx].clone()
    labels = self.labels[idx]
    size = self.input_size

    # Data augmentation.
    if self.train:
        img, boxes = random_flip(img, boxes)
        img, boxes = random_crop(img, boxes)
        img, boxes = resize(img, boxes, (size, size))
    else:
        img, boxes = resize(img, boxes, size)
        img, boxes = center_crop(img, boxes, (size, size))

    img = self.transform(img)
    return img, boxes, labels
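# The detection datasets above return per-image box/label tensors whose first
# dimension varies, so PyTorch's default collate cannot stack them. A minimal
# sketch of a custom collate_fn (an assumption, not part of the original code):
import torch
from torch.utils.data import DataLoader

def detection_collate(batch):
    # Stack images (all resized to the same size); keep boxes/labels as lists.
    imgs = torch.stack([item[0] for item in batch], dim=0)
    boxes = [item[1] for item in batch]
    labels = [item[2] for item in batch]
    return imgs, boxes, labels

# loader = DataLoader(dataset, batch_size=8, shuffle=True,
#                     collate_fn=detection_collate)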
def __getitem__(self, idx):
    size = self.input_size
    img_path = self.excuse_list[idx]
    img = Image.open(img_path)
    if img.mode != 'RGB':
        img = img.convert('RGB')
    if self.att_flag:
        att_map = self.get_att.get_att(img)
    if self.align_flag:
        img = self.alignment(img)
    img = resize(img, size)
    id_ = self.excuse_ids[idx]

    if self.att_flag:
        att_map = resize(att_map, size)
        if self.flip_flag:
            img, att_map = random_flip([img, att_map])
        att_map = self.transform_att(att_map)
        att_map = torch.floor(100 * att_map)
        att_map = self.thresh(att_map)
        img = self.transform(img)
        return img, img_path, id_, att_map
    else:
        if self.flip_flag:
            img = random_flip(img)
        img = self.transform(img)
        return img, img_path, id_
def __getitem__(self, idx):
    # Load the image pair.
    size = self.input_size
    fname_pair1 = self.valid_pair[idx][0]
    fname_pair2 = self.valid_pair[idx][1]
    img_path_pair1 = os.path.join(self.root, fname_pair1)
    img_path_pair2 = os.path.join(self.root, fname_pair2)
    img_pair1 = Image.open(img_path_pair1)
    img_pair2 = Image.open(img_path_pair2)

    if img_pair1.mode != 'RGB':
        img_pair1 = img_pair1.convert('RGB')
    boxes_pair1 = torch.zeros(2, 4)  # dummy boxes; only the image is resized
    img_pair1 = resize(img_pair1, boxes_pair1, size, test_flag=True)
    img_pair1 = self.transform(img_pair1)

    if img_pair2.mode != 'RGB':
        img_pair2 = img_pair2.convert('RGB')
    boxes_pair2 = torch.zeros(2, 4)
    img_pair2 = resize(img_pair2, boxes_pair2, size, test_flag=True)
    img_pair2 = self.transform(img_pair2)

    # The third element is the same/different label for the pair.
    return img_pair1, img_pair2, self.valid_pair[idx][2]
def data_augmentation(img_name, idx):
    '''Data augmentation on the segmented image.

    This file is from pytorch_pytorch_retinanet.retinanet_dataset.py.
    All transformation methods are imported from transform.py, which comes
    from pytorch_retinanet.utils.

    Note: for now, the image is augmented randomly.
    '''
    img = Image.open(os.path.join(SEG_IMG_DIR, img_name))
    if img.mode != 'RGB':
        img = img.convert('RGB')
    size = 600  # the desired image size
    img = random_flip(img)
    img = random_crop(img)
    img = resize(img, (size, size))
    if random.random() > 0.5:
        img = ImageEnhance.Color(img).enhance(random.uniform(0, 1))
        img = ImageEnhance.Brightness(img).enhance(random.uniform(0.5, 2))
        img = ImageEnhance.Contrast(img).enhance(random.uniform(0.5, 1.5))
        img = ImageEnhance.Sharpness(img).enhance(random.uniform(0.5, 1.5))
        # im1 = img.filter(ImageFilter.BLUR)  # Gaussian blur
    else:
        img = resize(img, (size, size))
        # img, boxes = center_crop(img, boxes, (size, size))
    filename = img_name[:-4] + "_" + str(idx) + ".png"
    img.save(os.path.join(SEG_IMG_DIR + "/transformed", filename), "PNG")
def __getitem__(self, idx):
    '''Load image.

    Args:
      idx: (int) image index.

    Returns:
      img: (tensor) image tensor.
      boxes: (tensor) bounding boxes.
      labels: (tensor) class labels.
    '''
    # Load image and boxes.
    fname = self.fnames[idx]
    img = Image.open(os.path.join(self.root, fname))
    if img.mode != 'RGB':
        img = img.convert('RGB')
    boxes = self.boxes[idx].clone()
    labels = self.labels[idx]
    size = self.input_size

    # Data augmentation.
    if self.train:
        img, boxes = random_flip(img, boxes)
        img, boxes = random_crop(img, boxes)
        img, boxes = resize(img, boxes, (size, size))
    else:
        img, boxes = resize(img, boxes, size)
        img, boxes = center_crop(img, boxes, (size, size))

    img = self.transform(img)
    return img, boxes, labels
def __getitem__(self, idx):
    '''Load image.

    Args:
      idx: (int) image index.

    Returns:
      img: (tensor) image tensor.
      img_path: (str) path to the image.
      id_: identity label.
      att_map: (tensor) thresholded attention map.
    '''
    # Load image and compute its attention map.
    size = self.input_size
    fname = self.fnames[idx]
    img_path = os.path.join(self.root, fname)
    img = Image.open(img_path)
    att_map, out_catch = self.get_att.get_att(img)
    if img.mode != 'RGB':
        img = img.convert('RGB')
    boxes = torch.zeros(2, 4)  # dummy boxes; only the image is resized/cropped
    img = resize(img, boxes, size, test_flag=True)
    att_map = resize(att_map, boxes, size, test_flag=True)
    img = center_crop(img, boxes, (size, size), test_flag=True)
    att_map = center_crop(att_map, boxes, (size, size), test_flag=True)
    img = self.transform(img)
    att_map = self.transform_att(att_map)
    att_map = torch.floor(100 * att_map)
    att_map = self.thresh(att_map)
    id_ = self.ids[idx]
    return img, img_path, id_, att_map
def __getitem__(self, idx):
    '''Load image.

    Args:
      idx: (int) image index.

    Returns:
      img: (tensor) image tensor.
      boxes: (tensor) bounding boxes.
      labels: (tensor) class labels.
    '''
    # Load image and boxes. The three channels are stored as separate
    # grayscale files (_a/_b/_c) and merged into a single RGB image.
    fname = self.fnames[idx]
    prefix_name = fname[:2]
    image_path = self.root + '/' + prefix_name + '/' + fname
    img_a = Image.open(image_path + '_a.jpg')
    img_b = Image.open(image_path + '_b.jpg')
    img_c = Image.open(image_path + '_c.jpg')
    img = Image.merge('RGB', (img_a, img_b, img_c))
    boxes = self.boxes[idx].clone()
    labels = self.labels[idx]
    size = self.input_size

    # Data augmentation.
    if self.train:
        img, boxes = random_flip(img, boxes)
        img, boxes = random_crop(img, boxes)
        img, boxes = resize(img, boxes, (size, size))
    else:
        img, boxes = resize(img, boxes, size)
        img, boxes = center_crop(img, boxes, (size, size))

    img = self.transform(img)
    return img, boxes, labels
def __getitem__(self, idx):
    '''Load an (anchor, positive, negative) triplet.

    Assumes self.ids is sorted so that all samples of one identity are
    contiguous.

    Returns:
      tri_img: (list) the three image tensors.
      tri_img_path: (list) the three image paths.
      triplet_id: (list) the three sample indices.
      tri_att_map: (list) the three attention maps.
    '''
    # Anchor.
    triplet_id = [idx]

    # Positive: a random sample with the same identity as the anchor.
    anchor_id = self.ids[idx]
    first = self.ids.index(anchor_id)
    pos_counter = self.ids.count(anchor_id)
    pos_id = random.randint(first, first + pos_counter - 1)
    triplet_id.append(pos_id)

    # Negative: a random sample with a different identity.
    neg_id = random.randint(0, len(self.ids) - 1)
    while self.ids[neg_id] == anchor_id:
        neg_id = random.randint(0, len(self.ids) - 1)
    triplet_id.append(neg_id)

    tri_img = []
    tri_img_path = []
    tri_att_map = []
    for get_idx in triplet_id:
        size = self.input_size
        fname = self.fnames[get_idx]
        img_path = os.path.join(self.root, fname)
        tri_img_path.append(img_path)
        img = Image.open(img_path)
        att_map, out_catch = self.get_att.get_att(img)
        if img.mode != 'RGB':
            img = img.convert('RGB')
        boxes = torch.zeros(2, 4)  # dummy boxes
        img = resize(img, boxes, size, test_flag=True)
        att_map = resize(att_map, boxes, size, test_flag=True)
        img = center_crop(img, boxes, (size, size), test_flag=True)
        att_map = center_crop(att_map, boxes, (size, size), test_flag=True)
        img = self.transform(img)
        tri_img.append(img)
        att_map = self.transform_att(att_map)
        att_map = torch.floor(100 * att_map)
        att_map = self.thresh(att_map)
        tri_att_map.append(att_map)
    return tri_img, tri_img_path, triplet_id, tri_att_map
def __getitem__(self, idx):
    '''Load image.

    Args:
      idx: (int) image index.

    Returns:
      img: (tensor) image tensor.
      boxes: (tensor) bounding boxes.
      labels: (tensor) class labels.
    '''
    # Load image and boxes.
    fname = self.fnames[idx]
    img = Image.open(os.path.join(self.root, fname))
    if img.mode != 'RGB':
        img = img.convert('RGB')
    width, height = img.size

    # Labels are stored in YOLO format: "class cx cy w h", normalized by
    # image size; convert each line to absolute xyxy coordinates.
    flabel = fname.replace('images/', 'labels/').replace('.jpg', '.txt').replace(
        '.png', '.txt').replace('.jpeg', '.txt')
    box = []
    label = []
    with open(flabel) as f:
        for line in f.readlines():
            ls = line.strip().split()
            x = float(ls[1]) * width
            y = float(ls[2]) * height
            w = float(ls[3]) * width
            h = float(ls[4]) * height
            box.append([x - w / 2, y - h / 2, x + w / 2, y + h / 2])
            label.append(int(ls[0]))
    boxes = torch.Tensor(box)
    labels = torch.LongTensor(label)
    size = self.input_size

    # Data augmentation.
    if self.train:
        img, boxes = random_flip(img, boxes)
        img, boxes = random_crop(img, boxes)
        img, boxes = resize(img, boxes, (size, size))
    else:
        img, boxes = resize(img, boxes, size)
        img, boxes = center_crop(img, boxes, (size, size))

    img = self.transform(img)
    return img, boxes, labels
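# A worked example of the YOLO label convention decoded above: each line is
# "class cx cy w h" with coordinates normalized by image size. The values
# below are made up for illustration.
width, height = 640, 480
ls = "0 0.5 0.5 0.25 0.5".split()
x, y = float(ls[1]) * width, float(ls[2]) * height   # center: (320, 240)
w, h = float(ls[3]) * width, float(ls[4]) * height   # size:   (160, 240)
box = [x - w / 2, y - h / 2, x + w / 2, y + h / 2]   # xyxy: [240, 120, 400, 360]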
def rebuild_pred(self, pred, meta, save_dir=None):
    affine = meta['affine']
    cropped_shape = meta['cropped_shape']
    original_shape = meta['shape']
    orient = meta['orient']

    # pad_width for np.pad: meta['nonair_bbox'] stores (start, end) per axis;
    # convert the end index into the amount of padding after the crop.
    pad_width = meta['nonair_bbox']
    for i in range(len(original_shape)):
        pad_width[i][1] = original_shape[i] - (pad_width[i][1] + 1)

    print('Resample pred to original spacing...')
    pred = resize(pred, cropped_shape, is_label=True)
    print('Add padding to pred...')
    pred = np.pad(pred, pad_width, constant_values=0)
    pred = nib.orientations.apply_orientation(pred, orient)

    if save_dir:
        save_dir = Path(save_dir)
        if not save_dir.exists():
            save_dir.mkdir(parents=True)
        pred_nib = nib.Nifti1Pair(pred, np.array(affine))
        nib_fname = '%s_pred.nii.gz' % meta['case_id']
        nib.save(pred_nib, str(save_dir / nib_fname))
    return {'pred': pred, 'meta': meta}
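# A minimal check (with made-up data) of the pad_width arithmetic above: for
# a crop spanning [start, end] on an axis of length n, np.pad needs
# (start, n - (end + 1)) to restore the original length.
import numpy as np
orig = np.arange(10)
start, end = 2, 6                      # crop keeps indices 2..6
cropped = orig[start:end + 1]
restored = np.pad(cropped, (start, len(orig) - (end + 1)), constant_values=0)
assert restored.shape == orig.shape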
def __getitem__(self, idx):
    '''Load image.

    Args:
      idx: (int) image index.

    Returns:
      img: (tensor) image tensor.
      img_path: (str) path to the image.
      id_: identity label.
    '''
    size = self.input_size
    fname = self.fnames[idx]
    img_path = os.path.join(self.root, fname)
    img = Image.open(img_path)
    if img.mode != 'RGB':
        img = img.convert('RGB')
    boxes = torch.zeros(2, 4)  # dummy boxes
    img = resize(img, boxes, size, test_flag=True)
    img = center_crop(img, boxes, (size, size), test_flag=True)
    img = self.transform(img)
    id_ = self.ids[idx]
    return img, img_path, id_
def run(self, img):
    if random.random() > self.chance:
        return img
    # 1. Shrink the image by a random factor.
    fac = random.uniform(self.fac_low, self.fac_high)
    log.debug("fac = %.3f" % (fac))
    if math.fabs(fac - 1.0) < 0.02:
        return img
    h, w, _ = img.shape
    h, w = int(fac * h), int(fac * w)
    img2 = tr.resize(img, (w, h))
    # 2. Pick a random background image of the original size.
    bg_img = self._randomImg(img.shape)
    # 3. Paste the shrunk image onto the background at a random offset.
    dh = img.shape[0] - h
    dw = img.shape[1] - w
    x_offset = random.randint(0, dw)
    y_offset = random.randint(0, dh)
    img2 = tr.paste(bg_img, img2, x_offset, y_offset)
    return img2
def __getitem__(self, idx):
    '''Load image.

    Args:
      idx: (int) image index.

    Returns:
      img: (tensor) image tensor.
      boxes: (tensor) bounding boxes.
      labels: (tensor) class labels.
      att_map: (tensor) box-derived attention map.
      img_path: (str) path to the image.
    '''
    # Load image and boxes.
    fname = self.fnames[idx]
    img_path = os.path.join(self.root, fname)
    img = Image.open(img_path)
    if img.mode != 'RGB':
        img = img.convert('RGB')
    boxes = self.boxes[idx].clone()
    labels = self.labels[idx]
    size = self.input_size

    # Attention map: 1 inside every box, 0 elsewhere. Boxes are xyxy, so rows
    # are indexed by y and columns by x; this assumes shape_list stores (h, w)
    # (the original code indexed [x, y]).
    src_shape = self.shape_list[idx]
    att_map = np.zeros([src_shape[0], src_shape[1]])
    for att_box in boxes:
        att_map[int(att_box[1]):int(att_box[3]), int(att_box[0]):int(att_box[2])] = 1

    # Data augmentation.
    if self.train:
        img, boxes = random_flip(img, boxes)
        img, boxes = random_crop(img, boxes)
        img, boxes = resize(img, boxes, (size, size))
    else:
        img, boxes = resize(img, boxes, size)
        img, boxes = center_crop(img, boxes, (size, size))

    att_map = Image.fromarray(att_map)
    att_map = att_map.resize((size // 2, size // 2), Image.BILINEAR)
    img = self.transform(img)
    att_map = self.transform(att_map)
    return img, boxes, labels, att_map, img_path
def crop_img(im, size):
    # Center-crop to a square, then resize; size is (w, h).
    # Use integer division: a float slice index raises TypeError in Python 3.
    if im.shape[0] > im.shape[1]:
        c = (im.shape[0] - im.shape[1]) // 2
        im = im[c:c + im.shape[1], :, :]
    else:
        c = (im.shape[1] - im.shape[0]) // 2
        im = im[:, c:c + im.shape[0], :]
    im = transform.resize(im, (size[1], size[0]))
    return im
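# Note on the (size[1], size[0]) above: skimage's transform.resize takes the
# output shape in array order (rows, cols) = (h, w), so a size given as
# (w, h) must be flipped. A quick check with dummy data:
import numpy as np
from skimage import transform
im = np.zeros((100, 50, 3))
out = transform.resize(im, (30, 60))   # output shape order is (h, w, ...)
assert out.shape[:2] == (30, 60)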
def __getitem__(self, idx):
    '''Load image.

    Args:
      idx: (int) image index.

    Returns:
      img: (tensor) image tensor.
      boxes: (tensor) bounding boxes.
      labels: (tensor) class labels.
      fname: (str) image file name.
      In debug mode, returns (img, filled_labels) instead.
    '''
    # Load image and boxes.
    fname = self.fnames[idx]
    img = Image.open(os.path.join(self.root, fname))
    if img.mode != 'RGB':
        img = img.convert('RGB')
    boxes = self.boxes[idx].clone()
    labels = self.labels[idx]
    size = self.input_size

    # Data augmentation.
    if self.train:
        img, boxes = random_flip(img, boxes)
        img, boxes = random_crop(img, boxes)
        img, boxes = resize(img, boxes, (size, size))
    else:
        img, boxes = resize(img, boxes, (size, size))
        # img, boxes = center_crop(img, boxes, (size, size))

    img = self.transform(img)

    if self.is_debug:
        # Pad/truncate boxes into a fixed-size array for debugging.
        filled_labels = np.zeros((self.max_objects, 4), dtype=np.float32)
        if boxes is not None:
            filled_labels[range(len(boxes))[:self.max_objects]] = boxes[:self.max_objects]
        else:
            print('no object')
        filled_labels = torch.from_numpy(filled_labels)
        return img, filled_labels
    else:
        return img, boxes, labels, fname
def __getitem__(self, index):
    '''Load image.

    Args:
      index: (int) image index.

    Returns:
      img: (tensor) image tensor.
      boxes: (tensor) bounding boxes.
      labels: (tensor) class labels.
      fname: (str) image file name without extension.
    '''
    # Load image and boxes.
    img_path = self._imgpath[index].rstrip()
    fname = img_path.split('/')[-1].split('.')[0]
    img = Image.open(img_path)
    if img.mode != 'RGB':
        img = img.convert('RGB')

    # Each label row is "class x1 y1 x2 y2".
    label_path = self._labpath[index].rstrip()
    targets = np.loadtxt(label_path).reshape(-1, 5)
    boxes = torch.Tensor(targets[:, 1:])
    labels = torch.LongTensor(targets[:, 0])
    size = self.input_size

    # Data augmentation.
    if self.train:
        img, boxes = random_flip(img, boxes)
        img, boxes = random_crop(img, boxes)
        img, boxes = resize(img, boxes, (size, size))
    else:
        img, boxes = resize(img, boxes, (size, size))
        # img, boxes = center_crop(img, boxes, (size, size))

    img = self.transform(img)
    return img, boxes, labels, fname
def testEffect(imgs):
    worker = getChain1()
    log.info("%s" % (worker))
    for i in range(len(imgs)):
        img = imgs[i]
        img2 = worker.run(img)
        img = tr.resize(img, SIZE)
        tr.showImgs([img, img2])
        # tr.saveImgs([img, img2], "./result/%d.jpg" % (i))
    return
def build_mPA_GT(root, ann_root, dataType, img_size=360):
    '''
    Args:
      root: (str) directory of images.
    '''
    print("<" + "=" * 20 + ">")
    print("[Building GT for mPA]")
    fnames = os.listdir(root)
    fnames.sort()
    coco = COCO(ann_root)
    print("Total number of images : ", len(fnames))
    count = 0
    for name in tqdm(fnames):
        img_num = int(name.replace(".jpg", ""))
        image_path = os.path.join(root, name)
        img = Image.open(image_path)
        annIds = coco.getAnnIds(imgIds=[img_num], iscrowd=None)
        anns = coco.loadAnns(annIds)
        if len(anns) == 0:
            continue
        count += 1
        bbox_resize = []
        label_resize = []
        for ann in anns:
            coco_label = int(ann['category_id'])
            label = class_map(coco_label)
            # Note: COCO boxes are xywh, where (x, y) is the top-left
            # corner, not the center of the box.
            xywh = [float(ann['bbox'][0]), float(ann['bbox'][1]),
                    float(ann['bbox'][2]), float(ann['bbox'][3])]
            bbox = change_box_order(xywh, 'xywh2xyxy')
            label_resize.append(label)
            bbox_resize.append(bbox)
        bbox_resize = torch.Tensor(bbox_resize)
        img, boxes = resize(img, bbox_resize, (img_size, img_size))
        with open("./mPA/ground-truth/%s.txt" % (name.replace(".jpg", "")), "w") as file:
            for bbox, label in zip(boxes, label_resize):
                file.write("%s %.3f %.3f %.3f %.3f\n" %
                           (my_cate[label], bbox[0], bbox[1], bbox[2], bbox[3]))
    print("Total images converted : ", count)
    print("[Done]")
    return True
def test():
    transform = transforms.Compose([
        transforms.ToTensor(),
        # transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])
    cfg_path = './config/GAWWN_v1.cfg'
    train_dataset = GAWWN_Dataset(cfg=cfg_path, train=True, transform=transform)
    train_loader = DataLoader(train_dataset, batch_size=1, shuffle=False, num_workers=4)
    train_iter = iter(train_loader)
    for epoch in range(1):
        texts, img, boxes = next(train_iter)
        print("image shape : ", img.shape)
        print("texts shape : ", texts.shape)
        print("boxes shape : ", boxes.shape)
        print("boxes : ", boxes)
        grid = torchvision.utils.make_grid(img, 1)
        if not os.path.exists("./test"):
            os.makedirs("./test")
        torchvision.utils.save_image(grid, './test/test.jpg')
        img = Image.open('./test/test.jpg')
        draw = ImageDraw.Draw(img)
        for i, (box, text) in enumerate(zip(boxes, texts)):
            draw.rectangle(list(box[0]), outline='red', width=3)
        img.save('./test/test_bbox.jpg')

        # Cropping test.
        img = Image.open('./test/test.jpg')
        img, _ = resize(img, boxes, (16, 16))
        img_torch = transforms.ToTensor()(img).unsqueeze(0)
        img_torch = img_torch.repeat(10, 1, 1, 1)
        boxes = boxes.repeat(10, 1, 1)
        print('img_torch : ', img_torch.shape)
        print('boxes : ', boxes.shape)
        grid = compute_bbox_grid(img_torch, boxes, crop_size=16., img_size=128)
        output = F.grid_sample(img_torch, grid)
        print('output : ', output.shape)
        new_img_torch = output[0]
        plt.imshow(new_img_torch.numpy().transpose(1, 2, 0))
        plt.savefig('./test/crop.jpg')
def __call__(self, img):
    # Equalize the value channel in HSV space.
    hsv = color.rgb2hsv(img)
    hsv[:, :, 2] = exposure.equalize_hist(hsv[:, :, 2])
    img = color.hsv2rgb(hsv)
    # Center-crop to a square, resize, and move channels first (CHW).
    min_side = min(img.shape[:-1])
    centre = img.shape[0] // 2, img.shape[1] // 2
    img = img[centre[0] - min_side // 2:centre[0] + min_side // 2,
              centre[1] - min_side // 2:centre[1] + min_side // 2, :]
    img = transform.resize(img, (self.img_size, self.img_size))
    img = np.rollaxis(img, -1)
    return img
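# A hedged usage sketch of the preprocessing callable above ('Equalize' is a
# hypothetical name for its class, and img_size=224 is assumed): the result
# is a channels-first float array ready for a CNN.
# pre = Equalize(img_size=224)
# chw = pre(np.random.rand(480, 640, 3))   # -> shape (3, 224, 224)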
def testBg():
    # adir = "../data/driver"
    adir = "/Users/songbin/dev/data/docID/small/"
    imgs = loadImgs(adir)
    bg_imgs = "/Users/songbin/dev/data/bg"
    bgwarper = wraper.BackGroundWraper(chance=1.0)
    bgwarper.loadImgs(bg_imgs, (800, 600))
    noiser = wraper.NoiseWraper(chance=1.0, maxSigma=13)
    for i in range(len(imgs)):
        img = imgs[i]
        img = tr.resize(img, (480, 360))
        img2 = img.copy()
        img2 = bgwarper.run(img2)
        img2 = noiser.run(img2)
        tr.showImgs([img, img2])
    return
def tranImgs(imgs):
    noise = wraper.NoiseWraper(0.5, maxSigma=5)
    fmodel = "./model/haarcascade_frontalface_default.xml"
    face = wraper.FaceWraper(fmodel, 1.0)
    color = wraper.ColorWraper(0.5)
    aspect = wraper.AspectWraper(1.0)
    shadow = wraper.ShadowWraper(1.0)
    shrink = wraper.ShrinkWraper(chance=1.0)
    rotate2D = wraper.Rotate2DWraper(chance=1.0, angle=(-30, 30))
    rotate2DX = wraper.Rotate2DXWraper(chance=1.0, angle=(-0.5, 0.5))
    rotate3D = wraper.Rotate3DWraper(chance=1.0)
    rotate3DX = wraper.Rotate3DXWraper(chance=1.0)
    croper = wraper.EraseWraper(chance=1.0)
    print(noise)
    print(color)
    for i in range(len(imgs)):
        img = imgs[i]
        img2 = img.copy()
        img = tr.resize(img, (480, 360))
        img2 = face.run(img2)
        # img2 = tr.resize(img2, (480, 360))
        # img2 = croper.run(img2)
        # img2 = rotate2DX.run(img2)
        img2 = rotate3DX.run(img2)
        # img2 = noise.run(img2)
        # img2 = color.run(img2)
        # img2 = shadow.run(img2)
        # img2 = aspect.run(img2)
        # img2 = shrink.run(img2)
        tr.showImgs([img, img2])
        # tr.saveImgs([img, img2], "./result/%d.jpg" % (i))
    return
def generate(data_dir, batch_size=16, image_size=640, min_text_size=8,
             shrink_ratio=0.4, thresh_min=0.3, thresh_max=0.7, is_training=True):
    split = 'train' if is_training else 'test'
    with open(osp.join(data_dir, f'{split}_list.txt')) as f:
        image_fnames = f.readlines()
    image_paths = [osp.join(data_dir, f'{split}_images', image_fname.strip())
                   for image_fname in image_fnames]
    gt_paths = [osp.join(data_dir, f'{split}_gts', image_fname.strip() + '.txt')
                for image_fname in image_fnames]
    all_anns = load_all_anns(gt_paths)

    transform_aug = iaa.Sequential(
        [iaa.Fliplr(0.5), iaa.Affine(rotate=(-10, 10)), iaa.Resize((0.5, 3.0))])
    dataset_size = len(image_paths)
    indices = np.arange(dataset_size)
    if is_training:
        np.random.shuffle(indices)
    current_idx = 0
    b = 0
    while True:
        if current_idx >= dataset_size:
            if is_training:
                np.random.shuffle(indices)
            current_idx = 0
        if b == 0:
            # Init batch arrays.
            batch_images = np.zeros([batch_size, image_size, image_size, 3], dtype=np.float32)
            batch_gts = np.zeros([batch_size, image_size, image_size], dtype=np.float32)
            batch_masks = np.zeros([batch_size, image_size, image_size], dtype=np.float32)
            batch_thresh_maps = np.zeros([batch_size, image_size, image_size], dtype=np.float32)
            batch_thresh_masks = np.zeros([batch_size, image_size, image_size], dtype=np.float32)
            batch_loss = np.zeros([batch_size, ], dtype=np.float32)
        i = indices[current_idx]
        image_path = image_paths[i]
        anns = all_anns[i]
        image = cv2.imread(image_path)
        # show_polys(image.copy(), anns, 'before_aug')
        if is_training:
            transform_aug = transform_aug.to_deterministic()
            image, anns = transform(transform_aug, image, anns)
            image, anns = crop(image, anns)
        image, anns = resize(image_size, image, anns)
        # show_polys(image.copy(), anns, 'after_aug')
        anns = [ann for ann in anns if Polygon(ann['poly']).is_valid]
        gt = np.zeros((image_size, image_size), dtype=np.float32)
        mask = np.ones((image_size, image_size), dtype=np.float32)
        thresh_map = np.zeros((image_size, image_size), dtype=np.float32)
        thresh_mask = np.zeros((image_size, image_size), dtype=np.float32)
        for ann in anns:
            poly = np.array(ann['poly'])
            height = max(poly[:, 1]) - min(poly[:, 1])
            width = max(poly[:, 0]) - min(poly[:, 0])
            polygon = Polygon(poly)
            # Generate gt and mask: ignore invalid, tiny, or '###' regions.
            if polygon.area < 1 or min(height, width) < min_text_size or ann['text'] == '###':
                cv2.fillPoly(mask, poly.astype(np.int32)[np.newaxis, :, :], 0)
                continue
            # Shrink the polygon by the clipping offset used for the gt map.
            distance = polygon.area * (1 - np.power(shrink_ratio, 2)) / polygon.length
            subject = [tuple(l) for l in ann['poly']]
            padding = pyclipper.PyclipperOffset()
            padding.AddPath(subject, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
            shrinked = padding.Execute(-distance)
            if len(shrinked) == 0:
                cv2.fillPoly(mask, poly.astype(np.int32)[np.newaxis, :, :], 0)
                continue
            shrinked = np.array(shrinked[0]).reshape(-1, 2)
            if shrinked.shape[0] > 2 and Polygon(shrinked).is_valid:
                cv2.fillPoly(gt, [shrinked.astype(np.int32)], 1)
            else:
                cv2.fillPoly(mask, poly.astype(np.int32)[np.newaxis, :, :], 0)
                continue
            # Generate thresh map and thresh mask.
            draw_thresh_map(ann['poly'], thresh_map, thresh_mask, shrink_ratio=shrink_ratio)
        thresh_map = thresh_map * (thresh_max - thresh_min) + thresh_min
        # Subtract the per-channel mean.
        image = image.astype(np.float32)
        image[..., 0] -= mean[0]
        image[..., 1] -= mean[1]
        image[..., 2] -= mean[2]
        batch_images[b] = image
        batch_gts[b] = gt
        batch_masks[b] = mask
        batch_thresh_maps[b] = thresh_map
        batch_thresh_masks[b] = thresh_mask
        b += 1
        current_idx += 1
        if b == batch_size:
            inputs = [batch_images, batch_gts, batch_masks,
                      batch_thresh_maps, batch_thresh_masks]
            outputs = batch_loss
            yield inputs, outputs
            b = 0
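# The generator above yields (inputs, outputs) batches endlessly, which
# matches the Keras fit-on-generator contract. A hedged usage sketch
# ('model' and the data directory are assumptions, not defined here):
# train_gen = generate('datasets/total_text', batch_size=16, is_training=True)
# model.fit(train_gen, steps_per_epoch=100, epochs=10)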
import cv2
import transform
import numpy as np
import argparse
# Note: threshold_adaptive was renamed threshold_local in newer scikit-image.
from skimage.filters import threshold_adaptive

# Construct the argument parser and parse the arguments.
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--image", required=True,
                help="Path to the image to be scanned")
args = vars(ap.parse_args())

image = cv2.imread(args["image"])
print('Original image shape: ', image.shape)
# The image is resized to height 300 below, so the ratio is height-based.
ratio = image.shape[0] / 300.0
orig = image.copy()
image = transform.resize(image, height=300.0)
print('image shape: ', image.shape)

# Convert the image to grayscale, blur it, and find edges.
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
gray = cv2.GaussianBlur(gray, (5, 5), 0)
edged = cv2.Canny(gray, 75, 200)

# Show the original image and the edge-detected image.
print('STEP 1: Edge Detection')
cv2.imshow("Image", image)
cv2.imshow("Edged", edged)
cv2.waitKey(0)
cv2.destroyAllWindows()

# Find the contours in the edged image, keeping only the largest ones,
# and initialize the screen contour.
def Load_Images(img):
    # Scale to [0, 1], resize to the model input size, and add a batch axis.
    pred_img = np.array(img).astype('float32') / 255
    pred_img = transform.resize(pred_img, (200, 200, 3))
    pred_img = np.expand_dims(pred_img, axis=0)
    return pred_img
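# A hedged usage sketch: Load_Images produces a (1, 200, 200, 3) float batch,
# suitable for a Keras-style model.predict call ('model' is an assumption):
# from PIL import Image
# img = Image.open('sample.jpg')
# probs = model.predict(Load_Images(img))   # shape (1, num_classes)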
    if verbose:
        print('Resampling the case for prediction...')
    case_ = resample_normalize_case(case, target_spacing, normalize_stats)
    if verbose:
        print('Predicting the case...')
    pred = predict_per_patch(case_['image'], model, num_classes, patch_size,
                             step_per_patch, verbose, one_hot)
    if verbose:
        print('Resizing the case to original shape...')
    case['pred'] = resize(pred, orig_shape, is_label=one_hot is False)
    case['affine'] = affine
    if verbose:
        print('All done!')
    return case


def batch_predict_case(load_dir, save_dir, model, target_spacing,
                       normalize_stats, num_classes=3,
                       patch_size=(240, 240, 80), step_per_patch=4,
                       data_range=None):
def generate(cfg, train_or_val='train'):

    def init_input():
        batch_images = np.zeros([cfg.BATCH_SIZE, cfg.IMAGE_SIZE, cfg.IMAGE_SIZE, 3], dtype=np.float32)
        batch_gts = np.zeros([cfg.BATCH_SIZE, cfg.IMAGE_SIZE, cfg.IMAGE_SIZE], dtype=np.float32)
        batch_masks = np.zeros([cfg.BATCH_SIZE, cfg.IMAGE_SIZE, cfg.IMAGE_SIZE], dtype=np.float32)
        batch_thresh_maps = np.zeros([cfg.BATCH_SIZE, cfg.IMAGE_SIZE, cfg.IMAGE_SIZE], dtype=np.float32)
        batch_thresh_masks = np.zeros([cfg.BATCH_SIZE, cfg.IMAGE_SIZE, cfg.IMAGE_SIZE], dtype=np.float32)
        # batch_loss = np.zeros([cfg.BATCH_SIZE, ], dtype=np.float32)
        return [batch_images, batch_gts, batch_masks, batch_thresh_maps, batch_thresh_masks]

    data_path = cfg.TRAIN_DATA_PATH if train_or_val == 'train' else cfg.VAL_DATA_PATH
    with open(data_path, encoding='utf8') as f:
        data = json.load(f)
    data_root_dir = data["data_root"]
    data_list = data["data_list"]
    image_paths = []
    all_anns = []
    for data_item in data_list:
        img_name = data_item["img_name"]
        annotations = data_item["annotations"]
        anns = []
        for annotation in annotations:
            text = annotation["text"]
            poly = annotation["polygon"]
            if len(poly) < 3:
                continue
            anns.append({'text': text, 'poly': poly})
        image_paths.append(osp.join(data_root_dir, img_name))
        all_anns.append(anns)

    transform_aug = iaa.Sequential(
        [iaa.Affine(rotate=(-10, 10)), iaa.Resize((0.5, 3.0))])
    dataset_size = len(image_paths)
    indices = np.arange(dataset_size)
    if train_or_val == 'train':
        np.random.shuffle(indices)
    current_idx = 0
    b = 0
    while True:
        if current_idx >= dataset_size:
            if train_or_val == 'train':
                np.random.shuffle(indices)
            current_idx = 0
        if b == 0:
            batch_images, batch_gts, batch_masks, batch_thresh_maps, batch_thresh_masks = init_input()
        i = indices[current_idx]
        image_path = image_paths[i]
        # anns example:
        # [{'text': 'chinese', 'poly': [[17.9, 29.2], [18.5, 7.2], [525.3, 21.0], [524.7, 43.0]]}, ...]
        anns = all_anns[i]
        image = cv2.imread(image_path)
        # show_polys(image.copy(), anns, 'before_aug')
        if train_or_val == 'train':
            transform_aug = transform_aug.to_deterministic()
            image, anns = transform(transform_aug, image, anns)
            image, anns = crop(image, anns)
        image, anns = resize(cfg.IMAGE_SIZE, image, anns)
        # show_polys(image.copy(), anns, 'after_aug')
        anns = [ann for ann in anns if Polygon(ann['poly']).is_valid]
        gt = np.zeros((cfg.IMAGE_SIZE, cfg.IMAGE_SIZE), dtype=np.float32)
        mask = np.ones((cfg.IMAGE_SIZE, cfg.IMAGE_SIZE), dtype=np.float32)
        thresh_map = np.zeros((cfg.IMAGE_SIZE, cfg.IMAGE_SIZE), dtype=np.float32)
        thresh_mask = np.zeros((cfg.IMAGE_SIZE, cfg.IMAGE_SIZE), dtype=np.float32)
        for ann in anns:
            poly = np.array(ann['poly'])
            height = max(poly[:, 1]) - min(poly[:, 1])
            width = max(poly[:, 0]) - min(poly[:, 0])
            polygon = Polygon(poly)
            # Generate gt and mask: ignore invalid, tiny, or ignore-listed text.
            if polygon.area < 1 or min(height, width) < cfg.MIN_TEXT_SIZE or ann['text'] in cfg.IGNORE_TEXT:
                cv2.fillPoly(mask, poly.astype(np.int32)[np.newaxis, :, :], 0)
                continue
            distance = polygon.area * (1 - np.power(cfg.SHRINK_RATIO, 2)) / polygon.length
            subject = [tuple(l) for l in ann['poly']]
            padding = pyclipper.PyclipperOffset()
            padding.AddPath(subject, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
            shrinked = padding.Execute(-distance)
            if len(shrinked) == 0:
                cv2.fillPoly(mask, poly.astype(np.int32)[np.newaxis, :, :], 0)
                continue
            shrinked = np.array(shrinked[0]).reshape(-1, 2)
            if shrinked.shape[0] > 2 and Polygon(shrinked).is_valid:
                cv2.fillPoly(gt, [shrinked.astype(np.int32)], 1)
            else:
                cv2.fillPoly(mask, poly.astype(np.int32)[np.newaxis, :, :], 0)
                continue
            # Generate thresh map and thresh mask.
            draw_thresh_map(ann['poly'], thresh_map, thresh_mask, shrink_ratio=cfg.SHRINK_RATIO)
        thresh_map = thresh_map * (cfg.THRESH_MAX - cfg.THRESH_MIN) + cfg.THRESH_MIN
        image = image.astype(np.float32)
        image -= mean
        batch_images[b] = image
        batch_gts[b] = gt
        batch_masks[b] = mask
        batch_thresh_maps[b] = thresh_map
        batch_thresh_masks[b] = thresh_mask
        b += 1
        current_idx += 1
        if b == cfg.BATCH_SIZE:
            inputs = [batch_images, batch_gts, batch_masks,
                      batch_thresh_maps, batch_thresh_masks]
            # outputs = batch_loss
            outputs = []
            yield inputs, outputs
            b = 0
def resizeBbImgs(self, w, h):
    # Resize all cached background images to (w, h).
    sz = (w, h)
    for i in range(len(self.bg_imgs)):
        self.bg_imgs[i] = tr.resize(self.bg_imgs[i], sz)
    return
def run(self, img):
    img2 = tr.resize(img, (self.w, self.h))
    return img2