Example #1
    def __getitem__(self, index):
        img_path = self.fnames[index]
        bbox = self.bboxs[index]
        texts = self.texts[index]
        texts_encoded = self.texts_encoded[index]

        # loading img
        img = Image.open(os.path.join(self.path_img_folder, img_path))
        if img.mode != 'RGB':
            img = img.convert('RGB')

        if self.train:
            #img, boxes = random_flip(img, bbox)
            #img, boxes = random_crop(img, bbox)
            img, boxes = resize(img, bbox, (self.figsize, self.figsize))

        else:
            img, boxes = resize(img, bbox, (self.figsize, self.figsize))
            img, boxes = center_crop(img, bbox, (self.figsize, self.figsize))

        img = self.transform(img)
        # scale to -1~1
        img = 2 * img - 1

        return texts_encoded, img, boxes
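Since transforms.ToTensor() yields values in [0, 1], the final 2 * img - 1 maps the image tensor into [-1, 1], the input range typically expected by generators with tanh outputs.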
Example #2
    def __getitem__(self, index):
        fname = os.path.join(self.im_pth, self.fnames[index])
        img = Image.open(fname)
        if img.mode!='RGB':
            img = img.convert('RGB')
        boxes = self.boxes[index].clone()
        size = self.size
        #print(img.size)
        if self.train:
            img, boxes = random_flip(img, boxes)
            img, boxes = random_crop(img, boxes)
            img, boxes = resize(img, boxes, size)
        else:
            img, boxes = center_crop(img, boxes, size)
            img, boxes = resize(img, boxes, size)
        if self.transform is not None:
            img = self.transform(img)

        dense_map = torch.zeros([1, img.size()[1], img.size()[2]], dtype=torch.float32)
        #print(dense_map.size())
        box_num = 0
        for box in boxes:
            area = (box[2]-box[0])*(box[3]-box[1])
            #print(box[0], box[1], box[2], box[3], area)
            if area<100.:
                continue
            box_num += 1
            try:
                y1, y2 = box[1].int(), box[3].int()
                x1, x2 = box[0].int(), box[2].int()
                dense_map[:, y1:y2, x1:x2] += 1 / area
            except Exception:
                print(fname, dense_map.size())
                print(box[1].int(), box[3].int(), box[0].int(), box[2].int(), area)
        return img, dense_map, box_num
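Since every retained box spreads 1/area over roughly area pixels, each box contributes about unit mass to the map, so the sum of dense_map should be close to box_num. A minimal sanity check, assuming dataset is an instance of this class:

img, dense_map, box_num = dataset[0]
print(dense_map.sum().item(), box_num)  # the two values should roughly agree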
Example #3
    def __getitem__(self, idx):
        '''Load image.

        Args:
          idx: (int) image index.

        Returns:
          img: (tensor) image tensor.
          boxes: (tensor) bounding box targets.
          labels: (tensor) class label targets.
        '''
        # Load image and boxes.
        fname = self.fnames[idx]
        img = Image.open(os.path.join(self.root, fname))
        if img.mode != 'RGB':
            img = img.convert('RGB')

        boxes = self.boxes[idx].clone()
        labels = self.labels[idx]
        size = self.input_size

        # Data augmentation.
        if self.train:
            img, boxes = random_flip(img, boxes)
            img, boxes = random_crop(img, boxes)
            img, boxes = resize(img, boxes, (size,size))
        else:
            img, boxes = resize(img, boxes, size)
            img, boxes = center_crop(img, boxes, (size,size))

        img = self.transform(img)
        return img, boxes, labels
Example #4
 def __getitem__(self, idx):
     size = self.input_size
     img_path = self.excuse_list[idx]
     img = Image.open(img_path)
     if img.mode != 'RGB':
         img = img.convert('RGB')
     if self.att_flag:
         att_map = self.get_att.get_att(img)
     if self.align_flag:
         img = self.alignment(img)
     img = resize(img, size)
     id_ = self.excuse_ids[idx]
     #return self.transform(img), img_path, id_
     if self.att_flag:
         att_map = resize(att_map, size)
         if self.flip_flag:  img, att_map = random_flip([img, att_map])
         att_map = self.transform_att(att_map)
         att_map = torch.floor(100*att_map)
         att_map = self.thresh(att_map)
         img = self.transform(img)
         return img, img_path, id_, att_map
     else:
         if self.flip_flag:  img = random_flip(img)
         img = self.transform(img)
         return img, img_path, id_
Example #5
    def __getitem__(self, idx):
        # Load image and boxes.
        size = self.input_size
        fname_pair1 = self.valid_pair[idx][0]
        fname_pair2 = self.valid_pair[idx][1]

        img_path_pair1 = os.path.join(self.root, fname_pair1)
        img_path_pair2 = os.path.join(self.root, fname_pair2)

        img_pair1 = Image.open(img_path_pair1)
        img_pair2 = Image.open(img_path_pair2)

        if img_pair1.mode != 'RGB':
            img_pair1 = img_pair1.convert('RGB')
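        # placeholder boxes: resize() still takes a box tensor even when test_flag=True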
        boxes_pair1 = torch.zeros(2,4)
        img_pair1 = resize(img_pair1, boxes_pair1, size, test_flag=True)
        img_pair1 = self.transform(img_pair1)

        if img_pair2.mode != 'RGB':
            img_pair2 = img_pair2.convert('RGB')
        boxes_pair2 = torch.zeros(2,4)
        img_pair2 = resize(img_pair2, boxes_pair2, size, test_flag=True)
        img_pair2 = self.transform(img_pair2)

        return img_pair1, img_pair2, self.valid_pair[idx][2]
Example #6
def data_augmentation(img_name, idx):
    '''
    Data augmentation on the segmented image. The file is from pytorch_retinanet.retinanet_dataset.py.
    All transformation methods are imported from transform.py, which comes from pytorch_retinanet.utils.

    Note:
    Right now, the image is augmented randomly.
    '''
    img = Image.open(os.path.join(SEG_IMG_DIR, img_name))
    if img.mode != 'RGB':
        img = img.convert('RGB')

    size = 600  # the desired image size

    img = random_flip(img)
    img = random_crop(img)
    img = resize(img, (size, size))
    if random.random() > 0.5:
        img = ImageEnhance.Color(img).enhance(random.uniform(0, 1))
        img = ImageEnhance.Brightness(img).enhance(random.uniform(0.5, 2))
        img = ImageEnhance.Contrast(img).enhance(random.uniform(0.5, 1.5))
        img = ImageEnhance.Sharpness(img).enhance(random.uniform(0.5, 1.5))
        # im1 = img.filter(ImageFilter.BLUR)  # Gaussian Blur
    else:
        img = resize(img, (size, size))
        # img, boxes = center_crop(img, boxes, (size, size))
    filename = img_name[:-4] + "_" + str(idx) + ".png"
    img.save(os.path.join(SEG_IMG_DIR + "/transformed", filename), "PNG")
Example #7
    def __getitem__(self, idx):
        '''Load image.

        Args:
          idx: (int) image index.

        Returns:
          img: (tensor) image tensor.
          boxes: (tensor) bounding box targets.
          labels: (tensor) class label targets.
        '''
        # Load image and boxes.
        fname = self.fnames[idx]
        img = Image.open(os.path.join(self.root, fname))
        if img.mode != 'RGB':
            img = img.convert('RGB')

        boxes = self.boxes[idx].clone()
        labels = self.labels[idx]
        size = self.input_size

        # Data augmentation.
        if self.train:
            img, boxes = random_flip(img, boxes)
            img, boxes = random_crop(img, boxes)
            img, boxes = resize(img, boxes, (size, size))
        else:
            img, boxes = resize(img, boxes, size)
            img, boxes = center_crop(img, boxes, (size, size))

        img = self.transform(img)
        return img, boxes, labels
Example #8
    def __getitem__(self, idx):
        '''Load image.

        Args:
          idx: (int) image index.

        Returns:
          img: (tensor) image tensor.
          img_path: (str) path to the image file.
          id_: identity label.
          att_map: (tensor) attention map.
        '''
        # Load image and boxes.
        size = self.input_size
        fname = self.fnames[idx]
        img_path = os.path.join(self.root, fname)
        img = Image.open(img_path)
        att_map, out_catch = self.get_att.get_att(img)

        if img.mode != 'RGB':
            img = img.convert('RGB')
        boxes = torch.zeros(2, 4)
        img = resize(img, boxes, size, test_flag=True)
        att_map = resize(att_map, boxes, size, test_flag=True)
        img = center_crop(img, boxes, (size, size), test_flag=True)
        att_map = center_crop(att_map, boxes, (size, size), test_flag=True)
        img = self.transform(img)
        att_map = self.transform_att(att_map)
        att_map = torch.floor(100 * att_map)
        att_map = self.thresh(att_map)
        #att_map = np.array(att_map, dtype=np.float32)

        id_ = self.ids[idx]

        return img, img_path, id_, att_map
Example #9
    def __getitem__(self, idx):
        '''Load image.

        Args:
          idx: (int) image index.

        Returns:
          img: (tensor) image tensor.
          boxes: (tensor) bounding box targets.
          labels: (tensor) class label targets.
        '''
        # Load image and boxes.
        fname = self.fnames[idx]

        prefix_name = fname[:2]

        # train and test images share the same directory layout
        image_path = os.path.join(self.root, prefix_name, fname)

        # img = Image.open(os.path.join(self.root, fname))

        img_a = Image.open(image_path + '_a.jpg')
        img_b = Image.open(image_path + '_b.jpg')
        img_c = Image.open(image_path + '_c.jpg')
        img = Image.merge('RGB', (img_a, img_b, img_c))


        # if img.mode != 'RGB':
            # img = img.convert('RGB')

        boxes = self.boxes[idx].clone()
        labels = self.labels[idx]
        size = self.input_size

        # Data augmentation.
        if self.train:
            img, boxes = random_flip(img, boxes)
            img, boxes = random_crop(img, boxes)
            img, boxes = resize(img, boxes, (size,size))
        else:
            img, boxes = resize(img, boxes, size)
            img, boxes = center_crop(img, boxes, (size,size))

        img = self.transform(img)
        # if self.transforms is not None:
        #     # if img is a byte or uint8 array, it will convert from 0-255 to 0-1
        #     # this converts from (HxWxC) to (CxHxW) as well
        #     img_a, img_b, img_c = image
        #     img_a = self.transforms(img_a)
        #     img_b = self.transforms(img_b)
        #     img_c = self.transforms(img_c)
        #     img = (img_a, img_b, img_c)

        return img, boxes, labels
Example #10
    def __getitem__(self, idx):
        '''Load image.
        Args:
          idx: (int) image index.
        Returns:
          tri_img: (list) anchor/positive/negative image tensors.
          tri_img_path: (list) corresponding image paths.
          triplet_id: (list) anchor/positive/negative indices.
          tri_att_map: (list) corresponding attention maps.
        '''

        # anchor
        # Load image and boxes.

        triplet_id = [idx]
        # positive: another sample with the anchor's id (samples sharing an id
        # are assumed to be stored contiguously in self.ids)
        anchor_id = self.ids[idx]
        pos_start = self.ids.index(anchor_id)
        pos_counter = self.ids.count(anchor_id)
        pos_id = random.randint(pos_start, pos_start + pos_counter - 1)
        triplet_id.append(pos_id)
        # negative: a random index whose id differs from the anchor's
        neg_id = random.randint(0, len(self.ids) - 1)
        while self.ids[neg_id] == anchor_id:
            neg_id = random.randint(0, len(self.ids) - 1)
        triplet_id.append(neg_id)

        tri_img = []
        tri_img_path = []
        tri_att_map = []
        for get_idx in triplet_id:
            size = self.input_size
            fname = self.fnames[get_idx]
            img_path = os.path.join(self.root, fname)
            tri_img_path.append(img_path)
            img = Image.open(img_path)
            att_map, out_catch = self.get_att.get_att(img)
            if img.mode != 'RGB':
                img = img.convert('RGB')
            boxes = torch.zeros(2,4)
            img = resize(img, boxes, size, test_flag=True)
            att_map = resize(att_map, boxes, size, test_flag=True)
            img = center_crop(img, boxes, (size,size), test_flag=True)
            att_map = center_crop(att_map, boxes, (size,size), test_flag=True)
            img = self.transform(img)
            tri_img.append(img)
            att_map = self.transform_att(att_map)
            att_map = torch.floor(100*att_map)
            att_map = self.thresh(att_map)
            tri_att_map.append(att_map)
        
        return tri_img, tri_img_path, triplet_id, tri_att_map
Example #11
    def __getitem__(self, idx):
        '''Load image.

        Args:
          idx: (int) image index.

        Returns:
          img: (tensor) image tensor.
          boxes: (tensor) bounding box targets.
          labels: (tensor) class label targets.
        '''
        # Load image and boxes.
        fname = self.fnames[idx]
        img = Image.open(os.path.join(self.root, fname))
        if img.mode != 'RGB':
            img = img.convert('RGB')
        width, height = img.size

        flabel = fname.replace('images/',
                               'labels/').replace('.jpg', '.txt').replace(
                                   '.png', '.txt').replace('.jpeg', '.txt')
        box = []
        label = []
        with open(flabel) as f:
            lines = f.readlines()
            for line in lines:
                ls = line.strip().split()
                x = float(ls[1]) * width
                y = float(ls[2]) * height
                w = float(ls[3]) * width
                h = float(ls[4]) * height
                box.append([x - w / 2, y - h / 2, x + w / 2, y + h / 2])
                label.append(int(ls[0]))

        boxes = torch.Tensor(box)
        labels = torch.LongTensor(label)
        size = self.input_size

        # Data augmentation.
        if self.train:
            img, boxes = random_flip(img, boxes)
            img, boxes = random_crop(img, boxes)
            img, boxes = resize(img, boxes, (size, size))
        else:
            img, boxes = resize(img, boxes, size)
            img, boxes = center_crop(img, boxes, (size, size))

        img = self.transform(img)
        return img, boxes, labels
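The label-parsing loop above converts YOLO-style annotations (class, x_center, y_center, width, height, all normalized to [0, 1]) into absolute corner coordinates. The same conversion as a standalone, hypothetical helper:

def yolo_to_xyxy(x, y, w, h, img_w, img_h):
    # scale the normalized center/size to pixels, then shift to corners
    x, y, w, h = x * img_w, y * img_h, w * img_w, h * img_h
    return [x - w / 2, y - h / 2, x + w / 2, y + h / 2]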
Example #12
    def rebuild_pred(self, pred, meta, save_dir=None):
        affine = meta['affine']
        cropped_shape = meta['cropped_shape']
        original_shape = meta['shape']
        orient = meta['orient']

        # pad_width for np.pad
        pad_width = meta['nonair_bbox']
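        # nonair_bbox holds [first, last] kept indices per axis; convert each
        # last index into the trailing pad count that np.pad expects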
        for i in range(len(original_shape)):
            pad_width[i][1] = original_shape[i] - (pad_width[i][1] + 1)

        print('Resample pred to original spacing...')
        pred = resize(pred, cropped_shape, is_label=True)
        print('Add padding to pred...')
        pred = np.pad(pred, pad_width, constant_values=0)
        pred = nib.orientations.apply_orientation(pred, orient)

        if save_dir:
            save_dir = Path(save_dir)
            if not save_dir.exists():
                save_dir.mkdir(parents=True)

            pred_nib = nib.Nifti1Pair(pred, np.array(affine))
            nib_fname = '%s_pred.nii.gz' % meta['case_id']
            nib.save(pred_nib, str(save_dir / nib_fname))

        return {'pred': pred, 'meta': meta}
Example #13
    def __getitem__(self, idx):
        '''Load image.

        Args:
          idx: (int) image index.

        Returns:
          img: (tensor) image tensor.
          img_path: (str) path to the image file.
          id_: identity label.
        '''
        # Load image and boxes.
        size = self.input_size
        fname = self.fnames[idx]
        img_path = os.path.join(self.root, fname)
        img = Image.open(img_path)
        if img.mode != 'RGB':
            img = img.convert('RGB')
        boxes = torch.zeros(2, 4)
        img = resize(img, boxes, size, test_flag=True)
        img = center_crop(img, boxes, (size, size), test_flag=True)
        img = self.transform(img)
        id_ = self.ids[idx]

        return img, img_path, id_
Example #14
    def run(self, img):
        if random.random() > self.chance:
            return img

        #1. shrink it
        fac = random.uniform(self.fac_low, self.fac_high)
        log.debug("fac = %.3f" % (fac))
        if math.fabs(fac - 1.0) < 0.02:
            return img

        h, w, _ = img.shape
        h, w = int(fac*h), int(fac * w)
        img2 = tr.resize(img, (w, h))
        
        #2. past it to background image
        bg_img = self._randomImg(img.shape)

        #3. paste it to the background image
        dh = img.shape[0] - h
        dw = img.shape[1] - w
        x_offset = random.randint(0, dw)
        y_offset = random.randint(0, dh)

        img2 = tr.paste(bg_img, img2, x_offset, y_offset)
        return img2
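Shrinking the input by a random factor and pasting it at a random offset onto a random background image keeps the original content intact while simulating scale and position variation.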
Example #15
    def __getitem__(self, idx):
        '''Load image.

        Args:
          idx: (int) image index.

        Returns:
          img: (tensor) image tensor.
          boxes: (tensor) bounding box targets.
          labels: (tensor) class label targets.
          att_map: (tensor) attention map resized to half the input size.
          img_path: (str) path to the image file.
        '''
        # Load image and boxes.
        fname = self.fnames[idx]
        img_path = os.path.join(self.root, fname)
        img = Image.open(img_path)
        if img.mode != 'RGB':
            img = img.convert('RGB')

        boxes = self.boxes[idx].clone()
        labels = self.labels[idx]
        size = self.input_size

        src_shape = self.shape_list[idx]

        att_map = np.zeros([src_shape[0], src_shape[1]])

        for att_box in boxes:
            att_map[int(att_box[0]):int(att_box[2]),
                    int(att_box[1]):int(att_box[3])] = 1

        # Data augmentation.
        if self.train:
            img, boxes = random_flip(img, boxes)
            img, boxes = random_crop(img, boxes)
            img, boxes = resize(img, boxes, (size, size))
        else:
            img, boxes = resize(img, boxes, size)
            img, boxes = center_crop(img, boxes, (size, size))
        att_map = Image.fromarray(att_map)
        att_map = att_map.resize((size // 2, size // 2), Image.BILINEAR)

        #img.save('test_in_datagen.jpg')

        img = self.transform(img)
        att_map = self.transform(att_map)

        return img, boxes, labels, att_map, img_path
Example #16
def crop_img(im, size):
    # center-crop to a square, then resize to (width, height) = size
    if im.shape[0] > im.shape[1]:
        c = (im.shape[0] - im.shape[1]) // 2
        im = im[c:c+im.shape[1], :, :]
    else:
        c = (im.shape[1] - im.shape[0]) // 2
        im = im[:, c:c+im.shape[0], :]
    im = transform.resize(im, (size[1], size[0]))
    return im
Example #17
    def __getitem__(self, idx):
        '''Load image.
        Args:
          idx: (int) image index.
        Returns:
          img: (tensor) image tensor.
          boxes: (tensor) bounding box targets.
          labels: (tensor) class label targets.
          fname: (str) image file name.
        '''
        # Load image and boxes.
        fname = self.fnames[idx]
        img = Image.open(os.path.join(self.root, fname))
        if img.mode != 'RGB':
            img = img.convert('RGB')

        boxes = self.boxes[idx].clone()
        labels = self.labels[idx]
        size = self.input_size

        # Data augmentation.
        if self.train:
            img, boxes = random_flip(img, boxes)
            img, boxes = random_crop(img, boxes)
            img, boxes = resize(img, boxes, (size, size))
        else:
            img, boxes = resize(img, boxes, (size, size))
            # img, boxes = center_crop(img, boxes, (size,size))
        img = self.transform(img)

        if self.is_debug:
            filled_labels = np.zeros((self.max_objects, 4), dtype=np.float32)
            if boxes is not None:
                filled_labels[range(
                    len(boxes))[:self.max_objects]] = boxes[:self.max_objects]
            else:
                print('no object')
            filled_labels = torch.from_numpy(filled_labels)
            return img, filled_labels
        else:
            #error_indices_1 = np.where(labels<0)
            #error_indices_2 = np.where(labels>19)
            #print('_______:',labels[error_indices_1])
            #print(labels.shape)
            return img, boxes, labels, fname
Example #18
    def __getitem__(self, index):
        '''Load image.

        Args:
          index: (int) image index.

        Returns:
          img: (tensor) image tensor.
          boxes: (tensor) bounding box targets.
          labels: (tensor) class label targets.
          fname: (str) image file name.
        '''
        # Load image and boxes.

        img_path = self._imgpath[index].rstrip()
        fname = img_path.split('/')[-1].split('.')[0]

        # print(img_path)
        img = Image.open(img_path)
        if img.mode != 'RGB':
            img = img.convert('RGB')

        label_path = self._labpath[index].rstrip()
        # print(label_path)

        targets = np.loadtxt(label_path).reshape(-1, 5)
        # targets = np.array(targets)
        # print(targets)
        boxes = torch.Tensor(targets[:, 1:])
        labels = torch.LongTensor(targets[:, 0])

        size = self.input_size

        # Data augmentation.
        if self.train:
            img, boxes = random_flip(img, boxes)
            img, boxes = random_crop(img, boxes)
            img, boxes = resize(img, boxes, (size, size))
        else:
            img, boxes = resize(img, boxes, (size, size))
            # img, boxes = center_crop(img, boxes, (size,size))

        img = self.transform(img)
        return img, boxes, labels, fname
Example #19
def testEffect(imgs):
    worker = getChain1()
    log.info("%s" % (worker))
    for i in range(len(imgs)):
        img = imgs[i]
        img2 = worker.run(img)
        img = tr.resize(img, SIZE)
        tr.showImgs([img, img2])
        #tr.saveImgs([img, img2], "./result/%d.jpg"%(i))
    return
Example #20
def build_mPA_GT(root, ann_root, dataType, img_size=360):
    '''
    Args:
      root: (str) directory to images.
    '''
    print("<" + "=" * 20 + ">")
    print("[Building GT for mPA]")
    fnames = os.listdir(root)
    fnames.sort()
    coco = COCO(ann_root)
    print("Total number of images : ", len(fnames))
    count = 0
    for i, name in enumerate(tqdm(fnames)):
        img_num = int(name.replace(".jpg", ""))
        image_path = os.path.join(root, name)
        img = Image.open(image_path)

        annIds = coco.getAnnIds(imgIds=[img_num], iscrowd=None)
        anns = coco.loadAnns(annIds)
        if len(anns) == 0:
            continue
        count += 1
        file = open("./mPA/ground-truth/%s.txt" % (name.replace(".jpg", "")),
                    "w")

        bbox_resize = []
        label_resize = []
        for i, ann in enumerate(anns):
            coco_label = int(ann['category_id'])
            label = class_map(coco_label)
            # note that the order of BBox in COCO dataset is xywh where x and y is the up-left point
            # Not the center of BBox
            xywh = [
                float(ann['bbox'][0]),
                float(ann['bbox'][1]),
                float(ann['bbox'][2]),
                float(ann['bbox'][3])
            ]
            bbox = change_box_order(xywh, 'xywh2xyxy')
            label_resize.append(label)
            bbox_resize.append(bbox)

        bbox_resize = torch.Tensor(bbox_resize)
        img, boxes = resize(img, bbox_resize, (img_size, img_size))
        for i, (bbox, label) in enumerate(zip(boxes, label_resize)):
            file.write("%s %.3f %.3f %.3f %.3f\n" %
                       (my_cate[label], bbox[0], bbox[1], bbox[2], bbox[3]))
        file.close()

    print("Total image convert : ", count)
    print("[Done]")
    return True
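For reference, change_box_order(xywh, 'xywh2xyxy') above converts COCO's top-left-plus-size boxes into corner coordinates. A minimal sketch of that conversion (the project's real helper likely supports more orders and tensor inputs):

def xywh_to_xyxy(box):
    # COCO boxes are [x, y, w, h] with (x, y) the top-left corner
    x, y, w, h = box
    return [x, y, x + w, y + h]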
Example #21
def test():
    transform = transforms.Compose([
        transforms.ToTensor(),
        #transforms.Normalize((0.485,0.456,0.406), (0.229,0.224,0.225))
    ])

    cfg_path = './config/GAWWN_v1.cfg'
    train_dataset = GAWWN_Dataset(cfg=cfg_path,
                                  train=True,
                                  transform=transform)
    train_loader = DataLoader(train_dataset,
                              batch_size=1,
                              shuffle=False,
                              num_workers=4)
    train_iter = iter(train_loader)
    #print(len(train_loader.dataset))
    #print(len(train_loader))
    for epoch in range(1):
        texts, img, boxes = next(train_iter)
        print("image shape : ", img.shape)
        print("texts shape : ", texts.shape)
        print("boxes shape : ", boxes.shape)
        print("boxes : ", boxes)

        grid = torchvision.utils.make_grid(img, 1)
        if not os.path.exists("./test"):
            os.makedirs("./test")
        torchvision.utils.save_image(grid, './test/test.jpg')
        img = Image.open('./test/test.jpg')
        draw = ImageDraw.Draw(img)
        for i, (box, text) in enumerate(zip(boxes, texts)):
            draw.rectangle(list(box[0]), outline='red', width=3)
        img.save('./test/test_bbox.jpg')

        #cropping testing

        img = Image.open('./test/test.jpg')
        img, _ = resize(img, boxes, (16, 16))
        img_torch = transforms.ToTensor()(img).unsqueeze(0)
        img_torch = img_torch.repeat(10, 1, 1, 1)
        boxes = boxes.repeat(10, 1, 1)
        print('img_torch : ', img_torch.shape)
        print('boxes : ', boxes.shape)
        grid = compute_bbox_grid(img_torch, boxes, crop_size=16., img_size=128)
        output = F.grid_sample(img_torch, grid)
        print('output : ', output.shape)
        new_img_torch = output[0]
        plt.imshow(new_img_torch.numpy().transpose(1, 2, 0))
        plt.savefig('./test/crop.jpg')
Example #22
	def __call__(self, img):
		hsv = color.rgb2hsv(img)
		hsv[:,:,2] = exposure.equalize_hist(hsv[:,:,2])
		img = color.hsv2rgb(hsv)

		min_side = min(img.shape[:-1])
		centre = img.shape[0] // 2, img.shape[1] // 2
		img = img[centre[0] - min_side // 2:centre[0] + min_side // 2,
			centre[1] - min_side // 2:centre[1] + min_side // 2,
			:]

		img = transform.resize(img, (self.img_size, self.img_size))
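		# move the channel axis to the front: HWC -> CHW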
		img = np.rollaxis(img, -1)

		return img
Example #23
def testBg():
    #adir = "../data/driver"
    adir = "/Users/songbin/dev/data/docID/small/"
    imgs = loadImgs(adir)
    bg_imgs = "/Users/songbin/dev/data/bg"

    bgwarper = wraper.BackGroundWraper(chance=1.0)
    bgwarper.loadImgs(bg_imgs, (800, 600))

    noiser = wraper.NoiseWraper(chance=1.0, maxSigma=13)

    for i in range(len(imgs)):
        img = imgs[i]

        img = tr.resize(img, (480, 360))
        img2 = img.copy()
        img2 = bgwarper.run(img2)
        img2 = noiser.run(img2)

        tr.showImgs([img, img2])
    return
Example #24
def tranImgs(imgs):
    noise = wraper.NoiseWraper(0.5, maxSigma=5)
    fmodel = "./model/haarcascade_frontalface_default.xml"
    face = wraper.FaceWraper(fmodel, 1.0)
    color = wraper.ColorWraper(0.5)
    aspect = wraper.AspectWraper(1.0)
    shadow = wraper.ShadowWraper(1.0)

    shrink = wraper.ShrinkWraper(chance=1.0)
    rotate2D = wraper.Rotate2DWraper(chance=1.0, angle=(-30, 30))
    rotate2DX = wraper.Rotate2DXWraper(chance=1.0, angle=(-0.5, 0.5))

    rotate3D = wraper.Rotate3DWraper(chance=1.0)
    rotate3DX = wraper.Rotate3DXWraper(chance=1.0)

    croper = wraper.EraseWraper(chance=1.0)

    print(noise)
    print(color)
    for i in range(len(imgs)):
        img = imgs[i]

        img2 = img.copy()
        img = tr.resize(img, (480, 360))

        img2 = face.run(img2)
        #img2 = tr.resize(img2, (480, 360))
        #img2 = eraser.run(img2)
        #img2 = croper.run(img2)
        #img2 = rotate2DX.run(img2)
        img2 = rotate3DX.run(img2)
        #img2 = noise.run(img2)

        #img2 = color.run(img2)
        #img2 = shadow.run(img2)
        #img2 = aspect.run(img2)
        #img2 = shrink.run(img2)
        tr.showImgs([img, img2])
        #tr.saveImgs([img, img2], "./result/%d.jpg"%(i))
    return
Example #25
def generate(data_dir, batch_size=16, image_size=640, min_text_size=8, shrink_ratio=0.4, thresh_min=0.3,
             thresh_max=0.7, is_training=True):
    split = 'train' if is_training else 'test'
    with open(osp.join(data_dir, f'{split}_list.txt')) as f:
        image_fnames = f.readlines()
        image_paths = [osp.join(
            data_dir, f'{split}_images', image_fname.strip()) for image_fname in image_fnames]
        gt_paths = [osp.join(data_dir, f'{split}_gts', image_fname.strip(
        ) + '.txt') for image_fname in image_fnames]
        all_anns = load_all_anns(gt_paths)
    transform_aug = iaa.Sequential(
        [iaa.Fliplr(0.5), iaa.Affine(rotate=(-10, 10)), iaa.Resize((0.5, 3.0))])
    dataset_size = len(image_paths)
    indices = np.arange(dataset_size)
    if is_training:
        np.random.shuffle(indices)
    current_idx = 0
    b = 0
    while True:
        if current_idx >= dataset_size:
            if is_training:
                np.random.shuffle(indices)
            current_idx = 0
        if b == 0:
            # Init batch arrays
            batch_images = np.zeros(
                [batch_size, image_size, image_size, 3], dtype=np.float32)
            batch_gts = np.zeros(
                [batch_size, image_size, image_size], dtype=np.float32)
            batch_masks = np.zeros(
                [batch_size, image_size, image_size], dtype=np.float32)
            batch_thresh_maps = np.zeros(
                [batch_size, image_size, image_size], dtype=np.float32)
            batch_thresh_masks = np.zeros(
                [batch_size, image_size, image_size], dtype=np.float32)
            batch_loss = np.zeros([batch_size, ], dtype=np.float32)
        i = indices[current_idx]
        image_path = image_paths[i]
        anns = all_anns[i]
        image = cv2.imread(image_path)
        # show_polys(image.copy(), anns, 'before_aug')
        if is_training:
            transform_aug = transform_aug.to_deterministic()
            image, anns = transform(transform_aug, image, anns)
            image, anns = crop(image, anns)
        image, anns = resize(image_size, image, anns)
        # show_polys(image.copy(), anns, 'after_aug')
        # cv2.waitKey(0)
        anns = [ann for ann in anns if Polygon(ann['poly']).is_valid]
        gt = np.zeros((image_size, image_size), dtype=np.float32)
        mask = np.ones((image_size, image_size), dtype=np.float32)
        thresh_map = np.zeros((image_size, image_size), dtype=np.float32)
        thresh_mask = np.zeros((image_size, image_size), dtype=np.float32)
        for ann in anns:
            poly = np.array(ann['poly'])
            height = max(poly[:, 1]) - min(poly[:, 1])
            width = max(poly[:, 0]) - min(poly[:, 0])
            polygon = Polygon(poly)
            # generate gt and mask
            if polygon.area < 1 or min(height, width) < min_text_size or ann['text'] == '###':
                cv2.fillPoly(mask, poly.astype(np.int32)[np.newaxis, :, :], 0)
                continue
            else:
                distance = polygon.area * \
                    (1 - np.power(shrink_ratio, 2)) / polygon.length
                subject = [tuple(l) for l in ann['poly']]
                padding = pyclipper.PyclipperOffset()
                padding.AddPath(subject, pyclipper.JT_ROUND,
                                pyclipper.ET_CLOSEDPOLYGON)
                shrinked = padding.Execute(-distance)
                if len(shrinked) == 0:
                    cv2.fillPoly(mask, poly.astype(
                        np.int32)[np.newaxis, :, :], 0)
                    continue
                else:
                    shrinked = np.array(shrinked[0]).reshape(-1, 2)
                    if shrinked.shape[0] > 2 and Polygon(shrinked).is_valid:
                        cv2.fillPoly(gt, [shrinked.astype(np.int32)], 1)
                    else:
                        cv2.fillPoly(mask, poly.astype(
                            np.int32)[np.newaxis, :, :], 0)
                        continue
            # generate thresh map and thresh mask
            draw_thresh_map(ann['poly'], thresh_map,
                            thresh_mask, shrink_ratio=shrink_ratio)
        thresh_map = thresh_map * (thresh_max - thresh_min) + thresh_min

        image = image.astype(np.float32)
        image[..., 0] -= mean[0]
        image[..., 1] -= mean[1]
        image[..., 2] -= mean[2]
        batch_images[b] = image
        batch_gts[b] = gt
        batch_masks[b] = mask
        batch_thresh_maps[b] = thresh_map
        batch_thresh_masks[b] = thresh_mask

        b += 1
        current_idx += 1
        if b == batch_size:
            inputs = [batch_images, batch_gts, batch_masks,
                      batch_thresh_maps, batch_thresh_masks]
            outputs = batch_loss
            yield inputs, outputs
            b = 0
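The shrink offset computed above, distance = area * (1 - shrink_ratio**2) / perimeter, is the polygon-shrinking rule used by DB (Differentiable Binarization): for a 100x20 text box with the default shrink_ratio of 0.4 it gives 2000 * (1 - 0.16) / 240 = 7, so pyclipper insets the ground-truth polygon by 7 pixels.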
Example #26
import cv2
import transform
import numpy as np
import argparse
from skimage.filters import threshold_adaptive

# construct the argument parser and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--image", required = True,
    help = "Path to the image to be scanned")
args = vars(ap.parse_args())

image = cv2.imread(args["image"])
print('Original image shape: ',image.shape)
ratio = image.shape[1]/300.0
orig = image.copy()
image = transform.resize(image, height = 300.0)
print('image shape: ', image.shape)
# convert the image to grayscale, blur it, and find edges
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
gray = cv2.GaussianBlur(gray, (5,5), 0)
edged = cv2.Canny(gray, 75, 200)

# show the original image and the edge detected image
print('STEP 1: Edge Detection')
cv2.imshow("Image", image)
cv2.imshow("Edged", edged)
cv2.waitKey(0)
cv2.destroyAllWindows()

# find the contours in the edged image, keeping only the largest ones,
# and initialize the screen contour
Example #27
def Load_Images(img):
  pred_img = np.array(img).astype('float32')/255
  pred_img = transform.resize(pred_img,(200,200,3))
  pred_img = np.expand_dims(pred_img,axis=0)
  return pred_img
Example #28
    if verbose:
        print('Resampling the case for prediction...')
    case_ = resample_normalize_case(case, target_spacing, normalize_stats)

    if verbose:
        print('Predicting the case...')
    pred = predict_per_patch(case_['image'],
                             model,
                             num_classes,
                             patch_size,
                             step_per_patch,
                             verbose,
                             one_hot)
    if verbose:
        print('Resizing the case to original shape...')
    case['pred'] = resize(pred, orig_shape, is_label=one_hot is False)
    case['affine'] = affine
    if verbose:
        print('All done!')
    return case


def batch_predict_case(load_dir,
                       save_dir,
                       model,
                       target_spacing,
                       normalize_stats,
                       num_classes=3,
                       patch_size=(240, 240, 80),
                       step_per_patch=4,
                       data_range=None):
Example #29
def generate(cfg, train_or_val='train'):
    def init_input():
        batch_images = np.zeros(
            [cfg.BATCH_SIZE, cfg.IMAGE_SIZE, cfg.IMAGE_SIZE, 3],
            dtype=np.float32)
        batch_gts = np.zeros([cfg.BATCH_SIZE, cfg.IMAGE_SIZE, cfg.IMAGE_SIZE],
                             dtype=np.float32)
        batch_masks = np.zeros(
            [cfg.BATCH_SIZE, cfg.IMAGE_SIZE, cfg.IMAGE_SIZE], dtype=np.float32)
        batch_thresh_maps = np.zeros(
            [cfg.BATCH_SIZE, cfg.IMAGE_SIZE, cfg.IMAGE_SIZE], dtype=np.float32)
        batch_thresh_masks = np.zeros(
            [cfg.BATCH_SIZE, cfg.IMAGE_SIZE, cfg.IMAGE_SIZE], dtype=np.float32)
        # batch_loss = np.zeros([cfg.BATCH_SIZE, ], dtype=np.float32)
        return [
            batch_images, batch_gts, batch_masks, batch_thresh_maps,
            batch_thresh_masks
        ]

    data_path = cfg.TRAIN_DATA_PATH if train_or_val == 'train' else cfg.VAL_DATA_PATH

    with open(data_path, encoding='utf8') as f:
        data = json.load(f)

    data_root_dir = data["data_root"]
    data_list = data["data_list"]

    image_paths = []
    all_anns = []

    for data_item in data_list:
        img_name = data_item["img_name"]
        annotations = data_item["annotations"]
        anns = []
        for annotation in annotations:
            item = {}
            text = annotation["text"]
            poly = annotation["polygon"]
            if len(poly) < 3:
                continue
            item['text'] = text
            item['poly'] = poly
            anns.append(item)
        image_paths.append(osp.join(data_root_dir, img_name))
        all_anns.append(anns)

    transform_aug = iaa.Sequential(
        [iaa.Affine(rotate=(-10, 10)),
         iaa.Resize((0.5, 3.0))])
    dataset_size = len(image_paths)
    indices = np.arange(dataset_size)
    if train_or_val == 'train':
        np.random.shuffle(indices)

    current_idx = 0
    b = 0
    while True:
        if current_idx >= dataset_size:
            if train_or_val == 'train':
                np.random.shuffle(indices)
            current_idx = 0
        if b == 0:
            (batch_images, batch_gts, batch_masks,
             batch_thresh_maps, batch_thresh_masks) = init_input()
        i = indices[current_idx]
        image_path = image_paths[i]
        anns = all_anns[i]
        """
        [{'text': 'chinese', 'poly': [[17.86985870232934, 29.2253341902275], [18.465581783660582, 7.2334012599376365], [525.2796724953414, 20.9621104524324], [524.6839494140104, 42.954043382722375]]},
        {'text': 'chinese', 'poly': [[9.746362138723043, 329.1153286941807], [10.667025082598343, 295.12779598373265], [589.454714475228, 310.8061443514931], [588.5340515313526, 344.79367706194114]]}]
        """
        image = cv2.imread(image_path)
        # show_polys(image.copy(), anns, 'before_aug')
        if train_or_val == 'train':
            transform_aug = transform_aug.to_deterministic()
            image, anns = transform(transform_aug, image, anns)
            image, anns = crop(image, anns)
        image, anns = resize(cfg.IMAGE_SIZE, image, anns)
        # show_polys(image.copy(), anns, 'after_aug')
        # cv2.waitKey(0)
        anns = [ann for ann in anns if Polygon(ann['poly']).is_valid]
        gt = np.zeros((cfg.IMAGE_SIZE, cfg.IMAGE_SIZE), dtype=np.float32)
        mask = np.ones((cfg.IMAGE_SIZE, cfg.IMAGE_SIZE), dtype=np.float32)
        thresh_map = np.zeros((cfg.IMAGE_SIZE, cfg.IMAGE_SIZE),
                              dtype=np.float32)
        thresh_mask = np.zeros((cfg.IMAGE_SIZE, cfg.IMAGE_SIZE),
                               dtype=np.float32)
        for ann in anns:
            poly = np.array(ann['poly'])
            height = max(poly[:, 1]) - min(poly[:, 1])
            width = max(poly[:, 0]) - min(poly[:, 0])
            polygon = Polygon(poly)
            # generate gt and mask
            if polygon.area < 1 or min(
                    height, width
            ) < cfg.MIN_TEXT_SIZE or ann['text'] in cfg.IGNORE_TEXT:
                cv2.fillPoly(mask, poly.astype(np.int32)[np.newaxis, :, :], 0)
                continue
            else:
                distance = polygon.area * (
                    1 - np.power(cfg.SHRINK_RATIO, 2)) / polygon.length
                subject = [tuple(l) for l in ann['poly']]
                padding = pyclipper.PyclipperOffset()
                padding.AddPath(subject, pyclipper.JT_ROUND,
                                pyclipper.ET_CLOSEDPOLYGON)
                shrinked = padding.Execute(-distance)
                if len(shrinked) == 0:
                    cv2.fillPoly(mask,
                                 poly.astype(np.int32)[np.newaxis, :, :], 0)
                    continue
                else:
                    shrinked = np.array(shrinked[0]).reshape(-1, 2)
                    if shrinked.shape[0] > 2 and Polygon(shrinked).is_valid:
                        cv2.fillPoly(gt, [shrinked.astype(np.int32)], 1)
                    else:
                        cv2.fillPoly(mask,
                                     poly.astype(np.int32)[np.newaxis, :, :],
                                     0)
                        continue
            # generate thresh map and thresh mask
            draw_thresh_map(ann['poly'],
                            thresh_map,
                            thresh_mask,
                            shrink_ratio=cfg.SHRINK_RATIO)
        thresh_map = thresh_map * (cfg.THRESH_MAX -
                                   cfg.THRESH_MIN) + cfg.THRESH_MIN

        image = image.astype(np.float32)
        image -= mean
        batch_images[b] = image
        batch_gts[b] = gt
        batch_masks[b] = mask
        batch_thresh_maps[b] = thresh_map
        batch_thresh_masks[b] = thresh_mask

        b += 1
        current_idx += 1
        if b == cfg.BATCH_SIZE:
            inputs = [
                batch_images, batch_gts, batch_masks, batch_thresh_maps,
                batch_thresh_masks
            ]
            # outputs = batch_loss
            outputs = []
            yield inputs, outputs
            b = 0
Example #30
 def resizeBbImgs(self, w, h):
     sz = (w, h)
     for i in range(len(self.bg_imgs)):
         img = self.bg_imgs[i]
         self.bg_imgs[i] = tr.resize(img, sz)
     return
Example #31
 def run(self, img):
     img2 = tr.resize(img, (self.w, self.h))
     return img2