コード例 #1
0
    def __call__(self, img, gt, hha, depth, coord, camera_params):
        """Augment one RGB-D sample and pack it into network inputs.

        The sample is randomly mirrored, optionally rescaled, normalized and
        cropped/padded to ``(config.image_height, config.image_width)``.

        Args:
            img: Colour image; transposed with ``(2, 0, 1)`` at the end, so it
                must be 3-D.
            gt: Label map; padded with 255 where the crop exceeds the image.
            hha: HHA image; 3-D, transposed like ``img``.
            depth: Depth map; a leading axis is added after cropping.
            coord: Coordinate map; 3-D, padded with -1.
            camera_params: Dict that is updated IN PLACE with a ``'scale'``
                float tensor when random scaling is enabled.

        Returns:
            ``(p_img, p_gt, extra_dict)`` where ``extra_dict`` holds the keys
            ``'hha_img'``, ``'depth_img'``, ``'coord_img'`` and
            ``'camera_params'``.
        """
        # NOTE(review): hha is not passed to random_mirror while the other
        # modalities are -- confirm whether that is intentional.
        img, gt, depth, coord = random_mirror(img, gt, depth, coord)

        scale_choices = config.train_scale_array
        if scale_choices is not None:
            img, gt, hha, depth, coord, scale = random_scale(
                img, gt, hha, depth, coord, scale_choices)
            scale_np = np.array(scale, dtype=np.float32)
            camera_params['scale'] = torch.from_numpy(scale_np).float()

        img = normalize(img, self.img_mean, self.img_std)
        # depth_var is handed to normalize in the slot img_std occupies above.
        depth = normalize(depth, self.depth_mean, self.depth_var)

        target_hw = (config.image_height, config.image_width)
        origin = generate_random_crop_pos(img.shape[:2], target_hw)

        def crop(arr, fill):
            # Every modality shares one crop origin so they stay aligned.
            patch, _ = random_crop_pad_to_shape(arr, origin, target_hw, fill)
            return patch

        p_img = crop(img, 0)
        p_gt = crop(gt, 255)
        p_hha = crop(hha, 0)
        p_depth = crop(depth, 0)
        p_coord = crop(coord, -1)

        extra_dict = {
            'hha_img': p_hha.transpose(2, 0, 1),
            'depth_img': p_depth[np.newaxis, ...],
            'coord_img': p_coord.transpose(2, 0, 1),
            'camera_params': camera_params,
        }
        return p_img.transpose(2, 0, 1), p_gt, extra_dict
コード例 #2
0
    def __call__(self, ref_img, cur_img, ref_mask, cur_mask):
        """Prepare a (reference frame, current frame, current mask) triple.

        Both frames and the current mask are cropped to a random box computed
        from the two masks, resized to
        ``(config.image_width, config.image_height)``, randomly flipped,
        normalized (frames only) and randomly rotated.

        Args:
            ref_img: Reference frame, indexed as ``[rows, cols, channels]``.
            cur_img: Current frame, same layout.
            ref_mask: Reference mask; only used to compute the crop box.
            cur_mask: Current mask, indexed as ``[rows, cols]``.

        Returns:
            ``(ref_img, cur_img, cur_mask, None)`` with frames transposed by
            ``(2, 0, 1)`` and the mask given a leading axis.
        """
        bbox = generate_random_common_bbox(ref_mask, cur_mask)
        # The box is indexed as (col_start, row_start, col_end, row_end).
        rows = slice(bbox[1], bbox[3])
        cols = slice(bbox[0], bbox[2])
        ref_img = ref_img[rows, cols, :]
        cur_img = cur_img[rows, cols, :]
        cur_mask = cur_mask[rows, cols]

        out_size = (config.image_width, config.image_height)
        ref_img = cv2.resize(ref_img, out_size)
        cur_img = cv2.resize(cur_img, out_size)
        # Nearest-neighbour keeps mask values from being blended.
        cur_mask = cv2.resize(cur_mask, out_size,
                              interpolation=cv2.INTER_NEAREST)

        ref_img, cur_img, cur_mask = random_hflip_adnet(
            ref_img, cur_img, cur_mask)

        # Normalization sits between the flip and the rotation, as before.
        ref_img = normalize(ref_img, self.img_mean, self.img_std)
        cur_img = normalize(cur_img, self.img_mean, self.img_std)

        ref_img, cur_img, cur_mask = random_rotation_adnet(
            ref_img, cur_img, cur_mask)

        return (ref_img.transpose(2, 0, 1),
                cur_img.transpose(2, 0, 1),
                np.expand_dims(cur_mask, 0),
                None)
コード例 #3
0
ファイル: dataloader.py プロジェクト: superxiaoying/TorchSSC
    def __call__(self, img, hha):
        """Normalize an image/HHA pair and move channels to the first axis.

        Args:
            img: Colour image; must be 3-D (transposed with ``(2, 0, 1)``).
            hha: HHA image; must be 3-D.

        Returns:
            ``(p_img, extra_dict)`` where ``extra_dict`` is
            ``{'hha_img': p_hha}``.
        """
        # NOTE(review): hha is normalized with the image mean/std rather than
        # HHA-specific statistics -- confirm this is intended.
        normalized = [
            normalize(arr, self.img_mean, self.img_std) for arr in (img, hha)
        ]
        p_img, p_hha = (arr.transpose(2, 0, 1) for arr in normalized)

        return p_img, dict(hha_img=p_hha)
コード例 #4
0
ファイル: dataloader.py プロジェクト: zxt881108/TorchSeg
    def __call__(self, img, gt):
        """Augment an (image, label) pair with a random short-side resize.

        Steps: random mirror, resize so the shorter side equals a random
        length in ``[0.5, 2.0] * config.base_size`` (aspect ratio kept),
        normalize, random crop/pad to
        ``(config.image_height, config.image_width)``, then downsample the
        label by ``config.gt_down_sampling``.

        Args:
            img: 3-D image array (its shape is unpacked into three values).
            gt: Label map matching ``img`` spatially.

        Returns:
            ``(p_img, p_gt, None)`` with ``p_img`` transposed by
            ``(2, 0, 1)``.
        """
        img, gt = random_mirror(img, gt)

        shortest = int(config.base_size * 0.5)
        longest = int(config.base_size * 2.0)
        short_size = random.randint(shortest, longest)

        h, w, _ = img.shape
        if h > w:
            # Width is the shorter side: pin it and derive the height.
            new_w = short_size
            new_h = int(1.0 * h * new_w / w)
        else:
            new_h = short_size
            new_w = int(1.0 * w * new_h / h)

        img = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_LINEAR)
        # Labels use nearest-neighbour so values are not interpolated.
        gt = cv2.resize(gt, (new_w, new_h), interpolation=cv2.INTER_NEAREST)

        img = normalize(img, self.img_mean, self.img_std)

        target_hw = (config.image_height, config.image_width)
        origin = generate_random_crop_pos(img.shape[:2], target_hw)
        p_img, _ = random_crop_pad_to_shape(img, origin, target_hw, 0)
        p_gt, _ = random_crop_pad_to_shape(gt, origin, target_hw, 255)

        gt_size = (config.image_width // config.gt_down_sampling,
                   config.image_height // config.gt_down_sampling)
        p_gt = cv2.resize(p_gt, gt_size, interpolation=cv2.INTER_NEAREST)

        return p_img.transpose(2, 0, 1), p_gt, None
コード例 #5
0
ファイル: dataloader.py プロジェクト: zbwxp/Deeperlab-pytorch
    def __call__(self, img, gt):
        """Augment an (image, label) pair and derive an edge auxiliary label.

        Steps: random mirror, optional random scale, edge map from the label
        (Canny followed by dilation with ``self.edge_kernel``), image
        normalization, and a shared random crop/pad to
        ``(config.image_height, config.image_width)``.

        Args:
            img: 3-D image array.
            gt: Label map; the value 255 is zeroed before edge detection.

        Returns:
            ``(p_img, p_gt, {'aux_label': p_cgt})`` where ``p_cgt`` is 1 on
            dilated edges, 0 elsewhere and 255 in padded regions.
        """
        img, gt = random_mirror(img, gt)
        if config.train_scale_array is not None:
            img, gt, _ = random_scale(img, gt, config.train_scale_array)

        # Work on a copy with 255 replaced by 0 so those pixels are treated
        # like label 0 by the edge detector.
        edge_source = np.array(gt)
        edge_source[gt == 255] = 0
        boundary = cv2.Canny(edge_source, 5, 5, apertureSize=7)
        boundary = cv2.dilate(boundary, self.edge_kernel)
        # Turn the 0/255 edge map into a 0/1 label map.
        boundary[boundary == 255] = 1

        img = normalize(img, self.img_mean, self.img_std)

        target_hw = (config.image_height, config.image_width)
        origin = generate_random_crop_pos(img.shape[:2], target_hw)

        p_img, _ = random_crop_pad_to_shape(img, origin, target_hw, 0)
        p_gt, _ = random_crop_pad_to_shape(gt, origin, target_hw, 255)
        p_cgt, _ = random_crop_pad_to_shape(boundary, origin, target_hw, 255)

        return p_img.transpose(2, 0, 1), p_gt, {'aux_label': p_cgt}
コード例 #6
0
    def __call__(self, img, gt):
        """Mirror, optionally scale, normalize and crop an (image, label) pair.

        Both arrays are cropped/padded at the same random position to
        ``(config.image_height, config.image_width)``; image and label padding
        both use the fill value 0.

        Args:
            img: 3-D image array.
            gt: Label map matching ``img`` spatially.

        Returns:
            ``(p_img, p_gt, None)`` with ``p_img`` transposed by
            ``(2, 0, 1)``.
        """
        img, gt = random_mirror(img, gt)

        scale_choices = config.train_scale_array
        if scale_choices is not None:
            img, gt, _ = random_scale(img, gt, scale_choices)

        img = normalize(img, self.img_mean, self.img_std)

        target_hw = (config.image_height, config.image_width)
        origin = generate_random_crop_pos(img.shape[:2], target_hw)

        cropped = [
            random_crop_pad_to_shape(arr, origin, target_hw, 0)[0]
            for arr in (img, gt)
        ]
        p_img, p_gt = cropped

        return p_img.transpose(2, 0, 1), p_gt, None
コード例 #7
0
    def process_image_rgbd_coord(self, img, hha, depth, coord, crop_size=None):
        """Normalize an RGB image and lay out RGB/HHA/depth/coord inputs.

        Args:
            img: 3-D image; when it has fewer than 3 channels it is
                concatenated with itself three times along the channel axis.
            hha: 3-D HHA image (transposed with ``(2, 0, 1)``).
            depth: Depth map; a leading axis is added.
            coord: 3-D coordinate map (transposed with ``(2, 0, 1)``).
            crop_size: Optional target shape passed to ``pad_image_to_shape``.
                When ``None`` no padding is done.

        Returns:
            ``(p_img, p_hha, p_depth, p_coord, margin)``. ``margin`` is the
            value returned by ``pad_image_to_shape`` for the last padded
            input, or ``None`` when ``crop_size`` is ``None``.
        """
        p_img = img
        if img.shape[2] < 3:
            p_img = np.concatenate((img, img, img), axis=2)

        # Only the colour image is normalized here.
        p_img = normalize(p_img, self.image_mean, self.image_std)
        p_hha, p_depth, p_coord = hha, depth, coord

        # Previously `margin` was only bound inside the padding branch, so the
        # default crop_size=None call raised UnboundLocalError at the return.
        margin = None
        if crop_size is not None:
            p_img, margin = pad_image_to_shape(p_img, crop_size,
                                               cv2.BORDER_CONSTANT, value=0)
            p_hha, margin = pad_image_to_shape(p_hha, crop_size,
                                               cv2.BORDER_CONSTANT, value=0)
            p_depth, margin = pad_image_to_shape(p_depth, crop_size,
                                                 cv2.BORDER_CONSTANT, value=0)
            p_coord, margin = pad_image_to_shape(p_coord, crop_size,
                                                 cv2.BORDER_CONSTANT, value=0)

        p_img = p_img.transpose(2, 0, 1)
        p_hha = p_hha.transpose(2, 0, 1)
        p_depth = p_depth[np.newaxis, ...]
        p_coord = p_coord.transpose(2, 0, 1)

        return p_img, p_hha, p_depth, p_coord, margin
コード例 #8
0
    def __call__(self, img, gt, edge, midline):
        """Augment an image with its gt/edge/midline maps and downsample all.

        Steps: random mirror, ``img_to_black`` on the three maps, optional
        random scale, image normalization, then a nearest-neighbour resize of
        everything to ``(config.image_width, config.image_height)`` divided by
        ``config.gt_down_sampling``.

        Args:
            img: 3-D image array.
            gt: Ground-truth map.
            edge: Edge map.
            midline: Midline map.

        Returns:
            ``(p_img, p_gt, None, p_edge, p_midline)`` with ``p_img``
            transposed by ``(2, 0, 1)``.
        """
        img, gt, edge, midline = random_mirror(img, gt, edge, midline)
        gt, edge, midline = (img_to_black(m) for m in (gt, edge, midline))

        if config.train_scale_array is not None:
            img, gt, _, edge, midline = random_scale(
                img, gt, config.train_scale_array, edge, midline)

        img = normalize(img, self.img_mean, self.img_std)

        out_size = (config.image_width // config.gt_down_sampling,
                    config.image_height // config.gt_down_sampling)

        def shrink(arr):
            # The image is resized with nearest-neighbour too, like the maps.
            return cv2.resize(arr, out_size, interpolation=cv2.INTER_NEAREST)

        p_img = shrink(img)
        p_gt = shrink(gt)
        p_edge = shrink(edge)
        p_midline = shrink(midline)
        p_img = p_img.transpose(2, 0, 1)

        # Diagnostic dump when the label map holds a value above 1.
        if p_gt.max() > 1:
            print(p_gt)

        return p_img, p_gt, None, p_edge, p_midline
コード例 #9
0
    def __call__(self, img, gt, edge, midline):
        """Augment an image with its gt/edge/midline maps via crop + resize.

        Steps: random mirror, ``img_to_black`` on the three maps, optional
        random scale, image normalization, a shared random 200x200 crop/pad,
        then a nearest-neighbour resize of everything to
        ``(config.image_width, config.image_height)`` divided by
        ``config.gt_down_sampling``.

        Args:
            img: 3-D image array.
            gt: Ground-truth map.
            edge: Edge map.
            midline: Midline map.

        Returns:
            ``(p_img, p_gt, None, p_edge, p_midline)`` with ``p_img``
            transposed by ``(2, 0, 1)``.
        """
        # Random flip applied jointly to all four arrays.
        img, gt, edge, midline = random_mirror(img, gt, edge, midline)

        # Binary filter on each of the maps.
        gt, edge, midline = (img_to_black(m) for m in (gt, edge, midline))

        if config.train_scale_array is not None:
            img, gt, _, edge, midline = random_scale(
                img, gt, config.train_scale_array, edge, midline)

        img = normalize(img, self.img_mean, self.img_std)

        patch_hw = (200, 200)
        origin = generate_random_crop_pos(img.shape[:2], patch_hw)

        def crop(arr, fill):
            patch, _ = random_crop_pad_to_shape(arr, origin, patch_hw, fill)
            return patch

        out_size = (config.image_width // config.gt_down_sampling,
                    config.image_height // config.gt_down_sampling)

        def shrink(arr):
            return cv2.resize(arr, out_size, interpolation=cv2.INTER_NEAREST)

        # Crop all arrays first (image padded with 0, maps with -1), then
        # resize them, in the same order as the maps are listed.
        c_img = crop(img, 0)
        c_gt = crop(gt, -1)
        c_edge = crop(edge, -1)
        c_midline = crop(midline, -1)

        p_img = shrink(c_img)
        p_gt = shrink(c_gt)
        p_edge = shrink(c_edge)
        p_midline = shrink(c_midline)

        p_img = p_img.transpose(2, 0, 1)

        # Diagnostic dump when the label map holds a value above 1.
        if p_gt.max() > 1:
            print(p_gt)

        return p_img, p_gt, None, p_edge, p_midline
コード例 #10
0
ファイル: evaluator.py プロジェクト: superxiaoying/TorchSSC
    def process_image_rgbd(self, img, disp, crop_size=None):
        """Normalize an image and its disparity/depth input for inference.

        Args:
            img: 3-D image; when it has fewer than 3 channels it is
                concatenated with itself three times along the channel axis.
            disp: Second modality. A 2-D array is normalized with mean 0 and
                std 1 and gets a leading axis; otherwise it is normalized
                with the image statistics and transposed with ``(2, 0, 1)``.
            crop_size: Optional target shape passed to ``pad_image_to_shape``.

        Returns:
            ``(p_img, p_disp, margin)`` when ``crop_size`` is given, otherwise
            ``(p_img, p_disp)``. ``margin`` comes from padding the image.
        """
        p_img = img
        if img.shape[2] < 3:
            p_img = np.concatenate((img, img, img), axis=2)
        p_img = normalize(p_img, self.image_mean, self.image_std)

        disp_is_2d = len(disp.shape) == 2
        if disp_is_2d:
            p_disp = normalize(disp, 0, 1)
        else:
            p_disp = normalize(disp, self.image_mean, self.image_std)

        padded = crop_size is not None
        if padded:
            p_img, margin = pad_image_to_shape(p_img,
                                               crop_size,
                                               cv2.BORDER_CONSTANT,
                                               value=0)
            p_disp, _ = pad_image_to_shape(p_disp,
                                           crop_size,
                                           cv2.BORDER_CONSTANT,
                                           value=0)

        # Channel-first layout is applied identically on both return paths.
        p_img = p_img.transpose(2, 0, 1)
        if disp_is_2d:
            p_disp = p_disp[np.newaxis, ...]
        else:
            p_disp = p_disp.transpose(2, 0, 1)

        if padded:
            return p_img, p_disp, margin
        return p_img, p_disp
def pre_processing(img_path):
    """Load an image and turn it into a normalized tensor.

    The image is read with ``read_image``, resized to the module-level
    ``size``, converted to a PIL image and then to a tensor, and finally
    normalized in place with fixed per-channel mean/std constants.

    Args:
        img_path: Path handed to ``read_image``.

    Returns:
        The value returned by ``normalize`` (called with ``inplace=True``).
    """
    resized = cv2.resize(read_image(img_path), size)
    tensor = transforms.ToTensor()(Image.fromarray(resized))

    channel_mean = (0.408, 0.447, 0.47)
    channel_std = (0.289, 0.274, 0.278)
    return normalize(tensor, channel_mean, channel_std, inplace=True)
コード例 #12
0
    def __getitem__(self, index):
        """Load and normalize the sample at ``index`` without augmentation.

        Args:
            index: Position in ``self.files``; each entry is a mapping with
                the keys ``"img"``, ``"label"`` and ``"name"``.

        Returns:
            ``(image, label, size, name)``: the normalized image transposed
            with ``(2, 0, 1)``, the grayscale label, the original image shape
            as an array, and the sample name.

        Raises:
            OSError: If the image or label file cannot be read.
        """
        datafiles = self.files[index]

        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the offending path rather than with an opaque
        # AttributeError further down.
        image = cv2.imread(datafiles["img"], cv2.IMREAD_COLOR)
        if image is None:
            raise OSError(
                "Failed to read image '{}'.".format(datafiles["img"]))
        label = cv2.imread(datafiles["label"], cv2.IMREAD_GRAYSCALE)
        if label is None:
            raise OSError(
                "Failed to read label '{}'.".format(datafiles["label"]))

        # Shape is recorded before any processing.
        size = image.shape
        name = datafiles["name"]

        image = normalize(image, np.array(self.mean), np.array(self.std))
        image = image.transpose(2, 0, 1)

        return image.copy(), label.copy(), np.array(size), name
コード例 #13
0
    def __call__(self, img, gt):
        """Apply the training augmentation to an (image, label) pair.

        Steps: random mirror, optional random scale, image normalization and
        a shared random crop/pad to
        ``(config.image_height, config.image_width)`` with fill value 0 for
        both arrays.

        Args:
            img: 3-D image array.
            gt: Label map matching ``img`` spatially.

        Returns:
            ``(p_img, p_gt, None)`` with ``p_img`` transposed by
            ``(2, 0, 1)``.
        """
        img, gt = random_mirror(img, gt)

        if config.train_scale_array is not None:
            img, gt, _ = random_scale(img, gt, config.train_scale_array)

        img = normalize(img, self.img_mean, self.img_std)

        out_hw = (config.image_height, config.image_width)
        top_left = generate_random_crop_pos(img.shape[:2], out_hw)

        img_patch, _ = random_crop_pad_to_shape(img, top_left, out_hw, 0)
        gt_patch, _ = random_crop_pad_to_shape(gt, top_left, out_hw, 0)

        return img_patch.transpose(2, 0, 1), gt_patch, None
コード例 #14
0
    def __call__(self, img, gt):
        """Augment an (image, label) pair using the instance's config.

        Steps: random mirror, optional random scale, image normalization, a
        shared random crop/pad to ``(image_height, image_width)`` (label
        padded with 255), then nearest-neighbour downsampling of the label by
        ``gt_down_sampling``.

        Args:
            img: 3-D image array.
            gt: Label map matching ``img`` spatially.

        Returns:
            ``(p_img, p_gt, None)`` with ``p_img`` transposed by
            ``(2, 0, 1)``.
        """
        img, gt = random_mirror(img, gt)

        cfg = self.config
        if cfg.train_scale_array is not None:
            img, gt, _ = random_scale(img, gt, cfg.train_scale_array)

        img = normalize(img, self.img_mean, self.img_std)

        target_hw = (cfg.image_height, cfg.image_width)
        origin = generate_random_crop_pos(img.shape[:2], target_hw)
        p_img, _ = random_crop_pad_to_shape(img, origin, target_hw, 0)
        p_gt, _ = random_crop_pad_to_shape(gt, origin, target_hw, 255)

        # cv2.resize takes (width, height).
        gt_size = (cfg.image_width // cfg.gt_down_sampling,
                   cfg.image_height // cfg.gt_down_sampling)
        p_gt = cv2.resize(p_gt, gt_size, interpolation=cv2.INTER_NEAREST)

        return p_img.transpose(2, 0, 1), p_gt, None
コード例 #15
0
    def __call__(self, img, gt):
        """Augment an (image, label) pair and downsample both.

        Steps: random mirror, ``img_to_black`` on the label, optional random
        scale, image normalization, then a nearest-neighbour resize of both
        arrays to ``(config.image_width, config.image_height)`` divided by
        ``config.gt_down_sampling``.

        Args:
            img: 3-D image array.
            gt: Label map matching ``img`` spatially.

        Returns:
            ``(p_img, p_gt, None)`` with ``p_img`` transposed by
            ``(2, 0, 1)``.
        """
        img, gt = random_mirror(img, gt)
        gt = img_to_black(gt)

        scale_choices = config.train_scale_array
        if scale_choices is not None:
            img, gt, _ = random_scale(img, gt, scale_choices)

        img = normalize(img, self.img_mean, self.img_std)

        out_size = (config.image_width // config.gt_down_sampling,
                    config.image_height // config.gt_down_sampling)
        p_img = cv2.resize(img, out_size, interpolation=cv2.INTER_NEAREST)
        p_gt = cv2.resize(gt, out_size, interpolation=cv2.INTER_NEAREST)

        return p_img.transpose(2, 0, 1), p_gt, None
コード例 #16
0
    def process_image(self, img, crop_size=None):
        """Normalize an image and convert it to channel-first layout.

        Args:
            img: 3-D image; when it has fewer than 3 channels it is
                concatenated with itself three times along the channel axis.
            crop_size: Optional target shape passed to ``pad_image_to_shape``.

        Returns:
            ``(p_img, margin)`` when ``crop_size`` is given, otherwise just
            ``p_img``.
        """
        p_img = img
        if img.shape[2] < 3:
            p_img = np.concatenate((img, img, img), axis=2)

        p_img = normalize(p_img, self.image_mean, self.image_std)

        if crop_size is None:
            return p_img.transpose(2, 0, 1)

        padded, margin = pad_image_to_shape(p_img, crop_size,
                                            cv2.BORDER_CONSTANT, value=0)
        return padded.transpose(2, 0, 1), margin
コード例 #17
0
    def __call__(self, img, gt):
        """Augment a sample with ``self.augmenter`` and normalize the image.

        The augmenter is called with ``image=`` and ``mask=`` keyword
        arguments and must return a mapping with ``'image'`` and ``'mask'``
        entries.

        Args:
            img: Image handed to the augmenter.
            gt: Label map handed to the augmenter as the mask.

        Returns:
            ``(p_img, p_gt, None)`` with ``p_img`` normalized and transposed
            by ``(2, 0, 1)``; ``p_gt`` is the augmented mask, unchanged.
        """
        augmented = self.augmenter(image=img, mask=gt)
        aug_img = augmented['image']
        p_gt = augmented['mask']

        normalized = normalize(aug_img, self.img_mean, self.img_std)

        return normalized.transpose(2, 0, 1), p_gt, None
コード例 #18
0
    def __call__(self, img, gt):
        """Augment an (image, label) pair via a 200x200 crop and a resize.

        Steps: random mirror, ``img_to_black`` on the label, optional random
        scale, image normalization, a shared random 200x200 crop/pad (label
        padded with 255), then a nearest-neighbour resize of both arrays to
        ``(config.image_width, config.image_height)`` divided by
        ``config.gt_down_sampling``.

        Args:
            img: 3-D image array.
            gt: Label map matching ``img`` spatially.

        Returns:
            ``(p_img, p_gt, None)`` with ``p_img`` transposed by
            ``(2, 0, 1)``.
        """
        # Random flip applied jointly to image and label.
        img, gt = random_mirror(img, gt)

        # Binary filter on the label.
        gt = img_to_black(gt)

        scale_choices = config.train_scale_array
        if scale_choices is not None:
            img, gt, _ = random_scale(img, gt, scale_choices)

        img = normalize(img, self.img_mean, self.img_std)

        patch_hw = (200, 200)
        origin = generate_random_crop_pos(img.shape[:2], patch_hw)
        img_patch, _ = random_crop_pad_to_shape(img, origin, patch_hw, 0)
        gt_patch, _ = random_crop_pad_to_shape(gt, origin, patch_hw, 255)

        out_size = (config.image_width // config.gt_down_sampling,
                    config.image_height // config.gt_down_sampling)
        p_img = cv2.resize(img_patch, out_size,
                           interpolation=cv2.INTER_NEAREST)
        p_gt = cv2.resize(gt_patch, out_size,
                          interpolation=cv2.INTER_NEAREST)

        return p_img.transpose(2, 0, 1), p_gt, None
コード例 #19
0
    def __getitem__(self, index):
        """Load, normalize and augment the training sample at ``index``.

        The image is normalized and randomly mirrored together with its
        label. When ``self.scale`` is truthy the pair is also randomly scaled
        and cropped/padded to ``self.crop_size`` (label padded with 255);
        otherwise it keeps its size.

        Args:
            index: Position in ``self.files``; each entry is a mapping with
                the keys ``"img"``, ``"label"`` and ``"name"``.

        Returns:
            ``(image, label, size, name)``: the processed image transposed
            with ``(2, 0, 1)``, the label, the ORIGINAL image shape as an
            array, and the sample name.

        Raises:
            OSError: If the image or label file cannot be read.
        """
        datafiles = self.files[index]

        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the offending path rather than with an opaque
        # AttributeError further down.
        image = cv2.imread(datafiles["img"], cv2.IMREAD_COLOR)
        if image is None:
            raise OSError(
                "Failed to read image '{}'.".format(datafiles["img"]))
        label = cv2.imread(datafiles["label"], cv2.IMREAD_GRAYSCALE)
        if label is None:
            raise OSError(
                "Failed to read label '{}'.".format(datafiles["label"]))

        # Shape is recorded before any augmentation changes it.
        size = image.shape
        name = datafiles["name"]

        image = normalize(image, np.array(self.mean), np.array(self.std))
        image, label = random_mirror(image, label)

        if self.scale:
            image, label, _ = random_scale(image, label,
                                           [0.75, 1, 1.25, 1.5, 1.75, 2.0])
            crop_pos = generate_random_crop_pos(image.shape[:2],
                                                self.crop_size)
            image, _ = random_crop_pad_to_shape(image, crop_pos,
                                                self.crop_size, 0)
            label, _ = random_crop_pad_to_shape(label, crop_pos,
                                                self.crop_size, 255)

        image = image.transpose(2, 0, 1)

        return image.copy(), label.copy(), np.array(size), name
コード例 #20
0
def predict(models: nn.ModuleList, img_path, path2save, thresh=0.5):
    """Predict a mask for a single image, then save and display the overlay.

    Every model is run on the image, the sigmoid outputs are averaged (a
    single model is just the one-element case) and thresholded into a binary
    mask that is overlaid on the transformed source image.

    Args:
        models: One or more models; each is switched to eval mode.
        img_path: Path to the input image.
        path2save: Path the overlay image is written to.
        thresh: Probability threshold for the mask.

    Raises:
        FileNotFoundError: If ``img_path`` does not exist.
        ValueError: If ``models`` is empty or the file cannot be decoded as
            an image.

    Note:
        Opens a window and blocks on ``cv2.waitKey(0)`` until a key is
        pressed.
    """
    img_path = Path(img_path)

    if not img_path.exists():
        raise FileNotFoundError("File '{}' not found.".format(str(img_path)))

    # An empty ensemble used to divide zero by zero and silently produce an
    # all-zero mask; reject it up front instead.
    if len(models) == 0:
        raise ValueError("At least one model is required.")

    src_img = cv2.imread(str(img_path))
    # cv2.imread returns None (no exception) for undecodable files.
    if src_img is None:
        raise ValueError(
            "File '{}' could not be read as an image.".format(str(img_path)))

    transform = test_trasformations()
    src_img = transform(image=src_img)["image"]

    img2predict = cv2.cvtColor(src_img.copy(),
                               cv2.COLOR_BGR2RGB).astype(dtype=np.float32)
    img2predict = normalize(img2predict)
    img2predict = utils.to_gpu(
        numpy_to_tensor(img2predict).unsqueeze(0).contiguous()).float()

    # Accumulate probabilities across the ensemble. Local names avoid
    # shadowing this function, and torch.sigmoid replaces the deprecated
    # F.sigmoid.
    prob_sum = None
    with torch.no_grad():
        for model in models:
            model.eval()
            probs = torch.sigmoid(model(img2predict))
            prob_sum = probs if prob_sum is None else prob_sum + probs

    mean_probs = (prob_sum / len(models)).squeeze(0).squeeze(0).float()
    mask = (mean_probs > thresh).cpu().numpy().astype(dtype=np.uint8)
    overlayed_img = alpha_overlay(src_img, mask)

    # Save the overlay.
    cv2.imwrite(path2save, overlayed_img)

    # Show the overlay and wait for a key press.
    cv2.imshow("Predicted", overlayed_img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    print("Image '{}' was processed successfully.".format(str(img_path)))
コード例 #21
0
def predict_batch(models: nn.ModuleList, path2images, path2save, thresh=0.5):
    """Predict masks for every image in a directory and save the overlays.

    Each readable file directly inside ``path2images`` is processed: every
    model is run on it, the sigmoid outputs are averaged (a single model is
    just the one-element case) and thresholded into a binary mask that is
    overlaid on the transformed source image. The overlay is written to
    ``path2save`` under the same file name.

    Args:
        models: One or more models; each is switched to eval mode.
        path2images: Directory containing the input images.
        path2save: Existing directory the overlays are written to.
        thresh: Probability threshold for the mask.

    Raises:
        RuntimeError: If ``path2images`` or ``path2save`` is not a directory.
        ValueError: If ``models`` is empty.
    """
    path2images = Path(path2images)
    path2save = Path(path2save)

    if not path2images.is_dir():
        raise RuntimeError("File '{}' is not dir.".format(str(path2images)))

    if not path2save.is_dir():
        raise RuntimeError("File '{}' is not dir.".format(str(path2save)))

    # An empty ensemble used to divide zero by zero and silently produce
    # all-zero masks; reject it up front instead.
    if len(models) == 0:
        raise ValueError("At least one model is required.")

    # Eval mode only needs to be set once, not once per image.
    for model in models:
        model.eval()

    count_processed = 0
    for ip in sorted(path2images.glob("*")):
        src_img = cv2.imread(str(ip))
        # glob("*") also yields sub-directories and non-image files, for
        # which cv2.imread returns None; skip them instead of aborting the
        # whole batch.
        if src_img is None:
            print("Skipping '{}': not a readable image.".format(str(ip)))
            continue

        transform = test_trasformations()
        src_img = transform(image=src_img)["image"]

        img2predict = cv2.cvtColor(src_img.copy(),
                                   cv2.COLOR_BGR2RGB).astype(dtype=np.float32)
        img2predict = normalize(img2predict)
        img2predict = utils.to_gpu(
            numpy_to_tensor(img2predict).unsqueeze(0).contiguous()).float()

        # Accumulate probabilities across the ensemble; torch.sigmoid
        # replaces the deprecated F.sigmoid.
        prob_sum = None
        with torch.no_grad():
            for model in models:
                probs = torch.sigmoid(model(img2predict))
                prob_sum = probs if prob_sum is None else prob_sum + probs

        mean_probs = (prob_sum / len(models)).squeeze(0).squeeze(0).float()
        mask = (mean_probs > thresh).cpu().numpy().astype(dtype=np.uint8)
        overlayed_img = alpha_overlay(src_img, mask)

        # Save under the same file name as the input.
        cv2.imwrite(str(path2save / "{}".format(ip.name)), overlayed_img)

        print("Image '{}' was processed successfully.".format(str(ip)))
        count_processed += 1

    print("{} images were processed.".format(count_processed))
コード例 #22
0
def _load_multiscale_inputs(img_path, resize_shape, mirror):
    """Load one frame as a list of normalized network inputs, one per scale.

    Args:
        img_path: Path of the image file to read with OpenCV.
        resize_shape: Iterable of target sizes handed to ``cv2.resize``.
        mirror: When true, a copy of every scale with its last axis reversed
            is appended after the un-flipped entries.

    Returns:
        List of float CUDA tensors, each with a leading batch axis of size 1.
        The first ``len(resize_shape)`` entries are the un-flipped scales.
    """
    # Decode once and reverse the channel axis (OpenCV loads images as BGR).
    frame = cv2.imread(img_path, cv2.IMREAD_COLOR)[:, :, ::-1]
    inputs = [
        normalize(np.asarray(cv2.resize(frame, s), np.float32),
                  config.image_mean, config.image_std).transpose((2, 0, 1))
        for s in resize_shape
    ]
    if mirror:
        # .copy() materializes the reversed view so torch.from_numpy never
        # sees a negative stride.
        inputs += [x[:, :, ::-1].copy() for x in inputs]
    return [torch.from_numpy(np.expand_dims(x, axis=0)).float().cuda() for x in inputs]


def main():
    """Evaluate a trained segmentation model on the validation videos.

    All configuration is read from the module-level ``args``. The function:

    1. Builds the network selected by ``args.model`` and loads
       ``./log/snapshot/epoch-last.pth``.
    2. For every video listed in ``<data_dir>/ImageSets/2016/val.txt`` (or only
       ``args.video`` when it names one of them), runs the network on each
       frame of ``JPEGImages/480p/<video>`` (optionally multi-scale + mirrored),
       applies the optional pruning heuristics, and scores the result against
       ``Annotations/480p/<video>`` with ``get_iou``.
    3. Prints the per-video and overall mean IoU (first and last frame of each
       video are excluded from the mean).
    4. Optionally shows/saves visualizations, saves binary masks, and, with
       ``args.eval_sal``, dumps heat maps and reports MAE / max F-measure and
       writes a sub-sampled precision/recall curve to ``davis.mat``.

    Raises:
        ValueError: If ``args.model`` is not one of the supported model names.
    """
    cudnn.enabled = True

    if args.model == 'base':
        model = ResNetDeepLabv3(backbone=args.backbone)
    elif args.model == 'intra':
        model = IntraFrameNet(backbone=args.backbone, pyramid_pooling=args.pyramid_pooling,
                              embedding=args.embedding_size, batch_mode='sync')
    elif args.model == 'inter':
        model = InterFrameNet(backbone=args.backbone, pyramid_pooling=args.pyramid_pooling,
                              embedding=args.embedding_size, batch_mode='sync')
    elif args.model == 'concat':
        model = ConcatNet(backbone=args.backbone, pyramid_pooling=args.pyramid_pooling,
                          embedding=args.embedding_size, batch_mode='sync')
    elif args.model == 'ad':
        cfg = get_cfg()
        cfg.merge_from_file('./fpn_config/semantic_R_50_FPN_1x.yaml')
        model = AnchorDiffNet(cfg,
                              embedding=args.embedding_size, batch_mode='sync')
    else:
        # Fail early with a clear message instead of an UnboundLocalError below.
        raise ValueError("Unknown model type '{}'.".format(args.model))

    model.load_state_dict(torch.load('./log/snapshot/epoch-last.pth')['model'])
    model.eval()
    model.float()
    model.cuda()

    # Test-time scales depend only on the CLI flags, so compute them once.
    if args.ms_mirror:
        base_w = 512
        base_h = 256
        resize_shape = [(base_w*0.5, base_h*0.5), (base_w, base_h), (base_w*1.5, base_h*1.5)]
        resize_shape = [(int(s[0]), int(s[1])) for s in resize_shape]
        mirror = True
    else:
        resize_shape = [(704, 480)]
        mirror = False

    # Structuring elements for the mask post-processing heuristics below.
    kernel1 = np.ones((15, 15), np.uint8)    # morphological opening
    kernel2 = np.ones((101, 101), np.uint8)  # dilation of the previous-frame mask
    kernel3 = np.ones((31, 31), np.uint8)    # dilation before contour extraction

    with torch.no_grad():
        video_mean_iou_list = []
        with open(osp.join(args.data_dir, 'ImageSets', '2016', 'val.txt')) as val_file:
            videos = [i_id.strip() for i_id in val_file]
        if args.video and args.video in videos:
            videos = [args.video]

        for vid, video in enumerate(videos, start=1):
            curr_video_iou_list = []
            img_files = sorted(glob.glob(osp.join(args.data_dir, 'JPEGImages', '480p', video, '*.jpg')))
            ann_files = sorted(glob.glob(osp.join(args.data_dir, 'Annotations', '480p', video, '*.png')))

            # The first frame of the video serves as the reference for every later frame.
            reference_img = _load_multiscale_inputs(img_files[0], resize_shape, mirror)
            reference_mask = np.array(Image.open(ann_files[0])) > 0
            reference_mask = torch.from_numpy(np.expand_dims(np.expand_dims(reference_mask.astype(np.float32),
                                                                            axis=0), axis=0)).cuda()
            # Predictions are resampled to the annotation resolution.
            H, W = reference_mask.size(2), reference_mask.size(3)

            if args.visualize:
                # Row 0 = background (black), row 1 = random bright foreground color.
                colors = np.random.randint(128, 255, size=(1, 3), dtype="uint8")
                colors = np.vstack([[0, 0, 0], colors]).astype("uint8")

            last_mask_num = 0
            last_mask = None
            last_mask_final = None
            predictions_all = []
            gt_all = []

            for frame_idx, (img_file, ann_file) in enumerate(zip(img_files, ann_files)):
                current_img = _load_multiscale_inputs(img_file, resize_shape, mirror)

                # Binarize the annotation: any non-zero label counts as foreground.
                current_mask = Image.open(ann_file)
                current_mask = np.atleast_3d(current_mask)[..., 0]
                current_mask = current_mask.copy()
                current_mask[current_mask > 0] = 1
                current_mask = torch.from_numpy(
                    np.expand_dims(np.expand_dims(current_mask.astype(np.float32), axis=0), axis=0)).cuda()

                if args.model in ['base']:
                    predictions = [model(cur) for cur in current_img]
                    # The base model's output is indexed with [0] before resampling.
                    predictions = [F.interpolate(input=p[0], size=(H, W), mode='bilinear', align_corners=True)
                                   for p in predictions]
                elif args.model in ['intra']:
                    predictions = [model(cur) for cur in current_img]
                    predictions = [F.interpolate(input=p, size=(H, W), mode='bilinear', align_corners=True)
                                   for p in predictions]
                elif args.model in ['inter', 'concat', 'ad']:
                    predictions = [model(ref, cur) for ref, cur in zip(reference_img, current_img)]
                    predictions = [F.interpolate(input=p, size=(H, W), mode='bilinear', align_corners=True)
                                   for p in predictions]

                if mirror:
                    # The second half of the list came from flipped inputs; flip back before averaging.
                    for r in range(len(predictions)//2, len(predictions)):
                        predictions[r] = torch.flip(predictions[r], [3])
                predictions = torch.mean(torch.stack(predictions, dim=0), 0)

                # Heat maps for the saliency metrics are taken before any pruning.
                predictions_all.append(predictions.sigmoid().data.cpu().numpy()[0, 0].copy())
                gt_all.append(current_mask.data.cpu().numpy()[0, 0].astype(np.uint8).copy())

                if args.inst_prune:
                    # Optional per-frame detection mask stored as inst_prune/<video>/<frame>.png.
                    result_dir = os.path.join('inst_prune', video)
                    detection_path = os.path.join(result_dir, img_file.split('/')[-1].split('.')[0] + '.png')
                    if os.path.exists(detection_path):
                        detection_mask = np.array(Image.open(detection_path)) > 0
                        detection_mask = torch.from_numpy(
                            np.expand_dims(np.expand_dims(detection_mask.astype(np.float32), axis=0), axis=0)).cuda()
                        predictions = predictions * detection_mask

                    # Thresholded mask, fetched from the GPU once and reused below.
                    process_now = (predictions > args.threshold).data.cpu().numpy().astype(np.uint8)[0, 0]
                    if 100000 > process_now.sum() > 40000:
                        last_mask_numpy = cv2.morphologyEx(process_now, cv2.MORPH_OPEN, kernel1)
                        dilation = cv2.dilate(last_mask_numpy, kernel3, iterations=1)
                        contours, _ = cv2.findContours(dilation, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
                        cnt_area = [cv2.contourArea(cnt) for cnt in contours]
                        if len(contours) > 1:
                            # Several blobs: restrict to the bounding box of the largest one.
                            contour = contours[np.argmax(cnt_area)]
                            polygon = contour.reshape(-1, 2)
                            x, y, w, h = cv2.boundingRect(polygon)
                            x0, y0 = x, y
                            x1 = x + w
                            y1 = y + h
                            mask_rect = torch.from_numpy(np.zeros_like(dilation).astype(np.float32)).cuda()
                            mask_rect[y0:y1, x0:x1] = 1
                            mask_rect = mask_rect.unsqueeze(0).unsqueeze(0)
                            if np.max(cnt_area) > 30000:
                                # Only prune when the box agrees with the previous frame's final mask.
                                if last_mask_final is None or get_iou(last_mask_final, mask_rect,
                                                                      thresh=args.threshold) > 0.3:
                                    predictions = predictions * mask_rect
                    last_mask_final = predictions.clone()

                if 100000 > last_mask_num > 5000:
                    # Keep only predictions near the previous frame's mask (opened, then
                    # heavily dilated). The previous-frame state is refreshed from the
                    # prediction *before* this gating is applied.
                    last_mask_numpy = (last_mask > args.threshold).data.cpu().numpy().astype(np.uint8)[0, 0]
                    last_mask_numpy = cv2.morphologyEx(last_mask_numpy, cv2.MORPH_OPEN, kernel1)
                    dilation = cv2.dilate(last_mask_numpy, kernel2, iterations=1)
                    dilation = torch.from_numpy(dilation.astype(np.float32)).cuda()

                    last_mask = predictions.clone()
                    last_mask_num = (predictions > args.threshold).sum()

                    predictions = predictions*dilation
                else:
                    last_mask = predictions.clone()
                    last_mask_num = (predictions > args.threshold).sum()

                iou_temp = get_iou(predictions, current_mask, thresh=args.threshold)
                # First and last frames are excluded from the per-video mean.
                if 0 < frame_idx < (len(ann_files)-1):
                    curr_video_iou_list.append(iou_temp)

                if args.visualize:
                    mask = colors[(predictions.cpu().numpy().squeeze() > args.threshold).astype(np.uint8)]
                    output = ((0.4 * cv2.imread(img_file)) + (0.6 * mask)).astype("uint8")
                    cv2.putText(output, "%.3f" % (iou_temp.item()),
                                (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)

                    cv2.imshow(video, output)
                    cv2.waitKey(1)

                    suffix = args.ms_mirror*'ms_mirror'+(not args.ms_mirror)*'single'+args.inst_prune*'_prune'
                    visual_path = osp.join('visualization', args.model + '_' + suffix, img_file.split('/')[-2])
                    os.makedirs(visual_path, exist_ok=True)
                    cv2.imwrite(osp.join(visual_path, ann_file.split('/')[-1]), output)

                if args.save_mask:
                    suffix = args.ms_mirror*'ms_mirror'+(not args.ms_mirror)*'single'+args.inst_prune*'_prune'
                    mask_dir = osp.join(args.save_mask_dir, args.model, suffix, video)
                    os.makedirs(mask_dir, exist_ok=True)
                    cv2.imwrite(osp.join(mask_dir, ann_file.split('/')[-1]),
                                (predictions.squeeze() > args.threshold).cpu().numpy().astype(np.uint8))

            cv2.destroyAllWindows()
            if curr_video_iou_list:
                video_mean_iou_list.append(sum(curr_video_iou_list)/len(curr_video_iou_list))
                print('{} {} {}'.format(vid, video, video_mean_iou_list[-1]))
            else:
                # Fewer than three frames: nothing to average once the ends are dropped.
                print('{} {} {}'.format(vid, video, 'n/a (no interior frames to score)'))

            if args.eval_sal:
                os.makedirs(args.save_heatmap_dir, exist_ok=True)
                # NOTE: save_heatmap_dir is used as a plain string prefix (here and in the
                # glob below), so it is expected to end with a path separator.
                with open(args.save_heatmap_dir + video + '.pkl', 'wb') as heatmap_file:
                    pickle.dump({'pred': np.array(predictions_all), 'gt': np.array(gt_all)},
                                heatmap_file, pickle.HIGHEST_PROTOCOL)

        if video_mean_iou_list:
            mean_iou = sum(video_mean_iou_list)/len(video_mean_iou_list)
            print('mean_iou {}'.format(mean_iou))
        else:
            print('mean_iou n/a (no video was scored)')
    end = timeit.default_timer()
    # NOTE(review): `start` is not assigned in this function; presumably a
    # module-level timestamp taken at import time. Confirm.
    print(end-start, 'seconds')
    # ==========================
    if args.eval_sal:
        pkl_files = glob.glob(args.save_heatmap_dir + '*.pkl')
        heatmap_gt = []
        heatmap_pred = []
        for pkl_file in pkl_files:
            # SECURITY: pickle.load executes arbitrary code; only safe because these
            # files are the ones this script wrote above. Do not point it at untrusted data.
            with open(pkl_file, 'rb') as heatmap_file:
                info = pickle.load(heatmap_file)
            # Drop first and last frame, mirroring the IoU computation.
            heatmap_gt.append(np.array(info['gt'][1:-1]).flatten())
            heatmap_pred.append(np.array(info['pred'][1:-1]).flatten())
        heatmap_gt = np.hstack(heatmap_gt).flatten()
        heatmap_pred = np.hstack(heatmap_pred).flatten()
        precision, recall, _ = precision_recall_curve(heatmap_gt, heatmap_pred)
        # Points with precision == recall == 0 give 0/0; ignore them instead of
        # letting a NaN poison the maximum.
        with np.errstate(divide='ignore', invalid='ignore'):
            Fmax = 2 * (precision * recall) / (precision + recall)
        print('MAE', np.mean(abs(heatmap_pred - heatmap_gt)))
        print('F_max', np.nanmax(Fmax))

        # Sub-sample the curve to roughly 1000 points; the step must be at least 1
        # or slicing raises "slice step cannot be zero" for short curves.
        n_sample = max(1, len(precision)//1000)
        import scipy.io
        scipy.io.savemat('davis.mat', {'recall': recall[0::n_sample], 'precision': precision[0::n_sample]})