Example #1
def process(img):
    img = bytes_to_file(img)
    try:
        img_A, img_B = read_split_image(img)
        if augment:
            # augment the image by:
            # 1) enlarging the image
            # 2) randomly cropping it back to its original size
            # NOTE: images A and B need to be shifted by the same amount
            w, h = img_A.shape
            multiplier = random.uniform(1.00, 1.20)
            # add an eps to prevent cropping issues
            nw = int(multiplier * w) + 1
            nh = int(multiplier * h) + 1
            shift_x = int(np.ceil(np.random.uniform(0.01, nw - w)))
            shift_y = int(np.ceil(np.random.uniform(0.01, nh - h)))
            img_A = shift_and_resize_image(img_A, shift_x, shift_y, nw, nh)
            img_B = shift_and_resize_image(img_B, shift_x, shift_y, nw, nh)
        img_A = normalize_image(img_A)
        img_B = normalize_image(img_B)
        img_A = np.expand_dims(img_A, axis=2)
        img_B = np.expand_dims(img_B, axis=2)
        return np.concatenate([img_A, img_B], axis=2)
    finally:
        img.close()
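All of these examples lean on a project-specific `normalize_image` helper that is not shown. As a point of reference only, a minimal sketch of what such a helper often looks like for grayscale arrays like the one above, assuming a [0, 255] input range and a [-1, 1] target range (each project ships its own variant):

import numpy as np

def normalize_image(img):
    # Hypothetical helper: map pixel values from [0, 255] to [-1, 1].
    # The real utils.normalize_image differs from project to project.
    return img.astype(np.float32) / 127.5 - 1.0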
Example #2
def prepare_images_and_depths(image1, image2, depth1, depth2):
    """ padding, normalization, and scaling """

    image1 = F.pad(image1, [0, 0, 0, 4], mode='replicate')
    image2 = F.pad(image2, [0, 0, 0, 4], mode='replicate')
    depth1 = F.pad(depth1[:, None], [0, 0, 0, 4], mode='replicate')[:, 0]
    depth2 = F.pad(depth2[:, None], [0, 0, 0, 4], mode='replicate')[:, 0]

    depth1 = (DEPTH_SCALE * depth1).float()
    depth2 = (DEPTH_SCALE * depth2).float()
    image1 = normalize_image(image1)
    image2 = normalize_image(image2)

    return image1, image2, depth1, depth2
Example #3
def main():
    @tf.RegisterGradient("GuidedRelu")
    def _GuidedReluGrad(op, grad):
        # Guided backprop: propagate the gradient only where both the
        # incoming gradient and the ReLU activation are positive.
        return tf.where(0. < grad, gen_nn_ops._relu_grad(grad, op.outputs[0]),
                        tf.zeros_like(grad))

    g = tf.Graph()
    sess = tf.Session(graph=g)
    with g.as_default():
        with g.gradient_override_map({'Relu': 'GuidedRelu'}):
            with sess.as_default():
                model = VGG16()

    dirname = os.path.dirname(os.path.abspath(__file__))
    layer_idx = 17

    for i, layer in enumerate(model.layers):
        print(i, layer.output)

    img_disp = cv2.imread('{}/images/woh.png'.format(dirname))
    img_disp = cv2.resize(img_disp, (224, 224))
    img = img_disp[np.newaxis, :, :, :]
    img = img.astype(np.float32)
    img = img - np.array([103.939, 116.779, 123.68])  # bgr

    out = utils.deconv(img, model, layer_idx, g=g, sess=sess)
    out = utils.normalize_image(out, per_image=True)
    out = utils.combine_and_fit(out, is_deconv=True, disp_w=4000)
    out = utils.to_255(out)

    cv2.imwrite('{}_deconv_out.png'.format(model.layers[layer_idx].name), out)
Example #4
    def __data_generation(self, list_IDs_temp):
        '''
        generates batches of shape:
        (n_samples, tile_side, tile_side, n_channels)

        For now this might not be necessary, but it will be when we want to
        preprocess the images before feeding them.

        input:
         - list_IDs_temp: list of image filenames. For now, it only consists
           of a list with one element
        '''
        image = imread("data/train/split/X/" + list_IDs_temp[0])
        image = normalize_image(image)
        mask = imread("data/train/split/mask/" + list_IDs_temp[0],
                      IMREAD_GRAYSCALE)

        X = convert_image_to_stack_of_tiles(image, self.tile_side,
                                            self.tile_side)
        y = convert_mask_to_labels(mask, self.tile_side, self.tile_side)

        # print("Shape of X: " + str(X.shape))
        # print("Length of y: " + str(len(y)))

        return shuffle(X, y)
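`convert_image_to_stack_of_tiles` is project code that is not shown; a minimal numpy sketch of non-overlapping square tiling (an assumption about its behavior, simplified to a single tile_side instead of the two side arguments used above):

import numpy as np

def tile_image(image, tile_side):
    # Crop to a multiple of tile_side, then reshape into
    # (n_tiles, tile_side, tile_side, channels).
    h, w, c = image.shape
    h, w = h - h % tile_side, w - w % tile_side
    tiles = image[:h, :w].reshape(h // tile_side, tile_side,
                                  w // tile_side, tile_side, c)
    return tiles.transpose(0, 2, 1, 3, 4).reshape(-1, tile_side, tile_side, c)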
Example #5
    def __data_generation(self, list_IDs_temp):
        '''
        generates batches of shape:
        (n_samples, tile_side, tile_side, n_channels)

        For now this might not be necessary, but it will be when we want to
        preprocess the images before feeding them.

        input:
         - list_IDs_temp: list of image filenames. For now, it only consists
           of a list with one element
        '''

        X = np.empty((self.batch_size, self.tile_side, self.tile_side, 3))
        y = np.empty((self.batch_size), dtype=int)

        for i, fname in enumerate(list_IDs_temp):
            X[i] = normalize_image(
                imread(self.source_directory + "/" +
                       self.image_label_directory[fname] + "/" + fname))
            y[i] = int(self.image_label_directory[fname].replace("-1", "0"))

        # print("Shape of X: " + str(X.shape))
        # print("Length of y: " + str(len(y)))

        return X, y
Example #6
def read_image(path):

    data_flownet = h5py.File(path, 'r')
    # print(list(data_flownet.keys()))  --> returns ['flow-mri']

    data_flownet_ = data_flownet['flow-mri']
    # print(list(data_flownet_.keys()))  --> returns ['intensity', 't_coordinates', 'velocity_cov', 'velocity_mean', 'x_coordinates', 'y_coordinates', 'z_coordinates']

    # get array shape and create an empty array of this size
    image = np.zeros(
        list(data_flownet_['intensity'].shape) + [4],
        data_flownet_['intensity'].dtype)
    image[..., 0] = data_flownet_['intensity']  # z, x, y, t
    image[..., 1:4] = data_flownet_['velocity_mean']  # z, x, y, t

    # close the hdf5 file
    data_flownet.close()

    # transpose and flip lr in the x-y direction
    for n0 in range(image.shape[0]):
        for n3 in range(image.shape[3]):
            for n4 in range(image.shape[4]):
                image[n0, :, :, n3, n4] = np.fliplr(image[n0, :, :, n3, n4].T)

    # normalize the image
    image = utils.normalize_image(image)

    # crop or pad to make the same shape as the freiburg images
    image = utils.crop_or_pad_4dvol_along_0(image, 32)
    image = utils.crop_or_pad_4dvol_along_1(image, 144)
    image = utils.crop_or_pad_4dvol_along_2(image, 112)
    image = utils.crop_or_pad_4dvol_along_3(image, 48)

    return image
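The `crop_or_pad_4dvol_along_*` helpers are not shown; a minimal sketch of a centre crop-or-pad along a single axis (an assumption about what they do) could be:

import numpy as np

def crop_or_pad_along_axis(vol, target, axis=0):
    # Hypothetical helper: centre crop or zero-pad `vol` to `target`
    # along `axis`, leaving all other axes untouched.
    size = vol.shape[axis]
    if size > target:                      # centre crop
        start = (size - target) // 2
        sl = [slice(None)] * vol.ndim
        sl[axis] = slice(start, start + target)
        return vol[tuple(sl)]
    pad = [(0, 0)] * vol.ndim              # symmetric zero pad
    before = (target - size) // 2
    pad[axis] = (before, target - size - before)
    return np.pad(vol, pad)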
Example #7
    def __init__(self, config):
        super().__init__(config)

        naive_img = plt.imread(
            os.path.join(config['path_dataset_root'], config['path_naive']))
        self.naive_img = normalize_image(naive_img)
        mask = plt.imread(
            os.path.join(config['path_dataset_root'],
                         config['path_mask']))[..., :3]
        self.mask = torch.tensor(np.transpose(mask, [2, 0, 1])).to(
            self.device, torch.float)  # [np.newaxis])

        self.naive_height_pyramid, self.naive_width_pyramid, self.naive_pyramid = [], [], []
        width, height, _ = self.naive_img.shape
        for scale in range(self.num_scale + 1):
            multiplier = self.multiplier_pyramid[scale]
            height_scaled = int(round(height * multiplier))
            width_scaled = int(round(width * multiplier))

            self.naive_height_pyramid.append(height_scaled)
            self.naive_width_pyramid.append(width_scaled)

            processed = cv2.resize(self.naive_img,
                                   (height_scaled, width_scaled))
            processed = torch.tensor(
                np.transpose(processed, [2, 0, 1])[np.newaxis])
            self.naive_pyramid.append(processed.to(self.device, torch.float))
Example #8
    def forward(self, image):
        B, _, H, W = image.shape
        image = utils.normalize_image(image)
        
        # CNN (joint backbone, separate decoder heads)
        features = self.backbone(image)
        S = self.score_decoder(features)
        Prel = self.position_decoder(features)
        F = self.descriptor_decoder(features)
        
        # Relative to absolute pixel coordinates
        P = rel_to_abs(Prel)
        
        # Flatten
        Sflat = S.view(B, -1)
        Pflat = P.view(B, 2, -1)
        Prelflat = Prel.view(B, 2, -1)
        Fflat = F.view(B, 256, -1)

        # Get data with top N score (S)
        Smax, ids = torch.topk(Sflat, k=self.N, dim=1, largest=True, sorted=False)
        Pmax = torch.stack([Pflat[i,:,ids[i]] for i in range(ids.shape[0])], dim=0)
        #Prelmax = torch.stack([Prelflat[i,:,ids[i]] for i in range(ids.shape[0])], dim=0)
        Fmax = torch.stack([Fflat[i,:,ids[i]] for i in range(ids.shape[0])], dim=0)

        outputs = {
            "S": Smax,
            "P": Pmax,
            "Prel": Prelflat,
            "F": Fmax,
        }

        return outputs
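The per-sample `torch.stack` loops above can also be written with `torch.gather`; a small self-contained check of the equivalent vectorized form:

import torch

B, N, HW = 2, 5, 100
Pflat = torch.randn(B, 2, HW)
Fflat = torch.randn(B, 256, HW)
ids = torch.randint(0, HW, (B, N))  # stand-in for the torch.topk indices

# Expand ids across the channel dimension, then gather along the flat axis.
Pmax = torch.gather(Pflat, 2, ids.unsqueeze(1).expand(-1, 2, -1))
Fmax = torch.gather(Fflat, 2, ids.unsqueeze(1).expand(-1, 256, -1))
print(Pmax.shape, Fmax.shape)  # torch.Size([2, 2, 5]) torch.Size([2, 256, 5])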
Example #9
    def __data_generation(self, list_IDs_temp):
        '''
        generates batches of shape:
        (n_samples, tile_side, tile_side, n_channels)

        input:
         - list_IDs_temp: list of image filenames. For now, it only consists
           of a list with one element
        '''

        X = np.empty((self.batch_size, self.tile_side, self.tile_side, 3),
                     dtype='float32')
        y = np.empty((self.batch_size), dtype=int)

        for i, fname in enumerate(list_IDs_temp):
            X[i] = imread(self.source_directory + "/" +
                          self.image_label_directory[fname] + "/" + fname)
            if self.hsv == True:
                X[i] = to_hsv(X[i])
            X[i] = normalize_image(image=X[i], hsv=self.hsv)
            y[i] = int(self.image_label_directory[fname].replace("-1", "0"))

        # print("Shape of X: " + str(X.shape))
        # print("Length of y: " + str(len(y)))

        return X, y
Example #10
    def normFaces(self, images):
        imgs = np.zeros((len(images), 224, 224, 3))
        for count, it in enumerate(images):
            imgs[count] = utils.normalize_image(
                cv2.cvtColor(it, cv2.COLOR_BGR2RGB))
        return imgs
Example #11
def blur(image, filter_size=5):
    """
    Function that uses a convolution filter to blur an image.
    :param image: An image (numpy.ndarray)
    :param filter_size: The size of each dimension of the filter
                        For example, for filter_size=5 a 5x5 convolution filter will be applied.
    :return: The blurred image (numpy.ndarray).
    """
    return convolve(normalize_image(image), np.zeros((filter_size, filter_size)) + 1 / filter_size ** 2)
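Assuming `convolve` comes from `scipy.ndimage` and `normalize_image` returns a float array (both assumptions about this project's imports), a quick usage sketch:

import numpy as np
from scipy.ndimage import convolve  # assumed source of `convolve`

img = np.random.rand(64, 64)        # stand-in grayscale image
blurred = blur(img, filter_size=5)  # applies a 5x5 mean filter
print(blurred.shape)                # (64, 64)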
Example #12
    def cropFaces(self, faces, img_path):
        img = cv2.imread(img_path)
        imgs = np.zeros((len(faces), 224, 224, 3))
        for count, (x, y, w, h) in enumerate(faces):
            imgs[count] = utils.normalize_image(
                cv2.cvtColor(img[y:y + h, x:x + w, :], cv2.COLOR_BGR2RGB))
        return imgs
Example #13
def prepare_images_and_depths(image1, image2, depth1, depth2, depth_scale=1.0):
    """ padding, normalization, and scaling """
    
    ht, wd = image1.shape[-2:]
    pad_h = (-ht) % 8
    pad_w = (-wd) % 8

    image1 = F.pad(image1, [0, pad_w, 0, pad_h], mode='replicate')
    image2 = F.pad(image2, [0, pad_w, 0, pad_h], mode='replicate')
    depth1 = F.pad(depth1[:, None], [0, pad_w, 0, pad_h], mode='replicate')[:, 0]
    depth2 = F.pad(depth2[:, None], [0, pad_w, 0, pad_h], mode='replicate')[:, 0]

    depth1 = (depth_scale * depth1).float()
    depth2 = (depth_scale * depth2).float()
    image1 = normalize_image(image1.float())
    image2 = normalize_image(image2.float())

    return image1, image2, depth1, depth2, (pad_w, pad_h)
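The `(-ht) % 8` trick above computes the smallest non-negative padding that brings a dimension up to the next multiple of 8; a quick check:

for wd in (636, 640, 641):
    pad_w = (-wd) % 8
    print(wd, pad_w, (wd + pad_w) % 8)  # 636 -> 4, 640 -> 0, 641 -> 7; last column is always 0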
Example #14
    def forward(self, input_image):
        self.features = []
        x = utils.normalize_image(input_image)
        x = self.encoder.conv1(x)
        x = self.encoder.bn1(x)
        self.features.append(self.encoder.relu(x))
        self.features.append(
            self.encoder.layer1(self.encoder.maxpool(self.features[-1])))
        self.features.append(self.encoder.layer2(self.features[-1]))
        self.features.append(self.encoder.layer3(self.features[-1]))
        self.features.append(self.encoder.layer4(self.features[-1]))

        return self.features
Example #15
def quantize(im_orig, n_quant, n_iter):
    """Performs optimal quantization of a grayscale or RGB image.
    :param im_orig: Input grayscale or RGB image to be quantized (float64 image with values in [0, 1]).
    :param n_quant: Number of intensities the output im_quant image should have.
    :param n_iter: Maximum number of iterations of the optimization procedure (may converge earlier).
    :return: Quantized output image (copy of the original image).
    """
    if n_quant <= 0 or n_iter <= 0:
        raise ValueError("Error: n_quant and n_iter must be positive")

    if im_orig.ndim == 3:
        img = utils.rgb2yiq(im_orig)
        img_hist = utils.get_histogram(img[:, :, 0])
    else:
        img = im_orig.copy()
        img_hist = utils.get_histogram(img)

    q = np.zeros(n_quant).astype(np.float64)
    z = _calculate_initial_z(img_hist, n_quant)
    last_iter_z = z.copy()
    for i in range(n_iter):
        q = _get_q(z, q, img_hist)
        z = _get_z(q, z)

        # Checks for convergence.
        if np.array_equal(last_iter_z, z):
            break
        last_iter_z = z.copy()

    lookup_table = np.zeros(256)
    for i in range(len(z) - 1):
        start = int(np.round(z[i]))
        end = int(z[i + 1]) + 1
        lookup_table[start:end] = q[i]

    if im_orig.ndim == 3:
        img[:, :, 0] = utils.normalize_image(lookup_table[np.rint(img[:, :, 0] * 255).astype(np.uint8)])
        return utils.yiq2rgb(img)
    return utils.normalize_image(lookup_table[np.rint(img * 255).astype(np.uint8)])
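The private helpers `_get_q` and `_get_z` are not shown; hypothetical sketches of the two Lloyd-Max update steps they presumably implement (q values as histogram-weighted segment means, z boundaries as midpoints of neighboring q values):

import numpy as np

def _get_q_sketch(z, q, hist):
    # Each q[i] becomes the histogram-weighted mean intensity of [z[i], z[i+1]].
    grey = np.arange(256)
    for i in range(len(q)):
        lo, hi = int(z[i]), int(z[i + 1])
        weights = hist[lo:hi + 1]
        q[i] = np.sum(grey[lo:hi + 1] * weights) / max(np.sum(weights), 1)
    return q

def _get_z_sketch(q, z):
    # Each interior boundary becomes the midpoint of its neighboring q values.
    z[1:-1] = (q[:-1] + q[1:]) / 2.0
    return z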
Example #16
def main():
    parser = argparse.ArgumentParser('Run YoloV3 on input image.')
    parser.add_argument('image', nargs='+')
    args = parser.parse_args()

    print("* Running on:", device)

    yolov3 = load_model()
    for img_path in args.image:
        x = Variable(utils.normalize_image(img_path)).to(device)
        y = yolov3(x)
        detections = utils.parse_detections(y)[0]
        detections = utils.non_max_suppression(detections)
        pprint.pprint(detections)
Example #17
    def stage_ONet(self, model, img, refined_boxes):
        h, w, _ = img.shape
        input_imgs = []
        for box in refined_boxes:
            x1, y1, x2, y2 = box
            crop_img = img[y1:y2, x1:x2]
            crop_img = cv2.resize(crop_img, (48, 48))
            input_img = utils.normalize_image(crop_img)
            input_imgs.append(input_img)

        input_imgs = np.array(input_imgs)
        cls_list, reg_list, lmk_list = model.predict(input_imgs)

        refined_boxes = self.filter_face_48net(cls_list, reg_list, lmk_list,
                                               refined_boxes, w, h, 0.6)
        return refined_boxes
Example #18
    def forward(self, inputs):

        inputs = utils.normalize_image(inputs)

        assert (inputs.shape[1] == (self.nb_ref_imgs + 1) * 3)

        out_conv1 = self.conv1(inputs)
        out_conv2 = self.conv2(out_conv1)
        out_conv3 = self.conv3(out_conv2)
        out_conv4 = self.conv4(out_conv3)
        out_conv5 = self.conv5(out_conv4)
        out_conv6 = self.conv6(out_conv5)
        out_conv7 = self.conv7(out_conv6)

        pose = self.pose_pred(out_conv7)
        pose = pose.mean(3).mean(2)
        pose = 0.01 * pose.view(pose.size(0), self.nb_ref_imgs, 6)

        exp_mask = []

        if self.output_exp:
            out_upconv5 = self.upconv5(out_conv5)[:, :, 0:out_conv4.size(2),
                                                  0:out_conv4.size(3)]
            out_upconv4 = self.upconv4(out_upconv5)[:, :, 0:out_conv3.size(2),
                                                    0:out_conv3.size(3)]
            out_upconv3 = self.upconv3(out_upconv4)[:, :, 0:out_conv2.size(2),
                                                    0:out_conv2.size(3)]
            out_upconv2 = self.upconv2(out_upconv3)[:, :, 0:out_conv1.size(2),
                                                    0:out_conv1.size(3)]
            out_upconv1 = self.upconv1(out_upconv2)[:, :, 0:inputs.size(2),
                                                    0:inputs.size(3)]

            exp_mask4 = sigmoid(self.predict_mask4(out_upconv4))
            exp_mask3 = sigmoid(self.predict_mask3(out_upconv3))
            exp_mask2 = sigmoid(self.predict_mask2(out_upconv2))
            exp_mask1 = sigmoid(self.predict_mask1(out_upconv1))
            exp_mask = [exp_mask1, exp_mask2, exp_mask3, exp_mask4]

        outputs = {"pose": pose}

        if len(exp_mask):
            outputs = {**outputs, "exp_mask": exp_mask}

        return outputs
Example #19
def worker_task(path, queue, use_augs, mean, std, class_mapping):
    """
    Function executed on each data-generation worker. Reads an image and its gt, normalizes the image if mean and
    std are given, performs a class re-mapping if a new class mapping is given, performs augmentations if use_augs
    is true, and puts the data in a queue. Currently the augmentations are fixed (flip_h and random_rotation, each
    with 0.25 probability). If the given queue already holds more than 300 elements, the worker sleeps to avoid
    memory problems.

    :param path: path to the image file
    :param queue: multiprocessing queue in which the results should be stored
    :param use_augs: whether or not augmentations should be performed
    :param mean: mean of the dataset or None if images should not be normalized
    :param std: std of the dataset or None if images should not be normalized
    :param class_mapping: new class mapping for gt or None if it should not be changed
    """
    global running
    if not running:
        return
    # don't continue if queue is already filled
    while queue.qsize() > 300:
        if not running:
            return
        time.sleep(0.1)

    image = read_image(path)
    image = resize_image(image, 64)

    if len(image.shape) < 3:
        return

    if mean and std:
        image = normalize_image(image, mean, std)

    class_id = path.split("/")[-1].split("_")[0]
    gt = class_mapping[class_id]["id"]

    if use_augs:
        augs = [flip_h, random_rotation]
        probs = [0.25, 0.25]
        image, gt = perform_augmentations(image, gt, augs, probs)

    queue.put((image, gt))
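`perform_augmentations` is project code that is not shown; a minimal sketch consistent with the call above (an assumption: each augmentation is label-preserving and applied independently with its paired probability):

import random

def perform_augmentations(image, gt, augs, probs):
    # Apply each augmentation with its own probability; gt passes through
    # unchanged for label-preserving transforms such as flips and rotations.
    for aug, p in zip(augs, probs):
        if random.random() < p:
            image = aug(image)
    return image, gt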
Example #20
def inference(net, img, scales, base_height, stride,
              pad_value=(0, 0, 0), mean_img=(128, 128, 128), img_scale=1 / 256):
    normalized_img = normalize_image(img, mean_img, img_scale)
    height, width, _ = normalized_img.shape
    scales_ratios = [scale * base_height / float(height) for scale in scales]
    avg_ft_map = np.zeros((height, width, 3), dtype=np.float32)

    for ratio in scales_ratios:
        scaled_img = cv2.resize(normalized_img, (0, 0), fx=ratio, fy=ratio, interpolation=cv2.INTER_CUBIC)
        min_dims = [base_height, max(scaled_img.shape[1], base_height)]
        padded_img, pad = pad_image(scaled_img, stride, pad_value, min_dims)
        tensor_img = torch.from_numpy(padded_img).permute(2, 0, 1).unsqueeze(0).float().cuda()
        _ft_maps = net(tensor_img)
        _ft_maps = np.transpose(_ft_maps.squeeze().cpu().data.numpy(), (1, 2, 0))
        _ft_maps = cv2.resize(_ft_maps, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC)
        _ft_maps = _ft_maps[pad[0]: _ft_maps.shape[0] - pad[2],
                            pad[1]: _ft_maps.shape[1] - pad[3], :]
        _ft_maps = cv2.resize(_ft_maps, (width, height), interpolation=cv2.INTER_CUBIC)
        avg_ft_map = avg_ft_map + _ft_maps / len(scales_ratios)

    return avg_ft_map
Example #21
    def stage_RNet(self, model, img, refined_boxes):
        h, w, _ = img.shape
        input_imgs = []
        for box in refined_boxes:
            x1, y1, x2, y2 = box
            if x1 < 0 or y1 < 0 or x2 < 0 or y2 < 0:
                continue
            crop_img = img[y1:y2, x1:x2]
            crop_img = cv2.resize(crop_img, (24, 24))
            input_img = utils.normalize_image(crop_img)
            input_imgs.append(input_img)

        input_imgs = np.array(input_imgs)
        cls_list, reg_list, _ = model.predict(input_imgs)
        cls_list = np.array(cls_list)
        reg_list = np.array(reg_list)

        refined_boxes = self.filter_face_24net(cls_list, reg_list,
                                               refined_boxes, w, h, 0.6)

        return np.array(refined_boxes)
Example #22
    def preprocess(self, files=CIFAR10_DATA_FILES):
        with h5py.File(H5_FILE, 'w') as h5_file:
            data = np.zeros(shape=(NUM_TRAIN, *IMAGE_SIZE, IMAGE_CHANNELS),
                            dtype=np.float32)
            labels = np.zeros(shape=(NUM_TRAIN, NUM_CLASSES), dtype=int)
            for i, file in enumerate(tqdm(files)):
                file_path = f'{CIFAR10_DATA_PATH}{file}'
                batch = unpickle(file_path)
                data_batch = batch[b'data']
                labels_batch = batch[b'labels']

                data[i * BATCH_SIZE:(i + 1) * BATCH_SIZE] = np.moveaxis(
                    np.reshape(normalize_image(data_batch),
                               (BATCH_SIZE, IMAGE_CHANNELS, *IMAGE_SIZE)), 1,
                    -1)
                labels[np.arange(i * BATCH_SIZE, (i + 1) * BATCH_SIZE),
                       labels_batch] = 1.

            h5_file.create_dataset(DATASET_X, data=data[1:2])
            h5_file.create_dataset(DATASET_Y, data=labels[1:2])
Example #23
    def refresh_image(self, txt_label, data, scale_factor):
        """

        Preprocess a data array (scaling and normalizing) and load it to a Gtk
        image holder, pixbuf

        Input:
            txt_label -- text label of the image
            data -- data array to show
            scale_factor -- scale factor for resizing the image

        """

        data = utils.normalize_image(data)
        
        h = data.shape[0]
        w = data.shape[1]
        data.shape = (h, w, -1)
        pixbuf = gtk.gdk.Pixbuf(gtk.gdk.COLORSPACE_RGB, False, 8, w, h)
        pixbuf.pixel_array[:] = data
        pixbuf = pixbuf.scale_simple(w*scale_factor, h*scale_factor, gtk.gdk.INTERP_TILES)

        self.images[txt_label].set_from_pixbuf(pixbuf)
Example #24
def process(img):
    image = bytes_to_file(img)
    try:
        img = misc.imread(image).astype(float)
        if augment:
            # augment the image by:
            # 1) enlarging the image
            # 2) randomly cropping it back to its original size
            w, h = img.shape
            multiplier = random.uniform(1.00, 1.20)
            # add an eps to prevent cropping issues
            nw = int(multiplier * w) + 1
            nh = int(multiplier * h) + 1
            shift_x = int(np.ceil(np.random.uniform(0.01, nw - w)))
            shift_y = int(np.ceil(np.random.uniform(0.01, nh - h)))
            img = shift_and_resize_image(img, shift_x, shift_y, nw, nh)
        img = normalize_image(img)
        img = np.expand_dims(img, axis=2)
        return img
    finally:
        image.close()
Example #25
    def stage_PNet(self, model, img):
        h, w, _ = img.shape
        img_size = (w, h)

        boxes_tot = np.empty((0, 5))
        reg_offsets = np.empty((0, 4))

        scales = self.get_image_pyramid_scales(self.min_face_size, img_size)

        print(scales)

        for scale in scales:
            resized = utils.scale_image(img, scale)
            normalized = utils.normalize_image(resized)
            net_input = np.expand_dims(normalized, 0)

            cls_map, reg_map, _ = model.predict(net_input)
            cls_map = cls_map.squeeze()[:, :, 1]  # probability of the face class
            reg_map = reg_map.squeeze()

            boxes, indices = self.generate_bboxes_with_scores(cls_map,
                                                              scale,
                                                              threshold=0.7)
            reg_deltas = reg_map[indices]

            indices = self.non_maximum_suppression(boxes, 0.5, 'union')
            boxes_tot = np.append(boxes_tot, boxes[indices], axis=0)
            reg_offsets = np.append(reg_offsets, reg_deltas[indices], axis=0)

        indices = self.non_maximum_suppression(boxes_tot, 0.7, 'union')
        boxes_tot = boxes_tot[indices]
        reg_offsets = reg_offsets[indices]

        # refine bounding boxes
        refined_boxes = self.refine_bboxes(boxes_tot, reg_offsets)
        return refined_boxes
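`non_maximum_suppression` is a method of the detector class and is not shown; a standard IoU-based sketch matching how it is called above (boxes as rows of x1, y1, x2, y2, score; returns the indices of the kept boxes) would be:

import numpy as np

def nms_sketch(boxes, threshold, mode='union'):
    # Hypothetical stand-in for self.non_maximum_suppression above: keep the
    # highest-scoring boxes, dropping later boxes that overlap a kept one
    # beyond the threshold.
    x1, y1, x2, y2, s = boxes.T
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = s.argsort()[::-1]          # process highest scores first
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        inter = np.maximum(0, xx2 - xx1 + 1) * np.maximum(0, yy2 - yy1 + 1)
        if mode == 'union':
            overlap = inter / (areas[i] + areas[order[1:]] - inter)
        else:  # 'min'
            overlap = inter / np.minimum(areas[i], areas[order[1:]])
        order = order[1:][overlap <= threshold]
    return keep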
Example #26
def main():
    video_name = 'street.mp4'
    cap = cv2.VideoCapture(video_name)
    # Read until video is completed
    is_first_frame = True
    while cap.isOpened():
        # Capture frame-by-frame
        ret, frame = cap.read()
        if ret:
            # Blur and convert to grayscale
            frame_normalized = normalize_image(img=frame)
            h, w = frame_normalized.shape
            height_threshold = (h // 3) * 2
            # # Detect lanes
            lanes = get_lanes(img=frame)
            _, points = draw_lanes(img=frame,
                                   lines=lanes,
                                   height=h,
                                   height_threshold=height_threshold)
            # Assume first frame to be background
            if is_first_frame:
                roi_start, roi_end = points[0], points[-1]
                frame_roi = frame_normalized[-height_threshold:,
                                             roi_start[0]:roi_end[0]]
                first_frame = frame_roi.copy()
                is_first_frame = False
            else:
                frame_roi = frame_normalized[-height_threshold:,
                                             roi_start[0]:roi_end[0]]
                frame_color_roi = frame[-height_threshold:,
                                        roi_start[0]:roi_end[0]]
                # Mask roi
                mask = np.zeros(frame.shape, dtype=np.uint8)
                roi_corners = np.array([points])
                cv2.fillPoly(mask, roi_corners, (255, 255, 255))
                # Calculate the difference
                frame_difference = cv2.absdiff(first_frame, frame_roi)
                mask_gray = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)
                mask_roi = mask_gray[-height_threshold:,
                                     roi_start[0]:roi_end[0]]
                frame_difference_masked = cv2.bitwise_and(
                    src1=frame_difference, src2=mask_roi)
                _, thresh = cv2.threshold(frame_difference_masked,
                                          thresh=110,
                                          maxval=255,
                                          type=cv2.THRESH_BINARY)
                contours, _ = cv2.findContours(thresh,
                                               mode=cv2.RETR_EXTERNAL,
                                               method=cv2.CHAIN_APPROX_NONE)
                contours_sorted = sorted(contours,
                                         key=lambda x: cv2.contourArea(x))
                if contours_sorted:
                    area = cv2.contourArea(contours_sorted[-1])
                    if area > 8_000:
                        bounding_rectangle = cv2.boundingRect(
                            contours_sorted[-1])
                        x, y, w, h = bounding_rectangle
                        # Draw bounding box
                        cv2.rectangle(frame_color_roi, (x, y - 50),
                                      (x + w, y + h), (0, 255, 0), 2)
                    else:
                        pass
                cv2.imshow("Frame", frame)
                # Press Q on the keyboard to exit
                if cv2.waitKey(25) & 0xFF == ord('q'):
                    break
        # Break the loop
        else:
            break

    # Release the video capture object and close all frames
    cap.release()
    cv2.destroyAllWindows()
Example #27
    def train(self):
        data_iter = iter(self.train_dataloader)

        if self.train_config.resume_checkpoint:
            start = self.resume_step + 1
        else:
            start = 0

        moving_max_grad = 0
        moving_grad_moment = 0.999
        max_grad = 0

        for step in range(start, self.train_config.total_step + 1):
            try:
                image_dict = next(data_iter)
            except StopIteration:
                data_iter = iter(self.train_dataloader)
                image_dict = next(data_iter)

            image, alpha, trimap, mask = image_dict['image'], image_dict[
                'alpha'], image_dict['trimap'], image_dict['mask']
            image = image.cuda()
            alpha = alpha.cuda()
            trimap = trimap.cuda()
            mask = mask.cuda()
            fg_norm, bg_norm = image_dict['fg'].cuda(), image_dict['bg'].cuda()
            # train() of DistributedDataParallel has no return
            self.G.train()
            log_info = ""
            loss = 0
            """===== Update Learning Rate ====="""
            if step < self.train_config.warmup_step and self.train_config.resume_checkpoint is None:
                cur_G_lr = utils.warmup_lr(self.train_config.G_lr, step + 1,
                                           self.train_config.warmup_step)
                utils.update_lr(cur_G_lr, self.G_optimizer)

            else:
                self.G_scheduler.step()
                cur_G_lr = self.G_scheduler.get_lr()[0]
            """===== Forward G ====="""

            pred = self.G(image, mask)
            alpha_pred_os1, alpha_pred_os4, alpha_pred_os8 = pred[
                'alpha_os1'], pred['alpha_os4'], pred['alpha_os8']

            weight_os8 = utils.get_unknown_tensor(trimap)
            weight_os8[...] = 1

            flag = False
            if step < self.train_config.warmup_step:
                flag = True
                weight_os4 = utils.get_unknown_tensor(trimap)
                weight_os1 = utils.get_unknown_tensor(trimap)
            elif step < self.train_config.warmup_step * 3:
                if random.randint(0, 1) == 0:
                    flag = True
                    weight_os4 = utils.get_unknown_tensor(trimap)
                    weight_os1 = utils.get_unknown_tensor(trimap)
                else:
                    weight_os4 = utils.get_unknown_tensor_from_pred(
                        alpha_pred_os8,
                        rand_width=CONFIG.model.self_refine_width1,
                        train_mode=True)
                    alpha_pred_os4[weight_os4 == 0] = alpha_pred_os8[weight_os4
                                                                     == 0]
                    weight_os1 = utils.get_unknown_tensor_from_pred(
                        alpha_pred_os4,
                        rand_width=CONFIG.model.self_refine_width2,
                        train_mode=True)
                    alpha_pred_os1[weight_os1 == 0] = alpha_pred_os4[weight_os1
                                                                     == 0]
            else:
                weight_os4 = utils.get_unknown_tensor_from_pred(
                    alpha_pred_os8,
                    rand_width=CONFIG.model.self_refine_width1,
                    train_mode=True)
                alpha_pred_os4[weight_os4 == 0] = alpha_pred_os8[weight_os4 ==
                                                                 0]
                weight_os1 = utils.get_unknown_tensor_from_pred(
                    alpha_pred_os4,
                    rand_width=CONFIG.model.self_refine_width2,
                    train_mode=True)
                alpha_pred_os1[weight_os1 == 0] = alpha_pred_os4[weight_os1 ==
                                                                 0]
            """===== Calculate Loss ====="""
            if self.train_config.rec_weight > 0:
                self.loss_dict['rec'] = (self.regression_loss(alpha_pred_os1, alpha, loss_type='l1', weight=weight_os1) * 2 +\
                 self.regression_loss(alpha_pred_os4, alpha, loss_type='l1', weight=weight_os4) * 1 +\
                  self.regression_loss(alpha_pred_os8, alpha, loss_type='l1', weight=weight_os8) * 1) / 5.0 * self.train_config.rec_weight

            if self.train_config.comp_weight > 0:
                self.loss_dict['comp'] = (self.composition_loss(alpha_pred_os1, fg_norm, bg_norm, image, weight=weight_os1) * 2 +\
                 self.composition_loss(alpha_pred_os4, fg_norm, bg_norm, image, weight=weight_os4) * 1 +\
                  self.composition_loss(alpha_pred_os8, fg_norm, bg_norm, image, weight=weight_os8) * 1) / 5.0 * self.train_config.comp_weight

            if self.train_config.lap_weight > 0:
                self.loss_dict['lap'] = (self.lap_loss(logit=alpha_pred_os1, target=alpha, gauss_filter=self.gauss_filter, loss_type='l1', weight=weight_os1) * 2 +\
                 self.lap_loss(logit=alpha_pred_os4, target=alpha, gauss_filter=self.gauss_filter, loss_type='l1', weight=weight_os4) * 1 +\
                  self.lap_loss(logit=alpha_pred_os8, target=alpha, gauss_filter=self.gauss_filter, loss_type='l1', weight=weight_os8) * 1) / 5.0 * self.train_config.lap_weight

            for loss_key in self.loss_dict.keys():
                if self.loss_dict[loss_key] is not None and loss_key in [
                        'rec', 'comp', 'lap'
                ]:
                    loss += self.loss_dict[loss_key]
            """===== Back Propagate ====="""
            self.reset_grad()

            loss.backward()
            """===== Clip Large Gradient ====="""
            if self.train_config.clip_grad:
                if moving_max_grad == 0:
                    moving_max_grad = nn_utils.clip_grad_norm_(
                        self.G.parameters(), 1e+6)
                    max_grad = moving_max_grad
                else:
                    max_grad = nn_utils.clip_grad_norm_(
                        self.G.parameters(), 2 * moving_max_grad)
                    moving_max_grad = moving_max_grad * moving_grad_moment + max_grad * (
                        1 - moving_grad_moment)
            """===== Update Parameters ====="""
            self.G_optimizer.step()
            """===== Write Log and Tensorboard ====="""
            # stdout log
            if step % self.log_config.logging_step == 0:
                # reduce losses from GPUs
                if CONFIG.dist:
                    self.loss_dict = utils.reduce_tensor_dict(self.loss_dict,
                                                              mode='mean')
                    loss = utils.reduce_tensor(loss)
                # create logging information
                for loss_key in self.loss_dict.keys():
                    if self.loss_dict[loss_key] is not None:
                        log_info += loss_key.upper() + ": {:.4f}, ".format(
                            self.loss_dict[loss_key])

                self.logger.debug(
                    "Image tensor shape: {}. Trimap tensor shape: {}".format(
                        image.shape, trimap.shape))
                log_info = "[{}/{}], ".format(
                    step, self.train_config.total_step) + log_info
                log_info += "lr: {:6f}".format(cur_G_lr)
                self.logger.info(log_info)

                # tensorboard
                if step % self.log_config.tensorboard_step == 0 or step == start:  # and step > start:
                    self.tb_logger.scalar_summary('Loss', loss, step)

                    # detailed losses
                    for loss_key in self.loss_dict.keys():
                        if self.loss_dict[loss_key] is not None:
                            self.tb_logger.scalar_summary(
                                'Loss_' + loss_key.upper(),
                                self.loss_dict[loss_key], step)

                    self.tb_logger.scalar_summary('LearnRate', cur_G_lr, step)

                    if self.train_config.clip_grad:
                        self.tb_logger.scalar_summary('Moving_Max_Grad',
                                                      moving_max_grad, step)
                        self.tb_logger.scalar_summary('Max_Grad', max_grad,
                                                      step)
            """===== TEST ====="""
            if ((step % self.train_config.val_step) == 0 or step
                    == self.train_config.total_step):  # and step > start:
                self.G.eval()
                test_loss = 0
                log_info = ""

                self.test_loss_dict['mse'] = 0
                self.test_loss_dict['sad'] = 0
                for loss_key in self.loss_dict.keys():
                    if loss_key in self.test_loss_dict and self.loss_dict[
                            loss_key] is not None:
                        self.test_loss_dict[loss_key] = 0

                with torch.no_grad():
                    for image_dict in self.test_dataloader:
                        image, alpha, trimap, mask = image_dict[
                            'image'], image_dict['alpha'], image_dict[
                                'trimap'], image_dict['mask']
                        alpha_shape = image_dict['alpha_shape']
                        image = image.cuda()
                        alpha = alpha.cuda()
                        trimap = trimap.cuda()
                        mask = mask.cuda()

                        pred = self.G(image, mask)

                        alpha_pred_os1, alpha_pred_os4, alpha_pred_os8 = pred[
                            'alpha_os1'], pred['alpha_os4'], pred['alpha_os8']
                        alpha_pred = alpha_pred_os8.clone().detach()
                        weight_os4 = utils.get_unknown_tensor_from_pred(
                            alpha_pred,
                            rand_width=CONFIG.model.self_refine_width1,
                            train_mode=False)
                        alpha_pred[weight_os4 > 0] = alpha_pred_os4[
                            weight_os4 > 0]
                        weight_os1 = utils.get_unknown_tensor_from_pred(
                            alpha_pred,
                            rand_width=CONFIG.model.self_refine_width2,
                            train_mode=False)
                        alpha_pred[weight_os1 > 0] = alpha_pred_os1[
                            weight_os1 > 0]

                        h, w = alpha_shape
                        alpha_pred = alpha_pred[..., :h, :w]
                        trimap = trimap[..., :h, :w]

                        weight = utils.get_unknown_tensor(trimap)
                        weight[...] = 1

                        # value of MSE/SAD here is different from test.py and matlab version
                        self.test_loss_dict['mse'] += self.mse(
                            alpha_pred, alpha, weight)
                        self.test_loss_dict['sad'] += self.sad(
                            alpha_pred, alpha, weight)

                        if self.train_config.rec_weight > 0:
                            self.test_loss_dict['rec'] += self.regression_loss(alpha_pred, alpha, weight=weight) \
                                                          * self.train_config.rec_weight

                # reduce losses from GPUs
                if CONFIG.dist:
                    self.test_loss_dict = utils.reduce_tensor_dict(
                        self.test_loss_dict, mode='mean')
                """===== Write Log and Tensorboard ====="""
                # stdout log
                for loss_key in self.test_loss_dict.keys():
                    if self.test_loss_dict[loss_key] is not None:
                        self.test_loss_dict[loss_key] /= len(
                            self.test_dataloader)
                        # logging
                        log_info += loss_key.upper() + ": {:.4f} ".format(
                            self.test_loss_dict[loss_key])
                        self.tb_logger.scalar_summary(
                            'Loss_' + loss_key.upper(),
                            self.test_loss_dict[loss_key],
                            step,
                            phase='test')

                        if loss_key in ['rec']:
                            test_loss += self.test_loss_dict[loss_key]

                self.logger.info("TEST: LOSS: {:.4f} ".format(test_loss) +
                                 log_info)
                self.tb_logger.scalar_summary('Loss',
                                              test_loss,
                                              step,
                                              phase='test')

                # if self.model_config.trimap_channel == 3:
                #     trimap = trimap.argmax(dim=1, keepdim=True)
                # alpha_pred[trimap==2] = 1
                # alpha_pred[trimap==0] = 0
                image_set = {
                    'image':
                    (utils.normalize_image(image[-1, ...]).data.cpu().numpy() *
                     255).astype(np.uint8),
                    'mask':
                    (mask[-1, ...].data.cpu().numpy() * 255).astype(np.uint8),
                    'alpha':
                    (alpha[-1, ...].data.cpu().numpy() * 255).astype(np.uint8),
                    'alpha_pred': (alpha_pred[-1, ...].data.cpu().numpy() *
                                   255).astype(np.uint8)
                }

                self.tb_logger.image_summary(image_set, step, phase='test')
                """===== Save Model ====="""
                if (step % self.log_config.checkpoint_step == 0 or step == self.train_config.total_step) \
                        and CONFIG.local_rank == 0 and (step > start):
                    self.logger.info(
                        'Saving the trained models from step {}...'.format(
                            step))
                    self.save_model("latest_model", step, loss)
                    if self.test_loss_dict['mse'] < self.best_loss:
                        self.best_loss = self.test_loss_dict['mse']
                        self.save_model("best_model", step, loss)

                torch.cuda.empty_cache()
Example #28
def pipeline(img,
             low,
             high,
             roi_percentile=85,
             focal_scope='global',
             maxima_areas='small',
             merge_type='blend',
             merge_alpha=0.5,
             filter_type='percentage',
             filter_percentage=15,
             filter_threshold=0.6):
    """
    Visualization of the whole workflow. Requires the original image and the high and low res CAMs to work. Performs
    the following steps:

    1. Applies a filter to blur the high-res map.
    2. Extracts the ROI from the low-res map through a percentile.
    3. Identifies the focal points of the low-res map by locating its local maxima.
    4. Computes the gradient of the high-res map through a sobel filter.
    5. Draws a histogram of the gradient. Only considers areas corresponding to the ROI extracted from the low-res map.
    6. Calculates a 'lower' and 'upper' bound on the 25th and 75th percentile, respectively.
    7. Performs a region-growing segmentation algorithm on the gradient. The boundaries are the previous percentiles,
       while the focal points are set as the initial seeds (from where to start growing).
    8. Merges the result of the segmentation with the low-res map.
    9. Segments the original image according to the result of the previous merger.

    Note: it would be more efficient and elegant if I went for 'axes fraction' instead of 'data' for the coordinates
          of the ConnectionPatches, but it's too much of a hassle to change.

    :param img: Original RGB image, default shape=(224, 224, 3).
    :param low: Low-resolution CAM, default shape=(14, 14).
    :param high: High-resolution CAM, default shape=(224, 224).
    :param roi_percentile: Percentile based on which the ROI will be extracted. The default percentile=85 means that
                           the ROI will include the 15% highest-intensity pixels from the low-res map.
    :param focal_scope: The scope in which the focal points will be identified. 'global' looks for global maxima, while
                        'local' looks for local maxima. Accepted values: ['global', 'local']
    :param maxima_areas: Specifies the size of the focal points. Two options available: 'small' and 'large'.
    :param merge_type: Specifies the method of merging the high-res segment map with the low-res map.
                       Two methods available: 'blend' and 'multiply'. The first is a possibly weighted linear
                       combination of the two, while the second simply multiplies them.
    :param merge_alpha: If merge_type='blend', alpha regulates the importance of each of the two images (i.e. the low
                        and the high-res maps). Should be a float in [0, 1]. High values result in more influence from
                        the high-res map.
    :param filter_type: Specifies the method of segmenting the original image based on the combined CAM. Two methods are
                        available: 'percentage' and 'threshold'. The first keeps a percentage of the original image's
                        pixels while the second relies solely on the values of the combined CAM exceeding a threshold.
    :param filter_percentage: Selects the percentage of pixels to be included in the final segment. Only relevant if
                              filter_type='percentage'. Should be a number between 0 and 100.
    :param filter_threshold: Selects the threshold based on which the final segmentation will be performed. Only pixels
                             of the combined CAM that have an intensity greater than this threshold will be included.
                             Based on this mask, the original image will be segmented. Should be a float in [0, 1].
    """

    # Value checks

    # Categorical arguments
    if maxima_areas not in ('small', 'large'):
        raise ValueError(
            "available options for maxima_areas are: 'small' and 'large'.")

    if merge_type not in ('blend', 'multiply'):
        raise ValueError(
            "available options for merge_type are: 'blend' and 'multiply'.")

    if filter_type not in ('percentage', 'threshold'):
        raise ValueError(
            "available options for filter_type are: 'percentage' and 'threshold'."
        )

    # Percentage arguments
    if roi_percentile <= 0 or roi_percentile >= 100:
        raise ValueError('roi_percentile should be a percentage in (0, 100)')
    elif roi_percentile < 1:
        warnings.warn(
            'roi_percentile value in [0, 1). Should be defined as a percentage in (0, 100), '
            'e.g. if the desired percentage is 13%, pass 13 instead of 0.13!')

    if filter_percentage <= 0 or filter_percentage >= 100:
        raise ValueError(
            'filter_percentage should be a percentage in (0, 100)')
    elif filter_percentage < 1:
        warnings.warn(
            'filter_percentage value in [0, 1). Should be defined as a percentage in (0, 100), '
            'e.g. if the desired percentage is 13%, pass 13 instead of 0.13!')

    # Value arguments
    if merge_alpha < 0 or merge_alpha > 1:
        raise ValueError('merge_alpha should be a float in [0, 1]')

    if filter_threshold < 0 or filter_threshold > 1:
        raise ValueError('filter_threshold should be a float in [0, 1]')

    # Coordinates of the top/bottom/left/right/middle of the input image
    left = (0, img.shape[1] / 2)
    right = (img.shape[1], img.shape[1] / 2)
    bottom = (img.shape[1] / 2, img.shape[1])
    top = (img.shape[1] / 2, 0)
    midpoint = (img.shape[1] / 2, img.shape[1] / 2)

    # Create two 'blank' images for filling empty positions
    blank = np.ones(img[0].shape, dtype=np.uint8)
    half_blank = blank[::2]

    # Initialize 5x7 grid
    fig, ax = plt.subplots(5, 7, figsize=(16, 16))

    ##############################
    ######## First column ########
    ##############################

    # Fill first, second, fourth and fifth rows with blank images
    ax[0, 0].imshow(blank, alpha=0)
    ax[0, 0].axis('off')
    ax[1, 0].imshow(blank, alpha=0)
    ax[1, 0].axis('off')
    ax[3, 0].imshow(blank, alpha=0)
    ax[3, 0].axis('off')
    ax[4, 0].imshow(half_blank, alpha=0)
    ax[4, 0].axis('off')

    # Add original image to the third row
    ax[2, 0].imshow(img[0], zorder=3)
    ax[2, 0].axis('off')
    ax[2, 0].set_title('Original image', backgroundcolor='white', zorder=2)

    # Three crooked lines starting from the first row, represented by thirteen (!) connection patches
    # Connection of 'original image' to 'high-res map'
    con1a = ConnectionPatch(xyA=top,
                            xyB=midpoint,
                            coordsA='data',
                            coordsB='data',
                            axesA=ax[2, 0],
                            axesB=ax[1, 0],
                            color='black',
                            lw=2,
                            zorder=1)
    con1b = ConnectionPatch(xyA=midpoint,
                            xyB=left,
                            coordsA='data',
                            coordsB='data',
                            axesA=ax[1, 0],
                            axesB=ax[1, 1],
                            color='black',
                            lw=2,
                            arrowstyle='->')

    # Connection of 'original image' to 'low-res map'
    con2a = ConnectionPatch(xyA=bottom,
                            xyB=midpoint,
                            coordsA='data',
                            coordsB='data',
                            axesA=ax[2, 0],
                            axesB=ax[3, 0],
                            color='black',
                            lw=2)
    con2b = ConnectionPatch(xyA=midpoint,
                            xyB=left,
                            coordsA='data',
                            coordsB='data',
                            axesA=ax[3, 0],
                            axesB=ax[3, 1],
                            color='black',
                            lw=2,
                            arrowstyle='->')

    # Connection of 'original image' to 'result'
    con3b = ConnectionPatch(xyA=midpoint,
                            xyB=bottom,
                            coordsA='data',
                            coordsB='data',
                            axesA=ax[1, 0],
                            axesB=ax[0, 0],
                            color='black',
                            lw=2)
    con3c = ConnectionPatch(xyA=bottom,
                            xyB=bottom,
                            coordsA='data',
                            coordsB='data',
                            axesA=ax[0, 0],
                            axesB=ax[0, 1],
                            color='black',
                            lw=2)
    con3d = ConnectionPatch(xyA=bottom,
                            xyB=bottom,
                            coordsA='data',
                            coordsB='data',
                            axesA=ax[0, 1],
                            axesB=ax[0, 2],
                            color='black',
                            lw=2)
    con3e = ConnectionPatch(xyA=bottom,
                            xyB=bottom,
                            coordsA='data',
                            coordsB='data',
                            axesA=ax[0, 2],
                            axesB=ax[0, 3],
                            color='black',
                            lw=2)
    con3f = ConnectionPatch(xyA=bottom,
                            xyB=bottom,
                            coordsA='data',
                            coordsB='data',
                            axesA=ax[0, 3],
                            axesB=ax[0, 4],
                            color='black',
                            lw=2)
    con3g = ConnectionPatch(xyA=bottom,
                            xyB=bottom,
                            coordsA='data',
                            coordsB='data',
                            axesA=ax[0, 4],
                            axesB=ax[0, 5],
                            color='black',
                            lw=2)
    con3h = ConnectionPatch(xyA=bottom,
                            xyB=bottom,
                            coordsA='data',
                            coordsB='data',
                            axesA=ax[0, 5],
                            axesB=ax[0, 6],
                            color='black',
                            lw=2)
    con3i = ConnectionPatch(xyA=bottom,
                            xyB=midpoint,
                            coordsA='data',
                            coordsB='data',
                            axesA=ax[0, 6],
                            axesB=ax[1, 6],
                            color='black',
                            lw=2)
    con3k = ConnectionPatch(xyA=midpoint,
                            xyB=midpoint,
                            coordsA='data',
                            coordsB='data',
                            axesA=ax[1, 6],
                            axesB=ax[2, 6],
                            color='black',
                            lw=2)
    con3l = ConnectionPatch(xyA=midpoint,
                            xyB=top,
                            coordsA='data',
                            coordsB='data',
                            axesA=ax[2, 6],
                            axesB=ax[3, 6],
                            color='black',
                            lw=2,
                            arrowstyle='->',
                            zorder=1)

    # Add each patch to its respective axis
    ax[2, 0].add_artist(con1a)
    ax[1, 0].add_artist(con1b)

    ax[2, 0].add_artist(con2a)
    ax[3, 0].add_artist(con2b)

    ax[1, 0].add_artist(con3b)
    ax[0, 0].add_artist(con3c)
    ax[0, 1].add_artist(con3d)
    ax[0, 2].add_artist(con3e)
    ax[0, 3].add_artist(con3f)
    ax[0, 4].add_artist(con3g)
    ax[0, 5].add_artist(con3h)
    ax[0, 6].add_artist(con3i)
    ax[1, 6].add_artist(con3k)
    ax[2, 6].add_artist(con3l)

    ###############################
    ######## Second column ########
    ###############################

    # High-res map on the second line
    ax[1, 1].imshow(high)
    ax[1, 1].axis('off')
    ax[1, 1].set_title('High-res CAM')

    # Low-res map on the fourth line
    ax[3, 1].imshow(utils.resize(low), zorder=3)
    ax[3, 1].axis('off')
    ax[3, 1].set_title('Low-res CAM', backgroundcolor='white', zorder=2)

    # Fill the first, third and fifth lines with blank images
    ax[0, 1].imshow(blank, alpha=0)
    ax[0, 1].axis('off')
    ax[2, 1].imshow(blank, alpha=0)
    ax[2, 1].axis('off')
    ax[4, 1].imshow(half_blank, alpha=0)
    ax[4, 1].axis('off')

    # Four lines represented by eleven (!) connection patches
    # Connection of 'high-res map' to 'gradient'
    con4 = ConnectionPatch(xyA=right,
                           xyB=left,
                           coordsA='data',
                           coordsB='data',
                           axesA=ax[1, 1],
                           axesB=ax[1, 2],
                           color='black',
                           lw=2,
                           arrowstyle='->')

    # Connection of 'low-res map' to 'roi'
    con5a = ConnectionPatch(xyA=top,
                            xyB=midpoint,
                            coordsA='data',
                            coordsB='data',
                            axesA=ax[3, 1],
                            axesB=ax[2, 1],
                            color='black',
                            lw=2,
                            zorder=1)
    con5b = ConnectionPatch(xyA=midpoint,
                            xyB=left,
                            coordsA='data',
                            coordsB='data',
                            axesA=ax[2, 1],
                            axesB=ax[2, 2],
                            color='black',
                            lw=2,
                            arrowstyle='->')

    # Connection of 'low-res map' to 'focal points'
    con6 = ConnectionPatch(xyA=right,
                           xyB=left,
                           coordsA='data',
                           coordsB='data',
                           axesA=ax[3, 1],
                           axesB=ax[3, 2],
                           color='black',
                           lw=2,
                           arrowstyle='->')

    # Connection of 'low-res map' to 'merger'
    con7a = ConnectionPatch(xyA=bottom,
                            xyB=top,
                            coordsA='data',
                            coordsB='data',
                            axesA=ax[3, 1],
                            axesB=ax[4, 1],
                            color='black',
                            lw=2,
                            zorder=1)
    con7b = ConnectionPatch(xyA=top,
                            xyB=top,
                            coordsA='data',
                            coordsB='data',
                            axesA=ax[4, 1],
                            axesB=ax[4, 2],
                            color='black',
                            lw=2,
                            zorder=1)
    con7c = ConnectionPatch(xyA=top,
                            xyB=top,
                            coordsA='data',
                            coordsB='data',
                            axesA=ax[4, 2],
                            axesB=ax[4, 3],
                            color='black',
                            lw=2,
                            zorder=1)
    con7d = ConnectionPatch(xyA=top,
                            xyB=top,
                            coordsA='data',
                            coordsB='data',
                            axesA=ax[4, 3],
                            axesB=ax[4, 4],
                            color='black',
                            lw=2,
                            zorder=1)
    con7e = ConnectionPatch(xyA=top,
                            xyB=top,
                            coordsA='data',
                            coordsB='data',
                            axesA=ax[4, 4],
                            axesB=ax[4, 5],
                            color='black',
                            lw=2,
                            zorder=1)
    con7f = ConnectionPatch(xyA=top,
                            xyB=bottom,
                            coordsA='data',
                            coordsB='data',
                            axesA=ax[4, 5],
                            axesB=ax[3, 5],
                            color='black',
                            lw=2,
                            zorder=1,
                            arrowstyle='->')

    # Add the patches to their respective axes
    ax[1, 1].add_artist(con4)
    ax[3, 1].add_artist(con5a)
    ax[2, 1].add_artist(con5b)
    ax[3, 1].add_artist(con6)
    ax[3, 1].add_artist(con7a)
    ax[4, 1].add_artist(con7b)
    ax[4, 2].add_artist(con7c)
    ax[4, 3].add_artist(con7d)
    ax[4, 4].add_artist(con7e)
    ax[4, 5].add_artist(con7f)

    ##############################
    ######## Third column ########
    ##############################

    # High-res blur
    blurred = filters.blur(high)
    ax[1, 2].imshow(blurred)
    ax[1, 2].axis('off')
    ax[1, 2].set_title('Blurred')

    # Region of Interest
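    # Threshold the resized low-res CAM at the given percentile: only the top
    # (100 - roi_percentile)% of intensities count towards the ROI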
    roi = utils.resize(low) > utils.percentile(utils.resize(low),
                                               roi_percentile)
    a = ax[2, 2].imshow(roi)
    ax[2, 2].axis('off')
    ax[2, 2].set_title('Region of Interest')

    # Focal Points
    focal_points = maxima.find_focal_points(low,
                                            scope=focal_scope,
                                            maxima_areas=maxima_areas)
    bg, dots = a.get_cmap().colors[0], a.get_cmap().colors[-1]
    ax[3, 2].imshow(
        (blank.reshape(-1, 3) * bg).reshape(img.shape[1], img.shape[1], 3))
    ax[3, 2].scatter([x[0] for x in focal_points],
                     [x[1] for x in focal_points],
                     marker='x',
                     s=30,
                     c=dots)
    ax[3, 2].axis('off')
    ax[3, 2].set_title('Focal Points')

    # Fill first and fifth rows with blank images
    ax[0, 2].imshow(blank, alpha=0)
    ax[0, 2].axis('off')
    ax[4, 2].imshow(half_blank, alpha=0)
    ax[4, 2].axis('off')

    # Three lines represented by five connection patches
    con8 = ConnectionPatch(xyA=right,
                           xyB=left,
                           coordsA='data',
                           coordsB='data',
                           axesA=ax[1, 2],
                           axesB=ax[1, 3],
                           color='black',
                           lw=2,
                           arrowstyle='->')
    con9 = ConnectionPatch(xyA=right,
                           xyB=(0, 0.5),
                           coordsA='data',
                           coordsB='axes fraction',
                           axesA=ax[2, 2],
                           axesB=ax[2, 3],
                           color='black',
                           lw=2,
                           arrowstyle='->')
    con10a = ConnectionPatch(xyA=right,
                             xyB=midpoint,
                             coordsA='data',
                             coordsB='data',
                             axesA=ax[3, 2],
                             axesB=ax[3, 3],
                             color='black',
                             lw=2)
    con10b = ConnectionPatch(xyA=midpoint,
                             xyB=midpoint,
                             coordsA='data',
                             coordsB='data',
                             axesA=ax[3, 3],
                             axesB=ax[3, 4],
                             color='black',
                             lw=2)
    con10c = ConnectionPatch(xyA=midpoint,
                             xyB=left,
                             coordsA='data',
                             coordsB='data',
                             axesA=ax[3, 4],
                             axesB=ax[3, 5],
                             color='black',
                             lw=2,
                             arrowstyle='->')

    # Add the patches to their respective axes
    ax[1, 2].add_artist(con8)
    ax[2, 2].add_artist(con9)
    ax[3, 2].add_artist(con10a)
    ax[3, 3].add_artist(con10b)
    ax[3, 4].add_artist(con10c)

    ###############################
    ######## Fourth column ########
    ###############################

    # High-res edge detection
    grad = utils.normalize_image(filters.sobel(blurred))
    ax[1, 3].imshow(grad)
    ax[1, 3].axis('off')
    ax[1, 3].set_title('Edge detection')

    # Gradient percentiles
    roi_grad = grad[roi]
    lower = utils.percentile(roi_grad, 25)
    upper = utils.percentile(roi_grad, 75)
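    # These 25th/75th gradient percentiles inside the ROI later serve as the
    # lower/upper bounds of the region-growing segmentation (fifth column)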
    ax[2, 3] = sns.distplot(roi_grad.ravel(), ax=ax[2, 3])
    ax[2, 3].plot([lower, lower], [0, 4], c='C1')
    ax[2, 3].plot([upper, upper], [0, 4], c='C1')
    ax[2, 3].text(lower,
                  -0.5,
                  'lower',
                  color='C1',
                  horizontalalignment='center')
    ax[2, 3].text(upper,
                  4.5,
                  'upper',
                  color='C1',
                  horizontalalignment='center')
    ax[2, 3].axis('off')
    ttl = ax[2, 3].set_title('Edge Histogram')
    ttl.set_bbox(dict(color='white', alpha=0.5, zorder=2))
    # Custom function that shrinks the axis object to a square box
    square_axes(ax[2, 3])

    # Fill first, fourth and fifth rows
    ax[0, 3].imshow(blank, alpha=0)
    ax[0, 3].axis('off')
    ax[3, 3].imshow(blank, alpha=0)
    ax[3, 3].axis('off')
    ax[4, 3].imshow(half_blank, alpha=0)
    ax[4, 3].axis('off')

    # Three lines represented by four connection patches
    con11 = ConnectionPatch(xyA=bottom,
                            xyB=(0.5, 1),
                            coordsA='data',
                            coordsB='axes fraction',
                            axesA=ax[1, 3],
                            axesB=ax[2, 3],
                            color='black',
                            lw=2,
                            arrowstyle='->')
    con12a = ConnectionPatch(xyA=right,
                             xyB=midpoint,
                             coordsA='data',
                             coordsB='data',
                             axesA=ax[1, 3],
                             axesB=ax[1, 4],
                             color='black',
                             lw=2)
    con12b = ConnectionPatch(xyA=midpoint,
                             xyB=top,
                             coordsA='data',
                             coordsB='data',
                             axesA=ax[1, 4],
                             axesB=ax[2, 4],
                             color='black',
                             lw=2,
                             arrowstyle='->',
                             zorder=1)

    con13 = ConnectionPatch(xyA=(1, 0.5),
                            xyB=left,
                            coordsA='axes fraction',
                            coordsB='data',
                            axesA=ax[2, 3],
                            axesB=ax[2, 4],
                            color='black',
                            lw=2,
                            arrowstyle='->')

    # Add the patches to their respective axes
    ax[1, 3].add_artist(con11)
    ax[1, 3].add_artist(con12a)
    ax[1, 4].add_artist(con12b)
    ax[2, 3].add_artist(con13)

    ##############################
    ######## Fifth column ########
    ##############################

    # Region Growing Segmentation
    segm = segment.region_growing(grad,
                                  seeds=focal_points,
                                  lower=lower,
                                  upper=upper)
    ax[2, 4].imshow(segm, zorder=3)
    ax[2, 4].axis('off')
    ttl = ax[2, 4].set_title('Region Growing\nSegmentation')
    ttl.set_bbox(dict(color='white', alpha=0.5, zorder=2))

    # Fill first, second, fourth and fifth rows
    ax[0, 4].imshow(blank, alpha=0)
    ax[0, 4].axis('off')
    ax[1, 4].imshow(blank, alpha=0)
    ax[1, 4].axis('off')
    ax[3, 4].imshow(blank, alpha=0)
    ax[3, 4].axis('off')
    ax[4, 4].imshow(half_blank, alpha=0)
    ax[4, 4].axis('off')

    # Just one connection! :)
    con14 = ConnectionPatch(xyA=right,
                            xyB=left,
                            coordsA='data',
                            coordsB='data',
                            axesA=ax[2, 4],
                            axesB=ax[2, 5],
                            color='black',
                            lw=2,
                            arrowstyle='->')

    ax[2, 4].add_artist(con14)

    ##############################
    ######## Sixth column ########
    ##############################

    # Add edges and fill small holes
    edges = (grad >= upper).astype(float)
    roi_edges = edges * roi
    segm_with_edges = segm + roi_edges
    filled = maxima.remove_small_holes(segm_with_edges)
    ax[2, 5].imshow(filled)
    ax[2, 5].axis('off')
    ax[2, 5].set_title('Remove small holes')

    # High-Low merger
    merged = merge.merge_images(filled,
                                low,
                                method=merge_type,
                                alpha=merge_alpha)
    ax[3, 5].imshow(merged)
    ax[3, 5].axis('off')
    ttl = ax[3, 5].set_title('High-Low Merger')
    ttl.set_bbox(dict(color='white', alpha=0.5, zorder=2))

    # Fill remaining rows (row 3 already holds the High-Low merger)
    ax[0, 5].imshow(blank, alpha=0)
    ax[0, 5].axis('off')
    ax[1, 5].imshow(blank, alpha=0)
    ax[1, 5].axis('off')
    ax[4, 5].imshow(half_blank, alpha=0)
    ax[4, 5].axis('off')

    # Last connection patches...
    con15 = ConnectionPatch(xyA=bottom,
                            xyB=top,
                            coordsA='data',
                            coordsB='data',
                            axesA=ax[2, 5],
                            axesB=ax[3, 5],
                            color='black',
                            lw=2,
                            zorder=-1,
                            arrowstyle='->')
    con16 = ConnectionPatch(xyA=right,
                            xyB=left,
                            coordsA='data',
                            coordsB='data',
                            axesA=ax[3, 5],
                            axesB=ax[3, 6],
                            color='black',
                            lw=2,
                            zorder=-1,
                            arrowstyle='->')

    ax[2, 5].add_artist(con15)
    ax[3, 5].add_artist(con16)

    ################################
    ######## Seventh column ########
    ################################

    # Result
    if filter_type == 'percentage':
        result = merge.keep_percentage(img,
                                       merged,
                                       percentage=filter_percentage / 100)
    else:
        result = merge.filter_image(img, merged, threshold=filter_threshold)
    ax[3, 6].imshow(result, zorder=3)
    ax[3, 6].axis('off')
    ttl = ax[3, 6].set_title('Result')
    ttl.set_bbox(dict(color='white', alpha=0.5, zorder=2))

    # Fill remaining rows
    ax[0, 6].imshow(blank, alpha=0)
    ax[0, 6].axis('off')
    ax[1, 6].imshow(blank, alpha=0)
    ax[1, 6].axis('off')
    ax[2, 6].imshow(blank, alpha=0)
    ax[2, 6].axis('off')
    ax[4, 6].imshow(half_blank, alpha=0)
    ax[4, 6].axis('off')
Exemple #29
0
def eval_superpixel():
    # parse args and start experiment
    global args
    args = arg_parser.parse_args()
    args.config_of_data = config.datasets[args.data]
    args.num_classes = config.datasets[args.data]['num_classes']
    if configure is None:
        args.tensorboard = False
        print(Fore.RED +
              'WARNING: you don\'t have tensorboard_logger installed' +
              Fore.RESET)

    model = getModel(**vars(args))
    saved_checkpoint = torch.load("./saved_checkpoints/cifar10+-resnet-56/model_best.pth.tar")
    model.load_state_dict(saved_checkpoint['state_dict'])
    
    model.eval()

    # get test images
    testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                           download=True, transform=transform)
    test_loader = torch.utils.data.DataLoader(testset, batch_size=1,
                                              shuffle=False, num_workers=2)

    count = 0
    for images, labels in test_loader:

        count += 1

        if count > 5:
            break
        # show images
        # imshow(torchvision.utils.make_grid(images), ' '.join('%5s' % classes[labels[j]] for j in range(1)))

        if use_cuda:
            images, labels = images.cuda(), labels.cuda()

        images, labels = Variable(images, volatile=True), Variable(labels)

        org_img = images[0]

        org_img = org_img.type(torch.FloatTensor).data
        org_img = org_img.numpy()
        img = org_img.transpose(1, 2, 0)
        img -= img.min()
        img /= img.max()
        img *= 255
        img = img.astype(np.uint8)
       
        # cv2.imshow('org_img_index{}_label_{}.png'.format(count, labels[0].cpu().data.numpy()[0]), img)
        # cv2.waitKey(0)
        # cv2.destroyAllWindows()
   
        if count == 5:

            cv2.imwrite('original_img_index{}_label_{}.png'.format(count, labels[0].cpu().data.numpy()[0]), img)

            segments = felzenszwalb(img_as_float(img), scale=100, sigma=0.5, min_size=10)
            
            print("Felzenszwalb number of segments: {}".format(len(np.unique(segments))))
            

            # cv2.imshow('superpixels', mark_boundaries(img_as_float(img), segments))
            # cv2.waitKey(0)
            # cv2.destroyAllWindows()
            output = model(images)
            pred = output.data.max(1, keepdim=True)[1]
            
       
            correct_pred_count = 0
            wrong_pred_count = 0
            for i in range(1000):
                # Sample 5 distinct segment ids; sampling from the unique ids
                # directly avoids skipping the highest-numbered segment
                random_sampled_list = random.sample(list(np.unique(segments)), 5)

                mask = np.zeros(img.shape[:2], dtype="uint8")
                mask.fill(255)
                for (j, segVal) in enumerate(random_sampled_list):
                    mask[segments == segVal] = 0
                    

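                # org_img is a CHW float array while mask is HxW with values
                # 0/255, so the product below zeroes the occluded segments and
                # scales the rest by 255; the min/max renormalisation that
                # follows undoes that scaling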
                masked_img = org_img * mask
                
                masked_img -= masked_img.min()
                masked_img /= masked_img.max()
                masked_img *= 255
                masked_img = normalize_image(masked_img)

                masked_img_batch = masked_img[None, :, :, :]

            
                masked_img_tensor = Variable(torch.from_numpy(masked_img_batch)).cuda()
                mask_output = model(masked_img_tensor)
                
                pred_mask = mask_output.data.max(1, keepdim=True)[1]
               
                print("pred_mask[0]", pred_mask[0].cpu().numpy()[0])

                if pred_mask[0].cpu().numpy()[0] == labels[0].cpu().data.numpy()[0]:
                    correct_pred_count += 1
                    print("correct_pred_count: ", correct_pred_count)
                    cv2.imwrite('./masks/mask_{}_{}.png'.format(i, 1), mask)
                    cv2.imwrite('./mask_on_img/masked_imgs_{}.png'.format(i), masked_img.transpose(1, 2, 0))
                else:
                    wrong_pred_count += 1
                    print("wrong_pred_count: ", wrong_pred_count)
                    cv2.imwrite('./masks/mask_{}_{}.png'.format(i, 0), mask)
                    cv2.imwrite('./mask_on_img/masked_imgs_{}.png'.format(i), masked_img.transpose(1, 2, 0))
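A minimal, self-contained sketch of the superpixel-occlusion step used above, assuming only NumPy and scikit-image (the model evaluation is left out, and occlude_random_segments is a hypothetical helper name):

import random

import numpy as np
from skimage.segmentation import felzenszwalb
from skimage.util import img_as_float


def occlude_random_segments(img, n_segments=5, seed=None):
    """Zero out `n_segments` random Felzenszwalb superpixels of an HxWx3 image."""
    rng = random.Random(seed)
    segments = felzenszwalb(img_as_float(img), scale=100, sigma=0.5, min_size=10)
    seg_ids = list(np.unique(segments))
    chosen = rng.sample(seg_ids, min(n_segments, len(seg_ids)))
    mask = np.ones(img.shape[:2], dtype=np.uint8)   # 1 = keep, 0 = occlude
    for seg_val in chosen:
        mask[segments == seg_val] = 0
    # Broadcasting over the channel axis leaves non-occluded pixels intact
    return img * mask[:, :, None], mask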
Exemple #30
0
def pipeline(img,
             low,
             high,
             roi_percentile=85,
             focal_scope='global',
             maxima_areas='small',
             merge_type='blend',
             merge_alpha=0.5,
             filter_type='percentage',
             filter_percentage=15,
             filter_threshold=0.6):
    """
    The whole postprocessing pipeline, returning step-by-step results.

    In detail the postprocessing pipeline involves the following steps:

    1. Applies a filter to blur the high-res map.
    2. Extracts the ROI from the low-res map through a percentile.
    3. Identifies the focal points of the low-res map by locating its local maxima.
    4. Computes the gradient of the high-res map through a Sobel filter.
    5. Draws a histogram of the gradient. Only considers areas corresponding to the ROI extracted from the low-res map.
    6. Calculates a 'lower' and 'upper' bound on the 25th and 75th percentile, respectively.
    7. Performs a region-growing segmentation algorithm on the gradient. The boundaries are the previous percentiles,
       while the focal points are set as the initial seeds (from where to start growing).
    8. Merges the result of the segmentation with the low-res map.
    9. Segments the original image according to the result of the previous merger.

    :param img: The original image (numpy.ndarray).
    :param low: The low-resolution Class Activation Map (numpy.ndarray).
    :param high: The high-resolution Class Activation Map (numpy.ndarray).
    :param roi_percentile: The percentile above which the ROI will be estimated. roi_percentile=85 means that the 15%
                           highest intensity pixels of the low-res map will constitute the ROI (int in (0, 100)).
    :param focal_scope: The scope in which the focal points will be identified. 'global' looks for global maxima, while
                        'local' looks for local maxima. Accepted values: ['global', 'local']
    :param maxima_areas: Can either be 'large' or 'small', depending on whether we want larger or smaller areas.
                         Only relevant for 'local' scopes. Accepted values: ['small', 'large']
    :param merge_type: Selection on whether to multiply or blend the high with the low-res CAMs after processing.
                       Accepted values: ['blend', 'multiply']
    :param merge_alpha: Parameter for the blend merge method. Higher values result in more influence from the high-res
                        map. Should be a float in [0, 1].
    :param filter_type: Selects how to crop the original image according to the refined CAM. Two options are available:
                        - 'percentage', which keeps a percentage of the highest-intensity values of the refined CAM
                        - 'threshold', which keeps the intensities above a certain threshold
    :param filter_percentage: A float representing the percentage of pixels to be kept (should be in [0, 100]). Only
                              relevant when filter_type='percentage'
    :param filter_threshold: A float in [0, 1] over which the intensities of the refined CAM will be kept. Only relevant
                             when filter_type='threshold'
    :return: A dictionary with all intermediate results from the postprocessing pipeline. In detail:
             - 'blurred': The blurred high-res CAM.
             - 'low': The original low-res CAM.
             - 'low_resized': The resized low-res CAM (through bilinear interpolation).
             - 'edges': The result of the Sobel filter on the blurred high-res map.
             - 'roi': The Region Of Interest extracted from the low-res map.
             - 'bounds': The lower and upper bounds for the region-growing segmentation.
             - 'focal_points': The focal_points extracted from the low-res map.
             - 'segmented': The output of the region-growing segmentation.
             - 'full_segment': The filled segmentation.
             - 'merged': The merger of the segmentation with the low-res map, i.e. the refined CAM.
             - 'result': The part of the original image that has been cropped according to the refined CAM.
    """

    # High-res processing
    blurred = filters.blur(high)
    grad = utils.normalize_image(filters.sobel(blurred))

    # Low-res processing
    roi = utils.resize(low) > utils.percentile(utils.resize(low),
                                               roi_percentile)
    upper = utils.percentile(grad[roi], 75)
    lower = utils.percentile(grad[roi], 25)
    focal_points = maxima.find_focal_points(low,
                                            scope=focal_scope,
                                            maxima_areas=maxima_areas)

    # Region growing segmentation
    segm = segment.region_growing(grad,
                                  seeds=focal_points,
                                  lower=lower,
                                  upper=upper)

    # Segment processing
    edges = (grad >= upper).astype(float)
    roi_edges = edges * roi
    segm_with_edges = segm + roi_edges
    filled = maxima.remove_small_holes(segm_with_edges)

    # Merger
    merged = merge.merge_images(filled,
                                low,
                                method=merge_type,
                                alpha=merge_alpha)

    if filter_type == 'percentage':
        result = merge.keep_percentage(img,
                                       merged,
                                       percentage=filter_percentage / 100)
    elif filter_type == 'threshold':
        result = merge.filter_image(img, merged, threshold=filter_threshold)

    return {
        'blurred': blurred,
        'low': low,
        'low_resized': utils.resize(low),
        'edges': grad,
        'roi': roi,
        'bounds': (lower, upper),
        'focal_points': focal_points,
        'segmented': segm,
        'full_segment': filled,
        'merged': merged,
        'result': result
    }
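For reference, a hypothetical call to pipeline (the img, low and high arrays stand in for an input image and its low-/high-resolution CAMs, which are not constructed here):

results = pipeline(img, low, high,
                   roi_percentile=85,
                   focal_scope='local',
                   maxima_areas='small',
                   merge_type='blend',
                   merge_alpha=0.5,
                   filter_type='percentage',
                   filter_percentage=15)
refined_cam = results['merged']  # the refined CAM
cropped = results['result']      # original image cropped by the refined CAM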
Exemple #31
0
tex.set_filter_mode(driver.filter_mode.LINEAR)
tex.set_address_mode(0, driver.address_mode.CLAMP)
tex.set_address_mode(1, driver.address_mode.CLAMP)
driver.matrix_to_texref(prep_image, tex, order="C")

bilinear_interpolation_kernel(driver.Out(result),
                              driver.In(x_out),
                              driver.In(y_out),
                              np.int32(M1),
                              np.int32(N1),
                              np.int32(M2),
                              np.int32(N2),
                              block=block,
                              grid=grid,
                              texrefs=[tex])
big_image = normalize_image(result, image.shape[2])
stop.record()
stop.synchronize()
gpu_time = stop.time_since(start)
print("Время интерполяции на ГПУ: %.3f ms" % (gpu_time))
cv2.imwrite("./data/big-gpu-seal.jpg", big_image.astype(np.uint8))

#p_image = prepare_image(image)

print("Считаем на ЦПУ...")
start = timeit.default_timer()
cpu_result = bilinear(image)
cpu_time = timeit.default_timer() - start
print("Время интерполяции на ЦПУ: %.3f ms" % (cpu_time * 1e3))

#big_cpu_image = normalize_image(cpu_result, image.shape[2])
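For comparison, a minimal NumPy sketch of the bilinear resampling that the GPU kernel performs (the actual bilinear helper timed above is not shown, so this is only an assumption about its behaviour):

import numpy as np

def bilinear_resize(channel, new_h, new_w):
    """Bilinearly resample a 2-D array to shape (new_h, new_w)."""
    h, w = channel.shape
    ys = np.linspace(0, h - 1, new_h)   # fractional source coordinates
    xs = np.linspace(0, w - 1, new_w)
    y0 = np.floor(ys).astype(int)
    x0 = np.floor(xs).astype(int)
    y1 = np.minimum(y0 + 1, h - 1)      # clamp at the image border
    x1 = np.minimum(x0 + 1, w - 1)
    wy = (ys - y0)[:, None]             # vertical interpolation weights
    wx = (xs - x0)[None, :]             # horizontal interpolation weights
    top = channel[y0][:, x0] * (1 - wx) + channel[y0][:, x1] * wx
    bottom = channel[y1][:, x0] * (1 - wx) + channel[y1][:, x1] * wx
    return top * (1 - wy) + bottom * wy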