def main():
    """Run PSMNet on one stereo image pair and save the prediction.

    Configuration is read from the module-level ``args`` object: ``gpus``,
    ``left_image``, ``right_image``, ``dataset``, the per-dataset
    ``im_height_*`` / ``im_width_*`` sizes, ``loadmodel`` and ``maxdisp``.
    The rescaled prediction is written to ``stereo_depth.png``.

    Raises:
        ValueError: If ``args.dataset`` is neither ``'Kitti'`` nor
            ``'SceneFlow'``.
    """
    ctx = get_extension_context('cudnn', device_id=args.gpus)
    nn.set_default_context(ctx)
    image_left = imread(args.left_image)
    image_right = imread(args.right_image)

    if args.dataset == 'Kitti':
        input_shape = (1, 3, args.im_height_kt, args.im_width_kt)
        preprocess = preprocess_kitti
    elif args.dataset == 'SceneFlow':
        input_shape = (1, 3, args.im_height_sf, args.im_width_sf)
        preprocess = preprocess_sceneflow
    else:
        # Fail early: without this branch the input variables below would be
        # undefined and the function would crash with an UnboundLocalError.
        raise ValueError(
            "Unsupported dataset {!r}: expected 'Kitti' or 'SceneFlow'.".format(
                args.dataset))

    var_left = nn.Variable(input_shape)
    var_right = nn.Variable(input_shape)
    img_left, img_right = preprocess(image_left, image_right)

    var_left.d, var_right.d = img_left, img_right
    if args.loadmodel is not None:
        # Loading CNN pretrained parameters.
        nn.load_parameters(args.loadmodel)
    pred_test = psm_net(var_left, var_right, args.maxdisp, False)
    pred_test.forward(clear_buffer=True)

    # Drop the singleton axis 1 and keep the first item of the batch.
    pred = np.squeeze(pred_test.d, axis=1)[0]
    # Min-max rescale to [-1, 1] before writing the image.
    pred = 2 * (pred - np.min(pred)) / np.ptp(pred) - 1
    scipy.misc.imsave('stereo_depth.png', pred)

    print("Done")
Exemple #2
0
    def _get_data(self, i):
        """Return the ``(image, label)`` pair for position ``i``.

        The image index is looked up through ``self._indexes``; the label is
        ``self.labels[i]`` (indexed by ``i`` itself) or ``0`` when no labels
        are set. The path of the served image is pushed to
        ``self.data_history``. With ``self.on_memory`` enabled, decoded
        images are cached in ``self.images`` and reused on later calls.
        """
        idx = self._indexes[i]
        if self.labels is None:
            label = 0
        else:
            label = self.labels[i]

        # Record the path being served; make room first if the history is full.
        history = self.data_history
        if history.full():
            history.get()
        path = self.img_paths[idx]
        history.put(path)

        # Serve from the in-memory cache when this image was already decoded.
        if self.on_memory:
            cached = self.images[idx]
            if cached is not None:
                return (cached, label)

        read_options = dict(channel_first=True, num_channels=3)
        if self.fix_aspect_ratio:
            # Keep the original aspect ratio: resize, then center crop.
            img = resize_ccrop(imread(path, **read_options),
                               self.im_size[0],
                               channel_first=True)
        else:
            # Ignore the aspect ratio and resize straight to self.im_size.
            img = imread(path, size=self.im_size, **read_options)

        if self.on_memory:
            self.images[idx] = img

        return (img, label)
Exemple #3
0
def main():
    """Run PSMNet on one stereo image pair and optionally export an NNP file.

    Configuration is read from the module-level ``args`` object: ``gpus``,
    ``left_image``, ``right_image``, ``dataset``, the per-dataset
    ``im_height_*`` / ``im_width_*`` sizes, ``loadmodel``, ``maxdisp``,
    ``save_nnp`` and ``nnp``. The rescaled prediction is written to
    ``stereo_depth.png``; when ``args.save_nnp`` is set, the network is also
    saved to ``args.nnp``.

    Raises:
        ValueError: If ``args.dataset`` is neither ``'Kitti'`` nor
            ``'SceneFlow'``.
    """
    ctx = get_extension_context('cudnn', device_id=args.gpus)
    nn.set_default_context(ctx)
    image_left = imread(args.left_image)
    image_right = imread(args.right_image)

    if args.dataset == 'Kitti':
        input_shape = (1, 3, args.im_height_kt, args.im_width_kt)
        preprocess = preprocess_kitti
    elif args.dataset == 'SceneFlow':
        input_shape = (1, 3, args.im_height_sf, args.im_width_sf)
        preprocess = preprocess_sceneflow
    else:
        # Fail early: without this branch the input variables below would be
        # undefined and the function would crash with an UnboundLocalError.
        raise ValueError(
            "Unsupported dataset {!r}: expected 'Kitti' or 'SceneFlow'.".format(
                args.dataset))

    var_left = nn.Variable(input_shape)
    var_right = nn.Variable(input_shape)
    img_left, img_right = preprocess(image_left, image_right)

    var_left.d, var_right.d = img_left, img_right
    if args.loadmodel is not None:
        # Loading CNN pretrained parameters.
        nn.load_parameters(args.loadmodel)
    pred_test = psm_net(var_left, var_right, args.maxdisp, False)
    pred_test.forward(clear_buffer=True)

    # Drop the singleton axis 1 and keep the first item of the batch.
    pred = np.squeeze(pred_test.d, axis=1)[0]
    # Min-max rescale to [-1, 1] before writing the image.
    pred = 2 * (pred - np.min(pred)) / np.ptp(pred) - 1
    scipy.misc.imsave('stereo_depth.png', pred)

    print("Done")

    # Save NNP file (used in C++ inference later.).
    if args.save_nnp:
        runtime_contents = {
            'networks': [{
                'name': 'runtime',
                'batch_size': 1,
                'outputs': {
                    'y0': pred_test
                },
                'names': {
                    'x0': var_left,
                    'x1': var_right
                }
            }],
            'executors': [{
                'name': 'runtime',
                'network': 'runtime',
                'data': ['x0', 'x1'],
                'output': ['y0']
            }]
        }
        import nnabla.utils.save
        nnabla.utils.save.save(args.nnp, runtime_contents)
Exemple #4
0
def compute_lpips_of_paired_images(lpips, img0_path, img1_path, params_dir, model):
    """Evaluate ``lpips`` on the two images stored at the given paths.

    Both images are read channel-first, rescaled with ``x / 127.5 - 1`` and
    given a leading batch axis of size 1. ``params_dir`` and ``model`` are
    accepted but not used in this function.

    Returns:
        The output variable of ``lpips(img0, img1, mean_batch=True)`` after
        ``forward()`` has been called on it.
    """
    def _load_as_batch(path):
        # Read, normalize (value range should be in [-1., +1.]) and prepend
        # the batch axis.
        pixels = imread(path, channel_first=True)
        pixels = (pixels / (255. / 2)) - 1
        batch_shape = (1,) + pixels.shape
        return F.reshape(nn.Variable.from_numpy_array(pixels), batch_shape)

    first = _load_as_batch(img0_path)
    second = _load_as_batch(img1_path)

    lpips_val = lpips(first, second, mean_batch=True)
    lpips_val.forward()
    return lpips_val
Exemple #5
0
def encode_and_write_to_path_files(filename, data_dir, ti, tl):
    '''
    Calling encode_label for each label and writing image and label paths to path files

    Each line of ``filename`` is split on single spaces; the first field is
    used as the directory/person name and the second as the image number.
    Lines whose image directory does not exist under
    ``data_dir + 'lfw_funneled/'`` are skipped. For the others, the image
    path is written to ``ti``, the matching ``.ppm`` label is read, encoded
    with ``encode_label``, saved as ``.npy`` under the ``encoded/`` folder,
    and that path is written to ``tl``.

    Args:
        filename: Path of the text file listing the samples.
        data_dir: Dataset root, used as a string prefix (so it is expected
            to end with a path separator).
        ti: Writable file-like object receiving the image paths.
        tl: Writable file-like object receiving the encoded label paths.

    Raises:
        AssertionError: If the label file for a listed image is missing.
    '''
    label_path = data_dir + 'parts_lfw_funneled_gt_images/'
    image_path = data_dir + 'lfw_funneled/'
    # Use a context manager so the list file is closed even if a label is
    # missing or encoding fails part-way through.
    with open(filename, 'r') as train_f:
        for line in train_f:
            words = line.split(' ')
            prefix = get_prefix(words[1])

            if not os.path.isdir(image_path + words[0] + '/'):
                continue

            # Common "<name><prefix><number>" stem shared by all paths below.
            stem = words[0] + prefix + str(int(words[1]))
            ti.write(image_path + words[0] + '/' + stem + '.jpg' + '\n')

            label_file = label_path + stem + '.ppm'
            assert os.path.isfile(label_file), \
                "No matching label file for image : " + stem + '.jpg'
            label = encode_label(utils.imread(label_file))

            encoded_file = label_path + 'encoded/' + stem + '.npy'
            np.save(encoded_file, label)
            tl.write(encoded_file + '\n')
Exemple #6
0
def load_image(path):
    """Load an image as a mean-subtracted batch of one.

    The image is read at size (256, 256), center-cropped to (224, 224) with
    ``crop_center_image``, its last axis is reversed, the axes are moved to
    channel-first order and a leading batch axis is added. Finally the
    per-channel mean ``[104, 117, 123]`` is subtracted.
    """
    from nnabla.utils.image_utils import imread
    resized = imread(path, size=(256, 256))
    cropped = crop_center_image(resized, (224, 224))
    # BGR and NCHW
    channels_reversed = cropped[..., ::-1]
    batch = channels_reversed.transpose(2, 0, 1)[None]
    channel_mean = np.array([104, 117, 123], dtype=np.float32)
    return batch - channel_mean.reshape(1, 3, 1, 1)
Exemple #7
0
    def _load_dtumvs(self, path):
        """Load images, masks and camera parameters from a scene directory.

        Reads every file under ``<path>/image`` and ``<path>/mask`` (sorted
        by name) and the camera matrices stored in ``<path>/cameras.npz``.

        Returns:
            Tuple ``(images, masks, intrinsics, poses)``: images rescaled
            with ``x / 127.5 - 1``, masks as 0.0/1.0 values with a trailing
            axis of size 1, and one 3x3 intrinsic block and one pose per
            image as returned by ``load_K_Rt_from_P``.
        """
        def _sorted_files(subdir):
            return sorted(glob.glob(os.path.join(path, subdir, "*")))

        # Images
        images = np.asarray(
            [image_utils.imread(f) for f in _sorted_files("image")])
        images = images * (1.0 / 127.5) - 1.0

        # Masks: threshold the gray image at 127.5 and convert to float.
        binary_masks = [
            imageio.imread(f, as_gray=True)[:, :, np.newaxis] > 127.5
            for f in _sorted_files("mask")
        ]
        masks = np.asarray(binary_masks) * 1.0

        # Camera projection matrix and scale matrix, one pair per image.
        cameras = np.load(os.path.join(path, "cameras.npz"))
        view_ids = range(len(images))
        world_mats = [
            cameras['world_mat_%d' % v].astype(np.float32) for v in view_ids
        ]
        scale_mats = [
            cameras['scale_mat_%d' % v].astype(np.float32) for v in view_ids
        ]

        intrinsics = []
        poses = []
        for world, scale in zip(world_mats, scale_mats):
            projection = (world @ scale)[:3, :4]
            intrinsic, pose = load_K_Rt_from_P(projection)
            intrinsics.append(intrinsic[:3, :3])
            poses.append(pose)

        return images, masks, np.asarray(intrinsics), np.asarray(poses)
Exemple #8
0
def palette_png_reader(fname):
    '''
    Read ``fname`` with ``imread(..., return_palette_indices=True)`` using
    the PIL backend of ``nn.utils.image_utils``.

    The PIL backend must be available; it is selected first if it is not
    already the active backend.

    Raises:
        AssertionError: If ``'PilBackend'`` is not an available backend.
    '''
    # Use one constant for the availability check, the comparison and the
    # switch, so the name passed to set_backend matches the one that was
    # verified to exist.
    backend = 'PilBackend'
    assert backend in nn.utils.image_utils.get_available_backends()
    if nn.utils.image_utils.get_backend() != backend:
        nn.utils.image_utils.set_backend(backend)
    return imread(fname, return_palette_indices=True)
 def read_image_function():
     """Read ``img_file`` via ``image_utils.imread``.

     All reading options (``grayscale``, ``size``, ``channel_first``,
     ``as_uint16``, ``num_channels``) and ``img_file`` itself are names this
     function does not define; presumably they come from an enclosing scope
     that is not visible here.
     """
     read_options = dict(
         grayscale=grayscale,
         size=size,
         channel_first=channel_first,
         as_uint16=as_uint16,
         num_channels=num_channels,
     )
     return image_utils.imread(img_file, **read_options)
    def __init__(self, width, height, padding, train=True, shuffle=False, rng=None):
        """Download the Caltech101 archive and load its images into memory.

        Every archive member whose name contains ``.jpg`` and does not
        contain ``Google`` is decoded with 3 channels and passed through
        ``self._resize_image``. Class ids are assigned in the order in which
        the parent folder names are first seen.

        Args:
            width: Forwarded to ``self._resize_image``.
            height: Forwarded to ``self._resize_image``.
            padding: Forwarded to ``self._resize_image``.
            train: Not used by this constructor.
            shuffle: Forwarded to the parent constructor.
            rng: Forwarded to the parent constructor. When ``None``,
                ``np.random.RandomState(313)`` is used for the index
                permutation.
        """
        super(Caltech101DataSource, self).__init__(shuffle=shuffle, rng=rng)
        data_uri = "http://www.vision.caltech.edu/Image_Datasets/Caltech101/101_ObjectCategories.tar.gz"
        logger.info('Getting labeled data from {}.'.format(data_uri))
        r = download(data_uri)  # file object returned
        label_dict = dict()
        # Close the downloaded file object even when reading the archive or
        # decoding an image fails.
        try:
            with tarfile.open(fileobj=r, mode="r:gz") as fpin:
                images = []
                labels = []
                for name in fpin.getnames():
                    if ".jpg" not in name or "Google" in name:
                        continue
                    label, filename = name.split("/")[-2:]
                    if label not in label_dict:
                        label_dict[label] = len(label_dict)
                    im = imread(fpin.extractfile(name), num_channels=3)
                    arranged_images = self._resize_image(
                        im, width, height, padding)
                    images.append(arranged_images)
                    labels.append(label_dict[label])
                self._images = np.array(images)
                self._labels = np.array(labels).reshape(-1, 1)
        finally:
            r.close()
        # NOTE(review): this repeats the start-of-download message; it was
        # probably meant to report completion. Text kept unchanged.
        logger.info('Getting labeled data from {}.'.format(data_uri))

        self._size = self._labels.size
        self._variables = ('x', 'y')
        if rng is None:
            rng = np.random.RandomState(313)
        self.rng = rng
        self._indexes = rng.permutation(self._size)
def read_video(name, frame_shape):
    """
        Load a video as an array of frames scaled by 1/255.

        ``name`` is either a directory of frame images (read in sorted
        filename order) or a ``.gif`` / ``.mp4`` / ``.mov`` file, which is
        read with ``mimread`` at ``frame_shape[:2]``; a fourth channel, if
        present, is dropped.

        note that this function assumes that data (images or a video)
        is stored as RGB format.

        Raises ``Exception`` for any other kind of ``name``.
    """

    if os.path.isdir(name):
        frame_files = sorted(os.listdir(name))
        return np.array(
            [imread(os.path.join(name, f)) / 255. for f in frame_files])

    if name.lower().endswith(('.gif', '.mp4', '.mov')):
        clip = np.array(
            mimread(name, memtest=False, size=tuple(frame_shape[:2])))
        has_alpha = clip.shape[-1] == 4
        if has_alpha:
            clip = clip[..., :3]
        return clip / 255.

    raise Exception("Unknown file extensions  %s" % name)
Exemple #12
0
 def load_func(i):
     """Return a 128x128 crop of ``imgs[i]`` scaled to [-1, 1], and ``None``.

     The crop window is centered at column 89, row 121 and the result is
     transposed to channel-first order.
     """
     center_x, center_y = 89, 121
     half = 64
     rows = slice(center_y - half, center_y + half)
     cols = slice(center_x - half, center_x + half)
     patch = imread(imgs[i])[rows, cols, :]
     unit_range = patch.transpose(2, 0, 1) / 255.
     return unit_range * 2. - 1., None
Exemple #13
0
 def load_func(i):
     """Return a 128x128 crop of ``imgs[i]`` in [-1, 1] and an empty array.

     The image is read with 3 channels, cropped around column 89, row 121
     and transposed to channel-first order.
     """
     half_size = 64
     col_center = 89
     row_center = 121
     picture = imread(imgs[i], num_channels=3)
     window = picture[row_center - half_size:row_center + half_size,
                      col_center - half_size:col_center + half_size, :]
     window = window.transpose(2, 0, 1) / 255.
     window = window * 2. - 1.
     return window, np.array([])
def get_sliced_images(filenames, resize=True):
    """Read each file, keep rows 45:173 and columns 25:153, and collect them.

    When ``resize`` is true, each crop is additionally resized to (64, 64)
    with Lanczos interpolation.

    Returns:
        A list with one array per entry of ``filenames``, in the same order.
    """
    def _slice_one(filename):
        patch = imread(filename)[45:173, 25:153, :]
        if resize:
            patch = imresize(patch, size=(64, 64), interpolate='lanczos')
        return patch

    return [_slice_one(filename) for filename in filenames]
Exemple #15
0
def load_function(image_path, label_path, load_shape, crop_shape):
    """Load an image and its label map, optionally taking a random crop.

    Args:
        image_path: Path of the image; read channel-first with 3 channels
            and bicubic interpolation.
        label_path: Path of the label map; read with nearest interpolation.
        load_shape: Size the files are loaded at. Its two entries are
            swapped before being passed to ``imread`` as ``size``.
        crop_shape: Size of the crop. When it equals ``load_shape`` no
            cropping is done.

    Returns:
        Tuple ``(image, label_map)`` where ``image`` has been rescaled with
        ``(x - 127.5) / 127.5``.
    """
    # naive implementation of loading image.
    _load_shape = (load_shape[1], load_shape[0])
    image = imread(image_path, size=_load_shape,
                   interpolate="bicubic", channel_first=True, num_channels=3)
    label_map = imread(label_path, size=_load_shape, interpolate="nearest")

    if load_shape != crop_shape:
        # np.random.randint requires high > low. Clamp the upper bound to 1
        # so that a dimension with no slack (load == crop on that axis while
        # the other axis differs) yields offset 0 instead of a ValueError.
        pos_y = np.random.randint(0, max(1, load_shape[0] - crop_shape[0]))
        pos_x = np.random.randint(0, max(1, load_shape[1] - crop_shape[1]))

        # Same offset for both so the image and label map stay aligned.
        image = _crop(image, (pos_y, pos_x), crop_shape)
        label_map = _crop(label_map, (pos_y, pos_x), crop_shape)

    # normalize
    image = (image - 127.5) / 127.5  # -> [-1, 1]

    return image, label_map
def load_function(image_path, inst_path, label_path, image_shape):
    """Load an image together with its instance map and label map.

    The image is read channel-first, the instance map as uint16 and the
    label map with default options. If the image's trailing two dimensions
    differ from ``image_shape``, all three arrays are resized to it.

    Returns:
        Tuple ``(image, inst_map, label_map)`` where ``image`` has been
        rescaled with ``(x - 127.5) / 127.5``.
    """
    # naive image read implementation
    image = imread(image_path, channel_first=True)
    inst_map = imread(inst_path, as_uint16=True)
    label_map = imread(label_path)

    size_matches = image.shape[1:] == image_shape
    if not size_matches:
        # imresize takes (width, height) as shape.
        width_height = (image_shape[1], image_shape[0])
        image = imresize(image, width_height, channel_first=True)
        inst_map = imresize(inst_map, width_height)
        label_map = imresize(label_map, width_height)

    # normalize -> [-1, 1]
    image = (image - 127.5) / 127.5

    return image, inst_map, label_map
def load_image_imread(file, shape=None, max_range=1.0):
    '''
    Load image from file like object and return it channel-first.

    :param file: Image contents
    :type file: file like object.
    :param shape: shape of output array
        e.g. (3, 128, 192) : n_color, height, width.
        When omitted, the size of the loaded image is kept.
    :type shape: tuple of int
    :param float max_range: the value of return array ranges from 0 to `max_range`.
        A negative value returns the pixels unscaled.

    :return: numpy array

    '''
    img = imread(file)
    rank = len(img.shape)

    if rank == 2:  # gray image
        src_h, src_w = img.shape
        if shape is None:
            dst_c, dst_h, dst_w = 1, src_h, src_w
        else:
            dst_c, dst_h, dst_w = shape
        assert (dst_c == 1)
        if (dst_h, dst_w) != (src_h, src_w):
            # NOTE(review): nnabla's imresize is documented to take size as
            # (width, height); confirm this (height, width) ordering.
            img = imresize(img, (dst_h, dst_w))
        img = img.reshape((dst_c, dst_h, dst_w))
    elif rank == 3:  # RGB image
        src_h, src_w, src_c = img.shape
        if shape is None:
            dst_c, dst_h, dst_w = src_c, src_h, src_w
        else:
            dst_c, dst_h, dst_w = shape
        assert (dst_c == src_c)
        if (dst_h, dst_w, dst_c) != (src_h, src_w, src_c):
            img = imresize(img, (dst_h, dst_w, dst_c))
        img = img.transpose(2, 0, 1)

    # Negative max_range: hand back the raw pixel values.
    if max_range < 0:
        return img

    # Full-scale value depends on the bit depth (16bit vs. 8bit default).
    full_scale = 65535.0 if img.dtype == 'uint16' else 255.0
    if max_range == full_scale:
        return img
    return img * (max_range / full_scale)
    def _get_data(self, position):
        """Return the sample stored at ``position``.

        In training mode two frame indices are drawn at random (with
        replacement, sorted) from the selected video and the two frames are
        returned. Otherwise the whole video is returned together with its
        name.

        Returns:
            ``(driving, source)`` in training mode, each a float32 array
            transposed with ``(2, 0, 1)``; otherwise ``(video, name)`` where
            ``video`` is a float32 array transposed with ``(3, 0, 1, 2)``
            and ``name`` is the entry of ``self.videos`` for this sample.
        """
        idx = self._indexes[position]
        name = self.videos[idx]

        if self.is_train and self.id_sampling:
            # Pick one of the .mp4 files whose name starts with this id.
            path = np.random.choice(
                glob.glob(os.path.join(self.root_dir, name + '*.mp4')))
            path = str(path)
        else:
            path = os.path.join(self.root_dir, name)

        if self.is_train and os.path.isdir(path):
            # Directory of frames: read only the two frames that are needed.
            frames = os.listdir(path)
            num_frames = len(frames)
            frame_idx = np.sort(
                np.random.choice(num_frames, replace=True, size=2))
            video_array = [
                imread(os.path.join(path, frames[k])) / 255.0
                for k in frame_idx
            ]
        else:
            video_array = read_video(path, frame_shape=self.frame_shape)
            num_frames = len(video_array)
            if self.is_train:
                frame_idx = np.sort(
                    np.random.choice(num_frames, replace=True, size=2))
            else:
                frame_idx = range(num_frames)
            video_array = video_array[frame_idx]

        if self.transform is not None:
            # Random augmentation: reverse the frame order and/or mirror
            # every frame left-right, each with probability 0.5.
            if random.random() < 0.5:
                video_array = video_array[::-1]
            if random.random() < 0.5:
                video_array = [np.fliplr(img) for img in video_array]

        if self.is_train:
            source = np.array(video_array[0], dtype='float32')
            driving = np.array(video_array[1], dtype='float32')
            return driving.transpose((2, 0, 1)), source.transpose((2, 0, 1))

        video = np.array(video_array, dtype='float32')
        # The name is returned directly: it was previously looked up under an
        # "name" key that was never stored, which raised KeyError.
        return video.transpose((3, 0, 1, 2)), name
def load_cyclegan_dataset(dataset="horse2zebra",
                          train=True,
                          domain="A",
                          normalize_method=lambda x: (x - 127.5) / 127.5):
    '''
    Load CycleGAN dataset from `here <https://people.eecs.berkeley.edu/~taesung_park/CycleGAN/datasets/>`_ 

    This function assumes that there are two domains in the dataset.

    Args:
        dataset (str): Dataset name excluding ".zip" extension, which you can find that `here <https://people.eecs.berkeley.edu/~taesung_park/CycleGAN/datasets/>`_.
        train (bool): The testing dataset will be returned if False.
        domain (str): Domain name. It must be "A" or "B".
        normalize_method: Function of how to normalize an image. It is applied
            to each image after it has been transposed to channel-first order.
    Returns:
        (np.ndarray, list): Images and filenames (without directory and extension).

    '''
    assert domain in ["A", "B"]

    image_uri = 'https://people.eecs.berkeley.edu/~taesung_park/CycleGAN/datasets/{}.zip'.format(
        dataset)
    logger.info('Getting {} data from {}.'.format(dataset, image_uri))
    r = download(image_uri)

    images = []
    filename_list = []
    # e.g. "trainA" or "testB"
    dirname = "{}{}".format("train" if train else "test", domain)

    # Close the downloaded file object even if reading the archive fails.
    try:
        # Load unpaired images from zipfile.
        with zipfile.ZipFile(r, "r") as zf:
            # filter images by name
            zipinfos = filter(
                lambda zinfo: dirname in zinfo.filename and ".jpg" in zinfo.
                filename, zf.infolist())
            for zipinfo in zipinfos:
                with zf.open(zipinfo.filename, "r") as fp:
                    # filename
                    filename = zipinfo.filename
                    logger.info('loading {}'.format(filename))

                    # load image
                    image = imread(fp)
                    image = np.transpose(image, (2, 0, 1))
                    image = normalize_method(image)
                    image_name = os.path.splitext(filename.split("/")[-1])[0]
                    images.append(image)
                    filename_list.append(image_name)
    finally:
        r.close()
    logger.info('Getting image data done.')
    return np.asarray(images), filename_list
Exemple #20
0
def get_croped_image(annotation, data_dir, margin=None):
    """Crop an image around landmark 54 using its annotation record.

    Args:
        annotation: Sequence of fields: ``98 * 2`` landmark coordinates,
            then four box values, with the image name as the last field.
        data_dir: Directory that contains the image named in ``annotation``.
        margin: Relative enlargement of the crop half-size. When ``None``
            (the default) a value is drawn from ``uniform(0, 0.15)`` on
            every call.

    Returns:
        Tuple ``(img_name, img, y_list, x_list)``: the image name, the
        channel-first crop, and the landmark coordinates shifted into the
        crop's coordinate frame.
    """
    # Sample per call. A random value placed directly in the signature would
    # be evaluated only once, when the function is defined, so every call
    # relying on the default would share the same margin.
    if margin is None:
        margin = np.random.uniform(0, 0.15)

    NUM_POINTS = 98
    img_name = annotation[-1].rsplit(os.linesep)[0]
    landmarks = [float(_) for _ in annotation[:NUM_POINTS * 2]]
    # The four values between the landmarks and the last 7 fields.
    y1, x1, y2, x2 = [int(_) for _ in annotation[NUM_POINTS * 2:-7]]
    # Landmarks are interleaved: even positions go to y_list, odd to x_list.
    y_list = [int(_) for _ in landmarks[0::2]]
    x_list = [int(_) for _ in landmarks[1::2]]
    y_center = y_list[54]
    x_center = x_list[54]

    # Half-size that covers the box on both sides of the center landmark.
    y_diff = max(y2 - y_center, y_center - y1)
    x_diff = max(x2 - x_center, x_center - x1)

    y1 = y_center - int((1 + margin) * y_diff)
    x1 = x_center - int((1 + margin) * x_diff)
    y2 = y_center + int((1 + margin) * y_diff)
    x2 = x_center + int((1 + margin) * x_diff)

    y1, x1, y2, x2 = get_square_corners(y1, x1, y2, x2)

    img = imread(os.path.join(data_dir, img_name), channel_first=True)
    # In this function "x" indexes axis 1 and "y" indexes axis 2 of the
    # channel-first image (see the final slice below).
    H, W = img.shape[1:]

    # just in case that the corner lies outside the image, apply padding.
    # Padding is done by concatenating a flipped copy of the image.
    if x1 < 0:
        img = np.concatenate([img[:, ::-1, :], img], axis=1)
        x1 += H
        x2 += H
        x_list = [_ + H for _ in x_list]
        H += H

    if y1 < 0:
        img = np.concatenate([img[:, :, ::-1], img], axis=2)
        y1 += W
        y2 += W
        y_list = [_ + W for _ in y_list]
        W += W

    if x2 > H:
        img = np.concatenate([img, img[:, ::-1, :]], axis=1)

    if y2 > W:
        img = np.concatenate([img, img[:, :, ::-1]], axis=2)

    img = img[:, x1:x2, y1:y2]
    y_list = [_ - y1 for _ in y_list]
    x_list = [_ - x1 for _ in x_list]
    return img_name, img, y_list, x_list
Exemple #21
0
def read_image_with_preprocess(path, channel_last=False, channels=3):
    """Read an image and turn it into a preprocessed batch of one.

    The image is read with 3 channels at 256x256, center-cropped to 224x224
    and passed through ``normalize_uint8_image``. With ``channels == 4`` a
    zero-filled channel is appended on the last axis. Unless
    ``channel_last`` is set, the result is transposed to channel-first
    order. A leading batch axis is always added.
    """
    assert channels in (3, 4)
    from nnabla.utils.image_utils import imread
    resize_h, resize_w = 256, 256
    crop_hw = (224, 224)
    image = imread(path, num_channels=3, size=(resize_w, resize_h))
    image = normalize_uint8_image(crop_center_image(image, crop_hw))
    if channels == 4:
        # Append one constant-zero channel.
        no_pad = (0, 0)
        image = np.pad(image, (no_pad, no_pad, (0, 1)),
                       mode='constant',
                       constant_values=0)
    if channel_last:
        return image[None]  # Add batch dimension
    return np.transpose(image, (2, 0, 1))[None]  # Add batch dimension
Exemple #22
0
def load_image_imread(file, shape=None, max_range=1.0):
    '''
    Load image from file like object and return it channel-first.

    :param file: Image contents
    :type file: file like object.
    :param shape: shape of output array
        e.g. (3, 128, 192) : n_color, height, width.
        When omitted, the size of the loaded image is kept.
    :type shape: tuple of int
    :param float max_range: the value of return array ranges from 0 to `max_range`.
        A negative value, or 255.0, returns the pixels unscaled.

    :return: numpy array

    '''
    pixels = imread(file)
    rank = len(pixels.shape)

    if rank == 2:  # gray image
        height, width = pixels.shape
        out_n_color, out_height, out_width = (
            (1, height, width) if shape is None else shape)
        assert (out_n_color == 1)
        same_size = out_height == height and out_width == width
        if not same_size:
            # NOTE(review): nnabla's imresize is documented to take size as
            # (width, height); confirm this (height, width) ordering.
            pixels = imresize(pixels, (out_height, out_width))
        pixels = pixels.reshape((out_n_color, out_height, out_width))
    elif rank == 3:  # RGB image
        height, width, n_color = pixels.shape
        out_n_color, out_height, out_width = (
            (n_color, height, width) if shape is None else shape)
        assert (out_n_color == n_color)
        same_size = (out_height == height and out_width == width
                     and out_n_color == n_color)
        if not same_size:
            pixels = imresize(pixels, (out_height, out_width, out_n_color))
        pixels = pixels.transpose(2, 0, 1)

    # No rescaling requested (negative) or needed (already 0..255).
    if max_range < 0 or max_range == 255.0:
        return pixels
    return pixels * (max_range / 255.0)
Exemple #23
0
def test_examples_cpp_mnist_runtime(tmpdir, nnabla_examples_root, batch_size):
    """Compare the C++ ``mnist_runtime`` output with the Python NNP graph.

    The test is currently skipped unconditionally by its first statement.
    Past that, it trains the MNIST example for 100 iterations, runs the
    ``mnist_runtime`` executable on ``1.pgm`` and checks that its scores
    match those of the ``Validation`` network loaded from the same NNP file.
    """
    pytest.skip('Temporarily skip due to mnist training data server trouble.')
    nn.clear_parameters()

    # A. Check this test can run
    if not nnabla_examples_root.available:
        pytest.skip('`nnabla-examples` can not be found.')
    if not command_exists('mnist_runtime'):
        pytest.skip('An executable `mnist_runtime` is not in path.')

    tmpdir.chdir()

    # B. Run mnist training.
    train_script = os.path.join(nnabla_examples_root.path,
                                'image-classification/mnist-collection',
                                'classification.py')
    check_call(['python', train_script, '-i', '100'])

    # C. Get mnist_runtime results.
    nnp_file = tmpdir.join('tmp.monitor', 'lenet_result.nnp').strpath
    assert os.path.isfile(nnp_file)
    here = os.path.dirname(__file__)
    pgm_file = os.path.join(here, '../../../examples/cpp/mnist_runtime/1.pgm')
    assert os.path.isfile(pgm_file)
    output = check_output(['mnist_runtime', nnp_file, pgm_file, 'Runtime'])
    # The scores are the space-separated values after ':' on the second line.
    second_line = output.decode('ascii').splitlines()[1]
    score_text = second_line.split(':')[1].strip()
    cpp_result = np.asarray(score_text.split(' '), dtype=np.float32)

    # D. Get nnp_graph results and compare.
    from nnabla.utils import nnp_graph
    from nnabla.utils.image_utils import imread
    loader = nnp_graph.NnpLoader(nnp_file)
    graph = loader.get_network('Validation', batch_size=batch_size)
    x = graph.inputs['x']
    y = graph.outputs['y']
    x.d = imread(pgm_file, grayscale=True)
    y.forward()
    assert_allclose(y.d.flatten(), cpp_result)
Exemple #24
0
def combine_images(images):
    """
        Arrange a header image and three image batches in a 2x2 grid.

                        source        drving         fake
        images: [(B, C, H, W), (B, C, H, W), (B, C, H, W)]

        Top row: header (read from "imgs/header_combined.png", scaled by
        1/255) next to the source batch. Bottom row: driving next to fake.
        The three batches are clipped to [0, 1].
    """

    def _clip01(batch):
        return np.clip(batch, 0.0, 1.0)

    batch_size = images[0].shape[0]
    # Values are unused; the unpacking requires exactly two trailing axes.
    target_height, target_width = images[0].shape[2:]
    header = imread("imgs/header_combined.png", channel_first=True)

    source = _clip01(images[0])
    # Repeat the header once per batch item.
    header_batch = np.tile(np.expand_dims(header, 0), (batch_size, 1, 1, 1))
    # Side by side along the last axis.
    top_row = np.concatenate([header_batch / 255., source], axis=3)
    bottom_row = np.concatenate(
        [_clip01(images[1]), _clip01(images[2])], axis=3)

    # Stack the two rows along axis 2.
    return np.concatenate([top_row, bottom_row], axis=2)
Exemple #25
0
def img_preprocess(img_paths, used_config):
    """Load, normalize and resize every image listed in ``img_paths``.

    Each image is read channel-first with 3 channels, divided by 255 when
    its dtype is uint8, mapped with ``(x - 0.5) / 0.5`` and resized to a
    square of side ``used_config["image_size"]`` with bilinear
    interpolation.

    Returns:
        Tuple ``(images, image_names)`` of numpy arrays; each name is the
        part of the path after the last ``/``.
    """
    side = used_config["image_size"]

    def _prepare(img_path):
        # Load (and resize) image.
        pixels = imread(img_path, num_channels=3, channel_first=True)
        if pixels.dtype == np.uint8:
            # Scale image's value from [0, 255] -> [0.0, 1.0]
            pixels = pixels / 255.0
        pixels = (pixels - 0.5) / 0.5  # Normalize
        return imresize(pixels, (side, side),
                        interpolate='bilinear',
                        channel_first=True)

    images, image_names = [], []
    for img_path in img_paths:
        images.append(_prepare(img_path))
        image_names.append(img_path.split("/")[-1])

    return np.asarray(images), np.asarray(image_names)
Exemple #26
0
def stargan_load_func(i, dataset, image_dir, image_size, crop_size):
    '''
    Load the image and label of entry ``i`` of ``dataset``.

    The image is read channel-first with 3 channels, divided by 255 when its
    dtype is uint8, mapped with ``(x - 0.5) / 0.5``, center-cropped to a
    ``crop_size`` square and resized to an ``image_size`` square with
    bilinear interpolation.

    Args:
        i: index of the entry to load.
        dataset: a list containing image paths and attribute lists.
        image_dir: path to the directory containing raw images.
        image_size: image size (height and width) after getting resized.
        crop_size: crop size.
    Returns:
        image, label: an image and a label to be fed to nn.Variables.
    '''
    def center_crop_numpy(image, crop_size_h, crop_size_w):
        # naive implementation; expects a (c, h, w) array.
        assert len(image.shape) == 3
        top = (image.shape[1] - crop_size_h) // 2
        left = (image.shape[2] - crop_size_w) // 2
        return image[:, top:top + crop_size_h, left:left + crop_size_w]

    entry = dataset[i]
    img_path, label = entry[0], entry[1]

    # Load image; crop and resize are executed here.
    full_path = os.path.join(image_dir, img_path)
    image = imread(full_path, num_channels=3, channel_first=True)
    if image.dtype == np.uint8:
        # Scale image's value from [0, 255] -> [0.0, 1.0]
        image = image / 255.0
    image = (image - 0.5) / 0.5  # Normalize.
    image = center_crop_numpy(image, crop_size, crop_size)
    image = imresize(image, (image_size, image_size),
                     interpolate='bilinear',
                     channel_first=True)

    return np.asarray(image), np.asarray(label)
Exemple #27
0
def infer():
    """
    Classify one input image and print the sorted predictions.

    Command-line options come from ``get_args()``: the extension context
    (``cpu`` when none is given), device id, type config, input file and
    weight file.
    """
    args = get_args()

    # Set up the execution context.
    from nnabla.ext_utils import get_extension_context
    extension_module = 'cpu' if args.context is None else args.context
    logger.info("Running in %s" % extension_module)
    ctx = get_extension_context(extension_module,
                                device_id=args.device_id,
                                type_config=args.type_config)
    nn.set_default_context(ctx)
    nn.clear_parameters()  # To infer.

    # Read and prepare the input image.
    vdata = resize_and_crop_center(imread(args.input_file, num_channels=3))

    # Build the model for 1000 classes.
    num_classes = 1000
    v_model = get_model(args, num_classes)
    # Keep the prediction buffer alive through forward(clear_buffer=True).
    v_model.pred.persistent = True

    # Load the trained weights.
    nn.load_parameters(args.weight_file)

    # Run inference.
    v_model.image.d = vdata
    v_model.image.data.cast(np.uint8, ctx)
    v_model.pred.forward(clear_buffer=True)

    # Sorting the negated scores ranks the largest prediction first.
    values, labels = F.sort(-v_model.pred.data, with_index=True)
    ratios = F.softmax(-values)
    print_result(labels.data, ratios.data)
def read_image_with_preprocess(path,
                               norm_config,
                               channel_last=False,
                               channels=3,
                               spatial_size=(224, 224)):
    """Read an image and turn it into a preprocessed batch of one.

    Args:
        path: Image file to read (with 3 channels).
        norm_config: Passed to ``normalize_uint8_image``.
        channel_last: When false (default), the result is transposed to
            channel-first order.
        channels: 3 or 4. With 4, a zero-filled channel is appended on the
            last axis.
        spatial_size: ``(height, width)`` of the center crop.

    Returns:
        The preprocessed image with a leading batch axis.
    """
    assert channels in (3, 4)
    from nnabla.utils.image_utils import imread
    # Assume the ratio between the resized image and the input shape to the network is 256 / 224,
    # this is a mostly typical setting for the imagenet classification.
    import args as A
    H = A.resize_by_ratio(spatial_size[0])
    W = A.resize_by_ratio(spatial_size[1])
    # Crop width comes from spatial_size[1], matching how W is derived above;
    # using index 0 for both is only correct for square sizes.
    h, w = spatial_size[0], spatial_size[1]
    image = imread(path, num_channels=3, size=(W, H))
    image = crop_center_image(image, (h, w))
    image = normalize_uint8_image(image, norm_config)
    if channels == 4:
        # Append one constant-zero channel on the last axis.
        image = np.pad(image, ((0, 0), (0, 0), (0, 1)),
                       mode='constant',
                       constant_values=0)
    if not channel_last:
        image = np.transpose(image, (2, 0, 1))
    return image[None]  # Add batch dimension
Exemple #29
0
def main():
    """Classify one image with Darknet19 and report the forward-pass time.

    Options come from ``get_args()``: ``context``, ``weights``, ``size`` and
    ``input``. Prints the five highest-scoring labels with their softmax
    percentage, then the average time of 10 forward passes.
    """
    args = get_args()
    from nnabla.ext_utils import get_extension_context
    nn.set_default_context(get_extension_context(args.context))

    nn.load_parameters(args.weights)
    input_shape = (1, 3, args.size, args.size)
    x = nn.Variable(input_shape)
    y = darknet19.darknet19_classification(x / 255, test=True)

    label_names = np.loadtxt('imagenet.shortnames.list',
                             dtype=str,
                             delimiter=',')[:1000]

    img = imresize(imread(args.input), (args.size, args.size))

    # Channel-first with a batch axis of size 1.
    x.d = img.transpose(2, 0, 1).reshape(1, 3, args.size, args.size)
    y.forward(clear_buffer=True)

    # softmax, expressed in percent and flattened
    percent = F.mul_scalar(F.softmax(y.data), 100)
    p = F.reshape(percent, (y.size, ))

    # Show top-5 prediction
    ranking = np.argsort(y.d.flatten())[::-1]
    for i in ranking[:5]:
        print('{}: {:.1f}%'.format(label_names[i], p.data[i]))

    n_time = 10
    s = time.time()
    for _unused in range(n_time):
        y.forward(clear_buffer=True)
    # Invoking device-to-host copy to synchronize the device (if CUDA).
    _ = y.d
    elapsed = time.time() - s
    print("Processing time: {:.1f} [ms/image]".format(
        elapsed / n_time * 1000))
def convert_image(args):
    """Convert one image and its label file into dataset files.

    The source image is read from ``source_dir``, optionally center-cropped
    (``mode == 'trimming'``) or constant-padded (``mode == 'padding'``)
    towards the target aspect ratio, resized to ``width`` x ``height``,
    converted to the requested number of channels and written as a PNG under
    ``<dest_dir>/data``.  Two CSV files are written next to it:

    * ``<name>_label.csv``  -- per anchor and grid cell, the class index of
      the object assigned to it, or -1 when the cell is empty.
    * ``<name>_region.csv`` -- per anchor and grid cell, four box values
      (center offsets within the cell and log width/height in grid units).

    Args:
        args: Sequence ``(file_name, source_dir, dest_dir, width, height,
            mode, ch, num_class, grid_size, anchors)``.  ``num_class`` is
            accepted for interface compatibility but is not used here.

    Raises:
        RuntimeError: If the loaded image is neither 2-D nor 3-D, or is 3-D
            without exactly three channels.
        Exception: Any error raised while reading, resizing or saving the
            image is logged and re-raised unchanged.
    """
    (file_name, source_dir, dest_dir, width, height, mode, ch,
     _num_class, grid_size, anchors) = args[:10]

    stem = os.path.splitext(file_name)[0]
    src_file_name = os.path.join(source_dir, file_name)
    src_label_file_name = os.path.join(source_dir, stem + ".txt")
    image_file_name = os.path.join(dest_dir, 'data', stem + ".png")
    label_file_name = os.path.join(dest_dir, 'data', stem + "_label.csv")
    region_file_name = os.path.join(dest_dir, 'data', stem + "_region.csv")
    try:
        os.makedirs(os.path.dirname(image_file_name))
    except OSError:
        pass  # python2 does not support exists_ok arg

    labels = load_label(src_label_file_name)

    # Set only when the image geometry is changed by trimming or padding.
    warp_func = None
    try:
        # open source image
        im = imread(src_file_name)
        ndim = len(im.shape)
        if ndim < 2 or ndim > 3:
            message = "Illegal image file format %s." % src_file_name
            logger.warning(message)
            raise RuntimeError(message)
        if ndim == 3 and im.shape[2] != 3:
            message = "The image must be RGB or monochrome."
            logger.warning(message)
            raise RuntimeError(message)

        # resize
        h = im.shape[0]
        w = im.shape[1]
        input_size = (w, h)
        if w != width or h != height:
            if mode == 'trimming':
                # Center-crop the longer side to the target aspect ratio.
                if float(h) / w > float(height) / width:
                    target_h = int(float(w) / width * height)
                    im = im[(h - target_h) // 2:h - (h - target_h) // 2, ::]
                else:
                    target_w = int(float(h) / height * width)
                    im = im[::, (w - target_w) // 2:w - (w - target_w) // 2]

                # NOTE(review): this helper rewrites label[0] and label[1],
                # while the code below reads the class index from label[0]
                # and the box from label[1:].  Confirm the label layout
                # returned by load_label before relying on warped labels.
                def trim_warp(label, input_size, output_size):
                    w_scale = input_size[0] * 1.0 / output_size[0]
                    h_scale = input_size[1] * 1.0 / output_size[1]
                    label[0] = (label[0] - (1.0 - 1.0 / w_scale)
                                * 0.5) * w_scale
                    label[1] = (label[1] - (1.0 - 1.0 / h_scale)
                                * 0.5) * h_scale
                    label[3] *= w_scale
                    label[4] *= h_scale
                    return label
                warp_func = trim_warp
            elif mode == 'padding':
                # Pad the shorter side symmetrically to the target aspect
                # ratio (np.pad 'constant' mode, default fill value).
                if float(h) / w < float(height) / width:
                    target_h = int(float(height) / width * w)
                    pad = (((target_h - h) // 2, target_h -
                            (target_h - h) // 2 - h), (0, 0))
                else:
                    target_w = int(float(width) / height * h)
                    pad = ((0, 0), ((target_w - w) // 2,
                                    target_w - (target_w - w) // 2 - w))
                if len(im.shape) == 3:
                    # Never pad along the channel axis.
                    pad = pad + ((0, 0),)
                im = np.pad(im, pad, 'constant')

                # NOTE(review): same index concern as trim_warp above --
                # label[0]/label[1] are rewritten here; confirm layout.
                def pad_warp(label, input_size, output_size):
                    w_scale = input_size[0] * 1.0 / output_size[0]
                    h_scale = input_size[1] * 1.0 / output_size[1]
                    label[0] = (label[0] * w_scale + (1.0 - w_scale) * 0.5)
                    label[1] = (label[1] * h_scale + (1.0 - h_scale) * 0.5)
                    label[3] *= w_scale
                    label[4] *= h_scale
                    return label
                warp_func = pad_warp
            im = imresize(im, size=(width, height))
            output_size = (width, height)

        # change color ch
        if len(im.shape) == 2 and ch == 3:
            # Monochrome to RGB
            im = np.array([im, im, im]).transpose((1, 2, 0))
        elif len(im.shape) == 3 and ch == 1:
            # RGB to monochrome
            im = np.dot(im[..., :3], [0.299, 0.587, 0.114]).astype(np.uint8)

        # output image
        imsave(image_file_name, im)

    except Exception:
        logger.warning(
            "Failed to convert %s." % (src_file_name))
        raise

    # create label and region file
    if warp_func is not None:
        # warp_func is only set inside the resize branch, so input_size and
        # output_size are both defined here.
        labels = [warp_func(label, input_size, output_size)
                  for label in labels]
    grid_w = width // grid_size
    grid_h = height // grid_size
    # Builtin int/float replace the np.int/np.float aliases, which were
    # removed from NumPy.
    label_array = np.full((len(anchors), grid_h, grid_w), -1, dtype=int)
    region_array = np.full(
        (len(anchors), grid_h, grid_w, 4), 0.0, dtype=float)

    for label in labels:
        label_rect = ObjectRect(XYWH=label[1:]).clip()

        if not (label_rect.width() > 0.0 and label_rect.height() > 0.0):
            continue

        # Grid cell containing the box center.
        gx = int(label_rect.centerx() * grid_w)
        gy = int(label_rect.centery() * grid_h)

        # Pick the anchor with the highest IoU against the label box; the
        # first anchor wins when no anchor overlaps at all.
        max_iou = 0
        anchor_index = 0
        for i, anchor in enumerate(anchors):
            anchor_rect = ObjectRect(
                XYWH=[(gx + 0.5) / grid_w, (gy + 0.5) / grid_h,
                      anchor[0], anchor[1]])
            iou = label_rect.iou(anchor_rect)
            if iou > max_iou:
                anchor_index = i
                max_iou = iou

        label_array[anchor_index][gy][gx] = int(label[0])
        # anchor_rect is the last anchor tried; every anchor rect is built
        # with the same cell-center X/Y, so only its center is used here.
        region_array[anchor_index][gy][gx] = [
            (label_rect.centerx() - anchor_rect.centerx()) * grid_w + 0.5,
            (label_rect.centery() - anchor_rect.centery()) * grid_h + 0.5,
            np.log(label_rect.width() * grid_w),
            np.log(label_rect.height() * grid_h)]

    # Flatten (anchor, row) into the CSV row axis.
    np.savetxt(label_file_name, label_array.reshape(
        (label_array.shape[0] * label_array.shape[1], -1)),
        fmt='%i', delimiter=',')
    np.savetxt(region_file_name, region_array.reshape(
        (region_array.shape[0] * region_array.shape[1], -1)),
        fmt='%f', delimiter=',')