Example #1
def load_image_imread(file, shape=None, max_range=1.0):
    '''
    Load an image from a file-like object.

    :param file: Image contents
    :type file: file-like object
    :param shape: shape of the output array,
        e.g. (3, 128, 192) : n_color, height, width.
    :type shape: tuple of int
    :param float max_range: values of the returned array range from 0 to `max_range`.

    :return: numpy array

    '''
    orig_img = imread(
        file
    )  # return value is from zero to 255 (even if the image has 16-bit depth.)

    if len(orig_img.shape) == 2:  # gray image
        height, width = orig_img.shape
        if shape is None:
            out_height, out_width, out_n_color = height, width, 1
        else:
            out_n_color, out_height, out_width = shape
        assert (out_n_color == 1)
        if out_height != height or out_width != width:
            # imresize returns 0 to 255 image.
            orig_img = imresize(orig_img, (out_height, out_width))
        orig_img = orig_img.reshape((out_n_color, out_height, out_width))
    elif len(orig_img.shape) == 3:  # RGB image
        height, width, n_color = orig_img.shape
        if shape is None:
            out_height, out_width, out_n_color = height, width, n_color
        else:
            out_n_color, out_height, out_width = shape
        assert (out_n_color == n_color)
        if out_height != height or out_width != width or out_n_color != n_color:
            # imresize returns 0 to 255 image.
            orig_img = imresize(orig_img, (out_height, out_width, out_n_color))
        orig_img = orig_img.transpose(2, 0, 1)

    if max_range < 0:
        return orig_img
    else:
        # 16bit depth
        if orig_img.dtype == 'uint16':
            if max_range == 65535.0:
                return orig_img
            return orig_img * (max_range / 65535.0)
        # 8bit depth (default)
        else:
            if max_range == 255.0:
                return orig_img
            return orig_img * (max_range / 255.0)
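A minimal usage sketch for the loader above, assuming imread comes from nnabla.utils.image_utils (or a compatible backend, as elsewhere on this page) and that "sample.png" is a placeholder RGB file:

# Sketch only: "sample.png" is a hypothetical path.
from nnabla.utils.image_utils import imread

with open("sample.png", "rb") as f:
    # shape=None keeps the original size; values are scaled into [0, 1].
    arr = load_image_imread(f, shape=None, max_range=1.0)
print(arr.shape)  # (3, height, width) for an RGB input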
Example #2
def resize(image, desired_size):
    old_size = image.shape[:2]  # old_size is in (height, width) format
    ratio = min(np.divide(desired_size, old_size))
    new_size = (int(old_size[0]*ratio), int(old_size[1]*ratio))

    # imresize expects the size in (width, height) format, hence the swap below.
    if image.shape[2] == 1:
        image = imresize(
            image, (new_size[1], new_size[0]), interpolate='nearest')
        return image
    image = imresize(image, (new_size[1], new_size[0]))

    return image
Example #3
def letterbox(img_orig, h, w):
    '''
    In YOLOv2, the input image is pre-processed before being passed to the network.
    This function applies that pre-processing to the input image.

    Args:
        img_orig: Input image
        h : Desired height of the output image after pre-processing. Should be a multiple of 32.
        w : Desired width of the output image after pre-processing. Should be a multiple of 32.
    '''
    assert img_orig.dtype == np.uint8
    im_h, im_w, _ = img_orig.shape
    if (w * 1.0 / im_w) < (h * 1. / im_h):
        new_w = w
        new_h = int((im_h * w) / im_w)
    else:
        new_h = h
        new_w = int((im_w * h) / im_h)

    patch = imresize(img_orig, (new_w, new_h))
    img = np.ones((h, w, 3), np.uint8) * 127
    # paste the resized patch at the center of the gray canvas
    x0 = int((w - new_w) / 2)
    y0 = int((h - new_h) / 2)
    img[y0:y0 + new_h, x0:x0 + new_w] = patch
    return img, new_w, new_h
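A brief usage sketch for letterbox, assuming imread comes from nnabla.utils.image_utils as in the other examples here; "dog.jpg" is a placeholder input and 416 is a typical YOLOv2 input size (a multiple of 32, as the docstring requires):

# Sketch only: "dog.jpg" is a hypothetical file.
from nnabla.utils.image_utils import imread

img_orig = imread("dog.jpg", num_channels=3)   # uint8, (height, width, 3)
img, new_w, new_h = letterbox(img_orig, 416, 416)
print(img.shape)  # (416, 416, 3), gray (127) padding around the resized patch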
Example #4
def _resize_image(im, width, height, padding):
    # resize
    h = im.shape[0]
    w = im.shape[1]
    if w != width or h != height:
        # resize image
        if not padding:
            # trimming mode
            if float(h) / w > float(height) / width:
                target_h = int(float(w) / width * height)
                im = im[(h - target_h) // 2:h - (h - target_h) // 2, ::]
            else:
                target_w = int(float(h) / height * width)
                im = im[::, (w - target_w) // 2:w - (w - target_w) // 2]
        else:
            # padding mode
            if float(h) / w < float(height) / width:
                target_h = int(float(height) / width * w)
                pad = (((target_h - h) // 2,
                        target_h - (target_h - h) // 2 - h), (0, 0))
            else:
                target_w = int(float(width) / height * h)
                pad = ((0, 0), ((target_w - w) // 2,
                                target_w - (target_w - w) // 2 - w))
            pad = pad + ((0, 0), )
            im = np.pad(im, pad, 'constant')
        im = imresize(im, (width, height))

    x = np.array(im, dtype=np.uint8).transpose((2, 0, 1))
    return x
Example #5
def force_resize(image, target_shape):
    resized_image = np.zeros(
        image.shape[:2] + target_shape)  # (B, C, H, W)
    for i in range(image.shape[0]):
        resized_image[i] = imresize(
            image[i], target_shape, channel_first=True)
    return resized_image
Example #6
def load_omniglot(dataset_root):

    # We cache the Omniglot dataset as .npy files
    x_train, _ = np.load(dataset_root + "/train.npy", allow_pickle=True)
    x_valid, _ = np.load(dataset_root + "/val.npy", allow_pickle=True)
    x = np.r_[x_train, x_valid]

    # A common setting for benchmarking with Omniglot dataset
    # - Image shape: (1, 28, 28)
    # - Number of classes: 1623
    # - Number of images per class: 20
    shape_x = (1, 28, 28)
    x_resized = np.zeros([1623, 20, 28, 28])

    # Resize images following the benchmark setting
    from nnabla.utils.image_utils import imresize
    for xi, ri in zip(x, x_resized):
        for xij, rij in zip(xi, ri):
            rij[:] = imresize(xij,
                              size=(shape_x[2], shape_x[1]),
                              interpolate="nearest") / 255.

    # Class augmentation following the benchmark setting
    rng = np.random.RandomState(706)
    data = augmentation(x_resized)
    data = rng.permutation(data)
    data = data.reshape((1, ) + data.shape).transpose(1, 2, 0, 3, 4)

    # Divide dataset following the benchmark setting
    train_data = data[:4112]
    val_data = data[4112:4800]
    test_data = data[4800:]

    return train_data, val_data, test_data
Example #7
def post_process_image(output, image, target_size):
    old_size = image.shape[:2]
    ratio = min(np.divide(target_size, old_size))
    new_size = (int(old_size[0]*ratio), int(old_size[1]*ratio))
    post_processed = output[0:new_size[0], 0:new_size[1]]
    post_processed = imresize(
        post_processed, (old_size[1], old_size[0]), interpolate='nearest')
    return post_processed
Example #8
def load_image_imread(file, shape=None, max_range=1.0):
    '''
    Load an image from a file-like object.

    :param file: Image contents
    :type file: file-like object
    :param shape: shape of the output array,
        e.g. (3, 128, 192) : n_color, height, width.
    :type shape: tuple of int
    :param float max_range: values of the returned array range from 0 to `max_range`.

    :return: numpy array

    '''
    img255 = imread(
        file
    )  # return value is from zero to 255 (even if the image has 16-bit depth.)

    if len(img255.shape) == 2:  # gray image
        height, width = img255.shape
        if shape is None:
            out_height, out_width, out_n_color = height, width, 1
        else:
            out_n_color, out_height, out_width = shape
        assert (out_n_color == 1)
        if out_height != height or out_width != width:
            # imresize returns 0 to 255 image.
            img255 = imresize(img255, (out_height, out_width))
        img255 = img255.reshape((out_n_color, out_height, out_width))
    elif len(img255.shape) == 3:  # RGB image
        height, width, n_color = img255.shape
        if shape is None:
            out_height, out_width, out_n_color = height, width, n_color
        else:
            out_n_color, out_height, out_width = shape
        assert (out_n_color == n_color)
        if out_height != height or out_width != width or out_n_color != n_color:
            # imresize returns 0 to 255 image.
            img255 = imresize(img255, (out_height, out_width, out_n_color))
        img255 = img255.transpose(2, 0, 1)

    if max_range < 0 or max_range == 255.0:
        return img255
    else:
        return img255 * (max_range / 255.0)
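For contrast with Example #1, a small sketch of the simpler 8-bit tail above; "gray_sample.png" is a placeholder, and imread is again assumed to come from nnabla.utils.image_utils or a compatible backend:

# Sketch: with shape=None a grayscale image keeps its size and gains a leading
# channel axis; max_range=255.0 returns the raw 0-255 values unscaled.
from nnabla.utils.image_utils import imread

with open("gray_sample.png", "rb") as f:
    arr = load_image_imread(f, shape=None, max_range=255.0)
print(arr.shape)  # (1, height, width)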
Example #9
    def style_mixing(self, test_config, args):

        from nnabla.utils.image_utils import imsave, imresize

        print('Testing style mixing of generation...')

        z1 = F.randn(shape=(args.batch_size_A, test_config['latent_dim']),
                     seed=args.seed_1[0]).data
        z2 = F.randn(shape=(args.batch_size_B, test_config['latent_dim']),
                     seed=args.seed_2[0]).data

        nn.set_auto_forward(True)

        mix_image_stacks = []
        for i in range(args.batch_size_A):
            image_column = []
            for j in range(args.batch_size_B):
                style_noises = [
                    F.reshape(z1[i], (1, 512)),
                    F.reshape(z2[j], (1, 512))
                ]
                rgb_output = self.generator(
                    1,
                    style_noises,
                    test_config['truncation_psi'],
                    mixing_layer_index=test_config['mix_after'])
                image = save_generations(rgb_output, None, return_images=True)
                image_column.append(image[0])
            image_column = np.concatenate([image for image in image_column],
                                          axis=1)
            mix_image_stacks.append(image_column)
        mix_image_stacks = np.concatenate(
            [image for image in mix_image_stacks], axis=2)

        style_noises = [z1, z1]
        rgb_output = self.generator(args.batch_size_A, style_noises,
                                    test_config['truncation_psi'])
        image_A = save_generations(rgb_output, None, return_images=True)
        image_A = np.concatenate([image for image in image_A], axis=2)

        style_noises = [z2, z2]
        rgb_output = self.generator(args.batch_size_B, style_noises,
                                    test_config['truncation_psi'])
        image_B = save_generations(rgb_output, None, return_images=True)
        image_B = np.concatenate([image for image in image_B], axis=1)

        top_image = 255 * np.ones(rgb_output[0].shape).astype(np.uint8)

        top_image = np.concatenate((top_image, image_A), axis=2)
        grid_image = np.concatenate((image_B, mix_image_stacks), axis=2)
        grid_image = np.concatenate((top_image, grid_image), axis=1)

        filename = os.path.join(self.results_dir, 'style_mix.png')
        imsave(filename,
               imresize(grid_image, (1024, 1024), channel_first=True),
               channel_first=True)
        print(f'Output saved as {filename}')
Example #10
def get_sliced_images(filenames, resize=True):
    xs = []
    for filename in filenames:
        x = imread(filename)
        x = x[45:173, 25:153, :]
        if resize:
            x = imresize(x, size=(64, 64), interpolate='lanczos')
        xs.append(x)
    return xs
Example #11
def load_function(image_path, inst_path, label_path, image_shape):
    # naive image read implementation
    image = imread(image_path, channel_first=True)

    inst_map = imread(inst_path, as_uint16=True)

    label_map = imread(label_path)

    if image.shape[1:] != image_shape:
        # imresize takes (width, height) as shape.
        resize_shape = (image_shape[1], image_shape[0])
        image = imresize(image, resize_shape, channel_first=True)
        inst_map = imresize(inst_map, resize_shape)
        label_map = imresize(label_map, resize_shape)

    # normalize
    image = (image - 127.5) / 127.5  # -> [-1, 1]

    return image, inst_map, label_map
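The comment above points out that imresize takes its size as (width, height), while numpy image arrays are (height, width, channels); a tiny sketch of that convention on a dummy array, assuming nnabla.utils.image_utils:

# Sketch: the size argument is (width, height).
import numpy as np
from nnabla.utils.image_utils import imresize

dummy = np.zeros((480, 640, 3), dtype=np.uint8)  # a 640x480 image, (H, W, C)
resized = imresize(dummy, (320, 240))            # request a 320x240 output
print(resized.shape)  # expected (240, 320, 3)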
Example #12
def test_imresize(backend, size, channel_first, img):
    _change_backend(backend)

    channel_axis = 0
    if channel_first and len(img.shape) == 3:
        img = img.transpose((2, 0, 1))
        channel_axis = 1

    resized_img = image_utils.imresize(img, size, channel_first=channel_first)

    assert resized_img.shape[channel_axis:channel_axis + 2] == size
Example #13
def load_image_pypng(file, shape=None, max_range=1.0):
    import png
    r = png.Reader(file=file)
    width, height, pixels, metadata = r.read()
    bitscale = 2**metadata['bitdepth'] - 1
    img = numpy.array(list(pixels), dtype=numpy.float32).reshape(
        (height, width, -1)) / bitscale  # (height, width, n_channel)
    if metadata['alpha'] and metadata['planes'] == 4:  # RGBA
        # TODO: this case is not tested well
        try:
            bg = numpy.array(metadata['background']) / bitscale
        except KeyError:
            bg = numpy.array([1.0, 1.0, 1.0])
        rgb = img[:, :, :3]
        alpha = img[:, :, 3]
        imshp = alpha.shape
        img = numpy.outer((1 - alpha), bg).reshape(imshp + (3,)) +\
            numpy.tile(alpha.reshape(imshp + (1,)), (1, 1, 3)) * rgb
        out_n_color = 3
    elif metadata['alpha'] and metadata['planes'] == 2:  # (gray, alpha)
        # TODO: this case is not tested well
        try:
            bg = numpy.array(metadata['background']) / bitscale
        except KeyError:
            bg = numpy.array([1.0])
        rgb = img[:, :, :1]
        alpha = img[:, :, 1]
        imshp = alpha.shape
        img = numpy.outer(
            (1 - alpha), bg).reshape(imshp +
                                     (1, )) + alpha.reshape(imshp +
                                                            (1, )) * rgb
        out_n_color = 1
    else:  # RGB or Gray
        out_n_color = metadata['planes']

    # Reshape image
    if max_range < 0:
        max_range = 255
    if shape is None:
        return img.transpose(2, 0, 1) * max_range
    else:
        out_n_color, out_height, out_width = shape
        return imresize(img, (out_height, out_width)).transpose(
            (2, 0, 1)) * max_range / 255.0
Example #14
def load_omniglot(dataset_root):
    x_train, _ = np.load(dataset_root + "/train.npy", allow_pickle=True)
    x_valid, _ = np.load(dataset_root + "/val.npy", allow_pickle=True)
    x = np.r_[x_train, x_valid]
    from nnabla.utils.image_utils import imresize
    shape_x = (1, 28, 28)
    x_resized = np.zeros([1623, 20, 28, 28])
    for xi, ri in zip(x, x_resized):
        for xij, rij in zip(xi, ri):
            rij[:] = imresize(xij, size=(shape_x[2], shape_x[1])) / 255.
    data = augmentation(x_resized)
    rng = np.random.RandomState(706)
    data = rng.permutation(data)
    data = data.reshape((1, ) + data.shape).transpose(1, 2, 0, 3, 4)
    train_data = data[:4112]
    val_data = data[4112:4800]
    test_data = data[4800:]
    return train_data, val_data, test_data
Example #15
def resize_ccrop(img, size, channel_first=True):
    assert isinstance(size, int)
    h1, w1 = img.shape[-2:] if channel_first else img.shape[:-2]
    s = size / min(h1, w1)

    rsz = imresize(
        img, (max(size, int(round(s * w1))), max(size, int(round(s * h1)))),
        channel_first=channel_first)

    h2, w2 = rsz.shape[-2:] if channel_first else rsz.shape[:-2]
    h_off = (h2 - size) // 2
    w_off = (w2 - size) // 2
    rsz = rsz[:, h_off:h_off + size,
              w_off:w_off + size] if channel_first else rsz[h_off:h_off + size,
                                                            w_off:w_off + size]

    h3, w3 = rsz.shape[-2:] if channel_first else rsz.shape[:-2]
    assert h3 == size and w3 == size

    return rsz
Example #16
def img_preprocess(img_paths, used_config):

    image_size = used_config["image_size"]
    images = list()
    image_names = list()

    for img_path in img_paths:
        # Load (and resize) image and labels.
        image = imread(img_path, num_channels=3, channel_first=True)
        if image.dtype == np.uint8:
            # Rescale image values from [0, 255] to [0.0, 1.0]
            image = image / 255.0
        image = (image - 0.5) / 0.5  # Normalize
        image = imresize(image, (image_size, image_size),
                         interpolate='bilinear',
                         channel_first=True)
        images.append(image)
        image_names.append(img_path.split("/")[-1])

    return np.asarray(images), np.asarray(image_names)
Example #17
def stargan_load_func(i, dataset, image_dir, image_size, crop_size):
    '''
    Load an image and label from the dataset.
    This function assumes that there are two sets of domains in the dataset.
    For example, CelebA has 40 attributes.
    Args:
        dataset: a list containing image paths and attribute lists.
        image_dir: path to the directory containing the raw images.
        image_size: image size (height and width) after resizing.
        crop_size: crop size.
    Returns:
        image, label: an image and a label to be fed to nn.Variables.
    '''
    def center_crop_numpy(image, crop_size_h, crop_size_w):
        # naive implementation.
        assert len(image.shape) == 3  # (c, h, w)
        start_h = (image.shape[1] - crop_size_h) // 2
        stop_h = image.shape[1] - start_h
        start_w = (image.shape[2] - crop_size_w) // 2
        stop_w = image.shape[2] - start_w
        cropped_image = image[:, start_h:start_h + crop_size_h,
                              start_w:start_w + crop_size_w]
        return cropped_image

    img_path, label = dataset[i][0], dataset[i][1]
    # Load image and labels.
    # Unlike the original implementation, crop and resize are executed here.
    image = imread(os.path.join(image_dir, img_path),
                   num_channels=3,
                   channel_first=True)
    if image.dtype == np.uint8:
        # Rescale image values from [0, 255] to [0.0, 1.0]
        image = image / 255.0
    image = (image - 0.5) / 0.5  # Normalize.
    image = center_crop_numpy(image, crop_size, crop_size)
    image = imresize(image, (image_size, image_size),
                     interpolate='bilinear',
                     channel_first=True)

    return np.asarray(image), np.asarray(label)
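A hypothetical call sketch for stargan_load_func; the dataset entry, directory name, and sizes below are placeholders mimicking the CelebA setup mentioned in the docstring:

# Sketch only: "celeba_images" and "000001.jpg" are hypothetical names, and the
# 40-element attribute vector stands in for CelebA's 40 binary attributes.
import numpy as np

dataset = [("000001.jpg", np.zeros(40, dtype=np.float32))]
image, label = stargan_load_func(
    0, dataset, image_dir="celeba_images", image_size=128, crop_size=178)
print(image.shape)  # (3, 128, 128), values roughly in [-1, 1]
print(label.shape)  # (40,)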
Example #18
def resize_and_crop_center(im):
    # resize
    width = 256
    height = 256
    h = im.shape[0]
    w = im.shape[1]
    # trimming mode
    if float(h) / w > float(height) / width:
        target_h = int(float(w) / width * height)
        im = im[(h - target_h) // 2:h - (h - target_h) // 2, ::]
    else:
        target_w = int(float(h) / height * width)
        im = im[::, (w - target_w) // 2:w - (w - target_w) // 2]
    im = imresize(im, (width, height))
    # center crop to 224x224
    hc = im.shape[0] // 2
    wc = im.shape[1] // 2
    r = 224 // 2
    hs = hc - r
    he = hc + r
    ws = wc - r
    we = wc + r
    x = np.array(im[hs:he, ws:we], dtype=np.uint8).transpose((2, 0, 1))
    return x
Example #19
def main():
    args = get_args()
    from nnabla.ext_utils import get_extension_context
    ctx = get_extension_context(args.context)
    nn.set_default_context(ctx)

    nn.load_parameters(args.weights)
    x = nn.Variable((1, 3, args.size, args.size))
    y = darknet19.darknet19_classification(x / 255, test=True)

    label_names = np.loadtxt('imagenet.shortnames.list',
                             dtype=str,
                             delimiter=',')[:1000]

    img = imread(args.input)
    img = imresize(img, (args.size, args.size))

    x.d = img.transpose(2, 0, 1).reshape(1, 3, args.size, args.size)
    y.forward(clear_buffer=True)

    # softmax
    p = F.reshape(F.mul_scalar(F.softmax(y.data), 100), (y.size, ))

    # Show top-5 prediction
    inds = np.argsort(y.d.flatten())[::-1][:5]
    for i in inds:
        print('{}: {:.1f}%'.format(label_names[i], p.data[i]))

    s = time.time()
    n_time = 10
    for i in range(n_time):
        y.forward(clear_buffer=True)
    # Invoking device-to-host copy to synchronize the device (if CUDA).
    _ = y.d
    print("Processing time: {:.1f} [ms/image]".format(
        (time.time() - s) / n_time * 1000))
Example #20
def preprocess_WFLW(args):
    import csv
    print("preprocessing WFLW dataset...")

    src_dir = args.src_dir
    assert os.path.isdir(src_dir)
    out_dir = args.out_dir
    os.makedirs(out_dir, exist_ok=True)
    resize_size = args.resize_size
    line_thickness = args.line_thickness
    gaussian_kernel = args.gaussian_kernel
    gaussian_sigma = args.gaussian_sigma

    imgs_root_path = src_dir
    assert os.path.exists(
        imgs_root_path), f"specified path {imgs_root_path} not found."

    out_csv = [["saved_name", "real_name"]]

    mode = args.mode
    textname = f"WFLW_annotations/list_98pt_rect_attr_train_test/list_98pt_rect_attr_{mode}.txt"
    with open(os.path.join(src_dir, textname)) as f:
        annotations = f.readlines()
        annotations = [_.split(" ") for _ in annotations]

    prep = Preprocessor(imgs_root_path, resize_size, line_thickness,
                        gaussian_kernel, gaussian_sigma)

    tmp_hm_dict = dict()
    tmp_img_dict = dict()

    if args.save_boundary_image:
        os.makedirs(os.path.join(out_dir, "WFLW_landmark_images", mode),
                    exist_ok=True)
        os.makedirs(os.path.join(out_dir, "WFLW_cropped_images", mode),
                    exist_ok=True)

    idx = 0
    for annotation in tqdm(annotations):
        img_name, img, y_list, x_list = get_croped_image(
            annotation, os.path.join(src_dir, "WFLW_images"))
        scale_ratio = 256. / img.shape[-1]
        x_list_scaled = [int(_ * scale_ratio) for _ in x_list]
        y_list_scaled = [int(_ * scale_ratio) for _ in y_list]
        img_resized = imresize(img, (256, 256), channel_first=True)
        bod_img = get_bod_img(img_resized, y_list_scaled, x_list_scaled,
                              resize_size, line_thickness, gaussian_kernel,
                              gaussian_sigma)
        bod_map = get_bod_map(img_resized, y_list_scaled, x_list_scaled,
                              resize_size, line_thickness, gaussian_kernel,
                              gaussian_sigma)
        saved_name = f"{mode}_{idx}.png"
        tmp_img_dict[saved_name] = img_resized
        tmp_hm_dict[saved_name] = bod_map  # uint8
        out_csv.append([saved_name, img_name])

        if args.save_boundary_image:
            save_path_bod = os.path.join(out_dir, "WFLW_landmark_images", mode,
                                         saved_name)
            save_path_cropped = os.path.join(out_dir, "WFLW_cropped_images",
                                             mode, saved_name)
            imsave(save_path_bod, bod_img, channel_first=True)
            imsave(save_path_cropped, img_resized, channel_first=True)
        idx += 1

    np.savez_compressed(os.path.join(out_dir, f'WFLW_cropped_image_{mode}'),
                        **tmp_img_dict)
    np.savez_compressed(os.path.join(out_dir, f'WFLW_heatmap_{mode}'),
                        **tmp_hm_dict)
    with open(os.path.join(out_dir, f"{mode}_data.csv"), 'w') as f:
        writer = csv.writer(f)
        writer.writerows(out_csv)
Example #21
def convert_image(args):
    file_name = args[0]
    source_dir = args[1]
    dest_dir = args[2]
    width = args[3]
    height = args[4]
    mode = args[5]
    ch = args[6]
    num_class = args[7]
    grid_size = args[8]
    anchors = args[9]

    src_file_name = os.path.join(source_dir, file_name)
    src_label_file_name = os.path.join(
        source_dir, os.path.splitext(file_name)[0] + ".txt")
    image_file_name = os.path.join(
        dest_dir, 'data', os.path.splitext(file_name)[0] + ".png")
    label_file_name = os.path.join(
        dest_dir, 'data', os.path.splitext(file_name)[0] + "_label.csv")
    region_file_name = os.path.join(
        dest_dir, 'data', os.path.splitext(file_name)[0] + "_region.csv")
    try:
        os.makedirs(os.path.dirname(image_file_name))
    except OSError:
        pass  # python2 does not support exists_ok arg
    # print(src_file_name, dest_file_name)

    # open source image
    labels = load_label(src_label_file_name)

    warp_func = None
    try:
        im = imread(src_file_name)
        if len(im.shape) < 2 or len(im.shape) > 3:
            logger.warning(
                "Illegal image file format {}.".format(src_file_name))
            raise
        elif len(im.shape) == 3:
            # RGB image
            if im.shape[2] != 3:
                logger.warning(
                    "The image must be RGB or monochrome.")
                csv_data.remove(data)
                raise

        # resize
        h = im.shape[0]
        w = im.shape[1]
        input_size = (w, h)
        # print(h, w)
        if w != width or h != height:
            # resize image
            if mode == 'trimming':
                # trimming mode
                if float(h) / w > float(height) / width:
                    target_h = int(float(w) / width * height)
                    # print('crop_target_h', target_h)
                    im = im[(h - target_h) // 2:h - (h - target_h) // 2, ::]
                else:
                    target_w = int(float(h) / height * width)
                    # print('crop_target_w', target_w)
                    im = im[::, (w - target_w) // 2:w - (w - target_w) // 2]
                # print('before', im.shape)

                def trim_warp(label, input_size, output_size):
                    w_scale = input_size[0] * 1.0 / output_size[0]
                    h_scale = input_size[1] * 1.0 / output_size[1]
                    label[0] = (label[0] - (1.0 - 1.0 / w_scale)
                                * 0.5) * w_scale
                    label[1] = (label[1] - (1.0 - 1.0 / h_scale)
                                * 0.5) * h_scale
                    label[3] *= w_scale
                    label[4] *= h_scale
                    return label
                warp_func = trim_warp
            elif mode == 'padding':
                # padding mode
                if float(h) / w < float(height) / width:
                    target_h = int(float(height) / width * w)
                    # print('padding_target_h', target_h)
                    pad = (((target_h - h) // 2, target_h -
                            (target_h - h) // 2 - h), (0, 0))
                else:
                    target_w = int(float(width) / height * h)
                    # print('padding_target_w', target_w)
                    pad = ((0, 0), ((target_w - w) // 2,
                                    target_w - (target_w - w) // 2 - w))
                if len(im.shape) == 3:
                    pad = pad + ((0, 0),)
                im = np.pad(im, pad, 'constant')
                # print('before', im.shape)

                def pad_warp(label, input_size, output_size):
                    w_scale = input_size[0] * 1.0 / output_size[0]
                    h_scale = input_size[1] * 1.0 / output_size[1]
                    label[0] = (label[0] * w_scale + (1.0 - w_scale) * 0.5)
                    label[1] = (label[1] * h_scale + (1.0 - h_scale) * 0.5)
                    label[3] *= w_scale
                    label[4] *= h_scale
                    return label
                warp_func = pad_warp
            im = imresize(im, size=(width, height))
            output_size = (width, height)
            # print('after', im.shape)

        # change color ch
        if len(im.shape) == 2 and ch == 3:
            # Monochrome to RGB
            im = np.array([im, im, im]).transpose((1, 2, 0))
        elif len(im.shape) == 3 and ch == 1:
            # RGB to monochrome
            im = np.dot(im[..., :3], [0.299, 0.587, 0.114]).astype(np.uint8)

        # output image
        imsave(image_file_name, im)

    except:
        logger.warning(
            "Failed to convert %s." % (src_file_name))
        raise

    # create label and region file
    if warp_func is not None:
        labels = [warp_func(label, input_size, output_size)
                  for label in labels]
    grid_w = width // grid_size
    grid_h = height // grid_size
    label_array = np.full((len(anchors), grid_h, grid_w), -1, dtype=int)
    region_array = np.full(
        (len(anchors), grid_h, grid_w, 4), 0.0, dtype=float)

    for label in labels:
        label_rect = ObjectRect(XYWH=label[1:]).clip()

        if label_rect.width() > 0.0 and label_rect.height() > 0.0:
            gx, gy = int(label_rect.centerx() *
                         grid_w), int(label_rect.centery() * grid_h)
            max_iou = 0
            anchor_index = 0
            for i, anchor in enumerate(anchors):
                anchor_rect = ObjectRect(
                    XYWH=[(gx + 0.5) / grid_w, (gy + 0.5) / grid_h, anchor[0], anchor[1]])
                iou = label_rect.iou(anchor_rect)
                if iou > max_iou:
                    anchor_index = i
                    max_iou = iou
            label_array[anchor_index][gy][gx] = int(label[0])
            region_array[anchor_index][gy][gx] = [
                (label_rect.centerx() - anchor_rect.centerx()) * grid_w + 0.5,
                (label_rect.centery() - anchor_rect.centery()) * grid_h + 0.5,
                np.log(label_rect.width() * grid_w),
                np.log(label_rect.height() * grid_h)]
    np.savetxt(label_file_name, label_array.reshape(
        (label_array.shape[0] * label_array.shape[1], -1)), fmt='%i', delimiter=',')
    np.savetxt(region_file_name, region_array.reshape(
        (region_array.shape[0] * region_array.shape[1], -1)), fmt='%f', delimiter=',')
Example #22
def load_func(i):
    img = imread(imgs[i], num_channels=3)
    img = imresize(img, imsize).transpose(2, 0, 1)
    img = img / 255. * 2. - 1.
    return img, i
Example #23
def get_data_nnabla(dataset, idx, resize_size, test, seed):
    image, label = dataset._get_data(idx)
    image = imresize(image, resize_size, channel_first=True)
    image = transform(image, resize_size, seed, test)
    return image, label[0]
Example #24
def convert_image(args):
    file_name = args[0]
    source_dir = args[1]
    dest_dir = args[2]
    width = args[3]
    height = args[4]
    mode = args[5]
    ch = args[6]

    src_file_name = os.path.join(source_dir, file_name)
    file_name = os.path.splitext(file_name)[0] + ".png"
    dest_file_name = os.path.join(dest_dir, file_name)
    dest_path = os.path.dirname(dest_file_name)
    # print(src_file_name, dest_file_name)

    # open source image
    try:
        im = imread(src_file_name)
        if len(im.shape) < 2 or len(im.shape) > 3:
            logger.warning(
                "Illegal image file format {}.".format(src_file_name))
            raise
        elif len(im.shape) == 3:
            # RGB image
            if im.shape[2] != 3:
                logger.warning("The image must be RGB or monochrome.")
                csv_data.remove(data)
                raise

        # resize
        h = im.shape[0]
        w = im.shape[1]
        # print(h, w)
        if w != width or h != height:
            # resize image
            if mode == 'trimming':
                # trimming mode
                if float(h) / w > float(height) / width:
                    target_h = int(float(w) / width * height)
                    # print('crop_target_h', target_h)
                    im = im[(h - target_h) // 2:h - (h - target_h) // 2, ::]
                else:
                    target_w = int(float(h) / height * width)
                    # print('crop_target_w', target_w)
                    im = im[::, (w - target_w) // 2:w - (w - target_w) // 2]
                # print('before', im.shape)
            elif mode == 'padding':
                # padding mode
                if float(h) / w < float(height) / width:
                    target_h = int(float(height) / width * w)
                    # print('padding_target_h', target_h)
                    pad = (((target_h - h) // 2,
                            target_h - (target_h - h) // 2 - h), (0, 0))
                else:
                    target_w = int(float(width) / height * h)
                    # print('padding_target_w', target_w)
                    pad = ((0, 0), ((target_w - w) // 2,
                                    target_w - (target_w - w) // 2 - w))
                if len(im.shape) == 3:
                    pad = pad + ((0, 0), )
                im = np.pad(im, pad, 'constant')
                # print('before', im.shape)
            im = imresize(im, size=(height, width))
            # print('after', im.shape)

        # change color ch
        if len(im.shape) == 2 and ch == 3:
            # Monochrome to RGB
            im = np.array([im, im, im]).transpose((1, 2, 0))
        elif len(im.shape) == 3 and ch == 1:
            # RGB to monochrome
            im = np.dot(im[..., :3], [0.299, 0.587, 0.114])

        # output
        try:
            os.makedirs(dest_path)
        except OSError:
            pass  # python2 does not support exists_ok arg

        imsave(dest_file_name, im)
    except:
        logger.warning("Failed to convert %s." % (src_file_name))
Example #25
def main():
    args = get_args()
    names = np.genfromtxt(args.class_names, dtype=str, delimiter='?')
    rng = np.random.RandomState(1223)
    colors = rng.randint(0, 256, (args.classes, 3)).astype(np.uint8)
    colors = [tuple(c.tolist()) for c in colors]

    # Set context
    from nnabla.ext_utils import get_extension_context
    ctx = get_extension_context(args.context,
                                device_id=args.device_id,
                                type_config=args.type_config)
    nn.set_default_context(ctx)

    # Load parameter
    _ = nn.load_parameters(args.weights)

    # Build a YOLO v2 network
    feature_dict = {}
    x = nn.Variable((1, 3, args.width, args.width))
    y = yolov2.yolov2(x,
                      args.num_anchors,
                      args.classes,
                      test=True,
                      feature_dict=feature_dict)
    y = yolov2.yolov2_activate(y, args.num_anchors, args.anchors)
    y = F.nms_detection2d(y, args.thresh, args.nms, args.nms_per_class)

    # Read image
    img_orig = imread(args.input, num_channels=3)
    im_h, im_w, _ = img_orig.shape
    # letterbox
    w = args.width
    h = args.width

    if (w * 1.0 / im_w) < (h * 1. / im_h):
        new_w = w
        new_h = int((im_h * w) / im_w)
    else:
        new_h = h
        new_w = int((im_w * h) / im_h)

    patch = imresize(img_orig, (new_w, new_h)) / 255.
    img = np.ones((h, w, 3), np.float32) * 0.5
    # paste the resized patch at the center of the gray canvas
    x0 = int((w - new_w) / 2)
    y0 = int((h - new_h) / 2)
    img[y0:y0 + new_h, x0:x0 + new_w] = patch

    # Execute YOLO v2
    print("forward")
    in_img = img.transpose(2, 0, 1).reshape(1, 3, args.width, args.width)
    x.d = in_img
    y.forward(clear_buffer=True)
    print("done")

    bboxes = y.d[0]
    img_draw = draw_bounding_boxes(img_orig, bboxes, im_w, im_h, names, colors,
                                   new_w * 1.0 / w, new_h * 1.0 / h,
                                   args.thresh)
    imsave(args.output, img_draw)

    # Timing
    s = time.time()
    n_time = 10
    for i in range(n_time):
        x.d = in_img
        y.forward(clear_buffer=True)
        # Invoking device-to-host copy if CUDA
        # so that time contains data transfer.
        _ = y.d
    print("Processing time: {:.1f} [ms/image]".format(
        (time.time() - s) / n_time * 1000))