def load_image_imread(file, shape=None, max_range=1.0):
    '''
    Load image from file like object.

    :param file: Image contents
    :type file: file like object.
    :param shape: shape of output array
        e.g. (3, 128, 192) : n_color, height, width.
    :type shape: tuple of int
    :param float max_range: the value of return array ranges
        from 0 to `max_range`.

    :return: numpy array
    '''
    # imread returns values from 0 to 255 for 8-bit images;
    # 16-bit depth images come back as uint16 and are handled below.
    orig_img = imread(file)

    if len(orig_img.shape) == 2:  # gray image
        height, width = orig_img.shape
        if shape is None:
            out_height, out_width, out_n_color = height, width, 1
        else:
            out_n_color, out_height, out_width = shape
        assert (out_n_color == 1)
        if out_height != height or out_width != width:
            # imresize returns 0 to 255 image.
            orig_img = imresize(orig_img, (out_height, out_width))
        orig_img = orig_img.reshape((out_n_color, out_height, out_width))

    elif len(orig_img.shape) == 3:  # RGB image
        height, width, n_color = orig_img.shape
        if shape is None:
            out_height, out_width, out_n_color = height, width, n_color
        else:
            out_n_color, out_height, out_width = shape
        assert (out_n_color == n_color)
        if out_height != height or out_width != width or out_n_color != n_color:
            # imresize returns 0 to 255 image.
            orig_img = imresize(orig_img, (out_height, out_width, out_n_color))
        orig_img = orig_img.transpose(2, 0, 1)

    if max_range < 0:
        return orig_img
    else:
        # 16-bit depth
        if orig_img.dtype == 'uint16':
            if max_range == 65535.0:
                return orig_img
            return orig_img * (max_range / 65535.0)
        # 8-bit depth (default)
        else:
            if max_range == 255.0:
                return orig_img
            return orig_img * (max_range / 255.0)

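# Illustrative sketch (not part of the original code): one way to call
# load_image_imread() above. "sample.png" is a placeholder path, assumed to be
# an RGB image, and imread/imresize are assumed to come from
# nnabla.utils.image_utils. A square output shape is used so that the (height,
# width) ordering of the resize does not matter.
def _demo_load_image_imread():
    with open('sample.png', 'rb') as f:
        arr = load_image_imread(f, shape=(3, 128, 128), max_range=1.0)
    print(arr.shape)  # expected (3, 128, 128), values scaled to [0, 1]
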
def resize(image, desired_size):
    # old_size is in (height, width) format
    old_size = image.shape[:2]
    ratio = min(np.divide(desired_size, old_size))
    # new_size is in (height, width) format; imresize takes (width, height),
    # so the two entries are swapped when calling it below.
    new_size = (int(old_size[0] * ratio), int(old_size[1] * ratio))
    if image.shape[2] == 1:
        image = imresize(image, (new_size[1], new_size[0]),
                         interpolate='nearest')
        return image
    image = imresize(image, (new_size[1], new_size[0]))
    return image

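# Illustrative sketch (not in the original source): exercises resize() above
# with a dummy (H, W, C) uint8 array. It assumes imresize is
# nnabla.utils.image_utils.imresize with its (width, height) size convention,
# and that desired_size is given as (height, width) to match the function body.
def _demo_resize():
    import numpy as np
    dummy = np.zeros((300, 400, 3), dtype=np.uint8)  # (H, W, C)
    out = resize(dummy, (256, 256))
    # Scale factor is min(256/300, 256/400) = 0.64, so 300x400 -> 192x256.
    print(out.shape)  # expected (192, 256, 3)
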
def letterbox(img_orig, h, w):
    '''
    Input image is pre-processed before passing it to the network in YoloV2.
    This function applies the pre-processing to the input image.

    Args:
        img_orig: Input image.
        h : Desired height of output image after pre-processing.
            Should be a multiple of 32.
        w : Desired width of output image after pre-processing.
            Should be a multiple of 32.
    '''
    assert img_orig.dtype == np.uint8
    im_h, im_w, _ = img_orig.shape

    # Scale so that the image fits inside (h, w) while keeping its aspect ratio.
    if (w * 1.0 / im_w) < (h * 1. / im_h):
        new_w = w
        new_h = int((im_h * w) / im_w)
    else:
        new_h = h
        new_w = int((im_w * h) / im_h)
    patch = imresize(img_orig, (new_w, new_h))

    # Paste the resized patch onto a gray (127) canvas, centered.
    img = np.ones((h, w, 3), np.uint8) * 127
    x0 = int((w - new_w) / 2)
    y0 = int((h - new_h) / 2)
    img[y0:y0 + new_h, x0:x0 + new_w] = patch
    return img, new_w, new_h

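# Illustrative sketch (not in the original source): shows that letterbox() keeps
# the aspect ratio and pads to the target size. Assumes imresize comes from
# nnabla.utils.image_utils; the 240x320 input is a dummy array.
def _demo_letterbox():
    import numpy as np
    img = np.random.randint(0, 256, size=(240, 320, 3), dtype=np.uint8)
    out, new_w, new_h = letterbox(img, 416, 416)
    # 320x240 scaled to fit 416x416 -> 416x312, then padded vertically.
    print(out.shape, new_w, new_h)  # expected (416, 416, 3) 416 312
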
def _resize_image(im, width, height, padding):
    # resize
    h = im.shape[0]
    w = im.shape[1]
    if w != width or h != height:
        # resize image
        if not padding:
            # trimming mode
            if float(h) / w > float(height) / width:
                target_h = int(float(w) / width * height)
                im = im[(h - target_h) // 2:h - (h - target_h) // 2, ::]
            else:
                target_w = int(float(h) / height * width)
                im = im[::, (w - target_w) // 2:w - (w - target_w) // 2]
        else:
            # padding mode
            if float(h) / w < float(height) / width:
                target_h = int(float(height) / width * w)
                pad = (((target_h - h) // 2,
                        target_h - (target_h - h) // 2 - h), (0, 0))
            else:
                target_w = int(float(width) / height * h)
                pad = ((0, 0), ((target_w - w) // 2,
                                target_w - (target_w - w) // 2 - w))
            pad = pad + ((0, 0), )
            im = np.pad(im, pad, 'constant')
        im = imresize(im, (width, height))
    x = np.array(im, dtype=np.uint8).transpose((2, 0, 1))
    return x

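# Illustrative sketch (not in the original source): compares trimming and
# padding modes of _resize_image() on a dummy RGB image. The output is a
# channel-first uint8 array in both cases; imresize is assumed to be
# nnabla.utils.image_utils.imresize.
def _demo_resize_image():
    import numpy as np
    im = np.random.randint(0, 256, size=(200, 400, 3), dtype=np.uint8)
    trimmed = _resize_image(im, 256, 256, padding=False)  # crop then resize
    padded = _resize_image(im, 256, 256, padding=True)    # pad then resize
    print(trimmed.shape, padded.shape)  # expected (3, 256, 256) (3, 256, 256)
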
def force_resize(image, target_shape):
    # (B, C, H, W)
    resized_image = np.zeros(image.shape[:2] + target_shape)
    for i in range(image.shape[0]):
        resized_image[i] = imresize(image[i], target_shape, channel_first=True)
    return resized_image

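# Illustrative sketch (not in the original source): force_resize() resizes every
# image of a (B, C, H, W) batch to target_shape. A square target is used here so
# the width/height ordering of imresize (assumed to be the nnabla image_utils
# version, which accepts channel_first arrays) does not matter.
def _demo_force_resize():
    import numpy as np
    batch = np.random.randint(0, 256, size=(2, 3, 64, 64), dtype=np.uint8)
    out = force_resize(batch, (128, 128))
    print(out.shape)  # expected (2, 3, 128, 128)
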
def load_omniglot(dataset_root):
    # We cached the Omniglot dataset as npy files
    x_train, _ = np.load(dataset_root + "/train.npy", allow_pickle=True)
    x_valid, _ = np.load(dataset_root + "/val.npy", allow_pickle=True)
    x = np.r_[x_train, x_valid]

    # A common setting for benchmarking with the Omniglot dataset:
    # - Image shape: (1, 28, 28)
    # - Number of classes: 1623
    # - Number of images per class: 20
    shape_x = (1, 28, 28)
    x_resized = np.zeros([1623, 20, 28, 28])

    # Resize images following the benchmark setting
    from nnabla.utils.image_utils import imresize
    for xi, ri in zip(x, x_resized):
        for xij, rij in zip(xi, ri):
            rij[:] = imresize(xij, size=(shape_x[2], shape_x[1]),
                              interpolate="nearest") / 255.

    # Class augmentation following the benchmark setting
    rng = np.random.RandomState(706)
    data = augmentation(x_resized)
    data = rng.permutation(data)
    data = data.reshape((1, ) + data.shape).transpose(1, 2, 0, 3, 4)

    # Divide dataset following the benchmark setting
    train_data = data[:4112]
    val_data = data[4112:4800]
    test_data = data[4800:]

    return train_data, val_data, test_data

def post_process_image(output, image, target_size):
    old_size = image.shape[:2]
    ratio = min(np.divide(target_size, old_size))
    new_size = (int(old_size[0] * ratio), int(old_size[1] * ratio))
    post_processed = output[0:new_size[0], 0:new_size[1]]
    post_processed = imresize(post_processed, (old_size[1], old_size[0]),
                              interpolate='nearest')
    return post_processed

def load_image_imread(file, shape=None, max_range=1.0):
    '''
    Load image from file like object.

    :param file: Image contents
    :type file: file like object.
    :param shape: shape of output array
        e.g. (3, 128, 192) : n_color, height, width.
    :type shape: tuple of int
    :param float max_range: the value of return array ranges
        from 0 to `max_range`.

    :return: numpy array
    '''
    # Return value is from 0 to 255 (even if the image has 16-bit depth).
    img255 = imread(file)

    if len(img255.shape) == 2:  # gray image
        height, width = img255.shape
        if shape is None:
            out_height, out_width, out_n_color = height, width, 1
        else:
            out_n_color, out_height, out_width = shape
        assert (out_n_color == 1)
        if out_height != height or out_width != width:
            # imresize returns 0 to 255 image.
            img255 = imresize(img255, (out_height, out_width))
        img255 = img255.reshape((out_n_color, out_height, out_width))

    elif len(img255.shape) == 3:  # RGB image
        height, width, n_color = img255.shape
        if shape is None:
            out_height, out_width, out_n_color = height, width, n_color
        else:
            out_n_color, out_height, out_width = shape
        assert (out_n_color == n_color)
        if out_height != height or out_width != width or out_n_color != n_color:
            # imresize returns 0 to 255 image.
            img255 = imresize(img255, (out_height, out_width, out_n_color))
        img255 = img255.transpose(2, 0, 1)

    if max_range < 0 or max_range == 255.0:
        return img255
    else:
        return img255 * (max_range / 255.0)

def style_mixing(self, test_config, args):
    from nnabla.utils.image_utils import imsave, imresize
    print('Testing style mixing of generation...')

    z1 = F.randn(shape=(args.batch_size_A, test_config['latent_dim']),
                 seed=args.seed_1[0]).data
    z2 = F.randn(shape=(args.batch_size_B, test_config['latent_dim']),
                 seed=args.seed_2[0]).data

    nn.set_auto_forward(True)

    mix_image_stacks = []
    for i in range(args.batch_size_A):
        image_column = []
        for j in range(args.batch_size_B):
            style_noises = [F.reshape(z1[i], (1, 512)),
                            F.reshape(z2[j], (1, 512))]
            rgb_output = self.generator(
                1, style_noises, test_config['truncation_psi'],
                mixing_layer_index=test_config['mix_after'])
            image = save_generations(rgb_output, None, return_images=True)
            image_column.append(image[0])
        image_column = np.concatenate(
            [image for image in image_column], axis=1)
        mix_image_stacks.append(image_column)
    mix_image_stacks = np.concatenate(
        [image for image in mix_image_stacks], axis=2)

    style_noises = [z1, z1]
    rgb_output = self.generator(args.batch_size_A, style_noises,
                                test_config['truncation_psi'])
    image_A = save_generations(rgb_output, None, return_images=True)
    image_A = np.concatenate([image for image in image_A], axis=2)

    style_noises = [z2, z2]
    rgb_output = self.generator(args.batch_size_B, style_noises,
                                test_config['truncation_psi'])
    image_B = save_generations(rgb_output, None, return_images=True)
    image_B = np.concatenate([image for image in image_B], axis=1)

    top_image = 255 * np.ones(rgb_output[0].shape).astype(np.uint8)
    top_image = np.concatenate((top_image, image_A), axis=2)
    grid_image = np.concatenate((image_B, mix_image_stacks), axis=2)
    grid_image = np.concatenate((top_image, grid_image), axis=1)

    filename = os.path.join(self.results_dir, 'style_mix.png')
    imsave(filename,
           imresize(grid_image, (1024, 1024), channel_first=True),
           channel_first=True)
    print(f'Output saved as {filename}')

def get_sliced_images(filenames, resize=True):
    xs = []
    for filename in filenames:
        x = imread(filename)
        # Crop a fixed 128x128 region.
        x = x[45:173, 25:153, :]
        if resize:
            x = imresize(x, size=(64, 64), interpolate='lanczos')
        xs.append(x)
    return xs

def load_function(image_path, inst_path, label_path, image_shape):
    # naive image read implementation
    image = imread(image_path, channel_first=True)
    inst_map = imread(inst_path, as_uint16=True)
    label_map = imread(label_path)

    if image.shape[1:] != image_shape:
        # imresize takes (width, height) as shape.
        resize_shape = (image_shape[1], image_shape[0])
        image = imresize(image, resize_shape, channel_first=True)
        inst_map = imresize(inst_map, resize_shape)
        label_map = imresize(label_map, resize_shape)

    # normalize
    image = (image - 127.5) / 127.5  # -> [-1, 1]

    return image, inst_map, label_map

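# Illustrative sketch (not in the original source): how load_function() might be
# called. The file names below are placeholders, imread/imresize are assumed to
# be nnabla.utils.image_utils functions, and image_shape is (height, width).
def _demo_load_function():
    image, inst_map, label_map = load_function(
        'example_image.png',      # placeholder path
        'example_inst.png',       # placeholder path
        'example_label.png',      # placeholder path
        image_shape=(512, 1024))  # (height, width)
    # image is channel-first and normalized to [-1, 1]; maps keep 2D layout.
    print(image.shape, inst_map.shape, label_map.shape)
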
def test_imresize(backend, size, channel_first, img):
    _change_backend(backend)

    channel_axis = 0
    if channel_first and len(img.shape) == 3:
        img = img.transpose((2, 0, 1))
        channel_axis = 1

    resized_img = image_utils.imresize(img, size, channel_first=channel_first)

    assert resized_img.shape[channel_axis:channel_axis + 2] == size

def load_image_pypng(file, shape=None, max_range=1.0):
    import png

    r = png.Reader(file=file)
    width, height, pixels, metadata = r.read()
    bitscale = 2 ** metadata['bitdepth'] - 1
    img = numpy.array(list(pixels), dtype=numpy.float32).reshape(
        (height, width, -1)) / bitscale  # (height, width, n_channel)

    if metadata['alpha'] and metadata['planes'] == 4:  # RGBA
        # TODO: this case is not tested well
        try:
            bg = numpy.array(metadata['background']) / bitscale
        except KeyError:
            bg = numpy.array([1.0, 1.0, 1.0])
        rgb = img[:, :, :3]
        alpha = img[:, :, 3]
        imshp = alpha.shape
        img = numpy.outer((1 - alpha), bg).reshape(imshp + (3,)) + \
            numpy.tile(alpha.reshape(imshp + (1,)), (1, 1, 3)) * rgb
        out_n_color = 3
    elif metadata['alpha'] and metadata['planes'] == 2:  # (gray, alpha)
        # TODO: this case is not tested well
        try:
            bg = numpy.array(metadata['background']) / bitscale
        except KeyError:
            bg = numpy.array([1.0])
        rgb = img[:, :, :1]
        alpha = img[:, :, 1]
        imshp = alpha.shape
        img = numpy.outer((1 - alpha), bg).reshape(imshp + (1, )) + \
            alpha.reshape(imshp + (1, )) * rgb
        out_n_color = 1
    else:  # RGB or Gray
        out_n_color = metadata['planes']

    # Reshape image
    if max_range < 0:
        max_range = 255
    if shape is None:
        return img.transpose(2, 0, 1) * max_range
    else:
        out_n_color, out_height, out_width = shape
        return imresize(img, (out_height, out_width)).transpose(
            (2, 0, 1)) * max_range / 255.0

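# Illustrative sketch (not in the original source): load_image_pypng() decodes
# through pypng, so the file must be a PNG opened in binary mode. "sample.png"
# is a placeholder path assumed to be an RGB PNG; `numpy` and `imresize` are
# assumed to be imported at module level as in the function above. A square
# shape is used so the (height, width) ordering of the resize does not matter.
def _demo_load_image_pypng():
    with open('sample.png', 'rb') as f:
        arr = load_image_pypng(f, shape=(3, 128, 128), max_range=1.0)
    print(arr.shape)  # expected (3, 128, 128)
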
def load_omniglot(dataset_root):
    x_train, _ = np.load(dataset_root + "/train.npy", allow_pickle=True)
    x_valid, _ = np.load(dataset_root + "/val.npy", allow_pickle=True)
    x = np.r_[x_train, x_valid]

    from nnabla.utils.image_utils import imresize
    shape_x = (1, 28, 28)
    x_resized = np.zeros([1623, 20, 28, 28])

    for xi, ri in zip(x, x_resized):
        for xij, rij in zip(xi, ri):
            rij[:] = imresize(xij, size=(shape_x[2], shape_x[1])) / 255.

    data = augmentation(x_resized)
    rng = np.random.RandomState(706)
    data = rng.permutation(data)
    data = data.reshape((1, ) + data.shape).transpose(1, 2, 0, 3, 4)

    train_data = data[:4112]
    val_data = data[4112:4800]
    test_data = data[4800:]

    return train_data, val_data, test_data

def resize_ccrop(img, size, channel_first=True):
    assert isinstance(size, int)

    h1, w1 = img.shape[-2:] if channel_first else img.shape[:2]
    # Scale so that the shorter side becomes `size`.
    s = size / min(h1, w1)
    rsz = imresize(
        img,
        (max(size, int(round(s * w1))), max(size, int(round(s * h1)))),
        channel_first=channel_first)

    # Center crop to (size, size).
    h2, w2 = rsz.shape[-2:] if channel_first else rsz.shape[:2]
    h_off = (h2 - size) // 2
    w_off = (w2 - size) // 2
    if channel_first:
        rsz = rsz[:, h_off:h_off + size, w_off:w_off + size]
    else:
        rsz = rsz[h_off:h_off + size, w_off:w_off + size]

    h3, w3 = rsz.shape[-2:] if channel_first else rsz.shape[:2]
    assert h3 == size and w3 == size
    return rsz

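# Illustrative sketch (not in the original source): resize_ccrop() scales the
# shorter side to `size` and then center-crops to (size, size). Exercised here
# on a channel-first dummy array; imresize is assumed to be the nnabla
# image_utils version.
def _demo_resize_ccrop():
    import numpy as np
    img = np.random.randint(0, 256, size=(3, 300, 400), dtype=np.uint8)
    out = resize_ccrop(img, 224, channel_first=True)
    print(out.shape)  # expected (3, 224, 224)
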
def img_preprocess(img_paths, used_config):
    image_size = used_config["image_size"]
    images = list()
    image_names = list()

    for img_path in img_paths:
        # Load (and resize) image and labels.
        image = imread(img_path, num_channels=3, channel_first=True)
        if image.dtype == np.uint8:
            # Scale image's values from [0, 255] -> [0.0, 1.0]
            image = image / 255.0
        image = (image - 0.5) / 0.5  # Normalize
        image = imresize(image, (image_size, image_size),
                         interpolate='bilinear', channel_first=True)
        images.append(image)
        image_names.append(img_path.split("/")[-1])

    return np.asarray(images), np.asarray(image_names)

def stargan_load_func(i, dataset, image_dir, image_size, crop_size):
    '''
    Load an image and label from dataset.
    This function assumes that there are two sets of domains in the dataset.
    For example, CelebA has 40 attributes.

    Args:
        dataset: a list containing image paths and attribute lists.
        image_dir: path to the directory containing raw images.
        image_size: image size (height and width) after getting resized.
        crop_size: crop size.

    Returns:
        image, label: an image and a label to be fed to nn.Variables.
    '''

    def center_crop_numpy(image, crop_size_h, crop_size_w):
        # naive implementation.
        assert len(image.shape) == 3  # (c, h, w)
        start_h = (image.shape[1] - crop_size_h) // 2
        stop_h = image.shape[1] - start_h
        start_w = (image.shape[2] - crop_size_w) // 2
        stop_w = image.shape[2] - start_w
        cropped_image = image[:, start_h:start_h + crop_size_h,
                              start_w:start_w + crop_size_w]
        return cropped_image

    img_path, label = dataset[i][0], dataset[i][1]

    # Load image and labels.
    # Unlike the original implementation, crop and resize are executed here.
    image = imread(os.path.join(image_dir, img_path),
                   num_channels=3, channel_first=True)
    if image.dtype == np.uint8:
        # Scale image's values from [0, 255] -> [0.0, 1.0]
        image = image / 255.0
    image = (image - 0.5) / 0.5  # Normalize.
    image = center_crop_numpy(image, crop_size, crop_size)
    image = imresize(image, (image_size, image_size),
                     interpolate='bilinear', channel_first=True)

    return np.asarray(image), np.asarray(label)

def resize_and_crop_center(im):
    # resize
    width = 256
    height = 256
    h = im.shape[0]
    w = im.shape[1]
    # trimming mode
    if float(h) / w > float(height) / width:
        target_h = int(float(w) / width * height)
        im = im[(h - target_h) // 2:h - (h - target_h) // 2, ::]
    else:
        target_w = int(float(h) / height * width)
        im = im[::, (w - target_w) // 2:w - (w - target_w) // 2]
    im = imresize(im, (width, height))

    # crop
    hc = im.shape[0] // 2
    wc = im.shape[1] // 2
    r = 224 // 2
    hs = hc - r
    he = hc + r
    ws = wc - r
    we = wc + r
    x = np.array(im[hs:he, ws:we], dtype=np.uint8).transpose((2, 0, 1))
    return x

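# Illustrative sketch (not in the original source): resize_and_crop_center()
# trims to the target aspect ratio, resizes to 256x256, and crops the 224x224
# center, returning a channel-first uint8 array. Exercised on a dummy image.
def _demo_resize_and_crop_center():
    import numpy as np
    im = np.random.randint(0, 256, size=(480, 640, 3), dtype=np.uint8)
    x = resize_and_crop_center(im)
    print(x.shape, x.dtype)  # expected (3, 224, 224) uint8
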
def main():
    args = get_args()

    from nnabla.ext_utils import get_extension_context
    ctx = get_extension_context(args.context)
    nn.set_default_context(ctx)

    nn.load_parameters(args.weights)
    x = nn.Variable((1, 3, args.size, args.size))
    y = darknet19.darknet19_classification(x / 255, test=True)

    label_names = np.loadtxt('imagenet.shortnames.list',
                             dtype=str, delimiter=',')[:1000]

    img = imread(args.input)
    img = imresize(img, (args.size, args.size))
    x.d = img.transpose(2, 0, 1).reshape(1, 3, args.size, args.size)
    y.forward(clear_buffer=True)

    # softmax
    p = F.reshape(F.mul_scalar(F.softmax(y.data), 100), (y.size, ))

    # Show top-5 prediction
    inds = np.argsort(y.d.flatten())[::-1][:5]
    for i in inds:
        print('{}: {:.1f}%'.format(label_names[i], p.data[i]))

    s = time.time()
    n_time = 10
    for i in range(n_time):
        y.forward(clear_buffer=True)
        # Invoking device-to-host copy to synchronize the device (if CUDA).
        _ = y.d
    print("Processing time: {:.1f} [ms/image]".format(
        (time.time() - s) / n_time * 1000))

def preprocess_WFLW(args):
    import csv

    print("preprocessing WFLW dataset...")
    src_dir = args.src_dir
    assert os.path.isdir(src_dir)
    out_dir = args.out_dir
    os.makedirs(out_dir, exist_ok=True)

    resize_size = args.resize_size
    line_thickness = args.line_thickness
    gaussian_kernel = args.gaussian_kernel
    gaussian_sigma = args.gaussian_sigma

    imgs_root_path = src_dir
    assert os.path.exists(
        imgs_root_path), f"specified path {imgs_root_path} not found."

    out_csv = [["saved_name", "real_name"]]

    mode = args.mode
    textname = f"WFLW_annotations/list_98pt_rect_attr_train_test/list_98pt_rect_attr_{mode}.txt"
    with open(os.path.join(src_dir, textname)) as f:
        annotations = f.readlines()
    annotations = [_.split(" ") for _ in annotations]

    prep = Preprocessor(imgs_root_path, resize_size, line_thickness,
                        gaussian_kernel, gaussian_sigma)

    tmp_hm_dict = dict()
    tmp_img_dict = dict()

    if args.save_boundary_image:
        os.makedirs(os.path.join(out_dir, "WFLW_landmark_images", mode),
                    exist_ok=True)
        os.makedirs(os.path.join(out_dir, "WFLW_cropped_images", mode),
                    exist_ok=True)

    idx = 0
    for annotation in tqdm(annotations):
        img_name, img, y_list, x_list = get_croped_image(
            annotation, os.path.join(src_dir, "WFLW_images"))
        scale_ratio = 256. / img.shape[-1]
        x_list_scaled = [int(_ * scale_ratio) for _ in x_list]
        y_list_scaled = [int(_ * scale_ratio) for _ in y_list]
        img_resized = imresize(img, (256, 256), channel_first=True)
        bod_img = get_bod_img(img_resized, y_list_scaled, x_list_scaled,
                              resize_size, line_thickness,
                              gaussian_kernel, gaussian_sigma)
        bod_map = get_bod_map(img_resized, y_list_scaled, x_list_scaled,
                              resize_size, line_thickness,
                              gaussian_kernel, gaussian_sigma)
        saved_name = f"{mode}_{idx}.png"
        tmp_img_dict[saved_name] = img_resized
        tmp_hm_dict[saved_name] = bod_map  # uint8
        out_csv.append([saved_name, img_name])

        if args.save_boundary_image:
            save_path_bod = os.path.join(
                out_dir, "WFLW_landmark_images", mode, saved_name)
            save_path_cropped = os.path.join(
                out_dir, "WFLW_cropped_images", mode, saved_name)
            imsave(save_path_bod, bod_img, channel_first=True)
            imsave(save_path_cropped, img_resized, channel_first=True)
        idx += 1

    np.savez_compressed(
        os.path.join(out_dir, f'WFLW_cropped_image_{mode}'), **tmp_img_dict)
    np.savez_compressed(
        os.path.join(out_dir, f'WFLW_heatmap_{mode}'), **tmp_hm_dict)

    with open(os.path.join(out_dir, f"{mode}_data.csv"), 'w') as f:
        writer = csv.writer(f)
        writer.writerows(out_csv)

def convert_image(args):
    file_name = args[0]
    source_dir = args[1]
    dest_dir = args[2]
    width = args[3]
    height = args[4]
    mode = args[5]
    ch = args[6]
    num_class = args[7]
    grid_size = args[8]
    anchors = args[9]

    src_file_name = os.path.join(source_dir, file_name)
    src_label_file_name = os.path.join(
        source_dir, os.path.splitext(file_name)[0] + ".txt")
    image_file_name = os.path.join(
        dest_dir, 'data', os.path.splitext(file_name)[0] + ".png")
    label_file_name = os.path.join(
        dest_dir, 'data', os.path.splitext(file_name)[0] + "_label.csv")
    region_file_name = os.path.join(
        dest_dir, 'data', os.path.splitext(file_name)[0] + "_region.csv")
    try:
        os.makedirs(os.path.dirname(image_file_name))
    except OSError:
        pass  # python2 does not support exist_ok arg

    # open source image
    labels = load_label(src_label_file_name)
    warp_func = None
    try:
        im = imread(src_file_name)
        if len(im.shape) < 2 or len(im.shape) > 3:
            logger.warning(
                "Illegal image file format %s." % src_file_name)
            raise
        elif len(im.shape) == 3:
            # RGB image
            if im.shape[2] != 3:
                logger.warning("The image must be RGB or monochrome.")
                raise

        # resize
        h = im.shape[0]
        w = im.shape[1]
        input_size = (w, h)
        if w != width or h != height:
            # resize image
            if mode == 'trimming':
                # trimming mode
                if float(h) / w > float(height) / width:
                    target_h = int(float(w) / width * height)
                    im = im[(h - target_h) // 2:h - (h - target_h) // 2, ::]
                else:
                    target_w = int(float(h) / height * width)
                    im = im[::, (w - target_w) // 2:w - (w - target_w) // 2]

                def trim_warp(label, input_size, output_size):
                    w_scale = input_size[0] * 1.0 / output_size[0]
                    h_scale = input_size[1] * 1.0 / output_size[1]
                    label[0] = (label[0] - (1.0 - 1.0 / w_scale) * 0.5) * w_scale
                    label[1] = (label[1] - (1.0 - 1.0 / h_scale) * 0.5) * h_scale
                    label[3] *= w_scale
                    label[4] *= h_scale
                    return label
                warp_func = trim_warp
            elif mode == 'padding':
                # padding mode
                if float(h) / w < float(height) / width:
                    target_h = int(float(height) / width * w)
                    pad = (((target_h - h) // 2,
                            target_h - (target_h - h) // 2 - h), (0, 0))
                else:
                    target_w = int(float(width) / height * h)
                    pad = ((0, 0), ((target_w - w) // 2,
                                    target_w - (target_w - w) // 2 - w))
                if len(im.shape) == 3:
                    pad = pad + ((0, 0),)
                im = np.pad(im, pad, 'constant')

                def pad_warp(label, input_size, output_size):
                    w_scale = input_size[0] * 1.0 / output_size[0]
                    h_scale = input_size[1] * 1.0 / output_size[1]
                    label[0] = (label[0] * w_scale + (1.0 - w_scale) * 0.5)
                    label[1] = (label[1] * h_scale + (1.0 - h_scale) * 0.5)
                    label[3] *= w_scale
                    label[4] *= h_scale
                    return label
                warp_func = pad_warp
            im = imresize(im, size=(width, height))
        output_size = (width, height)

        # change color ch
        if len(im.shape) == 2 and ch == 3:
            # Monochrome to RGB
            im = np.array([im, im, im]).transpose((1, 2, 0))
        elif len(im.shape) == 3 and ch == 1:
            # RGB to monochrome
            im = np.dot(im[..., :3], [0.299, 0.587, 0.114]).astype(np.uint8)

        # output image
        imsave(image_file_name, im)
    except:
        logger.warning("Failed to convert %s." % (src_file_name))
        raise

    # create label and region file
    if warp_func is not None:
        labels = [warp_func(label, input_size, output_size)
                  for label in labels]
    grid_w = width // grid_size
    grid_h = height // grid_size
    label_array = np.full((len(anchors), grid_h, grid_w), -1, dtype=int)
    region_array = np.full(
        (len(anchors), grid_h, grid_w, 4), 0.0, dtype=float)
    for label in labels:
        label_rect = ObjectRect(XYWH=label[1:]).clip()
        if label_rect.width() > 0.0 and label_rect.height() > 0.0:
            gx = int(label_rect.centerx() * grid_w)
            gy = int(label_rect.centery() * grid_h)
            max_iou = 0
            anchor_index = 0
            for i, anchor in enumerate(anchors):
                anchor_rect = ObjectRect(
                    XYWH=[(gx + 0.5) / grid_w, (gy + 0.5) / grid_h,
                          anchor[0], anchor[1]])
                iou = label_rect.iou(anchor_rect)
                if iou > max_iou:
                    anchor_index = i
                    max_iou = iou
            label_array[anchor_index][gy][gx] = int(label[0])
            region_array[anchor_index][gy][gx] = [
                (label_rect.centerx() - anchor_rect.centerx()) * grid_w + 0.5,
                (label_rect.centery() - anchor_rect.centery()) * grid_h + 0.5,
                np.log(label_rect.width() * grid_w),
                np.log(label_rect.height() * grid_h)]
    np.savetxt(label_file_name,
               label_array.reshape(
                   (label_array.shape[0] * label_array.shape[1], -1)),
               fmt='%i', delimiter=',')
    np.savetxt(region_file_name,
               region_array.reshape(
                   (region_array.shape[0] * region_array.shape[1], -1)),
               fmt='%f', delimiter=',')

def load_func(i):
    img = imread(imgs[i], num_channels=3)
    img = imresize(img, imsize).transpose(2, 0, 1)
    img = img / 255. * 2. - 1.
    return img, i

def get_data_nnabla(dataset, idx, resize_size, test, seed):
    image, label = dataset._get_data(idx)
    image = imresize(image, resize_size, channel_first=True)
    image = transform(image, resize_size, seed, test)
    return image, label[0]

def convert_image(args):
    file_name = args[0]
    source_dir = args[1]
    dest_dir = args[2]
    width = args[3]
    height = args[4]
    mode = args[5]
    ch = args[6]

    src_file_name = os.path.join(source_dir, file_name)
    file_name = os.path.splitext(file_name)[0] + ".png"
    dest_file_name = os.path.join(dest_dir, file_name)
    dest_path = os.path.dirname(dest_file_name)

    # open source image
    try:
        im = imread(src_file_name)
        if len(im.shape) < 2 or len(im.shape) > 3:
            logger.warning(
                "Illegal image file format %s." % src_file_name)
            raise
        elif len(im.shape) == 3:
            # RGB image
            if im.shape[2] != 3:
                logger.warning("The image must be RGB or monochrome.")
                raise

        # resize
        h = im.shape[0]
        w = im.shape[1]
        if w != width or h != height:
            # resize image
            if mode == 'trimming':
                # trimming mode
                if float(h) / w > float(height) / width:
                    target_h = int(float(w) / width * height)
                    im = im[(h - target_h) // 2:h - (h - target_h) // 2, ::]
                else:
                    target_w = int(float(h) / height * width)
                    im = im[::, (w - target_w) // 2:w - (w - target_w) // 2]
            elif mode == 'padding':
                # padding mode
                if float(h) / w < float(height) / width:
                    target_h = int(float(height) / width * w)
                    pad = (((target_h - h) // 2,
                            target_h - (target_h - h) // 2 - h), (0, 0))
                else:
                    target_w = int(float(width) / height * h)
                    pad = ((0, 0), ((target_w - w) // 2,
                                    target_w - (target_w - w) // 2 - w))
                if len(im.shape) == 3:
                    pad = pad + ((0, 0), )
                im = np.pad(im, pad, 'constant')
            im = imresize(im, size=(height, width))

        # change color ch
        if len(im.shape) == 2 and ch == 3:
            # Monochrome to RGB
            im = np.array([im, im, im]).transpose((1, 2, 0))
        elif len(im.shape) == 3 and ch == 1:
            # RGB to monochrome
            im = np.dot(im[..., :3], [0.299, 0.587, 0.114])

        # output
        try:
            os.makedirs(dest_path)
        except OSError:
            pass  # python2 does not support exist_ok arg
        imsave(dest_file_name, im)
    except:
        logger.warning("Failed to convert %s." % (src_file_name))

def main():
    args = get_args()

    names = np.genfromtxt(args.class_names, dtype=str, delimiter='?')
    rng = np.random.RandomState(1223)
    colors = rng.randint(0, 256, (args.classes, 3)).astype(np.uint8)
    colors = [tuple(c.tolist()) for c in colors]

    # Set context
    from nnabla.ext_utils import get_extension_context
    ctx = get_extension_context(args.context, device_id=args.device_id,
                                type_config=args.type_config)
    nn.set_default_context(ctx)

    # Load parameter
    _ = nn.load_parameters(args.weights)

    # Build a YOLO v2 network
    feature_dict = {}
    x = nn.Variable((1, 3, args.width, args.width))
    y = yolov2.yolov2(x, args.num_anchors, args.classes,
                      test=True, feature_dict=feature_dict)
    y = yolov2.yolov2_activate(y, args.num_anchors, args.anchors)
    y = F.nms_detection2d(y, args.thresh, args.nms, args.nms_per_class)

    # Read image
    img_orig = imread(args.input, num_channels=3)
    im_h, im_w, _ = img_orig.shape

    # letterbox
    w = args.width
    h = args.width
    if (w * 1.0 / im_w) < (h * 1. / im_h):
        new_w = w
        new_h = int((im_h * w) / im_w)
    else:
        new_h = h
        new_w = int((im_w * h) / im_h)
    patch = imresize(img_orig, (new_w, new_h)) / 255.
    img = np.ones((h, w, 3), np.float32) * 0.5
    x0 = int((w - new_w) / 2)
    y0 = int((h - new_h) / 2)
    img[y0:y0 + new_h, x0:x0 + new_w] = patch

    # Execute YOLO v2
    print("forward")
    in_img = img.transpose(2, 0, 1).reshape(1, 3, args.width, args.width)
    x.d = in_img
    y.forward(clear_buffer=True)
    print("done")

    bboxes = y.d[0]
    img_draw = draw_bounding_boxes(
        img_orig, bboxes, im_w, im_h, names, colors,
        new_w * 1.0 / w, new_h * 1.0 / h, args.thresh)
    imsave(args.output, img_draw)

    # Timing
    s = time.time()
    n_time = 10
    for i in range(n_time):
        x.d = in_img
        y.forward(clear_buffer=True)
        # Invoking device-to-host copy if CUDA
        # so that time contains data transfer.
        _ = y.d
    print("Processing time: {:.1f} [ms/image]".format(
        (time.time() - s) / n_time * 1000))
