def main():
    """Run stereo disparity inference on one image pair and save a PNG.

    All settings are read from the module-level ``args`` object: the GPU
    id, the left/right image paths, the dataset name, the per-dataset
    input height/width, an optional parameter file and the maximum
    disparity. The prediction is rescaled to [-1, 1] and written to
    ``stereo_depth.png``.

    Raises:
        ValueError: If ``args.dataset`` is neither ``'Kitti'`` nor
            ``'SceneFlow'``.
    """
    ctx = get_extension_context('cudnn', device_id=args.gpus)
    nn.set_default_context(ctx)

    image_left = imread(args.left_image)
    image_right = imread(args.right_image)

    # Select the input size and preprocessing that belong to the dataset.
    if args.dataset == 'Kitti':
        input_shape = (1, 3, args.im_height_kt, args.im_width_kt)
        preprocess = preprocess_kitti
    elif args.dataset == 'SceneFlow':
        input_shape = (1, 3, args.im_height_sf, args.im_width_sf)
        preprocess = preprocess_sceneflow
    else:
        # Fail with a clear message instead of an unbound-variable error
        # further down.
        raise ValueError(
            "Unsupported dataset '{}': expected 'Kitti' or 'SceneFlow'.".format(
                args.dataset))

    var_left = nn.Variable(input_shape)
    var_right = nn.Variable(input_shape)
    img_left, img_right = preprocess(image_left, image_right)
    var_left.d, var_right.d = img_left, img_right

    if args.loadmodel is not None:
        # Loading CNN pretrained parameters.
        nn.load_parameters(args.loadmodel)

    pred_test = psm_net(var_left, var_right, args.maxdisp, False)
    pred_test.forward(clear_buffer=True)

    # Drop the singleton axis 1 and take the first element of the batch.
    pred = np.squeeze(pred_test.d, axis=1)[0]
    # Min-max rescale to [-1, 1].
    pred = 2 * (pred - np.min(pred)) / np.ptp(pred) - 1

    # NOTE(review): scipy.misc.imsave is not available in recent SciPy
    # releases; confirm the pinned SciPy version before relying on this.
    scipy.misc.imsave('stereo_depth.png', pred)
    print("Done")
def _get_data(self, i):
    """Return the ``(image, label)`` pair for position ``i``.

    The position is mapped through ``self._indexes`` to pick the image.
    The path of every requested image is pushed to ``self.data_history``
    (dropping the oldest entry when the queue is full). When
    ``self.on_memory`` is set, decoded images are cached in
    ``self.images`` and reused on later calls.
    """
    image_idx = self._indexes[i]
    if self.labels is None:
        label = 0
    else:
        # NOTE(review): the label is looked up with the raw position ``i``
        # while the image uses ``image_idx`` -- confirm this is intended.
        label = self.labels[i]

    # keep data paths
    history = self.data_history
    if history.full():
        history.get()
    history.put(self.img_paths[image_idx])

    # Serve from the in-memory cache when the image was already decoded.
    if self.on_memory:
        cached = self.images[image_idx]
        if cached is not None:
            return (cached, label)

    path = self.img_paths[image_idx]
    if self.fix_aspect_ratio:
        # Resize and center crop so the original aspect ratio is kept.
        img = imread(path, channel_first=True, num_channels=3)
        img = resize_ccrop(img, self.im_size[0], channel_first=True)
    else:
        # Ignore the original aspect ratio and resize straight to im_size.
        img = imread(path, channel_first=True,
                     size=self.im_size, num_channels=3)

    if self.on_memory:
        self.images[image_idx] = img

    return (img, label)
def main():
    """Run stereo disparity inference on one image pair and save the result.

    All settings are read from the module-level ``args`` object. The
    prediction is rescaled to [-1, 1] and written to ``stereo_depth.png``.
    When ``args.save_nnp`` is set, the network is additionally saved as
    an NNP file at ``args.nnp``.

    Raises:
        ValueError: If ``args.dataset`` is neither ``'Kitti'`` nor
            ``'SceneFlow'``.
    """
    ctx = get_extension_context('cudnn', device_id=args.gpus)
    nn.set_default_context(ctx)

    image_left = imread(args.left_image)
    image_right = imread(args.right_image)

    # Select the input size and preprocessing that belong to the dataset.
    if args.dataset == 'Kitti':
        input_shape = (1, 3, args.im_height_kt, args.im_width_kt)
        preprocess = preprocess_kitti
    elif args.dataset == 'SceneFlow':
        input_shape = (1, 3, args.im_height_sf, args.im_width_sf)
        preprocess = preprocess_sceneflow
    else:
        # Fail with a clear message instead of an unbound-variable error
        # further down.
        raise ValueError(
            "Unsupported dataset '{}': expected 'Kitti' or 'SceneFlow'.".format(
                args.dataset))

    var_left = nn.Variable(input_shape)
    var_right = nn.Variable(input_shape)
    img_left, img_right = preprocess(image_left, image_right)
    var_left.d, var_right.d = img_left, img_right

    if args.loadmodel is not None:
        # Loading CNN pretrained parameters.
        nn.load_parameters(args.loadmodel)

    pred_test = psm_net(var_left, var_right, args.maxdisp, False)
    pred_test.forward(clear_buffer=True)

    # Drop the singleton axis 1 and take the first element of the batch.
    pred = np.squeeze(pred_test.d, axis=1)[0]
    # Min-max rescale to [-1, 1].
    pred = 2 * (pred - np.min(pred)) / np.ptp(pred) - 1

    # NOTE(review): scipy.misc.imsave is not available in recent SciPy
    # releases; confirm the pinned SciPy version before relying on this.
    scipy.misc.imsave('stereo_depth.png', pred)
    print("Done")

    # Save NNP file (used in C++ inference later.).
    if args.save_nnp:
        runtime_contents = {
            'networks': [{
                'name': 'runtime',
                'batch_size': 1,
                'outputs': {'y0': pred_test},
                'names': {'x0': var_left, 'x1': var_right},
            }],
            'executors': [{
                'name': 'runtime',
                'network': 'runtime',
                'data': ['x0', 'x1'],
                'output': ['y0'],
            }],
        }
        import nnabla.utils.save
        nnabla.utils.save.save(args.nnp, runtime_contents)
def compute_lpips_of_paired_images(lpips, img0_path, img1_path, params_dir, model):
    """Evaluate ``lpips`` on two image files and return the forwarded result.

    Each image is read channel-first, rescaled with ``x / 127.5 - 1`` and
    given a leading batch axis of size 1 before being passed to ``lpips``.

    Args:
        lpips: Callable taking the two image variables and ``mean_batch``.
        img0_path: Path of the first image.
        img1_path: Path of the second image.
        params_dir: Not used by this function.
        model: Not used by this function.

    Returns:
        The object returned by ``lpips`` after ``forward()`` was called on it.
    """
    def _load_as_batch(image_path):
        pixels = imread(image_path, channel_first=True)
        # normalize. value range should be in [-1., +1.].
        pixels = (pixels / (255. / 2)) - 1
        variable = nn.Variable.from_numpy_array(pixels)
        return F.reshape(variable, (1,) + pixels.shape)

    img0 = _load_as_batch(img0_path)
    img1 = _load_as_batch(img1_path)

    lpips_val = lpips(img0, img1, mean_batch=True)
    lpips_val.forward()
    return lpips_val
def encode_and_write_to_path_files(filename, data_dir, ti, tl):
    """Encode every listed label and record the image/label paths.

    Each line of ``filename`` is split on single spaces; the first word
    names an image directory and the second is an integer index. For
    every line whose image directory exists, the image path is written to
    ``ti``, the matching ``.ppm`` label is read, passed through
    ``encode_label`` and saved as ``.npy`` under ``encoded/``, and that
    ``.npy`` path is written to ``tl``.

    Args:
        filename: Path of the text file listing the samples.
        data_dir: Dataset root; concatenated directly with the
            sub-directory names, so it is expected to end with a separator.
        ti: Writable text stream receiving one image path per line.
        tl: Writable text stream receiving one encoded-label path per line.

    Raises:
        AssertionError: If an image directory exists but its label file
            does not.
    """
    label_path = data_dir + 'parts_lfw_funneled_gt_images/'
    image_path = data_dir + 'lfw_funneled/'

    # The context manager closes the list file even when a line fails.
    with open(filename, 'r') as train_f:
        for line in train_f:
            words = line.split(' ')
            prefix = get_prefix(words[1])
            if not os.path.isdir(image_path + words[0] + '/'):
                continue

            # Shared file stem, e.g. "<name><prefix><index>".
            stem = words[0] + prefix + str(int(words[1]))

            ti.write(image_path + words[0] + '/' + stem + '.jpg' + '\n')

            label_file = label_path + stem + '.ppm'
            assert os.path.isfile(label_file), (
                "No matching label file for image : " + stem + '.jpg')

            label = encode_label(utils.imread(label_file))
            encoded_file = label_path + 'encoded/' + stem + '.npy'
            np.save(encoded_file, label)
            tl.write(encoded_file + '\n')
def load_image(path):
    """Load an image file as a mean-subtracted, channel-reversed NCHW batch.

    The image is read at 256x256, center-cropped to 224x224 with
    ``crop_center_image``, its last axis is reversed, the array is moved
    to channel-first layout with a leading batch axis, and the constant
    mean ``[104, 117, 123]`` is subtracted per channel.
    """
    from nnabla.utils.image_utils import imread

    resized = imread(path, size=(256, 256))
    cropped = crop_center_image(resized, (224, 224))

    # BGR and NCHW
    channel_reversed = cropped[..., ::-1]
    batch = channel_reversed.transpose(2, 0, 1)[None]

    mean = np.array([104, 117, 123], dtype=np.float32).reshape(1, 3, 1, 1)
    return batch - mean
def _load_dtumvs(self, path):
    """Load images, masks and camera parameters from a scene directory.

    Args:
        path: Directory containing ``image/``, ``mask/`` and
            ``cameras.npz`` (with ``world_mat_<i>`` and ``scale_mat_<i>``
            entries, one pair per image).

    Returns:
        Tuple ``(images, masks, intrinsics, poses)`` of numpy arrays.
        Images are rescaled with ``x / 127.5 - 1``; masks are 1.0 where
        the gray value exceeds 127.5 and 0.0 elsewhere.
    """
    # Images
    image_files = sorted(glob.glob(os.path.join(path, "image", "*")))
    images = np.asarray([image_utils.imread(f) for f in image_files])
    images = images * (1.0 / 127.5) - 1.0

    # Masks
    mask_files = sorted(glob.glob(os.path.join(path, "mask", "*")))
    masks = np.asarray([
        imageio.imread(f, as_gray=True)[:, :, np.newaxis] > 127.5
        for f in mask_files
    ]) * 1.0

    # Camera projection matrices and scale matrices. The archive is read
    # inside a context manager so its file handle is always released.
    num_views = len(images)
    with np.load(os.path.join(path, "cameras.npz")) as cameras:
        world_mats = [
            cameras['world_mat_%d' % idx].astype(np.float32)
            for idx in range(num_views)
        ]
        scale_mats = [
            cameras['scale_mat_%d' % idx].astype(np.float32)
            for idx in range(num_views)
        ]

    intrinsics, poses = [], []
    for W, S in zip(world_mats, scale_mats):
        # Compose both matrices and keep the top 3x4 block.
        P = (W @ S)[:3, :4]
        intrinsic, pose = load_K_Rt_from_P(P)
        intrinsics.append(intrinsic[:3, :3])
        poses.append(pose)

    return images, masks, np.asarray(intrinsics), np.asarray(poses)
def palette_png_reader(fname):
    """Read ``fname`` with the PIL backend, requesting palette indices.

    The image_utils backend is switched to ``'PilBackend'`` when it is
    not already active; it is not switched back afterwards.

    Args:
        fname: Path of the image file to read.

    Returns:
        Whatever ``imread(fname, return_palette_indices=True)`` returns.

    Raises:
        AssertionError: If ``'PilBackend'`` is not among the available
            backends.
    """
    assert 'PilBackend' in nn.utils.image_utils.get_available_backends()
    if nn.utils.image_utils.get_backend() != 'PilBackend':
        # Use the same spelling as the name checked above; the previous
        # "PilBackEnd" did not match it.
        nn.utils.image_utils.set_backend('PilBackend')
    return imread(fname, return_palette_indices=True)
def read_image_function():
    """Call ``image_utils.imread`` on ``img_file`` with the captured options.

    ``img_file``, ``grayscale``, ``size``, ``channel_first``,
    ``as_uint16`` and ``num_channels`` are taken from the enclosing scope.
    """
    options = dict(
        grayscale=grayscale,
        size=size,
        channel_first=channel_first,
        as_uint16=as_uint16,
        num_channels=num_channels,
    )
    return image_utils.imread(img_file, **options)
def __init__(self, width, height, padding, train=True, shuffle=False, rng=None):
    """Download the Caltech101 archive and load all images into memory.

    Every ``.jpg`` member whose name does not contain ``"Google"`` is
    decoded with three channels and passed through ``self._resize_image``.
    Class ids are assigned in the order directory names are first seen.

    Args:
        width: Forwarded to ``self._resize_image``.
        height: Forwarded to ``self._resize_image``.
        padding: Forwarded to ``self._resize_image``.
        train: Accepted but not used by this constructor.
        shuffle: Forwarded to the parent constructor.
        rng: Random state used for the index permutation; defaults to
            ``np.random.RandomState(313)``.
    """
    super(Caltech101DataSource, self).__init__(shuffle=shuffle, rng=rng)
    data_uri = "http://www.vision.caltech.edu/Image_Datasets/Caltech101/101_ObjectCategories.tar.gz"
    logger.info('Getting labeled data from {}.'.format(data_uri))
    r = download(data_uri)  # file object returned
    label_dict = dict()
    images = []
    labels = []
    # Close the download handle even when extraction or decoding fails.
    try:
        with tarfile.open(fileobj=r, mode="r:gz") as fpin:
            for name in fpin.getnames():
                if ".jpg" not in name or "Google" in name:
                    continue
                # The parent directory of each member is its class name.
                label, filename = name.split("/")[-2:]
                if label not in label_dict:
                    label_dict[label] = len(label_dict)
                im = imread(fpin.extractfile(name), num_channels=3)
                arranged_images = self._resize_image(
                    im, width, height, padding)
                images.append(arranged_images)
                labels.append(label_dict[label])
    finally:
        r.close()

    self._size = len(images)
    self._images = np.array(images)
    self._labels = np.array(labels).reshape(-1, 1)
    logger.info('Getting labeled data from {}.'.format(data_uri))

    self._size = self._labels.size
    self._variables = ('x', 'y')
    if rng is None:
        rng = np.random.RandomState(313)
    self.rng = rng
    self._indexes = rng.permutation(self._size)
def read_video(name, frame_shape):
    """Read a video as an array of frames scaled by 1/255.

    ``name`` is either a directory of frame images (read in sorted file
    name order) or a ``.gif`` / ``.mp4`` / ``.mov`` file. For video files
    a fourth channel, when present, is dropped.

    note that this function assumes that data (images or a video)
    is stored as RGB format.

    Args:
        name: Directory or video file path.
        frame_shape: Sequence whose first two entries are passed to
            ``mimread`` as ``size``; unused for directories.

    Raises:
        Exception: If ``name`` is neither a directory nor a supported file.
    """
    if os.path.isdir(name):
        frame_files = sorted(os.listdir(name))
        return np.array([
            imread(os.path.join(name, frame_file)) / 255.
            for frame_file in frame_files
        ])

    if name.lower().endswith(('.gif', '.mp4', '.mov')):
        clip = np.array(
            mimread(name, memtest=False, size=tuple(frame_shape[:2])))
        # Keep only the first three channels of four-channel frames.
        if clip.shape[-1] == 4:
            clip = clip[..., :3]
        return clip / 255.

    raise Exception("Unknown file extensions  %s" % name)
def load_func(i):
    """Load image ``imgs[i]`` as a 128x128 channel-first crop in [-1, 1].

    The crop is centred on pixel (x=89, y=121). Returns the crop together
    with ``None`` in place of a label.
    """
    center_x, center_y = 89, 121
    half = 64

    pixels = imread(imgs[i])
    patch = pixels[center_y - half:center_y + half,
                   center_x - half:center_x + half, :]
    # HWC -> CHW, then [0, 255] -> [0, 1] -> [-1, 1].
    scaled = patch.transpose(2, 0, 1) / 255.
    scaled = scaled * 2. - 1.
    return scaled, None
def load_func(i):
    """Load image ``imgs[i]`` as a 128x128 channel-first crop in [-1, 1].

    The image is read with three channels and cropped around pixel
    (x=89, y=121). Returns the crop together with an empty numpy array in
    place of a label.
    """
    center_x, center_y = 89, 121
    half = 64

    pixels = imread(imgs[i], num_channels=3)
    patch = pixels[center_y - half:center_y + half,
                   center_x - half:center_x + half, :]
    # HWC -> CHW, then [0, 255] -> [0, 1] -> [-1, 1].
    scaled = patch.transpose(2, 0, 1) / 255.
    scaled = scaled * 2. - 1.
    return scaled, np.array([])
def get_sliced_images(filenames, resize=True):
    """Read each file, crop rows 45:173 and columns 25:153, optionally resize.

    Args:
        filenames: Iterable of image paths.
        resize: When true, every crop is resized to 64x64 with Lanczos
            interpolation.

    Returns:
        List with one image array per input file, in input order.
    """
    def _load_one(filename):
        crop = imread(filename)[45:173, 25:153, :]
        if resize:
            crop = imresize(crop, size=(64, 64), interpolate='lanczos')
        return crop

    return [_load_one(filename) for filename in filenames]
def load_function(image_path, label_path, load_shape, crop_shape):
    """Load an image and its label map, with an optional shared random crop.

    Both files are read at ``load_shape`` (bicubic for the image, nearest
    for the label map). When ``load_shape`` differs from ``crop_shape`` the
    same randomly placed window is cut from both via ``_crop``.

    Args:
        image_path: Path of the image file.
        label_path: Path of the label-map file.
        load_shape: Size to load at; index 0 is used as the vertical
            extent and index 1 as the horizontal extent.
        crop_shape: Size of the crop window, same ordering.

    Returns:
        Tuple ``(image, label_map)``; the image is channel-first and
        rescaled with ``(x - 127.5) / 127.5``.
    """
    # naive implementation of loading image.
    # imread receives the size with the two entries swapped.
    _load_shape = (load_shape[1], load_shape[0])
    image = imread(image_path, size=_load_shape,
                   interpolate="bicubic", channel_first=True, num_channels=3)
    label_map = imread(label_path, size=_load_shape, interpolate="nearest")

    if load_shape != crop_shape:
        # randint's upper bound is exclusive, so add 1: this makes the
        # last valid offset reachable and avoids the ValueError raised by
        # randint(0, 0) when one dimension already matches the crop size.
        pos_y = np.random.randint(
            0, max(0, load_shape[0] - crop_shape[0]) + 1)
        pos_x = np.random.randint(
            0, max(0, load_shape[1] - crop_shape[1]) + 1)
        image = _crop(image, (pos_y, pos_x), crop_shape)
        label_map = _crop(label_map, (pos_y, pos_x), crop_shape)

    # normalize
    image = (image - 127.5) / 127.5  # -> [-1, 1]

    return image, label_map
def load_function(image_path, inst_path, label_path, image_shape):
    """Load an image with its instance map and label map.

    The image is read channel-first and the instance map as uint16. When
    the image's spatial shape differs from ``image_shape`` all three
    arrays are resized to it.

    Returns:
        Tuple ``(image, inst_map, label_map)``; the image is rescaled with
        ``(x - 127.5) / 127.5``.
    """
    # naive image read implementation
    image = imread(image_path, channel_first=True)
    inst_map = imread(inst_path, as_uint16=True)
    label_map = imread(label_path)

    needs_resize = image.shape[1:] != image_shape
    if needs_resize:
        # imresize takes (width, height) as shape.
        target = (image_shape[1], image_shape[0])
        image = imresize(image, target, channel_first=True)
        inst_map = imresize(inst_map, target)
        label_map = imresize(label_map, target)

    # normalize -> [-1, 1]
    normalized = (image - 127.5) / 127.5

    return normalized, inst_map, label_map
def load_image_imread(file, shape=None, max_range=1.0):
    '''
    Load image from file like object and return it channel-first.

    :param file: Image contents
    :type file: file like object.
    :param shape: shape of output array
        e.g. (3, 128, 192) : n_color, height, width.
    :type shape: tuple of int
    :param float max_range: the returned values are scaled so that the
        full input range (65535 for uint16 data, 255 otherwise) maps to
        `max_range`. A negative value disables scaling.
    :return: numpy array

    '''
    pixels = imread(file)
    ndim = len(pixels.shape)

    if ndim == 2:
        # gray image
        src_h, src_w = pixels.shape
        if shape is not None:
            dst_c, dst_h, dst_w = shape
        else:
            dst_h, dst_w, dst_c = src_h, src_w, 1
        assert (dst_c == 1)
        if dst_h != src_h or dst_w != src_w:
            # NOTE(review): size is passed as (height, width) -- confirm
            # this matches the argument order imresize expects.
            pixels = imresize(pixels, (dst_h, dst_w))
        pixels = pixels.reshape((dst_c, dst_h, dst_w))
    elif ndim == 3:
        # RGB image
        src_h, src_w, src_c = pixels.shape
        if shape is not None:
            dst_c, dst_h, dst_w = shape
        else:
            dst_h, dst_w, dst_c = src_h, src_w, src_c
        assert (dst_c == src_c)
        if dst_h != src_h or dst_w != src_w or dst_c != src_c:
            pixels = imresize(pixels, (dst_h, dst_w, dst_c))
        # HWC -> CHW
        pixels = pixels.transpose(2, 0, 1)

    if max_range < 0:
        return pixels

    # 16bit depth data spans up to 65535, everything else up to 255.
    full_scale = 65535.0 if pixels.dtype == 'uint16' else 255.0
    if max_range == full_scale:
        return pixels
    return pixels * (max_range / full_scale)
def _get_data(self, position):
    """Return the sample stored at ``position``.

    In training mode two frames of the selected video are sampled (with
    replacement, in sorted order) and returned as ``(driving, source)``,
    each transposed to channel-first. Otherwise the whole video is
    returned as ``(video, name)``, with the video transposed using
    ``(3, 0, 1, 2)`` and ``name`` being the entry of ``self.videos``.

    When ``self.transform`` is not None, the frame order is reversed and
    every frame is flipped left-right, each with probability 0.5.
    """
    idx = self._indexes[position]
    name = self.videos[idx]
    if self.is_train and self.id_sampling:
        # Pick one of the files whose name starts with the selected id.
        candidates = glob.glob(os.path.join(self.root_dir, name + '*.mp4'))
        path = str(np.random.choice(candidates))
    else:
        path = os.path.join(self.root_dir, name)

    if self.is_train and os.path.isdir(path):
        # Directory of frames: decode only the two sampled frames.
        frames = os.listdir(path)
        num_frames = len(frames)
        frame_idx = np.sort(
            np.random.choice(num_frames, replace=True, size=2))
        video_array = [
            imread(os.path.join(path, frames[frame_no])) / 255.0
            for frame_no in frame_idx
        ]
    else:
        video_array = read_video(path, frame_shape=self.frame_shape)
        num_frames = len(video_array)
        if self.is_train:
            frame_idx = np.sort(
                np.random.choice(num_frames, replace=True, size=2))
        else:
            frame_idx = range(num_frames)
        video_array = video_array[frame_idx]

    if self.transform is not None:
        if random.random() < 0.5:
            video_array = video_array[::-1]
        if random.random() < 0.5:
            video_array = [np.fliplr(img) for img in video_array]

    if self.is_train:
        source = np.array(video_array[0], dtype='float32')
        driving = np.array(video_array[1], dtype='float32')
        return driving.transpose((2, 0, 1)), source.transpose((2, 0, 1))

    video = np.array(video_array, dtype='float32')
    # Return the video's name directly: the previous lookup of a "name"
    # entry that was never stored always raised KeyError.
    return video.transpose((3, 0, 1, 2)), name
def load_cyclegan_dataset(dataset="horse2zebra", train=True, domain="A",
                          normalize_method=lambda x: (x - 127.5) / 127.5):
    '''
    Load CycleGAN dataset from `here <https://people.eecs.berkeley.edu/~taesung_park/CycleGAN/datasets/>`_

    This function assumes that there are two domains in the dataset.

    Args:
        dataset (str): Dataset name excluding ".zip" extension, which you can find that `here <https://people.eecs.berkeley.edu/~taesung_park/CycleGAN/datasets/>`_.
        train (bool): The testing dataset will be returned if False.
        domain (str): Domain name. It must be "A" or "B".
        normalize_method: Function of how to normalize an image. It is
            applied to each image after it has been transposed to
            channel-first layout.
    Returns:
        (np.ndarray, list): Images and filenames (base names without
        extension).

    Raises:
        AssertionError: If ``domain`` is neither "A" nor "B".
    '''
    assert domain in ["A", "B"]

    image_uri = 'https://people.eecs.berkeley.edu/~taesung_park/CycleGAN/datasets/{}.zip'.format(
        dataset)
    logger.info('Getting {} data from {}.'.format(dataset, image_uri))
    r = download(image_uri)

    images = []
    filename_list = []
    # e.g. "trainA" or "testB"
    dirname = "{}{}".format("train" if train else "test", domain)

    # Close the download handle even when reading the archive fails.
    try:
        # Load unpaired images from zipfile.
        with zipfile.ZipFile(r, "r") as zf:
            for zipinfo in zf.infolist():
                filename = zipinfo.filename
                # filter images by name
                if dirname not in filename or ".jpg" not in filename:
                    continue

                with zf.open(filename, "r") as fp:
                    logger.info('loading {}'.format(filename))

                    # load image, HWC -> CHW, then normalize
                    image = imread(fp)
                    image = np.transpose(image, (2, 0, 1))
                    image = normalize_method(image)
                    image_name, ext = os.path.splitext(
                        filename.split("/")[-1])
                    images.append(image)
                    filename_list.append(image_name)
    finally:
        r.close()

    logger.info('Getting image data done.')
    return np.asarray(images), filename_list
def get_croped_image(annotation, data_dir, margin=np.random.uniform(0, 0.15)):
    """Crop an image around its annotated box and shift the landmarks.

    The crop is centred on landmark 54, enlarged by ``margin`` and made
    square with ``get_square_corners``. Where the crop would leave the
    image, the image is extended with a flipped copy of itself.

    NOTE(review): the default ``margin`` is drawn once, when the function
    is defined, not on every call -- confirm this is intended.

    Args:
        annotation: Sequence holding 98 landmark coordinate pairs, then
            four box corners, then further fields, with the image name
            as the last entry.
        data_dir: Directory that contains the image.
        margin: Relative enlargement of the crop half-size.

    Returns:
        Tuple ``(img_name, img, y_list, x_list)`` with the channel-first
        crop and the landmark coordinates relative to the crop origin.
    """
    NUM_POINTS = 98
    num_coords = NUM_POINTS * 2

    img_name = annotation[-1].rsplit(os.linesep)[0]
    landmarks = [float(value) for value in annotation[:num_coords]]
    y1, x1, y2, x2 = [int(value) for value in annotation[num_coords:-7]]

    # Coordinates alternate: even positions feed y_list, odd ones x_list.
    y_list = [int(float(value)) for value in landmarks[0::2]]
    x_list = [int(float(value)) for value in landmarks[1::2]]

    y_center, x_center = y_list[54], x_list[54]
    half_y = int((1 + margin) * max(y2 - y_center, y_center - y1))
    half_x = int((1 + margin) * max(x2 - x_center, x_center - x1))

    y1, x1, y2, x2 = get_square_corners(
        y_center - half_y, x_center - half_x,
        y_center + half_y, x_center + half_x)

    img = imread(os.path.join(data_dir, img_name), channel_first=True)
    H, W = img.shape[1:]

    # just in case that the corner lies outside the image, apply padding.
    if x1 < 0:
        # Prepend a flipped copy along axis 1 and shift the coordinates.
        img = np.concatenate([img[:, ::-1, :], img], axis=1)
        x1 += H
        x2 += H
        x_list = [x + H for x in x_list]
        H += H

    if y1 < 0:
        # Prepend a flipped copy along axis 2 and shift the coordinates.
        img = np.concatenate([img[:, :, ::-1], img], axis=2)
        y1 += W
        y2 += W
        y_list = [y + W for y in y_list]
        W += W

    if x2 > H:
        img = np.concatenate([img, img[:, ::-1, :]], axis=1)

    if y2 > W:
        img = np.concatenate([img, img[:, :, ::-1]], axis=2)

    img = img[:, x1:x2, y1:y2]

    # Express the landmarks relative to the crop origin.
    y_list = [y - y1 for y in y_list]
    x_list = [x - x1 for x in x_list]

    return img_name, img, y_list, x_list
def read_image_with_preprocess(path, channel_last=False, channels=3):
    """Read an image and turn it into a normalized single-image batch.

    The image is read with three channels at 256x256, center-cropped to
    224x224 and passed through ``normalize_uint8_image``.

    Args:
        path: Image file path.
        channel_last: Keep the channel axis last when true; otherwise the
            image is transposed to channel-first.
        channels: 3 or 4. With 4, a zero-filled channel is appended.

    Returns:
        The image with a leading batch axis of size 1.
    """
    assert channels in (3, 4)
    from nnabla.utils.image_utils import imread

    resize_hw = (256, 256)
    crop_hw = (224, 224)

    # imread receives the size as (W, H).
    image = imread(path, num_channels=3, size=(resize_hw[1], resize_hw[0]))
    image = crop_center_image(image, crop_hw)
    image = normalize_uint8_image(image)

    if channels == 4:
        # Append one zero-filled channel on the last axis.
        image = np.pad(image, ((0, 0), (0, 0), (0, 1)),
                       mode='constant', constant_values=0)

    if not channel_last:
        image = np.transpose(image, (2, 0, 1))

    # Add batch dimension
    return image[None]
def load_image_imread(file, shape=None, max_range=1.0):
    '''
    Load image from file like object and return it channel-first.

    :param file: Image contents
    :type file: file like object.
    :param shape: shape of output array
        e.g. (3, 128, 192) : n_color, height, width.
    :type shape: tuple of int
    :param float max_range: the returned values are scaled by
        `max_range / 255`. A negative value (or exactly 255.0) returns
        the array unscaled.
    :return: numpy array

    '''
    pixels = imread(file)
    ndim = len(pixels.shape)

    if ndim == 2:
        # gray image
        src_h, src_w = pixels.shape
        if shape is not None:
            dst_c, dst_h, dst_w = shape
        else:
            dst_h, dst_w, dst_c = src_h, src_w, 1
        assert (dst_c == 1)
        if dst_h != src_h or dst_w != src_w:
            # NOTE(review): size is passed as (height, width) -- confirm
            # this matches the argument order imresize expects.
            pixels = imresize(pixels, (dst_h, dst_w))
        pixels = pixels.reshape((dst_c, dst_h, dst_w))
    elif ndim == 3:
        # RGB image
        src_h, src_w, src_c = pixels.shape
        if shape is not None:
            dst_c, dst_h, dst_w = shape
        else:
            dst_h, dst_w, dst_c = src_h, src_w, src_c
        assert (dst_c == src_c)
        if dst_h != src_h or dst_w != src_w or dst_c != src_c:
            pixels = imresize(pixels, (dst_h, dst_w, dst_c))
        # HWC -> CHW
        pixels = pixels.transpose(2, 0, 1)

    no_scaling = max_range < 0 or max_range == 255.0
    if no_scaling:
        return pixels
    return pixels * (max_range / 255.0)
def test_examples_cpp_mnist_runtime(tmpdir, nnabla_examples_root, batch_size):
    """Compare the `mnist_runtime` executable with the Python NNP graph.

    The test is currently skipped unconditionally by its first statement.
    Otherwise it trains the MNIST example, runs `mnist_runtime` on the
    sample PGM image and checks that the values parsed from the second
    output line match the 'Validation' network's output.
    """
    pytest.skip('Temporarily skip due to mnist training data server trouble.')
    nn.clear_parameters()

    # A. Check this test can run
    if not nnabla_examples_root.available:
        pytest.skip('`nnabla-examples` can not be found.')

    if not command_exists('mnist_runtime'):
        pytest.skip('An executable `mnist_runtime` is not in path.')

    tmpdir.chdir()

    # B. Run mnist training.
    script = os.path.join(nnabla_examples_root.path,
                          'image-classification/mnist-collection',
                          'classification.py')
    check_call(['python', script, '-i', '100'])

    # C. Get mnist_runtime results.
    nnp_file = tmpdir.join('tmp.monitor', 'lenet_result.nnp').strpath
    assert os.path.isfile(nnp_file)
    pgm_file = os.path.join(os.path.dirname(__file__),
                            '../../../examples/cpp/mnist_runtime/1.pgm')
    assert os.path.isfile(pgm_file)
    output = check_output(['mnist_runtime', nnp_file, pgm_file, 'Runtime'])
    # The scores follow the colon on the second output line.
    score_line = output.decode('ascii').splitlines()[1]
    score_text = score_line.split(':')[1].strip()
    cpp_result = np.asarray(score_text.split(' '), dtype=np.float32)

    # D. Get nnp_graph results and compare.
    from nnabla.utils import nnp_graph
    from nnabla.utils.image_utils import imread
    nnp = nnp_graph.NnpLoader(nnp_file)
    graph = nnp.get_network('Validation', batch_size=batch_size)
    x = graph.inputs['x']
    y = graph.outputs['y']
    x.d = imread(pgm_file, grayscale=True)
    y.forward()
    assert_allclose(y.d.flatten(), cpp_result)
def combine_images(images):
    """Tile a header image and three image batches into a 2x2 grid.

    Args:
        images: Three arrays of shape (B, C, H, W). Each is clipped to
            [0, 1].

    Returns:
        Array whose top row is the header (read from
        ``imgs/header_combined.png`` and divided by 255) next to
        ``images[0]``, and whose bottom row is ``images[1]`` next to
        ``images[2]``.
    """
    first, second, third = images[0], images[1], images[2]
    batch_size = first.shape[0]
    # Unpacking requires exactly two trailing axes; the values are unused.
    target_height, target_width = first.shape[2:]

    header = imread("imgs/header_combined.png", channel_first=True)
    first_clipped = np.clip(first, 0.0, 1.0)

    # Repeat the header once per batch element.
    header_batch = np.tile(np.expand_dims(header, 0), (batch_size, 1, 1, 1))

    # Join side by side along the last axis.
    top_row = np.concatenate([header_batch / 255., first_clipped], axis=3)
    bottom_row = np.concatenate(
        [np.clip(second, 0.0, 1.0), np.clip(third, 0.0, 1.0)], axis=3)

    # Stack the two rows along axis 2.
    return np.concatenate([top_row, bottom_row], axis=2)
def img_preprocess(img_paths, used_config):
    """Load, normalize and resize every image in ``img_paths``.

    Args:
        img_paths: Iterable of image paths.
        used_config: Mapping with an ``"image_size"`` entry; images are
            resized to ``(image_size, image_size)``.

    Returns:
        Tuple of numpy arrays ``(images, image_names)``, where each name
        is the part of the path after the last ``"/"``.
    """
    image_size = used_config["image_size"]

    def _prepare(img_path):
        # Load (and resize) image and labels.
        pixels = imread(img_path, num_channels=3, channel_first=True)
        if pixels.dtype == np.uint8:
            # Clip image's value from [0, 255] -> [0.0, 1.0]
            pixels = pixels / 255.0
        pixels = (pixels - 0.5) / 0.5  # Normalize
        return imresize(pixels, (image_size, image_size),
                        interpolate='bilinear', channel_first=True)

    images = []
    image_names = []
    for img_path in img_paths:
        images.append(_prepare(img_path))
        image_names.append(img_path.split("/")[-1])

    return np.asarray(images), np.asarray(image_names)
def stargan_load_func(i, dataset, image_dir, image_size, crop_size):
    '''
    Load image number ``i`` and its label from dataset.

    The image is read channel-first with three channels, normalized,
    center-cropped to a ``crop_size`` square and resized to an
    ``image_size`` square.

    Args:
        i: index of the entry to load.
        dataset: a list containing image paths and attribute lists.
        image_dir: path to the directory containing raw images.
        image_size: image size (height and width) after getting resized.
        crop_size: crop size.

    Returns:
        image, label: an image and a label to be fed to nn.Variables.
    '''

    def center_crop_numpy(image, crop_size_h, crop_size_w):
        # naive implementation.
        assert len(image.shape) == 3  # (c, h, w)
        top = (image.shape[1] - crop_size_h) // 2
        left = (image.shape[2] - crop_size_w) // 2
        return image[:, top:top + crop_size_h, left:left + crop_size_w]

    entry = dataset[i]
    img_path, label = entry[0], entry[1]

    # Load image and labels.
    # Unlike original implementation, crop and resize are executed here.
    pixels = imread(os.path.join(image_dir, img_path),
                    num_channels=3, channel_first=True)
    if pixels.dtype == np.uint8:
        # Clip image's value from [0, 255] -> [0.0, 1.0]
        pixels = pixels / 255.0
    pixels = (pixels - 0.5) / 0.5  # Normalize.

    pixels = center_crop_numpy(pixels, crop_size, crop_size)
    pixels = imresize(pixels, (image_size, image_size),
                      interpolate='bilinear', channel_first=True)

    return np.asarray(pixels), np.asarray(label)
def infer():
    """
    Main script: classify one image and print the sorted predictions.

    The command-line arguments supply the context (falling back to
    ``'cpu'``), the device id, the type config, the input image and the
    weight file.
    """

    # get args.
    args = get_args()

    # Get context.
    from nnabla.ext_utils import get_extension_context
    extension_module = 'cpu' if args.context is None else args.context
    logger.info("Running in %s" % extension_module)
    ctx = get_extension_context(extension_module,
                                device_id=args.device_id,
                                type_config=args.type_config)
    nn.set_default_context(ctx)
    nn.clear_parameters()  # To infer.

    # Get data from args.
    vdata = resize_and_crop_center(imread(args.input_file, num_channels=3))

    # Get a model.
    num_classes = 1000  # The number of class.
    v_model = get_model(args, num_classes)
    v_model.pred.persistent = True  # Not clearing buffer of pred in forward

    # Get parameters from parameter file.
    nn.load_parameters(args.weight_file)

    # Perfome inference.
    v_model.image.d = vdata
    v_model.image.data.cast(np.uint8, ctx)
    v_model.pred.forward(clear_buffer=True)

    # Sort the negated predictions with their indices, then apply softmax
    # to the values negated back.
    values, labels = F.sort(-v_model.pred.data, with_index=True)
    ratios = F.softmax(-values)
    print_result(labels.data, ratios.data)
def read_image_with_preprocess(path, norm_config, channel_last=False, channels=3, spatial_size=(224, 224)):
    """Read an image and turn it into a normalized single-image batch.

    The image is read with three channels at a size derived from
    ``spatial_size`` via ``args.resize_by_ratio``, center-cropped to
    ``spatial_size`` and passed through ``normalize_uint8_image``.

    Args:
        path: Image file path.
        norm_config: Forwarded to ``normalize_uint8_image``.
        channel_last: Keep the channel axis last when true; otherwise the
            image is transposed to channel-first.
        channels: 3 or 4. With 4, a zero-filled channel is appended.
        spatial_size: ``(height, width)`` of the cropped output.

    Returns:
        The image with a leading batch axis of size 1.
    """
    assert channels in (3, 4)
    from nnabla.utils.image_utils import imread

    # Assume the ratio between the resized image and the input shape to the network is 256 / 224,
    # this is a mostly typical setting for the imagenet classification.
    import args as A
    H = A.resize_by_ratio(spatial_size[0])
    W = A.resize_by_ratio(spatial_size[1])

    # Crop width comes from spatial_size[1]; it previously reused the
    # height, which was wrong for non-square sizes.
    h, w = spatial_size[0], spatial_size[1]

    image = imread(path, num_channels=3, size=(W, H))
    image = crop_center_image(image, (h, w))
    image = normalize_uint8_image(image, norm_config)
    if channels == 4:
        # Append one zero-filled channel on the last axis.
        image = np.pad(image, ((0, 0), (0, 0), (0, 1)),
                       mode='constant', constant_values=0)
    if not channel_last:
        image = np.transpose(image, (2, 0, 1))
    return image[None]  # Add batch dimension
def main():
    """Classify one image with Darknet19 and report the forward time.

    Prints the five highest-scoring labels with their softmax percentage,
    then the average time of ten further forward passes.
    """
    args = get_args()

    from nnabla.ext_utils import get_extension_context
    nn.set_default_context(get_extension_context(args.context))

    nn.load_parameters(args.weights)
    side = args.size
    x = nn.Variable((1, 3, side, side))
    y = darknet19.darknet19_classification(x / 255, test=True)

    label_names = np.loadtxt('imagenet.shortnames.list',
                             dtype=str, delimiter=',')[:1000]

    # Resize the input and lay it out as a (1, 3, size, size) batch.
    img = imresize(imread(args.input), (side, side))
    x.d = img.transpose(2, 0, 1).reshape(1, 3, side, side)
    y.forward(clear_buffer=True)

    # softmax
    percentages = F.reshape(
        F.mul_scalar(F.softmax(y.data), 100), (y.size, ))

    # Show top-5 prediction
    top5 = np.argsort(y.d.flatten())[::-1][:5]
    for class_id in top5:
        print('{}: {:.1f}%'.format(
            label_names[class_id], percentages.data[class_id]))

    n_time = 10
    started = time.time()
    for _unused in range(n_time):
        y.forward(clear_buffer=True)
    # Invoking device-to-host copy to synchronize the device (if CUDA).
    _ = y.d
    elapsed = time.time() - started
    print("Processing time: {:.1f} [ms/image]".format(
        elapsed / n_time * 1000))
def convert_image(args):
    """Convert one image and its label file into training data files.

    The source image is resized to ``width`` x ``height`` (after trimming
    or padding, depending on ``mode``), converted to the requested channel
    count and saved as PNG. The labels are warped accordingly and written
    as a per-anchor class grid (``*_label.csv``) and a per-anchor region
    grid (``*_region.csv``).

    Args:
        args: Sequence ``(file_name, source_dir, dest_dir, width, height,
            mode, ch, num_class, grid_size, anchors)``. ``mode`` is
            ``'trimming'`` or ``'padding'``; ``num_class`` is not used
            here.

    Raises:
        ValueError: If the image is neither 2-D nor 3-D, or is 3-D with a
            channel count other than 3.
        Exception: Any error raised while reading, converting or saving
            the image is logged and re-raised.
    """
    file_name = args[0]
    source_dir = args[1]
    dest_dir = args[2]
    width = args[3]
    height = args[4]
    mode = args[5]
    ch = args[6]
    num_class = args[7]
    grid_size = args[8]
    anchors = args[9]

    base_name = os.path.splitext(file_name)[0]
    src_file_name = os.path.join(source_dir, file_name)
    src_label_file_name = os.path.join(source_dir, base_name + ".txt")
    image_file_name = os.path.join(dest_dir, 'data', base_name + ".png")
    label_file_name = os.path.join(
        dest_dir, 'data', base_name + "_label.csv")
    region_file_name = os.path.join(
        dest_dir, 'data', base_name + "_region.csv")
    try:
        os.makedirs(os.path.dirname(image_file_name))
    except OSError:
        pass  # python2 does not support exists_ok arg

    # open source image
    labels = load_label(src_label_file_name)

    # NOTE(review): both warp functions rescale label[0], label[1],
    # label[3] and label[4], while label[0] is later used as the class id
    # and label[1:] as XYWH -- confirm the intended index layout.
    warp_func = None
    try:
        im = imread(src_file_name)
        if len(im.shape) < 2 or len(im.shape) > 3:
            message = "Illegal image file format {}.".format(src_file_name)
            logger.warning(message)
            raise ValueError(message)
        elif len(im.shape) == 3:
            # RGB image
            if im.shape[2] != 3:
                message = "The image must be RGB or monochrome."
                logger.warning(message)
                raise ValueError(message)

        # resize
        h = im.shape[0]
        w = im.shape[1]
        input_size = (w, h)
        if w != width or h != height:
            # resize image
            if mode == 'trimming':
                # trimming mode: crop the centre to the target aspect ratio
                if float(h) / w > float(height) / width:
                    target_h = int(float(w) / width * height)
                    im = im[(h - target_h) // 2:h - (h - target_h) // 2, ::]
                else:
                    target_w = int(float(h) / height * width)
                    im = im[::, (w - target_w) // 2:w - (w - target_w) // 2]

                def trim_warp(label, input_size, output_size):
                    w_scale = input_size[0] * 1.0 / output_size[0]
                    h_scale = input_size[1] * 1.0 / output_size[1]
                    label[0] = (label[0] - (1.0 - 1.0 / w_scale)
                                * 0.5) * w_scale
                    label[1] = (label[1] - (1.0 - 1.0 / h_scale)
                                * 0.5) * h_scale
                    label[3] *= w_scale
                    label[4] *= h_scale
                    return label
                warp_func = trim_warp
            elif mode == 'padding':
                # padding mode: pad symmetrically to the target aspect ratio
                if float(h) / w < float(height) / width:
                    target_h = int(float(height) / width * w)
                    pad = (((target_h - h) // 2,
                            target_h - (target_h - h) // 2 - h), (0, 0))
                else:
                    target_w = int(float(width) / height * h)
                    pad = ((0, 0), ((target_w - w) // 2,
                                    target_w - (target_w - w) // 2 - w))
                if len(im.shape) == 3:
                    # Leave the channel axis unpadded.
                    pad = pad + ((0, 0),)
                im = np.pad(im, pad, 'constant')

                def pad_warp(label, input_size, output_size):
                    w_scale = input_size[0] * 1.0 / output_size[0]
                    h_scale = input_size[1] * 1.0 / output_size[1]
                    label[0] = (label[0] * w_scale + (1.0 - w_scale) * 0.5)
                    label[1] = (label[1] * h_scale + (1.0 - h_scale) * 0.5)
                    label[3] *= w_scale
                    label[4] *= h_scale
                    return label
                warp_func = pad_warp
            im = imresize(im, size=(width, height))
            output_size = (width, height)

        # change color ch
        if len(im.shape) == 2 and ch == 3:
            # Monochrome to RGB
            im = np.array([im, im, im]).transpose((1, 2, 0))
        elif len(im.shape) == 3 and ch == 1:
            # RGB to monochrome
            im = np.dot(im[..., :3], [0.299, 0.587, 0.114]).astype(np.uint8)

        # output image
        imsave(image_file_name, im)
    except Exception:
        logger.warning(
            "Failed to convert %s." % (src_file_name))
        raise

    # create label and region file
    if warp_func is not None:
        labels = [warp_func(label, input_size, output_size)
                  for label in labels]
    grid_w = width // grid_size
    grid_h = height // grid_size
    # Builtin int/float replace the np.int/np.float aliases, which newer
    # NumPy releases no longer provide.
    label_array = np.full((len(anchors), grid_h, grid_w), -1, dtype=int)
    region_array = np.full(
        (len(anchors), grid_h, grid_w, 4), 0.0, dtype=float)

    for label in labels:
        label_rect = ObjectRect(XYWH=label[1:]).clip()

        if label_rect.width() > 0.0 and label_rect.height() > 0.0:
            # Grid cell that contains the label centre.
            gx = int(label_rect.centerx() * grid_w)
            gy = int(label_rect.centery() * grid_h)
            # Choose the anchor with the highest IoU against the label.
            max_iou = 0
            anchor_index = 0
            for i, anchor in enumerate(anchors):
                anchor_rect = ObjectRect(
                    XYWH=[(gx + 0.5) / grid_w, (gy + 0.5) / grid_h,
                          anchor[0], anchor[1]])
                iou = label_rect.iou(anchor_rect)
                if iou > max_iou:
                    anchor_index = i
                    max_iou = iou
            label_array[anchor_index][gy][gx] = int(label[0])
            # anchor_rect is the last anchor built above; every anchor
            # rect is given the same cell-centre position.
            region_array[anchor_index][gy][gx] = [
                (label_rect.centerx() - anchor_rect.centerx()) * grid_w + 0.5,
                (label_rect.centery() - anchor_rect.centery()) * grid_h + 0.5,
                np.log(label_rect.width() * grid_w),
                np.log(label_rect.height() * grid_h)]

    # Flatten the (anchor, row) axes so each grid is saved as a 2-D table.
    np.savetxt(label_file_name, label_array.reshape(
        (label_array.shape[0] * label_array.shape[1], -1)),
        fmt='%i', delimiter=',')
    np.savetxt(region_file_name, region_array.reshape(
        (region_array.shape[0] * region_array.shape[1], -1)),
        fmt='%f', delimiter=',')