def feed_transform(data_in, paths_out, checkpoint_dir):
    """Stylize the first input image with a restored ``Transfer`` model.

    Builds a fresh graph and session, restores the weights, runs a single
    forward pass on ``data_in[0]``, prints the elapsed inference time and
    writes the result to ``paths_out[0]``.

    Args:
        data_in: Sequence of input image paths; only the first is used.
        paths_out: Sequence of output paths; only the first is written.
        checkpoint_dir: Either a directory containing a TensorFlow checkpoint
            state, or a path that is handed to ``saver.restore`` as-is.

    Raises:
        Exception: If ``checkpoint_dir`` is a directory without a usable
            checkpoint.
    """
    # Decode the image once: it supplies both the placeholder shape and the
    # batch fed to the network (it used to be read from disk twice).
    image = utils.imread(data_in[0])
    img_shape = image.shape

    g = tf.Graph()
    soft_config = tf.ConfigProto(allow_soft_placement=True)
    soft_config.gpu_options.allow_growth = True
    with g.as_default(), tf.Session(config=soft_config) as sess:
        img_placeholder = tf.placeholder(
            tf.float32, shape=[None, *img_shape], name='img_placeholder')
        model = Transfer()
        pred = model(img_placeholder)

        saver = tf.train.Saver()
        if os.path.isdir(checkpoint_dir):
            ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(sess, ckpt.model_checkpoint_path)
            else:
                raise Exception('No checkpoint found...')
        else:
            saver.restore(sess, checkpoint_dir)

        # Add the leading batch axis expected by the placeholder.
        img = np.asarray([image]).astype(np.float32)

        # Only the forward pass is timed.
        start_tic = time.time()
        _pred = sess.run(pred, feed_dict={img_placeholder: img})
        end_toc = time.time()
        print('PT: {:.2f} msec.\n'.format((end_toc - start_tic) * 1000))

        utils.imsave(paths_out[0], _pred[0])  # paths_out and _pred are lists
def __init__(self, config, rng=None):
    """Collect synthetic and real image paths and record their dimensions.

    Args:
        config: Object providing ``data_dir``, ``real_image_dir``,
            ``batch_size`` and ``debug``; it is also passed to
            ``create_tree``.
        rng: Optional random state. When omitted, a ``RandomState`` seeded
            with 1 is stored on the instance.
    """
    if rng is None:
        self.rng = np.random.RandomState(1)
    else:
        self.rng = rng

    self.data_path = os.path.join(config.data_dir, 'our')
    self.real_data_path = os.path.join(self.data_path, config.real_image_dir)

    self.batch_size = config.batch_size
    self.debug = config.debug

    # NOTE(review): the caller's ``rng`` (possibly None) is forwarded here,
    # not ``self.rng`` -- presumably create_tree copes with None; confirm.
    synthetic_image_path = create_tree(config, self.data_path, rng)

    synthetic_pattern = os.path.join(synthetic_image_path, '*.jpg')
    self.synthetic_data_paths = np.array(glob(synthetic_pattern))
    # Dimensions come from the first globbed file (before sorting): its two
    # leading axes followed by a single channel.
    first_synthetic = imread(self.synthetic_data_paths[0])
    self.synthetic_data_dims = [*first_synthetic.shape[:2], 1]

    real_pattern = os.path.join(self.real_data_path, "*.jpg")
    self.real_data_paths = np.array(glob(real_pattern))
    first_real = imread(self.real_data_paths[0])
    self.real_data_dims = [*first_real.shape[:2], 1]

    self.synthetic_data_paths.sort()

    # Read cursor into ``real_data_paths``.
    self.real_p = 0
def feed_transform(self, data_in, paths_out):
    """Stylize the first input image with the checkpoint of the current style.

    The checkpoint location is
    ``<flags.checkpoint_dir>/<style_img_name>/model``. A fresh graph and
    session are built, the weights restored, one forward pass is run on
    ``data_in[0]`` and the result is written to ``paths_out[0]``.

    Args:
        data_in: Sequence of input image paths; only the first is used.
        paths_out: Sequence of output paths; only the first is written.

    Raises:
        Exception: If the checkpoint location is a directory without a
            usable checkpoint.
    """
    checkpoint_dir = os.path.join(
        self.flags.checkpoint_dir, self.style_img_name, 'model')

    # Decode the image once: it supplies both the placeholder shape and the
    # batch fed to the network (it used to be read from disk twice).
    image = utils.imread(data_in[0])
    img_shape = image.shape

    g = tf.Graph()
    soft_config = tf.ConfigProto(allow_soft_placement=True)
    soft_config.gpu_options.allow_growth = True
    with g.as_default(), tf.Session(config=soft_config) as sess:
        img_placeholder = tf.placeholder(
            tf.float32, shape=[None, *img_shape], name='img_placeholder')
        model = Transfer()
        pred = model(img_placeholder)

        saver = tf.train.Saver()
        if os.path.isdir(checkpoint_dir):
            ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(sess, ckpt.model_checkpoint_path)
            else:
                raise Exception('No checkpoint found...')
        else:
            saver.restore(sess, checkpoint_dir)

        # Add the leading batch axis expected by the placeholder.
        img = np.asarray([image]).astype(np.float32)
        _pred = sess.run(pred, feed_dict={img_placeholder: img})
        utils.imsave(paths_out[0], _pred[0])  # paths_out and _pred are lists
def __init__(self, sess):
    """Configure the model from the module-level ``config`` and build it.

    Reads dataset and hyperparameter settings from ``config.DATA`` and
    ``config.MODEL``, loads the training data (CIFAR-10 arrays or a list of
    JPEG paths), creates the batch-norm layers and finally calls
    ``self.build_model()``.

    Args:
        sess: TensorFlow session stored on the instance.
    """
    # TF Session
    self.sess = sess

    # Data
    self.data_dir = config.DATA['data_dir']
    self.dataset_name = config.DATA['dataset_name']
    self.data_limit = config.MODEL['data_limit']

    if self.dataset_name == 'cifar10':
        self.image_shape = 32, 32
        (x_train, _), (_, _) = cifar10.load_data()
        # Rescale pixel values with x / 127.5 - 1.
        self.data = x_train[0:self.data_limit] / 127.5 - 1
        self.c_dim = 3
        self.grayscale = 0
    else:  # Reptile set
        self.image_shape = 108, 108
        self.data = glob(os.path.join(
            self.data_dir, self.dataset_name, "*.jpg"))[0:self.data_limit]
        imread_img = imread(self.data[0])
        # Check if image is a non-grayscale image by checking channel number.
        # The already-decoded array is reused instead of reading the file a
        # second time.
        if len(imread_img.shape) >= 3:
            self.c_dim = imread_img.shape[-1]
        else:
            self.c_dim = 1
        self.grayscale = (self.c_dim == 1)

    # Hyperparameters
    self.model_dir = config.MODEL['model_dir']
    self.checkpoint_dir = config.MODEL['checkpoint_dir']
    self.epochs = config.MODEL['epochs']
    self.batch_size = config.MODEL['batch_size']
    self.batch_idxs = len(self.data) // self.batch_size
    self.sample_dir = config.DATA['sample_dir']
    self.sample_num = config.MODEL['sample_num']
    self.z_dim = 100
    self.gf_dim = 64  # Dimension of gen filters in first conv layer. [64]
    self.df_dim = 64  # Dimension of discrim filters in first conv layer. [64]
    self.learning_rate = 0.0002
    self.beta1 = 0.5  # Momentum term of adam [0.5]

    # Batch normalization layers
    self.d_bn1 = batch_norm(name='d_bn1')
    self.d_bn2 = batch_norm(name='d_bn2')
    self.d_bn3 = batch_norm(name='d_bn3')
    self.g_bn0 = batch_norm(name='g_bn0')
    self.g_bn1 = batch_norm(name='g_bn1')
    self.g_bn2 = batch_norm(name='g_bn2')
    self.g_bn3 = batch_norm(name='g_bn3')

    # GAN variants
    self.use_spectral_norm = config.MODEL['use_spectral_norm']
    self.sn_update_ops_collection = 'SPECTRAL_NORM_UPDATE_OPS'
    self.use_wasserstein = config.MODEL['use_wasserstein']
    self.use_weight_clipping = config.MODEL['use_weight_clipping']
    self.weight_clipping_limit = config.MODEL['weight_clipping_limit']

    self.build_model()
def __getitem__(self, index):
    """Return the transformed image and label stored at ``index``.

    Args:
        index: Either an integer position (builtin ``int`` or any
            ``numbers.Integral`` such as a NumPy integer), or a
            ``(position, attribute_idx)`` tuple.

    Returns:
        ``(image, label)`` for an integer index, or
        ``(image, label, attribute_idx)`` for a tuple index, where ``image``
        is the ``'image'`` entry of ``self.transform``'s result.

    Raises:
        TypeError: If ``index`` is neither an integer nor a tuple. Such an
            index used to fall through and silently return ``None``.
    """
    import numbers  # local import keeps this block self-contained

    has_attribute = isinstance(index, tuple)
    attribute_idx = None
    if has_attribute:
        index, attribute_idx = index
    elif not isinstance(index, numbers.Integral):
        raise TypeError(
            'index must be an integer or an (index, attribute_idx) tuple, '
            'got {}'.format(type(index).__name__))

    img_path, label = self.data[index]
    img = imread(img_path)
    result = self.transform(image=img)

    if has_attribute:
        return result['image'], label, attribute_idx
    return result['image'], label
def __init__(self):
    """Load two fixed noisy sample images as ``noisy_img1``/``noisy_img2``.

    Each image is located through ``utils.get_image_path(False, 64, <id>)``,
    passed through ``utils.scale_image`` with factor 2.0, divided by 255 in
    place and reshaped to ``(128, 128, 1)``.
    """
    # (attribute name, image id) pairs, processed in this order.
    samples = (('noisy_img1', 4003), ('noisy_img2', 19983))
    for attr_name, image_id in samples:
        noisy = utils.imread(utils.get_image_path(False, 64, image_id))
        noisy = utils.scale_image(noisy, 2.0)  # Image size 128x128
        noisy /= 255.0
        setattr(self, attr_name, noisy.reshape(128, 128, 1))
def load(path):
    """Read every file in ``path`` and split each image into two halves.

    Each image is cut along its second axis at column ``conf.img_size``.

    Args:
        path: Directory whose entries are all read with ``imread``.

    Returns:
        A list of ``(img, cond)`` tuples in ``os.listdir`` order, where
        ``img`` holds the columns before ``conf.img_size`` and ``cond`` the
        remaining columns.
    """
    pairs = []
    for entry in os.listdir(path):
        combined = imread(path + "/" + entry)
        split_col = conf.img_size
        left_part = combined[:, :split_col]
        right_part = combined[:, split_col:]
        pairs.append((left_part, right_part))
    return pairs
def __getitem__(self, idx):
    """Return two independently transformed views of one image.

    Args:
        idx: Row position into ``self.df.values`` (a tensor index is
            converted with ``tolist()`` first).

    Returns:
        When ``self.training`` is falsy: ``(img_name, img_name)`` as a dummy
        pair, without reading the image. Otherwise a list ``[img1, img2]``
        of two views with axis 2 rolled to the front.
    """
    if torch.is_tensor(idx):
        idx = idx.tolist()

    # Get image name
    idx, _ = self.df.values[idx]
    img_name = self.root_dir.format(idx)
    if not self.training:
        return img_name, img_name  # dummy return and do nothing for it

    # Read image
    img0 = imread(img_name, True)
    img = preprocess_image(img0, flip=False)

    # augmentation
    if self.transform:
        img1 = self.transform(image=img)['image']
        img2 = self.transform(image=img)['image']
    else:
        # Without a transform both views were previously unbound and the
        # method crashed; fall back to two separate copies of the image.
        img1 = img
        img2 = np.copy(img)

    if self.normalized:
        img1 = normalize(img1, means, stds)
        img2 = normalize(img2, means, stds)

    img1 = np.rollaxis(img1, 2, 0)
    img2 = np.rollaxis(img2, 2, 0)
    return [img1, img2]
def inference(self, model_path, target_dir):
    """Translate every matching image in ``target_dir`` with generator G.

    Results are written to ``translated/<basename of target_dir>/`` under
    the original file names.

    :param model_path: weights file loaded into the inference model by
        layer name
    :param target_dir: directory searched with the ``self.config.DATA_EXT``
        glob pattern
    :return: None
    """
    path_list = glob(os.path.join(target_dir, self.config.DATA_EXT))
    output_dir_name = os.path.join("translated", os.path.basename(target_dir))
    os.makedirs(output_dir_name, exist_ok=True)

    # The network depends only on the input shape, so build it and load the
    # weights once per distinct shape instead of once per file (this was the
    # original TODO: rebuilding the model for every file is very slow).
    models_by_shape = {}

    for image_path in path_list:
        image = utils.imread(image_path)
        shape = image.shape

        inference_model = models_by_shape.get(shape)
        if inference_model is None:
            input_layer = Input(shape=shape)
            G = net_utils.mapping_function(
                shape,
                base_name="G",
                num_res_blocks=self.config.NUMBER_RESIDUAL_BLOCKS)
            A2B = G(input_layer)
            inference_model = Model(inputs=[input_layer], outputs=[A2B])
            inference_model.load_weights(model_path, by_name=True)
            models_by_shape[shape] = inference_model

        # predict() expects a batch, so wrap the single image.
        translated_image = inference_model.predict(np.array([image]))

        name = os.path.basename(image_path)
        output_path = os.path.join(output_dir_name, name)
        utils.output_sample_image(output_path, translated_image[0])
def prep_eval_data(prep_fns):
    """Read each file and resize it to 136x136 with bicubic interpolation.

    Args:
        prep_fns: Iterable of image file names; progress is shown via
            ``tqdm``.

    Returns:
        A list with one resized image per input file, in input order.
    """
    target_size = (136, 136)
    return [
        cv2.resize(utils.imread(file_name), target_size,
                   interpolation=cv2.INTER_CUBIC)
        for file_name in tqdm(prep_fns)
    ]
def load_mask(mask_path, shape, return_mask_img=False):
    """Load a grayscale mask, binarize it and expand it to a tensor shape.

    Args:
        mask_path: Path of the mask image; it is read in mode ``"L"``.
        shape: 4-tuple interpreted according to ``K.image_data_format()``:
            ``(_, channels, width, height)`` for ``"channels_first"``,
            otherwise ``(_, width, height, channels)``.
        return_mask_img: When true, return the 2-D binarized mask instead of
            the per-channel tensor.

    Returns:
        The 2-D float32 mask with values 0/1 if ``return_mask_img`` is true;
        otherwise an array of shape ``shape[1:]`` holding the mask repeated
        for every channel.
    """
    channels_first = K.image_data_format() == "channels_first"
    if channels_first:
        _, channels, width, height = shape
    else:
        _, width, height, channels = shape

    mask = imread(mask_path, mode="L")  # Grayscale mask load
    mask = imresize(mask, (width, height)).astype('float32')

    # Binarize around one threshold. The upper bound used to be `> 128`,
    # which left values in (127, 128] unchanged and therefore non-binary.
    mask[mask <= 127] = 0
    mask[mask > 127] = 255

    # Scale to [0, 1]; an all-zero mask stays zero instead of becoming NaN
    # through a division by zero.
    peak = np.amax(mask)
    if peak > 0:
        mask /= peak

    if return_mask_img:
        return mask

    mask_shape = shape[1:]
    mask_tensor = np.empty(mask_shape)
    for i in range(channels):
        if channels_first:
            mask_tensor[i, :, :] = mask
        else:
            mask_tensor[:, :, i] = mask
    return mask_tensor
def make_denoise_small_ds(source_dir, target_dir):
    """Create a quarter-size copy of a denoising dataset with noisy variants.

    For every entry ``<name>`` of ``source_dir`` the image
    ``<source_dir>/<name>/<name>.png`` is read, downsampled by a factor of
    0.25 and written to ``<target_dir>/<name>/<name>.png``. For each noise
    level a noisy version from ``utils.get_noisy_image`` is written next to
    it as ``<name>_s<sigma>.png``.

    Args:
        source_dir: Directory with one sub-directory per image.
        target_dir: Output directory; created (with parents) when missing.
    """
    factor = 0.25
    ext = '.png'
    sigmas = [5, 10, 20, 25, 30, 35, 40, 50, 60, 70, 75, 80, 90, 100]

    # os.makedirs replaces the former `os.system('mkdir ' + path)`, which
    # broke on paths with spaces or shell metacharacters and is not portable.
    os.makedirs(target_dir, exist_ok=True)

    for img_dir in os.listdir(source_dir):
        img_name = os.path.split(img_dir)[-1]
        if not img_name:
            img_name = os.path.split(img_dir)[-2]

        clean_img = os.path.join(source_dir, img_dir, img_name + ext)
        img = utils.imread(clean_img)

        # cv2.resize takes (width, height); INTER_AREA is used for
        # downsampling.
        size = (int(img.shape[1] * factor), int(img.shape[0] * factor))
        img = cv2.resize(img, size, interpolation=cv2.INTER_AREA)

        target_img_dir = os.path.join(target_dir, img_name)
        os.makedirs(target_img_dir, exist_ok=True)

        target_clean = os.path.join(target_img_dir, img_name + ext)
        utils.imwrite(target_clean, img)

        for sigma in sigmas:
            # The file name carries the integer sigma; the noise function
            # receives it divided by 255.
            output_file = os.path.join(
                target_dir, img_dir, img_name + '_s' + str(sigma) + ext)
            noisy_img = utils.get_noisy_image(img, sigma / 255.)
            utils.imwrite(output_file, noisy_img)
def build_model(self):
    """Create placeholders, weights, biases, prediction, loss and saver.

    In training mode the input placeholder is square with side
    ``self.image_size``. Otherwise the first image returned by ``load_data``
    fixes the input height and width, which are stored as ``self.h`` and
    ``self.w``. The label placeholder is ``self.scale`` times larger along
    both spatial axes.
    """
    if self.is_train:
        in_h = in_w = self.image_size
    else:
        # The test image is fed to the model directly, without
        # preprocessing, so the input is sized like the preprocess output.
        data = load_data(self.is_train, self.test_img)
        input_ = imread(data[0])
        self.h, self.w, _ = input_.shape
        in_h, in_w = self.h, self.w

    self.images = tf.compat.v1.placeholder(
        tf.float32, [None, in_h, in_w, self.c_dim], name='images')
    self.labels = tf.compat.v1.placeholder(
        tf.float32,
        [None, in_h * self.scale, in_w * self.scale, self.c_dim],
        name='labels')

    # Convolution kernels, created in the order w1, w2, w3.
    out_channels = self.c_dim * self.scale * self.scale
    w1 = tf.compat.v1.Variable(
        tf.random.normal([5, 5, self.c_dim, 64],
                         stddev=np.sqrt(2.0 / 25 / 3)),
        name='w1')
    w2 = tf.compat.v1.Variable(
        tf.random.normal([3, 3, 64, 32], stddev=np.sqrt(2.0 / 9 / 64)),
        name='w2')
    w3 = tf.compat.v1.Variable(
        tf.random.normal([3, 3, 32, out_channels],
                         stddev=np.sqrt(2.0 / 9 / 32)),
        name='w3')
    self.weights = {'w1': w1, 'w2': w2, 'w3': w3}

    # Zero-initialised biases; the name is given to the zeros op, as before.
    b1 = tf.compat.v1.Variable(tf.zeros([64], name='b1'))
    b2 = tf.compat.v1.Variable(tf.zeros([32], name='b2'))
    b3 = tf.compat.v1.Variable(tf.zeros([out_channels], name='b3'))
    self.biases = {'b1': b1, 'b2': b2, 'b3': b3}

    self.pred = self.model()
    # Mean squared error between labels and prediction.
    self.loss = tf.reduce_mean(tf.square(self.labels - self.pred))
    self.saver = tf.compat.v1.train.Saver()  # To save checkpoint
def load_image(image_path):
    """Read an image and split it into left and right halves.

    Args:
        image_path: Path passed to ``imread``.

    Returns:
        ``(a, b)``: the columns before the midpoint and the columns from the
        midpoint to the image width. For an odd width, ``b`` gets the extra
        column.
    """
    picture = imread(image_path)
    width = int(picture.shape[1])
    half = int(width / 2)
    left_half = picture[:, :half]
    right_half = picture[:, half:width]
    return left_half, right_half
def __next__(self, n=None):
    """Fetch the next batch of real images as grayscale arrays.

    :param n: the number of examples to fetch; defaults to
        ``self.batch_size``
    :return: array of the stacked grayscale images with a trailing channel
        axis of size 1
    """
    if n is None:
        n = self.batch_size

    # Reshuffle the path list whenever reading starts at position 0.
    if self.real_p == 0:
        inds = self.rng.permutation(len(self.real_data_paths))
        self.real_data_paths = self.real_data_paths[inds]

    # Not enough paths left for a full batch.
    # NOTE(review): reset() is defined elsewhere; presumably it rewinds
    # ``real_p`` -- confirm.
    if self.real_p + n > len(self.real_data_paths):
        self.reset()

    paths = self.real_data_paths[self.real_p:self.real_p + n]
    # Advance by the number of examples actually requested. This used to add
    # ``self.batch_size``, which skipped or repeated paths whenever an
    # explicit ``n`` differed from the batch size.
    self.real_p += n

    real_data = np.expand_dims(
        np.stack([
            cv2.cvtColor(imread(path), cv2.COLOR_BGR2GRAY) for path in paths
        ]), -1)
    return real_data
def __getitem__(self, i):
    # type: (int) -> Tuple[torch.Tensor, str, float, float, float, float, str]
    """Return the normalized test frame, its 3D joints and camera values.

    The ground truth is a JSON string encoding a list of
    ``[joint_index, x3d, y3d, z3d]`` entries, where ``joint_index`` is the
    position of the joint type in ``USEFUL_JOINTS``.
    """
    # select sequence number and frame number
    sequence, frame = self.keys[i]

    # get corresponding data
    frame_path = self.cnf.jta_path / 'frames' / 'test' / f'seq_{sequence}/{frame}.jpg'
    data_path = self.cnf.jta_path / 'poses' / 'test' / f'seq_{sequence}/{frame}.data'

    # read input frame, convert it to a tensor and normalize it per channel
    image = utils.imread(frame_path)
    image = transforms.ToTensor()(image)
    normalizer = transforms.Normalize(
        mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    image = normalizer(image)

    # read GT list of poses
    poses = torch.load(data_path)  # type: List[Pose]

    # create gt, skipping joints whose 2D position is outside 1920x1080
    joints_3d = []
    for jtype in USEFUL_JOINTS:
        joint_index = USEFUL_JOINTS.index(jtype)
        for pose in poses:
            joint = pose[jtype]
            outside = (joint.x2d < 0 or joint.y2d < 0
                       or joint.x2d > 1920 or joint.y2d > 1080)
            if not outside:
                joints_3d.append(
                    [joint_index, joint.x3d, joint.y3d, joint.z3d])
    gt_3d = json.dumps(joints_3d)

    fx, fy, cx, cy = CAMERA_PARAMS
    return image, gt_3d, fx, fy, cx, cy, frame_path
def run(self, show=False, resize_factor=0.2):
    """Detect chessboard corners in every image, then calibrate.

    For each path in ``self.imgpaths`` the refined corners are appended to
    ``self.all_img_points`` and the file name to ``self.files`` when a
    chessboard is found. ``self.all_world_points`` gets one copy of
    ``self.world_pts`` per detection, after which ``self._calibrate()`` is
    called.

    Args:
        show: When true, draw the detected corners on a resized copy of the
            image and display it.
        resize_factor: Scale applied to the display copy and to the corner
            coordinates drawn on it.
    """
    self.all_img_points = []
    self.files = []

    for imgpath in self.imgpaths:
        img = imread(imgpath)
        img_gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
        self.imgsize = img_gray.shape
        img_resized = cv2.resize(
            img, None, fx=resize_factor, fy=resize_factor)

        ret, corners = cv2.findChessboardCorners(
            img_gray, self.chessboard_shape, None)
        if not ret:
            print('{}: corners not found.'.format(imgpath))
            continue

        print('{}: corners found.'.format(imgpath))
        corners_subpix = cv2.cornerSubPix(
            img_gray, corners, (11, 11), (-1, -1), self.criteria)
        if show:
            # Corners are scaled to match the resized display image.
            cv2.drawChessboardCorners(
                img_resized, self.chessboard_shape,
                corners_subpix * resize_factor, ret)
            imshow(img_resized, title=imgpath)
        self.all_img_points.append(corners_subpix)
        self.files.append(os.path.split(imgpath)[1])

    self.all_world_points = [self.world_pts] * len(self.all_img_points)
    self._calibrate()
def __init__(self, config, rng=None):
    """Load the real data and collect the synthetic grayscale image paths.

    Args:
        config: Object providing ``input_channel``, ``input_height``,
            ``input_width``, ``data_dir``, ``data_set``, ``sample_dir``,
            ``batch_size`` and ``debug``; it is also passed to ``load``.
        rng: Optional random state. When omitted, a ``RandomState`` seeded
            with 1 is stored on the instance.
    """
    self.rng = np.random.RandomState(1) if rng is None else rng

    self.input_channel = config.input_channel
    self.input_height = config.input_height
    self.input_width = config.input_width

    self.data_path = os.path.join(config.data_dir, config.data_set)
    self.sample_path = os.path.join(self.data_path, config.sample_dir)
    self.batch_size = config.batch_size
    self.debug = config.debug

    # NOTE(review): the caller's ``rng`` (possibly None) is forwarded here,
    # not ``self.rng`` -- presumably load() copes with None; confirm.
    self.real_data, synthetic_image_path = load(
        config, self.data_path, self.sample_path, rng)

    self.synthetic_data_paths = np.array(
        glob(os.path.join(synthetic_image_path, '*_grayscale.png')))
    # Shape of the first globbed image plus a single channel axis.
    self.synthetic_data_dims = list(
        imread(self.synthetic_data_paths[0]).shape) + [1]
    self.synthetic_data_paths.sort()

    # np.rank() has been removed from NumPy; np.ndim() is its replacement.
    # A 3-D real-data array gets a trailing channel axis.
    if np.ndim(self.real_data) == 3:
        self.real_data = np.expand_dims(self.real_data, -1)

    # Read cursor into the real data.
    self.real_p = 0
def load_images_from_files(self, start_idx, end_idx):
    """Loads images by reading images from files

    Each file under ``<dataset_dir>/<dataset>/train`` holds a pair of images
    side by side; it is split along the width at column 600 for the
    ``'maps'`` dataset and at column 256 otherwise.

    Args:
        start_idx: First index in the list
        end_idx : End index in the list

    Returns:
        Images as pairs ``(images_A, images_B)``: float32 arrays of shape
        ``(batch_size, h, h, 3)``. Rows beyond the number of paths read are
        left uninitialised (``np.empty``).

    Raises:
        IOError: If an image cannot be read.
    """
    img_dim = self.opts.h
    images_A = np.empty([self.opts.batch_size, img_dim, img_dim, 3],
                        dtype=np.float32)
    images_B = np.empty([self.opts.batch_size, img_dim, img_dim, 3],
                        dtype=np.float32)

    # The split column depends only on the dataset, so compute it once.
    split_len = 600 if self.opts.dataset == 'maps' else 256

    for idx, path in enumerate(self.t_image_paths[start_idx:end_idx]):
        # TODO: Generalize this method to load test/val images
        path = os.path.join(
            self.opts.dataset_dir, self.opts.dataset, 'train', path)
        try:
            image = utils.imread(path)
            image = utils.normalize_images(images=image)
        except IOError as err:
            # The message used to be built with `"...{}" % path`, which
            # itself raised TypeError and hid the real read error.
            raise IOError(
                "Cannot read the image {}".format(path)) from err

        images_A[idx] = image[:, :split_len, :]
        images_B[idx] = image[:, split_len:, :]

    return images_A, images_B
def load_imgs(file_paths, resize=0.5):
    """Load image files into one float32 array of cropped, scaled faces.

    Every image is cropped to its top-left 112x92 region, divided by 255
    and, unless ``resize`` is ``None``, passed through ``imresize`` with the
    given factor.

    Args:
        file_paths: Sequence of image paths.
        resize: Scale factor applied to each crop, or ``None`` to keep the
            112x92 crop.

    Returns:
        Array of shape ``(len(file_paths), h, w)`` with
        ``h = int(resize * 112)`` and ``w = int(resize * 92)`` (or 112 and
        92 without resizing).
    """
    crop = (slice(0, 112), slice(0, 92))

    def _extent(axis_slice):
        # Number of elements selected by a slice with explicit bounds.
        span = axis_slice.stop - axis_slice.start
        return span // (axis_slice.step or 1)

    height, width = (_extent(axis_slice) for axis_slice in crop)

    rescale = resize is not None
    if rescale:
        resize = float(resize)
        height, width = int(resize * height), int(resize * width)

    faces = np.zeros((len(file_paths), height, width), dtype=np.float32)

    # iterate over the collected file paths to load the image files as
    # numpy arrays
    for row, file_path in enumerate(file_paths):
        face = np.asarray(imread(file_path)[crop], dtype=np.float32)
        face /= 255.0  # scale uint8 coded colors to the [0.0, 1.0] floats
        if rescale:
            face = imresize(face, resize)
        faces[row, ...] = face

    return faces
def __getitem__(self, idx):
    """Return the image, mask, regression map, heatmap and drop mask.

    Args:
        idx: Row position into ``self.df.values`` (a tensor index is
            converted with ``tolist()`` first).

    Returns:
        A list ``[img, mask, regr, heatmap, dropmask]``. ``img``, ``regr``,
        ``dropmask`` and ``heatmap`` are float32 tensors; ``mask`` is
        returned exactly as produced by ``get_mask_and_regr``.
    """
    if torch.is_tensor(idx):
        idx = idx.tolist()

    # Get image name
    idx, labels = self.df.values[idx]
    img_name = self.root_dir.format(idx)

    # Augmentation: a random horizontal flip is drawn only in training mode.
    flip = False
    if self.training:
        flip = np.random.randint(2) == 1
        # No drop mask is loaded while training; a scalar 0 stands in.
        dropmask = 0
    else:
        dropmask_name = self.root_dir_dropmasks.format(idx)
        if os.path.isfile(dropmask_name):
            dropmask = imread(dropmask_name, True)
            dropmask = preprocess_mask(dropmask)
        else:
            # No mask file on disk: use an all-zero mask instead.
            dropmask = np.zeros((IMG_HEIGHT, IMG_WIDTH, 3))
        # NOTE(review): assumes preprocess_mask returns a 3-D array with the
        # channel axis last, like the zero mask above -- confirm.
        dropmask = np.rollaxis(dropmask, 2, 0)

    # Read image
    img0 = imread(img_name, True)
    img = preprocess_image(img0, flip=flip)

    # augmentation
    if self.transform:
        img = self.transform(image=img)['image']
    if self.normalized:
        img = normalize(img, means, stds)
    # Move axis 2 to the front.
    img = np.rollaxis(img, 2, 0)

    # Get mask and regression maps (computed from the unprocessed image)
    mask, regr, heatmap = get_mask_and_regr(img0, labels, sigma=self.sigma, flip=flip)
    regr = np.rollaxis(regr, 2, 0)

    img = torch.as_tensor(img, dtype=torch.float32)
    regr = torch.as_tensor(regr, dtype=torch.float32)
    dropmask = torch.as_tensor(dropmask, dtype=torch.float32)
    heatmap = torch.as_tensor(heatmap, dtype=torch.float32)

    return [img, mask, regr, heatmap, dropmask]
def test(self, config):
    """
    Testing process: restore the checkpoint, run the model on ``test.h5``,
    save the merged output image and print bicubic and SRCNN PSNR values.
    """
    print("Testing...")

    # Load checkpoint; a failed load is only reported, testing continues.
    restored = self.load(self.checkpoint_dir, config.scale)
    if not restored:
        print(" [!] Load failed...")
    else:
        print(" [*] Load SUCCESS")

    nx, ny = input_setup(self.sess, config)

    h5_path = os.path.join('./{}'.format(config.checkpoint_dir), "test.h5")
    test_data, test_label = read_data(h5_path)

    feed = {self.images: test_data, self.labels: test_label}
    patches = self.pred.eval(feed)
    merged = merge(patches, [nx, ny]).squeeze()

    # Save output image
    out_dir = os.path.join(os.getcwd(), config.output_dir)
    image_path = os.path.join(out_dir, "test_img.png")
    imsave(merged, image_path)

    # PSNR against the original image, for the bicubic baseline and for the
    # network output.
    label_path = os.path.join(out_dir, "test_org_img.png")
    bicubic_path = os.path.join(out_dir, "test_bicubic_img.png")

    bicubic_img = imread(bicubic_path, is_grayscale=True)
    label_img = imread(label_path, is_grayscale=True)
    output_img = imread(image_path, is_grayscale=True)

    bicubic_psnr_value = psnr(label_img, bicubic_img)
    srcnn_psnr_value = psnr(label_img, output_img)

    print("Bicubic PSNR: [{}]".format(bicubic_psnr_value))
    print("SRCNN PSNR: [{}]".format(srcnn_psnr_value))
def test_test_model_single_call():
    """Run one content/style pair through encoder, AdaIN and decoder.

    Loads the partial VGG19 encoder and the decoder weights, stylizes the
    single configured content image with the single configured style image
    and saves the result as ``<style>+<content>.jpg`` under
    ``./test_images/output``.
    """
    from vgg import vgg19, vgg19_rev
    import os.path as osp
    import tensorlayer as tl

    VGG19_PARTIAL_WEIGHTS_PATH = 'pretrained_models/predefined_vgg19_endwith(conv4_1)_weights.h5'
    DEC_BEST_WEIGHTS_PATH = 'pretrained_models/dec_best_weights.h5'
    CONTENT_DATA_PATH = './test_images/content'
    STYLE_DATA_PATH = './test_images/style'
    test_content_filenames = ['brad_pitt_01.jpg']
    test_style_filenames = ['cat.jpg']
    TEST_INPUT_CONSTRAINTED_SIZE = 800  # currently not used below
    TEST_OUTPUT_PATH = './test_images/output'

    tl.logging.set_verbosity(tl.logging.DEBUG)
    enc_net = vgg19(pretrained=False, end_with='conv4_1')
    enc_net.load_weights(VGG19_PARTIAL_WEIGHTS_PATH, in_order=False)
    dec_net = vgg19_rev(pretrained=False, batch_norm=False, input_depth=512)
    dec_net.load_weights(DEC_BEST_WEIGHTS_PATH, skip=True)

    i = 0  # only test 1 pair of input
    # BGR<->RGB conversion is handled inside utils.imread / utils.imsave.
    test_content = utils.imread(
        osp.join(CONTENT_DATA_PATH, test_content_filenames[i]))
    test_style = utils.imread(
        osp.join(STYLE_DATA_PATH, test_style_filenames[i]))

    content_features = enc_net(test_content, is_train=False)
    style_features = enc_net(test_style, is_train=False)
    target_features = utils.AdaIN(content_features, style_features, alpha=1)
    # The encoder outputs are no longer needed once they are combined.
    del content_features, style_features
    generated = dec_net(target_features, is_train=False)

    import tensorflow as tf
    if isinstance(generated, tf.Tensor):
        if generated.dtype == tf.float32:
            generated = tf.cast(generated, tf.uint8)
        # Take the first batch entry and convert it to a NumPy array.
        generated = generated[0].numpy()

    saved_path = f"{osp.splitext(test_style_filenames[i])[0]}+{osp.splitext(test_content_filenames[i])[0]}"
    saved_path = osp.join(TEST_OUTPUT_PATH, f"{saved_path}.jpg")
    utils.imsave(saved_path, generated)
    tl.logging.info(f"saved_path = {saved_path}")
    # This literal was split across two physical lines, which is a syntax
    # error for a single-quoted f-string; it is one line again.
    tl.logging.info(f"generated.shape = {generated.shape}")
def train(self):
    """Train refiner and discriminator in three phases.

    Phases, in order: ``initial_K_g`` refiner-only steps, ``initial_K_d``
    discriminator-only steps, then ``max_step`` rounds of ``K_g`` refiner
    steps followed by ``K_d`` discriminator steps.
    """
    print("[*] Training starts...")
    self._summary_writer = None

    # Fixed set of synthetic samples (as many as the sample grid has cells),
    # used for the periodic image summaries.
    sample_num = reduce(lambda x, y: x * y, self.config.sample_image_grid)
    idxs = self.rng.choice(len(self.data_loader.synthetic_data_paths), sample_num)
    # Each sample is converted to grayscale and given a trailing channel axis.
    test_samples = np.expand_dims(np.stack(
        [cv2.cvtColor(
            imread(path), cv2.COLOR_BGR2GRAY) for path in self.data_loader.synthetic_data_paths[idxs]]
    ), -1)

    def train_refiner(push_buffer=False):
        """Run one refiner step; optionally push its output to the buffer."""
        feed_dict = {
            self.model.synthetic_batch_size: self.data_loader.batch_size,
        }
        res = self.model.train_refiner(
            self.sess, feed_dict, self._summary_writer, with_output=True)
        self._summary_writer = self._get_summary_writer(res)

        if push_buffer:
            self.history_buffer.push(res['output'])

        # Every ``log_step`` steps, log the refined version of the fixed
        # test samples.
        if res['step'] % self.log_step == 0:
            feed_dict = {
                self.model.x: test_samples,
            }
            self._inject_summary(
                'test_refined_images', feed_dict, res['step'])

            # The unrefined samples are logged only once, when the step
            # equals ``log_step``.
            if res['step'] / float(self.log_step) == 1.:
                self._inject_summary(
                    'test_synthetic_images', feed_dict, res['step'])

    def train_discrim():
        """Run one discriminator step on half-size synthetic batches."""
        feed_dict = {
            self.model.synthetic_batch_size: self.data_loader.batch_size // 2,
            self.model.R_x_history: self.history_buffer.sample(),
            self.model.y: self.data_loader.next(),
        }
        res = self.model.train_discrim(
            self.sess, feed_dict, self._summary_writer, with_history=True, with_output=False)
        self._summary_writer = self._get_summary_writer(res)

    # Phase 1: refiner only. The history buffer is filled only while
    # k > 0.9 * initial_K_g, i.e. during the last part of this phase.
    for k in trange(self.initial_K_g, desc="Train refiner"):
        train_refiner(push_buffer=k > self.initial_K_g * 0.9)

    # Phase 2: discriminator only.
    for k in trange(self.initial_K_d, desc="Train discrim"):
        train_discrim()

    # Phase 3: alternate K_g refiner steps and K_d discriminator steps.
    for step in trange(self.max_step, desc="Train both"):
        for k in range(self.K_g):
            train_refiner(push_buffer=True)

        for k in range(self.K_d):
            train_discrim()
def next_batch(self):
    """Return a random batch of content images as one array.

    ``self.flags.batch_size`` distinct paths are drawn from
    ``self.content_target_paths`` (sampling without replacement) and each is
    read through ``utils.imread`` with ``img_size=(256, 256, 3)``.
    """
    chosen_paths = np.random.choice(
        self.content_target_paths, self.flags.batch_size, replace=False)
    return np.asarray([
        utils.imread(image_path, img_size=(256, 256, 3))
        for image_path in chosen_paths
    ])
def __call__(self, x_batch, y_batch):
    """Load, crop and resize a batch of images with their landmarks.

    Args:
        x_batch: Iterable of image file paths.
        y_batch: Iterable of landmark file paths, paired with ``x_batch``.

    Returns:
        ``(images, landmarks)`` as two NumPy arrays, one entry per pair.
    """
    processed = []
    for img_file, pts_file in zip(x_batch, y_batch):
        image = imread(img_file)
        xs, ys = read_landmarks(pts_file)
        # All x coordinates followed by all y coordinates.
        points = np.r_[xs, ys]
        crop_img, crop_pts = crop(image, points, padding=self.padding)
        processed.append(
            resize(crop_img, crop_pts, target_size=self.target_size))

    images = [new_img for new_img, _ in processed]
    landmarks = [new_pts for _, new_pts in processed]
    return np.array(images), np.array(landmarks)
def load_dataset(self):
    """Load data and check the channel number `c_dim`.

    For ``'mnist'`` the arrays from ``load_mnist`` are stored as
    ``data_X``/``data_y``. For any other dataset ``self.data`` becomes the
    list of files matching ``./data/<dataset_name>/<input_fname_pattern>``
    and the channel count is taken from the first file.
    ``self.grayscale`` is true exactly when ``c_dim`` is 1.
    """
    if self.dataset_name == 'mnist':
        self.data_X, self.data_y = load_mnist(self.y_dim)
        self.c_dim = self.data_X[0].shape[-1]
    else:
        self.data = glob(
            os.path.join("./data", self.dataset_name,
                         self.input_fname_pattern))
        imreadImg = imread(self.data[0])
        # check if image is a non-grayscale image by checking channel
        # number; the decoded array is reused instead of reading the file
        # from disk a second time.
        if len(imreadImg.shape) >= 3:
            self.c_dim = imreadImg.shape[-1]
        else:
            self.c_dim = 1

    self.grayscale = (self.c_dim == 1)
def __getitem__(self, idx):
    """Return the image, encoded question and encoded answer for ``idx``.

    Loading is best-effort: when anything fails, the error is printed and
    the fields that were not loaded yet keep the sentinel value ``-1``.

    Args:
        idx: Sample index passed to ``self.dataset.get`` together with
            ``self.split_type``.

    Returns:
        ``(idx, v, q, a, q_len)``: the index, the image read from the
        question's ``'image_path'``, the question ``'encoding'``, the first
        entry of the answer ``'encoding'`` and the question length.
    """
    # q_len needs a sentinel as well: it used to be unbound on the error
    # path, so the return statement raised UnboundLocalError.
    v, q, a, q_len = -1, -1, -1, -1
    try:
        question_json, answer_json = self.dataset.get(idx, self.split_type)
        img_path = question_json['image_path']
        v = imread(img_path)
        q = question_json['encoding']
        a = answer_json['encoding'][0]
        q_len = len(q)
    except Exception as e:
        # Deliberately broad: a broken sample is reported, not fatal.
        print("DATALOAD-ERR: " + str(e))
    return idx, v, q, a, q_len
def flow(self, mode='train'):
    """Yield batches forever, reshuffling the keys on every pass.

    Args:
        mode: ``'train'`` iterates ``self.train_keys``; ``'val'`` and
            ``'demo'`` iterate ``self.validation_keys``. Images are run
            through ``self.transform`` for ``'train'`` and ``'demo'``, and
            batches through ``self.preprocess_images`` for ``'train'`` and
            ``'val'``.

    Yields:
        The result of ``self._wrap_in_dictionary(inputs, targets)`` for each
        batch of ``self.batch_size`` samples.

    Raises:
        Exception: On the first iteration if ``mode`` is not one of the
            three supported values.
    """
    while True:
        if mode == 'train':
            shuffle(self.train_keys)
            keys = self.train_keys
        elif mode in ('val', 'demo'):
            shuffle(self.validation_keys)
            keys = self.validation_keys
        else:
            raise Exception('invalid mode: %s' % mode)

        inputs, targets = [], []
        for key in keys:
            image_array = imread(self.path_prefix + key)
            image_array = imresize(image_array, self.image_size)

            # Only images with three axes are used; others are skipped.
            if len(image_array.shape) != 3:
                continue

            ground_truth = self.ground_truth_data[key]

            if self.do_random_crop:
                image_array = self._do_random_crop(image_array)
            image_array = image_array.astype('float32')

            if mode in ('train', 'demo'):
                if self.ground_truth_transformer is not None:
                    image_array, ground_truth = self.transform(
                        image_array, ground_truth)
                    ground_truth = (
                        self.ground_truth_transformer.assign_boxes(
                            ground_truth))
                else:
                    image_array = self.transform(image_array)[0]

            inputs.append(image_array)
            targets.append(ground_truth)

            if len(targets) != self.batch_size:
                continue

            # A full batch has been collected: emit it and start over.
            batch_inputs = np.asarray(inputs)
            # this will not work for boxes
            batch_targets = to_categorical(np.asarray(targets))
            if mode != 'demo':
                # 'train' and 'val' batches are preprocessed; 'demo' batches
                # are yielded as they are.
                batch_inputs = self.preprocess_images(batch_inputs)
            yield self._wrap_in_dictionary(batch_inputs, batch_targets)
            inputs, targets = [], []
def run(self, checkpoint_dir, vid_dir, frame_ext, out_dir,
        amplification_factor, velocity_mag=False):
    """Magnify a video in the two-frames mode.

    Args:
        checkpoint_dir: checkpoint directory.
        vid_dir: directory containing video frames; frames are processed in
            sorted order.
        frame_ext: file extension of the frames (without the dot).
        out_dir: directory to place output frames and resulting video.
        amplification_factor: the amplification factor, with 0 being no
            change.
        velocity_mag: if True, process video in Dynamic mode, i.e. each
            frame is compared with its predecessor instead of with the
            first frame.
    """
    vid_name = os.path.basename(out_dir)
    # make folder
    mkdir(out_dir)

    vid_frames = sorted(glob(os.path.join(vid_dir, '*.' + frame_ext)))
    first_frame = vid_frames[0]
    im = imread(first_frame)
    # Only the two leading axes are needed, so frames that carry a trailing
    # channel axis no longer fail to unpack.
    image_height, image_width = im.shape[:2]

    if not self.is_graph_built:
        self.setup_for_inference(checkpoint_dir, image_width, image_height)

    # Append the iteration number parsed from the checkpoint name, if any.
    # `except Exception` replaces a bare `except:` so that KeyboardInterrupt
    # and SystemExit are no longer swallowed.
    try:
        i = int(self.ckpt_name.split('-')[-1])
        print("Iteration number is {:d}".format(i))
        vid_name = vid_name + '_' + str(i)
    except Exception:
        print("Cannot get iteration number")

    if velocity_mag:
        print("Running in Dynamic mode")

    prev_frame = first_frame
    desc = vid_name if len(vid_name) < 10 else vid_name[:10]
    for frame in tqdm(vid_frames, desc=desc):
        file_name = os.path.basename(frame)
        out_amp = self.inference(prev_frame, frame, amplification_factor)

        im_path = os.path.join(out_dir, file_name)
        save_images(out_amp, [1, 1], im_path)
        if velocity_mag:
            prev_frame = frame

    # Try to combine it into a video
    # NOTE(review): the input pattern is fixed to '%06d.png' regardless of
    # ``frame_ext`` -- confirm that output frames are always named this way.
    call([
        DEFAULT_VIDEO_CONVERTER, '-y', '-f', 'image2', '-r', '30', '-i',
        os.path.join(out_dir, '%06d.png'), '-c:v', 'libx264',
        os.path.join(out_dir, vid_name + '.mp4')
    ])
def test():
    """Visual smoke test for the median, HSI, Gaussian and gradient helpers.

    Displays a sequence of windows via ``showArray``/``showArrayGrad``:
    a synthetic noisy square before and after median filtering, then the
    HSI channels, Gaussian blurs, their difference and the gradient of
    ``test.jpg``.
    """
    from median import median3x3
    from gradient import gradient
    from hsi import rgb2hsi, hsi2rgb, joinChannels, splitChannels

    # Create a noisy image with an embedded white square
    image = np.zeros((201, 199), dtype=np.float32)
    width, height = image.shape
    # Floor division keeps the centre integral: with `/` these are floats
    # on Python 3 and cannot be used as slice bounds.
    x, y = width // 2, height // 2
    offset = 10
    image[x - offset:x + offset, y - offset:y + offset] = 2
    image += np.random.random_sample(image.shape)
    filtered = median3x3(image, 100)
    showArray("Noisy", image)
    showArray("Filtered", filtered)

    image = np.float32(imread("test.jpg"))
    image /= 256.
    showArray("Test HSI", image)
    r, g, b = splitChannels(image)
    h, s, i = rgb2hsi(r, g, b)
    showArray("I", i)
    showArray("S", s)
    showArray("H", h)

    from gaussian import gaussImage
    blur = gaussImage(i, 3)
    showArray("Blur", blur)
    blurmore = gaussImage(i, 4)
    # Difference of the two Gaussian blurs.
    dog = blur - blurmore
    showArray("DOG", dog)

    # From here on ``g`` is the gradient result, no longer the green channel.
    g, a = gradient(i, 5)
    showArray("Gradient", g)
    showArray("Angle", a)

    # Visualise the gradient in colour: the second gradient output goes in
    # as hue, the first as intensity, with saturation fixed at one.
    sat = np.ones_like(i)
    gimg = joinChannels(*hsi2rgb(a, sat, g))
    showArray("Color gradient with angle", gimg)
    showArrayGrad("Grad angle", image, a)
    showArrayGrad("Grad vectors", image, a, g * 10)