def visualize_image_sequence(est, dataset_name, dataset_parent_dir,
                             input_sequence_name, app_base_path, output_dir):
  """Generates an image sequence and stores it to disk as PNG frames.

  Note: despite the original docstring, no video file is produced; every
  generated frame is written individually as 'out_%04d.png' in output_dir.

  Args:
    est: tf.estimator.Estimator used to run the predictions.
    dataset_name: basename of the tfrecord dataset holding the input frames.
    dataset_parent_dir: directory containing the dataset tfrecord.
    app_base_path: path prefix of the appearance image; the files
      '<prefix>_reference.png', '<prefix>_color.png', '<prefix>_depth.png'
      and '<prefix>_seg_rgb.png' are loaded and concatenated channel-wise.
    input_sequence_name: subset name identifying the input frame sequence.
    output_dir: directory where the generated frames are saved.
  """
  batch_sz = opts.batch_size

  def input_seq_fn():
    dict_inp = data.provide_data(
        dataset_name=dataset_name, parent_dir=dataset_parent_dir,
        subset=input_sequence_name, batch_size=batch_sz, crop_flag=False,
        seeds=None, use_appearance=False, shuffle=0)
    return dict_inp['conditional_input']

  # Compute the appearance input only once and reuse it for all input frames.
  app_rgb_path = app_base_path + '_reference.png'
  app_rendered_path = app_base_path + '_color.png'
  app_depth_path = app_base_path + '_depth.png'
  app_sem_path = app_base_path + '_seg_rgb.png'
  x_app = _load_and_concatenate_image_channels(
      app_rgb_path, app_rendered_path, app_depth_path, app_sem_path)

  def seq_with_single_appearance_inp_fn():
    """Input frames paired with one fixed latent appearance input."""
    x_in_op = input_seq_fn()
    x_app_op = tf.convert_to_tensor(x_app)
    # Tile the single appearance image across the whole input batch.
    x_app_tiled_op = tf.tile(x_app_op, [tf.shape(x_in_op)[0], 1, 1, 1])
    return {'conditional_input': x_in_op, 'peek_input': x_app_tiled_op}

  # Stream predictions straight to disk instead of materializing the whole
  # sequence in memory first (the list was only iterated once anyway).
  for i, gen_img in enumerate(est.predict(seq_with_single_appearance_inp_fn)):
    output_file_path = osp.join(output_dir, 'out_%04d.png' % i)
    print('Saving frame #%d to %s' % (i, output_file_path))
    with tf.gfile.Open(output_file_path, 'wb') as f:
      f.write(utils.to_png(gen_img))
def infer_dir(model_dir, input_dir, output_dir):
  """Runs inference on every input image set found in input_dir.

  Each input is identified by a '<base>_depth.png' file; the matching
  '<base>_color.png' and '<base>_seg_rgb.png' are loaded alongside it, and
  each image acts as its own appearance input.

  Args:
    model_dir: directory containing the trained model checkpoint.
    input_dir: directory with the *_color/_depth/_seg_rgb.png input sets.
    output_dir: directory where '<base>_out.png' results are written.
  """
  tf.gfile.MakeDirs(output_dir)
  # Bug fix: the original built the estimator from the global opts.train_dir,
  # silently ignoring the model_dir argument and making it dead.
  est = _build_inference_estimator(model_dir)

  def read_image(base_path, is_appearance=False):
    # Only appearance inputs carry a real (reference) RGB channel.
    if is_appearance:
      ref_img_path = base_path + '_reference.png'
    else:
      ref_img_path = None
    rendered_img_path = base_path + '_color.png'
    depth_img_path = base_path + '_depth.png'
    seg_img_path = base_path + '_seg_rgb.png'
    return _load_and_concatenate_image_channels(
        rgb_path=ref_img_path, rendered_path=rendered_img_path,
        depth_path=depth_img_path, seg_path=seg_img_path)

  def get_inference_input_fn(base_path, app_base_path):
    x_in = read_image(base_path, False)
    x_app_in = read_image(app_base_path, True)

    def infer_input_fn():
      return {'conditional_input': x_in, 'peek_input': x_app_in}
    return infer_input_fn

  file_paths = sorted(glob.glob(osp.join(input_dir, '*_depth.png')))
  # Strip the '_depth.png' suffix to recover each input's base path.
  base_paths = [x[:-len('_depth.png')] for x in file_paths]
  for inp_base_path in base_paths:
    # Each image serves as its own appearance input.
    est_inp_fn = get_inference_input_fn(inp_base_path, inp_base_path)
    img = next(est.predict(est_inp_fn))
    basename = osp.basename(inp_base_path)
    output_img_path = osp.join(output_dir, basename + '_out.png')
    print('Saving generated image to %s' % output_img_path)
    with tf.gfile.Open(output_img_path, 'wb') as f:
      f.write(utils.to_png(img))
def evaluate_image_set(dataset_name, dataset_parent_dir, subset_suffix,
                       output_dir=None, batch_size=6):
  """Runs the model over one dataset subset and saves every output image.

  Args:
    dataset_name: basename of the tfrecord dataset to evaluate.
    dataset_parent_dir: directory containing the dataset tfrecord.
    subset_suffix: subset name; also used to derive the default output dir.
    output_dir: where to save outputs; defaults to a subdirectory of
      opts.train_dir named after the subset.
    batch_size: number of images evaluated per estimator step.
  """
  if output_dir is None:
    output_dir = osp.join(opts.train_dir,
                          'validation_output_%s' % subset_suffix)
  tf.gfile.MakeDirs(output_dir)

  base_model_fn = build_model_fn()

  def model_fn_wrapper(features, labels, mode, params):
    # Always run the wrapped model_fn in 'eval_subset' mode.
    del mode
    return base_model_fn(features, labels, 'eval_subset', params)

  est = tf.estimator.Estimator(model_fn_wrapper, opts.train_dir)
  est_inp_fn = functools.partial(
      data.provide_data,
      dataset_name=dataset_name,
      parent_dir=dataset_parent_dir,
      subset=subset_suffix,
      batch_size=batch_size,
      use_appearance=opts.use_appearance,
      shuffle=0)

  print('Evaluating images for subset %s' % subset_suffix)
  # Materialize all predictions first so the count can be reported before
  # any file is written, matching the original output order.
  images = list(est.predict(est_inp_fn))
  print('Evaluated %d images' % len(images))
  for idx, img in enumerate(images):
    output_file_path = osp.join(output_dir, 'out_%04d.png' % idx)
    print('Saving file #%d: %s' % (idx, output_file_path))
    with tf.gfile.Open(output_file_path, 'wb') as f:
      f.write(utils.to_png(img))
def make_sample_grid_and_save(est, dataset_name, dataset_parent_dir,
                              grid_dims, output_dir, cur_nimg):
  """Evaluate a fixed set of validation images and save output.

  Args:
    est: tf.estimator.Estimator, TF estimator to run the predictions.
    dataset_name: basename for the validation tfrecord from which to load
      validation images.
    dataset_parent_dir: path to a directory containing the validation
      tfrecord.
    grid_dims: 2-tuple int for the grid size (1 unit = 1 image).
    output_dir: string, where to save image samples.
    cur_nimg: int, current number of images seen by training.

  Returns:
    None.
  """
  num_examples = grid_dims[0] * grid_dims[1]

  def grid_input_fn():
    """Provides a deterministic (seeded, uncropped-order) validation batch."""
    batch = data.provide_data(
        dataset_name=dataset_name,
        parent_dir=dataset_parent_dir,
        subset='val',
        batch_size=1,
        crop_flag=True,
        crop_size=opts.train_resolution,
        seeds=[0],
        max_examples=num_examples,
        use_appearance=opts.use_appearance,
        shuffle=0)
    # The ground-truth 'expected_output' is available in the batch but is
    # intentionally not fed to predict().
    return {'conditional_input': batch['conditional_input'],
            'peek_input': batch['peek_input']}

  outputs = np.array(list(est.predict(grid_input_fn)), 'f')
  outputs = outputs.reshape(grid_dims + outputs.shape[1:])
  grid_png = utils.to_png(utils.images_to_grid(outputs))
  utils.save_images(grid_png, output_dir, cur_nimg)
def __call__(self, image_rois):
    """Crops each ROI out of an image and records its label in val.txt.

    Args:
        image_rois: 2-tuple of (image_path, rois). Each roi is expected to
            expose tl_row/br_row/tl_col/br_col bounds, a false_pos flag and
            an r_type key into conf.type2class_id — assumed from usage;
            TODO confirm against the caller.
    """
    if not self.valid_image(image_rois[0]):
        return
    # TODO: replace with cv2
    img = np.asarray(Image.open(image_rois[0]))
    crop_index = 0
    detection_file = self.classification_path + "/val.txt"
    with open(detection_file, "a+") as label_file:
        for roi in image_rois[1]:
            try:
                class_id = "false_pos" if roi.false_pos else \
                    conf.type2class_id[roi.r_type]
                roi_img = img[roi.tl_row:roi.br_row, roi.tl_col:roi.br_col]
                # Swap the RGB channel order to BGR before saving.
                r, g, b = roi_img.T
                roi_img_bgr = np.array((b, g, r)).T
                roi_img_path = self.classification_path + "/" + str(
                    class_id) + "/"
                roi_img_path += str(crop_index) + "_" + utils.to_png(
                    image_rois[0])
                # TODO: replace with cv2
                Image.fromarray(roi_img_bgr).save(roi_img_path)
                crop_index += 1
                # Renamed from 'id' (shadowed the builtin); the redundant
                # 'id = 0' pre-initialization was dropped.
                label_id = conf.invalid_id if class_id == "false_pos" \
                    else class_id
                label_file.write(
                    roi_img_path[len(self.classification_path):] + " " +
                    str(label_id))
                label_file.write("\n")
            except ValueError:
                # Bug fix: this diagnostic was unreachable in the original
                # (it was placed after 'continue').
                print("Rect goes outside in image {}.".format(
                    image_rois[0]))
                continue
            except KeyError:
                # roi.r_type has no class mapping; skip this ROI.
                continue
def __call__(self, image_rois):
    """Writes a (binary) segmentation mask and its val.txt entry for one image.

    Args:
        image_rois: 2-tuple of (image_path, rois); each roi provides
            tl_row/br_row/tl_col/br_col crop bounds.
    """
    # TODO: cv2
    source_img = Image.open(image_rois[0])
    width, height = source_img.size
    mask = np.zeros((height, width), np.uint8)
    for roi in image_rois[1]:
        # class_id = conf.type2class_id[roi.r_type]
        # For the moment we only support binary segmentation
        mask[roi.tl_row:roi.br_row, roi.tl_col:roi.br_col] = 1
    palette_img = Image.fromarray(mask, 'P')
    palette_img.putpalette(conf.palette)
    label_path = self.labels_path + utils.to_png(image_rois[0])
    half_size = (conf.image_size[0] // 2, conf.image_size[1] // 2)
    image.resize_image(palette_img, half_size)[0].save(label_path)
    label_file_path = self.labels_path + "/val.txt"
    with open(label_file_path, "a+") as label_file:
        label_file.write(label_path[len(self.labels_path):])
        label_file.write("\n")
def interpolate_appearance(model_dir, input_dir, target_img_basename,
                           appearance_img1_basename,
                           appearance_img2_basename):
  """Renders one target view under appearances interpolated between 2 images.

  Computes the appearance embedding of the two appearance images, linearly
  interpolates between the two embeddings (endpoints included), and renders
  the target view once per interpolated embedding. Results are written to
  <model_dir>/interpolate_appearance_out.

  Args:
    model_dir: directory with the trained model; also hosts the output dir.
    input_dir: directory containing the *_reference/_color/_depth/_seg_rgb
      images for the target and for the two appearance basenames.
    target_img_basename: basename of the view to render.
    appearance_img1_basename: basename of the first appearance image.
    appearance_img2_basename: basename of the second appearance image.
  """
  # Create output directory.
  output_dir = osp.join(model_dir, 'interpolate_appearance_out')
  tf.gfile.MakeDirs(output_dir)

  # Build one estimator for rendering and one for computing embeddings.
  model_fn_old = build_model_fn()

  def model_fn_wrapper(features, labels, mode, params):
    del mode
    return model_fn_old(features, labels, 'interpolate_appearance', params)

  def appearance_model_fn(features, labels, mode, params):
    del mode
    return model_fn_old(features, labels, 'compute_appearance', params)

  config = tf.estimator.RunConfig(
      save_summary_steps=1000,
      save_checkpoints_steps=50000,
      keep_checkpoint_max=50,
      log_step_count_steps=1 << 30)
  # The dead no-op 'model_dir = model_dir' and a commented-out line were
  # removed from the original.
  est = tf.estimator.Estimator(model_fn_wrapper, model_dir, config,
                               params={})
  est_app = tf.estimator.Estimator(appearance_model_fn, model_dir, config,
                                   params={})

  # Compute appearance embeddings for the two input appearance images.
  app_inputs = []
  for app_basename in [appearance_img1_basename, appearance_img2_basename]:
    app_rgb_path = osp.join(input_dir, app_basename + '_reference.png')
    app_rendered_path = osp.join(input_dir, app_basename + '_color.png')
    app_depth_path = osp.join(input_dir, app_basename + '_depth.png')
    app_seg_path = osp.join(input_dir, app_basename + '_seg_rgb.png')
    app_inputs.append(_load_and_concatenate_image_channels(
        rgb_path=app_rgb_path, rendered_path=app_rendered_path,
        depth_path=app_depth_path, seg_path=app_seg_path))
  embedding1 = next(est_app.predict(lambda: {'peek_input': app_inputs[0]}))
  embedding2 = next(est_app.predict(lambda: {'peek_input': app_inputs[1]}))
  embedding1 = np.expand_dims(embedding1, axis=0)
  embedding2 = np.expand_dims(embedding2, axis=0)

  # Compute num_interpolations + 1 evenly spaced embeddings, including both
  # endpoints.
  num_interpolations = 10
  delta_vec = (embedding2 - embedding1) / num_interpolations
  interpolated_embeddings = [
      embedding1 + delta_iter * delta_vec
      for delta_iter in range(num_interpolations + 1)]

  # Read in the generator input for the target image to render.
  rendered_img_path = osp.join(input_dir, target_img_basename + '_color.png')
  depth_img_path = osp.join(input_dir, target_img_basename + '_depth.png')
  seg_img_path = osp.join(input_dir, target_img_basename + '_seg_rgb.png')
  x_in = _load_and_concatenate_image_channels(
      rgb_path=None, rendered_path=rendered_img_path,
      depth_path=depth_img_path, seg_path=seg_img_path)

  # Generate and save interpolated images.
  for interpolate_iter, embedding in enumerate(interpolated_embeddings):
    # Bind the loop variable as a default argument so the input_fn does not
    # depend on late-binding closure semantics.
    img = next(est.predict(
        lambda emb=embedding: {
            'conditional_input': tf.convert_to_tensor(x_in),
            'appearance_embedding': tf.convert_to_tensor(emb)}))
    output_img_name = 'interpolate_%s_%s_%s_%03d.png' % (
        target_img_basename, appearance_img1_basename,
        appearance_img2_basename, interpolate_iter)
    output_img_path = osp.join(output_dir, output_img_name)
    print('Saving interpolated image to %s' % output_img_path)
    with tf.gfile.Open(output_img_path, 'wb') as f:
      f.write(utils.to_png(img))
def joint_interpolation(model_dir, app_input_dir, st_app_basename,
                        end_app_basename, camera_path_dir):
  """Interpolates both viewpoint and appearance between two input images.

  For every frame along the camera path in camera_path_dir, renders that
  frame with an appearance embedding linearly interpolated between the
  embeddings of the start and end appearance images. Results are written to
  <model_dir>/joint_interpolation_out.

  Args:
    model_dir: directory with the trained model; also hosts the output dir.
    app_input_dir: directory with the two appearance image sets.
    st_app_basename: basename of the start appearance image.
    end_app_basename: basename of the end appearance image.
    camera_path_dir: directory with '*_color/_depth/_seg_rgb.png' frames
      describing the camera path.
  """
  # Create output directory.
  output_dir = osp.join(model_dir, 'joint_interpolation_out')
  tf.gfile.MakeDirs(output_dir)

  # Build one estimator for rendering and one for computing embeddings.
  model_fn_old = build_model_fn()

  def model_fn_wrapper(features, labels, mode, params):
    del mode
    return model_fn_old(features, labels, 'interpolate_appearance', params)

  def appearance_model_fn(features, labels, mode, params):
    del mode
    return model_fn_old(features, labels, 'compute_appearance', params)

  config = tf.estimator.RunConfig(
      save_summary_steps=1000,
      save_checkpoints_steps=50000,
      keep_checkpoint_max=50,
      log_step_count_steps=1 << 30)
  # The dead no-op 'model_dir = model_dir' and a commented-out line were
  # removed from the original.
  est = tf.estimator.Estimator(model_fn_wrapper, model_dir, config,
                               params={})
  est_app = tf.estimator.Estimator(appearance_model_fn, model_dir, config,
                                   params={})

  # Compute appearance embeddings for the two input appearance images.
  app_inputs = []
  for app_basename in [st_app_basename, end_app_basename]:
    app_rgb_path = osp.join(app_input_dir, app_basename + '_reference.png')
    app_rendered_path = osp.join(app_input_dir, app_basename + '_color.png')
    app_depth_path = osp.join(app_input_dir, app_basename + '_depth.png')
    app_seg_path = osp.join(app_input_dir, app_basename + '_seg_rgb.png')
    app_inputs.append(_load_and_concatenate_image_channels(
        rgb_path=app_rgb_path, rendered_path=app_rendered_path,
        depth_path=app_depth_path, seg_path=app_seg_path))
  embedding1 = next(est_app.predict(lambda: {'peek_input': app_inputs[0]}))
  embedding1 = np.expand_dims(embedding1, axis=0)
  embedding2 = next(est_app.predict(lambda: {'peek_input': app_inputs[1]}))
  embedding2 = np.expand_dims(embedding2, axis=0)

  file_paths = sorted(glob.glob(osp.join(camera_path_dir, '*_depth.png')))
  # Strip the '_depth.png' suffix to recover each frame's base path.
  base_paths = [x[:-len('_depth.png')] for x in file_paths]

  # One interpolated embedding per camera-path frame, endpoints included.
  num_interpolations = len(base_paths)
  if num_interpolations > 1:
    delta_vec = (embedding2 - embedding1) / (num_interpolations - 1)
  else:
    # Bug fix: guard the divide-by-zero the original hit with a
    # single-frame (or empty) camera path.
    delta_vec = np.zeros_like(embedding1)
  interpolated_embeddings = [
      embedding1 + delta_iter * delta_vec
      for delta_iter in range(num_interpolations)]

  # Generate and save interpolated images.
  for frame_idx, embedding in enumerate(interpolated_embeddings):
    # Read in the generator input for the current camera-path frame.
    frame_render_path = osp.join(base_paths[frame_idx] + '_color.png')
    frame_depth_path = osp.join(base_paths[frame_idx] + '_depth.png')
    frame_seg_path = osp.join(base_paths[frame_idx] + '_seg_rgb.png')
    x_in = _load_and_concatenate_image_channels(
        rgb_path=None, rendered_path=frame_render_path,
        depth_path=frame_depth_path, seg_path=frame_seg_path)
    # Bind loop variables as default arguments so the input_fn does not
    # depend on late-binding closure semantics.
    img = next(est.predict(
        lambda inp=x_in, emb=embedding: {
            'conditional_input': tf.convert_to_tensor(inp),
            'appearance_embedding': tf.convert_to_tensor(emb)}))
    output_img_name = '%s_%s_%03d.png' % (st_app_basename, end_app_basename,
                                          frame_idx)
    output_img_path = osp.join(output_dir, output_img_name)
    print('Saving interpolated image to %s' % output_img_path)
    with tf.gfile.Open(output_img_path, 'wb') as f:
      f.write(utils.to_png(img))
def make_sample_grid_and_save(self, ops, batch_size=16, random=4,
                              interpolation=16, height=16,
                              save_to_disk=True):
    """Builds a 4-panel autoencoder sample grid and optionally saves it.

    The grid is assembled from: (1) random reconstructions, (2) linear
    latent interpolations, (3) spherical (slerp) latent interpolations and
    (4) decodings of latents drawn from a standard normal distribution.

    :param ops: AEops class, including train_op; must expose ae, encode,
        decode ops and the x / h placeholders used below.
    :param batch_size: feed-dict batch size used when running the ops.
    :param random: number of reconstruction columns; 0 disables panel (1).
        (NOTE(review): this parameter shadows the stdlib ``random`` module
        name inside this method; kept for interface compatibility.)
    :param interpolation: number of interpolation steps, namely the row
        count of the interpolation panels.
    :param height: number of grid rows.
    :param save_to_disk: if True, the concatenated grid is written via
        utils.save_images.
    :return: tuple (image_random, image_interpolation,
        image_interpolation_slerp, image_samples); image_random is None
        when ``random`` is 0.
    """
    # Gather enough test images for the reconstruction panel plus the two
    # interpolation endpoints (2 * height). The inline shape comments below
    # are the author's examples for the default arguments — e.g. pool_size
    # = 4*16 + 2*16 = 96; TODO confirm for other argument values.
    pool_size = random * height + 2 * height  # 96
    current_size = 0
    # A fresh graph/session is used only to pull raw examples from the
    # one-shot test iterator; model ops run later on self.tf_sess.
    with tf.Graph().as_default():
        data_in = self.test_data.make_one_shot_iterator().get_next()
        with tf.Session() as sess_new:
            images = []
            while current_size < pool_size:
                images.append(sess_new.run(data_in)['x'])
                current_size += images[-1].shape[0]
    images = np.concatenate(images, axis=0)[:pool_size]  # [96, 32, 32, 1]

    def batched_op(op, op_input, array):
        # Runs `op` over `array` in feed-dict batches and concatenates the
        # per-batch results along axis 0.
        return np.concatenate([
            self.tf_sess.run(op,
                             feed_dict={op_input: array[x:x + batch_size]})
            for x in range(0, array.shape[0], batch_size)
        ], axis=0)

    # 1. Random reconstructions: inputs side-by-side with their
    # autoencoder outputs (concatenated along the width axis).
    if random:  # not zero
        random_x = images[:random * height]  # [64, 32, 32, 1]
        random_y = batched_op(ops.ae, ops.x, random_x)
        randoms = np.concatenate(
            [random_x, random_y],
            axis=2)  # ae output: [64, 32, 32, 1] => [64, 32, 64, 1]
        image_random = utils.images_to_grid(  # [16, 4, 32, 64, 1] => [512, 256, 1]
            randoms.reshape((height, random) + randoms.shape[1:]))
    else:
        image_random = None

    # 2. Interpolations: linear blends between the latents of the first
    # `height` endpoint images and the last `height` endpoint images.
    interpolation_x = images[-2 * height:]  # [32, 32, 32, 1]
    latent_x = batched_op(ops.encode, ops.x,
                          interpolation_x)  # [32, 4, 4, 16]
    latents = []
    for x in range(interpolation):
        # Weight runs from all-first-endpoint (x=0) to all-second (x=max).
        latents.append((latent_x[:height] * (interpolation - x - 1) +
                        latent_x[height:] * x) / float(interpolation - 1))
    latents = np.concatenate(latents, axis=0)  # [256, 4, 4, 16]
    interpolation_y = batched_op(ops.decode, ops.h,
                                 latents)  # [256, 32, 32, 1]
    interpolation_y = interpolation_y.reshape(  # [16, 16, 32, 32, 1]
        (interpolation, height) + interpolation_y.shape[1:])
    # Transpose so rows index the image pair and columns the blend step.
    interpolation_y = interpolation_y.transpose(1, 0, 2, 3, 4)
    image_interpolation = utils.images_to_grid(
        interpolation_y)  # [512, 512, 1]

    # 3. Interpolation by slerp: spherical interpolation between the same
    # endpoint latents, using the angle `omega` between each latent pair.
    latents_slerp = []
    dots = np.sum(latent_x[:height] * latent_x[height:],
                  tuple(range(1, len(latent_x.shape))),
                  keepdims=True)  # [16, 1, 1, 1]
    norms = np.sum(latent_x * latent_x,
                   tuple(range(1, len(latent_x.shape))),
                   keepdims=True)  # [32, 1, 1, 1]
    cosine_dist = dots / np.sqrt(
        norms[:height] * norms[height:])  # [16, 1, 1, 1]
    omega = np.arccos(cosine_dist)
    for x in range(interpolation):
        t = x / float(interpolation - 1)
        latents_slerp.append(
            np.sin((1 - t) * omega) / np.sin(omega) * latent_x[:height] +
            np.sin(t * omega) / np.sin(omega) * latent_x[height:])
    latents_slerp = np.concatenate(
        latents_slerp, axis=0)  # 16 of[16, 4, 4, 16] => [256, 4, 4, 16]
    interpolation_y_slerp = batched_op(ops.decode, ops.h,
                                       latents_slerp)  # [256, 32, 32, 1]
    interpolation_y_slerp = interpolation_y_slerp.reshape(
        (interpolation, height) +
        interpolation_y_slerp.shape[1:])  # [16, 16, 32, 32, 1]
    interpolation_y_slerp = interpolation_y_slerp.transpose(1, 0, 2, 3, 4)
    image_interpolation_slerp = utils.images_to_grid(
        interpolation_y_slerp)  # [512, 512, 1]

    # 4. get decoder by random normal dist of hidden h: decode latents
    # sampled from a standard normal of the same shape as `latents`.
    random_latents = np.random.standard_normal(
        latents.shape)  # [256, 4, 4, 16]
    samples_y = batched_op(ops.decode, ops.h, random_latents)
    samples_y = samples_y.reshape((interpolation, height) +
                                  samples_y.shape[1:])
    samples_y = samples_y.transpose(1, 0, 2, 3, 4)
    image_samples = utils.images_to_grid(samples_y)  # [512, 512, 1]

    # Concatenate the panels side by side; the reconstruction panel is
    # omitted when `random` is 0.
    if random:
        # [512, 256+512+512+512, 1]
        image = np.concatenate([
            image_random, image_interpolation, image_interpolation_slerp,
            image_samples
        ], axis=1)
    else:
        image = np.concatenate([
            image_interpolation, image_interpolation_slerp, image_samples
        ], axis=1)
    if save_to_disk:
        utils.save_images(utils.to_png(image), self.image_dir,
                          self.cur_nimg)
    return (image_random, image_interpolation, image_interpolation_slerp,
            image_samples)