def deep_dream_video(config):
    video_path = os.path.join(config['inputs_path'], config['input'])
    tmp_input_dir = os.path.join(config['out_videos_path'], 'tmp_input')
    tmp_output_dir = os.path.join(config['out_videos_path'], 'tmp_out')
    config['dump_dir'] = tmp_output_dir
    os.makedirs(tmp_input_dir, exist_ok=True)
    os.makedirs(tmp_output_dir, exist_ok=True)

    metadata = video_utils.dump_frames(video_path, tmp_input_dir)

    last_img = None
    for frame_id, frame_name in enumerate(sorted(os.listdir(tmp_input_dir))):
        print(f'Processing frame {frame_id}')
        frame_path = os.path.join(tmp_input_dir, frame_name)
        frame = utils.load_image(frame_path, target_shape=config['img_width'])
        if config['blend'] is not None and last_img is not None:
            # blend == 1.0 - use only the current frame, blend == 0.5 - combine it with the last dreamed frame and stabilize the video
            frame = utils.linear_blend(last_img, frame, config['blend'])

        dreamed_frame = deep_dream_static_image(config, frame)
        last_img = dreamed_frame
        utils.save_and_maybe_display_image(config, dreamed_frame, should_display=config['should_display'], name_modifier=frame_id)

    video_utils.create_video_from_intermediate_results(config, metadata)

    shutil.rmtree(tmp_input_dir)  # remove tmp files
    print(f'Deleted tmp frame dump directory {tmp_input_dir}.')
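# A minimal sketch of what a frame-extraction helper like video_utils.dump_frames could look
# like, using OpenCV. This is an illustrative assumption, not the repo's actual implementation -
# the real helper may well shell out to ffmpeg instead.
import os
import cv2

def dump_frames_sketch(video_path, dump_dir):
    cap = cv2.VideoCapture(video_path)
    fps = cap.get(cv2.CAP_PROP_FPS)
    frame_cnt = 0
    while True:
        ret, frame = cap.read()  # returns BGR, uint8 frames
        if not ret:
            break  # end of video
        # zero-padded names keep sorted(os.listdir(...)) in playback order, as the loop above expects
        cv2.imwrite(os.path.join(dump_dir, f'frame_{frame_cnt:06d}.jpg'), frame)
        frame_cnt += 1
    cap.release()
    return {'fps': fps, 'num_frames': frame_cnt}  # metadata, in the same spirit as above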
def deep_dream_video_ouroboros(config):
    img_path = os.path.join(config['inputs_path'], config['input'])
    # load a numpy, [0, 1] range, channel-last, RGB image; None will cause it to start from uniform [0, 1] noise
    frame = None if config['use_noise'] else utils.load_image(img_path, target_shape=config['img_width'])

    for frame_id in range(config['video_length']):
        print(f'Dream iteration {frame_id+1}.')
        frame = deep_dream_static_image(config, frame)
        utils.save_and_maybe_display_image(config, frame, should_display=config['should_display'], name_modifier=frame_id)
        frame = utils.transform_frame(config, frame)  # transform the frame, e.g. central zoom, spiral, etc.

    video_utils.create_video_from_intermediate_results(config)
def deep_dream_video_ouroboros(config): """ Feeds the output dreamed image back to the input and repeat Name etymology for nerds: https://en.wikipedia.org/wiki/Ouroboros """ ts = time.time() assert any([config['input_name'].lower().endswith(img_ext) for img_ext in SUPPORTED_IMAGE_FORMATS]), \ f'Expected an image, but got {config["input_name"]}. Supported image formats {SUPPORTED_IMAGE_FORMATS}.' utils.print_ouroboros_video_header(config) # print some ouroboros-related metadata to the console img_path = utils.parse_input_file(config['input']) # load numpy, [0, 1] range, channel-last, RGB image # use_noise and consequently None value, will cause it to initialize the frame with uniform, [0, 1] range, noise frame = None if config['use_noise'] else utils.load_image(img_path, target_shape=config['img_width']) for frame_id in range(config['ouroboros_length']): print(f'Ouroboros iteration {frame_id+1}.') # Step 1: apply DeepDream and feed the last iteration's output to the input frame = deep_dream_static_image(config, frame) dump_path = utils.save_and_maybe_display_image(config, frame, name_modifier=frame_id) print(f'Saved ouroboros frame to: {os.path.relpath(dump_path)}\n') # Step 2: transform frame e.g. central zoom, spiral, etc. # Note: this part makes amplifies the psychodelic-like appearance frame = utils.transform_frame(config, frame) video_utils.create_video_from_intermediate_results(config) print(f'time elapsed = {time.time()-ts} seconds.')
def stylize_static_image(inference_config):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    content_img_path = os.path.join(inference_config['content_images_path'], inference_config['content_img_name'])
    content_image = utils.prepare_img(content_img_path, inference_config['img_width'], device)

    # load the weights and set the model to evaluation mode
    stylization_model = TransformerNet().to(device)
    training_state = torch.load(os.path.join(inference_config["model_binaries_path"], inference_config["model_name"]))
    utils.print_model_metadata(training_state)
    state_dict = training_state["state_dict"]
    stylization_model.load_state_dict(state_dict, strict=True)
    stylization_model.eval()

    with torch.no_grad():
        stylized_img = stylization_model(content_image).to('cpu').numpy()[0]
        utils.save_and_maybe_display_image(inference_config, stylized_img, should_display=True)
def deep_dream_video(config):
    video_path = utils.parse_input_file(config['input'])
    tmp_input_dir = os.path.join(OUT_VIDEOS_PATH, 'tmp_input')
    tmp_output_dir = os.path.join(OUT_VIDEOS_PATH, 'tmp_out')
    config['dump_dir'] = tmp_output_dir
    os.makedirs(tmp_input_dir, exist_ok=True)
    os.makedirs(tmp_output_dir, exist_ok=True)

    metadata = video_utils.extract_frames(video_path, tmp_input_dir)
    config['fps'] = metadata['fps']
    utils.print_deep_dream_video_header(config)

    last_img = None
    for frame_id, frame_name in enumerate(sorted(os.listdir(tmp_input_dir))):
        # Step 1: load the video frame
        print(f'Processing frame {frame_id}')
        frame_path = os.path.join(tmp_input_dir, frame_name)
        frame = utils.load_image(frame_path, target_shape=config['img_width'])

        # Step 2: potentially blend it with the last frame
        if config['blend'] is not None and last_img is not None:
            # blend: 1.0 - use only the current frame, 0.0 - use only the last frame, values in between blend the two
            frame = utils.linear_blend(last_img, frame, config['blend'])

        # Step 3: send the blended frame to some good old DeepDreaming
        dreamed_frame = deep_dream_static_image(config, frame)

        # Step 4: save the frame and keep the reference
        last_img = dreamed_frame
        dump_path = utils.save_and_maybe_display_image(config, dreamed_frame, name_modifier=frame_id)
        print(f'Saved DeepDream frame to: {os.path.relpath(dump_path)}\n')

    video_utils.create_video_from_intermediate_results(config)

    shutil.rmtree(tmp_input_dir)  # remove tmp files
    print(f'Deleted tmp frame dump directory {tmp_input_dir}.')
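# A minimal sketch of the blend in Step 2, assuming utils.linear_blend is a plain per-pixel
# lerp (the actual helper may differ). It matches the comment above: blend=1.0 keeps only the
# current frame, blend=0.0 keeps only the last dreamed frame, in-between values mix the two.
def linear_blend_sketch(last_img, current_img, blend):
    # both inputs: numpy, [0, 1] range, channel-last, RGB images of the same shape
    return (1.0 - blend) * last_img + blend * current_img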
def generate_new_images(model_name, cgan_digit=None, generation_mode=GenerationMode.SINGLE_IMAGE, slerp=True, a=None, b=None, should_display=True):
    """ Generate imagery using a pre-trained generator (vanilla_generator_000000.pth by default).

    Args:
        model_name (str): model name you want to use (default lookup location is BINARIES_PATH).
        cgan_digit (int): if specified, generate that exact digit.
        generation_mode (enum): generate a single image from a random vector, interpolate between 2 chosen latent
            vectors, or perform arithmetic over latent vectors (note: not every mode is supported for every model type).
        slerp (bool): if True use spherical interpolation, otherwise use linear interpolation.
        a, b (numpy arrays): latent vectors; if set to None you'll be prompted to choose images you like,
            and the corresponding latent vectors will be used instead.
        should_display (bool): display the generated images before saving them.
    """

    model_path = os.path.join(BINARIES_PATH, model_name)
    assert os.path.exists(model_path), f'Could not find the model {model_path}. You first need to train your generator.'

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Prepare the correct (vanilla, cGAN, DCGAN, ...) model, load the weights and put the model into evaluation mode
    model_state = torch.load(model_path)
    gan_type = model_state["gan_type"]
    print(f'Found {gan_type} GAN!')
    _, generator = utils.get_gan(device, gan_type)
    generator.load_state_dict(model_state["state_dict"], strict=True)
    generator.eval()

    # Generate a single image, save it and potentially display it
    if generation_mode == GenerationMode.SINGLE_IMAGE:
        generated_imgs_path = os.path.join(DATA_DIR_PATH, 'generated_imagery')
        os.makedirs(generated_imgs_path, exist_ok=True)

        generated_img, _ = generate_from_random_latent_vector(generator, cgan_digit if gan_type == GANType.CGAN.name else None)
        utils.save_and_maybe_display_image(generated_imgs_path, generated_img, should_display=should_display)

    # Pick 2 images you like between which you'd like to interpolate (by typing 'y' into the console)
    elif generation_mode == GenerationMode.INTERPOLATION:
        assert gan_type == GANType.VANILLA.name or gan_type == GANType.DCGAN.name, f'Got {gan_type} but only VANILLA/DCGAN are supported for the interpolation mode.'

        interpolation_name = "spherical" if slerp else "linear"
        interpolation_fn = spherical_interpolation if slerp else linear_interpolation

        grid_interpolated_imgs_path = os.path.join(DATA_DIR_PATH, 'interpolated_imagery')  # combined results dir
        decomposed_interpolated_imgs_path = os.path.join(grid_interpolated_imgs_path, f'tmp_{gan_type}_{interpolation_name}_dump')  # dump separate results
        if os.path.exists(decomposed_interpolated_imgs_path):
            shutil.rmtree(decomposed_interpolated_imgs_path)
        os.makedirs(grid_interpolated_imgs_path, exist_ok=True)
        os.makedirs(decomposed_interpolated_imgs_path, exist_ok=True)

        latent_vector_a, latent_vector_b = [None, None]

        # If a and b were not specified, loop until the user has picked the 2 images they like
        found_good_vectors_flag = False
        if a is None or b is None:
            while not found_good_vectors_flag:
                generated_img, latent_vector = generate_from_random_latent_vector(generator)
                plt.imshow(generated_img)
                plt.title('Do you like this image?')
                plt.show()
                user_input = input("Do you like this generated image? [y for yes]: ")
                if user_input == 'y':
                    if latent_vector_a is None:
                        latent_vector_a = latent_vector
                        print('Saved the first latent vector.')
                    elif latent_vector_b is None:
                        latent_vector_b = latent_vector
                        print('Saved the second latent vector.')
                        found_good_vectors_flag = True
                else:
                    print("Well, let's generate a new one!")
                    continue
        else:
            print('Skipping the latent vector selection section and using the cached ones.')
            latent_vector_a, latent_vector_b = [a, b]

        # Cache latent vectors
        if a is None or b is None:
            np.save(os.path.join(grid_interpolated_imgs_path, 'a.npy'), latent_vector_a)
            np.save(os.path.join(grid_interpolated_imgs_path, 'b.npy'), latent_vector_b)

        print(f"Let's do some {interpolation_name} interpolation!")
        interpolation_resolution = 47  # number of images between the vectors a and b
        num_interpolated_imgs = interpolation_resolution + 2  # + 2 so that we include a and b

        generated_imgs = []
        for i in range(num_interpolated_imgs):
            t = i / (num_interpolated_imgs - 1)  # goes from 0. to 1.
            current_latent_vector = interpolation_fn(t, latent_vector_a, latent_vector_b)
            generated_img = generate_from_specified_numpy_latent_vector(generator, current_latent_vector)

            print(f'Generated image [{i+1}/{num_interpolated_imgs}].')
            utils.save_and_maybe_display_image(decomposed_interpolated_imgs_path, generated_img, should_display=should_display)

            # Move from channel-last to channel-first (HWC -> CHW), PyTorch's save_image function expects BCHW format
            generated_imgs.append(torch.tensor(np.moveaxis(generated_img, 2, 0)))

        interpolated_block_img = torch.stack(generated_imgs)
        interpolated_block_img = nn.Upsample(scale_factor=2.5, mode='nearest')(interpolated_block_img)
        save_image(interpolated_block_img, os.path.join(grid_interpolated_imgs_path, utils.get_available_file_name(grid_interpolated_imgs_path)), nrow=int(np.sqrt(num_interpolated_imgs)))

    elif generation_mode == GenerationMode.VECTOR_ARITHMETIC:
        assert gan_type == GANType.DCGAN.name, f'Got {gan_type} but only DCGAN is supported for the arithmetic mode.'

        # Generate num_options face images and create a grid image from them
        num_options = 100
        generated_imgs = []
        latent_vectors = []
        padding = 2
        for i in range(num_options):
            generated_img, latent_vector = generate_from_random_latent_vector(generator)
            generated_imgs.append(torch.tensor(np.moveaxis(generated_img, 2, 0)))  # make_grid expects CHW format
            latent_vectors.append(latent_vector)
        stacked_tensor_imgs = torch.stack(generated_imgs)
        final_tensor_img = make_grid(stacked_tensor_imgs, nrow=int(np.sqrt(num_options)), padding=padding)
        display_img = np.moveaxis(final_tensor_img.numpy(), 0, 2)

        # For storing latent vectors
        num_of_vectors_per_category = 3
        happy_woman_latent_vectors = []
        neutral_woman_latent_vectors = []
        neutral_man_latent_vectors = []

        # Make it easy - by clicking on the plot you pick the image
        def onclick(event):
            if event.dblclick:
                pass
            else:  # single click
                if event.button == 1:  # left click
                    x_coord = event.xdata
                    y_coord = event.ydata
                    column = int(x_coord / (64 + padding))
                    row = int(y_coord / (64 + padding))

                    # Store the latent vector corresponding to the image that the user clicked on
                    if len(happy_woman_latent_vectors) < num_of_vectors_per_category:
                        happy_woman_latent_vectors.append(latent_vectors[10 * row + column])
                        print(f'Picked image row={row}, column={column} as {len(happy_woman_latent_vectors)}. happy woman.')
                    elif len(neutral_woman_latent_vectors) < num_of_vectors_per_category:
                        neutral_woman_latent_vectors.append(latent_vectors[10 * row + column])
                        print(f'Picked image row={row}, column={column} as {len(neutral_woman_latent_vectors)}. neutral woman.')
                    elif len(neutral_man_latent_vectors) < num_of_vectors_per_category:
                        neutral_man_latent_vectors.append(latent_vectors[10 * row + column])
                        print(f'Picked image row={row}, column={column} as {len(neutral_man_latent_vectors)}. neutral man.')
                    else:
                        plt.close()

        plt.figure(figsize=(10, 10))
        plt.imshow(display_img)
        # This is just an example - you could also pick 3 neutral women images with sunglasses, etc.
        plt.title('Click on 3 happy women, 3 neutral women and \n 3 neutral men images (order matters!)')
        cid = plt.gcf().canvas.mpl_connect('button_press_event', onclick)
        plt.show()
        plt.gcf().canvas.mpl_disconnect(cid)
        print('Done choosing images.')

        # Calculate the average latent vector for every category (happy woman, neutral woman, neutral man)
        happy_woman_avg_latent_vector = np.mean(np.array(happy_woman_latent_vectors), axis=0)
        neutral_woman_avg_latent_vector = np.mean(np.array(neutral_woman_latent_vectors), axis=0)
        neutral_man_avg_latent_vector = np.mean(np.array(neutral_man_latent_vectors), axis=0)

        # By subtracting the neutral woman from the happy woman we capture the "vector of smiling". Adding that vector
        # to a neutral man gives us a happy man's latent vector! Our latent space has amazingly beautiful structure!
        happy_man_latent_vector = neutral_man_avg_latent_vector + (happy_woman_avg_latent_vector - neutral_woman_avg_latent_vector)

        # Generate images from these latent vectors
        happy_women_imgs = np.hstack([generate_from_specified_numpy_latent_vector(generator, v) for v in happy_woman_latent_vectors])
        neutral_women_imgs = np.hstack([generate_from_specified_numpy_latent_vector(generator, v) for v in neutral_woman_latent_vectors])
        neutral_men_imgs = np.hstack([generate_from_specified_numpy_latent_vector(generator, v) for v in neutral_man_latent_vectors])

        happy_woman_avg_img = generate_from_specified_numpy_latent_vector(generator, happy_woman_avg_latent_vector)
        neutral_woman_avg_img = generate_from_specified_numpy_latent_vector(generator, neutral_woman_avg_latent_vector)
        neutral_man_avg_img = generate_from_specified_numpy_latent_vector(generator, neutral_man_avg_latent_vector)

        happy_man_img = generate_from_specified_numpy_latent_vector(generator, happy_man_latent_vector)

        display_vector_arithmetic_results([happy_women_imgs, happy_woman_avg_img, neutral_women_imgs, neutral_woman_avg_img, neutral_men_imgs, neutral_man_avg_img, happy_man_img])

    else:
        raise Exception(f'Generation mode {generation_mode} not yet supported.')
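# Minimal sketches of the two interpolation functions referenced above (linear_interpolation and
# spherical_interpolation), assuming the (t, a, b) call signature used in the interpolation loop.
# These are illustrative - the repo's actual implementations may differ in detail.
import numpy as np

def linear_interpolation_sketch(t, a, b):
    return (1.0 - t) * a + t * b  # straight line between the latent vectors

def spherical_interpolation_sketch(t, a, b):
    # slerp moves along the great circle between a and b, which tends to respect the geometry
    # of Gaussian latent spaces better than cutting straight through the origin region
    a_unit, b_unit = a.ravel() / np.linalg.norm(a), b.ravel() / np.linalg.norm(b)
    omega = np.arccos(np.clip(np.dot(a_unit, b_unit), -1.0, 1.0))  # angle between a and b
    if np.isclose(omega, 0.0):
        return linear_interpolation_sketch(t, a, b)  # (nearly) parallel vectors - fall back to lerp
    return (np.sin((1.0 - t) * omega) * a + np.sin(t * omega) * b) / np.sin(omega)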
# You usually won't need to change these as often
parser.add_argument("--should_display", action='store_true', help="Display intermediate dreaming results (default False)")
parser.add_argument("--spatial_shift_size", type=int, help='Number of pixels to randomly shift image before grad ascent', default=32)
parser.add_argument("--smoothing_coefficient", type=float, help='Directly controls standard deviation for gradient smoothing', default=0.5)
parser.add_argument("--use_noise", action='store_true', help="Use noise as a starting point instead of input image (default False)")
args = parser.parse_args()

# Wrapping configuration into a dictionary
config = dict()
for arg in vars(args):
    config[arg] = getattr(args, arg)
config['dump_dir'] = OUT_VIDEOS_PATH if config['create_ouroboros'] else OUT_IMAGES_PATH
config['dump_dir'] = os.path.join(config['dump_dir'], f'{config["model_name"]}_{config["pretrained_weights"]}')
config['input_name'] = os.path.basename(config['input'])

# Create Ouroboros video (feeding the neural network's output back to its input)
if config['create_ouroboros']:
    deep_dream_video_ouroboros(config)
# Create a blended DeepDream video
elif any([config['input_name'].lower().endswith(video_ext) for video_ext in SUPPORTED_VIDEO_FORMATS]):  # only support mp4 atm
    deep_dream_video(config)
else:  # Create a static DeepDream image
    print('Dreaming started!')
    img = deep_dream_static_image(config, img=None)  # img=None -> will be loaded inside of deep_dream_static_image
    dump_path = utils.save_and_maybe_display_image(config, img)
    print(f'Saved DeepDream static image to: {os.path.relpath(dump_path)}\n')
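# Hypothetical example invocations for the three branches above (the script name and exact flag
# spellings are assumptions - check this file's argparse definitions for the authoritative list):
#   python deepdream.py --input figures.jpg                      -> static DeepDream image
#   python deepdream.py --input my_video.mp4                     -> blended DeepDream video
#   python deepdream.py --input figures.jpg --create_ouroboros   -> Ouroboros video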
    default=False)
args = parser.parse_args()

# Wrapping configuration into a dictionary - keeping things clean
config = dict()
for arg in vars(args):
    config[arg] = getattr(args, arg)
config['inputs_path'] = inputs_path
config['out_images_path'] = out_images_path
config['out_videos_path'] = out_videos_path
config['dump_dir'] = config['out_videos_path'] if config['is_video'] else config['out_images_path']
config['dump_dir'] = os.path.join(config['dump_dir'], f'{config["model"].name}_{config["pretrained_weights"].name}')

# DeepDream algorithm in 3 flavours: static image, video and ouroboros (feeding the net's output back to its input)
if any([config['input'].endswith(video_ext) for video_ext in SUPPORTED_VIDEO_FORMATS]):  # only support mp4 atm
    deep_dream_video(config)
elif config['is_video']:
    deep_dream_video_ouroboros(config)
else:
    img = deep_dream_static_image(config, img=None)  # img=None -> will be loaded inside of deep_dream_static_image
    utils.save_and_maybe_display_image(config, img, should_display=config['should_display'])
def stylize_static_image(inference_config):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Prepare the model - load the weights and put the model into evaluation mode
    stylization_model = TransformerNet().to(device)
    training_state = torch.load(os.path.join(inference_config["model_binaries_path"], inference_config["model_name"]))
    state_dict = training_state["state_dict"]
    stylization_model.load_state_dict(state_dict, strict=True)
    stylization_model.eval()

    if inference_config['verbose']:
        utils.print_model_metadata(training_state)

    with torch.no_grad():
        if os.path.isdir(inference_config['content_input']):  # do a batch stylization (every image in the directory)
            img_dataset = utils.SimpleDataset(inference_config['content_input'], inference_config['img_width'])
            img_loader = DataLoader(img_dataset, batch_size=inference_config['batch_size'])

            try:
                processed_imgs_cnt = 0
                for batch_id, img_batch in enumerate(img_loader):
                    processed_imgs_cnt += len(img_batch)
                    if inference_config['verbose']:
                        print(f'Processing batch {batch_id + 1} ({processed_imgs_cnt}/{len(img_dataset)} processed images).')

                    img_batch = img_batch.to(device)
                    stylized_imgs = stylization_model(img_batch).to('cpu').numpy()
                    for stylized_img in stylized_imgs:
                        utils.save_and_maybe_display_image(inference_config, stylized_img, should_display=False)
            except Exception as e:
                print(e)
                print(f'Consider making the batch_size (current = {inference_config["batch_size"]} images) or img_width (current = {inference_config["img_width"]} px) smaller')
                exit(1)
        else:  # do stylization for a single image
            content_img_path = os.path.join(inference_config['content_images_path'], inference_config['content_input'])
            content_image = utils.prepare_img(content_img_path, inference_config['img_width'], device)
            stylized_img = stylization_model(content_image).to('cpu').numpy()[0]
            # note the negation - the CLI flag is "should_not_display"
            utils.save_and_maybe_display_image(inference_config, stylized_img, should_display=not inference_config['should_not_display'])
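# A minimal sketch of what utils.SimpleDataset in the batch branch could look like, assuming it
# simply loads and resizes every image in a directory via prepare_img (the repo's actual dataset
# may differ, e.g. in filtering or preprocessing details).
import os
from torch.utils.data import Dataset

class SimpleDatasetSketch(Dataset):
    def __init__(self, img_dir, img_width):
        self.img_paths = [os.path.join(img_dir, name) for name in sorted(os.listdir(img_dir))]
        self.img_width = img_width

    def __len__(self):
        return len(self.img_paths)

    def __getitem__(self, idx):
        # prepare_img is assumed to return a 1xCxHxW tensor (it is fed straight into the model
        # above), hence the [0] to hand the DataLoader an unbatched CxHxW tensor
        return utils.prepare_img(self.img_paths[idx], self.img_width, 'cpu')[0]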