def main():
    parser = argparse.ArgumentParser(description='Process some integers.')
    parser.add_argument('--units', metavar='units', type=str,
                        help='a unit to visualize, e.g. in [0, 999]')
    parser.add_argument('--n_iters', metavar='iter', type=int, default=10,
                        help='Number of sampling steps per unit')
    parser.add_argument('--threshold', metavar='w', type=float, default=-1.0, nargs='?',
                        help='The probability threshold to decide whether to keep an image')
    parser.add_argument('--save_every', metavar='save_iter', type=int, default=1,
                        help='Save a sample every N iterations. 0 to disable saving')
    parser.add_argument('--reset_every', metavar='reset_iter', type=int, default=0,
                        help='Reset the code every N iterations')
    parser.add_argument('--lr', metavar='lr', type=float, default=2.0, nargs='?',
                        help='Learning rate')
    parser.add_argument('--lr_end', metavar='lr', type=float, default=-1.0, nargs='?',
                        help='Ending learning rate')
    parser.add_argument('--epsilon2', metavar='eps', type=float, default=1.0, nargs='?',
                        help='Scalar for condition')
    parser.add_argument('--epsilon1', metavar='eps', type=float, default=1.0, nargs='?',
                        help='Scalar for prior')
    parser.add_argument('--epsilon3', metavar='eps', type=float, default=1.0, nargs='?',
                        help='Scalar for noise')
    parser.add_argument('--mask_epsilon', metavar='eps', type=float, default=1e-6, nargs='?',
                        help='Scalar for mask loss')
    parser.add_argument('--edge_epsilon', metavar='eps', type=float, default=1.0, nargs='?',
                        help='Scalar for edge loss')
    parser.add_argument('--content_epsilon', metavar='eps', type=float, default=1.0, nargs='?',
                        help='Scalar for content loss')
    parser.add_argument('--style_epsilon', metavar='eps', type=float, default=1.0, nargs='?',
                        help='Scalar for style loss')
    parser.add_argument('--content_layer', metavar='layer', type=str, default='conv4', nargs='?',
                        help='Layer to use for content loss')
    parser.add_argument('--mask_type', metavar='mask', type=str, default='', nargs='?',
                        help='Mask type. Only square and random are available')
    parser.add_argument('--ratio_sample', metavar='eps', type=float, default=1.0, nargs='?',
                        help='Amount to sample for the random mask')
    parser.add_argument('--seed', metavar='n', type=int, default=0, nargs='?',
                        help='Random seed')
    parser.add_argument('--xy', metavar='n', type=int, default=0, nargs='?',
                        help='Spatial position for conv units')
    parser.add_argument('--opt_layer', metavar='s', type=str,
                        help='Layer at which we optimize a code')
    parser.add_argument('--act_layer', metavar='s', type=str, default="fc8",
                        help='Layer at which we activate a neuron')
    parser.add_argument('--init_file', metavar='s', type=str, default="None",
                        help='Init image')
    parser.add_argument('--write_labels', action='store_true', default=False,
                        help='Write class labels to images')
    parser.add_argument('--output_dir', metavar='b', type=str, default=".",
                        help='Output directory for saving results')
    parser.add_argument('--net_weights', metavar='b', type=str, default=settings.encoder_weights,
                        help='Weights of the net being visualized')
    parser.add_argument('--net_definition', metavar='b', type=str, default=settings.encoder_definition,
                        help='Definition of the net being visualized')

    args = parser.parse_args()

    # Default to a constant learning rate
    if args.lr_end < 0:
        args.lr_end = args.lr

    # summary
    print("-------------")
    print(" units: %s    xy: %s" % (args.units, args.xy))
    print(" n_iters: %s" % args.n_iters)
    print(" reset_every: %s" % args.reset_every)
    print(" save_every: %s" % args.save_every)
    print(" threshold: %s" % args.threshold)
    print(" epsilon1: %s" % args.epsilon1)
    print(" epsilon2: %s" % args.epsilon2)
    print(" epsilon3: %s" % args.epsilon3)
    print(" mask_epsilon: %s" % args.mask_epsilon)
    print(" edge_epsilon: %s" % args.edge_epsilon)
    print(" content_epsilon: %s" % args.content_epsilon)
    print(" style_epsilon: %s" % args.style_epsilon)
    print(" mask_type: %s" % args.mask_type)
    print(" content_layer: %s" % args.content_layer)
    print(" start learning rate: %s" % args.lr)
    print(" end learning rate: %s" % args.lr_end)
    print(" seed: %s" % args.seed)
    print(" opt_layer: %s" % args.opt_layer)
    print(" act_layer: %s" % args.act_layer)
    print(" init_file: %s" % args.init_file)
    print("-------------")
    print(" output dir: %s" % args.output_dir)
    print(" net weights: %s" % args.net_weights)
    print(" net definition: %s" % args.net_definition)
    print("-------------")

    # encoder and generator for images
    encoder = caffe.Net(settings.encoder_definition, settings.encoder_weights, caffe.TEST)
    generator = caffe.Net(settings.generator_definition, settings.generator_weights, caffe.TEST)

    # condition network, here an image classification net
    net = caffe.Classifier(
        args.net_definition,
        args.net_weights,
        mean=np.float32([104.0, 117.0, 123.0]),  # ImageNet mean (BGR)
        channel_swap=(2, 1, 0))  # the reference model has channels in BGR order instead of RGB

    # edge detector: a single conv layer holding a 3x3 Laplacian kernel
    edge_detector = caffe.Net(settings.edge_definition, caffe.TEST)
    laplace = np.array((0, -1, 0, -1, 4, -1, 0, -1, 0), dtype=np.float32).reshape((3, 3))
    edge_detector.params['laplace'][0].data[0, 0, :, :] = laplace

    # Fix the seed
    np.random.seed(args.seed)

    # Separate the underscore-separated list of units into numbers
    conditions = [{"unit": int(u), "xy": args.xy} for u in args.units.split("_")]

    # Optimize a code via gradient ascent
    sampler = ClassConditionalSampler()

    if args.init_file != "None":
        start_image = sampler.load_image(shape=encoder.blobs["data"].data.shape,
                                         path=args.init_file,
                                         output_dir=args.output_dir)
        mask = get_mask(start_image, args.mask_type, inverse=True,
                        args={'percent_pix': args.ratio_sample})
        start_code = sampler.get_code(encoder=encoder, data=start_image,
                                      layer=args.opt_layer, mask=mask)
        print("Loaded start code: %s" % (start_code.shape,))
    else:
        raise ValueError('must pass in an init file')
        # Unreachable fallback: initialize the code being optimized from random noise
        shape = generator.blobs[settings.generator_in_layer].data.shape
        start_code = np.random.normal(0, 1, shape)
        print(">> %s %s" % (np.min(start_code), np.max(start_code)))

    output_image, list_samples = sampler.sampling(
        condition_net=net,
        image_encoder=encoder,
        image_net=net,
        image_generator=generator,
        edge_detector=edge_detector,
        gen_in_layer=settings.generator_in_layer,
        gen_out_layer=settings.generator_out_layer,
        start_code=start_code,
        n_iters=args.n_iters,
        lr=args.lr,
        lr_end=args.lr_end,
        threshold=args.threshold,
        layer=args.act_layer,
        conditions=conditions,
        epsilon1=args.epsilon1,
        epsilon2=args.epsilon2,
        epsilon3=args.epsilon3,
        mask_epsilon=args.mask_epsilon,
        content_epsilon=args.content_epsilon,
        edge_epsilon=args.edge_epsilon,
        content_layer=args.content_layer,
        output_dir=args.output_dir,
        mask=mask,
        input_image=start_image,
        reset_every=args.reset_every,
        save_every=args.save_every)

    # Save the final image
    filename = "%s/%s_%04d_%04d_%s_h_%s_%s_%s__%s.jpg" % (
        args.output_dir, args.act_layer, conditions[0]["unit"], args.n_iters,
        args.lr, str(args.epsilon1), str(args.epsilon2), str(args.epsilon3),
        args.seed)
    util.save_image(output_image, filename)
    print("%s/%s" % (os.getcwd(), filename))

    # Save intermediate samples and, optionally, write class labels onto them
    print("Saving images...")
    for p in list_samples:
        img, name, label = p
        util.save_image(img, name)
        if args.write_labels:
            util.write_label_to_img(name, label)
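# Example invocation of main() above (a sketch only: the script filename and the
# choice of fc6 as opt_layer are assumptions, and the epsilon values are purely
# illustrative; the flags themselves are the ones defined by the parser above):
#
#   python sampling_inpainting.py \
#       --units 945 --act_layer fc8 --opt_layer fc6 \
#       --init_file images/lena.jpg --mask_type random --ratio_sample 0.5 \
#       --n_iters 200 --lr 2.0 --lr_end 0.1 \
#       --epsilon1 1e-5 --epsilon2 1.0 --epsilon3 1e-17 \
#       --output_dir output --write_labels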
def main():
    parser = argparse.ArgumentParser(description='Process some integers.')
    parser.add_argument('--units', metavar='units', type=str,
                        help='a unit to visualize, e.g. in [0, 999]')
    parser.add_argument('--n_iters', metavar='iter', type=int, default=10,
                        help='Number of sampling steps per unit')
    parser.add_argument('--threshold', metavar='w', type=float, default=-1.0, nargs='?',
                        help='The probability threshold to decide whether to keep an image')
    parser.add_argument('--save_every', metavar='save_iter', type=int, default=1,
                        help='Save a sample every N iterations. 0 to disable saving')
    parser.add_argument('--reset_every', metavar='reset_iter', type=int, default=0,
                        help='Reset the code every N iterations')
    parser.add_argument('--lr', metavar='lr', type=float, default=2.0, nargs='?',
                        help='Learning rate')
    parser.add_argument('--lr_end', metavar='lr', type=float, default=-1.0, nargs='?',
                        help='Ending learning rate')
    parser.add_argument('--epsilon2', metavar='eps', type=float, default=1.0, nargs='?',
                        help='Scalar for condition')
    parser.add_argument('--epsilon1', metavar='eps', type=float, default=1.0, nargs='?',
                        help='Scalar for prior')
    parser.add_argument('--epsilon3', metavar='eps', type=float, default=1.0, nargs='?',
                        help='Scalar for noise')
    parser.add_argument('--mask_epsilon', metavar='eps', type=float, default=1e-6, nargs='?',
                        help='Scalar for mask loss')
    parser.add_argument('--edge_epsilon', metavar='eps', type=float, default=1.0, nargs='?',
                        help='Scalar for edge loss')
    parser.add_argument('--content_epsilon', metavar='eps', type=float, default=1.0, nargs='?',
                        help='Scalar for content loss')
    parser.add_argument('--style_epsilon', metavar='eps', type=float, default=1.0, nargs='?',
                        help='Scalar for style loss')
    parser.add_argument('--content_layer', metavar='layer', type=str, default='conv4', nargs='?',
                        help='Layer to use for content loss')
    parser.add_argument('--mask_type', metavar='mask', type=str, default='', nargs='?',
                        help='Mask type. Only square and random are available')
    parser.add_argument('--ratio_sample', metavar='eps', type=float, default=1.0, nargs='?',
                        help='Amount to sample for the random mask')
    parser.add_argument('--seed', metavar='n', type=int, default=0, nargs='?',
                        help='Random seed')
    parser.add_argument('--xy', metavar='n', type=int, default=0, nargs='?',
                        help='Spatial position for conv units')
    parser.add_argument('--opt_layer', metavar='s', type=str,
                        help='Layer at which we optimize a code')
    parser.add_argument('--act_layer', metavar='s', type=str, default="fc8",
                        help='Layer at which we activate a neuron')
    parser.add_argument('--init_dir', metavar='s', type=str, default="None",
                        help='Directory of init images')
    parser.add_argument('--write_labels', action='store_true', default=False,
                        help='Write class labels to images')
    parser.add_argument('--use_square', action='store_true', default=False,
                        help='Whether or not to use the square')
    parser.add_argument('--output_dir', metavar='b', type=str, default=".",
                        help='Output directory for saving results')
    parser.add_argument('--net_weights', metavar='b', type=str, default=settings.encoder_weights,
                        help='Weights of the net being visualized')
    parser.add_argument('--net_definition', metavar='b', type=str, default=settings.encoder_definition,
                        help='Definition of the net being visualized')

    args = parser.parse_args()

    # Default to a constant learning rate
    if args.lr_end < 0:
        args.lr_end = args.lr

    # summary
    print("-------------")
    print(" units: %s    xy: %s" % (args.units, args.xy))
    print(" n_iters: %s" % args.n_iters)
    print(" reset_every: %s" % args.reset_every)
    print(" save_every: %s" % args.save_every)
    print(" threshold: %s" % args.threshold)
    print(" epsilon1: %s" % args.epsilon1)
    print(" epsilon2: %s" % args.epsilon2)
    print(" epsilon3: %s" % args.epsilon3)
    print(" mask_epsilon: %s" % args.mask_epsilon)
    print(" edge_epsilon: %s" % args.edge_epsilon)
    print(" content_epsilon: %s" % args.content_epsilon)
    print(" style_epsilon: %s" % args.style_epsilon)
    print(" mask_type: %s" % args.mask_type)
    print(" content_layer: %s" % args.content_layer)
    print(" start learning rate: %s" % args.lr)
    print(" end learning rate: %s" % args.lr_end)
    print(" seed: %s" % args.seed)
    print(" opt_layer: %s" % args.opt_layer)
    print(" act_layer: %s" % args.act_layer)
    print(" init_dir: %s" % args.init_dir)
    print("-------------")
    print(" output dir: %s" % args.output_dir)
    print(" net weights: %s" % args.net_weights)
    print(" net definition: %s" % args.net_definition)
    print("-------------")

    # encoder and generator for images
    encoder = caffe.Net(settings.encoder_definition, settings.encoder_weights, caffe.TEST)
    generator = caffe.Net(settings.generator_definition, settings.generator_weights, caffe.TEST)

    # condition network, here an image classification net
    net = caffe.Classifier(
        args.net_definition,
        args.net_weights,
        mean=np.float32([104.0, 117.0, 123.0]),  # ImageNet mean (BGR)
        channel_swap=(2, 1, 0))  # the reference model has channels in BGR order instead of RGB

    # edge detector: a single conv layer holding a 3x3 Laplacian kernel
    edge_detector = caffe.Net(settings.edge_definition, caffe.TEST)
    laplace = np.array((0, -1, 0, -1, 4, -1, 0, -1, 0), dtype=np.float32).reshape((3, 3))
    edge_detector.params['laplace'][0].data[0, 0, :, :] = laplace

    # Fix the seed
    np.random.seed(args.seed)

    args = util.AttributeDict(vars(args))

    # Separate the underscore-separated list of units into numbers
    conditions = [{"unit": int(u), "xy": args.xy} for u in args.units.split("_")]

    # Only files whose names contain 'lena' are picked up from the init directory
    files_to_read = [
        os.path.join(args.init_dir, f) for f in os.listdir(args.init_dir)
        if 'lena' in f
    ]

    attributes = ['content_epsilon', 'style_epsilon', 'edge_epsilon']
    # attributes = ['edge_epsilon']  # ,'style_epsilon', 'edge_epsilon'
    masks = ['random', 'laplace', 'square_random', 'square_laplace']
    # (edge_epsilon, content_epsilon, style_epsilon) settings to survey
    eps = [(0, 0, 0), (1e-4, 0, 0), (0, 1e-6, 0), (0, 0, 1e-2), (0, 1e-6, 1e-2)]
    output_dir = args.output_dir

    images_to_save = []
    for image_file in files_to_read:
        image_name = re.split(r'\.|/', image_file)[-2]
        print('image_name', image_name)
        image_path = os.path.join(args.output_dir, image_name)
        if not os.path.exists(image_path):
            os.makedirs(image_path)

        images_col = None
        for i, (edge, content, style) in enumerate(eps):
            sampler = ClassConditionalSampler()
            start_image = sampler.load_image(
                shape=encoder.blobs["data"].data.shape,
                path=image_file,
                output_dir=output_dir,
                save=False)
            if images_col is None:
                images_col = [start_image.copy()]
            print('running', image_file, i)

            mask = get_mask(start_image, args.mask_type, inverse=False)
            start_code = sampler.get_code(encoder=encoder, data=start_image,
                                          layer=args.opt_layer, mask=mask)

            output_image, list_samples = sampler.sampling(
                condition_net=net,
                image_encoder=encoder,
                image_net=net,
                image_generator=generator,
                edge_detector=edge_detector,
                gen_in_layer=settings.generator_in_layer,
                gen_out_layer=settings.generator_out_layer,
                start_code=start_code,
                n_iters=args.n_iters,
                lr=args.lr,
                lr_end=args.lr_end,
                threshold=args.threshold,
                layer=args.act_layer,
                conditions=conditions,
                epsilon1=args.epsilon1,
                epsilon2=args.epsilon2,
                epsilon3=args.epsilon3,
                mask_epsilon=args.mask_epsilon,
                content_epsilon=content,
                style_epsilon=style,
                edge_epsilon=edge,
                content_layer=args.content_layer,
                output_dir=output_dir,
                mask=mask,
                input_image=start_image,
                reset_every=args.reset_every,
                save_every=args.save_every)

            print('Saving {} for {}'.format(i, image_name))
            images_col.append(output_image)
            file_path = os.path.join(image_path, str(i) + '.jpg')
            util.save_image(output_image, file_path)

        images_to_save.append(images_col)

    # Assemble all results into a single labeled checkerboard image
    filename = "%s/%s_%04d_%04d_%s_h_%s_%s_%s__%s.jpg" % (
        args.output_dir, 'loss_survey', conditions[0]["unit"], args.n_iters,
        args.lr, str(args.epsilon1), str(args.epsilon2), str(args.epsilon3),
        args.seed)
    util.save_checkerboard(
        images_to_save, filename,
        labels=['ground truth', 'no loss', 'edge loss', 'content loss',
                'style loss', 'content + style'])
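# Example invocation of the loss-survey variant above (a sketch only: the script
# filename and the choice of fc6 as opt_layer are assumptions). Each image found
# in --init_dir is re-sampled once per (edge, content, style) setting in `eps`
# before the checkerboard is written:
#
#   python loss_survey.py \
#       --units 945 --act_layer fc8 --opt_layer fc6 \
#       --init_dir images --mask_type random \
#       --n_iters 200 --output_dir output/survey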
def filter_image(src_image, mask_id, borderType=cv2.BORDER_REPLICATE):
    """Filter the image with the 2D kernel selected by mask_id."""
    return cv2.filter2D(src_image, -1, get_mask(mask_id), borderType=borderType)
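# A minimal, self-contained sketch of what filter_image computes when the
# selected kernel is the same 3x3 Laplacian loaded into the edge detector above
# (the actual kernels and ids accepted by get_mask are defined elsewhere in this
# repo; this helper is illustrative only and is not used by the pipeline):
def _laplacian_filter_example(src_image, borderType=cv2.BORDER_REPLICATE):
    laplace = np.array((0, -1, 0, -1, 4, -1, 0, -1, 0),
                       dtype=np.float32).reshape((3, 3))
    # cv2.filter2D correlates the image with the kernel; ddepth=-1 keeps the
    # source depth, and borderType controls how pixels beyond the edge are padded.
    return cv2.filter2D(src_image, -1, laplace, borderType=borderType)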