예제 #1
0
def main():
    """Sample an image conditioned on class units, starting from an init image.

    Parses the command-line options, prints a summary of them, loads the
    encoder, generator, condition and edge-detection networks, encodes the
    image given by ``--init_file`` into a start code, runs
    ``ClassConditionalSampler.sampling`` and saves the final image together
    with every intermediate sample the sampler returns.

    Raises:
        ValueError: if ``--init_file`` is left at its ``"None"`` default.
    """
    parser = argparse.ArgumentParser(description='Process some integers.')
    # --units is split on "_" further down, so it must always be supplied;
    # requiring it here fails fast instead of crashing on None.split().
    parser.add_argument('--units',
                        metavar='units',
                        type=str,
                        required=True,
                        help='an unit to visualize e.g. [0, 999]')
    parser.add_argument('--n_iters',
                        metavar='iter',
                        type=int,
                        default=10,
                        help='Number of sampling steps per each unit')
    parser.add_argument(
        '--threshold',
        metavar='w',
        type=float,
        default=-1.0,
        nargs='?',
        help='The probability threshold to decide whether to keep an image')
    parser.add_argument(
        '--save_every',
        metavar='save_iter',
        type=int,
        default=1,
        help='Save a sample every N iterations. 0 to disable saving')
    parser.add_argument('--reset_every',
                        metavar='reset_iter',
                        type=int,
                        default=0,
                        help='Reset the code every N iterations')
    parser.add_argument('--lr',
                        metavar='lr',
                        type=float,
                        default=2.0,
                        nargs='?',
                        help='Learning rate')
    parser.add_argument('--lr_end',
                        metavar='lr',
                        type=float,
                        default=-1.0,
                        nargs='?',
                        help='Ending Learning rate')
    parser.add_argument('--epsilon2',
                        metavar='eps',
                        type=float,
                        default=1.0,
                        nargs='?',
                        help='Scalar for condition ')
    parser.add_argument('--epsilon1',
                        metavar='eps',
                        type=float,
                        default=1.0,
                        nargs='?',
                        help='Scalar for prior')
    parser.add_argument('--epsilon3',
                        metavar='eps',
                        type=float,
                        default=1.0,
                        nargs='?',
                        help='Scalar for noise')
    parser.add_argument('--mask_epsilon',
                        metavar='eps',
                        type=float,
                        default=1e-6,
                        nargs='?',
                        help='Scalar for mask loss')
    parser.add_argument('--edge_epsilon',
                        metavar='eps',
                        type=float,
                        default=1.0,
                        nargs='?',
                        help='Scalar for edge loss')
    parser.add_argument('--content_epsilon',
                        metavar='eps',
                        type=float,
                        default=1.0,
                        nargs='?',
                        help='Scalar for content loss')
    parser.add_argument('--style_epsilon',
                        metavar='eps',
                        type=float,
                        default=1.0,
                        nargs='?',
                        help='Scalar for style loss')
    parser.add_argument('--content_layer',
                        metavar='layer',
                        type=str,
                        default='conv4',
                        nargs='?',
                        help='Layer to use for content loss')
    parser.add_argument('--mask_type',
                        metavar='mask',
                        type=str,
                        default='',
                        nargs='?',
                        help='Mask type. Only square and random available')
    parser.add_argument('--ratio_sample',
                        metavar='eps',
                        type=float,
                        default=1.0,
                        nargs='?',
                        help='Amount to sample for random mask')
    parser.add_argument('--seed',
                        metavar='n',
                        type=int,
                        default=0,
                        nargs='?',
                        help='Random seed')
    parser.add_argument('--xy',
                        metavar='n',
                        type=int,
                        default=0,
                        nargs='?',
                        help='Spatial position for conv units')
    parser.add_argument('--opt_layer',
                        metavar='s',
                        type=str,
                        help='Layer at which we optimize a code')
    parser.add_argument('--act_layer',
                        metavar='s',
                        type=str,
                        default="fc8",
                        help='Layer at which we activate a neuron')
    parser.add_argument('--init_file',
                        metavar='s',
                        type=str,
                        default="None",
                        help='Init image')
    parser.add_argument('--write_labels',
                        action='store_true',
                        default=False,
                        help='Write class labels to images')
    parser.add_argument('--output_dir',
                        metavar='b',
                        type=str,
                        default=".",
                        help='Output directory for saving results')
    parser.add_argument('--net_weights',
                        metavar='b',
                        type=str,
                        default=settings.encoder_weights,
                        help='Weights of the net being visualized')
    parser.add_argument('--net_definition',
                        metavar='b',
                        type=str,
                        default=settings.encoder_definition,
                        help='Definition of the net being visualized')

    args = parser.parse_args()

    # A negative end rate means "not set": fall back to a constant rate.
    if args.lr_end < 0:
        args.lr_end = args.lr

    # Summary of the effective options. Single-argument print(...) calls
    # produce the same output under Python 2 and Python 3.
    print("-------------")
    print(" units: %s    xy: %s" % (args.units, args.xy))
    print(" n_iters: %s" % args.n_iters)
    print(" reset_every: %s" % args.reset_every)
    print(" save_every: %s" % args.save_every)
    print(" threshold: %s" % args.threshold)

    print(" epsilon1: %s" % args.epsilon1)
    print(" epsilon2: %s" % args.epsilon2)
    print(" epsilon3: %s" % args.epsilon3)
    print(" mask_epsilon: %s" % args.mask_epsilon)
    print(" edge_epsilon: %s" % args.edge_epsilon)
    print(" content_epsilon: %s" % args.content_epsilon)
    print(" style_epsilon: %s" % args.style_epsilon)
    print(" mask_type: %s" % args.mask_type)
    print(" content_layer: %s" % args.content_layer)

    print(" start learning rate: %s" % args.lr)
    print(" end learning rate: %s" % args.lr_end)
    print(" seed: %s" % args.seed)
    print(" opt_layer: %s" % args.opt_layer)
    print(" act_layer: %s" % args.act_layer)
    print(" init_file: %s" % args.init_file)
    print("-------------")
    print(" output dir: %s" % args.output_dir)
    print(" net weights: %s" % args.net_weights)
    print(" net definition: %s" % args.net_definition)
    print("-------------")

    # The mask and the input image are both derived from the init image, so
    # it is mandatory. Check before loading any network so the failure is
    # immediate rather than after the expensive model setup.
    if args.init_file == "None":
        raise ValueError('must pass in an init file')

    # encoder and generator for images
    encoder = caffe.Net(settings.encoder_definition, settings.encoder_weights,
                        caffe.TEST)
    generator = caffe.Net(settings.generator_definition,
                          settings.generator_weights, caffe.TEST)

    # condition network, here an image classification net
    net = caffe.Classifier(
        args.net_definition,
        args.net_weights,
        mean=np.float32([104.0, 117.0, 123.0]),  # ImageNet mean
        channel_swap=(
            2, 1,
            0))  # the reference model has channels in BGR order instead of RGB
    edge_detector = caffe.Net(settings.edge_definition, caffe.TEST)
    # 3x3 Laplacian kernel written into the weights of the 'laplace' layer
    laplace = np.array((0, -1, 0, -1, 4, -1, 0, -1, 0),
                       dtype=np.float32).reshape((3, 3))
    edge_detector.params['laplace'][0].data[0, 0, :, :] = laplace
    # Fix the seed
    np.random.seed(args.seed)

    # Separate the underscore-separated list of units into numbers
    conditions = [{
        "unit": int(u),
        "xy": args.xy
    } for u in args.units.split("_")]

    # Encode the init image into the start code that will be optimized
    sampler = ClassConditionalSampler()
    start_image = sampler.load_image(
        shape=encoder.blobs["data"].data.shape,
        path=args.init_file,
        output_dir=args.output_dir)
    mask = get_mask(start_image,
                    args.mask_type,
                    inverse=True,
                    args={'percent_pix': args.ratio_sample})
    start_code = sampler.get_code(encoder=encoder,
                                  data=start_image,
                                  layer=args.opt_layer,
                                  mask=mask)
    print("Loaded start code:  %s" % (start_code.shape,))

    # NOTE(review): --style_epsilon is parsed and printed above but is not
    # forwarded to sampling() here -- confirm whether that is intended.
    output_image, list_samples = sampler.sampling(
        condition_net=net,
        image_encoder=encoder,
        image_net=net,
        image_generator=generator,
        edge_detector=edge_detector,
        gen_in_layer=settings.generator_in_layer,
        gen_out_layer=settings.generator_out_layer,
        start_code=start_code,
        n_iters=args.n_iters,
        lr=args.lr,
        lr_end=args.lr_end,
        threshold=args.threshold,
        layer=args.act_layer,
        conditions=conditions,
        epsilon1=args.epsilon1,
        epsilon2=args.epsilon2,
        epsilon3=args.epsilon3,
        mask_epsilon=args.mask_epsilon,
        content_epsilon=args.content_epsilon,
        edge_epsilon=args.edge_epsilon,
        content_layer=args.content_layer,
        output_dir=args.output_dir,
        mask=mask,
        input_image=start_image,
        reset_every=args.reset_every,
        save_every=args.save_every)

    # Output image, named after the first unit and the main hyper-parameters
    filename = "%s/%s_%04d_%04d_%s_h_%s_%s_%s__%s.jpg" % (
        args.output_dir, args.act_layer, conditions[0]["unit"], args.n_iters,
        args.lr, str(args.epsilon1), str(args.epsilon2), str(
            args.epsilon3), args.seed)

    # Save the final image
    util.save_image(output_image, filename)
    print("%s/%s" % (os.getcwd(), filename))

    # Save every intermediate sample, optionally stamping its label on it
    print("Saving images...")
    for img, name, label in list_samples:
        util.save_image(img, name)
        if args.write_labels:
            util.write_label_to_img(name, label)
예제 #2
0
def main():
    """Run a loss-weight survey over the 'lena' images found in ``--init_dir``.

    Parses the command-line options, prints a summary of them and loads the
    encoder, generator, condition and edge-detection networks. Then, for
    every file in ``--init_dir`` whose name contains ``'lena'``, the sampler
    is run once per (edge, content, style) weight triple. Each result is
    saved as ``<output_dir>/<image_name>/<i>.jpg`` and all results are
    finally combined with ``util.save_checkerboard``.

    Raises:
        ValueError: if ``--init_dir`` is left at its ``"None"`` default.
    """
    parser = argparse.ArgumentParser(description='Process some integers.')
    # --units is split on "_" further down, so it must always be supplied;
    # requiring it here fails fast instead of crashing on None.split().
    parser.add_argument('--units',
                        metavar='units',
                        type=str,
                        required=True,
                        help='an unit to visualize e.g. [0, 999]')
    parser.add_argument('--n_iters',
                        metavar='iter',
                        type=int,
                        default=10,
                        help='Number of sampling steps per each unit')
    parser.add_argument(
        '--threshold',
        metavar='w',
        type=float,
        default=-1.0,
        nargs='?',
        help='The probability threshold to decide whether to keep an image')
    parser.add_argument(
        '--save_every',
        metavar='save_iter',
        type=int,
        default=1,
        help='Save a sample every N iterations. 0 to disable saving')
    parser.add_argument('--reset_every',
                        metavar='reset_iter',
                        type=int,
                        default=0,
                        help='Reset the code every N iterations')
    parser.add_argument('--lr',
                        metavar='lr',
                        type=float,
                        default=2.0,
                        nargs='?',
                        help='Learning rate')
    parser.add_argument('--lr_end',
                        metavar='lr',
                        type=float,
                        default=-1.0,
                        nargs='?',
                        help='Ending Learning rate')
    parser.add_argument('--epsilon2',
                        metavar='eps',
                        type=float,
                        default=1.0,
                        nargs='?',
                        help='Scalar for condition ')
    parser.add_argument('--epsilon1',
                        metavar='eps',
                        type=float,
                        default=1.0,
                        nargs='?',
                        help='Scalar for prior')
    parser.add_argument('--epsilon3',
                        metavar='eps',
                        type=float,
                        default=1.0,
                        nargs='?',
                        help='Scalar for noise')
    parser.add_argument('--mask_epsilon',
                        metavar='eps',
                        type=float,
                        default=1e-6,
                        nargs='?',
                        help='Scalar for mask loss')
    parser.add_argument('--edge_epsilon',
                        metavar='eps',
                        type=float,
                        default=1.0,
                        nargs='?',
                        help='Scalar for edge loss')
    parser.add_argument('--content_epsilon',
                        metavar='eps',
                        type=float,
                        default=1.0,
                        nargs='?',
                        help='Scalar for content loss')
    parser.add_argument('--style_epsilon',
                        metavar='eps',
                        type=float,
                        default=1.0,
                        nargs='?',
                        help='Scalar for style loss')
    parser.add_argument('--content_layer',
                        metavar='layer',
                        type=str,
                        default='conv4',
                        nargs='?',
                        help='Layer to use for content loss')
    parser.add_argument('--mask_type',
                        metavar='mask',
                        type=str,
                        default='',
                        nargs='?',
                        help='Mask type. Only square and random available')
    parser.add_argument('--ratio_sample',
                        metavar='eps',
                        type=float,
                        default=1.0,
                        nargs='?',
                        help='Amount to sample for random mask')
    parser.add_argument('--seed',
                        metavar='n',
                        type=int,
                        default=0,
                        nargs='?',
                        help='Random seed')
    parser.add_argument('--xy',
                        metavar='n',
                        type=int,
                        default=0,
                        nargs='?',
                        help='Spatial position for conv units')
    parser.add_argument('--opt_layer',
                        metavar='s',
                        type=str,
                        help='Layer at which we optimize a code')
    parser.add_argument('--act_layer',
                        metavar='s',
                        type=str,
                        default="fc8",
                        help='Layer at which we activate a neuron')
    parser.add_argument('--init_dir',
                        metavar='s',
                        type=str,
                        default="None",
                        help='Init image')
    parser.add_argument('--write_labels',
                        action='store_true',
                        default=False,
                        help='Write class labels to images')
    parser.add_argument('--use_square',
                        action='store_true',
                        default=False,
                        help='Whether or not to use the square')
    parser.add_argument('--output_dir',
                        metavar='b',
                        type=str,
                        default=".",
                        help='Output directory for saving results')
    parser.add_argument('--net_weights',
                        metavar='b',
                        type=str,
                        default=settings.encoder_weights,
                        help='Weights of the net being visualized')
    parser.add_argument('--net_definition',
                        metavar='b',
                        type=str,
                        default=settings.encoder_definition,
                        help='Definition of the net being visualized')

    args = parser.parse_args()

    # A negative end rate means "not set": fall back to a constant rate.
    if args.lr_end < 0:
        args.lr_end = args.lr

    # summary
    print("-------------")
    print(" units: %s    xy: %s" % (args.units, args.xy))
    print(" n_iters: %s" % args.n_iters)
    print(" reset_every: %s" % args.reset_every)
    print(" save_every: %s" % args.save_every)
    print(" threshold: %s" % args.threshold)

    print(" epsilon1: %s" % args.epsilon1)
    print(" epsilon2: %s" % args.epsilon2)
    print(" epsilon3: %s" % args.epsilon3)
    print(" mask_epsilon: %s" % args.mask_epsilon)
    print(" edge_epsilon: %s" % args.edge_epsilon)
    print(" content_epsilon: %s" % args.content_epsilon)
    print(" style_epsilon: %s" % args.style_epsilon)
    print(" mask_type: %s" % args.mask_type)
    print(" content_layer: %s" % args.content_layer)

    print(" start learning rate: %s" % args.lr)
    print(" end learning rate: %s" % args.lr_end)
    print(" seed: %s" % args.seed)
    print(" opt_layer: %s" % args.opt_layer)
    print(" act_layer: %s" % args.act_layer)
    print(" init_dir: %s" % args.init_dir)
    print("-------------")
    print(" output dir: %s" % args.output_dir)
    print(" net weights: %s" % args.net_weights)
    print(" net definition: %s" % args.net_definition)
    print("-------------")

    # The survey reads its inputs from --init_dir; reject the "None"
    # placeholder before loading any network instead of failing later inside
    # os.listdir with an unrelated-looking error.
    if args.init_dir == "None":
        raise ValueError('must pass in an init dir')

    # encoder and generator for images
    encoder = caffe.Net(settings.encoder_definition, settings.encoder_weights,
                        caffe.TEST)
    generator = caffe.Net(settings.generator_definition,
                          settings.generator_weights, caffe.TEST)

    # condition network, here an image classification net
    net = caffe.Classifier(
        args.net_definition,
        args.net_weights,
        mean=np.float32([104.0, 117.0, 123.0]),  # ImageNet mean
        channel_swap=(
            2, 1,
            0))  # the reference model has channels in BGR order instead of RGB
    edge_detector = caffe.Net(settings.edge_definition, caffe.TEST)
    # 3x3 Laplacian kernel written into the weights of the 'laplace' layer
    laplace = np.array((0, -1, 0, -1, 4, -1, 0, -1, 0),
                       dtype=np.float32).reshape((3, 3))
    edge_detector.params['laplace'][0].data[0, 0, :, :] = laplace
    # Fix the seed
    np.random.seed(args.seed)
    args = util.AttributeDict(vars(args))
    # Separate the underscore-separated list of units into numbers
    conditions = [{
        "unit": int(u),
        "xy": args.xy
    } for u in args.units.split("_")]
    # os.listdir returns entries in arbitrary order; sort so the rows of the
    # final checkerboard come out in the same order on every run.
    files_to_read = [
        os.path.join(args.init_dir, f)
        for f in sorted(os.listdir(args.init_dir)) if 'lena' in f
    ]

    # One (edge, content, style) weight triple per survey column; the order
    # matches the labels passed to save_checkerboard below.
    eps = [(0, 0, 0), (1e-4, 0, 0), (0, 1e-6, 0), (0, 0, 1e-2),
           (0, 1e-6, 1e-2)]
    output_dir = args.output_dir
    images_to_save = []
    for image_file in files_to_read:
        # File name without directory and without its final extension
        image_name = os.path.splitext(os.path.basename(image_file))[0]
        print('image_name', image_name)
        image_path = os.path.join(output_dir, image_name)
        if not os.path.exists(image_path):
            os.makedirs(image_path)

        images_col = None
        for i, (edge, content, style) in enumerate(eps):

            sampler = ClassConditionalSampler()
            start_image = sampler.load_image(
                shape=encoder.blobs["data"].data.shape,
                path=image_file,
                output_dir=output_dir,
                save=False)

            # The first entry of every row is the untouched input image
            if images_col is None:
                images_col = [start_image.copy()]

            print('running', image_file, i)
            mask = get_mask(start_image, args.mask_type, inverse=False)
            start_code = sampler.get_code(encoder=encoder,
                                          data=start_image,
                                          layer=args.opt_layer,
                                          mask=mask)
            output_image, list_samples = sampler.sampling(
                condition_net=net,
                image_encoder=encoder,
                image_net=net,
                image_generator=generator,
                edge_detector=edge_detector,
                gen_in_layer=settings.generator_in_layer,
                gen_out_layer=settings.generator_out_layer,
                start_code=start_code,
                n_iters=args.n_iters,
                lr=args.lr,
                lr_end=args.lr_end,
                threshold=args.threshold,
                layer=args.act_layer,
                conditions=conditions,
                epsilon1=args.epsilon1,
                epsilon2=args.epsilon2,
                epsilon3=args.epsilon3,
                mask_epsilon=args.mask_epsilon,
                content_epsilon=content,
                style_epsilon=style,
                edge_epsilon=edge,
                content_layer=args.content_layer,
                output_dir=output_dir,
                mask=mask,
                input_image=start_image,
                reset_every=args.reset_every,
                save_every=args.save_every)

            print('Saving {} for {}'.format(i, image_name))
            images_col.append(output_image)
            file_path = os.path.join(image_path, str(i) + '.jpg')
            util.save_image(output_image, file_path)
        images_to_save.append(images_col)

    filename = "%s/%s_%04d_%04d_%s_h_%s_%s_%s__%s.jpg" % (
        output_dir, 'loss_survey', conditions[0]["unit"], args.n_iters,
        args.lr, str(args.epsilon1), str(args.epsilon2), str(
            args.epsilon3), args.seed)
    util.save_checkerboard(images_to_save,
                           filename,
                           labels=[
                               'ground truth', 'no loss', 'edge loss',
                               'content loss', 'style loss', 'content + style'
                           ])
예제 #3
0
def filter_image(src_image, mask_id, borderType=cv2.BORDER_REPLICATE):
    """Filter ``src_image`` with the kernel that ``get_mask(mask_id)`` returns.

    Args:
        src_image: image handed to ``cv2.filter2D`` as the source.
        mask_id: identifier passed to ``get_mask`` to obtain the kernel.
        borderType: OpenCV border mode forwarded to ``cv2.filter2D``.

    Returns:
        The result of ``cv2.filter2D``.
    """
    kernel = get_mask(mask_id)
    # ddepth of -1 asks OpenCV to give the output the same depth as the input
    same_depth_as_source = -1
    filtered = cv2.filter2D(src_image,
                            same_depth_as_source,
                            kernel,
                            borderType=borderType)
    return filtered