Example #1
# Imports assumed by this snippet:
import argparse
import os

from keras import backend as K


def parse_args():
    '''Parses command line arguments for the image analogy command.'''
    parser = argparse.ArgumentParser(
        description='Neural image analogies with Keras.')
    parser.add_argument('a_image_path',
                        metavar='ref',
                        type=str,
                        help='Path to the reference image mask (A)')
    parser.add_argument('ap_image_path',
                        metavar='base',
                        type=str,
                        help='Path to the source image (A\')')
    parser.add_argument('b_image_path',
                        metavar='new_mask',
                        type=str,
                        help='Path to the new mask for generation (B)')
    parser.add_argument('result_prefix',
                        metavar='res_prefix',
                        type=str,
                        help='Prefix for the saved results (B\')')
    # size-related
    parser.add_argument('--width',
                        dest='out_width',
                        type=int,
                        default=0,
                        help='Set output width')
    parser.add_argument('--height',
                        dest='out_height',
                        type=int,
                        default=0,
                        help='Set output height')
    parser.add_argument('--scales',
                        dest='num_scales',
                        type=int,
                        default=3,
                        help='Run at N different scales')
    parser.add_argument('--min-scale',
                        dest='min_scale',
                        type=float,
                        default=0.25,
                        help='Smallest scale to iterate')
    parser.add_argument('--a-scale-mode',
                        dest='a_scale_mode',
                        type=str,
                        default='none',
                        help='Method of scaling A and A\' relative to B')
    parser.add_argument('--a-scale',
                        dest='a_scale',
                        type=float,
                        default=1.0,
                        help='Additional scale factor for A and A\'')
    parser.add_argument(
        '--output-full',
        dest='output_full_size',
        action='store_true',
        help=
        'Output all intermediate images at full size regardless of current scale.'
    )
    # optimizer
    parser.add_argument('--iters',
                        dest='num_iterations_per_scale',
                        type=int,
                        default=5,
                        help='Number of iterations per scale')
    parser.add_argument('--model',
                        dest='match_model',
                        type=str,
                        default='patchmatch',
                        help='Matching algorithm (patchmatch or brute)')
    parser.add_argument(
        '--mrf-nnf-steps',
        dest='mrf_nnf_steps',
        type=int,
        default=5,
        help=
        'Number of patchmatch updates per iteration for local coherence loss.')
    parser.add_argument(
        '--randomize-mrf-nnf',
        dest='randomize_mnf_nnf',
        action='store_true',
        help=
        'Randomize the local coherence similarity matrix at the start of a new scale instead of scaling it up.'
    )
    parser.add_argument(
        '--analogy-nnf-steps',
        dest='analogy_nnf_steps',
        type=int,
        default=15,
        help=
        'Number of patchmatch updates for the analogy loss (done once per scale).'
    )
    # loss
    parser.add_argument('--tv-w',
                        dest='tv_weight',
                        type=float,
                        default=1.0,
                        help='Weight for TV loss.')
    parser.add_argument('--analogy-w',
                        dest='analogy_weight',
                        type=float,
                        default=1.0,
                        help='Weight for analogy loss.')
    parser.add_argument(
        '--analogy-layers',
        dest='analogy_layers',
        action=CommaSplitAction,
        default=['conv3_1', 'conv4_1'],
        help=
        'Comma-separated list of layer names to be used for the analogy loss')
    parser.add_argument(
        '--use-full-analogy',
        dest='use_full_analogy',
        action="store_true",
        help=
        'Use the full set of analogy patches (slower/more memory but maybe more accurate)'
    )
    parser.add_argument('--mrf-w',
                        dest='mrf_weight',
                        type=float,
                        default=0.5,
                        help='Weight for MRF loss between A\' and B\'')
    parser.add_argument(
        '--mrf-layers',
        dest='mrf_layers',
        action=CommaSplitAction,
        default=['conv3_1', 'conv4_1'],
        help='Comma-separated list of layer names to be used for the MRF loss')
    parser.add_argument('--b-content-w',
                        dest='b_bp_content_weight',
                        type=float,
                        default=0.0,
                        help='Weight for content loss between B and B\'')
    parser.add_argument(
        '--content-layers',
        dest='b_content_layers',
        action=CommaSplitAction,
        default=['conv3_1', 'conv4_1'],
        help=
        'Comma-separated list of layer names to be used for the content loss')
    parser.add_argument(
        '--nstyle-w',
        dest='neural_style_weight',
        type=float,
        default=0.0,
        help='Weight for neural style loss between A\' and B\'')
    parser.add_argument(
        '--nstyle-layers',
        dest='neural_style_layers',
        action=CommaSplitAction,
        default=['conv2_1', 'conv3_1', 'conv4_1', 'conv5_1'],
        help=
        'Comma-separated list of layer names to be used for the neural style')
    parser.add_argument('--patch-size',
                        dest='patch_size',
                        type=int,
                        default=1,
                        help='Patch size used for matching.')
    parser.add_argument(
        '--patch-stride',
        dest='patch_stride',
        type=int,
        default=1,
        help='Patch stride used for matching. Currently required to be 1.')
    # VGG
    parser.add_argument('--vgg-weights',
                        dest='vgg_weights',
                        type=str,
                        default=os.environ.get(VGG_ENV_VAR,
                                               'vgg16_weights.h5'),
                        help='Path to VGG16 weights.')
    parser.add_argument('--pool-mode',
                        dest='pool_mode',
                        type=str,
                        default='max',
                        help='Pooling mode for VGG ("avg" or "max")')
    # jitter
    parser.add_argument('--jitter',
                        dest='jitter',
                        type=float,
                        default=0,
                        help='Magnitude of random shift at scale x1')
    parser.add_argument('--color-jitter',
                        dest='color_jitter',
                        type=float,
                        default=0,
                        help='Magnitude of random jitter to each pixel')
    parser.add_argument(
        '--contrast',
        dest='contrast_percent',
        type=float,
        default=0.02,
        help=
        'Drop the bottom x percentile and scale by the top (100 - x)th percentile'
    )
    args = parser.parse_args()

    # hack for CPU users :(
    assert args.a_scale_mode in (
        'ratio', 'none', 'match'
    ), 'a-scale-mode must be set to one of "ratio", "none", or "match"'
    if K._BACKEND == 'theano':
        from keras.backend import theano_backend
        if args.a_scale_mode != 'match':
            if not theano_backend._on_gpu():
                print(
                    'Theano CPU mode detected. Forcing a-scale-mode to "match"'
                )
                args.a_scale_mode = 'match'  # prevent conv2d errors when using CPU
                args.a_scale = 1.0
            elif theano_backend.dnn and not theano_backend.dnn.dnn_available():
                print(
                    'Theano CUDA without cuDNN detected. Forcing a-scale-mode to "match"'
                )
                args.a_scale_mode = 'match'  # prevent conv2d errors when using CUDA without cuDNN
                args.a_scale = 1.0

    # make sure weights are in place
    if not os.path.exists(args.vgg_weights):
        print('Model weights not found (see "--vgg-weights" parameter).')
        return None
    return args
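
# ---------------------------------------------------------------------------
# The snippets on this page use CommaSplitAction and VGG_ENV_VAR without
# defining them. Below is a minimal sketch consistent with how they are used
# here; the class body and the environment-variable name are assumptions,
# not taken from these snippets.
import argparse

VGG_ENV_VAR = 'VGG_WEIGHT_PATH'  # assumed name of the weights env var


class CommaSplitAction(argparse.Action):
    '''Split a comma-separated option value into a list of layer names,
    e.g. --mrf-layers conv3_1,conv4_1 -> ['conv3_1', 'conv4_1'].'''

    def __call__(self, parser, namespace, values, option_string=None):
        setattr(namespace, self.dest, values.split(','))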
Example #2
parser.add_argument(
    '--use-full-analogy', dest='use_full_analogy', action='store_true',
    help='Use the full set of analogy patches (slower/more memory but maybe more accurate)'
)

args = parser.parse_args()
a_image_path = args.a_image_path
ap_image_path = args.ap_image_path
b_image_path = args.b_image_path
result_prefix = args.result_prefix
weights_path = args.vgg_weights
a_scale_mode = args.a_scale_mode
assert a_scale_mode in (
    'ratio', 'none',
    'match'), 'a-scale-mode must be set to one of "ratio", "none", or "match"'
# hack for CPU users :(
from keras.backend import theano_backend
if not theano_backend._on_gpu():
    a_scale_mode = 'match'  # prevent conv2d errors when using CPU
    args.a_scale = 1
    print('CPU mode detected. Forcing a-scale-mode to "match"')

# these are the weights of the different loss components
total_variation_weight = args.tv_weight
analogy_weight = args.analogy_weight
b_bp_content_weight = args.b_bp_content_weight
mrf_weight = args.mrf_weight
patch_size = args.patch_size
patch_stride = 1

analogy_layers = args.analogy_layers.split(',')
mrf_layers = args.mrf_layers.split(',')
b_content_layers = args.b_content_layers.split(',')


# Imports assumed by deconv2d_fast below (legacy Theano + Keras 1.x backend):
import theano.tensor as T
from theano.sandbox.cuda import dnn
from keras.backend.theano_backend import _on_gpu


def deconv2d_fast(x,
                  kernel,
                  strides=(1, 1),
                  border_mode='valid',
                  dim_ordering='th',
                  image_shape=None,
                  filter_shape=None):
    '''
    Run on cuDNN if available.
    border_mode: string, "same" or "valid".
    '''
    if dim_ordering not in {'th', 'tf'}:
        raise Exception('Unknown dim_ordering ' + str(dim_ordering))

    if dim_ordering == 'tf':
        # TF uses the last dimension as channel dimension,
        # instead of the 2nd one.
        # TH input shape: (samples, input_depth, rows, cols)
        # TF input shape: (samples, rows, cols, input_depth)
        # TH kernel shape: (depth, input_depth, rows, cols)
        # TF kernel shape: (rows, cols, input_depth, depth)
        x = x.dimshuffle((0, 3, 1, 2))
        kernel = kernel.dimshuffle((3, 2, 0, 1))
        if image_shape:
            image_shape = (image_shape[0], image_shape[3], image_shape[1],
                           image_shape[2])
        if filter_shape:
            filter_shape = (filter_shape[3], filter_shape[2], filter_shape[0],
                            filter_shape[1])

    if _on_gpu() and dnn.dnn_available():
        if border_mode == 'same':
            assert (strides == (1, 1))
            conv_out = dnn.dnn_conv(img=x, kerns=kernel, border_mode='full')
            shift_x = (kernel.shape[2] - 1) // 2
            shift_y = (kernel.shape[3] - 1) // 2
            conv_out = conv_out[:, :, shift_x:x.shape[2] + shift_x,
                                shift_y:x.shape[3] + shift_y]
        else:
            conv_out = dnn.dnn_conv(img=x,
                                    conv_mode='cross',
                                    kerns=kernel,
                                    border_mode=border_mode,
                                    subsample=strides)
    else:
        if border_mode == 'same':
            th_border_mode = 'full'
            assert (strides == (1, 1))
        elif border_mode == 'valid':
            th_border_mode = 'valid'
        else:
            raise Exception('Border mode not supported: ' + str(border_mode))

        conv_out = T.nnet.conv2d(
            x,
            kernel,
            border_mode=th_border_mode,
            subsample=strides,
            filter_flip=False,  # IMPORTANT: don't flip the kernel (cross-correlation)
            input_shape=image_shape,
            filter_shape=filter_shape)
        if border_mode == 'same':
            shift_x = (kernel.shape[2] - 1) // 2
            shift_y = (kernel.shape[3] - 1) // 2
            conv_out = conv_out[:, :, shift_x:x.shape[2] + shift_x,
                                shift_y:x.shape[3] + shift_y]
    if dim_ordering == 'tf':
        conv_out = conv_out.dimshuffle((0, 2, 3, 1))
    return conv_out
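
# A minimal usage sketch for deconv2d_fast above (assumes the legacy Theano
# environment; the shapes are illustrative, not taken from the snippet):
import numpy as np
import theano
import theano.tensor as T

x = T.tensor4('x')  # (batch, channels, rows, cols) in 'th' ordering
k = T.tensor4('k')  # (n_filters, channels, k_rows, k_cols)
out = deconv2d_fast(x, k, border_mode='same', dim_ordering='th')
f = theano.function([x, k], out)
xv = np.random.rand(1, 3, 16, 16).astype(theano.config.floatX)
kv = np.random.rand(8, 3, 3, 3).astype(theano.config.floatX)
print(f(xv, kv).shape)  # border_mode='same' preserves the 16x16 spatial size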
Example #4
parser.add_argument('--patch-size', dest='patch_size', type=int,
                    default=3, help='Patch size used for matching.')
parser.add_argument('--use-full-analogy', dest='use_full_analogy', action="store_true",
                    help='Use the full set of analogy patches (slower/more memory but maybe more accurate)')


args = parser.parse_args()
a_image_path = args.a_image_path
ap_image_path = args.ap_image_path
b_image_path = args.b_image_path
result_prefix = args.result_prefix
weights_path = args.vgg_weights
a_scale_mode = args.a_scale_mode
assert a_scale_mode in ('ratio', 'none', 'match'), 'a-scale-mode must be set to one of "ratio", "none", or "match"'
# hack for CPU users :(
from keras.backend import theano_backend
if not theano_backend._on_gpu():
    a_scale_mode = 'match'  # prevent conv2d errors when using CPU
    args.a_scale = 1
    print('CPU mode detected. Forcing a-scale-mode to "match"')

# these are the weights of the different loss components
total_variation_weight = args.tv_weight
analogy_weight = args.analogy_weight
b_bp_content_weight = args.b_bp_content_weight
mrf_weight = args.mrf_weight
patch_size = args.patch_size
patch_stride = 1

analogy_layers = args.analogy_layers.split(',')
mrf_layers = args.mrf_layers.split(',')
b_content_layers = args.b_content_layers.split(',')


# Imports assumed by deconv2d below (legacy Theano + Keras 1.x backend):
import theano
import theano.tensor as T
from theano.sandbox.cuda import dnn
from keras.backend.theano_backend import _on_gpu


def deconv2d(x, kernel, n_kern=32, strides=(1, 1), border_mode='valid', dim_ordering='th',
             image_shape=None, filter_shape=None):
    '''
    Run on cuDNN if available.
    border_mode: string, "same" or "valid".
    '''
    if dim_ordering not in {'th', 'tf'}:
        raise Exception('Unknown dim_ordering ' + str(dim_ordering))

    if dim_ordering == 'tf':
        # TF uses the last dimension as channel dimension,
        # instead of the 2nd one.
        # TH input shape: (samples, input_depth, rows, cols)
        # TF input shape: (samples, rows, cols, input_depth)
        # TH kernel shape: (depth, input_depth, rows, cols)
        # TF kernel shape: (rows, cols, input_depth, depth)
        x = x.dimshuffle((0, 3, 1, 2))
        kernel = kernel.dimshuffle((3, 2, 0, 1))
        if image_shape:
            image_shape = (image_shape[0], image_shape[3],
                           image_shape[1], image_shape[2])
        if filter_shape:
            filter_shape = (filter_shape[3], filter_shape[2],
                            filter_shape[0], filter_shape[1])

    if _on_gpu() and dnn.dnn_available():
        if border_mode == 'same':
            assert (strides == (1, 1))

            def conv_me(single_kernel, single_image):
                # scan iterates over leading dimension
                # single_kernel - Dx3x3 => 1xDx3x3
                # single_result - Nx28x28 => Nx1x28x28 [BC01]
                return dnn.dnn_conv(img=single_image.reshape(
                        (1, single_kernel.shape[0], single_kernel.shape[1], single_kernel.shape[2])),
                        kerns=single_kernel.reshape((single_kernel.shape[0], 1, single_kernel.shape[1],
                                                     single_kernel.shape[2])),
                        conv_mode='cross',  # IMPORTANT: don't flip the kernel (cross-correlation)
                        border_mode='full')

            results, updates = theano.scan(fn=conv_me,
                                           outputs_info=None,
                                           sequences=[kernel, x.dimshuffle((1, 0, 2, 3))])

            s = results.sum(axis=0)
            conv_out = T.reshape(s, (s.shape[0], 1, s.shape[1], s.shape[2]))

            shift_x = (kernel.shape[2] - 1) // 2
            shift_y = (kernel.shape[3] - 1) // 2
            conv_out = conv_out[:, :,
                       shift_x:x.shape[2] + shift_x,
                       shift_y:x.shape[3] + shift_y]
        else:
            raise NotImplementedError(
                "deconv2d on cuDNN only implements border_mode='same'")
    else:
        if border_mode == 'same':
            th_border_mode = 'full'
            assert (strides == (1, 1))
        elif border_mode == 'valid':
            th_border_mode = 'valid'
        else:
            raise Exception('Border mode not supported: ' + str(border_mode))

        conv_out = T.nnet.conv2d(
                x,
                kernel,
                input_shape=image_shape,
                filter_shape=filter_shape,
                filter_flip=False,  # IMPORTANT: don't flip the kernel (cross-correlation)
                border_mode=th_border_mode,
                subsample=strides)

        if border_mode == 'same':
            shift_x = (kernel.shape[2] - 1) // 2
            shift_y = (kernel.shape[3] - 1) // 2
            conv_out = conv_out[:, :,
                       shift_x:x.shape[2] + shift_x,
                       shift_y:x.shape[3] + shift_y]

    if dim_ordering == 'tf':
        conv_out = conv_out.dimshuffle((0, 2, 3, 1))
    return conv_out
def deconv2d_fast(x, kernel, strides=(1, 1), border_mode='valid', dim_ordering='th',
                  image_shape=None, filter_shape=None):
    '''
    Run on cuDNN if available.
    border_mode: string, "same" or "valid".
    '''
    if dim_ordering not in {'th', 'tf'}:
        raise Exception('Unknown dim_ordering ' + str(dim_ordering))

    if dim_ordering == 'tf':
        # TF uses the last dimension as channel dimension,
        # instead of the 2nd one.
        # TH input shape: (samples, input_depth, rows, cols)
        # TF input shape: (samples, rows, cols, input_depth)
        # TH kernel shape: (depth, input_depth, rows, cols)
        # TF kernel shape: (rows, cols, input_depth, depth)
        x = x.dimshuffle((0, 3, 1, 2))
        kernel = kernel.dimshuffle((3, 2, 0, 1))
        if image_shape:
            image_shape = (image_shape[0], image_shape[3],
                           image_shape[1], image_shape[2])
        if filter_shape:
            filter_shape = (filter_shape[3], filter_shape[2],
                            filter_shape[0], filter_shape[1])

    if _on_gpu() and dnn.dnn_available():
        if border_mode == 'same':
            assert (strides == (1, 1))
            conv_out = dnn.dnn_conv(img=x,
                                    kerns=kernel,
                                    border_mode='full')
            shift_x = (kernel.shape[2] - 1) // 2
            shift_y = (kernel.shape[3] - 1) // 2
            conv_out = conv_out[:, :,
                       shift_x:x.shape[2] + shift_x,
                       shift_y:x.shape[3] + shift_y]
        else:
            conv_out = dnn.dnn_conv(img=x,
                                    conv_mode='cross',
                                    kerns=kernel,
                                    border_mode=border_mode,
                                    subsample=strides)
    else:
        if border_mode == 'same':
            th_border_mode = 'full'
            assert (strides == (1, 1))
        elif border_mode == 'valid':
            th_border_mode = 'valid'
        else:
            raise Exception('Border mode not supported: ' + str(border_mode))

        conv_out = T.nnet.conv2d(x, kernel,
                                      border_mode=th_border_mode,
                                      subsample=strides,
                                      filter_flip=False,  # IMPORTANT: don't flip the kernel (cross-correlation)
                                      input_shape=image_shape,
                                      filter_shape=filter_shape)
        if border_mode == 'same':
            shift_x = (kernel.shape[2] - 1) // 2
            shift_y = (kernel.shape[3] - 1) // 2
            conv_out = conv_out[:, :,
                       shift_x:x.shape[2] + shift_x,
                       shift_y:x.shape[3] + shift_y]
    if dim_ordering == 'tf':
        conv_out = conv_out.dimshuffle((0, 2, 3, 1))
    return conv_out
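
# The border_mode='same' branches above compute a *full* cross-correlation
# and then center-crop it back to the input size. A NumPy/SciPy sketch of
# that identity (SciPy is an assumption; the snippets themselves use Theano):
import numpy as np
from scipy.signal import correlate2d

img = np.random.rand(8, 8)
kern = np.random.rand(3, 3)
full = correlate2d(img, kern, mode='full')  # (8 + 3 - 1) x (8 + 3 - 1)
shift = (kern.shape[0] - 1) // 2            # same as shift_x / shift_y above
same = full[shift:img.shape[0] + shift, shift:img.shape[1] + shift]
assert np.allclose(same, correlate2d(img, kern, mode='same'))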
Example #7
# Imports assumed by this snippet (legacy Theano + Keras 1.x backend):
import argparse
import os

import theano
import theano.tensor as T
from theano.sandbox.cuda import dnn
from keras import backend as K
from keras.backend.theano_backend import _on_gpu


def deconv2d(x,
             kernel,
             n_kern=32,
             strides=(1, 1),
             border_mode='valid',
             dim_ordering='th',
             image_shape=None,
             filter_shape=None):
    '''
    Run on cuDNN if available.
    border_mode: string, "same" or "valid".
    '''
    if dim_ordering not in {'th', 'tf'}:
        raise Exception('Unknown dim_ordering ' + str(dim_ordering))

    if dim_ordering == 'tf':
        # TF uses the last dimension as channel dimension,
        # instead of the 2nd one.
        # TH input shape: (samples, input_depth, rows, cols)
        # TF input shape: (samples, rows, cols, input_depth)
        # TH kernel shape: (depth, input_depth, rows, cols)
        # TF kernel shape: (rows, cols, input_depth, depth)
        x = x.dimshuffle((0, 3, 1, 2))
        kernel = kernel.dimshuffle((3, 2, 0, 1))
        if image_shape:
            image_shape = (image_shape[0], image_shape[3], image_shape[1],
                           image_shape[2])
        if filter_shape:
            filter_shape = (filter_shape[3], filter_shape[2], filter_shape[0],
                            filter_shape[1])

    if _on_gpu() and dnn.dnn_available():
        if border_mode == 'same':
            assert (strides == (1, 1))

            def conv_me(single_kernel, single_image):
                # scan iterates over leading dimension
                # single_kernel - Dx3x3 => 1xDx3x3
                # single_result - Nx28x28 => Nx1x28x28 [BC01]
                return dnn.dnn_conv(
                    img=single_image.reshape(
                        (1, single_kernel.shape[0], single_kernel.shape[1],
                         single_kernel.shape[2])),
                    kerns=single_kernel.reshape(
                        (single_kernel.shape[0], 1, single_kernel.shape[1],
                         single_kernel.shape[2])),
                    conv_mode='cross',  # IMPORTANT: don't flip the kernel (cross-correlation)
                    border_mode='full')

            results, updates = theano.scan(
                fn=conv_me,
                outputs_info=None,
                sequences=[kernel, x.dimshuffle((1, 0, 2, 3))])

            s = results.sum(axis=0)
            conv_out = T.reshape(s, (s.shape[0], 1, s.shape[1], s.shape[2]))

            shift_x = (kernel.shape[2] - 1) // 2
            shift_y = (kernel.shape[3] - 1) // 2
            conv_out = conv_out[:, :, shift_x:x.shape[2] + shift_x,
                                shift_y:x.shape[3] + shift_y]
        else:
            raise NotImplementedError(
                "deconv2d on cuDNN only implements border_mode='same'")
    else:
        if border_mode == 'same':
            th_border_mode = 'full'
            assert (strides == (1, 1))
        elif border_mode == 'valid':
            th_border_mode = 'valid'
        else:
            raise Exception('Border mode not supported: ' + str(border_mode))

        conv_out = T.nnet.conv2d(
            x,
            kernel,
            input_shape=image_shape,
            filter_shape=filter_shape,
            filter_flip=False,  # IMPORTANT: don't flip the kernel (cross-correlation)
            border_mode=th_border_mode,
            subsample=strides)

        if border_mode == 'same':
            shift_x = (kernel.shape[2] - 1) // 2
            shift_y = (kernel.shape[3] - 1) // 2
            conv_out = conv_out[:, :, shift_x:x.shape[2] + shift_x,
                                shift_y:x.shape[3] + shift_y]

    if dim_ordering == 'tf':
        conv_out = conv_out.dimshuffle((0, 2, 3, 1))
    return conv_out
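
# deconv2d's cuDNN branch uses theano.scan to run conv_me once per slice of
# the kernel's leading axis, then sums the stacked results. A toy sketch of
# that scan-and-reduce pattern (no convolution, just the control flow):
import numpy as np
import theano
import theano.tensor as T

seq = T.tensor3('seq')  # scan iterates over the leading axis
results, updates = theano.scan(fn=lambda step: step * 2.0,
                               outputs_info=None,
                               sequences=[seq])
total = results.sum(axis=0)  # the same reduction as results.sum(axis=0) above
f = theano.function([seq], total)
print(f(np.ones((4, 2, 3), dtype=theano.config.floatX)))  # every entry is 8.0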
def parse_args():
    '''Parses command line arguments for the image analogy command.'''
    parser = argparse.ArgumentParser(description='Neural image analogies with Keras.')
    parser.add_argument('a_image_path', metavar='ref', type=str,
                        help='Path to the reference image mask (A)')
    parser.add_argument('ap_image_path', metavar='base', type=str,
                        help='Path to the source image (A\')')
    parser.add_argument('b_image_path', metavar='new_mask', type=str,
                        help='Path to the new mask for generation (B)')
    parser.add_argument('result_prefix', metavar='res_prefix', type=str,
                        help='Prefix for the saved results (B\')')
    # size-related
    parser.add_argument('--width', dest='out_width', type=int,
                        default=0, help='Set output width')
    parser.add_argument('--height', dest='out_height', type=int,
                        default=0, help='Set output height')
    parser.add_argument('--scales', dest='num_scales', type=int,
                        default=3, help='Run at N different scales')
    parser.add_argument('--min-scale', dest='min_scale', type=float,
                        default=0.25, help='Smallest scale to iterate')
    parser.add_argument('--a-scale-mode', dest='a_scale_mode', type=str,
                        default='ratio', help='Method of scaling A and A\' relative to B')
    parser.add_argument('--a-scale', dest='a_scale', type=float,
                        default=1.0, help='Additional scale factor for A and A\'')
    parser.add_argument('--output-full', dest='output_full_size', action='store_true',
                        help='Output all intermediate images at full size regardless of current scale.')
    # optimizer
    parser.add_argument('--iters', dest='num_iterations_per_scale', type=int,
                        default=5, help='Number of iterations per scale')
    parser.add_argument('--model', dest='match_model', type=str,
                        default='patchmatch', help='Matching algorithm (patchmatch or brute)')
    parser.add_argument('--mrf-nnf-steps', dest='mrf_nnf_steps', type=int,
                        default=7, help='Number of patchmatch updates per iteration for local coherence loss.')
    parser.add_argument('--no-zoom-mrf-nnf', dest='reset_mnf_steps', action='store_true',
                        help='Randomize the local coherence similarity matrix at the start of a new scale instead of scaling up the last one.')
    parser.add_argument('--analogy-nnf-steps', dest='analogy_nnf_steps', type=int,
                        default=15, help='Number of patchmatch updates for the analogy loss (done once per scale).')
    # loss
    parser.add_argument('--mrf-w', dest='mrf_weight', type=float,
                        default=0.5, help='Weight for MRF loss between A\' and B\'')
    parser.add_argument('--b-content-w', dest='b_bp_content_weight', type=float,
                        default=0.0, help='Weight for content loss between B and B\'')
    parser.add_argument('--analogy-w', dest='analogy_weight', type=float,
                        default=1.0, help='Weight for analogy loss.')
    parser.add_argument('--tv-w', dest='tv_weight', type=float,
                        default=1.0, help='Weight for TV loss.')
    parser.add_argument('--analogy-layers', dest='analogy_layers', action=CommaSplitAction,
                        default=['conv3_1', 'conv4_1'],
                        help='Comma-separated list of layer names to be used for the analogy loss')
    parser.add_argument('--mrf-layers', dest='mrf_layers', action=CommaSplitAction,
                        default=['conv3_1', 'conv4_1'],
                        help='Comma-separated list of layer names to be used for the MRF loss')
    parser.add_argument('--content-layers', dest='b_content_layers', action=CommaSplitAction,
                        default=['conv3_1', 'conv4_1'],
                        help='Comma-separated list of layer names to be used for the content loss')
    parser.add_argument('--use-full-analogy', dest='use_full_analogy', action="store_true",
                        help='Use the full set of analogy patches (slower/more memory but maybe more accurate)')
    parser.add_argument('--patch-size', dest='patch_size', type=int,
                        default=1, help='Patch size used for matching.')
    parser.add_argument('--patch-stride', dest='patch_stride', type=int,
                        default=1, help='Patch stride used for matching. Currently required to be 1.')
    # VGG
    parser.add_argument('--vgg-weights', dest='vgg_weights', type=str,
                        default=os.environ.get(VGG_ENV_VAR, 'vgg16_weights.h5'), help='Path to VGG16 weights.')
    parser.add_argument('--pool-mode', dest='pool_mode', type=str,
                        default='max', help='Pooling mode for VGG ("avg" or "max")')
    # jitter
    parser.add_argument('--jitter', dest='jitter', type=float,
                        default=0, help='Magnitude of random shift at scale x1')
    parser.add_argument('--color-jitter', dest='color_jitter', type=float,
                        default=0, help='Magnitude of random jitter to each pixel')
    parser.add_argument('--contrast', dest='contrast_percent', type=float,
                        default=0.02, help='Drop the bottom x percentile and scale by the top (100 - x)th percentile')
    args = parser.parse_args()

    # hack for CPU users :(
    assert args.a_scale_mode in ('ratio', 'none', 'match'), 'a-scale-mode must be set to one of "ratio", "none", or "match"'
    if K._BACKEND == 'theano':
        from keras.backend import theano_backend
        if args.a_scale_mode != 'match':
            if not theano_backend._on_gpu():
                print('Theano CPU mode detected. Forcing a-scale-mode to "match"')
                args.a_scale_mode = 'match'  # prevent conv2d errors when using CPU
                args.a_scale = 1.0
            elif theano_backend.dnn and not theano_backend.dnn.dnn_available():
                print('Theano CUDA without cuDNN detected. Forcing a-scale-mode to "match"')
                args.a_scale_mode = 'match'  # prevent conv2d errors when using CUDA without cuDNN
                args.a_scale = 1.0

    # make sure weights are in place
    if not os.path.exists(args.vgg_weights):
        print('Model weights not found (see "--vgg-weights" parameter).')
        return None
    return args
Example #9
# Imports assumed by this snippet:
import argparse
import os

from keras import backend as K


def parse_args():
    """Parses command line arguments for the image analogy command."""
    parser = argparse.ArgumentParser(description="Neural image analogies with Keras.")
    parser.add_argument("a_image_path", metavar="ref", type=str, help="Path to the reference image mask (A)")
    parser.add_argument("ap_image_path", metavar="base", type=str, help="Path to the source image (A')")
    parser.add_argument("b_image_path", metavar="ref", type=str, help="Path to the new mask for generation (B)")
    parser.add_argument("result_prefix", metavar="res_prefix", type=str, help="Prefix for the saved results (B')")
    # size-related
    parser.add_argument("--width", dest="out_width", type=int, default=0, help="Set output width")
    parser.add_argument("--height", dest="out_height", type=int, default=0, help="Set output height")
    parser.add_argument("--scales", dest="num_scales", type=int, default=3, help="Run at N different scales")
    parser.add_argument("--min-scale", dest="min_scale", type=float, default=0.25, help="Smallest scale to iterate")
    parser.add_argument(
        "--a-scale-mode", dest="a_scale_mode", type=str, default="none", help="Method of scaling A and A' relative to B"
    )
    parser.add_argument(
        "--a-scale", dest="a_scale", type=float, default=1.0, help="Additional scale factor for A and A'"
    )
    parser.add_argument(
        "--output-full",
        dest="output_full_size",
        action="store_true",
        help="Output all intermediate images at full size regardless of current scale.",
    )
    # optimizer
    parser.add_argument(
        "--iters", dest="num_iterations_per_scale", type=int, default=5, help="Number of iterations per scale"
    )
    parser.add_argument(
        "--model", dest="match_model", type=str, default="patchmatch", help="Matching algorithm (patchmatch or brute)"
    )
    parser.add_argument(
        "--mrf-nnf-steps",
        dest="mrf_nnf_steps",
        type=int,
        default=5,
        help="Number of patchmatch updates per iteration for local coherence loss.",
    )
    parser.add_argument(
        "--randomize-mrf-nnf",
        dest="randomize_mnf_nnf",
        action="store_true",
        help="Randomize the local coherence similarity matrix at the start of a new scale instead of scaling it up.",
    )
    parser.add_argument(
        "--analogy-nnf-steps",
        dest="analogy_nnf_steps",
        type=int,
        default=15,
        help="Number of patchmatch updates for the analogy loss (done once per scale).",
    )
    # loss
    parser.add_argument("--tv-w", dest="tv_weight", type=float, default=1.0, help="Weight for TV loss.")
    parser.add_argument("--analogy-w", dest="analogy_weight", type=float, default=1.0, help="Weight for analogy loss.")
    parser.add_argument(
        "--analogy-layers",
        dest="analogy_layers",
        action=CommaSplitAction,
        default=["conv3_1", "conv4_1"],
        help="Comma-separated list of layer names to be used for the analogy loss",
    )
    parser.add_argument(
        "--use-full-analogy",
        dest="use_full_analogy",
        action="store_true",
        help="Use the full set of analogy patches (slower/more memory but maybe more accurate)",
    )
    parser.add_argument(
        "--mrf-w", dest="mrf_weight", type=float, default=0.5, help="Weight for MRF loss between A' and B'"
    )
    parser.add_argument(
        "--mrf-layers",
        dest="mrf_layers",
        action=CommaSplitAction,
        default=["conv3_1", "conv4_1"],
        help="Comma-separated list of layer names to be used for the MRF loss",
    )
    parser.add_argument(
        "--b-content-w",
        dest="b_bp_content_weight",
        type=float,
        default=0.0,
        help="Weight for content loss between B and B'",
    )
    parser.add_argument(
        "--content-layers",
        dest="b_content_layers",
        action=CommaSplitAction,
        default=["conv3_1", "conv4_1"],
        help="Comma-separated list of layer names to be used for the content loss",
    )
    parser.add_argument(
        "--nstyle-w",
        dest="neural_style_weight",
        type=float,
        default=0.0,
        help="Weight for neural style loss between A' and B'",
    )
    parser.add_argument(
        "--nstyle-layers",
        dest="neural_style_layers",
        action=CommaSplitAction,
        default=["conv2_1", "conv3_1", "conv4_1", "conv5_1"],
        help="Comma-separated list of layer names to be used for the neural style",
    )
    parser.add_argument("--patch-size", dest="patch_size", type=int, default=1, help="Patch size used for matching.")
    parser.add_argument(
        "--patch-stride",
        dest="patch_stride",
        type=int,
        default=1,
        help="Patch stride used for matching. Currently required to be 1.",
    )
    # VGG
    parser.add_argument(
        "--vgg-weights",
        dest="vgg_weights",
        type=str,
        default=os.environ.get(VGG_ENV_VAR, "vgg16_weights.h5"),
        help="Path to VGG16 weights.",
    )
    parser.add_argument(
        "--pool-mode", dest="pool_mode", type=str, default="max", help='Pooling mode for VGG ("avg" or "max")'
    )
    # jitter
    parser.add_argument("--jitter", dest="jitter", type=float, default=0, help="Magnitude of random shift at scale x1")
    parser.add_argument(
        "--color-jitter", dest="color_jitter", type=float, default=0, help="Magnitude of random jitter to each pixel"
    )
    parser.add_argument(
        "--contrast",
        dest="contrast_percent",
        type=float,
        default=0.02,
        help="Drop the bottom x percentile and scale by the top (100 - x)th percentile",
    )
    args = parser.parse_args()

    # hack for CPU users :(
    assert args.a_scale_mode in (
        "ratio",
        "none",
        "match",
    ), 'a-scale-mode must be set to one of "ratio", "none", or "match"'
    if K._BACKEND == "theano":
        from keras.backend import theano_backend

        if args.a_scale_mode != "match":
            if not theano_backend._on_gpu():
                print('Theano CPU mode detected. Forcing a-scale-mode to "match"')
                args.a_scale_mode = "match"  # prevent conv2d errors when using CPU
                args.a_scale = 1.0
            elif theano_backend.dnn and not theano_backend.dnn.dnn_available():
                print('Theano CUDA without cuDNN detected. Forcing a-scale-mode to "match"')
                args.a_scale_mode = "match"  # prevent conv2d errors when using CUDA without cuDNN
                args.a_scale = 1.0

    # make sure weights are in place
    if not os.path.exists(args.vgg_weights):
        print('Model weights not found (see "--vgg-weights" parameter).')
        return None
    return args
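
# A minimal driver sketch for parse_args above. The __main__ guard and the
# exit handling are assumptions; the snippet itself only shows that
# parse_args() returns None when the VGG weights file is missing.
import sys

if __name__ == '__main__':
    args = parse_args()
    if args is None:  # weights missing; parse_args already printed why
        sys.exit(1)
    print(vars(args))  # inspect the resolved options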