Example #1
    def build(self, input_shape):
        self.input_spec = [InputSpec(shape=input_shape)]
        self.conv_layers = {c: [] for c in ['i', 'f', 'c', 'o', 'a', 'ahat']}

        for l in range(self.nb_layers):
            for c in ['i', 'f', 'c', 'o']:
                act = self.LSTM_activation if c == 'c' else self.LSTM_inner_activation
                self.conv_layers[c].append(
                    Convolution2D(self.R_stack_sizes[l],
                                  self.R_filt_sizes[l],
                                  padding='same',
                                  data_format="channels_last",
                                  activation=act))
            act = 'relu' if l == 0 else self.A_activation
            self.conv_layers['ahat'].append(
                Convolution2D(self.stack_sizes[l],
                              self.Ahat_filt_sizes[l],
                              padding='same',
                              data_format="channels_last",
                              activation=act))
            if l < self.nb_layers - 1:
                self.conv_layers['a'].append(
                    Convolution2D(self.stack_sizes[l + 1],
                                  self.A_filt_sizes[l],
                                  padding='same',
                                  data_format="channels_last",
                                  activation=self.A_activation))

        self.upsample = UpSampling2D(data_format="channels_last")  # upsampling
        self.pool = MaxPooling2D(data_format="channels_last")  # downsampling

        self._trainable_weights = []
        nb_row, nb_col = (input_shape[-3], input_shape[-2])
        # Build each conv layer with the input shape it will receive and collect its trainable weights.
        for c in sorted(self.conv_layers.keys()):
            for l in range(len(self.conv_layers[c])):
                ds_factor = 2**l
                if c == 'ahat':
                    nb_channels = self.R_stack_sizes[l]
                elif c == 'a':
                    nb_channels = 2 * self.stack_sizes[l]
                else:  # i, f, c, o gates
                    nb_channels = self.stack_sizes[l] * 2 + self.R_stack_sizes[l]
                    if l < self.nb_layers - 1:
                        nb_channels += self.R_stack_sizes[l + 1]
                in_shape = (input_shape[0], nb_row // ds_factor,
                            nb_col // ds_factor, nb_channels)  # layer l runs at 1 / 2**l resolution
                self.conv_layers[c][l].build(in_shape)
                self._trainable_weights += self.conv_layers[c][l].trainable_weights

        self.states = [None] * self.nb_layers * 3  # ['r', 'c', 'e']
        if self.extrap_start_time is not None:
            self.t_extrap = K.variable(np.array(self.extrap_start_time),
                                       'int32')
            self.states += [None] * 2
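# Hedged illustration (toy numbers, not from the original configuration): how the
# per-gate input channel counts in the weight-building loop above work out. The
# error input for layer l has 2 * stack_sizes[l] channels, and each LSTM gate
# convolution also sees R_stack_sizes[l] and, below the top layer, the upsampled
# R_stack_sizes[l + 1].
stack_sizes = (3, 48)
R_stack_sizes = (3, 48)
nb_layers = 2
for l in range(nb_layers):
    gate_channels = 2 * stack_sizes[l] + R_stack_sizes[l]
    if l < nb_layers - 1:
        gate_channels += R_stack_sizes[l + 1]
    print('layer {}: error channels = {}, gate input channels = {}'.format(
        l, 2 * stack_sizes[l], gate_channels))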
Example #2
    def get_initial_states(self, x):
        input_shape = self.input_spec[0].shape
        init_nb_row = input_shape[self.row_axis]
        init_nb_col = input_shape[self.column_axis]

        base_initial_state = K.zeros_like(
            x)  # (batch_samples, timesteps) + image_shape
        non_channel_axis = -2
        for _ in range(2):
            base_initial_state = K.sum(base_initial_state,
                                       axis=non_channel_axis)
        base_initial_state = K.sum(base_initial_state,
                                   axis=1)  # (samples, nb_channels)

        initial_states = []
        states_to_pass = ['r', 'c', 'e']
        nlayers_to_pass = {u: self.nb_layers for u in states_to_pass}
        if self.extrap_start_time is not None:
            # pass the prediction along in the state so it can be used as the actual input at t+1 when extrapolating
            states_to_pass.append('ahat')
            nlayers_to_pass['ahat'] = 1
        for u in states_to_pass:  # one group of states per state type
            for l in range(nlayers_to_pass[u]):  # zero-initialize every layer's state
                ds_factor = 2**l  # layer l operates at 1 / 2**l of the input resolution
                nb_row = init_nb_row // ds_factor
                nb_col = init_nb_col // ds_factor
                if u in ['r', 'c']:
                    stack_size = self.R_stack_sizes[l]
                elif u == 'e':
                    stack_size = 2 * self.stack_sizes[l]
                elif u == 'ahat':
                    stack_size = self.stack_sizes[l]
                output_size = nb_row * nb_col * stack_size  # flattened size

                reducer = K.zeros((input_shape[self.channel_axis],
                                   output_size))  # (nb_channels, output_size)
                initial_state = K.dot(base_initial_state,
                                      reducer)  # (samples, output_size)
                output_shp = [-1, nb_row, nb_col, stack_size]
                initial_state = K.reshape(initial_state, output_shp)
                initial_states += [initial_state]

        if self.extrap_start_time is not None:
            initial_states += [
                K.variable(0, 'int32')
            ]  # the last state will correspond to the current timestep
        return initial_states
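# Standalone sketch (hedged; assumes a TF-1.x style keras.backend, and the shapes
# below are made up) of the zero-initial-state trick used above: summing
# K.zeros_like(x) over the spatial and time axes keeps the symbolic batch
# dimension, and a dot with an all-zero "reducer" matrix expands the zeros to any
# per-layer state shape without the batch size ever being known in advance.
from keras import backend as K

x = K.placeholder(shape=(None, 10, 32, 32, 3))   # (samples, time, rows, cols, channels)
state = K.zeros_like(x)
for _ in range(2):
    state = K.sum(state, axis=-2)                # sum out columns, then rows
state = K.sum(state, axis=1)                     # sum out time -> (samples, channels)
reducer = K.zeros((3, 8 * 8 * 16))               # (nb_channels, flattened state size)
state = K.reshape(K.dot(state, reducer), (-1, 8, 8, 16))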
Example #3
# We are going to use VGG16 pretrained on ImageNet. To match the VGG16 paper,
# "Very Deep Convolutional Networks for Large-Scale Image Recognition" by
# Simonyan and Zisserman (2015), we need to subtract the mean RGB values
# (computed on the ImageNet dataset) from all channels and flip the channel
# ordering to BGR.
content_array[:, :, :, 0] -= 103.939
content_array[:, :, :, 1] -= 116.779
content_array[:, :, :, 2] -= 123.68
style_array[:, :, :, 0] -= 103.939
style_array[:, :, :, 1] -= 116.779
style_array[:, :, :, 2] -= 123.68
content_array = content_array[:, :, :, ::-1]
style_array = style_array[:, :, :, ::-1]

# Create the backend variables (the backend here is TensorFlow).
content_image = K.variable(content_array)
style_image = K.variable(style_array)
combination_image = K.placeholder((1, height, width, 3))

# Concatenate the three images into a single tensor along the batch axis.

input_tensor = K.concatenate([content_image,
                              style_image,
                              combination_image], axis=0)

# Load the VGG16 model from Keras. We are only interested in getting the features
# from the different layers hence we omit the dense layers at the top.
model = applications.VGG16(input_tensor=input_tensor, weights='imagenet',
                           include_top=False)

# Store the layers of the model. We'll need that to refer to the layers we want
# to use.
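# Hedged completion of the truncated step above (the usual pattern; the name
# `layers` is an assumption, not recovered from the original script):
layers = dict([(layer.name, layer.output) for layer in model.layers])

# A minimal deprocessing sketch (also an addition, not part of the original
# script): it inverts the preprocessing applied to content_array and style_array
# above -- flip BGR back to RGB, add the ImageNet means back, and clip to valid
# pixel values -- so a generated image can be viewed. `height` and `width` are
# the placeholder dimensions defined above.
import numpy as np

def deprocess_array(x):
    x = x.reshape((height, width, 3)).copy()
    x = x[:, :, ::-1]          # BGR -> RGB
    x[:, :, 0] += 103.939
    x[:, :, 1] += 116.779
    x[:, :, 2] += 123.68
    return np.clip(x, 0, 255).astype('uint8')
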
def main(_):
    # disable all training specific operations
    K.set_learning_phase(0)

    model = applications.inception_v3.InceptionV3(weights='imagenet',
                                                  include_top=False)
    layer_contributions = {
        'mixed2': 0.2,
        'mixed3': 3.0,
        'mixed4': 2.0,
        'mixed5': 1.5
    }

    layer_dict = dict([(layer.name, layer) for layer in model.layers])

    loss = K.variable(0.)
    for layer_name in layer_contributions:
        coeff = layer_contributions[layer_name]
        activation = layer_dict[layer_name].output

        scaling = K.prod(K.cast(K.shape(activation), 'float32'))
        # avoid border artifacts by only involving non-border pixels
        loss += coeff * K.sum(K.square(activation[:, 2:-2, 2:-2, :])) / scaling

    # start the gradient-ascent process
    dream = model.input

    grads_list = K.gradients(loss, dream)
    grads = grads_list[0]

    # trick: normalize gradients
    grads /= K.maximum(K.mean(K.abs(grads)), 1e-7)

    fetch_loss_and_grads = K.function(inputs=[dream],
                                      outputs=[loss, grads])

    def gradient_ascent(x, iterations, step_rate, max_loss=None):
        for i in range(iterations):
            loss_value, grads_value = fetch_loss_and_grads([x])
            if max_loss is not None and loss_value > max_loss:
                break
            print('@{:4d}: {:.4f}'.format(i, loss_value))
            x += step_rate * grads_value
        return x

    img = preprocess_img(FLAGS.img_path)

    original_shape = img.shape[1:3]
    successive_shapes = [original_shape]
    for i in range(1, NUM_OCTAVES):
        shape = tuple([int(dim / (OCTAVES_SCLAE ** i))
                      for dim in original_shape])
        successive_shapes.append(shape)

    # reverse
    successive_shapes = successive_shapes[::-1]
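    # Worked example (hedged; assuming NUM_OCTAVES = 3, OCTAVES_SCLAE = 1.4 and a
    # 400x600 input): successive_shapes becomes [(204, 306), (285, 428), (400, 600)]
    # after the reversal above, so gradient ascent runs from the smallest octave up
    # to the original resolution.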

    original_img = np.copy(img)
    shrunk_original_img = resize_img(img, successive_shapes[0])

    for shape in successive_shapes:
        print('Processing image with shape: {}'.format(shape))
        img = resize_img(img, shape)
        img = gradient_ascent(img,
                              iterations=FLAGS.iterations,
                              step_rate=FLAGS.step_rate,
                              max_loss=MAX_LOSS)

        same_size_original = resize_img(original_img, shape)

        if FLAGS.repair_lost_detail:
            upscale_shrunk_original_img = resize_img(shrunk_original_img, shape)
            lost_detail = same_size_original - upscale_shrunk_original_img
            img += lost_detail

        shrunk_original_img = same_size_original
        save_img(img, filename='dream_at_scale_{}.png'.format(str(shape)))

    save_img(img, filename='dream.png')
    def __init__(self, hp_lambda, **kwargs):
        super(GradientReversal, self).__init__(**kwargs)
        self._hp_lambda = hp_lambda
        self.hp_lambda = K.variable(hp_lambda)
        self.supports_masking = False
        self.op = ReverseGradient(self.hp_lambda)
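# Hedged usage note (an assumption about intent, not from the original source):
# keeping hp_lambda in a backend variable lets the reversal strength be adjusted
# during training without rebuilding the layer, e.g. from a callback:
#
#     from keras import backend as K
#     K.set_value(grl_layer.hp_lambda, new_lambda)   # grl_layer / new_lambda are hypothetical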
Example #7
def main(_):
    width, height = preprocessing.image.load_img(FLAGS.target_img_path).size
    gen_img_height = 400
    gen_img_width = int(width * gen_img_height / height)

    target_x = preprocess_img(FLAGS.target_img_path, target_size=(gen_img_height, gen_img_width))
    target_img = K.constant(target_x)
    style_x = preprocess_img(FLAGS.style_img_path, target_size=(gen_img_height, gen_img_width))
    style_img = K.constant(style_x)
    combination_img = K.placeholder(shape=(1, gen_img_height, gen_img_width, 3))

    input_tensor = K.concatenate([
        target_img,
        style_img,
        combination_img
    ], axis=0)

    model = applications.vgg19.VGG19(input_tensor=input_tensor,
                                     weights='imagenet',
                                     include_top=False)
    model.summary()

    outputs_dict = dict([(layer.name, layer.output) for layer in model.layers])
    content_layer = 'block5_conv2'
    style_layers = [
        'block1_conv1',
        'block2_conv1',
        'block3_conv1',
        'block4_conv1',
        'block5_conv1'
    ]

    total_variation_weight = 1e-4
    style_weight = 1.0
    content_weight = 0.025

    loss = K.variable(0.)
    layer_features = outputs_dict[content_layer]
    target_img_features = layer_features[0, :, :, :]
    combination_features = layer_features[2, :, :, :]
    loss += content_weight * content_loss(target_img_features, combination_features)

    for layer_name in style_layers:
        layer_features = outputs_dict[layer_name]
        style_features = layer_features[1, :, :, :]
        combination_features = layer_features[2, :, :, :]
        sl = style_loss(style_features, combination_features,
                        target_size=(gen_img_height, gen_img_width))
        loss += (style_weight / len(style_layers)) * sl

    loss += total_variation_weight * total_variation_loss(combination_img, target_size=(gen_img_height, gen_img_width))

    # set up gradient-based optimization (L-BFGS) on the combination image
    grads_list = K.gradients(loss, combination_img)
    grads = grads_list[0]

    fetch_loss_and_grads = K.function(inputs=[combination_img],
                                      outputs=[loss, grads])

    lossAndGradsCache = LossAndGradsCache(fetch_loss_and_grads,
                                          target_size=(gen_img_height, gen_img_width))

    x = preprocess_img(FLAGS.target_img_path, target_size=(gen_img_height, gen_img_width))
    x = x.flatten()

    for i in range(FLAGS.iterations):
        start_time = time.time()

        x, min_val, info = fmin_l_bfgs_b(lossAndGradsCache.loss,
                                         x,
                                         fprime=lossAndGradsCache.grads,
                                         maxfun=20)
        print('@{:4d}: {:.4f}'.format(i + 1, min_val))

        x_copy = x.copy().reshape((gen_img_height, gen_img_width, 3))
        print(np.min(x_copy), np.mean(x_copy), np.max(x_copy))
        img = deprocess_img(x_copy)
        os.makedirs('out', exist_ok=True)
        filename = 'out/result_{:04d}.png'.format(i + 1)
        imsave(filename, img)
        print('Iteration took {:.1f}s'.format(time.time() - start_time))
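# Hedged sketch (an assumption, not the original LossAndGradsCache) of the helper
# the loop above relies on: scipy's fmin_l_bfgs_b wants separate callbacks for the
# loss and the gradients, while fetch_loss_and_grads computes both in one pass, so
# the cache evaluates once in loss() and serves the stored gradients from grads().
import numpy as np

class LossAndGradsCacheSketch(object):

    def __init__(self, fetch_loss_and_grads, target_size):
        self.fetch_loss_and_grads = fetch_loss_and_grads
        self.target_size = target_size
        self._grads = None

    def loss(self, x):
        h, w = self.target_size
        loss_value, grads_value = self.fetch_loss_and_grads([x.reshape((1, h, w, 3))])
        self._grads = grads_value.flatten().astype('float64')
        return loss_value

    def grads(self, x):
        return np.copy(self._grads)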