def compute_output_shape(self, input_shape):
    """Return the output shape of this layer for the given ``input_shape``.

    ``input_shape`` must unpack into exactly four entries. Which positions
    hold the spatial dimensions depends on ``K.image_dim_ordering()``.

    Returns:
        The tuple ``(batch_size, height, width, self.n_boxes, 8)``.
    """
    channels_last = K.image_dim_ordering() == 'tf'
    if channels_last:
        batch, height, width, _channels = input_shape
    else:
        # Not relevant yet, since TensorFlow is the only supported backend
        # right now, but keeping the channels-first unpacking is harmless
        # and ready for the future.
        batch, _channels, height, width = input_shape
    return (batch, height, width, self.n_boxes, 8)
def gram_matrix(x):
    """Return the Gram matrix of a 3D tensor.

    Unless the image ordering is "th", the last axis of ``x`` is first moved
    to the front. The tensor is then flattened with ``K.batch_flatten`` and
    multiplied by its own transpose.
    """
    assert K.ndim(x) == 3
    if K.image_dim_ordering() == "th":
        leading_channels = x
    else:
        # Bring the last axis to the front before flattening.
        leading_channels = K.permute_dimensions(x, (2, 0, 1))
    flat = K.batch_flatten(leading_channels)
    return K.dot(flat, K.transpose(flat))
def eval_loss_and_grads(x):
    """Evaluate ``f_outputs`` on ``x`` and return the loss and gradients.

    ``x`` is reshaped into a one-image batch whose layout follows
    ``K.image_dim_ordering()`` before being passed to ``f_outputs``.

    Returns:
        A pair ``(loss_value, grad_values)``. ``loss_value`` is the first
        output of ``f_outputs``; ``grad_values`` is the remaining output(s)
        flattened into a 1D float64 array.
    """
    if K.image_dim_ordering() == 'th':
        batch_shape = (1, 3, img_nrows, img_ncols)
    else:
        batch_shape = (1, img_nrows, img_ncols, 3)

    outputs = f_outputs([x.reshape(batch_shape)])
    loss_value = outputs[0]
    gradients = outputs[1:]

    # A single gradient output is flattened directly; several outputs are
    # first combined into one array.
    if len(gradients) == 1:
        raw = outputs[1]
    else:
        raw = np.array(gradients)
    grad_values = raw.flatten().astype('float64')
    return loss_value, grad_values
def total_variation_loss(x):
    """Return a total-variation style smoothness loss for a 4D tensor ``x``.

    The squared differences between each element and its neighbour one step
    along the row axis and one step along the column axis are summed, raised
    to the power 1.25, and reduced with ``K.sum``. Which axes are treated as
    rows/columns depends on ``K.image_dim_ordering()``.
    """
    assert 4 == K.ndim(x)

    # Named slices; these are exactly what the literal slice syntax builds.
    everything = slice(None)
    leading_rows = slice(None, img_nrows - 1)
    leading_cols = slice(None, img_ncols - 1)
    from_second = slice(1, None)

    if K.image_dim_ordering() == 'th':
        base = x[everything, everything, leading_rows, leading_cols]
        next_row = x[everything, everything, from_second, leading_cols]
        next_col = x[everything, everything, leading_rows, from_second]
    else:
        base = x[everything, leading_rows, leading_cols, everything]
        next_row = x[everything, from_second, leading_cols, everything]
        next_col = x[everything, leading_rows, from_second, everything]

    row_term = K.square(base - next_row)
    col_term = K.square(base - next_col)
    return K.sum(K.pow(row_term + col_term, 1.25))
def deprocess_image(x):
    """Convert an array ``x`` into a displayable ``uint8`` image.

    ``x`` is reshaped to ``(img_nrows, img_ncols, 3)`` (transposing from
    channels-first when the ordering is 'th'), fixed per-channel offsets are
    added, the channel order is reversed (BGR to RGB), and the values are
    clipped to ``[0, 255]``.

    NOTE(review): the offsets are added in place on the reshaped array, so
    the caller's buffer may be modified when the reshape returns a view --
    confirm callers pass a copy.
    """
    if K.image_dim_ordering() == 'th':
        x = x.reshape((3, img_nrows, img_ncols)).transpose((1, 2, 0))
    else:
        x = x.reshape((img_nrows, img_ncols, 3))

    # Add the per-channel offsets in place, one channel at a time.
    for channel, offset in enumerate((103.939, 116.779, 123.68)):
        x[:, :, channel] += offset

    # BGR to RGB
    rgb = x[:, :, ::-1]
    return np.clip(rgb, 0, 255).astype('uint8')
def load_mask_labels():
    '''Load the style and target masks as one-hot boolean tensors.

    Both mask images are loaded at size (img_nrows, img_ncols), their pixels
    are stacked together and passed to `kmeans` with `nb_labels`, and the
    resulting per-pixel labels are expanded into one boolean plane per label.

    A mask image (nr x nc) with m labels/colors will be loaded as a
    4D boolean tensor: (1, m, nr, nc) for 'th' or (1, nr, nc, m) for 'tf'.

    Returns a tuple (style_mask, target_mask).
    '''
    image_size = (img_nrows, img_ncols)
    pixel_count = img_nrows * img_ncols

    target_pixels = img_to_array(load_img(target_mask_path,
                                          target_size=image_size))
    style_pixels = img_to_array(load_img(style_mask_path,
                                         target_size=image_size))

    channels_first = K.image_dim_ordering() == 'th'
    if channels_first:
        style_rows = style_pixels.reshape((3, -1)).T
        target_rows = target_pixels.reshape((3, -1)).T
    else:
        style_rows = style_pixels.reshape((-1, 3))
        target_rows = target_pixels.reshape((-1, 3))

    # Label style and target pixels jointly: style rows come first, target
    # rows second, so the label vector is split at pixel_count below.
    labels = kmeans(np.vstack([style_rows, target_rows]), nb_labels)
    style_label_map = labels[:pixel_count].reshape(image_size)
    target_label_map = labels[pixel_count:].reshape(image_size)

    stack_axis = 0 if channels_first else -1

    def one_hot(label_map):
        # One boolean plane per label, stacked along the channel axis,
        # with a leading batch axis of size 1.
        planes = [label_map == r for r in range(nb_labels)]
        return np.expand_dims(np.stack(planes, axis=stack_axis), axis=0)

    return (one_hot(style_label_map), one_hot(target_label_map))
def style_loss(style_image, target_image, style_masks, target_masks):
    '''Sum the weighted region style losses over all `nb_labels` regions.

    For every label index the matching 2D mask is sliced out of
    `style_masks` and `target_masks` (first axis for 'th' ordering, last
    axis otherwise) and `region_style_loss` is evaluated on it, scaled by
    `region_style_weight`.
    '''
    assert 3 == K.ndim(style_image) == K.ndim(target_image)
    assert 3 == K.ndim(style_masks) == K.ndim(target_masks)

    masks_first = K.image_dim_ordering() == 'th'
    loss = K.variable(0)
    for label in range(nb_labels):
        if masks_first:
            style_region = style_masks[label, :, :]
            target_region = target_masks[label, :, :]
        else:
            style_region = style_masks[:, :, label]
            target_region = target_masks[:, :, label]
        region_loss = region_style_loss(
            style_image, target_image, style_region, target_region)
        loss += region_style_weight * region_loss
    return loss
def build(input_shape, num_outputs, block_fn, repetitions):
    """Builds a custom ResNet like architecture.

    Args:
        input_shape: The input shape in the form (nb_channels, nb_rows, nb_cols)
        num_outputs: The number of outputs at final softmax layer
        block_fn: The block function to use. This is either `basic_block` or `bottleneck`.
            The original paper used basic_block for layers < 50
        repetitions: Number of repetitions of various block units.
            At each block unit, the number of filters are doubled and the input size is halved

    Returns:
        The keras `Model`.

    Raises:
        ValueError: If `input_shape` does not have exactly three entries.
    """
    # Validate first so that invalid input fails before any other work is
    # done. ValueError is a subclass of Exception, so callers that caught the
    # previous generic Exception keep working.
    if len(input_shape) != 3:
        raise ValueError("Input shape should be a tuple (nb_channels, nb_rows, nb_cols)")

    # NOTE(review): presumably this sets up ROW_AXIS / COL_AXIS used below --
    # confirm against the helper's definition.
    _handle_dim_ordering()

    # Permute dimension order if necessary
    if K.image_dim_ordering() == 'tf':
        input_shape = (input_shape[1], input_shape[2], input_shape[0])

    # Load function from str if needed.
    block_fn = _get_block(block_fn)

    # Named `model_input` to avoid shadowing the builtin `input`.
    model_input = Input(shape=input_shape)
    conv1 = _conv_bn_relu(filters=64, kernel_size=(7, 7), strides=(2, 2))(model_input)
    pool1 = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding="same")(conv1)

    block = pool1
    filters = 64
    for i, r in enumerate(repetitions):
        block = _residual_block(block_fn, filters=filters, repetitions=r,
                                is_first_layer=(i == 0))(block)
        filters *= 2

    # Last activation
    block = _bn_relu(block)

    # Classifier block: average-pool over the whole remaining feature map.
    block_shape = K.int_shape(block)
    pool2 = AveragePooling2D(pool_size=(block_shape[ROW_AXIS], block_shape[COL_AXIS]),
                             strides=(1, 1))(block)
    flatten1 = Flatten()(pool2)
    dense = Dense(units=num_outputs, kernel_initializer="he_normal",
                  activation="softmax")(flatten1)

    return Model(inputs=model_input, outputs=dense)
def region_style_loss(style_image, target_image, style_mask, target_mask):
    '''Return the style loss between two images restricted to one region.

    Each 3D image is multiplied by its 2D mask (after moving the last axis
    to the front unless the ordering is 'th'). The Gram matrix of each masked
    image is divided by the mean of its mask and by the channel count, and
    the mean squared difference of the two results is returned.
    '''
    assert 3 == K.ndim(style_image) == K.ndim(target_image)
    assert 2 == K.ndim(style_mask) == K.ndim(target_mask)

    if K.image_dim_ordering() == 'th':
        style_features = style_image
        target_features = target_image
        nb_channels = K.shape(style_image)[0]
    else:
        # Move the last axis to the front so the 2D mask broadcasts over it.
        style_features = K.permute_dimensions(style_image, (2, 0, 1))
        target_features = K.permute_dimensions(target_image, (2, 0, 1))
        nb_channels = K.shape(style_image)[-1]

    masked_style = style_features * style_mask
    masked_target = target_features * target_mask

    style_gram = gram_matrix(masked_style) / K.mean(style_mask) / nb_channels
    target_gram = gram_matrix(masked_target) / K.mean(target_mask) / nb_channels
    return K.mean(K.square(style_gram - target_gram))
(img_nrows, img_ncols)) target_mask_label = labels[img_nrows * img_ncols:].reshape( (img_nrows, img_ncols)) stack_axis = 0 if K.image_dim_ordering() == 'th' else -1 style_mask = np.stack([style_mask_label == r for r in range(nb_labels)], axis=stack_axis) target_mask = np.stack([target_mask_label == r for r in range(nb_labels)], axis=stack_axis) return (np.expand_dims(style_mask, axis=0), np.expand_dims(target_mask, axis=0)) # Create tensor variables for images if K.image_dim_ordering() == 'th': shape = (1, nb_colors, img_nrows, img_ncols) else: shape = (1, img_nrows, img_ncols, nb_colors) style_image = K.variable(preprocess_image(style_img_path)) target_image = K.placeholder(shape=shape) if use_content_img: content_image = K.variable(preprocess_image(content_img_path)) else: content_image = K.zeros(shape=shape) images = K.concatenate([style_image, target_image, content_image], axis=0) # Create tensor variables for masks raw_style_mask, raw_target_mask = load_mask_labels()
def to_plot(img):
    """Return ``img`` as a ``uint8`` array after rolling its first axis.

    For 'tf' ordering axis 0 is rolled to position 1, which leaves the axis
    order unchanged; otherwise axis 0 is rolled to position 3, i.e. moved to
    the end of a 3D array.
    """
    destination = 1 if K.image_dim_ordering() == 'tf' else 3
    return np.rollaxis(img, 0, destination).astype(np.uint8)