def to_3d_135(cost_volume_135):
    feature = 4 * 9
    # Squeeze-and-excitation style channel attention over the 135-degree cost volume.
    channel_135 = GlobalAveragePooling3D(data_format='channels_last')(cost_volume_135)
    channel_135 = Lambda(lambda y: K.expand_dims(
        K.expand_dims(K.expand_dims(y, 1), 1), 1))(channel_135)
    # Conv3D filter counts must be integers, hence floor division.
    channel_135 = Conv3D(feature // 2, 1, 1, 'same', data_format='channels_last')(channel_135)
    channel_135 = Activation('relu')(channel_135)
    channel_135 = Conv3D(3, 1, 1, 'same', data_format='channels_last')(channel_135)
    channel_135 = Activation('sigmoid')(channel_135)
    # Spread the 3 learned weights over the 9 diagonal views: weight 0 covers the
    # four views on one side of the centre, weight 1 the centre, weight 2 the rest.
    channel_135 = Lambda(lambda y: K.concatenate([
        y[:, :, :, :, 0:1], y[:, :, :, :, 0:1], y[:, :, :, :, 0:1], y[:, :, :, :, 0:1],
        y[:, :, :, :, 1:2],
        y[:, :, :, :, 2:3], y[:, :, :, :, 2:3], y[:, :, :, :, 2:3], y[:, :, :, :, 2:3]
    ], axis=-1))(channel_135)
    channel_135 = Lambda(lambda y: K.reshape(y, (K.shape(y)[0], 1, 1, 1, 9)))(channel_135)
    channel_135 = Lambda(lambda y: K.repeat_elements(y, 4, -1))(channel_135)
    cv_135_tmp = multiply([channel_135, cost_volume_135])

    # Second attention stage, computed per position on the re-weighted volume.
    cv_135_tmp = Conv3D(feature // 2, 1, 1, 'same', data_format='channels_last')(cv_135_tmp)
    cv_135_tmp = Activation('relu')(cv_135_tmp)
    cv_135_tmp = Conv3D(3, 1, 1, 'same', data_format='channels_last')(cv_135_tmp)
    cv_135_tmp = Activation('sigmoid')(cv_135_tmp)
    attention_135 = Lambda(lambda y: K.concatenate([
        y[:, :, :, :, 0:1], y[:, :, :, :, 0:1], y[:, :, :, :, 0:1], y[:, :, :, :, 0:1],
        y[:, :, :, :, 1:2],
        y[:, :, :, :, 2:3], y[:, :, :, :, 2:3], y[:, :, :, :, 2:3], y[:, :, :, :, 2:3]
    ], axis=-1))(cv_135_tmp)
    attention_135 = Lambda(lambda y: K.repeat_elements(y, 4, -1))(attention_135)
    cv_135_multi = multiply([attention_135, cost_volume_135])

    # Aggregate the attended volume with chained 3D conv-bn blocks.
    dres3 = convbn_3d(cv_135_multi, feature, 3, 1)
    dres3 = Activation('relu')(dres3)
    dres3 = convbn_3d(dres3, feature // 2, 3, 1)
    dres3 = Activation('relu')(dres3)
    dres3 = convbn_3d(dres3, feature // 2, 3, 1)
    dres3 = Activation('relu')(dres3)
    dres3 = convbn_3d(dres3, feature // 4, 3, 1)
    dres3 = Activation('relu')(dres3)
    dres3 = convbn_3d(dres3, 1, 3, 1)
    cost3 = Activation('relu')(dres3)
    # (B, D, H, W, 1) -> (B, H, W, D): drop the channel axis, move disparity last.
    cost3 = Lambda(lambda x: K.permute_dimensions(K.squeeze(x, -1), (0, 2, 3, 1)))(cost3)
    return cost3, cv_135_multi
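# --- Usage sketch (added; hedged, not from the original source) ---
# Minimal wiring of to_3d_135 into a Keras model, assuming the Keras 2.x
# functional API, a 9x9 light field with 4 feature channels per view, and a
# convbn_3d helper (Conv3D + BatchNormalization) defined elsewhere in this file.
def _demo_to_3d_135(height=32, width=32):
    from keras.layers import Input
    from keras.models import Model
    # (disparity candidates, H, W, views * channels) for the 135-degree diagonal
    cv = Input(shape=(9, height, width, 4 * 9))
    cost3, _ = to_3d_135(cv)  # cost3: (B, H, W, 9) per-disparity scores
    return Model(inputs=cv, outputs=cost3)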
def _get_135_CostVolume_(inputs):
    # inputs: list of 9 view tensors (B, H, W, C) lying on the 135-degree
    # diagonal of a 9x9 view grid, so view i sits at angular position
    # (v, u) = (i, 8 - i).
    shape = K.shape(inputs[0])
    disparity_costs = []
    for d in range(-4, 5):
        if d == 0:
            # Zero disparity: no shift needed.
            tmp_list = list(inputs)
        else:
            tmp_list = []
            for i in range(len(inputs)):
                v = i
                u = 8 - i
                # Shift each view towards the centre view (4, 4) by d pixels
                # per unit of angular distance.
                tensor = tf.contrib.image.translate(
                    inputs[i], [d * (u - 4), d * (v - 4)], 'BILINEAR')
                tmp_list.append(tensor)
        cost = K.concatenate(tmp_list, axis=3)
        disparity_costs.append(cost)
    cost_volume = K.stack(disparity_costs, axis=1)
    cost_volume = K.reshape(cost_volume, (shape[0], 9, shape[1], shape[2], 4 * 9))
    return cost_volume
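# --- Usage sketch (added; hedged, not from the original source) ---
# _get_135_CostVolume_ is typically wrapped in a Lambda layer so the shifts run
# inside the Keras graph. Shapes here are assumptions for illustration. Note
# that tf.contrib.image.translate exists only in TF 1.x; under TF 2.x the
# closest drop-in replacement is tensorflow_addons' tfa.image.translate.
def _demo_135_cost_volume(height=32, width=32):
    from keras.layers import Input, Lambda
    from keras.models import Model
    views_135 = [Input(shape=(height, width, 4)) for _ in range(9)]
    cv_135 = Lambda(_get_135_CostVolume_)(views_135)  # (B, 9, H, W, 36)
    return Model(inputs=views_135, outputs=cv_135)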
def call(self, x, mask=None):
    assert len(x) == 2
    img = x[0]   # feature map: (1, rows, cols, channels)
    rois = x[1]  # regions of interest: (1, num_rois, 4) as (x, y, w, h)
    outputs = []
    for roi_idx in range(self.num_rois):
        # The name x is reused for the RoI coordinate once the input list is unpacked.
        x = rois[0, roi_idx, 0]
        y = rois[0, roi_idx, 1]
        w = rois[0, roi_idx, 2]
        h = rois[0, roi_idx, 3]
        # NOTE: the RoiPooling implementation differs between Theano and
        # TensorFlow due to the lack of a resize op in Theano. The Theano
        # implementation is much less efficient and leads to long compile times.
        x = K.cast(x, 'int32')
        y = K.cast(y, 'int32')
        w = K.cast(w, 'int32')
        h = K.cast(h, 'int32')
        # Crop the RoI and bilinearly resize it to a fixed pooling grid.
        rs = tf.image.resize_images(img[:, y:y + h, x:x + w, :],
                                    (self.pool_size, self.pool_size))
        outputs.append(rs)
    final_output = K.concatenate(outputs, axis=0)
    final_output = K.reshape(final_output, (1, self.num_rois, self.pool_size,
                                            self.pool_size, self.nb_channels))
    return final_output
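# --- Equivalent-op sketch (added; hedged, not from the original source) ---
# The crop-and-resize loop above can also be expressed with TensorFlow's
# built-in op, which takes normalized [y1, x1, y2, x2] boxes and handles all
# RoIs in one call. Names and shapes here are illustrative assumptions.
def _roi_crop_and_resize(img, boxes, pool_size):
    """img: (1, H, W, C); boxes: (N, 4) normalized [y1, x1, y2, x2]."""
    import tensorflow as tf
    box_indices = tf.zeros([tf.shape(boxes)[0]], dtype=tf.int32)  # all from image 0
    return tf.image.crop_and_resize(img, boxes, box_indices,
                                    (pool_size, pool_size))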
def channel_attention(cost_volume):
    x = GlobalAveragePooling3D()(cost_volume)
    x = Lambda(lambda y: K.expand_dims(K.expand_dims(K.expand_dims(y, 1), 1), 1))(x)
    x = Conv3D(170, 1, 1, 'same')(x)
    x = Activation('relu')(x)
    x = Conv3D(15, 1, 1, 'same')(x)  # [B, 1, 1, 1, 15]
    x = Activation('sigmoid')(x)
    # Expand the 15 weights to a symmetric 5x5 grid (15 -> 25):
    #
    #   weight indices       symmetric 5x5 layout
    #   0  1  2  3  4        0  1  2  3  4
    #      5  6  7  8        1  5  6  7  8
    #         9 10 11        2  6  9 10 11
    #           12 13        3  7 10 12 13
    #              14        4  8 11 13 14
    x = Lambda(lambda y: K.concatenate([
        y[:, :, :, :, 0:5],
        y[:, :, :, :, 1:2], y[:, :, :, :, 5:9],
        y[:, :, :, :, 2:3], y[:, :, :, :, 6:7], y[:, :, :, :, 9:12],
        y[:, :, :, :, 3:4], y[:, :, :, :, 7:8], y[:, :, :, :, 10:11], y[:, :, :, :, 12:14],
        y[:, :, :, :, 4:5], y[:, :, :, :, 8:9], y[:, :, :, :, 11:12], y[:, :, :, :, 13:15]
    ], axis=-1))(x)
    x = Lambda(lambda y: K.reshape(y, (K.shape(y)[0], 5, 5)))(x)
    # Mirror-pad the 5x5 grid to the full 9x9 view grid (25 -> 81).
    x = Lambda(lambda y: tf.pad(y, [[0, 0], [0, 4], [0, 4]], 'REFLECT'))(x)
    attention = Lambda(lambda y: K.reshape(y, (K.shape(y)[0], 1, 1, 1, 81)))(x)
    # Repeat each view weight for the 4 feature channels per view (81 -> 324).
    x = Lambda(lambda y: K.repeat_elements(y, 4, -1))(attention)
    return multiply([x, cost_volume]), attention
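# --- Usage sketch (added; hedged, not from the original source) ---
# channel_attention expects the full 9x9-view cost volume with 4 channels per
# view, i.e. a (B, D, H, W, 4 * 81) tensor. Shapes below are assumptions.
def _demo_channel_attention(height=32, width=32):
    from keras.layers import Input
    from keras.models import Model
    cv = Input(shape=(9, height, width, 4 * 81))
    reweighted, attention = channel_attention(cv)
    return Model(inputs=cv, outputs=[reweighted, attention])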
# Subtract the ImageNet channel means used by VGG16, then switch to BGR order.
content_array[:, :, :, 0] -= 103.939
content_array[:, :, :, 1] -= 116.779
content_array[:, :, :, 2] -= 123.68
style_array[:, :, :, 0] -= 103.939
style_array[:, :, :, 1] -= 116.779
style_array[:, :, :, 2] -= 123.68
content_array = content_array[:, :, :, ::-1]
style_array = style_array[:, :, :, ::-1]

# Create the backend variables. In our case, TensorFlow.
content_image = K.variable(content_array)
style_image = K.variable(style_array)
combination_image = K.placeholder((1, height, width, 3))

# Concatenate all tensors into a single batch.
input_tensor = K.concatenate([content_image, style_image,
                              combination_image], axis=0)

# Load the VGG16 model from Keras. We are only interested in the features
# from the different layers, hence we omit the dense layers at the top.
model = applications.VGG16(input_tensor=input_tensor,
                           weights='imagenet',
                           include_top=False)

# Store the layers of the model. We'll need them to refer to the layers we
# want to use for the transfer.
layers = dict([(layer.name, layer.output) for layer in model.layers])

# Define the total loss. We'll add to it in stages.
loss = K.variable(0.)
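# --- Deprocessing sketch (added; hedged, not from the original source) ---
# The inverse of the preprocessing above: flip BGR back to RGB, restore the
# subtracted channel means, and clip to valid pixel values.
def deprocess_array(x, height, width):
    import numpy as np
    x = x.reshape((height, width, 3))
    x = x[:, :, ::-1]      # BGR -> RGB
    x[:, :, 0] += 103.939  # add the channel means subtracted above
    x[:, :, 1] += 116.779
    x[:, :, 2] += 123.68
    return np.clip(x, 0, 255).astype('uint8')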
def step(self, a, states):
    """
    :param a: ground-truth frame
    :param states: list
        [:-2]: r, c, e (self.nb_layers entries each)
        [-2:] (only if self.extrap_start_time is not None): [frame_prediction, t + 1]
    :return: output, updated states
    """
    r_tm1 = states[:self.nb_layers]
    c_tm1 = states[self.nb_layers:2 * self.nb_layers]
    e_tm1 = states[2 * self.nb_layers:3 * self.nb_layers]

    if self.extrap_start_time is not None:
        t = states[-1]
        # Past self.extrap_start_time, the previous prediction is treated as the actual input.
        a = K.switch(t >= self.t_extrap, states[-2], a)

    c = []
    r = []
    e = []

    # Top-down pass: update the representation (R) units from the top layer down.
    for l in reversed(range(self.nb_layers)):
        inputs = [r_tm1[l], e_tm1[l]]
        if l < self.nb_layers - 1:
            inputs.append(r_up)
        inputs = K.concatenate(inputs, axis=self.channel_axis)
        i = self.conv_layers['i'][l].call(inputs)
        f = self.conv_layers['f'][l].call(inputs)
        o = self.conv_layers['o'][l].call(inputs)
        _c = f * c_tm1[l] + i * self.conv_layers['c'][l].call(inputs)
        _r = o * self.LSTM_activation(_c)
        c.insert(0, _c)
        r.insert(0, _r)
        if l > 0:
            r_up = self.upsample.call(_r)  # upsample R for the layer below

    # Bottom-up pass: compute predictions (Ahat) and errors (E).
    for l in range(self.nb_layers):
        ahat = self.conv_layers['ahat'][l].call(r[l])
        if l == 0:
            ahat = K.minimum(ahat, self.pixel_max)
            frame_prediction = ahat
            # Threshold: pixels at or above self.threshold become 0.5, the rest 0.
            where = K.greater_equal(frame_prediction, K.constant(self.threshold))
            frame_prediction = tf.where(
                where,
                0.5 * tf.ones_like(frame_prediction, dtype=tf.float32),
                tf.zeros_like(frame_prediction, dtype=tf.float32))

        # Split the prediction error into positive and negative parts.
        e_up = ahat - a
        e_down = a - ahat

        # ROI loss: emphasize lowest-layer errors where the target is active.
        if l == 0 and self.use_roi_loss:
            e_up = tf.add(e_up, tf.multiply(e_up, a, name='multiply_up_err'),
                          name='add_up_err')
            e_down = tf.add(e_down, tf.multiply(e_down, a, name='multiply_down_err'),
                            name='add_down_err')

        e_up = self.error_activation(e_up)
        e_down = self.error_activation(e_down)
        e.append(K.concatenate((e_up, e_down), axis=self.channel_axis))

        if self.output_layer_num == l:
            if self.output_layer_type == 'A':
                output = a
            elif self.output_layer_type == 'Ahat':
                output = ahat
            elif self.output_layer_type == 'R':
                output = r[l]
            elif self.output_layer_type == 'E':
                output = e[l]

        if l < self.nb_layers - 1:
            a = self.conv_layers['a'][l].call(e[l])
            a = self.pool.call(a)  # target for the next layer (downsampling)

    if self.output_layer_type is None:
        if self.output_mode == 'prediction':
            output = frame_prediction
        else:
            # Accumulate each layer's mean error into a single vector.
            for l in range(self.nb_layers):
                layer_error = K.mean(K.batch_flatten(e[l]), axis=-1, keepdims=True)
                all_error = layer_error if l == 0 else K.concatenate(
                    (all_error, layer_error), axis=-1)
            if self.output_mode == 'error':
                output = all_error
            else:
                output = K.concatenate(
                    (K.batch_flatten(frame_prediction), all_error), axis=-1)

    states = r + c + e
    if self.extrap_start_time is not None:
        states += [frame_prediction, t + 1]
    return output, states
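# --- Thresholding sketch (added; hedged, not from the original source) ---
# The tf.where call in step() binarizes the predicted frame: pixels at or
# above self.threshold become 0.5 and the rest 0. The same op in isolation;
# the default threshold value here is an assumption for illustration.
def _threshold_prediction(frame, threshold=0.3):
    import tensorflow as tf
    mask = tf.greater_equal(frame, tf.constant(threshold))
    return tf.where(mask,
                    0.5 * tf.ones_like(frame, dtype=tf.float32),
                    tf.zeros_like(frame, dtype=tf.float32))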
def main(_):
    width, height = preprocessing.image.load_img(FLAGS.target_img_path).size
    gen_img_height = 400
    gen_img_width = int(width * gen_img_height / height)

    target_x = preprocess_img(FLAGS.target_img_path,
                              target_size=(gen_img_height, gen_img_width))
    target_img = K.constant(target_x)
    style_x = preprocess_img(FLAGS.style_img_path,
                             target_size=(gen_img_height, gen_img_width))
    style_img = K.constant(style_x)
    combination_img = K.placeholder(shape=(1, gen_img_height, gen_img_width, 3))
    input_tensor = K.concatenate([target_img, style_img, combination_img], axis=0)

    model = applications.vgg19.VGG19(input_tensor=input_tensor,
                                     weights='imagenet',
                                     include_top=False)
    model.summary()

    outputs_dict = dict([(layer.name, layer.output) for layer in model.layers])
    content_layer = 'block5_conv2'
    style_layers = ['block1_conv1', 'block2_conv1', 'block3_conv1',
                    'block4_conv1', 'block5_conv1']
    total_variation_weight = 1e-4
    style_weight = 1.0
    content_weight = 0.025

    loss = K.variable(0.)
    layer_features = outputs_dict[content_layer]
    target_img_features = layer_features[0, :, :, :]
    combination_features = layer_features[2, :, :, :]
    loss += content_weight * content_loss(target_img_features, combination_features)
    for layer_name in style_layers:
        layer_features = outputs_dict[layer_name]
        style_features = layer_features[1, :, :, :]
        combination_features = layer_features[2, :, :, :]
        sl = style_loss(style_features, combination_features,
                        target_size=(gen_img_height, gen_img_width))
        loss += (style_weight / len(style_layers)) * sl
    loss += total_variation_weight * total_variation_loss(
        combination_img, target_size=(gen_img_height, gen_img_width))

    # Set up gradient descent.
    grads_list = K.gradients(loss, combination_img)
    grads = grads_list[0]
    fetch_loss_and_grads = K.function(inputs=[combination_img], outputs=[loss, grads])
    lossAndGradsCache = LossAndGradsCache(fetch_loss_and_grads,
                                          target_size=(gen_img_height, gen_img_width))

    x = preprocess_img(FLAGS.target_img_path,
                       target_size=(gen_img_height, gen_img_width))
    x = x.flatten()
    for i in range(FLAGS.iterations):
        start_time = time.time()
        x, min_val, info = fmin_l_bfgs_b(lossAndGradsCache.loss, x,
                                         fprime=lossAndGradsCache.grads,
                                         maxfun=20)
        print('@{:4d}: {:.4f}'.format(i + 1, min_val))

        x_copy = x.copy().reshape((gen_img_height, gen_img_width, 3))
        print(np.min(x_copy), np.mean(x_copy), np.max(x_copy))

        img = deprocess_img(x_copy)
        os.makedirs('out', exist_ok=True)
        filename = 'out/result_{:04d}.png'.format(i + 1)
        imsave(filename, img)
        print('Iteration took {:.1f}s'.format(time.time() - start_time))
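# --- LossAndGradsCache sketch (added; hedged, not from the original source) ---
# main() relies on a LossAndGradsCache whose .loss and .grads methods let
# fmin_l_bfgs_b obtain the loss and its gradient from a single fetch of the
# Keras function built above. A minimal version of that caching pattern,
# under the same assumptions:
class _LossAndGradsCacheSketch(object):
    def __init__(self, fetch_loss_and_grads, target_size):
        self._fetch = fetch_loss_and_grads
        self._height, self._width = target_size
        self._grads = None

    def loss(self, x):
        outs = self._fetch([x.reshape((1, self._height, self._width, 3))])
        self._grads = outs[1].flatten().astype('float64')  # cache for grads()
        return outs[0]

    def grads(self, x):
        return self._grads  # computed by the matching loss() call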