def call(self, inputs):
    # Input = [X^H, X^L]
    assert len(inputs) == 2
    high_input, low_input = inputs

    # Transpose Convolution: High Channels -> High Channels
    high_to_high = K.conv2d_transpose(high_input, self.high_to_high_kernel,
                                      output_shape=self.high_out_shape,
                                      strides=self.strides, padding=self.padding,
                                      data_format="channels_last")

    # Transpose Convolution: High Channels -> Low Channels
    high_to_low = K.pool2d(high_input, (2, 2), strides=(2, 2), pool_mode="avg")
    high_to_low = K.conv2d_transpose(high_to_low, self.high_to_low_kernel,
                                     output_shape=self.low_out_shape,
                                     strides=self.strides, padding=self.padding,
                                     data_format="channels_last")

    # Transpose Convolution: Low Channels -> High Channels
    # Note: there is an intermediate output size before upsampling
    high_out_channels = self.high_out_shape[3]
    low_out_N, low_out_W, low_out_H = self.low_out_shape[:3]
    intermediate_shape = (low_out_N, low_out_W, low_out_H, high_out_channels)
    low_to_high = K.conv2d_transpose(low_input, self.low_to_high_kernel,
                                     output_shape=intermediate_shape,
                                     strides=self.strides, padding=self.padding,
                                     data_format="channels_last")
    # Nearest-neighbour upsampling back to the high-resolution grid
    low_to_high = K.repeat_elements(low_to_high, 2, axis=1)
    low_to_high = K.repeat_elements(low_to_high, 2, axis=2)

    # Transpose Convolution: Low Channels -> Low Channels
    low_to_low = K.conv2d_transpose(low_input, self.low_to_low_kernel,
                                    output_shape=self.low_out_shape,
                                    strides=self.strides, padding=self.padding,
                                    data_format="channels_last")

    # Cross add
    high_add = high_to_high + low_to_high
    low_add = high_to_low + low_to_low
    return [high_add, low_add]
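# --- Hedged aside (not part of the original layer) ----------------------------------
# A minimal NumPy sketch of the nearest-neighbour upsampling performed above with
# K.repeat_elements along axis 1 and then axis 2 of an NHWC tensor. The array `x`
# and its shape are illustrative only.
import numpy as np

x = np.arange(4, dtype=np.float32).reshape(1, 2, 2, 1)  # NHWC, 2x2 spatial grid
up = np.repeat(np.repeat(x, 2, axis=1), 2, axis=2)       # 4x4: each pixel copied into a 2x2 block
print(up[0, :, :, 0])
# [[0. 0. 1. 1.]
#  [0. 0. 1. 1.]
#  [2. 2. 3. 3.]
#  [2. 2. 3. 3.]]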
def gconv2d(x, kernel, gconv_indices, gconv_shape_info, strides=(1, 1), padding='valid',
            data_format=None, dilation_rate=(1, 1), transpose=False, output_shape=None):
    """2D group equivariant convolution.

    # Arguments
        x: Tensor or variable.
        kernel: kernel tensor.
        strides: strides tuple.
        padding: string, `"same"` or `"valid"`.
        data_format: string, `"channels_last"` or `"channels_first"`.
            Whether to use Theano or TensorFlow data format
            for inputs/kernels/outputs.
        dilation_rate: tuple of 2 integers.

    # Returns
        A tensor, result of 2D convolution.

    # Raises
        ValueError: if `data_format` is neither `channels_last` nor `channels_first`.
    """
    # Transform the filters: expand the base filters to one copy per group element
    transformed_filter = transform_filter_2d_nhwc(w=kernel, flat_indices=gconv_indices,
                                                  shape_info=gconv_shape_info)
    if transpose:
        output_shape = (K.shape(x)[0], output_shape[1], output_shape[2], output_shape[3])
        # conv2d_transpose expects the kernel as (kh, kw, out_channels, in_channels),
        # so swap the last two axes of the transformed filter
        transformed_filter = K.permute_dimensions(transformed_filter, [0, 1, 3, 2])
        return K.conv2d_transpose(x=x, kernel=transformed_filter, output_shape=output_shape,
                                  strides=strides, padding=padding, data_format=data_format)
    return K.conv2d(x=x, kernel=transformed_filter, strides=strides, padding=padding,
                    data_format=data_format, dilation_rate=dilation_rate)
def draw_image(candidate, placement, template):
    char_height, char_width = template.shape[:2]
    batch_size, height, width = K.int_shape(candidate)[:3]
    # Stamp one weighted copy of the character template per grid cell; the vertical
    # stride equals the template height, so rows tile without overlapping.
    image = K.conv2d_transpose(candidate * placement, template,
                               output_shape=(batch_size, height * char_height,
                                             width + char_width - 1, 1),
                               strides=(char_height, 1), padding="valid")
    # Crop the char_width - 1 extra columns produced by the valid-padding transpose convolution
    return image[:, :, :-char_width + 1, :]
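# --- Hedged aside (not part of the original code) -----------------------------------
# A toy TensorFlow sketch of the "stamping" trick used in draw_image: a transposed
# convolution whose vertical stride equals the template height places one weighted
# copy of the template per grid cell. All names and shapes here are illustrative;
# assumes TensorFlow 2.x.
import numpy as np
import tensorflow as tf

char_height, char_width = 3, 2
template = tf.ones((char_height, char_width, 1, 1))                       # (kh, kw, out_ch, in_ch)
weights = tf.constant(np.eye(4, dtype=np.float32).reshape(1, 4, 4, 1))    # 4x4 grid, NHWC
stamped = tf.nn.conv2d_transpose(weights, template,
                                 output_shape=[1, 4 * char_height, 4 + char_width - 1, 1],
                                 strides=[char_height, 1], padding="VALID")
print(stamped.shape)  # (1, 12, 5, 1): each nonzero grid cell becomes a 3x2 block of ones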
def draw_mask(candidate, placement, mask_template, space_only=False):
    char_height, char_width = mask_template.shape[:2]
    batch_size, height, width = K.int_shape(candidate)[:3]
    if space_only:
        # Restrict to the first two channels when drawing the space-only mask
        mask_template = mask_template[:, :, :, :2]
        candidate = candidate[:, :, :, :2]
    mask = K.conv2d_transpose(candidate * placement, mask_template,
                              output_shape=(batch_size, height * char_height,
                                            width + char_width - 1, 1),
                              strides=(char_height, 1), padding="valid")
    # Crop the char_width - 1 extra columns produced by the valid-padding transpose convolution
    return mask[:, :, :-char_width + 1, :]
def call(self, input_tensor, training=None):
    input_transposed = tf.transpose(input_tensor, [3, 0, 1, 2, 4])
    input_shape = K.shape(input_transposed)
    input_tensor_reshaped = K.reshape(input_transposed, [
        input_shape[1] * input_shape[0], self.input_height, self.input_width,
        self.input_num_atoms])
    input_tensor_reshaped.set_shape((None, self.input_height, self.input_width,
                                     self.input_num_atoms))

    if self.upsamp_type == 'resize':
        upsamp = K.resize_images(input_tensor_reshaped, self.scaling, self.scaling,
                                 'channels_last')
        outputs = K.conv2d(upsamp, kernel=self.W, strides=(1, 1),
                           padding=self.padding, data_format='channels_last')
    elif self.upsamp_type == 'subpix':
        conv = K.conv2d(input_tensor_reshaped, kernel=self.W, strides=(1, 1),
                        padding='same', data_format='channels_last')
        outputs = tf.depth_to_space(conv, self.scaling)
    else:
        batch_size = input_shape[1] * input_shape[0]

        # Infer the dynamic output shape:
        out_height = deconv_length(self.input_height, self.scaling, self.kernel_size,
                                   self.padding, output_padding=None)
        out_width = deconv_length(self.input_width, self.scaling, self.kernel_size,
                                  self.padding, output_padding=None)
        output_shape = (batch_size, out_height, out_width,
                        self.num_capsule * self.num_atoms)

        outputs = K.conv2d_transpose(input_tensor_reshaped, self.W, output_shape,
                                     (self.scaling, self.scaling),
                                     padding=self.padding, data_format='channels_last')

    votes_shape = K.shape(outputs)
    _, conv_height, conv_width, _ = outputs.get_shape()

    votes = K.reshape(outputs, [input_shape[1], input_shape[0], votes_shape[1],
                                votes_shape[2], self.num_capsule, self.num_atoms])
    votes.set_shape((None, self.input_num_capsule, conv_height, conv_width,
                     self.num_capsule, self.num_atoms))

    logit_shape = K.stack([
        input_shape[1], input_shape[0], votes_shape[1], votes_shape[2],
        self.num_capsule])
    biases_replicated = K.tile(self.b, [votes_shape[1], votes_shape[2], 1, 1])

    activations = update_routing(
        votes=votes,
        biases=biases_replicated,
        logit_shape=logit_shape,
        num_dims=6,
        input_dim=self.input_num_capsule,
        output_dim=self.num_capsule,
        num_routing=self.routings)

    return activations
def call(self, inputs):
    inputs_shape = array_ops.shape(inputs)
    batch_size = inputs_shape[0]
    if self.data_format == 'channels_first':
        h_axis, w_axis = 2, 3
    else:
        h_axis, w_axis = 1, 2

    height, width = inputs_shape[h_axis], inputs_shape[w_axis]
    kernel_h, kernel_w = self.kernel_size
    stride_h, stride_w = self.strides

    if self.output_padding is None:
        out_pad_h = out_pad_w = None
    else:
        out_pad_h, out_pad_w = self.output_padding

    # Hard-code the output shape because Keras screws this up:
    # this only works for padding='same'
    out_height = inputs_shape[h_axis] * stride_h
    out_width = inputs_shape[w_axis] * stride_w

    if self.data_format == 'channels_first':
        output_shape = (batch_size, self.filters, out_height, out_width)
    else:
        output_shape = (batch_size, out_height, out_width, self.filters)

    outputs = K.conv2d_transpose(inputs, self.kernel, output_shape, self.strides,
                                 padding=self.padding, data_format=self.data_format,
                                 dilation_rate=self.dilation_rate)

    if self.use_bias:
        outputs = K.bias_add(outputs, self.bias, data_format=self.data_format)

    if self.activation is not None:
        return self.activation(outputs)
    return outputs
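# --- Hedged aside (not part of the original layer) -----------------------------------
# Quick sanity check of the hard-coded rule above: with padding='same' and no output
# padding, a transposed convolution's output length is simply input_length * stride,
# independent of the kernel size. The helper name below is illustrative only.
def same_deconv_length(input_length, stride):
    return input_length * stride

assert same_deconv_length(7, 2) == 14   # 7x7 feature map, stride 2 -> 14x14
assert same_deconv_length(16, 1) == 16  # stride 1 keeps the spatial size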
def call(self, inputs, training=None):
    input_shape = K.shape(inputs)
    batch_size = input_shape[0]
    if self.data_format == 'channels_first':
        h_axis, w_axis = 2, 3
    else:
        h_axis, w_axis = 1, 2

    height, width = input_shape[h_axis], input_shape[w_axis]
    kernel_h, kernel_w = self.kernel_size
    stride_h, stride_w = self.strides
    out_pad_h = out_pad_w = None

    # Infer the dynamic output shape:
    out_height = conv_utils.deconv_output_length(
        height, kernel_h, self.padding, stride=stride_h)
    out_width = conv_utils.deconv_output_length(
        width, kernel_w, self.padding, stride=stride_w)

    if self.data_format == 'channels_first':
        output_shape = (batch_size, self.filters, out_height, out_width)
    else:
        output_shape = (batch_size, out_height, out_width, self.filters)

    outputs = K.conv2d_transpose(
        inputs,
        self.compute_spectral_normal(training=training),
        output_shape,
        self.strides,
        padding=self.padding,
        data_format=self.data_format)

    if self.use_bias:
        outputs = K.bias_add(outputs, self.bias, data_format=self.data_format)

    if self.activation is not None:
        return self.activation(outputs)
    return outputs
def call(self, inputs, training=None):
    if training is None:
        training = K.learning_phase()

    # Get the output space
    inputSpace = K.shape(inputs)
    batchSize = inputSpace[0]
    height, width = inputSpace[1], inputSpace[2]
    kHeight, kWidth = self.kernel_size
    sHeight, sWidth = self.strides
    if self.output_padding is None:
        opHeight = opWidth = None
    else:
        opHeight, opWidth = self.output_padding
    outHeight = conv_utils.deconv_output_length(height, kHeight, self.padding,
                                                opHeight, sHeight)
    outWidth = conv_utils.deconv_output_length(width, kWidth, self.padding,
                                               opWidth, sWidth)
    outputSpace = (batchSize, outHeight, outWidth, self.filters)

    self.kernel.assign(self._computeWeights(training))
    output = K.conv2d_transpose(inputs, self.kernel, outputSpace,
                                strides=self.strides, padding=self.padding,
                                data_format=self.data_format,
                                dilation_rate=self.dilation_rate)

    if self.use_bias:
        output = K.bias_add(output, self.bias, data_format=self.data_format)
    if self.activation is not None:
        output = self.activation(output)
    return output
def conv2d_transpose(
        inputs,
        filter,  # pylint: disable=redefined-builtin
        kernel_size=None,
        filters=None,
        strides=(1, 1),
        padding="same",
        output_padding=None,
        data_format="channels_last"):
    """Compatibility layer for K.conv2d_transpose.

    Takes a filter defined for a forward convolution and adjusts it for a
    transposed convolution."""
    input_shape = inputs.shape
    batch_size = input_shape[0]
    if data_format == 'channels_first':
        h_axis, w_axis = 2, 3
    else:
        h_axis, w_axis = 1, 2

    height, width = input_shape[h_axis], input_shape[w_axis]
    kernel_h, kernel_w = kernel_size
    stride_h, stride_w = strides

    # Infer the dynamic output shape:
    out_height = conv_utils.deconv_output_length(input_length=height,
                                                 filter_size=kernel_h,
                                                 padding=padding,
                                                 output_padding=output_padding,
                                                 stride=stride_h)
    out_width = conv_utils.deconv_output_length(input_length=width,
                                                filter_size=kernel_w,
                                                padding=padding,
                                                output_padding=output_padding,
                                                stride=stride_w)
    if data_format == 'channels_first':
        output_shape = (batch_size, filters, out_height, out_width)
    else:
        output_shape = (batch_size, out_height, out_width, filters)

    # K.conv2d_transpose expects the kernel as (kh, kw, out_channels, in_channels),
    # so swap the last two axes of the forward-convolution filter
    filter = K.permute_dimensions(filter, (0, 1, 3, 2))
    return K.conv2d_transpose(inputs, filter, output_shape, strides,
                              padding=padding, data_format=data_format)
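# --- Hedged aside (not part of the original module) ----------------------------------
# A minimal usage sketch for the compatibility wrapper above. It assumes TensorFlow 2.x,
# `from tensorflow.keras import backend as K`, and a `conv_utils` module providing
# `deconv_output_length` (as imported by the surrounding file); the tensor shapes below
# are illustrative only.
import numpy as np
import tensorflow as tf

x = tf.constant(np.random.rand(2, 8, 8, 16), dtype=tf.float32)   # NHWC input
w = tf.constant(np.random.rand(3, 3, 16, 32), dtype=tf.float32)  # forward-conv kernel: (kh, kw, in, out)
y = conv2d_transpose(x, w, kernel_size=(3, 3), filters=32,
                     strides=(2, 2), padding="same")
print(y.shape)  # (2, 16, 16, 32): padding="same" with stride 2 doubles the spatial size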
def call(self, input_tensor, training=None):
    z_app = np.prod(self.app_dim)
    z_pos = np.prod(self.pos_dim)

    # For each child (input) capsule (t_0), project into every parent (output) capsule domain (t_1)
    idx_all = 0
    u_hat_t_list = []
    # Split input_tensor by capsule type
    u_t_list = [tf.squeeze(u_t, axis=1)
                for u_t in tf.split(input_tensor, self.input_num_capsule, axis=1)]
    for u_t in u_t_list:
        u_t = tf.reshape(u_t, [self.batch * self.channels, self.input_height,
                               self.input_width, 1])
        u_t.set_shape((None, self.input_height, self.input_width, 1))

        # Apply spatial kernel
        # Incorporate local neighborhood information by learning a convolution kernel
        # of size k x k for the pose and appearance matrices Pi and Ai
        if self.op == "conv":
            u_spat_t = K.conv2d(u_t, self.W[idx_all], (self.strides, self.strides),
                                padding=self.padding, data_format='channels_last')
        elif self.op == "deconv":
            out_height = deconv_length(self.input_height, self.strides, self.kernel_size,
                                       self.padding, output_padding=None)
            out_width = deconv_length(self.input_width, self.strides, self.kernel_size,
                                      self.padding, output_padding=None)
            output_shape = (self.batch * self.channels, out_height, out_width,
                            self.num_capsule)
            u_spat_t = K.conv2d_transpose(u_t, self.W[idx_all], output_shape,
                                          (self.strides, self.strides),
                                          padding=self.padding,
                                          data_format='channels_last')
        else:
            raise ValueError("Wrong type of operation for capsule")

        # Some shape operations
        H_1 = u_spat_t.get_shape()[1]
        W_1 = u_spat_t.get_shape()[2]
        # H_1 = tf.shape(u_spat_t)[1]
        # W_1 = tf.shape(u_spat_t)[2]
        u_spat_t = tf.reshape(u_spat_t, [self.batch, self.channels, H_1, W_1,
                                         self.num_capsule])
        u_spat_t = tf.transpose(u_spat_t, (0, 2, 3, 4, 1))
        u_spat_t = tf.reshape(u_spat_t, [self.batch, H_1, W_1,
                                         self.num_capsule * self.channels])

        # Split the convolution output of input_tensor into pose and appearance matrices
        u_t_pos, u_t_app = tf.split(u_spat_t,
                                    [self.num_capsule * z_pos, self.num_capsule * z_app],
                                    axis=-1)
        u_t_pos = tf.reshape(u_t_pos, [self.batch, H_1, W_1, self.num_capsule,
                                       self.pos_dim[0], self.pos_dim[1]])
        u_t_app = tf.reshape(u_t_app, [self.batch, H_1, W_1, self.num_capsule,
                                       self.app_dim[0], self.app_dim[1]])

        # Gather the projection matrices and bias for the appropriate capsule type
        mult_pos = tf.gather(self.W_pos, idx_all, axis=0)
        mult_pos = tf.reshape(mult_pos, [self.num_capsule, self.pos_dim[1], self.pos_dim[1]])
        mult_app = tf.gather(self.W_app, idx_all, axis=0)
        mult_app = tf.reshape(mult_app, [self.num_capsule, self.app_dim[1], self.app_dim[1]])
        bias = tf.reshape(tf.gather(self.b_app, idx_all, axis=0),
                          (1, 1, 1, self.num_capsule, 1, 1))

        u_t_app += bias

        # Prepare the pose projection matrix
        mult_pos = K.l2_normalize(mult_pos, axis=-2)
        if self.coord_add:
            mult_pos = coordinate_addition(mult_pos,
                                           [1, H_1, W_1, self.num_capsule,
                                            self.pos_dim[1], self.pos_dim[1]])

        u_t_pos = mat_mult_2d(u_t_pos, mult_pos)
        u_t_app = mat_mult_2d(u_t_app, mult_app)

        # Store the result
        u_hat_t_pos = tf.reshape(u_t_pos, [self.batch, H_1, W_1, self.num_capsule, z_pos])
        u_hat_t_app = tf.reshape(u_t_app, [self.batch, H_1, W_1, self.num_capsule, z_app])
        u_hat_t = tf.concat([u_hat_t_pos, u_hat_t_app], axis=-1)

        u_hat_t_list.append(u_hat_t)
        idx_all += 1

    u_hat_t_list = tf.stack(u_hat_t_list, axis=-2)
    # [N, H, W, t_1, t_0, z] => [N, z, H_1, W_1, t_0, t_1]
    u_hat_t_list = tf.transpose(u_hat_t_list, [0, 5, 1, 2, 4, 3])

    # Routing operation
    if self.routings > 0:
        if self.routing_type == 'dynamic':
            if type(self.routings) is list:
                self.routings = self.routings[-1]
            c_t_list = routing2d(routing=self.routings, t_0=self.input_num_capsule,
                                 u_hat_t_list=u_hat_t_list)  # [T1][N, H, W, t_0]
        elif self.routing_type == 'dual':
            if type(self.routings) is list:
                self.routings = self.routings[-1]
            c_t_list = dual_routing(routing=self.routings, t_0=self.input_num_capsule,
                                    u_hat_t_list=u_hat_t_list, z_app=z_app,
                                    z_pos=z_pos)  # [T1][N, H, W, t_0]
        else:
            raise ValueError(self.routing_type + ' is an invalid routing; try dynamic or dual')
    else:
        self.routing_type = 'NONE'
        c = tf.ones([self.batch, H_1, W_1, self.input_num_capsule, self.num_capsule])
        c_t_list = [tf.squeeze(c_t, axis=-1) for c_t in tf.split(c, self.num_capsule, axis=-1)]

    # Form each parent capsule through the weighted sum of all child capsules
    r_t_mul_u_hat_t_list = []
    u_hat_t_list_ = [tf.squeeze(u_hat_t, axis=-1)
                     for u_hat_t in tf.split(u_hat_t_list, self.num_capsule, axis=-1)]
    for c_t, u_hat_t in zip(c_t_list, u_hat_t_list_):
        r_t = tf.expand_dims(c_t, axis=1)
        r_t_mul_u_hat_t_list.append(tf.reduce_sum(r_t * u_hat_t, axis=-1))

    p = r_t_mul_u_hat_t_list
    p = tf.stack(p, axis=1)
    p_pos, p_app = tf.split(p, [z_pos, z_app], axis=2)

    # Squash the weighted sum to form the final parent capsule
    v_pos = Psquash(p_pos, axis=2)
    v_app = matwo_squash(p_app, axis=2)

    outputs = tf.concat([v_pos, v_app], axis=2)

    return outputs
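# --- Hedged aside (not part of the original layer) -----------------------------------
# Psquash and matwo_squash are defined elsewhere in this module. For orientation only,
# here is a NumPy sketch of the standard capsule "squash" nonlinearity from Sabour et
# al. (2017), which such variants adapt: it keeps a vector's direction while mapping
# its length into [0, 1). This is not the author's exact function.
import numpy as np

def squash(s, axis=-1, eps=1e-8):
    squared_norm = np.sum(s ** 2, axis=axis, keepdims=True)
    scale = squared_norm / (1.0 + squared_norm)
    return scale * s / np.sqrt(squared_norm + eps)

v = squash(np.array([3.0, 4.0]))   # input norm 5 -> squashed norm 25/26 ~ 0.96
print(np.linalg.norm(v))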
def call(self, inputs):
    W_shape = self.kernel.shape.as_list()
    W_reshaped = tf.reshape(self.kernel, (-1, W_shape[-1]))
    # Normalise the kernel by its spectral norm estimate
    new_kernel = self.compute_spectral_norm(W_reshaped, self.u, W_shape)

    inputs_shape = array_ops.shape(inputs)
    batch_size = inputs_shape[0]
    if self.data_format == 'channels_first':
        h_axis, w_axis = 2, 3
    else:
        h_axis, w_axis = 1, 2

    height, width = inputs_shape[h_axis], inputs_shape[w_axis]
    kernel_h, kernel_w = self.kernel_size
    stride_h, stride_w = self.strides

    if self.output_padding is None:
        out_pad_h = out_pad_w = None
    else:
        out_pad_h, out_pad_w = self.output_padding

    out_height = conv_utils.deconv_output_length(height, kernel_h,
                                                 padding=self.padding,
                                                 output_padding=out_pad_h,
                                                 stride=stride_h,
                                                 dilation=self.dilation_rate[0])
    out_width = conv_utils.deconv_output_length(width, kernel_w,
                                                padding=self.padding,
                                                output_padding=out_pad_w,
                                                stride=stride_w,
                                                dilation=self.dilation_rate[1])
    if self.data_format == 'channels_first':
        output_shape = (batch_size, self.filters, out_height, out_width)
    else:
        output_shape = (batch_size, out_height, out_width, self.filters)
    output_shape_tensor = array_ops.stack(output_shape)

    outputs = K.conv2d_transpose(
        inputs,
        new_kernel,
        output_shape_tensor,
        strides=self.strides,
        padding=self.padding,
        data_format=self.data_format,
        dilation_rate=self.dilation_rate)

    if not context.executing_eagerly():
        # Infer the static output shape:
        out_shape = self.compute_output_shape(inputs.shape)
        outputs.set_shape(out_shape)

    if self.use_bias:
        outputs = tf.nn.bias_add(
            outputs,
            self.bias,
            data_format=conv_utils.convert_data_format(self.data_format, ndim=4))

    if self.activation is not None:
        return self.activation(outputs)
    return outputs
def call(self, inputs, training=None):
    input_shape = K.shape(inputs)
    batch_size = input_shape[0]
    if self.data_format == 'channels_first':
        h_axis, w_axis = 2, 3
    else:
        h_axis, w_axis = 1, 2

    height, width = input_shape[h_axis], input_shape[w_axis]
    kernel_h, kernel_w = self.kernel_size
    stride_h, stride_w = self.strides

    if self.output_padding is None:
        out_pad_h = out_pad_w = None
    else:
        out_pad_h, out_pad_w = self.output_padding

    # Infer the dynamic output shape:
    out_height = conv_utils.deconv_length(height, stride_h, kernel_h,
                                          self.padding, out_pad_h)
    out_width = conv_utils.deconv_length(width, stride_w, kernel_w,
                                         self.padding, out_pad_w)
    if self.data_format == 'channels_first':
        output_shape = (batch_size, self.filters, out_height, out_width)
    else:
        output_shape = (batch_size, out_height, out_width, self.filters)

    # Spectral normalization
    def _l2normalize(v, eps=1e-12):
        return v / (K.sum(v ** 2) ** 0.5 + eps)

    def power_iteration(W, u):
        # According to the paper, a single power-iteration step is enough.
        _u = u
        _v = _l2normalize(K.dot(_u, K.transpose(W)))
        _u = _l2normalize(K.dot(_v, W))
        return _u, _v

    W_shape = self.kernel.shape.as_list()
    # Flatten the kernel tensor
    W_reshaped = K.reshape(self.kernel, [-1, W_shape[-1]])
    _u, _v = power_iteration(W_reshaped, self.u)
    # Calculate sigma (the approximate spectral norm)
    sigma = K.dot(_v, W_reshaped)
    sigma = K.dot(sigma, K.transpose(_u))
    # Normalize the kernel
    W_bar = W_reshaped / sigma
    # Reshape the weight tensor
    if training in {0, False}:
        W_bar = K.reshape(W_bar, W_shape)
    else:
        with tf.control_dependencies([self.u.assign(_u)]):
            W_bar = K.reshape(W_bar, W_shape)
    self.kernel = W_bar

    outputs = K.conv2d_transpose(
        inputs,
        self.kernel,
        output_shape,
        self.strides,
        padding=self.padding,
        data_format=self.data_format)

    if self.use_bias:
        outputs = K.bias_add(
            outputs,
            self.bias,
            data_format=self.data_format)

    if self.activation is not None:
        return self.activation(outputs)
    return outputs
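# --- Hedged aside (not part of the original layer) -----------------------------------
# A standalone NumPy sketch of the power iteration used above to approximate the
# spectral norm (largest singular value) of the flattened kernel. Names and shapes are
# illustrative; a few iterations are run here so the estimate is visibly close to the
# exact value from the SVD.
import numpy as np

def l2_normalize(v, eps=1e-12):
    return v / (np.sqrt(np.sum(v ** 2)) + eps)

rng = np.random.default_rng(0)
W = rng.standard_normal((48, 64))               # flattened kernel, shape (-1, channels)
u = l2_normalize(rng.standard_normal((1, 64)))  # persistent singular-vector estimate

for _ in range(5):                              # the layer above runs a single step per call
    v = l2_normalize(u @ W.T)
    u = l2_normalize(v @ W)

sigma = float(v @ W @ u.T)                      # approximate spectral norm
print(sigma, np.linalg.svd(W, compute_uv=False)[0])  # the two values should be close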