def call(self, inputs, *args, **kwargs):
    """Run the parent transposed convolution and complete its static shape.

    The parent class performs the actual computation. If the tensor it
    returns already has a fully defined static shape it is returned as is;
    otherwise the shape is derived from the static input shape with
    ``conv_utils.deconv_length`` and attached through ``set_shape``.
    """
    outputs = super(Convolution2DTranspose, self).call(inputs, *args, **kwargs)
    if outputs.get_shape().is_fully_defined():
        return outputs

    static_in = K.int_shape(inputs)
    channels_last = self.data_format == "channels_last"
    if channels_last:
        rows, cols = static_in[1], static_in[2]
    else:
        rows, cols = static_in[2], static_in[3]

    k_rows, k_cols = self.kernel_size
    s_rows, s_cols = self.strides
    batch = inputs.get_shape()[0]

    # Static size of each spatial axis after the transposed convolution.
    new_rows = conv_utils.deconv_length(rows, s_rows, k_rows, self.padding)
    new_cols = conv_utils.deconv_length(cols, s_cols, k_cols, self.padding)

    if channels_last:
        static_out = (batch, new_rows, new_cols, self.filters)
    else:
        static_out = (batch, self.filters, new_rows, new_cols)
    outputs.set_shape(static_out)
    return outputs
def compute_output_shape(self, input_shape):
    """Return the output shape of a 3D transposed convolution.

    The three spatial entries of ``input_shape`` (read in height, width,
    depth order) are each passed through ``conv_utils.deconv_length``; the
    channel entry becomes ``self.filters`` and the batch entry is kept.
    """
    channels_first = self.data_format == 'channels_first'
    first_spatial = 2 if channels_first else 1
    spatial_in = (
        input_shape[first_spatial],
        input_shape[first_spatial + 1],
        input_shape[first_spatial + 2],
    )
    k_h, k_w, k_d = self.kernel_size
    s_h, s_w, s_d = self.strides
    batch = (input_shape[0],)

    spatial_out = tuple(
        conv_utils.deconv_length(size, stride, kernel, self.padding)
        for size, stride, kernel in zip(
            spatial_in, (s_h, s_w, s_d), (k_h, k_w, k_d))
    )

    channels = (self.filters,)
    if channels_first:
        return batch + channels + spatial_out
    return batch + spatial_out + channels
def compute_output_shape(self, input_shape):
    """Return the output shape of the 2D transposed convolution.

    The channel entry is replaced by ``self.filters`` and both spatial
    entries are recomputed with ``conv_utils.deconv_length``, taking the
    layer's padding, optional output padding and dilation rate into account.
    """
    dims = list(input_shape)
    if self.data_format == 'channels_first':
        c_axis, spatial_axes = 1, (2, 3)
    else:
        c_axis, spatial_axes = 3, (1, 2)

    kernel_h, kernel_w = self.kernel_size
    stride_h, stride_w = self.strides
    pad_h, pad_w = (
        (None, None) if self.output_padding is None else self.output_padding
    )

    dims[c_axis] = self.filters
    per_axis = zip(
        spatial_axes,
        (stride_h, stride_w),
        (kernel_h, kernel_w),
        (pad_h, pad_w),
        (self.dilation_rate[0], self.dilation_rate[1]),
    )
    for axis, stride, kernel, out_pad, dilation in per_axis:
        dims[axis] = conv_utils.deconv_length(
            dims[axis], stride, kernel, self.padding, out_pad, dilation)
    return tuple(dims)
def compute_output_shape(self, input_shape):
    """Return the output shape of the capsule deconvolution layer.

    Axes 1 and 2 are upscaled with ``deconv_length`` using ``self.scaling``
    as the stride; axes 3 and 4 become ``self.num_capsule`` and
    ``self.num_atoms``. Axis 0 is left untouched.
    """
    dims = list(input_shape)
    for axis in (1, 2):
        dims[axis] = deconv_length(
            dims[axis], self.scaling, self.kernel_size, self.padding)
    dims[3], dims[4] = self.num_capsule, self.num_atoms
    return tuple(dims)
def Unpool(self, pooled_Maps, indices, Rectified_FM=None, pool_size=[1, 2, 2, 1],
           strides=[1, 1, 1, 1], padding='valid', scope='unpool'):
    """Unpooling layer to be used after ``max_pool_with_argmax``.

    Argument names follow Fig. 1 of https://arxiv.org/pdf/1311.2901.pdf.

    Args:
        pooled_Maps: max-pooled output tensor (indexed here as a 4-D tensor).
        indices: argmax indices produced together with ``pooled_Maps``.
        Rectified_FM: optional rectified feature maps. When given, its static
            shape is used as the output shape; otherwise the output shape is
            derived from ``pool_size``, ``strides`` and ``padding``.
        pool_size: 4-element pooling window; entries 1 and 2 are used as the
            kernel height and width. The default list is never mutated.
        strides: 4-element strides; entries 1 and 2 are used as the height
            and width strides. The default list is never mutated.
        padding: padding mode forwarded to ``conv_utils.deconv_length``.
        scope: name of the variable scope wrapping the created ops.

    Returns:
        The unpooled tensor: ``pooled_Maps`` resized with nearest-neighbour
        interpolation and masked by the switch positions.
    """
    with tf.variable_scope(scope):
        input_shape = tf.shape(pooled_Maps)
        if Rectified_FM is None:
            # Calculate the output shape from the pooling parameters.
            height, width, filters = input_shape[1:]
            kernel_h, kernel_w = pool_size[1:3]
            stride_h, stride_w = strides[1:3]
            out_height = conv_utils.deconv_length(
                height, stride_h, kernel_h, padding)
            # The width must use the width stride/kernel; the previous code
            # reused the height values here.
            out_width = conv_utils.deconv_length(
                width, stride_w, kernel_w, padding)
            output_shape = [input_shape[0], out_height, out_width, filters]
        else:
            output_shape = Rectified_FM.get_shape().as_list()
            if output_shape[0] is None:
                # Unknown static batch size: fall back to the dynamic one.
                output_shape = [input_shape[0], ] + output_shape[1:]

        flat_input_size = tf.reduce_prod(input_shape)
        flat_output_shape = [
            output_shape[0],
            output_shape[1] * output_shape[2] * output_shape[3],
        ]

        flat_pooled = tf.reshape(pooled_Maps, tf.stack([flat_input_size]))
        # Pair every argmax index with the index of the batch item it
        # belongs to, giving (batch, flat_position) scatter coordinates.
        batch_range = tf.reshape(
            tensor=tf.range(tf.cast(input_shape[0], tf.int64),
                            dtype=indices.dtype),
            shape=[input_shape[0], 1, 1, 1])
        b = tf.ones_like(indices) * batch_range
        b = tf.reshape(b, tf.stack([flat_input_size, 1]))
        flat_indices = tf.reshape(indices, tf.stack([flat_input_size, 1]))
        flat_indices = tf.concat([b, flat_indices], 1)

        # "Switches" as in Fig. 1 of https://arxiv.org/pdf/1311.2901.pdf:
        # a boolean mask that is True wherever a maximum was recorded.
        Switches = tf.scatter_nd(flat_indices, tf.ones_like(flat_pooled),
                                 shape=tf.cast(flat_output_shape, tf.int64))
        Switches = tf.reshape(Switches, [-1])
        Switches = tf.greater(Switches, tf.zeros_like(Switches))
        Switches = tf.reshape(Switches, tf.stack(output_shape))

        img = tf.image.resize_nearest_neighbor(pooled_Maps, output_shape[1:3])
        Unpooled_Maps = tf.multiply(img, tf.cast(Switches, img.dtype))
    return Unpooled_Maps
def call(self, inputs):
    """Apply a 2D transposed convolution with a kernel produced by ``self.U()``.

    The output shape is inferred dynamically from the input shape with
    ``conv_utils.deconv_length``. The kernel returned by ``self.U()`` is
    repeated ``self.input_channels`` times before being used; bias and
    activation are applied afterwards when configured.

    Args:
        inputs: 4-D input tensor laid out according to ``self.data_format``.

    Returns:
        The (optionally biased and activated) output tensor.
    """
    input_shape = K.shape(inputs)
    batch_size = input_shape[0]
    if self.data_format == 'channels_first':
        h_axis, w_axis = 2, 3
        c_axis = 1
    else:
        h_axis, w_axis = 1, 2
        c_axis = 3

    height, width = input_shape[h_axis], input_shape[w_axis]
    kernel_h, kernel_w = self.kernel_size
    stride_h, stride_w = self.strides
    if self.output_padding is None:
        out_pad_h = out_pad_w = None
    else:
        out_pad_h, out_pad_w = self.output_padding

    # Infer the dynamic output shape:
    out_height = conv_utils.deconv_length(height,
                                          stride_h, kernel_h,
                                          self.padding,
                                          out_pad_h,
                                          self.dilation_rate[0])
    out_width = conv_utils.deconv_length(width,
                                         stride_w, kernel_w,
                                         self.padding,
                                         out_pad_w,
                                         self.dilation_rate[1])
    if self.data_format == 'channels_first':
        output_shape = (batch_size, self.filters, out_height, out_width)
    else:
        output_shape = (batch_size, out_height, out_width, self.filters)

    # BTEK: the kernel is generated once per call (the earlier duplicate
    # call whose result was discarded, and the debug prints, were removed).
    kernel = self.U()
    # NOTE(review): c_axis is the channel axis of the *input* layout but is
    # used here as a kernel axis -- confirm this is intended for
    # 'channels_first'.
    kernel = K.repeat_elements(kernel, self.input_channels, axis=c_axis)

    outputs = K.conv2d_transpose(
        inputs,
        kernel,
        output_shape,
        self.strides,
        padding=self.padding,
        data_format=self.data_format,
        dilation_rate=self.dilation_rate)

    if self.use_bias:
        outputs = K.bias_add(
            outputs,
            self.bias,
            data_format=self.data_format)

    if self.activation is not None:
        return self.activation(outputs)
    return outputs
def call(self, inputs):
    """Apply a 2D transposed convolution with a cached output shape.

    On the first call with ``self.transpose_output_shape`` unset, the output
    shape is inferred (dynamic batch size, static height/width passed
    through ``conv_utils.deconv_length``) and stored on the layer. The shape
    is then stacked into a tensor, handed to ``K.conv2d_transpose`` and also
    used to reshape the result. No bias is added in this method.
    """
    if self.transpose_output_shape is None:
        input_shape = K.shape(inputs)
        input_shape_list = inputs.get_shape().as_list()
        # Batch size is taken from the dynamic shape, spatial sizes from the
        # static shape.
        batch_size = input_shape[0]
        if self.data_format == 'channels_first':
            h_axis, w_axis = 2, 3
        else:
            h_axis, w_axis = 1, 2

        height, width = input_shape_list[h_axis], input_shape_list[w_axis]
        kernel_h, kernel_w = self.kernel_size
        stride_h, stride_w = self.strides

        # Infer the output shape:
        out_height = conv_utils.deconv_length(height,
                                              stride_h, kernel_h,
                                              self.padding)
        out_width = conv_utils.deconv_length(width,
                                             stride_w, kernel_w,
                                             self.padding)
        # NOTE(review): this caches a tuple holding a tensor derived from
        # this particular `inputs` on the layer instance.
        if self.data_format == 'channels_first':
            self.transpose_output_shape = (batch_size, self.filters, out_height, out_width)
        else:
            self.transpose_output_shape = (batch_size, out_height, out_width, self.filters)
        shape = self.transpose_output_shape
    else:
        shape = self.transpose_output_shape

    # NOTE(review): the original indentation was lost; the three statements
    # below are assumed to run on both paths -- confirm against the
    # upstream file.
    if self.data_format == 'channels_first':
        # Reorder (N, C, H, W) -> (N, H, W, C).
        shape = (shape[0], shape[2], shape[3], shape[1])

    if shape[0] is None:
        # Replace an unknown batch entry with the dynamic batch size.
        shape = (tf.shape(inputs)[0], ) + tuple(shape[1:])
    shape = tf.stack(list(shape))

    outputs = K.conv2d_transpose(x=inputs,
                                 kernel=self.kernel,
                                 output_shape=shape,
                                 strides=self.strides,
                                 padding=self.padding,
                                 data_format=self.data_format)

    outputs = tf.reshape(outputs, shape)

    # NOTE: bias addition is intentionally absent here (it was commented
    # out in the original code).

    if self.activation is not None:
        return self.activation(outputs)
    return outputs
def call(self, inputs):
    """Subtract the bias, then apply the 2D transposed convolution.

    Unlike a regular transposed-convolution layer, the bias (negated) is
    added to the *inputs* before the convolution rather than to its result.
    The output shape is inferred dynamically from the input shape.
    """
    dyn_shape = K.shape(inputs)
    batch = dyn_shape[0]
    channels_first = self.data_format == 'channels_first'
    row_axis, col_axis = (2, 3) if channels_first else (1, 2)
    rows, cols = dyn_shape[row_axis], dyn_shape[col_axis]

    k_rows, k_cols = self.kernel_size
    s_rows, s_cols = self.strides
    pad_rows, pad_cols = (
        (None, None) if self.output_padding is None else self.output_padding
    )

    # Dynamic spatial size of the result.
    new_rows = conv_utils.deconv_length(
        rows, s_rows, k_rows, self.padding, pad_rows, self.dilation_rate[0])
    new_cols = conv_utils.deconv_length(
        cols, s_cols, k_cols, self.padding, pad_cols, self.dilation_rate[1])

    if channels_first:
        target_shape = (batch, self.filters, new_rows, new_cols)
    else:
        target_shape = (batch, new_rows, new_cols, self.filters)

    result = inputs
    if self.use_bias:
        # Undo the bias first: note the negation.
        result = K.bias_add(result, -self.bias, data_format=self.data_format)

    result = K.conv2d_transpose(
        result,
        self.kernel,
        target_shape,
        self.strides,
        padding=self.padding,
        data_format=self.data_format,
        dilation_rate=self.dilation_rate)

    if self.activation is None:
        return result
    return self.activation(result)
def compute_output_shape(self, input_shapes):
    """Return the output shape derived from the first input shape.

    Only ``input_shapes[0]`` is used, and it is treated as channels-last:
    axis 3 becomes ``self.filters`` while axes 1 and 2 are recomputed with
    ``conv_utils.deconv_length`` (no output padding).
    """
    dims = list(input_shapes[0])
    kernel_h, kernel_w = self.kernel_size
    stride_h, stride_w = self.strides

    dims[3] = self.filters
    for axis, stride, kernel in ((1, stride_h, kernel_h),
                                 (2, stride_w, kernel_w)):
        dims[axis] = conv_utils.deconv_length(
            dims[axis], stride, kernel, self.padding, None)
    return tuple(dims)
def compute_output_shape(self, input_shape):
    """Return the output shape of the 3D transposed convolution.

    The channel entry becomes ``self.filters``; the depth, height and width
    entries are recomputed with ``conv_utils.deconv_length``.
    """
    dims = list(input_shape)
    if self.data_format == 'channels_first':
        c_axis, spatial_axes = 1, (2, 3, 4)
    else:
        c_axis, spatial_axes = 4, (1, 2, 3)

    kernel_d, kernel_h, kernel_w = self.kernel_size
    stride_d, stride_h, stride_w = self.strides

    dims[c_axis] = self.filters
    per_axis = zip(spatial_axes,
                   (stride_d, stride_h, stride_w),
                   (kernel_d, kernel_h, kernel_w))
    for axis, stride, kernel in per_axis:
        dims[axis] = conv_utils.deconv_length(
            dims[axis], stride, kernel, self.padding)
    return tuple(dims)
def call(self, input_tensor, training=None):
    """Upsample 3D capsules and route them to the output capsules.

    The 6-D input is transposed so that its axis 4 comes first, then that
    axis is folded together with the original axis 0 so that a plain 3D
    (transposed) convolution can be applied. The result is reshaped into
    votes and passed to ``update_routing``.

    Args:
        input_tensor: 6-D tensor; presumably
            (batch, height, width, depth, input capsules, input atoms) --
            confirm against the caller.
        training: accepted for interface compatibility; not used here.

    Returns:
        The activations returned by ``update_routing``.
    """
    input_transposed = tf.transpose(input_tensor, [4, 0, 1, 2, 3, 5])
    input_shape = K.shape(input_transposed)
    # Fold the (now leading) capsule axis into the batch axis.
    input_tensor_reshaped = K.reshape(input_transposed, [
        input_shape[1] * input_shape[0], self.input_height, self.input_width,
        self.input_depth, self.input_num_atoms])
    input_tensor_reshaped.set_shape((None, self.input_height, self.input_width,
                                     self.input_depth, self.input_num_atoms))

    if self.upsamp_type == 'resize':
        # 5-D tensors need resize_volumes (depth, height, width factors);
        # resize_images only takes two factors and rejected this call.
        upsamp = K.resize_volumes(input_tensor_reshaped, self.scaling,
                                  self.scaling, self.scaling, 'channels_last')
        outputs = K.conv3d(upsamp, kernel=self.W, strides=(1, 1, 1),
                           padding=self.padding, data_format='channels_last')
    elif self.upsamp_type == 'subpix':
        conv = K.conv3d(input_tensor_reshaped, kernel=self.W, strides=(1, 1, 1),
                        padding='same', data_format='channels_last')
        # NOTE(review): tf.depth_to_space is documented for 4-D inputs;
        # confirm this branch works with the 5-D `conv` tensor.
        outputs = tf.depth_to_space(conv, self.scaling)
    else:
        batch_size = input_shape[1] * input_shape[0]

        # Infer the dynamic output shape:
        out_height = deconv_length(self.input_height, self.scaling,
                                   self.kernel_size, self.padding)
        out_width = deconv_length(self.input_width, self.scaling,
                                  self.kernel_size, self.padding)
        out_depth = deconv_length(self.input_depth, self.scaling,
                                  self.kernel_size, self.padding)
        output_shape = (batch_size, out_height, out_width, out_depth,
                        self.num_capsule * self.num_atoms)

        outputs = K.conv3d_transpose(
            input_tensor_reshaped, self.W, output_shape,
            (self.scaling, self.scaling, self.scaling),
            padding=self.padding, data_format='channels_last')

    votes_shape = K.shape(outputs)
    _, conv_height, conv_width, conv_depth, _ = outputs.get_shape()

    # Unfold the batch axis again. The leading dimensions must match both
    # the static shape set below and `logit_shape`; the previous reshape
    # used input_shape[2] and dropped votes_shape[3].
    votes = K.reshape(outputs, [
        input_shape[1], input_shape[0],
        votes_shape[1], votes_shape[2], votes_shape[3],
        self.num_capsule, self.num_atoms])
    votes.set_shape((None, self.input_num_capsule, conv_height.value,
                     conv_width.value, conv_depth.value,
                     self.num_capsule, self.num_atoms))

    logit_shape = K.stack([
        input_shape[1], input_shape[0],
        votes_shape[1], votes_shape[2], votes_shape[3],
        self.num_capsule])
    # Replicate the bias over every spatial position of the votes.
    biases_replicated = K.tile(
        self.b, [votes_shape[1], votes_shape[2], votes_shape[3], 1, 1])

    activations = update_routing(
        votes=votes,
        biases=biases_replicated,
        logit_shape=logit_shape,
        num_dims=7,
        input_dim=self.input_num_capsule,
        output_dim=self.num_capsule,
        num_routing=self.routings)

    return activations
def call(self, inputs):
    """Apply the 2D transposed convolution, honouring a fixed output shape.

    When ``self._output_shape`` is ``None`` the output shape is inferred
    dynamically with ``deconv_length``; otherwise the dynamic batch size is
    prepended to ``self._output_shape`` (which therefore has to be a tuple
    without the batch entry).

    Args:
        inputs: 4-D input tensor laid out according to ``self.data_format``.

    Returns:
        The (optionally biased and activated) output tensor.
    """
    input_shape = K.shape(inputs)
    batch_size = input_shape[0]
    if self.data_format == 'channels_first':
        h_axis, w_axis = 2, 3
    else:
        h_axis, w_axis = 1, 2

    height, width = input_shape[h_axis], input_shape[w_axis]
    kernel_h, kernel_w = self.kernel_size
    stride_h, stride_w = self.strides

    # Infer the dynamic output shape:
    if self._output_shape is None:
        out_height = deconv_length(height, stride_h, kernel_h, self.padding)
        out_width = deconv_length(width, stride_w, kernel_w, self.padding)
        if self.data_format == 'channels_first':
            output_shape = (batch_size, self.filters, out_height, out_width)
        else:
            output_shape = (batch_size, out_height, out_width, self.filters)
    else:
        output_shape = (batch_size,) + self._output_shape

    outputs = K.conv2d_transpose(
        inputs,
        self.kernel,
        output_shape,
        self.strides,
        padding=self.padding,
        data_format=self.data_format
    )

    # Compare against None explicitly: evaluating the truthiness of a
    # tensor/variable is not reliable and can raise.
    if self.bias is not None:
        outputs = K.bias_add(
            outputs,
            self.bias,
            data_format=self.data_format
        )

    if self.activation is not None:
        return self.activation(outputs)
    return outputs
def test_deconv_length():
    """Check ``conv_utils.deconv_length`` for each padding mode."""
    # An unknown input length stays unknown.
    assert conv_utils.deconv_length(None, 1, 7, 'same') is None

    # (dim_size, stride_size, kernel_size, padding) -> expected length
    expectations = [
        ((224, 1, 7, 'same'), 224),
        ((224, 2, 7, 'same'), 448),
        ((32, 1, 5, 'valid'), 36),
        ((32, 2, 5, 'valid'), 67),
        ((32, 1, 5, 'full'), 28),
        ((32, 2, 5, 'full'), 59),
    ]
    for arguments, expected in expectations:
        assert conv_utils.deconv_length(*arguments) == expected
def compute_output_shape(self, input_shape):
    """Return the output shape of the unpooling layer.

    An explicitly configured ``self.unpooling_output_shape`` wins. Otherwise
    the two spatial entries of ``input_shape`` are recomputed with
    ``conv_utils.deconv_length`` from the pool size, strides and padding,
    and the channel entry is kept.
    """
    if self.unpooling_output_shape is not None:
        return tuple(self.unpooling_output_shape)

    dims = list(input_shape)
    if self.data_format == 'channels_first':
        c_axis, h_axis, w_axis = 1, 2, 3
    else:
        c_axis, h_axis, w_axis = 3, 1, 2

    pool_h, pool_w = self.pool_size
    stride_h, stride_w = self.strides

    # Unpooling does not change the number of channels.
    dims[c_axis] = input_shape[c_axis]
    dims[h_axis] = conv_utils.deconv_length(
        dims[h_axis], stride_h, pool_h, self.padding)
    dims[w_axis] = conv_utils.deconv_length(
        dims[w_axis], stride_w, pool_w, self.padding)
    return tuple(dims)
def conv2d_transpose(
        inputs,
        filter,  # pylint: disable=redefined-builtin
        kernel_size=None,
        filters=None,
        strides=(1, 1),
        padding='same',
        output_padding=None,
        data_format='channels_last'):
    """Compatibility wrapper around ``K.conv2d_transpose``.

    Takes a filter laid out for a forward convolution, swaps its last two
    axes, infers the dynamic output shape from ``inputs`` and delegates to
    ``K.conv2d_transpose``. The same ``output_padding`` value is used for
    both spatial axes.
    """
    dyn_shape = K.shape(inputs)
    batch = dyn_shape[0]
    channels_first = data_format == 'channels_first'
    row_axis, col_axis = (2, 3) if channels_first else (1, 2)
    rows, cols = dyn_shape[row_axis], dyn_shape[col_axis]

    k_rows, k_cols = kernel_size
    s_rows, s_cols = strides

    # Dynamic spatial size of the result.
    new_rows = conv_utils.deconv_length(
        rows, s_rows, k_rows, padding, output_padding)
    new_cols = conv_utils.deconv_length(
        cols, s_cols, k_cols, padding, output_padding)

    if channels_first:
        target_shape = (batch, filters, new_rows, new_cols)
    else:
        target_shape = (batch, new_rows, new_cols, filters)

    # Swap the input/output channel axes of the forward-convolution filter.
    filter = K.permute_dimensions(filter, (0, 1, 3, 2))
    return K.conv2d_transpose(
        inputs, filter, target_shape, strides,
        padding=padding, data_format=data_format)
def conv_transpose_output_length(
        input_length, filter_size, padding, stride, dilation=1):
    """Adapt ``conv_utils.deconv_length`` to the ``conv_output_length`` argument order.

    Raises:
        ValueError: if ``dilation`` is anything other than 1.
    """
    if dilation == 1:
        return conv_utils.deconv_length(
            dim_size=input_length,
            stride_size=stride,
            kernel_size=filter_size,
            padding=padding,
        )

    raise ValueError(
        "Dilation must be 1 for transposed convolution. "
        f"Got dilation = {dilation}"
    )
def compute_output_shape(self, input_shape):
    """Return the output shape, honouring a fixed ``self._output_shape``.

    With ``self._output_shape`` set, everything after the batch entry is
    replaced by it. Otherwise the channel entry becomes ``self.filters`` and
    the spatial entries are recomputed with ``deconv_length``.
    """
    dims = list(input_shape)
    channels_first = self.data_format == 'channels_first'
    kernel_h, kernel_w = self.kernel_size
    stride_h, stride_w = self.strides

    if self._output_shape is not None:
        dims[1:] = self._output_shape
        return tuple(dims)

    c_axis, h_axis, w_axis = (1, 2, 3) if channels_first else (3, 1, 2)
    dims[c_axis] = self.filters
    dims[h_axis] = deconv_length(dims[h_axis], stride_h, kernel_h, self.padding)
    dims[w_axis] = deconv_length(dims[w_axis], stride_w, kernel_w, self.padding)
    return tuple(dims)
def compute_output_shape(self, input_shape):
    """Return the output shape of the transposed convolution.

    A configured (or previously cached) ``self.transpose_output_shape`` is
    returned as a tuple. Otherwise the channel entry becomes
    ``self.filters`` and the spatial entries are recomputed with
    ``conv_utils.deconv_length``.
    """
    if self.transpose_output_shape is not None:
        return tuple(self.transpose_output_shape)

    dims = list(input_shape)
    if self.data_format == 'channels_first':
        c_axis, h_axis, w_axis = 1, 2, 3
    else:
        c_axis, h_axis, w_axis = 3, 1, 2

    kernel_h, kernel_w = self.kernel_size
    stride_h, stride_w = self.strides

    dims[c_axis] = self.filters
    dims[h_axis] = conv_utils.deconv_length(
        dims[h_axis], stride_h, kernel_h, self.padding)
    dims[w_axis] = conv_utils.deconv_length(
        dims[w_axis], stride_w, kernel_w, self.padding)
    return tuple(dims)
def call(self, inputs):
    """Apply the 3D transposed convolution via ``K.deconv3d``.

    The output shape is assembled from the dynamic batch size,
    ``self.filters`` and the depth/height/width sizes computed with
    ``conv_utils.deconv_length``.

    Args:
        inputs: 5-D input tensor laid out according to ``self.data_format``.

    Returns:
        The (optionally biased and activated) output tensor.
    """
    input_shape = K.shape(inputs)
    output_shape = [0] * 5
    output_shape[0] = input_shape[0]
    if self.data_format == 'channels_first':
        c_axis, d_axis, h_axis, w_axis = 1, 2, 3, 4
    else:
        c_axis, d_axis, h_axis, w_axis = 4, 1, 2, 3

    kernel_d, kernel_h, kernel_w = self.kernel_size
    stride_d, stride_h, stride_w = self.strides

    output_shape[c_axis] = self.filters
    output_shape[d_axis] = conv_utils.deconv_length(
        input_shape[d_axis], stride_d, kernel_d, self.padding)
    output_shape[h_axis] = conv_utils.deconv_length(
        input_shape[h_axis], stride_h, kernel_h, self.padding)
    output_shape[w_axis] = conv_utils.deconv_length(
        input_shape[w_axis], stride_w, kernel_w, self.padding)

    outputs = K.deconv3d(inputs, self.kernel, output_shape,
                         strides=self.strides,
                         padding=self.padding,
                         data_format=self.data_format)

    # Compare against None explicitly: evaluating the truthiness of a
    # tensor/variable is not reliable and can raise.
    if self.bias is not None:
        outputs = K.bias_add(outputs, self.bias, data_format=self.data_format)

    if self.activation is not None:
        return self.activation(outputs)
    return outputs
def call(self, inputs):
    """Apply the 3D transposed convolution via ``conv3d_transpose``.

    The output shape is inferred dynamically: each of depth, height and
    width is passed through ``conv_utils.deconv_length`` with its own
    stride and kernel size.

    Args:
        inputs: 5-D input tensor laid out according to ``self.data_format``.

    Returns:
        The (optionally biased and activated) output tensor.
    """
    input_shape = K.shape(inputs)
    batch_size = input_shape[0]
    if self.data_format == 'channels_first':
        d_axis, h_axis, w_axis = 2, 3, 4
    else:
        d_axis, h_axis, w_axis = 1, 2, 3

    depth = input_shape[d_axis]
    height = input_shape[h_axis]
    width = input_shape[w_axis]
    kernel_d, kernel_h, kernel_w = self.kernel_size
    stride_d, stride_h, stride_w = self.strides

    # Infer the dynamic output shape. The depth axis must use the depth
    # stride/kernel; the previous code reused the height values for it.
    out_depth = conv_utils.deconv_length(depth,
                                         stride_d, kernel_d,
                                         self.padding, None)
    out_height = conv_utils.deconv_length(height,
                                          stride_h, kernel_h,
                                          self.padding, None)
    out_width = conv_utils.deconv_length(width,
                                         stride_w, kernel_w,
                                         self.padding, None)

    if self.data_format == 'channels_first':
        output_shape = (batch_size, self.filters,
                        out_depth, out_height, out_width)
    else:
        output_shape = (batch_size, out_depth, out_height, out_width,
                        self.filters)

    outputs = conv3d_transpose(
        inputs,
        self.kernel,
        output_shape,
        self.strides,
        padding=self.padding,
        data_format=self.data_format)

    # Compare against None explicitly: evaluating the truthiness of a
    # tensor/variable is not reliable and can raise.
    if self.bias is not None:
        outputs = K.bias_add(
            outputs,
            self.bias,
            data_format=self.data_format)

    if self.activation is not None:
        return self.activation(outputs)
    return outputs
def call(self, inputs):
    """Apply a 1D transposed convolution via ``conv1d_transpose``.

    The output shape is built from the *static* input shape, so the batch
    entry is whatever the static shape reports for axis 0.

    Args:
        inputs: 3-D input tensor laid out according to ``self.data_format``.

    Returns:
        The (optionally biased and activated) output tensor.
    """
    # Static shape; assumes every dimension object exposes `.value`.
    input_shape = tuple(d.value for d in inputs.shape)
    batch_size = input_shape[0]

    # 'channels_first' is the Keras spelling. The historical misspelling
    # 'channel_first' is still accepted so existing configurations keep
    # their behaviour.
    channels_first = self.data_format in ('channels_first', 'channel_first')
    w_axis = 2 if channels_first else 1

    width = input_shape[w_axis]
    kernel_w, = self.kernel_size
    stride_w, = self.strides

    # Infer the output width. `output_padding` is deliberately not passed:
    # the argument does not exist in earlier Keras versions.
    out_width = conv_utils.deconv_length(
        dim_size=width,
        stride_size=stride_w,
        kernel_size=kernel_w,
        padding=self.padding,
    )

    # Define the output shape based on the channel position.
    if channels_first:
        output_shape = (batch_size, self.filters, out_width)
    else:
        output_shape = (batch_size, out_width, self.filters)

    # Apply the 1D transposed convolution.
    outputs = conv1d_transpose(value=inputs,
                               filter=self.kernel,
                               output_shape=output_shape,
                               stride=stride_w,
                               padding=self.padding,
                               data_format=self.data_format)

    # Add bias if enabled.
    if self.use_bias:
        outputs = K.bias_add(outputs,
                             self.bias,
                             data_format=self.data_format)

    # Apply the activation function if provided.
    if self.activation is not None:
        return self.activation(outputs)
    return outputs
def call(self, inputs):
    """Apply a 1D transposed convolution through the 2D backend op.

    A dummy axis of size 1 is inserted at position 1 of the inputs (and at
    position 0 of the kernel), ``K.conv2d_transpose`` is applied and the
    dummy axis is squeezed out again. Axis 1 of the inputs is treated as the
    steps axis; the channels-first variant is not implemented.
    """
    dyn_shape = K.shape(inputs)
    n_batch = dyn_shape[0]
    n_steps = dyn_shape[1]

    (k_width,) = self.kernel_size
    (step_stride,) = self.strides
    pad_w = None
    if self.output_padding is not None:
        (pad_w,) = self.output_padding

    # Dynamic length of the output sequence.
    new_width = conv_utils.deconv_length(
        n_steps, step_stride, k_width, self.padding, pad_w,
        self.dilation_rate[0])

    target_shape = (n_batch, 1, new_width, self.filters)
    inputs = K.expand_dims(inputs, axis=1)
    kernel_2d = K.expand_dims(self.kernel, axis=0)

    result = K.conv2d_transpose(
        inputs,
        kernel_2d,
        target_shape,
        (1, self.strides[0]),
        padding=self.padding,
        data_format=self.data_format,
        dilation_rate=(1, self.dilation_rate[0]))
    result = K.squeeze(result, axis=1)

    if self.use_bias:
        result = K.bias_add(result, self.bias, data_format=self.data_format)

    if self.activation is None:
        return result
    return self.activation(result)
def compute_output_shape(self, input_shape):
    """Return the output shape of the 1D transposed convolution.

    Axis 2 becomes ``self.filters`` and axis 1 is recomputed with
    ``conv_utils.deconv_length``; the channels-first variant is not
    implemented.
    """
    dims = list(input_shape)
    (k_width,) = self.kernel_size
    (step_stride,) = self.strides
    pad_w = None
    if self.output_padding is not None:
        (pad_w,) = self.output_padding

    dims[2] = self.filters
    dims[1] = conv_utils.deconv_length(
        dims[1], step_stride, k_width, self.padding, pad_w,
        self.dilation_rate[0])
    return tuple(dims)
def call(self, inputs):
    """Apply a reverse-complement 1D transposed convolution.

    The kernel is concatenated (along its second-to-last axis) with a copy
    of itself reversed along all three axes, and the bias with its reversed
    copy. The convolution runs through ``K.conv2d_transpose`` on a dummy
    axis and produces ``2 * self.filters`` output channels.
    """
    flipped_kernel = self.kernel[::-1, ::-1, ::-1]
    revcomp_kernel = K.concatenate([self.kernel, flipped_kernel], axis=-2)
    if self.use_bias:
        revcomp_bias = K.concatenate([self.bias, self.bias[::-1]], axis=-1)

    dyn_shape = K.shape(inputs)
    n_batch = dyn_shape[0]
    n_steps = dyn_shape[1]

    (k_width,) = self.kernel_size
    (step_stride,) = self.strides
    pad_w = None
    if self.output_padding is not None:
        (pad_w,) = self.output_padding

    # Dynamic length of the output sequence.
    new_width = conv_utils.deconv_length(
        n_steps, step_stride, k_width, self.padding, pad_w,
        self.dilation_rate[0])

    target_shape = (n_batch, 1, new_width, 2 * self.filters)
    inputs = K.expand_dims(inputs, axis=1)
    kernel_2d = K.expand_dims(revcomp_kernel, axis=0)

    result = K.conv2d_transpose(
        inputs,
        kernel_2d,
        target_shape,
        (1, self.strides[0]),
        padding=self.padding,
        data_format=self.data_format,
        dilation_rate=(1, self.dilation_rate[0]))
    result = K.squeeze(result, axis=1)

    if self.use_bias:
        result = K.bias_add(result, revcomp_bias, data_format=self.data_format)

    if self.activation is None:
        return result
    return self.activation(result)
def compute_output_shape(self, input_shape):
    """Return the output shape of the 1D transposed convolution.

    The channel entry becomes ``self.filters`` and the width entry is
    recomputed with ``conv_utils.deconv_length``.

    Args:
        input_shape: shape of the 3-D input, laid out according to
            ``self.data_format``.

    Returns:
        The output shape as a tuple.
    """
    output_shape = list(input_shape)

    # 'channels_first' is the Keras spelling. The historical misspelling
    # 'channel_first' is still accepted so existing configurations keep
    # their behaviour.
    if self.data_format in ('channels_first', 'channel_first'):
        c_axis, w_axis = 1, 2
    else:
        c_axis, w_axis = 2, 1

    kernel_w, = self.kernel_size
    stride_w, = self.strides

    # `output_padding` is deliberately not passed: the argument does not
    # exist in earlier Keras versions.
    out_width = conv_utils.deconv_length(
        dim_size=output_shape[w_axis],
        stride_size=stride_w,
        kernel_size=kernel_w,
        padding=self.padding,
    )

    output_shape[c_axis] = self.filters
    output_shape[w_axis] = out_width
    return tuple(output_shape)
def compute_output_shape(self, input_shape):
    """Return the output shape of the capsule (de)convolution layer.

    The result is ``(batch, num_capsule, prod(app_dim) + prod(pos_dim))``
    followed by the spatial entries (everything from index 3 of
    ``input_shape`` onwards), each transformed by ``conv_output_length`` for
    ``op == "conv"`` or ``deconv_length`` for ``op == "deconv"``.

    Raises:
        ValueError: if ``self.op`` is neither ``"conv"`` nor ``"deconv"``.
    """
    space = input_shape[3:]
    if self.op == "conv":
        new_space = [
            conv_output_length(dim, self.kernel_size, padding=self.padding,
                               stride=self.strides, dilation=1)
            for dim in space
        ]
    elif self.op == 'deconv':
        new_space = [
            deconv_length(dim, self.strides, self.kernel_size, self.padding,
                          output_padding=None)
            for dim in space
        ]
    else:
        raise ValueError("Wrong type of operation for capsule")

    # np.prod replaces the np.product alias, which was removed in NumPy 2.0.
    capsule_dim = np.prod(self.app_dim) + np.prod(self.pos_dim)
    return (input_shape[0],) + (self.num_capsule, capsule_dim) + tuple(new_space)
def call(self, inputs, training=None):
    """Apply a 2D transposed convolution with a spectrally normalised kernel.

    The kernel is flattened to 2-D, ``power_iteration`` is run on it
    starting from ``self.u``, and the kernel is divided by the resulting
    sigma. When ``training`` is truthy, the updated vector is assigned back
    to ``self.u`` as a control dependency of the normalised kernel.
    """
    dyn_shape = K.shape(inputs)
    batch = dyn_shape[0]
    channels_first = self.data_format == 'channels_first'
    row_axis, col_axis = (2, 3) if channels_first else (1, 2)
    rows, cols = dyn_shape[row_axis], dyn_shape[col_axis]

    k_rows, k_cols = self.kernel_size
    s_rows, s_cols = self.strides
    pad_rows, pad_cols = (
        (None, None) if self.output_padding is None else self.output_padding
    )

    # Dynamic spatial size of the result.
    new_rows = conv_utils.deconv_length(
        rows, s_rows, k_rows, self.padding, pad_rows)
    new_cols = conv_utils.deconv_length(
        cols, s_cols, k_cols, self.padding, pad_cols)
    if channels_first:
        target_shape = (batch, self.filters, new_rows, new_cols)
    else:
        target_shape = (batch, new_rows, new_cols, self.filters)

    # Flatten the kernel so the last axis is kept and the rest is merged.
    kernel_shape = self.kernel.shape.as_list()
    flat_kernel = K.reshape(self.kernel, [-1, kernel_shape[-1]])
    new_u, new_v = power_iteration(flat_kernel, self.u)

    # sigma = v . W . u^T
    sigma = K.dot(K.dot(new_v, flat_kernel), K.transpose(new_u))
    normalised = flat_kernel / sigma

    if training:
        # Persist the power-iteration vector while training.
        with tf.control_dependencies([self.u.assign(new_u)]):
            normalised = K.reshape(normalised, kernel_shape)
    else:
        normalised = K.reshape(normalised, kernel_shape)

    result = K.conv2d_transpose(
        inputs,
        normalised,
        target_shape,
        self.strides,
        padding=self.padding,
        data_format=self.data_format)

    if self.use_bias:
        result = K.bias_add(result, self.bias, data_format=self.data_format)

    if self.activation is None:
        return result
    return self.activation(result)
def call(self, input_tensor, training=None):
    """Project child capsules into parent capsules and route between them.

    For every input capsule type the tensor is (de)convolved with that
    type's spatial kernel, split into pose and appearance matrices,
    multiplied with the matching projection matrices, and the results are
    combined into parent capsules by the configured routing (or by uniform
    weights when ``self.routings`` is not positive).

    Args:
        input_tensor: input capsules; axis 1 is split into
            ``self.input_num_capsule`` capsule types.
        training: accepted for interface compatibility; not used here.

    Returns:
        Concatenation (axis 2) of the squashed pose and appearance parts.

    Raises:
        ValueError: if ``self.op`` is neither "conv" nor "deconv", or if
            ``self.routing_type`` is neither 'dynamic' nor 'dual' while
            routing is enabled.
    """
    # np.prod replaces the np.product alias, which was removed in NumPy 2.0.
    z_app = np.prod(self.app_dim)
    z_pos = np.prod(self.pos_dim)

    # For each child (input) capsule (t_0) project into all parent (output)
    # capsule domain (t_1)
    u_hat_t_list = []

    # Split input_tensor by capsule types
    u_t_list = [tf.squeeze(u_t, axis=1)
                for u_t in tf.split(input_tensor, self.input_num_capsule, axis=1)]
    for idx_all, u_t in enumerate(u_t_list):
        u_t = tf.reshape(u_t, [self.batch * self.channels,
                               self.input_height, self.input_width, 1])
        u_t.set_shape((None, self.input_height, self.input_width, 1))

        # Apply spatial kernel
        # Incorporating local neighborhood information by learning a
        # convolution kernel of size k x k for the pose and appearance
        # matrices Pi and Ai
        if self.op == "conv":
            u_spat_t = K.conv2d(u_t, self.W[idx_all],
                                (self.strides, self.strides),
                                padding=self.padding,
                                data_format='channels_last')
        elif self.op == "deconv":
            out_height = deconv_length(self.input_height, self.strides,
                                       self.kernel_size, self.padding,
                                       output_padding=None)
            out_width = deconv_length(self.input_width, self.strides,
                                      self.kernel_size, self.padding,
                                      output_padding=None)
            output_shape = (self.batch * self.channels, out_height, out_width,
                            self.num_capsule)
            u_spat_t = K.conv2d_transpose(u_t, self.W[idx_all], output_shape,
                                          (self.strides, self.strides),
                                          padding=self.padding,
                                          data_format='channels_last')
        else:
            raise ValueError("Wrong type of operation for capsule")

        # Some shape operation (static spatial size of the result)
        H_1 = u_spat_t.get_shape()[1]
        W_1 = u_spat_t.get_shape()[2]

        u_spat_t = tf.reshape(u_spat_t, [self.batch, self.channels, H_1, W_1,
                                         self.num_capsule])
        u_spat_t = tf.transpose(u_spat_t, (0, 2, 3, 4, 1))
        u_spat_t = tf.reshape(u_spat_t, [self.batch, H_1, W_1,
                                         self.num_capsule * self.channels])

        # Split convolution output of input_tensor to Pose and Appearance
        # matrices
        u_t_pos, u_t_app = tf.split(
            u_spat_t,
            [self.num_capsule * z_pos, self.num_capsule * z_app],
            axis=-1)
        u_t_pos = tf.reshape(u_t_pos, [self.batch, H_1, W_1, self.num_capsule,
                                       self.pos_dim[0], self.pos_dim[1]])
        u_t_app = tf.reshape(u_t_app, [self.batch, H_1, W_1, self.num_capsule,
                                       self.app_dim[0], self.app_dim[1]])

        # Gather projection matrices and bias
        # Take appropriate capsule type
        mult_pos = tf.gather(self.W_pos, idx_all, axis=0)
        mult_pos = tf.reshape(mult_pos, [self.num_capsule,
                                         self.pos_dim[1], self.pos_dim[1]])
        mult_app = tf.gather(self.W_app, idx_all, axis=0)
        mult_app = tf.reshape(mult_app, [self.num_capsule,
                                         self.app_dim[1], self.app_dim[1]])
        bias = tf.reshape(tf.gather(self.b_app, idx_all, axis=0),
                          (1, 1, 1, self.num_capsule, 1, 1))

        u_t_app += bias

        # Prepare the pose projection matrix
        mult_pos = K.l2_normalize(mult_pos, axis=-2)
        if self.coord_add:
            mult_pos = coordinate_addition(
                mult_pos,
                [1, H_1, W_1, self.num_capsule,
                 self.pos_dim[1], self.pos_dim[1]])

        u_t_pos = mat_mult_2d(u_t_pos, mult_pos)
        u_t_app = mat_mult_2d(u_t_app, mult_app)

        # Store the result
        u_hat_t_pos = tf.reshape(u_t_pos, [self.batch, H_1, W_1,
                                           self.num_capsule, z_pos])
        u_hat_t_app = tf.reshape(u_t_app, [self.batch, H_1, W_1,
                                           self.num_capsule, z_app])
        u_hat_t = tf.concat([u_hat_t_pos, u_hat_t_app], axis=-1)
        u_hat_t_list.append(u_hat_t)

    u_hat_t_list = tf.stack(u_hat_t_list, axis=-2)
    # [N, H, W, t_1, t_0, z] => [N, z, H_1, W_1, t_0, t_1]
    u_hat_t_list = tf.transpose(u_hat_t_list, [0, 5, 1, 2, 4, 3])

    # Routing operation
    if self.routings > 0:
        # Strings are compared by value; `is` only worked when both strings
        # happened to be the same interned object.
        if self.routing_type == 'dynamic':
            if isinstance(self.routings, list):
                self.routings = self.routings[-1]
            c_t_list = routing2d(routing=self.routings,
                                 t_0=self.input_num_capsule,
                                 u_hat_t_list=u_hat_t_list)  # [T1][N,H,W,to]
        elif self.routing_type == 'dual':
            if isinstance(self.routings, list):
                self.routings = self.routings[-1]
            c_t_list = dual_routing(routing=self.routings,
                                    t_0=self.input_num_capsule,
                                    u_hat_t_list=u_hat_t_list,
                                    z_app=z_app,
                                    z_pos=z_pos)  # [T1][N,H,W,to]
        else:
            raise ValueError(self.routing_type +
                             ' is an invalid routing; try dynamic or dual')
    else:
        # No routing: every child contributes with weight 1.
        self.routing_type = 'NONE'
        c = tf.ones([self.batch, H_1, W_1,
                     self.input_num_capsule, self.num_capsule])
        c_t_list = [tf.squeeze(c_t, axis=-1)
                    for c_t in tf.split(c, self.num_capsule, axis=-1)]

    # Form each parent capsule through the weighted sum of all child capsules
    r_t_mul_u_hat_t_list = []
    u_hat_t_list_ = [(tf.squeeze(u_hat_t, axis=-1))
                     for u_hat_t in tf.split(u_hat_t_list, self.num_capsule,
                                             axis=-1)]
    for c_t, u_hat_t in zip(c_t_list, u_hat_t_list_):
        r_t = tf.expand_dims(c_t, axis=1)
        r_t_mul_u_hat_t_list.append(tf.reduce_sum(r_t * u_hat_t, axis=-1))

    p = r_t_mul_u_hat_t_list
    p = tf.stack(p, axis=1)
    p_pos, p_app = tf.split(p, [z_pos, z_app], axis=2)

    # Squash the weighted sum to form the final parent capsule
    v_pos = Psquash(p_pos, axis=2)
    v_app = matwo_squash(p_app, axis=2)

    outputs = tf.concat([v_pos, v_app], axis=2)

    return outputs
def call(self, inputs, training=None):
    """Apply a 2D transposed convolution with a spectrally normalised kernel.

    One step of power iteration (starting from ``self.u``) estimates the
    largest singular value of the flattened kernel; the kernel is divided by
    it before the convolution.

    Args:
        inputs: 4-D input tensor laid out according to ``self.data_format``.
        training: when it is ``0`` or ``False`` the power-iteration vector
            is not written back to ``self.u``; for any other value
            (including the default ``None``) it is. The body always read
            this name, but it was missing from the signature.

    Returns:
        The (optionally biased and activated) output tensor.
    """
    input_shape = K.shape(inputs)
    batch_size = input_shape[0]
    if self.data_format == 'channels_first':
        h_axis, w_axis = 2, 3
    else:
        h_axis, w_axis = 1, 2

    height, width = input_shape[h_axis], input_shape[w_axis]
    kernel_h, kernel_w = self.kernel_size
    stride_h, stride_w = self.strides
    if self.output_padding is None:
        out_pad_h = out_pad_w = None
    else:
        out_pad_h, out_pad_w = self.output_padding

    # Infer the dynamic output shape:
    out_height = conv_utils.deconv_length(height,
                                          stride_h, kernel_h,
                                          self.padding,
                                          out_pad_h)
    out_width = conv_utils.deconv_length(width,
                                         stride_w, kernel_w,
                                         self.padding,
                                         out_pad_w)
    if self.data_format == 'channels_first':
        output_shape = (batch_size, self.filters, out_height, out_width)
    else:
        output_shape = (batch_size, out_height, out_width, self.filters)

    # Spectral Normalization
    def _l2normalize(v, eps=1e-12):
        return v / (K.sum(v ** 2) ** 0.5 + eps)

    def power_iteration(W, u):
        # According to the paper, one power-iteration step is enough.
        _u = u
        _v = _l2normalize(K.dot(_u, K.transpose(W)))
        _u = _l2normalize(K.dot(_v, W))
        return _u, _v

    W_shape = self.kernel.shape.as_list()
    # Flatten the kernel, keeping its last axis.
    W_reshaped = K.reshape(self.kernel, [-1, W_shape[-1]])
    _u, _v = power_iteration(W_reshaped, self.u)
    # Calculate sigma
    sigma = K.dot(_v, W_reshaped)
    sigma = K.dot(sigma, K.transpose(_u))
    # Normalize the kernel
    W_bar = W_reshaped / sigma
    # Reshape the weight tensor back to the kernel shape
    if training in {0, False}:
        W_bar = K.reshape(W_bar, W_shape)
    else:
        with tf.control_dependencies([self.u.assign(_u)]):
            W_bar = K.reshape(W_bar, W_shape)

    # The normalised kernel is kept local instead of being stored back into
    # self.kernel, so the layer's weight attribute is not replaced by a
    # derived tensor.
    outputs = K.conv2d_transpose(
        inputs,
        W_bar,
        output_shape,
        self.strides,
        padding=self.padding,
        data_format=self.data_format)

    if self.use_bias:
        outputs = K.bias_add(
            outputs,
            self.bias,
            data_format=self.data_format)

    if self.activation is not None:
        return self.activation(outputs)
    return outputs
def test_deconv_length():
    """Check ``conv_utils.deconv_length`` with and without output padding."""
    # An unknown input length stays unknown.
    assert conv_utils.deconv_length(None, 1, 7, 'same', None) is None

    # (dim_size, stride_size, kernel_size, padding, output_padding) -> expected
    expectations = [
        # output_padding left unspecified
        ((224, 1, 7, 'same', None), 224),
        ((224, 2, 7, 'same', None), 448),
        ((32, 1, 5, 'valid', None), 36),
        ((32, 2, 5, 'valid', None), 67),
        ((32, 1, 5, 'full', None), 28),
        ((32, 2, 5, 'full', None), 59),
        # explicit output_padding
        ((224, 1, 7, 'same', 0), 224),
        ((224, 2, 7, 'same', 0), 447),
        ((224, 2, 7, 'same', 1), 448),
        ((32, 1, 5, 'valid', 0), 36),
        ((32, 2, 5, 'valid', 0), 67),
        ((32, 2, 5, 'valid', 1), 68),
        ((6, 1, 3, 'full', 0), 4),
        ((6, 2, 3, 'full', 1), 10),
        ((6, 2, 3, 'full', 2), 11),
    ]
    for arguments, expected in expectations:
        assert conv_utils.deconv_length(*arguments) == expected