def conv2d(inputs,
           activation,
           filters,
           out_caps_dims,
           kernel_size,
           strides,
           padding="valid",
           routing_method="EMRouting",
           name=None,
           reuse=None):
    """A 2D convolutional capsule layer.

    Args:
        inputs: A 6-D tensor with shape
            [batch_size, in_height, in_width, in_channels] + in_caps_dims.
        activation: A 4-D tensor with shape
            [batch_size, in_height, in_width, in_channels].
        filters: Integer, the dimensionality of the output space (i.e. the
            number of filters in the convolution).
        out_caps_dims: A tuple/list of 2 integers, specifying the dimensions
            of output capsule, e.g. out_caps_dims=[4, 4] representing that
            each output capsule has shape [4, 4].
        kernel_size: An integer or tuple/list of 2 integers, specifying the
            height and width of the 2D convolution window. Can be a single
            integer to specify the same value for all spatial dimensions.
        strides: An integer or tuple/list of 2 integers, specifying the
            strides of the convolution along the height and width. Can be a
            single integer to specify the same value for all spatial
            dimensions.
        padding: One of "valid" or "same" (case-insensitive). Only "valid"
            is supported for now; the value is accepted for interface
            compatibility but is not read by this function.
        routing_method: One of "EMRouting" or "DynamicRouting", the method
            of routing-by-agreement algorithm.
        name: A string, the name of the layer. Defaults to "conv2d".
        reuse: Boolean, whether to reuse the weights of a previous layer by
            the same name.

    Returns:
        pose: A 6-D tensor with shape
            [batch_size, out_height, out_width, out_channels] + out_caps_dims.
        activation: A 4-D tensor with shape
            [batch_size, out_height, out_width, out_channels].

    Raises:
        ValueError: If `inputs` is not rank 6, `activation` is not rank 4,
            or `kernel_size`, `strides` or `out_caps_dims` is malformed.
    """
    name = "conv2d" if name is None else name
    with tf.variable_scope(name) as scope:
        if reuse:
            scope.reuse_variables()

        input_shape = cl.shape(inputs)
        input_rank = len(input_shape)
        activation_rank = len(activation.shape)
        if input_rank != 6:
            raise ValueError('Inputs to `conv2d` should have rank 6. '
                             'Received inputs rank: ' + str(input_rank))
        if activation_rank != 4:
            raise ValueError('Activation to `conv2d` should have rank 4. '
                             'Received activation rank: ' + str(activation_rank))

        # The window gets a third entry, input_shape[3] (in_channels per the
        # shape documented above), paired with a stride of 1 below.
        if isinstance(kernel_size, int):
            kernel_size = [kernel_size, kernel_size, input_shape[3]]
        elif isinstance(kernel_size, (list, tuple)) and len(kernel_size) == 2:
            kernel_size = [kernel_size[0], kernel_size[1], input_shape[3]]
        else:
            raise ValueError('"kernel_size" should be an integer or tuple/list '
                             'of 2 integers. Received: ' + str(kernel_size))

        if isinstance(strides, int):
            strides = [strides, strides, 1]
        elif isinstance(strides, (list, tuple)) and len(strides) == 2:
            strides = [strides[0], strides[1], 1]
        else:
            # Report the offending `strides` value (the message previously
            # echoed `kernel_size` by mistake).
            raise ValueError('"strides" should be an integer or tuple/list '
                             'of 2 integers. Received: ' + str(strides))

        if not isinstance(out_caps_dims, (list, tuple)) or len(out_caps_dims) != 2:
            raise ValueError('"out_caps_dims" should be a tuple/list of 2 '
                             'integers. Received: ' + str(out_caps_dims))
        out_caps_dims = list(out_caps_dims)

        # 1. space to batch
        # patching everything into
        # [batch_size, out_height, out_width, in_channels] + in_caps_dims (batched)
        # and [batch_size, out_height, out_width, in_channels] (activation).
        batched = cl.space_to_batch_nd(inputs, kernel_size, strides)
        activation = cl.space_to_batch_nd(activation, kernel_size, strides)

        # 2. transforming
        # transforming to
        # [batch_size, out_height, out_width, in_channels, out_channels/filters] + out_caps_dims
        vote = transforming(batched,
                            num_outputs=filters,
                            out_caps_dims=out_caps_dims)

        # 3. routing
        pose, activation = routing(vote, activation, method=routing_method)

        return pose, activation
def _coordinate_offsets(in_height, in_width, out_caps_height, out_caps_width):
    """Build the constant that coordinate addition adds to the votes.

    The scaled row coordinate ``(i + 0.5) / in_height`` is written to element
    ``[0, -1]`` of every pose matrix at input row ``i`` and the scaled column
    coordinate ``(j + 0.5) / in_width`` to element ``[1, -1]`` of every pose
    matrix at input column ``j``; all other entries are zero.

    Args:
        in_height: Integer, number of input rows.
        in_width: Integer, number of input columns.
        out_caps_height: Integer, rows of the output pose matrix (>= 2).
        out_caps_width: Integer, columns of the output pose matrix (>= 1).

    Returns:
        A float64 numpy array with shape
        [1, in_height, in_width, 1, 1, out_caps_height, out_caps_width].

    Raises:
        ValueError: If the pose matrix is too small to hold both coordinates.
    """
    if out_caps_height < 2 or out_caps_width < 1:
        raise ValueError('Coordinate addition needs output capsules with at '
                         'least 2 rows and 1 column. Received: '
                         + str([out_caps_height, out_caps_width]))

    row_coord = (np.arange(in_height) + 0.5) / in_height
    col_coord = (np.arange(in_width) + 0.5) / in_width

    offsets = np.zeros((1, in_height, in_width, 1, 1,
                        out_caps_height, out_caps_width))
    # Each slice below has shape [in_height, in_width]; broadcasting repeats
    # the row coordinate along the width and the column coordinate along the
    # height.
    offsets[0, :, :, 0, 0, 0, -1] = row_coord[:, np.newaxis]
    offsets[0, :, :, 0, 0, 1, -1] = col_coord[np.newaxis, :]
    return offsets


def dense(inputs,
          activation,
          num_outputs,
          out_caps_dims,
          routing_method='EMRouting',
          routing_iter=3,
          coordinate_addition=False,
          reuse=None,
          name=None):
    """A fully connected capsule layer.

    Args:
        inputs: A 4-D tensor with shape [batch_size, num_inputs] + in_caps_dims
            or a 6-D tensor with shape
            [batch_size, in_height, in_width, in_channels] + in_caps_dims.
        activation: [batch_size, num_inputs] or
            [batch_size, in_height, in_width, in_channels], matching `inputs`.
        num_outputs: Integer, the number of output capsules in the layer.
        out_caps_dims: A tuple/list with two elements, pose shape of output
            capsules.
        routing_method: The routing algorithm, passed through to `routing`
            (default 'EMRouting').
        routing_iter: Number of iterations during routing algorithm.
        coordinate_addition: Boolean. When true and `inputs` is 6-D, the
            scaled row/column position of each input capsule is added to
            elements [0, -1] and [1, -1] of its votes before routing. Ignored
            for 4-D inputs.
        reuse: Boolean, whether to reuse the weights of a previous layer by
            the same name.
        name: A string, the name of the layer. Defaults to "dense".

    Returns:
        pose: A 4-D tensor with shape [batch_size, num_outputs] + out_caps_dims
        activation: [batch_size, num_outputs]

    Raises:
        TypeError: If the ranks of `inputs` and `activation` are not (4, 2)
            or (6, 4).
        ValueError: If coordinate addition is requested with output capsules
            smaller than 2 rows by 1 column.
    """
    name = "dense" if name is None else name
    # tf.name_scope yields a plain string, so it cannot switch on variable
    # reuse; a variable scope is required for `reuse` to work. Note that the
    # variable scope also prefixes names of variables created with
    # tf.get_variable inside it.
    with tf.variable_scope(name) as scope:
        if reuse:
            scope.reuse_variables()

        input_rank = len(inputs.shape)
        activation_rank = len(activation.shape)
        is_spatial = input_rank == 6 and activation_rank == 4
        is_flat = input_rank == 4 and activation_rank == 2
        if not (is_spatial or is_flat):
            raise TypeError("Wrong rank for inputs or activation")

        vote = transforming(inputs,
                            num_outputs=num_outputs,
                            out_caps_dims=out_caps_dims)

        if is_spatial:
            (batch_size, in_height, in_width, in_channels, _,
             out_caps_height, out_caps_width) = cl.shape(vote)
            num_inputs = in_height * in_width * in_channels

            if coordinate_addition:
                with tf.name_scope("coodinate_addition"):
                    offsets = _coordinate_offsets(in_height, in_width,
                                                  out_caps_height,
                                                  out_caps_width)
                    vote = vote + tf.constant(offsets, dtype=tf.float32)

            # Flatten the spatial grid so routing sees one axis of input
            # capsules, exactly as in the 4-D input case.
            vote = tf.reshape(
                vote,
                shape=[batch_size, num_inputs, num_outputs] + list(out_caps_dims))
            activation = tf.reshape(activation, shape=[batch_size, num_inputs])

        pose, activation = routing(vote,
                                   activation,
                                   routing_method,
                                   num_iter=routing_iter)
        # Sanity checks on the routing output, not input validation.
        assert len(pose.shape) == 4
        assert len(activation.shape) == 2

    return pose, activation
def conv3d(inputs,
           activation,
           filters,
           out_caps_dims,
           kernel_size,
           strides,
           padding="valid",
           routing_method="EMRouting",
           name=None,
           reuse=None):
    """A 3D convolutional capsule layer.

    Args:
        inputs: A 7-D tensor with shape
            [batch_size, in_depth, in_height, in_width, in_channels] + in_caps_dims.
        activation: A 5-D tensor with shape
            [batch_size, in_depth, in_height, in_width, in_channels].
        filters: Integer, the dimensionality of the output space (i.e. the
            number of filters in the convolution).
        out_caps_dims: A tuple/list of 2 integers, specifying the dimensions
            of output capsule, e.g. out_caps_dims=[4, 4] representing that
            each output capsule has shape [4, 4].
        kernel_size: An integer or tuple/list of 3 integers, specifying the
            depth, height and width of the 3D convolution window. Can be a
            single integer to specify the same value for all spatial
            dimensions.
        strides: An integer or tuple/list of 3 integers, specifying the
            strides of the convolution along the depth, height and width.
            Can be a single integer to specify the same value for all
            spatial dimensions.
        padding: One of "valid" or "same" (case-insensitive). Only "valid"
            is supported for now; the value is accepted for interface
            compatibility but is not read by this function.
        routing_method: One of "EMRouting" or "DynamicRouting", the method
            of routing-by-agreement algorithm.
        name: String, a name for the layer (optional). Defaults to "conv3d".
        reuse: Boolean, whether to reuse the weights of a previous layer by
            the same name.

    Returns:
        pose: A 7-D tensor with shape
            [batch_size, out_depth, out_height, out_width, out_channels] + out_caps_dims.
        activation: A 5-D tensor with shape
            [batch_size, out_depth, out_height, out_width, out_channels].

    Raises:
        ValueError: If `inputs` is not rank 7, `activation` is not rank 5,
            or `kernel_size`, `strides` or `out_caps_dims` is malformed.
    """
    # The default used to be the copy-pasted "conv1d".
    name = "conv3d" if name is None else name
    # A variable scope (rather than a name scope) is needed for `reuse` to
    # have any effect; this mirrors `conv2d`. Note that it also prefixes
    # names of variables created with tf.get_variable inside it.
    with tf.variable_scope(name) as scope:
        if reuse:
            scope.reuse_variables()

        input_shape = cl.shape(inputs)
        input_rank = len(input_shape)
        activation_rank = len(activation.shape)
        if input_rank != 7:
            raise ValueError('Inputs to `conv3d` should have rank 7. '
                             'Received input rank: ' + str(input_rank))
        if activation_rank != 5:
            raise ValueError('Activation to `conv3d` should have rank 5. '
                             'Received activation rank: ' + str(activation_rank))

        # NOTE(review): unlike `conv2d`, no channel entry is appended to
        # kernel_size/strides here - confirm this is what
        # cl.space_to_batch_nd expects for 7-D inputs.
        if isinstance(kernel_size, int):
            kernel_size = [kernel_size, kernel_size, kernel_size]
        elif isinstance(kernel_size, (list, tuple)) and len(kernel_size) == 3:
            kernel_size = list(kernel_size)
        else:
            raise ValueError('"kernel_size" should be an integer or tuple/list '
                             'of 3 integers. Received: ' + str(kernel_size))

        if isinstance(strides, int):
            strides = [strides, strides, strides]
        elif isinstance(strides, (list, tuple)) and len(strides) == 3:
            strides = list(strides)
        else:
            raise ValueError('"strides" should be an integer or tuple/list '
                             'of 3 integers. Received: ' + str(strides))

        if not isinstance(out_caps_dims, (list, tuple)) or len(out_caps_dims) != 2:
            raise ValueError('"out_caps_dims" should be a tuple/list of 2 '
                             'integers. Received: ' + str(out_caps_dims))
        out_caps_dims = list(out_caps_dims)

        # 1. space to batch
        batched = cl.space_to_batch_nd(inputs, kernel_size, strides)
        activation = cl.space_to_batch_nd(activation, kernel_size, strides)

        # 2. transforming
        vote = transforming(batched,
                            num_outputs=filters,
                            out_caps_dims=out_caps_dims)

        # 3. routing
        pose, activation = routing(vote, activation, method=routing_method)

        return pose, activation