def matmul_v2(a, b, transpose_a=False, transpose_b=False, name=None):
    name = "matmul" if name is None else name
    with tf.name_scope(name):
        rank_a = len(a.shape)
        rank_b = len(b.shape)
        if rank_a < 2 or rank_b < 2:
            raise TypeError("Rank must be at least 2")
        if transpose_a:
            perm = [i for i in range(rank_a - 2)]
            perm = perm + [rank_a - 1, rank_a - 2]
            a = tf.transpose(a, perm=perm)
        if transpose_b:
            perm = [i for i in range(rank_b - 2)]
            perm = perm + [rank_b - 1, rank_b - 2]
            b = tf.transpose(b, perm=perm)
        # Tile b along its second-to-last axis, flatten the last two axes of a,
        # multiply with broadcasting and reduce: a matmul without tf.matmul.
        b = tf.tile(b, [1 for i in range(rank_b - 2)] + [cl.shape(a)[-2], 1])
        shape = cl.shape(a)[:-2] + [np.prod(cl.shape(a)[-2:]), 1]
        a_prime = tf.reshape(a, shape=shape)
        c = a_prime * b
        shape = cl.shape(a) + cl.shape(b)[-1:]
        c = tf.reshape(c, shape=shape)
        c = tf.reduce_sum(c, axis=-2)
        return c
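# A hedged sanity check for matmul_v2, assuming TF 1.x session semantics and
# the module-level `tf`/`np`/`cl` imports used above. The helper name
# `_check_matmul_v2` is illustrative, not part of the library: the
# broadcast-and-reduce trick should agree with tf.matmul on batched inputs.
def _check_matmul_v2():
    import numpy as np
    import tensorflow as tf
    a = tf.constant(np.random.rand(2, 3, 4), dtype=tf.float32)
    b = tf.constant(np.random.rand(2, 4, 5), dtype=tf.float32)
    expected = tf.matmul(a, b)   # [2, 3, 5]
    actual = matmul_v2(a, b)     # same shape, same values
    with tf.Session() as sess:
        e, r = sess.run([expected, actual])
    np.testing.assert_allclose(e, r, rtol=1e-5)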
def space_to_batch_nd_v1(inputs, kernel_size, strides, name=None):
    """ for convCapsNet model: memory 4719M, speed 0.169 sec/step """
    name = "space_to_batch_nd" if name is None else name
    with tf.name_scope(name):
        height, width, depth = cl.shape(inputs)[1:4]
        h_offsets = [[(h + k) for k in range(0, kernel_size[0])]
                     for h in range(0, height + 1 - kernel_size[0], strides[0])]
        w_offsets = [[(w + k) for k in range(0, kernel_size[1])]
                     for w in range(0, width + 1 - kernel_size[1], strides[1])]
        d_offsets = [[(d + k) for k in range(0, kernel_size[2])]
                     for d in range(0, depth + 1 - kernel_size[2], strides[2])]
        patched = tf.gather(inputs, h_offsets, axis=1)
        patched = tf.gather(patched, w_offsets, axis=3)
        patched = tf.gather(patched, d_offsets, axis=5)
        if len(patched.shape) == 7:
            perm = [0, 1, 3, 5, 2, 4, 6]
        else:
            perm = [0, 1, 3, 5, 2, 4, 6, 7, 8]
        patched = tf.transpose(patched, perm=perm)
        shape = cl.shape(patched)
        if depth == kernel_size[2]:  # for conv2d
            shape = (shape[:3] + [np.prod(shape[3:-2])] + shape[-2:]
                     if len(patched.shape) == 9
                     else shape[:3] + [np.prod(shape[3:])])
        else:  # for conv3d
            shape = (shape[:4] + [np.prod(shape[4:-2])] + shape[-2:]
                     if len(patched.shape) == 9
                     else shape[:4] + [np.prod(shape[4:])])
        patched = tf.reshape(patched, shape=shape)
        return patched
def DCCN2(patch, spectrum, k, output):
    # Spatial (patch) branch.
    pt = tf.layers.conv2d(patch, filters=50, kernel_size=3, strides=1,
                          padding="same", activation=tf.nn.relu)
    pt = tf.layers.max_pooling2d(pt, 2, strides=2, padding="same")
    pt = tf.nn.dropout(pt, k)
    pt, ptAct = cl.layers.primaryCaps(pt, filters=64, kernel_size=3, strides=1,
                                      out_caps_dims=[8, 1], method="logistic",
                                      name="pt")
    ptNumInput = np.prod(cl.shape(pt)[1:4])
    pt = tf.reshape(pt, shape=[-1, ptNumInput, 8, 1])
    ptAct = tf.reshape(ptAct, shape=[-1, ptNumInput])

    # Spectral branch.
    sp = tf.layers.conv1d(spectrum, filters=30, kernel_size=7, strides=1,
                          padding="valid", activation=tf.nn.relu)
    sp = tf.layers.max_pooling1d(sp, 7, strides=2, padding="valid")
    sp = tf.nn.dropout(sp, k)
    sp = tf.reshape(sp, [-1, sp.shape[1], 1, sp.shape[2]])
    sp, spAct = cl.layers.primaryCaps(sp, filters=64, kernel_size=(3, 1),
                                      strides=1, out_caps_dims=[8, 1],
                                      method="logistic", name="sp")
    spNumInput = np.prod(cl.shape(sp)[1:4])
    sp = tf.reshape(sp, shape=[-1, spNumInput, 8, 1])
    spAct = tf.reshape(spAct, shape=[-1, spNumInput])

    # Concatenate both capsule sets and route to the output capsules.
    net = tf.concat([pt, sp], 1)
    act = tf.concat([ptAct, spAct], 1)
    net, act = cl.layers.dense(net, act, num_outputs=output,
                               out_caps_dims=[16, 1],
                               routing_method="DynamicRouting")
    return act
def caps_net_3(x):
    net = tf.reshape(x, [-1, patch_size, patch_size, num_band])
    conv1 = tf.layers.conv2d(net, filters=100, kernel_size=3, strides=1,
                             padding="valid", activation=tf.nn.relu,
                             name="convLayer")
    conv1 = tf.layers.max_pooling2d(conv1, 2, strides=2, padding="same")
    convCaps, activation = cl.layers.primaryCaps(conv1, filters=300,
                                                 kernel_size=3, strides=1,
                                                 out_caps_dims=[8, 1],
                                                 method="logistic")
    n_input = np.prod(cl.shape(convCaps)[1:4])
    convCaps = tf.reshape(convCaps, shape=[-1, n_input, 8, 1])
    activation = tf.reshape(activation, shape=[-1, n_input])
    rt_poses, rt_probs = cl.layers.dense(convCaps, activation,
                                         num_outputs=num_classes,
                                         out_caps_dims=[16, 1],
                                         routing_method="DynamicRouting")
    return rt_probs
def CapsNet(net, output):
    conv1 = tf.layers.conv2d(net, filters=100, kernel_size=3, strides=1,
                             padding="same", activation=tf.nn.relu,
                             name="convLayer")
    conv1 = tf.layers.max_pooling2d(conv1, 2, strides=2, padding="same")
    conv2 = tf.layers.conv2d(conv1, filters=300, kernel_size=3,
                             padding="same", activation=tf.nn.relu)
    conv2 = tf.layers.max_pooling2d(conv2, 2, strides=2, padding="same")
    convCaps, activation = cl.layers.primaryCaps(conv2, filters=64,
                                                 kernel_size=3, strides=1,
                                                 out_caps_dims=[8, 1],
                                                 method="logistic")
    n_input = np.prod(cl.shape(convCaps)[1:4])
    convCaps = tf.reshape(convCaps, shape=[-1, n_input, 8, 1])
    activation = tf.reshape(activation, shape=[-1, n_input])
    rt_poses, rt_probs = cl.layers.dense(convCaps, activation,
                                         num_outputs=output,
                                         out_caps_dims=[16, 1],
                                         routing_method="DynamicRouting")
    return rt_probs
def dynamicRouting(self, votes):
    """ Dynamic routing algorithm.

    See [Sabour et al., 2017](https://arxiv.org/abs/1710.09829).

    Args:
        votes: A 5-D or 7-D tensor with shape
            [batch_size, ..., in_channels/num_inputs, num_outputs] + out_caps_dims.

    Returns:
        poses: A 4-D or 6-D tensor.
        probs: A 2-D or 4-D tensor.
    """
    vote_shape = cl.shape(votes)
    logit_shape = vote_shape[:-2] + [1, 1]
    logits = tf.fill(logit_shape, 0.0)
    squash_on = -2 if vote_shape[-1] == 1 else [-2, -1]

    def _body(i, logits, poses):
        if self.leaky:
            route = _leaky_routing(logits)
        else:
            route = tf.nn.softmax(logits, axis=-3)
        if self.use_bias:
            preactivate = cl.reduce_sum(route * votes, axis=-4,
                                        keepdims=True) + self.biases
        else:
            preactivate = cl.reduce_sum(route * votes, axis=-4, keepdims=True)
        pose = cl.ops.squash(preactivate, axis=squash_on)
        poses = poses.write(i, pose)
        if vote_shape[-1] == 1:
            distances = cl.matmul(votes, pose, transpose_a=True)
        else:
            diff = votes - pose
            distances = tf.linalg.trace(cl.matmul(diff, diff))[..., tf.newaxis,
                                                               tf.newaxis]
        logits += distances
        return (i + 1, logits, poses)

    poses = tf.TensorArray(dtype=tf.float32, size=self.num_iter,
                           clear_after_read=False)
    i = tf.constant(0, dtype=tf.int32)
    _, logits, poses = tf.while_loop(
        lambda i, logits, poses: i < self.num_iter,
        _body,
        loop_vars=[i, logits, poses],
        swap_memory=True)
    poses = tf.squeeze(poses.read(self.num_iter - 1), axis=-4)
    probs = tf.norm(poses, axis=[-2, -1])
    return poses, probs
def transforming(inputs, num_outputs, out_caps_dims, share, transform,
                 identity=None, identity_dim=None, name=None):
    """
    Args:
        inputs: A 4-D or 6-D tensor, [batch_size, num_inputs] + in_caps_dims
            or [batch_size, height, width, channels] + in_caps_dims.
        num_outputs: Integer, the number of output capsules.
        out_caps_dims: A list of 2 integers. The dimensions of output capsule,
            e.g. out_caps_dims=[4, 4].
        name: String, a name for this operation.

    Returns:
        votes: A 5-D or 7-D tensor,
            [batch_size, num_inputs, num_outputs] + out_caps_dims or
            [batch_size, height, width, channels, num_outputs] + out_caps_dims.
    """
    name = "transforming" if name is None else name
    with tf.variable_scope(name) as scope:
        input_shape = cl.shape(inputs)
        prefix_shape = [1 for i in range(len(input_shape) - 3)] \
            + input_shape[-3:-2] + [num_outputs]
        prefix_shape[1] = 1  # share the transformation matrix across inputs
        in_caps_dims = input_shape[-2:]
        # if share is True:
        #     shape = prefix_shape + [1, out_caps_dims[0], 1]
        # else:
        shape = prefix_shape + [in_caps_dims[0], out_caps_dims[0], 1]
        expand_axis = -2
        reduce_sum_axis = -3
        in_pose = tf.expand_dims(inputs, axis=-3)
        ones = tf.ones(shape=prefix_shape + [1, 1])
        in_pose = tf.expand_dims(in_pose * ones, axis=expand_axis)
        transform_mat = tf.get_variable("transformation_matrix", shape=shape)
        bias = tf.get_variable('transformation_bias',
                               shape=[num_outputs] + out_caps_dims)
        if transform:
            votes = tf.reduce_sum(in_pose * transform_mat, axis=reduce_sum_axis)
            votes += bias
        else:
            votes = in_pose + bias[:, :, :, None]
            votes = tf.reduce_sum(votes, axis=-1)
        if identity is not None:
            dim = shape[-2]
            for n, i in enumerate(identity):
                bias_mat = tf.get_variable('transformation_bias' + str(n),
                                           shape=[identity_dim, dim])
                bias = tf.matmul(i, bias_mat)[:, None, None, :, None]
                votes += bias
        return votes
def dynamicRouting_v1(votes, num_routing=3, use_bias=True, leaky=True):
    """ Dynamic routing algorithm.

    See [Sabour et al., 2017](https://arxiv.org/abs/1710.09829).

    Args:
        votes: A 5-D or 7-D tensor with shape
            [batch_size, ..., in_channels/num_inputs, num_outputs] + out_caps_dims.
        num_routing: Integer, number of routing iterations.
        use_bias: Boolean, whether the layer uses a bias.
        leaky: Boolean, whether the algorithm uses leaky routing.

    Returns:
        poses: A 4-D or 6-D tensor.
        probs: A 2-D or 4-D tensor.
    """
    vote_shape = cl.shape(votes)
    logit_shape = vote_shape[:-2] + [1, 1]
    logits = tf.fill(logit_shape, 0.0)
    squash_on = -2 if vote_shape[-1] == 1 else [-2, -1]
    if use_bias:
        bias_shape = [1 for i in range(len(vote_shape) - 3)] + vote_shape[-3:]
        biases = tf.get_variable("biases",
                                 bias_shape,
                                 initializer=tf.constant_initializer(0.1),
                                 dtype=tf.float32)
    vote_stopped = tf.stop_gradient(votes, name="stop_gradient")
    for i in range(num_routing):
        with tf.variable_scope("iter_" + str(i)):
            if leaky:
                route = _leaky_routing(logits)
            else:
                route = cl.softmax(logits, axis=-3)
            if i == num_routing - 1:
                # Last iteration: route the original (non-stopped) votes so
                # gradients flow through the routing weights.
                preactivate = cl.reduce_sum(tf.multiply(route, votes),
                                            axis=-4, keepdims=True)
                if use_bias:
                    preactivate += biases
                poses = cl.ops.squash(preactivate, axis=squash_on)
            else:
                preactivate = cl.reduce_sum(tf.multiply(route, vote_stopped),
                                            axis=-4, keepdims=True)
                if use_bias:
                    preactivate += biases
                poses = cl.ops.squash(preactivate, axis=squash_on)
                logits += cl.reduce_sum(vote_stopped * poses,
                                        axis=-4, keepdims=True)
    poses = tf.squeeze(poses, axis=-4)
    probs = tf.norm(poses, axis=(-2, -1))
    return (poses, probs)
def primaryCaps(inputs, filters, kernel_size, strides, out_caps_dims,
                method=None, name=None):
    '''Primary capsule layer.

    Args:
        inputs: [batch_size, in_height, in_width, in_channels].
        filters: Integer, the dimensionality of the output space.
        kernel_size: kernel_size
        strides: strides
        out_caps_dims: A list of 2 integers.
        method: the method of calculating probability of entity existence
            (logistic, norm, None).

    Returns:
        pose: A 6-D tensor, [batch_size, out_height, out_width, filters] + out_caps_dims
        activation: A 4-D tensor, [batch_size, out_height, out_width, filters]
    '''
    name = "primary_capsule" if name is None else name
    with tf.variable_scope(name):
        channels = filters * np.prod(out_caps_dims)
        channels = channels + filters if method == "logistic" else channels
        pose = tf.layers.conv2d(inputs, channels,
                                kernel_size=kernel_size,
                                strides=strides,
                                activation=None)
        shape = cl.shape(pose, name="get_pose_shape")
        batch_size = shape[0]
        height = shape[1]
        width = shape[2]
        shape = [batch_size, height, width, filters] + out_caps_dims

        if method == 'logistic':
            # logistic activation unit
            pose, activation_logit = tf.split(pose,
                                              [channels - filters, filters],
                                              axis=-1)
            pose = tf.reshape(pose, shape=shape)
            activation = tf.sigmoid(activation_logit)
        elif method == 'norm' or method is None:
            pose = tf.reshape(pose, shape=shape)
            squash_on = -2 if out_caps_dims[-1] == 1 else [-2, -1]
            pose = cl.ops.squash(pose, axis=squash_on)
            activation = cl.norm(pose, axis=(-2, -1))
        activation = tf.clip_by_value(activation, 1e-20, 1. - 1e-20)
        return (pose, activation)
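# A minimal shape sketch for primaryCaps, assuming the 20x20x256 feature map
# that the first conv layer of the CapsNet models in this repo produces. The
# helper name is illustrative only. With a 9x9 kernel, stride 2 and a "valid"
# conv, the spatial grid becomes 6x6.
def _demo_primary_caps():
    import numpy as np
    import tensorflow as tf
    x = tf.constant(np.random.rand(2, 20, 20, 256), dtype=tf.float32)
    pose, act = primaryCaps(x, filters=32, kernel_size=9, strides=2,
                            out_caps_dims=[8, 1], method="norm")
    # pose: [2, 6, 6, 32, 8, 1], act: [2, 6, 6, 32]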
def EMRouting(self, votes, activation):
    """
    Args:
        votes: [batch_size, ..., in_channels/num_inputs, num_outputs] + out_caps_dims.
        activation: [batch_size, ..., in_channels/num_inputs]

    Returns:
        pose: [batch_size, num_outputs] + out_caps_dims
        activation: [batch_size, num_outputs]
    """
    vote_shape = cl.shape(votes)
    num_outputs = vote_shape[-3]
    out_caps_dims = vote_shape[-2:]

    shape = vote_shape[:-2] + [np.prod(out_caps_dims)]
    votes = tf.reshape(votes, shape=shape)
    activation = activation[..., tf.newaxis, tf.newaxis]
    log_activation = tf.math.log(activation)
    log_R = tf.math.log(tf.fill(vote_shape[:-2] + [1], 1.) / num_outputs)

    lambda_min = 0.001
    lambda_max = 0.006
    for t_iter in range(self.num_iter):
        # Increase the inverse temperature linearly:
        # lambda = lambda_min + k * t_iter, with
        # k = (lambda_max - lambda_min) / self.num_iter.
        # TODO: search for better lambda_min and lambda_max
        inverse_temperature = lambda_min + \
            (lambda_max - lambda_min) * t_iter / max(1.0, self.num_iter)
        with tf.name_scope('M-STEP'):
            pose, log_var, log_activation_prime = self.M_step(
                log_R, log_activation, votes,
                lambda_val=inverse_temperature)
        # There is no need to do the E-STEP in the last iteration.
        if t_iter == self.num_iter - 1:
            break
        with tf.name_scope('E-STEP'):
            log_R = self.E_step(pose, log_var, log_activation_prime, votes)
    pose = tf.reshape(pose,
                      shape=vote_shape[:-4] + [num_outputs] + out_caps_dims)
    activation = tf.reshape(tf.exp(log_activation_prime),
                            shape=vote_shape[:-4] + [num_outputs])
    return pose, activation
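# A small illustration of the inverse-temperature schedule used above. The
# helper is hypothetical, but the arithmetic matches the loop: lambda rises
# linearly from lambda_min and never quite reaches lambda_max.
def _em_lambda_schedule(num_iter=3, lambda_min=0.001, lambda_max=0.006):
    return [lambda_min + (lambda_max - lambda_min) * t / max(1.0, num_iter)
            for t in range(num_iter)]
# _em_lambda_schedule() -> [0.001, 0.002666..., 0.004333...]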
def transforming(inputs, num_outputs, out_caps_dims, name=None):
    """
    Args:
        inputs: A 4-D or 6-D tensor, [batch_size, num_inputs] + in_caps_dims
            or [batch_size, height, width, channels] + in_caps_dims.
        num_outputs: Integer, the number of output capsules.
        out_caps_dims: A list of 2 integers. The dimensions of output capsule,
            e.g. out_caps_dims=[4, 4].
        name: String, a name for this operation.

    Returns:
        votes: A 5-D or 7-D tensor,
            [batch_size, num_inputs, num_outputs] + out_caps_dims or
            [batch_size, height, width, channels, num_outputs] + out_caps_dims.
    """
    name = "transforming" if name is None else name
    with tf.name_scope(name) as scope:
        input_shape = cl.shape(inputs)
        prefix_shape = [1 for i in range(len(input_shape) - 3)] \
            + input_shape[-3:-2] + [num_outputs]
        in_caps_dims = input_shape[-2:]
        if in_caps_dims[0] == out_caps_dims[1]:
            shape = prefix_shape + [out_caps_dims[0], 1, in_caps_dims[1]]
            expand_axis = -3
            reduce_sum_axis = -1
        elif in_caps_dims[1] == out_caps_dims[0]:
            shape = prefix_shape + [in_caps_dims[0], 1, out_caps_dims[1]]
            expand_axis = -1
            reduce_sum_axis = -3
        elif in_caps_dims[0] == out_caps_dims[0]:
            shape = prefix_shape + [1, out_caps_dims[1], in_caps_dims[1]]
            expand_axis = -2
            reduce_sum_axis = -1
        elif in_caps_dims[1] == out_caps_dims[1]:
            shape = prefix_shape + [in_caps_dims[0], out_caps_dims[0], 1]
            expand_axis = -2
            reduce_sum_axis = -3
        else:
            raise TypeError(
                "out_caps_dims must have at least one value in common with in_caps_dims")
        in_pose = tf.expand_dims(inputs, axis=-3)
        ones = tf.ones(shape=prefix_shape + [1, 1])
        in_pose = tf.expand_dims(in_pose * ones, axis=expand_axis)
        transform_mat = tf.Variable(
            initial_value=tf.random_uniform_initializer()(shape=shape),
            name="transformation_matrix",
            shape=shape)
        votes = tf.reduce_sum(in_pose * transform_mat, axis=reduce_sum_axis)
        return votes
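# A hedged shape check for transforming: with in_caps_dims=[8, 1] and
# out_caps_dims=[16, 1] the trailing dims match (1 == 1), so the last branch
# above fires and every input capsule emits one vote per output capsule.
# The helper name is illustrative only.
def _demo_transforming():
    import numpy as np
    import tensorflow as tf
    x = tf.constant(np.random.rand(8, 1152, 8, 1), dtype=tf.float32)
    votes = transforming(x, num_outputs=10, out_caps_dims=[16, 1])
    # votes: [8, 1152, 10, 16, 1]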
def forward(self, inputs):
    """ Computes the probs and poses.

    Args:
        inputs: Tensor or array with shape [batch_size, height, width, channels]
            or [batch_size, height * width * channels].

    Returns:
        poses: [batch_size, num_label, 16, 1].
        probs: Tensor with shape [batch_size, num_label], the probability of
            entity presence.
    """
    x = inputs
    x = tf.reshape(x, shape=[-1, self.height, self.width, self.channels])
    x = self.conv1(x)
    x, activation = self.primaryCaps(x)
    num_inputs = np.prod(cl.shape(x)[1:4])
    x = tf.reshape(x, shape=[-1, num_inputs, 8, 1])
    activation = tf.reshape(activation, shape=[-1, num_inputs])
    poses, probs = self.denseCaps((x, activation))
    cl.summary.histogram('activation', probs, verbose=cfg.summary_verbose)
    return poses, probs
def space_to_batch_nd(input, kernel_size, strides, name=None):
    """ Space to batch with strides. Different from tf.space_to_batch_nd.

    For the convCapsNet model: memory 4729M, speed 0.165 sec/step,
    similar to space_to_batch_nd_v1.

    Args:
        input: A Tensor. N-D with shape
            input_shape = [batch] + spatial_shape + remaining_shape,
            where spatial_shape has M dimensions.
        kernel_size: A sequence of len(spatial_shape)-D positive integers
            specifying the spatial dimensions of the filters.
        strides: A sequence of len(spatial_shape)-D positive integers
            specifying the stride at which to compute output.

    Returns:
        A Tensor.
    """
    assert len(kernel_size) == 3
    assert len(strides) == 3

    name = "space_to_batch_nd" if name is None else name
    with tf.name_scope(name):
        input_shape = cl.shape(input)
        h_steps = int((input_shape[1] - kernel_size[0]) / strides[0] + 1)
        w_steps = int((input_shape[2] - kernel_size[1]) / strides[1] + 1)
        d_steps = int((input_shape[3] - kernel_size[2]) / strides[2] + 1)
        # Each block has shape [batch] + kernel_size + remaining_shape.
        blocks = []
        for d in range(d_steps):
            d_s = d * strides[2]
            d_e = d_s + kernel_size[2]
            h_blocks = []
            for h in range(h_steps):
                h_s = h * strides[0]
                h_e = h_s + kernel_size[0]
                w_blocks = []
                for w in range(w_steps):
                    w_s = w * strides[1]
                    w_e = w_s + kernel_size[1]
                    block = input[:, h_s:h_e, w_s:w_e, d_s:d_e]
                    w_blocks.append(block)
                h_blocks.append(tf.concat(w_blocks, axis=2))
            blocks.append(tf.concat(h_blocks, axis=1))
        return tf.concat(blocks, axis=0)
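# A hedged usage sketch, assuming the 6-D capsule layout used by the conv
# capsule layers in this repo ([batch, height, width, channels] + caps_dims)
# and the 3-element kernel/stride convention where the last entry spans the
# channel axis. The demo only builds the graph and notes the static shape.
def _demo_space_to_batch_nd():
    import numpy as np
    import tensorflow as tf
    x = tf.constant(np.random.rand(2, 6, 6, 4, 8, 1), dtype=tf.float32)
    patched = space_to_batch_nd(x, kernel_size=[3, 3, 4], strides=[1, 1, 1])
    # 4 x 4 patches of size 3x3 tiled back spatially: [2, 12, 12, 4, 8, 1]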
def call(self, inputs):
    pose = self.conv2d(inputs)
    shape = cl.shape(pose, name="get_pose_shape")
    batch_size = shape[0]
    height = shape[1]
    width = shape[2]
    shape = [batch_size, height, width, self.filters] + self.out_caps_dims

    if self.method == 'logistic':
        # logistic activation unit
        num_or_size_splits = [self.channels - self.filters, self.filters]
        pose, activation_logit = tf.split(pose, num_or_size_splits, axis=-1)
        pose = tf.reshape(pose, shape=shape)
        activation = tf.sigmoid(activation_logit)
    elif self.method == 'norm' or self.method is None:
        pose = tf.reshape(pose, shape=shape)
        squash_on = -2 if self.out_caps_dims[-1] == 1 else [-2, -1]
        pose = cl.ops.squash(pose, axis=squash_on)
        activation = cl.norm(pose, axis=(-2, -1))
    activation = tf.clip_by_value(activation, 1e-20, 1. - 1e-20)
    return (pose, activation)
def _process_images(self, inputs, labels_one_hoted):
    """ Set up the capsule network.

    Args:
        inputs: Tensor or array with shape [batch_size, height, width, channels]
            or [batch_size, height * width * channels].
        labels_one_hoted: Tensor or array with shape [batch_size, num_label, 1, 1].

    Returns:
        poses: [batch_size, num_label, 16, 1].
        probs: Tensor with shape [batch_size, num_label], the probability of
            entity presence.
        recon_imgs: the reconstructed, flattened images.
    """
    with tf.name_scope("caps_net"):
        tf.summary.image("input_image", inputs, self.batch_size,
                         family="input_image")
        with tf.name_scope('conv1_layer'):
            conv1 = tf.layers.conv2d(inputs,
                                     filters=256,
                                     kernel_size=9,
                                     strides=1,
                                     padding='valid',
                                     activation=tf.nn.relu)
        with tf.name_scope('primaryCaps_layer'):
            primary_caps, activation = cl.layers.primaryCaps(
                conv1, filters=32, kernel_size=9, strides=2,
                out_caps_dims=[8, 1])
        with tf.name_scope('classCaps_layer'):
            # cl.shape(primary_caps) = [batch_size, 6, 6, 32, 8, 1]
            # cl.shape(primary_caps)[1:4] = [6, 6, 32] -> num_inputs = 1152
            num_inputs = np.prod(cl.shape(primary_caps)[1:4])
            primary_caps = tf.reshape(primary_caps,
                                      shape=[-1, num_inputs, 8, 1])
            activation = tf.reshape(activation, shape=[-1, num_inputs])
            # The routing runs three iterations and returns the result.
            poses, probs = cl.layers.dense(primary_caps,
                                           activation,
                                           num_outputs=self.num_label,
                                           out_caps_dims=[16, 1],
                                           routing_method="DynamicRouting")
        with tf.name_scope('decoder'):
            masked_caps = tf.multiply(poses, labels_one_hoted)
            num_inputs = np.prod(masked_caps.get_shape().as_list()[1:])
            active_caps = tf.reshape(masked_caps, shape=(-1, num_inputs))
            fc1 = tf.layers.dense(active_caps, units=512,
                                  activation=tf.nn.relu)
            fc2 = tf.layers.dense(fc1, units=1024, activation=tf.nn.relu)
            num_outputs = self.height * self.width * self.channels
            recon_imgs = tf.layers.dense(fc2, units=num_outputs,
                                         activation=tf.sigmoid)
            imgs = tf.reshape(recon_imgs,
                              shape=[-1, self.height, self.width, self.channels])
            tf.summary.image("reconstructed_images", imgs, self.batch_size,
                             family="reconstructed_images")
        return poses, probs, recon_imgs
def create_net(self):
    # --- PilotNet-style CNN baseline (kept for reference) ---
    # conv1: 24 filters / 5x5 kernel / 2x2 stride
    # conv1 = tf.contrib.layers.conv2d(self.x, 24, 5, 2, activation_fn=tf.nn.relu)
    # conv2: 36 filters / 5x5 kernel / 2x2 stride
    # conv2 = tf.contrib.layers.conv2d(conv1, 36, 5, 2, activation_fn=tf.nn.relu)
    # conv3: 48 filters / 5x5 kernel / 2x2 stride
    # conv3 = tf.contrib.layers.conv2d(conv2, 48, 5, 2, activation_fn=tf.nn.relu)
    # conv4: 64 filters / 3x3 kernel / 1x1 stride
    # conv4 = tf.contrib.layers.conv2d(conv3, 64, 3, 1, activation_fn=tf.nn.relu)
    # conv5: 64 filters / 3x3 kernel / 1x1 stride
    # conv5 = tf.contrib.layers.conv2d(conv4, 64, 3, 1, activation_fn=tf.nn.relu)
    # To extract features:
    # self.conv5 = conv5
    # Fully connected layers:
    # flattened = tf.contrib.layers.flatten(conv5)
    # fc1 = tf.contrib.layers.fully_connected(flattened, 1164, activation_fn=tf.nn.relu)
    # fc2 = tf.contrib.layers.fully_connected(fc1, 100, activation_fn=tf.nn.relu)
    # fc3 = tf.contrib.layers.fully_connected(fc2, 50, activation_fn=tf.nn.relu)
    # fc4 = tf.contrib.layers.fully_connected(fc3, 10, activation_fn=tf.nn.relu)
    # self.y = tf.contrib.layers.fully_connected(fc4, 1, activation_fn=None)

    # --- CapsNet testing (active path) ---
    # conv1: 24 filters / 9x9 kernel / 2x2 stride
    conv1 = tf.layers.conv2d(self.x, 24, 9, 2, padding="VALID",
                             activation=tf.nn.relu)
    conv1 = tf.nn.dropout(conv1, 0.8)
    # conv2: 36 filters / 5x5 kernel / 2x2 stride
    conv2 = tf.layers.conv2d(conv1, 36, 5, 2, activation=tf.nn.relu)
    conv2 = tf.nn.dropout(conv2, 0.8)
    # conv3: 48 filters / 5x5 kernel / 2x2 stride
    conv3 = tf.layers.conv2d(conv2, 48, 5, 2, activation=tf.nn.relu)
    conv3 = tf.nn.dropout(conv3, 0.8)
    # conv4: 64 filters / 3x3 kernel / 1x1 stride
    conv4 = tf.layers.conv2d(conv3, 64, 3, 1, activation=tf.nn.relu)
    conv4 = tf.nn.dropout(conv4, 0.8)
    # conv5: 64 filters / 3x3 kernel / 1x1 stride
    conv5 = tf.layers.conv2d(conv4, 64, 3, 1, activation=tf.nn.relu)
    conv5 = tf.nn.dropout(conv5, 0.8)
    # conv3 = tf.contrib.layers.conv2d(conv2, 48, 5, 2, activation_fn=tf.nn.relu)

    # Note: only conv1 feeds the capsule path below; conv2-conv5 are unused.
    conv_caps1, conv_caps1_activation = cl.layers.primaryCaps(
        conv1, 64, 9, 2, [8, 1], method='norm')
    # conv_caps1_activation = tf.nn.dropout(conv_caps1_activation, 0.4)
    # tf.assign(conv_caps1[1:4], conv_caps1_activation)
    conv_caps2, conv_caps2_activation = cl.layers.conv2d(
        conv_caps1, conv_caps1_activation, 32, [8, 1], 9, 2,
        padding="valid", routing_method="EMRouting")
    # conv_caps3, conv_caps3_activation = cl.layers.conv2d(conv_caps2, conv_caps2_activation, 64, [4, 1], 5, 2, padding="valid", name="con22d", routing_method="EMRouting")
    # conv_caps4, conv_caps4_activation = cl.layers.conv2d(conv_caps3, conv_caps3_activation, 64, [2, 1], 2, 1, padding="valid", name="con23d", routing_method="EMRouting")
    # conv_caps3, conv_caps3_activation = cl.layers.primaryCaps(conv_caps1_activation, 128, 5, 2, [32, 1], method='norm')

    num_inputs = np.prod(cl.shape(conv_caps2)[1:4])
    conv_caps2 = tf.reshape(conv_caps2, shape=[-1, num_inputs, 8, 1])
    conv_caps2_activation = tf.reshape(conv_caps2_activation,
                                       shape=[-1, num_inputs])
    poses1, probs1 = cl.layers.dense(conv_caps2,
                                     conv_caps2_activation,
                                     num_outputs=1024,
                                     out_caps_dims=[1, 1],
                                     routing_method="EMRouting")

    # Deeper dense-capsule stacks that were tried and disabled:
    # num_inputs = np.prod(cl.shape(poses1)[1:4])
    # poses1 = tf.reshape(poses1, shape=[-1, num_inputs, 16, 1])
    # probs1 = tf.reshape(probs1, shape=[-1, num_inputs])
    # poses2, probs2 = cl.layers.dense(poses1, probs1, num_outputs=1, out_caps_dims=[1, 1], routing_method="EMRouting", name="dense2")
    # num_inputs = np.prod(cl.shape(poses2)[1:4])
    # poses2 = tf.reshape(poses2, shape=[-1, num_inputs, 4, 1])
    # probs2 = tf.reshape(probs2, shape=[-1, num_inputs])
    # poses3, probs3 = cl.layers.dense(poses2, probs2, num_outputs=1, out_caps_dims=[1, 1], routing_method="EMRouting", name="dense3")
    # num_inputs = np.prod(cl.shape(poses3)[1:4])
    # poses3 = tf.reshape(poses3, shape=[-1, num_inputs, 2, 1])
    # probs3 = tf.reshape(probs3, shape=[-1, num_inputs])
    # poses4, probs4 = cl.layers.dense(poses3, probs3, num_outputs=1, out_caps_dims=[4, 1], routing_method="EMRouting", name="dense4")
    # num_inputs = np.prod(cl.shape(poses1)[1:4])
    # poses1 = tf.reshape(poses1, shape=[-1, num_inputs, num_inputs, 1])
    # probs1 = tf.reshape(probs1, shape=[-1, num_inputs])
    # poses2, probs2 = cl.layers.dense(poses1, probs1, num_outputs=1, out_caps_dims=[4, 1], routing_method="EMRouting")

    self.y = probs1
    # self.y = tf.argmax(cl.softmax(probs4, axis=1), axis=1)
    # --- CapsNet test end ---

    # --- AlexNet baseline (kept for reference) ---
    # 1st layer: conv (w/ ReLU) -> lrn -> pool
    # conv1 = conv(self.x, 11, 11, 96, 4, 4, padding='VALID', name='conv1')
    # norm1 = lrn(conv1, 2, 1e-05, 0.75, name='norm1')
    # pool1 = max_pool(norm1, 3, 3, 2, 2, padding='VALID', name='pool1')
    # 2nd layer: conv (w/ ReLU) -> lrn -> pool, with 2 groups
    # conv2 = conv(norm1, 5, 5, 256, 1, 1, groups=2, name='conv2')
    # norm2 = lrn(conv2, 2, 1e-05, 0.75, name='norm2')
    # pool2 = max_pool(norm2, 3, 3, 2, 2, padding='VALID', name='pool2')
    # 3rd layer: conv (w/ ReLU)
    # conv3 = conv(norm2, 3, 3, 384, 1, 1, name='conv3')
    # 4th layer: conv (w/ ReLU) split into two groups
    # conv4 = conv(conv3, 3, 3, 384, 1, 1, groups=2, name='conv4')
    # 5th layer: conv (w/ ReLU) -> pool, split into two groups
    # conv5 = conv(conv4, 3, 3, 256, 1, 1, groups=2, name='conv5')
    # pool5 = max_pool(conv5, 3, 3, 2, 2, padding='VALID', name='pool5')
    # 6th layer: flatten -> FC (w/ ReLU) -> dropout
    # flattened = tf.reshape(conv5, [-1, 6 * 6 * 256])
    # fc6 = fc(flattened, 6 * 6 * 256, 4096, name='fc6')
    # dropout6 = dropout(fc6, self.KEEP_PROB)
    # 7th layer: FC (w/ ReLU) -> dropout
    # fc7 = fc(fc6, 4096, 1164, relu=True, name='fc7')
    # fc8 = fc(fc7, 1164, 100, relu=True, name='fc8')
    # fc9 = fc(fc8, 100, 50, relu=True, name='fc9')
    # fc10 = fc(fc6, 50, 10, relu=True, name='fc10')
    # dropout7 = dropout(fc7, self.KEEP_PROB)
    # 8th layer: FC returning unscaled activations
    # (for tf.nn.softmax_cross_entropy_with_logits)
    # self.y = fc(fc9, 50, num_out=1, relu=True, name='fc11')
    # --- AlexNet testing end ---

    # --- VGG-style baseline (kept for reference) ---
    # conv1 = tf.contrib.layers.conv2d(self.x, 64, 3, 2, activation_fn=tf.nn.relu)
    # conv2 = tf.contrib.layers.conv2d(conv1, 64, 3, 2, activation_fn=tf.nn.relu)
    # conv3 = tf.contrib.layers.conv2d(conv2, 128, 3, 2, activation_fn=tf.nn.relu)
    # conv4 = tf.contrib.layers.conv2d(conv3, 128, 3, 1, activation_fn=tf.nn.relu)
    # conv5 = tf.contrib.layers.conv2d(conv4, 256, 3, 1, activation_fn=tf.nn.relu)
    # conv6 = tf.contrib.layers.conv2d(conv5, 256, 3, 1, activation_fn=tf.nn.relu)
    # conv7 = tf.contrib.layers.conv2d(conv6, 256, 3, 1, activation_fn=tf.nn.relu)
    # conv8 = tf.contrib.layers.conv2d(conv7, 512, 3, 1, activation_fn=tf.nn.relu)
    # conv9 = tf.contrib.layers.conv2d(conv8, 512, 3, 1, activation_fn=tf.nn.relu)
    # conv10 = tf.contrib.layers.conv2d(conv9, 512, 3, 1, activation_fn=tf.nn.relu)
    # conv11 = tf.contrib.layers.conv2d(conv10, 512, 3, 1, activation_fn=tf.nn.relu)
    # conv12 = tf.contrib.layers.conv2d(conv11, 512, 3, 1, activation_fn=tf.nn.relu)
    # conv13 = tf.contrib.layers.conv2d(conv12, 512, 3, 1, activation_fn=tf.nn.relu)
    # To extract features:
    # self.conv13 = conv13
    # Fully connected layers:
    # flattened = tf.contrib.layers.flatten(conv13)
    # fc1 = tf.contrib.layers.fully_connected(flattened, 4096, activation_fn=tf.nn.relu)
    # fc2 = tf.contrib.layers.fully_connected(flattened, 1164, activation_fn=tf.nn.relu)
    # fc3 = tf.contrib.layers.fully_connected(fc2, 100, activation_fn=tf.nn.relu)
    # fc4 = tf.contrib.layers.fully_connected(fc3, 50, activation_fn=tf.nn.relu)
    # fc5 = tf.contrib.layers.fully_connected(fc4, 10, activation_fn=tf.nn.relu)
    # self.y = tf.contrib.layers.fully_connected(fc5, 1, activation_fn=None)
    # --- VGG baseline end ---

    self.l1 = tf.reduce_mean(tf.abs(self.y_ - self.y))
    # self.train_op = tf.train.AdamOptimizer(config.lr).minimize(self.l1)
    # global_step = tf.Variable(4, name='global_step', trainable=False)
    self.train_op = tf.train.AdamOptimizer().minimize(self.l1)  # changed for CapsNet testing
    # self.train_op = tf.train.GradientDescentOptimizer(config.lr).minimize(self.l1)

    # Initialize
    self.sess.run(tf.global_variables_initializer())
def _leaky_routing(logits):
    """ Add a leak (dummy output) channel before the softmax, so probability
    mass can be routed away from all real output capsules, then drop the leak
    channel again. """
    leak_shape = cl.shape(logits)
    leak = tf.zeros(leak_shape[:-3] + [1, 1, 1])
    leaky_logits = tf.concat([leak, logits], axis=-3)
    leaky_routing = cl.softmax(leaky_logits, axis=-3)
    return tf.split(leaky_routing, [1, leak_shape[-3]], axis=-3)[1]
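# A quick numeric illustration of the leak, assuming cl.softmax matches
# tf.nn.softmax: with all-zero logits over 3 output capsules, the dummy
# channel takes an equal share, so each returned route is 1/4 and the routes
# for a given input capsule sum to 3/4 rather than 1. The helper name is
# illustrative only.
def _demo_leaky_routing():
    import tensorflow as tf
    logits = tf.zeros([1, 4, 3, 1, 1])   # 4 input caps, 3 output caps
    route = _leaky_routing(logits)       # [1, 4, 3, 1, 1], all entries 0.25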
def conv3d(inputs, activation, filters, out_caps_dims, kernel_size, strides,
           padding="valid", routing_method="EMRouting", name=None, reuse=None):
    """A 3D convolutional capsule layer.

    Args:
        inputs: A 7-D tensor with shape
            [batch_size, in_depth, in_height, in_width, in_channels] + in_caps_dims.
        activation: A 5-D tensor with shape
            [batch_size, in_depth, in_height, in_width, in_channels].
        filters: Integer, the dimensionality of the output space (i.e. the
            number of filters in the convolution).
        out_caps_dims: A tuple/list of 2 integers, specifying the dimensions
            of the output capsules, e.g. out_caps_dims=[4, 4] means each
            output capsule has shape [4, 4].
        kernel_size: An integer or tuple/list of 3 integers, specifying the
            depth, height and width of the 3D convolution window. Can be a
            single integer to specify the same value for all spatial dimensions.
        strides: An integer or tuple/list of 3 integers, specifying the strides
            of the convolution along the depth, height and width. Can be a
            single integer to specify the same value for all spatial dimensions.
        padding: One of "valid" or "same" (case-insensitive); currently only
            "valid" is supported.
        routing_method: One of "EMRouting" or "DynamicRouting", the
            routing-by-agreement algorithm to use.
        name: String, a name for the operation (optional).
        reuse: Boolean, whether to reuse the weights of a previous layer by
            the same name.

    Returns:
        pose: A 7-D tensor with shape
            [batch_size, out_depth, out_height, out_width, out_channels] + out_caps_dims.
        activation: A 5-D tensor with shape
            [batch_size, out_depth, out_height, out_width, out_channels].
    """
    name = "conv3d" if name is None else name
    with tf.name_scope(name):
        input_shape = cl.shape(inputs)
        input_rank = len(input_shape)
        activation_rank = len(activation.shape)
        if input_rank != 7:
            raise ValueError('Inputs to `conv3d` should have rank 7. Received input rank:', str(input_rank))
        if activation_rank != 5:
            raise ValueError('Activation to `conv3d` should have rank 5. Received activation rank:', str(activation_rank))

        if isinstance(kernel_size, int):
            kernel_size = [kernel_size, kernel_size, kernel_size]
        elif isinstance(kernel_size, (list, tuple)) and len(kernel_size) == 3:
            kernel_size = list(kernel_size)
        else:
            raise ValueError('"kernel_size" should be an integer or tuple/list of 3 integers. Received:', str(kernel_size))

        if isinstance(strides, int):
            strides = [strides, strides, strides]
        elif isinstance(strides, (list, tuple)) and len(strides) == 3:
            strides = list(strides)
        else:
            raise ValueError('"strides" should be an integer or tuple/list of 3 integers. Received:', str(strides))

        if not isinstance(out_caps_dims, (list, tuple)) or len(out_caps_dims) != 2:
            raise ValueError('"out_caps_dims" should be a tuple/list of 2 integers. Received:', str(out_caps_dims))
        elif isinstance(out_caps_dims, tuple):
            out_caps_dims = list(out_caps_dims)

        # 1. space to batch
        batched = cl.space_to_batch_nd(inputs, kernel_size, strides)
        activation = cl.space_to_batch_nd(activation, kernel_size, strides)
        # 2. transforming
        vote = transforming(batched, num_outputs=filters,
                            out_caps_dims=out_caps_dims)
        # 3. routing
        pose, activation = routing(vote, activation, method=routing_method)
        return pose, activation
def conv1d(inputs, activation, filters, out_caps_dims, kernel_size, stride,
           padding="valid", routing_method="EMRouting", name=None, reuse=None):
    """A 1D convolutional capsule layer (e.g. temporal convolution).

    Args:
        inputs: A 5-D tensor with shape [batch_size, in_width, in_channels] + in_caps_dims.
        activation: A 3-D tensor with shape [batch_size, in_width, in_channels].
        kernel_size: An integer or tuple/list of a single integer, specifying
            the length of the 1D convolution window.
        stride: An integer or tuple/list of a single integer, specifying the
            stride length of the convolution.

    Returns:
        pose: A 5-D tensor with shape [batch_size, out_width, out_channels] + out_caps_dims.
        activation: A 3-D tensor with shape [batch_size, out_width, out_channels].
    """
    name = "conv1d" if name is None else name
    with tf.variable_scope(name):
        input_shape = cl.shape(inputs)
        input_rank = len(input_shape)
        activation_rank = len(activation.shape)
        if input_rank != 5:
            raise ValueError('Inputs to `conv1d` should have rank 5. Received input rank:', str(input_rank))
        if activation_rank != 3:
            raise ValueError('Activation to `conv1d` should have rank 3. Received activation rank:', str(activation_rank))

        if isinstance(kernel_size, int):
            kernel_size = [1, kernel_size]
        elif isinstance(kernel_size, (list, tuple)) and len(kernel_size) == 1:
            kernel_size = [1, kernel_size[0]]
        else:
            raise ValueError('"kernel_size" should be an integer or tuple/list of a single integer. Received:', str(kernel_size))

        if isinstance(stride, int):
            strides = [1, stride]
        elif isinstance(stride, (list, tuple)) and len(stride) == 1:
            strides = [1, stride[0]]
        else:
            raise ValueError('"stride" should be an integer or tuple/list of a single integer. Received:', str(stride))

        if not isinstance(out_caps_dims, (list, tuple)) or len(out_caps_dims) != 2:
            raise ValueError('"out_caps_dims" should be a tuple/list of 2 integers. Received:', str(out_caps_dims))
        elif isinstance(out_caps_dims, tuple):
            out_caps_dims = list(out_caps_dims)

        # Lift the width axis to a dummy height of 1 and reuse conv2d.
        inputs = tf.expand_dims(inputs, axis=1)
        activation = tf.expand_dims(activation, axis=1)
        pose, activation = conv2d(inputs,
                                  activation,
                                  filters=filters,
                                  out_caps_dims=out_caps_dims,
                                  kernel_size=kernel_size,
                                  strides=strides,
                                  padding=padding,
                                  routing_method=routing_method,
                                  name="convolution",
                                  reuse=reuse)
        pose = tf.squeeze(pose, axis=1)
        activation = tf.squeeze(activation, axis=1)
        return pose, activation
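# A hedged graph-construction sketch for conv1d, assuming a 5-D capsule input
# [batch, width, channels] + in_caps_dims and its 3-D activation. Internally
# the width axis is lifted to a dummy height of 1 and conv2d does the work.
# The helper name is illustrative only.
def _demo_conv1d_caps():
    import numpy as np
    import tensorflow as tf
    pose = tf.constant(np.random.rand(4, 20, 16, 8, 1), dtype=tf.float32)
    act = tf.constant(np.random.rand(4, 20, 16), dtype=tf.float32)
    pose, act = conv1d(pose, act, filters=32, out_caps_dims=[8, 1],
                       kernel_size=3, stride=1)
    # pose: [4, 18, 32, 8, 1], act: [4, 18, 32]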
def create_network(self, inputs, labels):
    """ Set up the capsule network.

    Args:
        inputs: Tensor or array with shape [batch_size, height, width, channels]
            or [batch_size, height * width * channels].
        labels: Tensor or array with shape [batch_size].

    Returns:
        poses: [batch_size, num_label, 16, 1].
        probs: Tensor with shape [batch_size, num_label], the probability of
            entity presence.
    """
    self.raw_imgs = inputs
    self.labels = labels

    with tf.variable_scope('Conv1_layer'):
        # Conv1, returns shape [batch_size, 20, 20, 256]
        inputs = tf.reshape(self.raw_imgs,
                            shape=[-1, self.height, self.width, self.channels])
        conv1 = tf.layers.conv2d(inputs,
                                 filters=256,
                                 kernel_size=9,
                                 strides=1,
                                 padding='VALID',
                                 activation=tf.nn.relu)

    with tf.variable_scope('PrimaryCaps_layer'):
        primaryCaps, activation = cl.layers.primaryCaps(
            conv1,
            filters=64,  # MNIST: 32
            kernel_size=9,
            strides=2,
            out_caps_dims=[8, 1],
            method="norm")

    with tf.variable_scope('DigitCaps_layer'):
        routing_method = "EMRouting"
        num_inputs = np.prod(cl.shape(primaryCaps)[1:4])
        primaryCaps = tf.reshape(primaryCaps, shape=[-1, num_inputs, 8, 1])
        activation = tf.reshape(activation, shape=[-1, num_inputs])
        self.poses, self.probs = cl.layers.dense(
            primaryCaps,
            activation,
            num_outputs=self.num_label,
            out_caps_dims=[16, 1],
            routing_method=routing_method)
        cl.summary.histogram('activation', self.probs,
                             verbose=cfg.summary_verbose)

    # Decoder structure: reconstruct the inputs with 3 FC layers.
    with tf.variable_scope('Decoder'):
        labels = tf.one_hot(self.labels, depth=self.num_label,
                            axis=-1, dtype=tf.float32)
        self.labels_one_hoted = tf.reshape(labels,
                                           (-1, self.num_label, 1, 1))
        masked_caps = tf.multiply(self.poses, self.labels_one_hoted)
        num_inputs = np.prod(masked_caps.get_shape().as_list()[1:])
        active_caps = tf.reshape(masked_caps, shape=(-1, num_inputs))
        fc1 = tf.layers.dense(active_caps, units=512, activation=tf.nn.relu)
        fc2 = tf.layers.dense(fc1, units=1024, activation=tf.nn.relu)
        num_outputs = self.height * self.width * self.channels
        self.recon_imgs = tf.layers.dense(fc2,
                                          units=num_outputs,
                                          activation=tf.sigmoid)
        recon_imgs = tf.reshape(self.recon_imgs,
                                shape=[-1, self.height, self.width, self.channels])
        cl.summary.image('reconstruction_img', recon_imgs,
                         verbose=cfg.summary_verbose)

    with tf.variable_scope('accuracy'):
        logits_idx = tf.to_int32(
            tf.argmax(cl.softmax(self.probs, axis=1), axis=1))
        correct_prediction = tf.equal(tf.to_int32(self.labels), logits_idx)
        correct = tf.reduce_sum(tf.cast(correct_prediction, tf.float32))
        self.accuracy = tf.reduce_mean(
            correct / tf.cast(tf.shape(self.probs)[0], tf.float32))
        cl.summary.scalar('accuracy', self.accuracy,
                          verbose=cfg.summary_verbose)

    return self.poses, self.probs
def conv2d(inputs, activation, filters, out_caps_dims, kernel_size, strides,
           padding="valid", routing_method="EMRouting", name=None, reuse=None):
    """A 2D convolutional capsule layer.

    Args:
        inputs: A 6-D tensor with shape
            [batch_size, in_height, in_width, in_channels] + in_caps_dims.
        activation: A 4-D tensor with shape
            [batch_size, in_height, in_width, in_channels].
        filters: Integer, the dimensionality of the output space (i.e. the
            number of filters in the convolution).
        out_caps_dims: A tuple/list of 2 integers, specifying the dimensions
            of the output capsules, e.g. out_caps_dims=[4, 4] means each output
            capsule has shape [4, 4].
        kernel_size: An integer or tuple/list of 2 integers, specifying the
            height and width of the 2D convolution window. Can be a single
            integer to specify the same value for all spatial dimensions.
        strides: An integer or tuple/list of 2 integers, specifying the strides
            of the convolution along the height and width. Can be a single
            integer to specify the same value for all spatial dimensions.
        padding: One of "valid" or "same" (case-insensitive); currently only
            "valid" is supported.
        routing_method: One of "EMRouting" or "DynamicRouting", the
            routing-by-agreement algorithm to use.
        name: A string, the name of the layer.
        reuse: Boolean, whether to reuse the weights of a previous layer by
            the same name.

    Returns:
        pose: A 6-D tensor with shape
            [batch_size, out_height, out_width, out_channels] + out_caps_dims.
        activation: A 4-D tensor with shape
            [batch_size, out_height, out_width, out_channels].
    """
    name = "conv2d" if name is None else name
    with tf.variable_scope(name) as scope:
        if reuse:
            scope.reuse_variables()
        input_shape = cl.shape(inputs)
        input_rank = len(input_shape)
        activation_rank = len(activation.shape)
        if not input_rank == 6:
            raise ValueError('Inputs to `conv2d` should have rank 6. Received inputs rank:', str(input_rank))
        if not activation_rank == 4:
            raise ValueError('Activation to `conv2d` should have rank 4. Received activation rank:', str(activation_rank))

        if isinstance(kernel_size, int):
            kernel_size = [kernel_size, kernel_size, input_shape[3]]
        elif isinstance(kernel_size, (list, tuple)) and len(kernel_size) == 2:
            kernel_size = [kernel_size[0], kernel_size[1], input_shape[3]]
        else:
            raise ValueError('"kernel_size" should be an integer or tuple/list of 2 integers. Received:', str(kernel_size))

        if isinstance(strides, int):
            strides = [strides, strides, 1]
        elif isinstance(strides, (list, tuple)) and len(strides) == 2:
            strides = [strides[0], strides[1], 1]
        else:
            raise ValueError('"strides" should be an integer or tuple/list of 2 integers. Received:', str(strides))

        if not isinstance(out_caps_dims, (list, tuple)) or len(out_caps_dims) != 2:
            raise ValueError('"out_caps_dims" should be a tuple/list of 2 integers. Received:', str(out_caps_dims))
        elif isinstance(out_caps_dims, tuple):
            out_caps_dims = list(out_caps_dims)

        # 1. space to batch: patch everything into
        #    [batch_size, out_height, out_width, in_channels] + in_caps_dims (batched)
        #    and [batch_size, out_height, out_width, in_channels] (activation).
        batched = cl.space_to_batch_nd(inputs, kernel_size, strides)
        activation = cl.space_to_batch_nd(activation, kernel_size, strides)
        # 2. transforming: vote has shape
        #    [batch_size, out_height, out_width, in_channels, out_channels/filters] + out_caps_dims
        vote = transforming(batched, num_outputs=filters,
                            out_caps_dims=out_caps_dims)
        # 3. routing
        pose, activation = routing(vote, activation, method=routing_method)
        return pose, activation
def dynamicRouting(votes, num_routing=3, use_bias=True, leaky=True):
    """ Dynamic routing algorithm.

    See [Sabour et al., 2017](https://arxiv.org/abs/1710.09829).

    Args:
        votes: A 5-D or 7-D tensor with shape
            [batch_size, ..., in_channels/num_inputs, num_outputs] + out_caps_dims.
        num_routing: Integer, number of routing iterations.
        use_bias: Boolean, whether the layer uses a bias.
        leaky: Boolean, whether the algorithm uses leaky routing.

    Returns:
        poses: A 4-D or 6-D tensor.
        probs: A 2-D or 4-D tensor.
    """
    vote_shape = cl.shape(votes)
    logit_shape = vote_shape[:-2] + [1, 1]
    logits = tf.fill(logit_shape, 0.0)
    squash_on = -2 if vote_shape[-1] == 1 else [-2, -1]
    if use_bias:
        bias_shape = [1 for i in range(len(vote_shape) - 3)] + vote_shape[-3:]
        # Call the initializer with the target shape to get an initial value.
        biases = tf.Variable(
            initial_value=tf.constant_initializer(0.1)(bias_shape),
            name="biases",
            dtype=tf.float32)

    def _body(i, logits, poses):
        if leaky:
            route = _leaky_routing(logits)
        else:
            route = tf.nn.softmax(logits, axis=-3)
        if use_bias:
            preactivate = cl.reduce_sum(route * votes, axis=-4,
                                        keepdims=True) + biases
        else:
            preactivate = cl.reduce_sum(route * votes, axis=-4, keepdims=True)
        pose = cl.ops.squash(preactivate, axis=squash_on)
        poses = poses.write(i, pose)
        if vote_shape[-1] == 1:
            distances = cl.matmul(votes, pose, transpose_a=True)
        else:
            diff = votes - pose
            distances = tf.linalg.trace(cl.matmul(diff, diff))[..., tf.newaxis,
                                                               tf.newaxis]
        logits += distances
        return (i + 1, logits, poses)

    poses = tf.TensorArray(dtype=tf.float32, size=num_routing,
                           clear_after_read=False)
    i = tf.constant(0, dtype=tf.int32)
    _, logits, poses = tf.while_loop(lambda i, logits, poses: i < num_routing,
                                     _body,
                                     loop_vars=[i, logits, poses],
                                     swap_memory=True)
    poses = tf.squeeze(poses.read(num_routing - 1), axis=-4)
    probs = tf.norm(poses, axis=[-2, -1])
    return poses, probs
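# A hedged shape walk-through for dynamicRouting, assuming the classic MNIST
# CapsNet sizes (1152 primary capsules, 10 digit capsules). Building the graph
# is enough to see the contract; actually running it needs variable
# initialization because use_bias=True creates a tf.Variable. The helper name
# is illustrative only.
def _demo_dynamic_routing():
    import numpy as np
    import tensorflow as tf
    votes = tf.constant(np.random.rand(8, 1152, 10, 16, 1), dtype=tf.float32)
    poses, probs = dynamicRouting(votes, num_routing=3)
    # poses: [8, 10, 16, 1], probs: [8, 10]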
def batch_to_space_nd(input, spatial_shape, name=None):
    """ Fold the leading batch axis back into the given spatial_shape. """
    name = "batch_to_space_nd" if name is None else name
    with tf.name_scope(name):
        input_shape = cl.shape(input)
        shape = [-1] + spatial_shape + input_shape[1:]
        return tf.reshape(input, shape=shape)
def call(self, inputs):
    inputs, activation = inputs
    if self.coordinate_addition and len(inputs.shape) == 6 and len(
            activation.shape) == 4:
        vote = self.transforming(inputs)

        batch_size, in_height, in_width, in_channels, _, out_caps_height, out_caps_width = cl.shape(vote)
        num_inputs = in_height * in_width * in_channels

        zeros = np.zeros((in_height, out_caps_width - 1))
        coord_offset_h = ((np.arange(in_height) + 0.5) /
                          in_height).reshape([in_height, 1])
        coord_offset_h = np.concatenate([zeros, coord_offset_h], axis=-1)
        zeros = np.zeros((out_caps_height - 1, out_caps_width))
        coord_offset_h = np.stack(
            [np.concatenate([coord_offset_h[i:(i + 1), :], zeros], axis=0)
             for i in range(in_height)], axis=0)
        coord_offset_h = coord_offset_h.reshape(
            (1, in_height, 1, 1, 1, out_caps_height, out_caps_width))

        zeros = np.zeros((1, in_width))
        coord_offset_w = ((np.arange(in_width) + 0.5) /
                          in_width).reshape([1, in_width])
        coord_offset_w = np.concatenate(
            [zeros, coord_offset_w, zeros, zeros], axis=0)
        zeros = np.zeros((out_caps_height, out_caps_width - 1))
        coord_offset_w = np.stack(
            [np.concatenate([zeros, coord_offset_w[:, i:(i + 1)]], axis=1)
             for i in range(in_width)], axis=0)
        coord_offset_w = coord_offset_w.reshape(
            (1, 1, in_width, 1, 1, out_caps_height, out_caps_width))

        vote = vote + tf.constant(coord_offset_h + coord_offset_w,
                                  dtype=tf.float32)
        vote = tf.reshape(
            vote,
            shape=[batch_size, num_inputs, self.num_outputs] + self.out_caps_dims)
        activation = tf.reshape(activation, shape=[batch_size, num_inputs])
    elif len(inputs.shape) == 4 and len(activation.shape) == 2:
        vote = self.transforming(inputs)
    else:
        raise TypeError("Wrong rank for inputs or activation")

    pose, activation = self.routing(vote, activation)
    assert len(pose.shape) == 4
    assert len(activation.shape) == 2
    return (pose, activation)
def dense(inputs, activation, num_outputs, out_caps_dims,
          routing_method='EMRouting', routing_iter=3,
          coordinate_addition=False, reuse=None, name=None):
    """A fully connected capsule layer.

    Args:
        inputs: A 4-D tensor with shape [batch_size, num_inputs] + in_caps_dims
            or a 6-D tensor with shape
            [batch_size, in_height, in_width, in_channels] + in_caps_dims.
        activation: [batch_size, num_inputs] or
            [batch_size, in_height, in_width, in_channels].
        num_outputs: Integer, the number of output capsules in the layer.
        out_caps_dims: A list with two elements, pose shape of output capsules.
        routing_method: One of "EMRouting" or "DynamicRouting".
        routing_iter: Number of iterations of the routing algorithm.
        coordinate_addition: Boolean, whether to add scaled grid coordinates
            to the votes of a 6-D conv-capsule input.

    Returns:
        pose: A 4-D tensor with shape [batch_size, num_outputs] + out_caps_dims
        activation: [batch_size, num_outputs]
    """
    name = "dense" if name is None else name
    with tf.variable_scope(name, reuse=reuse):
        if coordinate_addition and len(inputs.shape) == 6 and len(activation.shape) == 4:
            vote = transforming(inputs, num_outputs=num_outputs,
                                out_caps_dims=out_caps_dims)

            with tf.name_scope("coordinate_addition"):
                batch_size, in_height, in_width, in_channels, _, out_caps_height, out_caps_width = cl.shape(vote)
                num_inputs = in_height * in_width * in_channels

                zeros = np.zeros((in_height, out_caps_width - 1))
                coord_offset_h = ((np.arange(in_height) + 0.5) /
                                  in_height).reshape([in_height, 1])
                coord_offset_h = np.concatenate([zeros, coord_offset_h],
                                                axis=-1)
                zeros = np.zeros((out_caps_height - 1, out_caps_width))
                coord_offset_h = np.stack(
                    [np.concatenate([coord_offset_h[i:(i + 1), :], zeros], axis=0)
                     for i in range(in_height)], axis=0)
                coord_offset_h = coord_offset_h.reshape(
                    (1, in_height, 1, 1, 1, out_caps_height, out_caps_width))

                zeros = np.zeros((1, in_width))
                coord_offset_w = ((np.arange(in_width) + 0.5) /
                                  in_width).reshape([1, in_width])
                coord_offset_w = np.concatenate(
                    [zeros, coord_offset_w, zeros, zeros], axis=0)
                zeros = np.zeros((out_caps_height, out_caps_width - 1))
                coord_offset_w = np.stack(
                    [np.concatenate([zeros, coord_offset_w[:, i:(i + 1)]], axis=1)
                     for i in range(in_width)], axis=0)
                coord_offset_w = coord_offset_w.reshape(
                    (1, 1, in_width, 1, 1, out_caps_height, out_caps_width))

                vote = vote + tf.constant(coord_offset_h + coord_offset_w,
                                          dtype=tf.float32)
                vote = tf.reshape(
                    vote,
                    shape=[batch_size, num_inputs, num_outputs] + out_caps_dims)
                activation = tf.reshape(activation,
                                        shape=[batch_size, num_inputs])
        elif len(inputs.shape) == 4 and len(activation.shape) == 2:
            vote = transforming(inputs, num_outputs=num_outputs,
                                out_caps_dims=out_caps_dims)
        else:
            raise TypeError("Wrong rank for inputs or activation")

        pose, activation = routing(vote, activation, routing_method,
                                   num_iter=routing_iter)
        # pose, activation = cl.core.gluing(vote, activation)
        assert len(pose.shape) == 4
        assert len(activation.shape) == 2
        return (pose, activation)
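# A hedged sketch of the coordinate-addition path in dense(), which expects a
# 6-D conv-capsule pose grid and a matching 4-D activation. [4, 4] capsule
# dims are assumed because the offset matrices built above are sized for
# matrix-style capsules, and the module-level routing() used by the conv
# layers is assumed to be available. The helper name is illustrative only.
def _demo_dense_coordinate_addition():
    import numpy as np
    import tensorflow as tf
    pose = tf.constant(np.random.rand(2, 6, 6, 32, 4, 4), dtype=tf.float32)
    act = tf.constant(np.random.rand(2, 6, 6, 32), dtype=tf.float32)
    poses, probs = dense(pose, act, num_outputs=10, out_caps_dims=[4, 4],
                         routing_method="EMRouting", coordinate_addition=True)
    # poses: [2, 10, 4, 4], probs: [2, 10]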
"kernel_size": 9, "strides": 1, "padding": "VALID", "activation": tf.nn.relu } net = tf.layers.conv2d(x, **conv_args) primary_args = { "filters": 32, "kernel_size": 9, "strides": 2, "out_caps_dims": [8, 1] } net, activation = cl.layers.primaryCaps(net, **primary_args) num_caps = np.prod(cl.shape(net)[1:4]) net = tf.reshape(net, shape=[-1, num_caps, 8, 1]) activation = tf.reshape(activation, shape=[-1, num_caps]) digit_args = { "num_outputs": n_classes, "out_caps_dims": [16, 1], "routing_method": "EMRouting" } pose, prob = cl.layers.dense(net, activation, **digit_args) prob_argmax=tf.nn.softmax(prob) T=tf.one_hot(y,depth=10) margin_loss = cl.losses.margin_loss(T, prob) cross_entropy=tf.nn.softmax_cross_entropy_with_logits(labels=T,logits=prob)