Code Example #1
import tensorflow as tf


def fully_connected(inputs,
                    activation,
                    num_outputs,
                    out_caps_shape,
                    routing_method='EMRouting',
                    reuse=None):
    '''A capsule fully connected layer.
    Args:
        inputs: A tensor with shape [batch_size, num_inputs] + in_caps_shape.
        activation: [batch_size, num_inputs]
        num_outputs: Integer, the number of output capsules in the layer.
        out_caps_shape: A list with two elements, pose shape of output capsules.
    Returns:
        pose: [batch_size, num_outputs] + out_caps_shape
        activation: [batch_size, num_outputs]
    '''
    in_pose_shape = inputs.get_shape().as_list()
    # Note: get_shape() returns the static shape, and dimension 1 is unknown
    # (None) at graph-construction time; the dynamic shape tf.shape(inputs)
    # cannot be evaluated without a feed_dict. Since tf.get_variable below
    # needs a fully static shape, num_inputs is hardcoded for this network.
    # See https://blog.csdn.net/loseinvain/article/details/78762739
    num_inputs = 24 * 24 * 32  # 18432
    batch_size = in_pose_shape[0]
    T_size = get_transformation_matrix_shape(in_pose_shape[-2:],
                                             out_caps_shape)
    T_shape = [1, num_inputs, num_outputs] + T_size
    T_matrix = tf.get_variable(name='transformation_matrix', shape=T_shape)
    T_matrix = tf.tile(T_matrix, [batch_size, 1, 1, 1, 1])
    inputs = tf.tile(tf.expand_dims(inputs, axis=2), [1, 1, num_outputs, 1, 1])
    with tf.variable_scope('transformation'):
        # vote: [batch_size, num_inputs, num_outputs] + out_caps_shape
        vote = tf.matmul(T_matrix, inputs)
    with tf.variable_scope('routing'):
        if routing_method == 'EMRouting':
            shape = [-1] + activation.get_shape().as_list()[1:] + [1, 1]
            activation = tf.reshape(activation, shape=shape)
            vote = tf.reshape(vote,
                              shape=[batch_size, num_inputs, num_outputs, -1])
            pose, activation = routing(vote, activation, num_outputs,
                                       out_caps_shape, routing_method)
            pose = tf.reshape(pose,
                              shape=[batch_size, num_outputs] + out_caps_shape)
            activation = tf.reshape(activation, shape=[batch_size, -1])
        elif routing_method == 'DynamicRouting':
            pose, _ = routing(vote,
                              activation,
                              num_outputs=num_outputs,
                              out_caps_shape=out_caps_shape,
                              method=routing_method)
            pose = tf.squeeze(pose, axis=1)
            activation = tf.squeeze(euclidean_norm(pose))
        else:
            raise ValueError('Unknown routing method: ' + routing_method)
    return (pose, activation)
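
Both fully connected examples lean on a helper, get_transformation_matrix_shape, that is not shown here. As a hedged illustration only, the minimal sketch below reconstructs what it plausibly returns, inferred from vote = tf.matmul(T_matrix, inputs): an input pose of shape [a, b] can only map to an output pose of shape [c, b] through a [c, a] matrix. The project's actual helper may differ.

def get_transformation_matrix_shape(in_caps_shape, out_caps_shape):
    # [c, a] @ [a, b] -> [c, b]: the trailing pose dimensions must match.
    assert in_caps_shape[1] == out_caps_shape[1]
    return [out_caps_shape[0], in_caps_shape[0]]

With in_caps_shape = [8, 1] and out_caps_shape = [16, 1], for instance, this yields T_size = [16, 8], which matches the [c, a] @ [a, b] product used in the transformation scope above.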
Code Example #2
File: layers.py Project: 2018OCR/OCR
import tensorflow as tf


def fully_connected(inputs,
                    activation,
                    num_outputs,
                    out_caps_shape,
                    routing_method='DynamicRouting',
                    reuse=None):
    """
    A capsule fully connected layer.
    Args:
        inputs: A tensor with shape [batch_size, num_inputs] + in_caps_shape.
        activation: [batch_size, num_inputs]
        num_outputs: Integer, the number of output capsules in the layer.
        out_caps_shape: A list with two elements, pose shape of output capsules.
    Returns:
        pose: [batch_size, num_outputs] + out_caps_shape
        activation: [batch_size, num_outputs]
    """
    in_pose_shape = inputs.get_shape().as_list()
    num_inputs = in_pose_shape[1]
    batch_size = in_pose_shape[0]
    T_size = get_transformation_matrix_shape(in_pose_shape[-2:],
                                             out_caps_shape)
    T_shape = [1, num_inputs, num_outputs] + T_size
    T_matrix = tf.get_variable("transformation_matrix", shape=T_shape)
    T_matrix = tf.tile(T_matrix, [batch_size, 1, 1, 1, 1])
    inputs = tf.tile(tf.expand_dims(inputs, axis=2), [1, 1, num_outputs, 1, 1])
    with tf.variable_scope('transformation'):
        # vote: [batch_size, num_inputs, num_outputs] + out_caps_shape
        vote = tf.matmul(T_matrix, inputs)
    with tf.variable_scope('routing'):
        if routing_method == 'EMRouting':
            activation = tf.reshape(activation,
                                    shape=activation.get_shape().as_list() +
                                    [1, 1])
            vote = tf.reshape(vote,
                              shape=[batch_size, num_inputs, num_outputs, -1])
            pose, activation = routing(vote, activation, num_outputs,
                                       out_caps_shape, routing_method)
            pose = tf.reshape(pose,
                              shape=[batch_size, num_outputs] + out_caps_shape)
            activation = tf.reshape(activation, shape=[batch_size, -1])
        elif routing_method == 'DynamicRouting':
            pose, _ = routing(vote,
                              activation,
                              num_outputs=num_outputs,
                              out_caps_shape=out_caps_shape,
                              method=routing_method)
            pose = tf.squeeze(pose, axis=1)
            activation = tf.squeeze(euclidean_norm(pose))
        else:
            raise ValueError('Unknown routing method: ' + routing_method)
    return pose, activation
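
Under DynamicRouting, both variants derive the output activation from the pose via euclidean_norm, another helper defined elsewhere in the source modules. Below is a minimal sketch under the assumption that it computes the L2 norm over the two pose axes; the epsilon argument is a hypothetical addition, and the real implementation may differ (e.g. by including a squashing step).

import tensorflow as tf

def euclidean_norm(pose, epsilon=1e-9):
    # pose: [batch_size, num_outputs] + out_caps_shape; reduce over pose axes.
    squared = tf.reduce_sum(tf.square(pose), axis=[-2, -1], keepdims=True)
    # epsilon keeps the gradient finite when a capsule's pose is all zeros.
    return tf.sqrt(squared + epsilon)

The callers then apply tf.squeeze to collapse the result to [batch_size, num_outputs].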
Code Example #3
import tensorflow as tf


def conv2d(in_pose,
           activation,
           filters,
           out_caps_shape,
           kernel_size,
           strides=(1, 1),
           coordinate_addition=False,
           regularizer=None,
           reuse=None):
    '''A capsule convolutional layer.
    Args:
        in_pose: A tensor with shape [batch_size, in_height, in_width, in_channels] + in_caps_shape.
        activation: A tensor with shape [batch_size, in_height, in_width, in_channels]
        filters: Integer, the number of output capsule channels.
        out_caps_shape: A list with two elements, pose shape of output capsules.
        kernel_size: Integer or a list/tuple of two integers, the height and width of the kernel.
        strides: Integer or a list/tuple of two integers, the stride along height and width.
        coordinate_addition: Boolean, whether to apply coordinate addition to the votes (not yet completed below).
        regularizer: apply regularization on a newly created variable and add the variable to the collection tf.GraphKeys.REGULARIZATION_LOSSES.
        reuse: Boolean, whether to reuse the variables of a previous layer with the same name.
    Returns:
        out_pose: A tensor with shape [batch_size, out_height, out_width, out_channels] + out_caps_shape,
        out_activation: A tensor with shape [batch_size, out_height, out_width, out_channels]
    '''
    # do some preparation stuff
    in_pose_shape = in_pose.get_shape().as_list()
    in_caps_shape = in_pose_shape[-2:]
    batch_size = in_pose_shape[0]
    in_channels = in_pose_shape[3]

    T_size = get_transformation_matrix_shape(in_caps_shape, out_caps_shape)
    if isinstance(kernel_size, int):
        h_kernel_size = kernel_size
        w_kernel_size = kernel_size
    elif isinstance(kernel_size, (list, tuple)) and len(kernel_size) == 2:
        h_kernel_size = kernel_size[0]
        w_kernel_size = kernel_size[1]
    else:
        raise ValueError('kernel_size must be an int or a list/tuple of two ints')
    if isinstance(strides, int):
        h_stride = strides
        w_stride = strides
    elif isinstance(strides, (list, tuple)) and len(strides) == 2:
        h_stride = strides[0]
        w_stride = strides[1]
    else:
        raise ValueError('strides must be an int or a list/tuple of two ints')
    num_inputs = h_kernel_size * w_kernel_size * in_channels
    batch_shape = [batch_size, h_kernel_size, w_kernel_size, in_channels]
    T_shape = (1, num_inputs, filters) + tuple(T_size)

    T_matrix = tf.get_variable('transformation_matrix',
                               shape=T_shape,
                               regularizer=regularizer)
    T_matrix_batched = tf.tile(T_matrix, [batch_size, 1, 1, 1, 1])

    h_step = int((in_pose_shape[1] - h_kernel_size) / h_stride + 1)
    w_step = int((in_pose_shape[2] - w_kernel_size) / w_stride + 1)
    out_pose = []
    out_activation = []
    # start the capsule convolution.
    # Note: a more computationally efficient implementation is likely possible.
    for i in range(h_step):
        col_pose = []
        col_prob = []
        h_s = i * h_stride
        h_e = h_s + h_kernel_size
        for j in range(w_step):
            with tf.variable_scope("transformation"):
                w_s = j * w_stride
                pose_sliced = in_pose[:, h_s:h_e,
                                      w_s:(w_s + w_kernel_size), :, :, :]
                pose_reshaped = tf.reshape(pose_sliced,
                                           shape=[batch_size, num_inputs, 1] +
                                           in_caps_shape)
                # broadcast the pose across the filters axis (multiply by ones)
                shape = [batch_size, num_inputs, filters] + in_caps_shape
                batch_pose = tf.multiply(pose_reshaped,
                                         tf.constant(1., shape=shape))
                vote = tf.reshape(tf.matmul(T_matrix_batched, batch_pose),
                                  shape=[batch_size, num_inputs, filters, -1])
                # do Coordinate Addition (EM routing). Note: not yet completed;
                # x and y are computed here but never added to the votes.
                if coordinate_addition:
                    x = j / w_step
                    y = i / h_step

            with tf.variable_scope("routing") as scope:
                if i > 0 or j > 0:
                    scope.reuse_variables()
                begin = [0, i * h_stride, j * w_stride, 0]
                size = [batch_size, h_kernel_size, w_kernel_size, in_channels]
                prob = tf.slice(activation, begin, size)
                prob = tf.reshape(prob, shape=[batch_size, -1, 1, 1])
                pose, prob = routing(vote,
                                     prob,
                                     filters,
                                     out_caps_shape,
                                     method="EMRouting",
                                     regularizer=regularizer)
            col_pose.append(pose)
            col_prob.append(prob)
        col_pose = tf.concat(col_pose, axis=2)
        col_prob = tf.concat(col_prob, axis=2)
        out_pose.append(col_pose)
        out_activation.append(col_prob)
    out_pose = tf.concat(out_pose, axis=1)
    out_activation = tf.concat(out_activation, axis=1)

    return (out_pose, out_activation)
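
The loop bounds h_step and w_step implement the usual "valid" convolution output size, out = (in - kernel) // stride + 1. A quick plain-Python sanity check follows; conv_output_size and the sizes below are illustrative only, not part of the project.

def conv_output_size(in_size, kernel_size, stride):
    # 'valid' padding: the kernel must fit entirely inside the input.
    return (in_size - kernel_size) // stride + 1

assert conv_output_size(14, 3, 2) == 6   # e.g. a 14x14 pose map, 3x3 kernel, stride 2
assert conv_output_size(24, 5, 1) == 20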