def routing(input, b_IJ):
    W = tf.get_variable(
        'Weight',
        shape=(1, 2592, 320, 16, 1),
        dtype=tf.float32,
        initializer=tf.random_normal_initializer(stddev=cfg.stddev))
    biases = tf.get_variable('bias', shape=(1, 1, 10, 32, 1))
    # tile the input so it lines up with W, then form u_hat = W x u via
    # element-wise multiply + reduce_sum (cheaper than tf.matmul here)
    input = tf.tile(input, [1, 1, 320, 1, 1])
    u_hat = reduce_sum(W * input, axis=3, keepdims=True)
    u_hat = tf.reshape(u_hat, shape=[-1, 2592, 10, 32, 1])
    # in the forward pass u_hat_stopped == u_hat; in the backward pass no
    # gradient flows through it
    u_hat_stopped = tf.stop_gradient(u_hat, name='stop_gradient')
    for r_iter in range(cfg.iter_routing):
        with tf.variable_scope('iter_' + str(r_iter)):
            c_IJ = softmax(b_IJ, axis=2)
            if r_iter == cfg.iter_routing - 1:
                s_J = tf.multiply(c_IJ, u_hat)
                s_J = reduce_sum(s_J, axis=1, keepdims=True) + biases
                v_J = squash(s_J)
            elif r_iter < cfg.iter_routing - 1:  # Inner iterations, do not apply backpropagation
                s_J = tf.multiply(c_IJ, u_hat_stopped)
                s_J = reduce_sum(s_J, axis=1, keepdims=True) + biases
                v_J = squash(s_J)
                v_J_tiled = tf.tile(v_J, [1, 2592, 1, 1, 1])
                u_produce_v = reduce_sum(u_hat_stopped * v_J_tiled,
                                         axis=3,
                                         keepdims=True)
                b_IJ += u_produce_v
    return v_J
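For orientation, a minimal sketch of how a routing function with these constants would typically be wired up; `primary_caps` is a hypothetical placeholder, and the zero initialization of `b_IJ` follows the dynamic-routing algorithm:

# Sketch only: shapes follow the constants above (2592 input capsules of
# length 16, routed to 10 output capsules of length 32)
caps1 = tf.reshape(primary_caps, shape=(cfg.batch_size, 2592, 1, 16, 1))
b_IJ = tf.zeros([cfg.batch_size, 2592, 10, 1, 1], dtype=tf.float32)
caps2 = routing(caps1, b_IJ)       # -> [batch_size, 1, 10, 32, 1]
caps2 = tf.squeeze(caps2, axis=1)  # -> [batch_size, 10, 32, 1]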
Example #2
def dynamic_routing(shape, input, num_outputs=10, num_dims=16):
    """The Dynamic Routing Algorithm proposed by Sabour et al."""
    
    input_shape = shape
    W = tf.get_variable('Weight', shape=[1, input_shape[1], num_dims * num_outputs] + input_shape[-2:],
                        dtype=tf.float32, initializer=tf.random_normal_initializer(stddev=stddev))
    biases = tf.get_variable('bias', shape=(1, 1, num_outputs, num_dims, 1))
    
    delta_IJ = tf.zeros([input_shape[0], input_shape[1], num_outputs, 1, 1], dtype=tf.dtypes.float32)

    input = tf.tile(input, [1, 1, num_dims * num_outputs, 1, 1])

    u_hat = reduce_sum(W * input, axis=3, keepdims=True)
    u_hat = tf.reshape(u_hat, shape=[-1, input_shape[1], num_outputs, num_dims, 1])

    u_hat_stopped = tf.stop_gradient(u_hat, name='stop_gradient')

    for r_iter in range(iter_routing):
        with tf.variable_scope('iter_' + str(r_iter)):
            gamma_IJ = softmax(delta_IJ, axis=2)

            if r_iter == iter_routing - 1:
                s_J = tf.multiply(gamma_IJ, u_hat)
                s_J = reduce_sum(s_J, axis=1, keepdims=True) + biases
                v_J = squash(s_J)
            elif r_iter < iter_routing - 1:  # Inner iterations, do not apply backpropagation
                s_J = tf.multiply(gamma_IJ, u_hat_stopped)
                s_J = reduce_sum(s_J, axis=1, keepdims=True) + biases
                v_J = squash(s_J)
                v_J_tiled = tf.tile(v_J, [1, input_shape[1], 1, 1, 1])
                u_produce_v = reduce_sum(u_hat_stopped * v_J_tiled, axis=3, keepdims=True)
                delta_IJ += u_produce_v

    return v_J
Example #3
def routing(input, b_IJ):
    # W - start of routing algorithm, initialization
    W = tf.get_variable('Weight', shape=(1, 1152, 160, 8, 1), dtype=tf.float32,
                        initializer=tf.random_normal_initializer(stddev=cfg.stddev))
    biases = tf.get_variable('bias', shape=(1, 1, 10, 16, 1))

    # u_hat.png
    input = tf.tile(input, [1, 1, 160, 1, 1])

    u_hat = reduce_sum(W * input, axis=3, keepdims=True)
    u_hat = tf.reshape(u_hat, shape=[-1, 1152, 10, 16, 1])

    # In the forward pass, u_hat_no_back_propagation == u_hat; in the backward
    # pass, no gradient flows from u_hat_no_back_propagation back to u_hat
    u_hat_no_back_propagation = tf.stop_gradient(u_hat, name='stop_gradient')

    # routing.png, cycle
    for r_iter in range(cfg.iter_routing):
        with tf.variable_scope('iter_' + str(r_iter)):
            c_IJ = softmax(b_IJ, axis=2)

            # last iteration: use u_hat so gradients can back-propagate
            if r_iter == cfg.iter_routing - 1:
                s_J = tf.multiply(c_IJ, u_hat)
                s_J = reduce_sum(s_J, axis=1, keepdims=True) + biases
                v_J = squash(s_J)
            elif r_iter < cfg.iter_routing - 1:  # Inner routing iterations: no back-propagation, so use u_hat_no_back_propagation
                s_J = tf.multiply(c_IJ, u_hat_no_back_propagation)
                s_J = reduce_sum(s_J, axis=1, keepdims=True) + biases
                v_J = squash(s_J)
                v_J_tiled = tf.tile(v_J, [1, 1152, 1, 1, 1])
                u_produce_v = reduce_sum(u_hat_no_back_propagation * v_J_tiled, axis=3, keepdims=True)
                b_IJ += u_produce_v

    return v_J
Example #4
    def build_arch(self):
        with tf.variable_scope('Conv1_layer'):
            # Conv1, return tensor with shape [batch_size, 20, 20, 256]
            conv1 = tf.contrib.layers.conv2d(self.X, num_outputs=256,
                                             kernel_size=9, stride=1,
                                             padding='VALID')

        # Primary Capsules layer, return tensor with shape [batch_size, 1152, 8, 1]
        with tf.variable_scope('PrimaryCaps_layer'):
            primaryCaps = CapsLayer(num_outputs=32, vec_len=8, with_routing=False, layer_type='CONV')
            caps1 = primaryCaps(conv1, kernel_size=9, stride=2)

        # DigitCaps layer, return shape [batch_size, 10, 16, 1]
        with tf.variable_scope('DigitCaps_layer'):
            digitCaps = CapsLayer(num_outputs=self.num_label, vec_len=16, with_routing=True, layer_type='FC')
            self.caps2 = digitCaps(caps1)

        # Decoder structure in Fig. 2
        # 1. Do masking, how:
        with tf.variable_scope('Masking'):
            # a). calc ||v_c||, then do softmax(||v_c||)
            # [batch_size, 10, 16, 1] => [batch_size, 10, 1, 1]
            self.v_length = tf.sqrt(reduce_sum(tf.square(self.caps2),
                                               axis=2, keepdims=True) + epsilon)
            self.softmax_v = softmax(self.v_length, axis=1)
            # assert self.softmax_v.get_shape() == [cfg.batch_size, self.num_label, 1, 1]

            # b). pick out the index of max softmax val of the 10 caps
            # [batch_size, 10, 1, 1] => [batch_size] (index)
            self.argmax_idx = tf.to_int32(tf.argmax(self.softmax_v, axis=1))
            # assert self.argmax_idx.get_shape() == [cfg.batch_size, 1, 1]
            self.argmax_idx = tf.reshape(self.argmax_idx, shape=(cfg.batch_size, ))

            # Method 1.
            if not cfg.mask_with_y:
                # c). indexing
                # The indexing with argmax_idx is not easy to follow,
                # since we think in three dimensions
                masked_v = []
                for batch_size in range(cfg.batch_size):
                    v = self.caps2[batch_size][self.argmax_idx[batch_size], :]
                    masked_v.append(tf.reshape(v, shape=(1, 1, 16, 1)))

                self.masked_v = tf.concat(masked_v, axis=0)
                assert self.masked_v.get_shape() == [cfg.batch_size, 1, 16, 1]
            # Method 2. masking with true label, default mode
            else:
                self.masked_v = tf.multiply(tf.squeeze(self.caps2), tf.reshape(self.Y, (-1, self.num_label, 1)))
                self.v_length = tf.sqrt(reduce_sum(tf.square(self.caps2), axis=2, keepdims=True) + epsilon)

        # 2. Reconstruct the MNIST images with 3 FC layers
        # [batch_size, 1, 16, 1] => [batch_size, 16] => [batch_size, 512]
        with tf.variable_scope('Decoder'):
            vector_j = tf.reshape(self.masked_v, shape=(cfg.batch_size, -1))
            fc1 = tf.contrib.layers.fully_connected(vector_j, num_outputs=512)
            fc2 = tf.contrib.layers.fully_connected(fc1, num_outputs=1024)
            self.decoded = tf.contrib.layers.fully_connected(fc2,
                                                             num_outputs=self.height * self.width * self.channels,
                                                             activation_fn=tf.sigmoid)
Example #5
    def build_net(self):
        # set top conv
        top_con = CNNs(self.x, 128, [9, 1], 2, "SAME", self.is_train)
        self.primary_cap = layers_vector(top_con, 32, 4, [9, 1], 2, self.is_train, shapes=[-1, self.next_length*8, 16, 1])
        # [-1, 88*16, 8, 1]
        fc_function = tf.reshape(self.primary_cap, shape=(-1, self.primary_cap.shape[1].value, 1, self.primary_cap.shape[-2].value, 1))
        # [-1, 88*16, 1, 8, 1]
        blu = tf.constant(np.zeros([self.batch_size, self.primary_cap.shape[1].value, self.num_label, 1, 1]), dtype=tf.float32)
        caps = routing(fc_function, blu, num_outputs=self.num_label, num_dims=32)
        # [120, 37, 8, 1]

        top_conv_1 = CNNs(self.x, 128, [7, 1], 2, "SAME", self.is_train)
        self.primary_cap_1 = layers_vector(top_conv_1, 32, 4, [7, 1], 2, self.is_train, shapes=[-1, self.next_length*16, 8, 1])
        fc_function_1 = tf.reshape(self.primary_cap_1, shape=(-1, self.primary_cap_1.shape[1].value, 1, self.primary_cap_1.shape[-2].value, 1))
        blu_1 = tf.constant(np.zeros([self.batch_size, self.primary_cap_1.shape[1].value, self.num_label, 1, 1]), dtype=tf.float32)
        with tf.variable_scope("routing_1"):
            caps_1 = routing(fc_function_1, blu_1, self.num_label, 16)

        top_con_2 = CNNs(self.x, 128, [5, 1], 2, 'SAME', self.is_train)
        self.primary_cap_2 = layers_vector(top_con_2, 32, 4, [5, 1], 2, self.is_train, shapes=[-1, self.next_length*32, 4, 1])
        fc_function_2 = tf.reshape(self.primary_cap_2, shape=(-1, self.primary_cap_2.shape[1].value, 1, self.primary_cap_2.shape[-2].value, 1))
        blu_2 = tf.constant(np.zeros([self.batch_size, self.primary_cap_2.shape[1].value, self.num_label, 1, 1]), dtype=tf.float32)
        with tf.variable_scope("routing_2"):
            caps_2 = routing(fc_function_2, blu_2, self.num_label, 8)

        # weighted concat of the three capsule branches; a=3.0, b=1.0, c=1.0
        # gave the best performance in our experiments
        a = 3.0
        b = 1.0
        c = 1.0
        caps = tf.concat([a*caps, b*caps_1, c*caps_2], axis=3)

        self.caps = tf.squeeze(caps, axis=1)
        # [batch_size, num_label, 1, 1]
        v_length = tf.sqrt(reduce_sum(tf.square(self.caps), axis=2, keepdims=True) + epsilon)
        softmax_v = softmax(v_length, axis=1)
        argmax_idx = tf.to_int32(tf.argmax(softmax_v, axis=1))
        self.argmax_idx = tf.reshape(argmax_idx, shape=(self.batch_size,))

        self.masked_v = tf.multiply(tf.squeeze(self.caps), tf.reshape(self.y, (-1, self.num_label, 1)))
        self.v_length = tf.sqrt(reduce_sum(tf.square(self.caps), axis=2, keepdims=True) + epsilon)

        # decoder
        vector_j = tf.reshape(self.masked_v, shape=(self.batch_size, -1))
        fc1 = tf.contrib.layers.fully_connected(vector_j, num_outputs=256)
        fc1 = tf.contrib.layers.fully_connected(fc1, num_outputs=512)
        self.decode = tf.contrib.layers.fully_connected(fc1, num_outputs=self.length, activation_fn=tf.sigmoid)
def gradient_penalty(x, y, mask=None, norm=1., f_obj_to_img=None):
    """
    # x = interpolated real and fake images
    # y = scores from critic
    """
    grad_outputs = (torch.ones(y.size()).cuda()
                    if torch.cuda.is_available() else torch.ones(y.size()))
    gradients = torch.autograd.grad(outputs=y, inputs=x,
                                    grad_outputs=grad_outputs,
                                    create_graph=True,
                                    retain_graph=True,
                                    only_inputs=True)[0]
    if mask is None:
        mask = torch.ones(gradients.shape, device=x.device)

    gp_grads = (gradients ** 2) * mask
    if f_obj_to_img is not None:
        avg_grads = []
        # take average of all patches for every image
        for i in range(max(f_obj_to_img) + 1):
            inds = (f_obj_to_img == i).nonzero()
            avg_grad = torch.mean(gp_grads[inds], dim=0)
            avg_grads.append(avg_grad)

        gp_grads = torch.cat(avg_grads)

    slopes = torch.sqrt(utils.reduce_sum(
        gp_grads, axis=[2, 3, 1])).view(gp_grads.shape[0], -1)
    gp_loss = torch.mean((slopes - norm) ** 2)

    return gp_loss
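A hedged sketch of the standard WGAN-GP call pattern this helper fits into; `critic`, `real`, and `fake` are hypothetical names, and the penalty weight 10 follows Gulrajani et al. rather than anything in this snippet:

# Sketch only: score interpolates of real/fake samples and penalize the
# critic's gradient norm (gradient_penalty pulls it toward norm=1)
eps = torch.rand(real.size(0), 1, 1, 1, device=real.device)
x_interp = (eps * real + (1 - eps) * fake).requires_grad_(True)
y_interp = critic(x_interp)
loss_critic = critic(fake).mean() - critic(real).mean() \
    + 10.0 * gradient_penalty(x_interp, y_interp)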
Example #7
def init_net_treecaps(feature_size, label_size):
    """Initialize an empty TreeCaps network."""
    top_a = 20
    top_b = 25
    num_conv = 8
    output_size = 128
    caps1_num_dims = 8
    caps1_num_caps = int(num_conv*output_size/caps1_num_dims)*top_a
    caps1_out_caps = label_size
    caps1_out_dims = 8

    with tf.name_scope('inputs'):
        nodes = tf.placeholder(tf.float32, shape=(None, None, feature_size), name='tree')
        children = tf.placeholder(tf.int32, shape=(None, None, None), name='children')

    with tf.name_scope('network'):  
        """The Primary Variable Capsule Layer."""
        primary_variable_caps = primary_variable_capsule_layer(num_conv, output_size, nodes, children, feature_size, caps1_num_dims)
        
        """The Primary Static Capsule Layer."""
        primary_static_caps = vts_routing(primary_variable_caps,top_a,top_b,caps1_num_caps,caps1_num_dims)        
        primary_static_caps = tf.reshape(primary_static_caps, shape=(batch_size, -1, 1, caps1_num_dims, 1))
        
        """The Code Capsule Layer."""
        #Get the input shape to the dynamic routing algorithm
        dr_shape = [batch_size,caps1_num_caps,1,caps1_num_dims,1]
        codeCaps = dynamic_routing(dr_shape, primary_static_caps, num_outputs=caps1_out_caps, num_dims=caps1_out_dims)
        codeCaps = tf.squeeze(codeCaps, axis=1)
        
        """Obtaining the classification output."""
        v_length = tf.sqrt(reduce_sum(tf.square(codeCaps),axis=2, keepdims=True) + 1e-9)
        out = tf.reshape(v_length,(-1,label_size))

    return nodes, children, out
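A hedged usage sketch for the returned placeholders; `batch_nodes` and `batch_children` are hypothetical arrays shaped like the placeholders, and `batch_size` is assumed to be a module-level constant as in the code above:

# Sketch only: build the graph once, then run a forward pass
nodes_ph, children_ph, out = init_net_treecaps(feature_size=30, label_size=10)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    scores = sess.run(out, feed_dict={nodes_ph: batch_nodes,
                                      children_ph: batch_children})
    print(scores.shape)  # (batch_size, 10)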
Example #8
def squash(vector):
    '''Squashing function corresponding to Eq. 1
    Args:
        vector: A tensor with shape [batch_size, 1, num_caps, vec_len, 1] or [batch_size, num_caps, vec_len, 1].
    Returns:
        A tensor with the same shape as vector but squashed in 'vec_len' dimension.
    '''
    vec_squared_norm = reduce_sum(tf.square(vector), -2, keepdims=True)
    scalar_factor = vec_squared_norm / (1 + vec_squared_norm) / tf.sqrt(vec_squared_norm + epsilon)
    vec_squashed = scalar_factor * vector  # element-wise
    return vec_squashed
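For reference, the squashing non-linearity (Eq. 1 in Sabour et al.) that these squash variants implement; the epsilon in the code is a numerical-stability term not present in the paper:

v_j = \frac{\|s_j\|^2}{1 + \|s_j\|^2} \cdot \frac{s_j}{\|s_j\|}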
def squash(vector):
    '''Squashing function
    Args:
        vector: A tensor with shape [batch_size, 1, num_caps, vec_len, 1] or [batch_size, num_caps, vec_len, 1]
    Returns:
        A tensor with the same shape as vector but squashed in 'vec_len' dimension.
    '''
    squared_norm = reduce_sum(tf.square(vector), axis=-2, keepdims=True)
    scalar_factor = squared_norm / (1 + squared_norm) / tf.sqrt(squared_norm +
                                                                epsilon)
    return scalar_factor * vector
Example #10
    def squash(vector):
        '''
        Input: tensor with shape: [batch_size, 1, num_caps, vec_len, 1]
        
        Return: same shape. squashed in vec_len dimension
        '''

        vec_squared_norm = reduce_sum(tf.square(vector), -2, keepdims=True)
        scalar_factor = vec_squared_norm / (
            1 + vec_squared_norm) / tf.sqrt(vec_squared_norm + epsilon)
        vec_squashed = scalar_factor * vector

        return vec_squashed
def evaluator_model(input):
    epsilon = 1e-9
    with tf.variable_scope('CapsuleNet', reuse=tf.AUTO_REUSE):
        with tf.variable_scope('Conv1_layer'):
            # conv1 = tf.contrib.layers.conv2d(input, num_outputs=128, kernel_size=9, stride=1, padding='VALID')
            # conv1 = tf.contrib.layers.conv2d(conv1, num_outputs=256, kernel_size=5, stride=1, padding='VALID')
            conv1 = tf.contrib.layers.conv2d(input,
                                             num_outputs=512,
                                             kernel_size=9,
                                             stride=1,
                                             padding='VALID')

        with tf.variable_scope('PrimaryCaps_layer'):
            primaryCaps = CapsLayer(num_outputs=32,
                                    vec_len=16,
                                    with_routing=False,
                                    layer_type='CONV')
            caps1 = primaryCaps(conv1, kernel_size=8, stride=2)

        with tf.variable_scope('SecondaryCaps_Layer'):
            DigitCaps = CapsLayer(num_outputs=10,
                                  vec_len=32,
                                  with_routing=True,
                                  layer_type='FC')
            Caps2 = DigitCaps(caps1)
            v_length = tf.sqrt(
                reduce_sum(tf.square(Caps2), axis=2, keepdims=True) + epsilon,
                name='v_length')
            print(v_length)
        #
        # with  tf.variable_scope('Masking'):
        #     masked_v = tf.multiply(tf.squeeze(Caps2), tf.reshape(y, (-1, 10, 1)), name='masked_v')
        #     print('Masked_V: ', masked_v)
        #
        # with tf.variable_scope('Decoder'):
        #     vector_j = tf.reshape(masked_v, shape=(cfg.batch_size, -1))
        #     fc1 = tf.contrib.layers.fully_connected(vector_j, num_outputs=512)
        #     assert fc1.get_shape() == [cfg.batch_size, 512]
        #     fc2 = tf.contrib.layers.fully_connected(fc1, num_outputs=1024)
        #     assert fc2.get_shape() == [cfg.batch_size, 1024]
        #     decoded = tf.contrib.layers.fully_connected(fc2, num_outputs=784, activation_fn=tf.sigmoid)

    return v_length, Caps2
def discriminator(input, isTrain=True, reuse=False):
    epsilon = 1e-9
    with tf.variable_scope('discriminator') as scope:
        if reuse:
            labels = tf.constant(0, shape=[
                cfg.batch_size,
            ])
        else:
            labels = tf.constant(1, shape=[
                cfg.batch_size,
            ])
        Y = tf.one_hot(labels, depth=2, axis=1, dtype=tf.float32)
        if reuse:
            scope.reuse_variables()
        with tf.variable_scope('Conv1_layer'):
            conv1 = tf.contrib.layers.conv2d(input,
                                             num_outputs=256,
                                             kernel_size=9,
                                             stride=1,
                                             padding='VALID')
        with tf.variable_scope('PrimaryCaps_layer'):
            primaryCaps = CapsLayer(num_outputs=32,
                                    vec_len=8,
                                    with_routing=False,
                                    layer_type='CONV')
            caps1 = primaryCaps(conv1, kernel_size=9, stride=2)
        with tf.variable_scope('DigitCaps_layer'):
            digitCaps = CapsLayer(num_outputs=2,
                                  vec_len=16,
                                  with_routing=True,
                                  layer_type='FC')
            caps2 = digitCaps(caps1)  # batch size x 2 x 16 x 1
            v_length = tf.sqrt(
                reduce_sum(tf.square(caps2), axis=2, keepdims=True) + epsilon)

        max_l = tf.square(tf.maximum(0., cfg.m_plus - v_length))
        max_r = tf.square(tf.maximum(0., v_length - cfg.m_minus))
        max_l = tf.reshape(max_l, shape=(cfg.batch_size, -1))
        max_r = tf.reshape(max_r, shape=(cfg.batch_size, -1))
        T_c = Y
        L_c = T_c * max_l + cfg.lambda_val * (1 - T_c) * max_r
        margin_loss = tf.reduce_mean(tf.reduce_sum(L_c, axis=1))
        return margin_loss
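The loss assembled at the end of this discriminator is the CapsNet margin loss, with m^+ = cfg.m_plus, m^- = cfg.m_minus and \lambda = cfg.lambda_val:

L_c = T_c \, \max(0,\, m^+ - \|v_c\|)^2 + \lambda \, (1 - T_c) \, \max(0,\, \|v_c\| - m^-)^2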
Example #13
def evaluate(results, num_batches, evaluate_fn):
    num_batches = tools.reduce_sum(num_batches)
    results = tools.collect_results_gpu(results, num_batches.item())

    rank = tools.get_dist_info()[0]
    if rank == 0:
        all_embeds = list(map(lambda r: r[0], results))
        all_labels = list(map(lambda r: r[1], results))
        all_embeds = {
            k: torch.cat(tuple(map(lambda r: r[k], all_embeds)), dim=0)
            for k in all_embeds[0].keys()
        }
        all_labels = torch.cat(all_labels, dim=0)
        metrics = evaluate_fn(all_embeds, all_labels)
        early_stop_criterion = torch.tensor([metrics['criterion']],
                                            device='cuda')
    else:
        metrics = None
        early_stop_criterion = torch.tensor([0.], device='cuda')

    # early_stop_criterion is used for all ranks, so broadcast it
    early_stop_criterion = tools.broadcast(early_stop_criterion, 0)
    return metrics, early_stop_criterion
Example #14
    def forward(self, f, b, mask=None):
        """ Contextual attention layer implementation.
            Contextual attention is first introduced in publication:
            Generative Image Inpainting with Contextual Attention, Yu et al.
        Args:
            f: Input feature to match (foreground).
            b: Input feature for match (background).
            mask: Input mask for b, indicating patches not available.
            ksize: Kernel size for contextual attention.
            stride: Stride for extracting patches from b.
            rate: Dilation for matching.
            softmax_scale: Scaled softmax for attention.
        Returns:
            torch.tensor: output
        """
        # get shapes
        raw_int_fs = list(f.size())   # b*c*h*w
        raw_int_bs = list(b.size())   # b*c*h*w

        # extract patches from background with stride and rate
        kernel = 2 * self.rate
        # raw_w is extracted for reconstruction
        raw_w = utils.extract_image_patches(b, ksizes=[kernel, kernel],
                                      strides=[self.rate*self.stride,
                                               self.rate*self.stride],
                                      rates=[1, 1],
                                      padding='same') # [N, C*k*k, L]
        # raw_shape: [N, C, k, k, L] [4, 192, 4, 4, 1024]
        raw_w = raw_w.view(raw_int_bs[0], raw_int_bs[1], kernel, kernel, -1)
        raw_w = raw_w.permute(0, 4, 1, 2, 3)    # raw_shape: [N, L, C, k, k]
        raw_w_groups = torch.split(raw_w, 1, dim=0)

        # downscaling foreground option: downscaling both foreground and
        # background for matching and use original background for reconstruction.
        f = F.interpolate(f, scale_factor=1./self.rate, mode='nearest')
        b = F.interpolate(b, scale_factor=1./self.rate, mode='nearest')
        int_fs = list(f.size())     # b*c*h*w
        int_bs = list(b.size())
        f_groups = torch.split(f, 1, dim=0)  # split tensors along the batch dimension
        # w shape: [N, C*k*k, L]
        w = utils.extract_image_patches(b, ksizes=[self.ksize, self.ksize],
                                  strides=[self.stride, self.stride],
                                  rates=[1, 1],
                                  padding='same')
        # w shape: [N, C, k, k, L]
        w = w.view(int_bs[0], int_bs[1], self.ksize, self.ksize, -1)
        w = w.permute(0, 4, 1, 2, 3)    # w shape: [N, L, C, k, k]
        w_groups = torch.split(w, 1, dim=0)

        # process mask; if none is given, assume all patches are available
        if mask is None:
            mask = torch.zeros([1, 1, int_bs[2], int_bs[3]], device=b.device)
        else:
            mask = F.interpolate(mask, scale_factor=1./self.rate, mode='nearest')
        int_ms = list(mask.size())
        # m shape: [N, C*k*k, L]
        m = utils.extract_image_patches(mask, ksizes=[self.ksize, self.ksize],
                                  strides=[self.stride, self.stride],
                                  rates=[1, 1],
                                  padding='same')

        # m shape: [N, C, k, k, L]
        m = m.view(int_ms[0], int_ms[1], self.ksize, self.ksize, -1)
        m = m.permute(0, 4, 1, 2, 3)    # m shape: [N, L, C, k, k]
        m = m[0]    # m shape: [L, C, k, k]
        # mm shape: [L, 1, 1, 1]
        mm = (utils.reduce_mean(m, axis=[1, 2, 3], keepdim=True)==0.).to(torch.float32)
        mm = mm.permute(1, 0, 2, 3) # mm shape: [1, L, 1, 1]

        y = []
        offsets = []
        k = self.fuse_k
        scale = self.softmax_scale    # to fit the PyTorch tensor image value range
        fuse_weight = torch.eye(k).view(1, 1, k, k)  # 1*1*k*k
        if self.use_cuda:
            fuse_weight = fuse_weight.cuda()

        for xi, wi, raw_wi in zip(f_groups, w_groups, raw_w_groups):
            '''
            O => output channel as a conv filter
            I => input channel as a conv filter
            xi : separated tensor along batch dimension of front; (B=1, C=128, H=32, W=32)
            wi : separated patch tensor along batch dimension of back; (B=1, O=32*32, I=128, KH=3, KW=3)
            raw_wi : separated tensor along batch dimension of back; (B=1, I=32*32, O=128, KH=4, KW=4)
            '''
            # conv for compare
            escape_NaN = torch.FloatTensor([1e-4])
            if self.use_cuda:
                escape_NaN = escape_NaN.cuda()
            wi = wi[0]  # [L, C, k, k]
            max_wi = torch.sqrt(utils.reduce_sum(torch.pow(wi, 2) + escape_NaN, axis=[1, 2, 3], keepdim=True))
            wi_normed = wi / max_wi
            # xi shape: [1, C, H, W], yi shape: [1, L, H, W]
            xi = utils.same_padding(xi, [self.ksize, self.ksize], [1, 1], [1, 1])  # xi: 1*c*H*W
            yi = F.conv2d(xi, wi_normed, stride=1)   # [1, L, H, W]
            # conv implementation for fuse scores to encourage large patches
            if self.fuse:
                # make all of depth to spatial resolution
                yi = yi.view(1, 1, int_bs[2]*int_bs[3], int_fs[2]*int_fs[3])  # (B=1, I=1, H=32*32, W=32*32)
                yi = utils.same_padding(yi, [k, k], [1, 1], [1, 1])
                yi = F.conv2d(yi, fuse_weight, stride=1)  # (B=1, C=1, H=32*32, W=32*32)
                yi = yi.contiguous().view(1, int_bs[2], int_bs[3], int_fs[2], int_fs[3])  # (B=1, 32, 32, 32, 32)
                yi = yi.permute(0, 2, 1, 4, 3)
                yi = yi.contiguous().view(1, 1, int_bs[2]*int_bs[3], int_fs[2]*int_fs[3])
                yi = utils.same_padding(yi, [k, k], [1, 1], [1, 1])
                yi = F.conv2d(yi, fuse_weight, stride=1)
                yi = yi.contiguous().view(1, int_bs[3], int_bs[2], int_fs[3], int_fs[2])
                yi = yi.permute(0, 2, 1, 4, 3).contiguous()
            yi = yi.view(1, int_bs[2] * int_bs[3], int_fs[2], int_fs[3])  # (B=1, C=32*32, H=32, W=32)
            # softmax to match
            yi = yi * mm
            yi = F.softmax(yi*scale, dim=1)
            yi = yi * mm  # [1, L, H, W]

            offset = torch.argmax(yi, dim=1, keepdim=True)  # 1*1*H*W

            if int_bs != int_fs:
                # Normalize the offset value to match foreground dimension
                times = float(int_fs[2] * int_fs[3]) / float(int_bs[2] * int_bs[3])
                offset = ((offset + 1).float() * times - 1).to(torch.int64)
            offset = torch.cat([offset//int_fs[3], offset%int_fs[3]], dim=1)  # 1*2*H*W

            # deconv for patch pasting
            wi_center = raw_wi[0]
            # yi = F.pad(yi, [0, 1, 0, 1])    # here may need conv_transpose same padding
            yi = F.conv_transpose2d(yi, wi_center, stride=self.rate, padding=1) / 4.  # (B=1, C=128, H=64, W=64)
            y.append(yi)
            offsets.append(offset)

        y = torch.cat(y, dim=0)  # back to the mini-batch
        y = y.contiguous().view(raw_int_fs)

        return y
Example #15
    def forward(self,
                f,
                b,
                mask=None,
                ksize=3,
                stride=1,
                rate=1,
                fuse_k=3,
                softmax_scale=10.,
                training=True,
                fuse=True):
        """ Contextual attention layer implementation.

        Contextual attention is first introduced in publication:
        Generative Image Inpainting with Contextual Attention, Yu et al.

        Args:
        f: Input feature to match (foreground).
        b: Input feature for match (background).
        mask: Input mask for b, indicating patches not available.
        ksize: Kernel size for contextual attention.
        stride: Stride for extracting patches from b.
        rate: Dilation for matching.
        softmax_scale: Scaled softmax for attention.
        training: Indicating if current graph is training or inference.

        Returns:
        tf.Tensor: output
        """

        # get shapes of foreground (f) and background (b)
        raw_fs = f.shape
        # print("RAW FS: " + str(raw_fs))
        raw_int_fs = list(f.shape)
        raw_int_bs = list(b.shape)

        # extract 3x3 patches from background with stride and rate
        kernel = 2 * rate
        raw_w = self.extract_image_patches(b, kernel, rate * stride)

        # Reshape raw_w to match pytorch conv weights shape
        raw_w = torch.reshape(
            raw_w, [raw_int_bs[0], -1, raw_int_bs[1], kernel, kernel
                    ])  # b x in_ch (h * w) x out_ch (c) x k x k

        # downscaling foreground option: downscaling both foreground and
        # background for matching and use original background for reconstruction.
        f = F.interpolate(f, scale_factor=1. / rate, mode='nearest')
        b = F.interpolate(
            b,
            size=[int(raw_int_bs[2] / rate),
                  int(raw_int_bs[3] / rate)],
            mode='nearest')

        # get shape of foreground then split on the batch dimension
        fs = f.shape
        int_fs = list(f.shape)
        f_groups = torch.split(f, 1, dim=0)

        # print("F GROUPS: " + str(f_groups[0].shape))

        bs = b.shape
        int_bs = list(b.shape)

        # extract w then reshape to weight shape of functional conv2d of pytorch
        w = self.extract_image_patches(b, ksize, stride)
        # reshape to b x in_ch (h * w) x out_ch (c) x k x k
        # print("INT FS: " + str(int_fs))
        w = torch.reshape(w, [int_fs[0], -1, int_fs[1], ksize, ksize])

        # print("W: " + str(w.shape))
        # process mask
        if mask is None:
            mask = torch.zeros([bs[0], 1, bs[2], bs[3]]).cuda()
        else:
            # print("DOWNSAMPLE MEN")
            mask = F.interpolate(mask, scale_factor=1. / rate, mode='nearest')

        m = self.extract_image_patches(mask, ksize, stride)

        # make mask have the shape of (b x c x hw x k x k)
        # print("m = " + str(mask.shape))
        if (mask.shape[0] > 1):
            m = torch.reshape(m, [mask.shape[0], 1, -1, ksize, ksize])
        else:
            m = torch.reshape(m, [1, 1, -1, ksize, ksize])
        # m = m[0]
        # print("MY M: " + str(m.shape))
        # create batch for mm
        mm = []
        for i in range(m.shape[0]):
            mm.append(utils.reduce_mean(m[i], axis=[0, 2, 3], keep_dims=True))

        mm = torch.cat(mm)

        # print("mm: " + str(mm.shape))
        w_groups = torch.split(w, 1, dim=0)
        raw_w_groups = torch.split(raw_w, 1, dim=0)
        y = []
        offsets = []
        k = fuse_k
        scale = softmax_scale
        fuse_weight = utils.to_var(torch.reshape(torch.eye(k), [1, 1, k, k]))

        for xi, wi, raw_wi, mi in zip(f_groups, w_groups, raw_w_groups, mm):
            """
            # Conv per batch
            # VARIABLES:
            # - xi: input to the conv; tensors from foreground (f_groups)
            # - wi: weights for training; image patches from the background (w_groups): 
            # - raw_wi: patches from the background (raw_w_groups)
            """
            # conv for compare
            wi = wi[0]

            wi_normed = wi / \
                torch.max(torch.sqrt(utils.reduce_sum(
                    wi ** 2, axis=[0, 2, 3])), torch.FloatTensor([1e-4]).cuda())

            # print("wi_normed: " + str(wi_normed.shape))
            # print("xi:" + str(xi.shape))
            yi = F.conv2d(xi, wi_normed, stride=1, padding=1)
            # print("yi: " + str(yi.shape))
            # wi_normed = wi / torch.max(torch.sqrt(torch.sum(torch.square()))) #l2 norm
            # conv implementation for fuse scores to encourage large patches
            if fuse:
                # b x c x f(hw) x b(hw)
                yi = torch.reshape(yi, [1, 1, fs[2] * fs[3], bs[2] * bs[3]])
                # print("yi: " + str(yi.shape))
                yi = F.conv2d(yi, fuse_weight, stride=1, padding=1)
                yi = torch.reshape(yi, [1, fs[2], fs[3], bs[2], bs[3]])
                yi = yi.permute(0, 2, 1, 4, 3)
                yi = torch.reshape(yi, [1, 1, fs[2] * fs[3], bs[2] * bs[3]])
                # print("yi: " + str(yi.shape))
                yi = F.conv2d(yi, fuse_weight, stride=1, padding=1)
                yi = torch.reshape(yi, [1, fs[3], fs[2], bs[3], bs[2]])
                yi = yi.permute(0, 2, 1, 4, 3)
                # print("yi inside fuse: " + str(yi.shape))
                # print("yi: " + str(yi.shape))

            yi = torch.reshape(yi, [1, bs[2] * bs[3], fs[2], fs[3]])
            # print("yi: " + str(yi.shape))
            # softmax to match
            yi = yi * mi
            # print("hey")
            yi = F.softmax(yi * scale, dim=1)
            yi = yi * mi  # mask

            _, offset = torch.max(yi, dim=1)
            offset = torch.stack([offset // fs[3], offset % fs[3]], dim=-1)

            # deconv for patch pasting
            # 3.1 paste center
            wi_center = raw_wi[0]
            yi = F.conv_transpose2d(yi, wi_center, stride=rate, padding=1) / 4.
            y.append(yi)
            offsets.append(offset)

        y = torch.cat(y, dim=0)

        offsets = torch.cat(offsets, dim=0)
        offsets = torch.reshape(offsets,
                                [int_bs[0]] + [2] + int_bs[2:])  # skip channel

        # case1: visualize optical flow: minus current position
        # height
        h_add = utils.to_var(
            torch.reshape(torch.arange(bs[2]), [1, 1, bs[2], 1]))
        h_add = h_add.expand([bs[0], 1, bs[2], bs[3]])

        # width
        w_add = utils.to_var(
            torch.reshape(torch.arange(bs[3]), [1, 1, 1, bs[3]]))
        w_add = w_add.expand([bs[0], 1, bs[2], bs[3]])

        # concat on channel
        offsets = offsets - torch.cat([h_add, w_add], dim=1)

        # to flow image
        flow = helper.flow_to_image(
            offsets.permute(0, 2, 3, 1).data.cpu().numpy())
        flow = torch.from_numpy(flow).permute(0, 3, 1, 2)

        # case2: visualize which pixels are attended
        # flow = highlight_flow_tf(offsets * tf.cast(mask, tf.int32))
        if rate != 1:
            flow = F.interpolate(flow, scale_factor=rate, mode='nearest')

        out = self.final_layers(y)
        return out, flow
Example #16
    def routing(input, b_IJ):
        '''Arg: input_tensor: [batch_size, num_caps_l = 1152, 1, len(u_i)=8, 1]
        
           Return: [batch_size, num_caps_l_plus_one, len(v_j)=16, 1]
           
           u_i represents the vector output of capsule i in the layer l, and
           v_j the vector output of capsule j in the layer l+1.
        '''

        # W: [1, num_caps_i, num_caps_j*len_v_j, len_u_j, 1]
        W = tf.get_variable(
            'Weight',
            shape=(1, 1152, 160, 8, 1),
            dtype=tf.float32,
            initializer=tf.random_normal_initializer(stddev=cfg.stddev))
        biases = tf.get_variable('bias', shape=(1, 1, 10, 16, 1))

        # calc u_hat
        '''
        Since tf.matmul is a time-consuming op, a better solution is to use
        element-wise multiply, reduce_sum and reshape ops instead.
        Matmul [a, b] x [b, c] is equivalent to a series of ops:
        element-wise multiply [a*c, b] * [a*c, b], reduce_sum at axis=1,
        and reshape to [a, c].
        '''
        '''
        '''
        tf.tile creates a new tensor by replicating the input `multiples` times:
        the output tensor's i-th dimension has input.dims(i) * multiples[i]
        elements, and the values of the input are replicated multiples[i] times
        along the i-th dimension.

        Example: [a b c d] tiled by [2] gives [a b c d a b c d]
        '''

        # input_tensor: [batch_size, num_caps_l = 1152, 1, len(u_i)=8, 1]
        input = tf.tile(input, [1, 1, 160, 1, 1])
        # Validate input shape
        assert input.get_shape() == [cfg.batch_size, 1152, 160, 8, 1]

        u_hat = tf.reduce_sum(W * input, axis=3,
                              keepdims=True)  # element-wise multiply, then sum over u_i
        u_hat = tf.reshape(u_hat, shape=[-1, 1152, 10, 16, 1])
        # check size
        assert u_hat.get_shape() == [cfg.batch_size, 1152, 10, 16, 1]

        # During forward pass, u_hat_stopped == u_hat
        # No update during backprop. no gradient pass either
        u_hat_stopped = tf.stop_gradient(u_hat, name='stop_gradient')

        for r_iter in range(cfg.iter_routing):
            with tf.variable_scope('iter_' + str(r_iter)):
                #[batch_size, 1152, 10, 1, 1]
                c_IJ = softmax(b_IJ, axis=2)

                if r_iter == cfg.iter_routing - 1:  #last iteration: we use u_hat
                    s_J = tf.multiply(c_IJ, u_hat)
                    s_J = reduce_sum(s_J, axis=1, keepdims=True) + biases
                    assert s_J.get_shape() == [cfg.batch_size, 1, 10, 16, 1]

                    v_J = squash(s_J)
                    assert v_J.get_shape() == [cfg.batch_size, 1, 10, 16, 1]
                elif r_iter < cfg.iter_routing - 1:
                    s_J = tf.multiply(c_IJ, u_hat_stopped)
                    s_J = reduce_sum(s_J, axis=1, keepdims=True) + biases
                    v_J = squash(s_J)

                    # Reshape and tile v_J from [batch_size, 1, 10, 16, 1]
                    # to match with u_hat_stopped: [batch_size, 1152, 10,16, 1]
                    # b_IJ += u_hat_stopped^T * v_J
                    v_J_tiled = tf.tile(v_J, [1, 1152, 1, 1, 1])

                    # v_J_tiled: [batch_size, 1152, 10,16, 1]
                    # u_hat_stopped: [batch_size,1152,10,16,1]
                    u_produce_v = reduce_sum(u_hat_stopped * v_J_tiled,
                                             axis=3,
                                             keepdims=True)
                    assert u_produce_v.get_shape() == [
                        cfg.batch_size, 1152, 10, 1, 1
                    ]

                    b_IJ += u_produce_v

        return v_J
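A small self-contained check of the matmul-as-elementwise trick described in the comments above; this NumPy sketch is illustrative and not part of the original code:

import numpy as np

a, b, c = 2, 3, 4
M = np.random.randn(a, b)   # [a, b]
N = np.random.randn(b, c)   # [b, c]

# replicate M once per output column and line N's contraction axis up,
# then element-wise multiply and reduce_sum over the shared axis
M_t = np.repeat(M[:, None, :], c, axis=1)   # [a, c, b]
N_t = np.tile(N.T[None, :, :], (a, 1, 1))   # [a, c, b]
out = (M_t * N_t).sum(axis=2)               # [a, c]

assert np.allclose(out, M @ N)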
Example #17
def discriminator(input, isTrain=True, reuse=False):
    epsilon = 1e-9
    with tf.variable_scope('discriminator') as scope:
        # real samples get label 1, generated samples (reuse pass) label 0
        if reuse:
            labels = tf.constant(0, shape=[
                cfg.batch_size,
            ])
        else:
            labels = tf.constant(1, shape=[
                cfg.batch_size,
            ])
        Y = tf.one_hot(labels, depth=2, axis=1, dtype=tf.float32)
        X = input

        if reuse:
            scope.reuse_variables()
        with tf.variable_scope('Conv1_layer'):
            # Conv1, [batch_size, 20, 20, 256]
            conv1 = tf.contrib.layers.conv2d(X,
                                             num_outputs=256,
                                             kernel_size=9,
                                             stride=1,
                                             padding='VALID')
            assert conv1.get_shape() == [cfg.batch_size, 20, 20, 256]

        # Primary Capsules layer, return [batch_size, 1152, 8, 1]
        with tf.variable_scope('PrimaryCaps_layer'):
            primaryCaps = CapsLayer(num_outputs=32,
                                    vec_len=8,
                                    with_routing=False,
                                    layer_type='CONV')
            caps1 = primaryCaps(conv1, kernel_size=9, stride=2)
            assert caps1.get_shape() == [cfg.batch_size, 1152, 8, 1]

        # DigitCaps layer, return [batch_size, 10, 16, 1]
        with tf.variable_scope('DigitCaps_layer'):
            """changing the num_outputs to 2 from 10"""
            digitCaps = CapsLayer(num_outputs=2,
                                  vec_len=16,
                                  with_routing=True,
                                  layer_type='FC')
            caps2 = digitCaps(caps1)
            v_length = tf.sqrt(
                reduce_sum(tf.square(caps2), axis=2, keepdims=True) + epsilon)
        """Loss """
        max_l = tf.square(tf.maximum(0., cfg.m_plus - v_length))
        # max_r = max(0, ||v_c||-m_minus)^2
        max_r = tf.square(tf.maximum(0., v_length - cfg.m_minus))
        """changing assert value to be [batch, 2, 1, 1] from [batch, 10, 1, 1]"""
        assert max_l.get_shape() == [cfg.batch_size, 2, 1, 1]

        # reshape: [batch_size, 2, 1, 1] => [batch_size, 2]
        max_l = tf.reshape(max_l, shape=(cfg.batch_size, -1))
        max_r = tf.reshape(max_r, shape=(cfg.batch_size, -1))

        # calc T_c: [batch_size, 2]
        # T_c = Y, is my understanding correct? Try it.
        T_c = Y
        # [batch_size, 2], element-wise multiply
        L_c = T_c * max_l + cfg.lambda_val * (1 - T_c) * max_r

        margin_loss = tf.reduce_mean(tf.reduce_sum(L_c, axis=1))
        return margin_loss
    def build_arch(self):
        with tf.variable_scope('Conv1_layer'):
            # Conv1, [batch_size, 20, 20, 256]
            conv1 = tf.contrib.layers.conv2d(self.X, num_outputs=256,
                                             kernel_size=9, stride=1,
                                             padding='VALID')
            assert conv1.get_shape() == [cfg.batch_size, 20, 20, 256]

        # Primary Capsules layer, return [batch_size, 1152, 8, 1]
        with tf.variable_scope('PrimaryCaps_layer'):
            primaryCaps = CapsLayer(num_outputs=32, vec_len=8, with_routing=False, layer_type='CONV')
            caps1 = primaryCaps(conv1, kernel_size=9, stride=2)
            assert caps1.get_shape() == [cfg.batch_size, 1152, 8, 1]

        # DigitCaps layer, return [batch_size, 10, 16, 1]
        with tf.variable_scope('DigitCaps_layer'):
            digitCaps = CapsLayer(num_outputs=10, vec_len=16, with_routing=True, layer_type='FC')
            self.caps2 = digitCaps(caps1)

        # Decoder structure in Fig. 2
        # 1. Do masking, how:
        with tf.variable_scope('Masking'):
            # a). calc ||v_c||, then do softmax(||v_c||)
            # [batch_size, 10, 16, 1] => [batch_size, 10, 1, 1]
            self.v_length = tf.sqrt(reduce_sum(tf.square(self.caps2),
                                               axis=2, keepdims=True) + epsilon)
            self.softmax_v = softmax(self.v_length, axis=1)
            assert self.softmax_v.get_shape() == [cfg.batch_size, 10, 1, 1]

            # b). pick out the index of max softmax val of the 10 caps
            # [batch_size, 10, 1, 1] => [batch_size] (index)
            self.argmax_idx = tf.to_int32(tf.argmax(self.softmax_v, axis=1))
            assert self.argmax_idx.get_shape() == [cfg.batch_size, 1, 1]
            self.argmax_idx = tf.reshape(self.argmax_idx, shape=(cfg.batch_size, ))

            # Method 1.
            if not cfg.mask_with_y:
                # c). indexing
                # The indexing with argmax_idx is not easy to follow,
                # since we think in three dimensions
                masked_v = []
                for batch_size in range(cfg.batch_size):
                    v = self.caps2[batch_size][self.argmax_idx[batch_size], :]
                    masked_v.append(tf.reshape(v, shape=(1, 1, 16, 1)))

                self.masked_v = tf.concat(masked_v, axis=0)
                assert self.masked_v.get_shape() == [cfg.batch_size, 1, 16, 1]
            # Method 2. masking with true label, default mode
            else:
                # self.masked_v = tf.matmul(tf.squeeze(self.caps2), tf.reshape(self.Y, (-1, 10, 1)), transpose_a=True)
                self.masked_v = tf.multiply(tf.squeeze(self.caps2), tf.reshape(self.Y, (-1, 10, 1)))
                self.v_length = tf.sqrt(reduce_sum(tf.square(self.caps2), axis=2, keepdims=True) + epsilon)

        # 2. Reconstruct the MNIST images with 3 FC layers
        # [batch_size, 1, 16, 1] => [batch_size, 16] => [batch_size, 512]
        with tf.variable_scope('Decoder'):
            vector_j = tf.reshape(self.masked_v, shape=(cfg.batch_size, -1))
            fc1 = tf.contrib.layers.fully_connected(vector_j, num_outputs=512)
            assert fc1.get_shape() == [cfg.batch_size, 512]
            fc2 = tf.contrib.layers.fully_connected(fc1, num_outputs=1024)
            assert fc2.get_shape() == [cfg.batch_size, 1024]
            self.decoded = tf.contrib.layers.fully_connected(fc2, num_outputs=784, activation_fn=tf.sigmoid)
Example #19
	def __init__(self, is_training = True):
		self.graph = tf.Graph()

		with self.graph.as_default():
			if is_training:
				self.X, self.labels = get_batch_data(cfg.dataset, cfg.batch_size, cfg.num_threads)
				self.Y = tf.one_hot(self.labels, depth = 10, axis = 1, dtype = tf.float32) #depth = 10 for 10 classes

				self.build_arch()
				self.loss()
				self.summary()

				self.global_step = tf.Variable(0,name='global_step',trainable=False)
				self.optimizer = tf.train.AdamOptimizer()
				self.train_op = self.optimizer.minimize(self.total_loss, global_step = self.global_step)

			else:
				#Which is either Testing or Validation
				self.X = tf.placeholder(tf.float32, shape = (cfg.batch_size,28,28,1)) # 28 by 28 pixel and 1 channel
				self.labels = tf.placeholder(tf.int32, shape = (cfg.batch_size, ))
				self.Y = tf.reshape(self.labels, shape = (cfg.batch_size, 10, 1))
				self.build_arch()

		tf.logging.info('Setting up the main structure')

	def build_arch(self):
		with tf.variable_scope('Conv1_layer'):
			# Conv1_layer:
			# Output [batch_size, 20, 20, 256]
			conv1 = tf.contrib.layers.conv2d(self.X, num_outputs=256, kernel_size=9, stride=1, padding='VALID')
			assert conv1.get_shape() == [cfg.batch_size, 20, 20, 256]

		# Primary Caps layer
		# Output: [batch_size, 6, 6, 32, 8-dim tensor]
		# i.e. [cfg.batch_size, 1152, 8, 1]
		with tf.variable_scope('PrimaryCaps_layer'):
			primaryCaps = CapsLayer(num_outputs=32, vec_len=8, with_routing=False, layer_type='CONV')
			caps1 = primaryCaps(conv1, kernel_size=9, stride=2)
			assert caps1.get_shape() == [cfg.batch_size, 1152, 8, 1]

		with tf.variable_scope('DigitCaps_layer'):
			digitCaps = CapsLayer(num_outputs=10, vec_len=16, with_routing=True, layer_type='FC')
			self.caps2 = digitCaps(caps1)

		with tf.variable_scope('Masking'):
			# calculate ||v_c||, then softmax(||v_c||)
			# [batch_size, 10, 16, 1] => [batch_size, 10, 1, 1]
			self.v_length = tf.sqrt(reduce_sum(tf.square(self.caps2), axis=2, keepdims=True) + epsilon)
			self.softmax_v = softmax(self.v_length, axis=1)
			assert self.softmax_v.get_shape() == [cfg.batch_size, 10, 1, 1]

			# pick the index with the max softmax val of the 10 caps
			# [batch_size, 10, 1, 1] => [batch_size] (index)
			self.argmax_idx = tf.to_int32(tf.argmax(self.softmax_v, axis=1))
			assert self.argmax_idx.get_shape() == [cfg.batch_size, 1, 1]
			self.argmax_idx = tf.reshape(self.argmax_idx, shape=(cfg.batch_size, ))

			# mask with the predicted label...
			if not cfg.mask_with_y:
				# indexing
				masked_v = []
				for batch_size in range(cfg.batch_size):
					v = self.caps2[batch_size][self.argmax_idx[batch_size], :]
					masked_v.append(tf.reshape(v, shape=(1, 1, 16, 1)))

				self.masked_v = tf.concat(masked_v, axis=0)
				assert self.masked_v.get_shape() == [cfg.batch_size, 1, 16, 1]
			# ...or mask with the true label
			else:
				self.masked_v = tf.multiply(tf.squeeze(self.caps2), tf.reshape(self.Y, (-1, 10, 1)))
				self.v_length = tf.sqrt(reduce_sum(tf.square(self.caps2), axis=2, keepdims=True) + epsilon)

		with tf.variable_scope('Decoder'):
Example #20
  def build_arch(self):
    with tf.variable_scope('Conv1_layer'):
      conv1 = contrib.layers.conv2d(self.X, num_outputs=256,
                                    kernel_size=9, stride=1,
                                    padding="VALID")
      assert conv1.get_shape() == [cfg.batch_size, 20, 20, 256]

    # Primary Capsules layer, return [batch_size, 1152, 8, 1]
    with tf.variable_scope('PrimaryCaps_layer'):
      primaryCaps = CapsLayer(num_outputs=32, vec_len=8,
                              with_routing=False, layer_type='CONV')
      caps1 = primaryCaps(conv1, kernel_size=9, stride=2)
      assert caps1.get_shape() == [cfg.batch_size, 1152, 8, 1]

    # DigitCaps layer, return [batch_size, 10, 16, 1]
    with tf.variable_scope('DigitCaps_layer'):
      digitCaps = CapsLayer(num_outputs=10, vec_len=16,
                            with_routing=True, layer_type='FC')
      self.caps2 = digitCaps(caps1)

    # Decoder structure in Fig. 2
    # 1. Do masking, how:
    with tf.variable_scope("Masking"):
      self.v_length = tf.sqrt(reduce_sum(tf.square(self.caps2),
                                         axis=2,
                                         keepdims=True) + epsilon)
      self.softmax_v = softmax(self.v_length, axis=1)
      assert self.softmax_v.get_shape() == [cfg.batch_size, 10, 1, 1]

      self.argmax_idx = tf.to_int32(tf.argmax(self.softmax_v, axis=1))
      assert self.argmax_idx.get_shape() == [cfg.batch_size, 1, 1]
      self.argmax_idx = tf.reshape(self.argmax_idx, shape=(cfg.batch_size, ))

      # Method 1.
      if not cfg.mask_with_y:
        masked_v = []
        for batch_size in range(cfg.batch_size):
          v = self.caps2[batch_size][self.argmax_idx[batch_size], :]
          masked_v.append(tf.reshape(v, shape=(1, 1, 16, 1)))

        self.masked_v = tf.concat(masked_v, axis=0)
        assert self.masked_v.get_shape() == [cfg.batch_size, 1, 16, 1]
      # Method 2. masking with true label, default model
      else:
        self.masked_v = tf.multiply(tf.squeeze(self.caps2),
                                    tf.reshape(self.Y, (-1, 10, 1)))
        self.v_length = tf.sqrt(reduce_sum(tf.square(self.caps2),
                                           axis=2,
                                           keepdims=True) + epsilon)

    # 2. Reconstruct the MNIST images with 3 FC layers
    # [batch_size, 1, 16, 1] => [batch_size, 16] => [batch_size, 512]
    with tf.variable_scope('Decoder'):
      fully_connected = contrib.layers.fully_connected
      vector_j = tf.reshape(self.masked_v, shape=(cfg.batch_size, -1))
      fc1 = fully_connected(vector_j, num_outputs=512)
      assert fc1.get_shape() == [cfg.batch_size, 512]
      fc2 = fully_connected(fc1, num_outputs=1024)
      assert fc2.get_shape() == [cfg.batch_size, 1024]
      self.decoded = fully_connected(fc2, num_outputs=784,
                                     activation_fn=tf.sigmoid)
def squash(vector):
    vec_squared_norm = reduce_sum(tf.square(vector), -2, keepdims=True)
    scalar_factor = vec_squared_norm / (
        1 + vec_squared_norm) / tf.sqrt(vec_squared_norm + epsilon)
    vec_squashed = scalar_factor * vector  # element-wise
    return vec_squashed
Example #22
    def build_arch(self):
        with tf.variable_scope('Conv1_layer'):
            # Conv1, [batch_size, 20, 20, 256]
            conv1 = tf.contrib.layers.conv2d(self.X,
                                             num_outputs=256,
                                             kernel_size=9,
                                             stride=1,
                                             padding='VALID')
            assert conv1.get_shape() == [cfg.batch_size, 20, 20, 256]

        # Primary Capsules layer, return [batch_size, 1152, 8, 1]
        with tf.variable_scope('PrimaryCaps_layer'):
            primaryCaps = CapsLayer(num_outputs=32,
                                    vec_len=8,
                                    with_routing=False,
                                    layer_type='CONV')
            caps1 = primaryCaps(conv1, kernel_size=9, stride=2)
            assert caps1.get_shape() == [cfg.batch_size, 1152, 8, 1]

        # DigitCaps layer, return [batch_size, 10, 16, 1]
        with tf.variable_scope('DigitCaps_layer'):
            """changing the num_outputs to 2 from 10"""
            digitCaps = CapsLayer(num_outputs=2,
                                  vec_len=16,
                                  with_routing=True,
                                  layer_type='FC')
            self.caps2 = digitCaps(caps1)

        # Decoder structure in Fig. 2
        # 1. Do masking, how:
        """since we have only two output capsules, we don't need masking because  we are not using any reconstruction thus commenting:"""
        with tf.variable_scope('Masking'):
            # a). calc ||v_c||, then do softmax(||v_c||)
            # [batch_size, 10, 16, 1] => [batch_size, 10, 1, 1]
            self.v_length = tf.sqrt(
                reduce_sum(tf.square(self.caps2), axis=2, keepdims=True) +
                epsilon)
            self.softmax_v = softmax(self.v_length, axis=1)
            """changing assert value to be [batch, 2, 1, 1] from [batch, 10, 1, 1]"""
            assert self.softmax_v.get_shape() == [cfg.batch_size, 2, 1, 1]

            # b). pick out the index of max softmax val of the 10 caps
            # [batch_size, 10, 1, 1] => [batch_size] (index)
            self.argmax_idx = tf.to_int32(tf.argmax(self.softmax_v, axis=1))
            assert self.argmax_idx.get_shape() == [cfg.batch_size, 1, 1]
            self.argmax_idx = tf.reshape(self.argmax_idx,
                                         shape=(cfg.batch_size, ))

            # Method 1.
            if not cfg.mask_with_y:
                # c). indexing
                # The indexing with argmax_idx is not easy to follow,
                # since we think in three dimensions
                masked_v = []
                for batch_size in range(cfg.batch_size):
                    v = self.caps2[batch_size][self.argmax_idx[batch_size], :]
                    masked_v.append(tf.reshape(v, shape=(1, 1, 16, 1)))

                self.masked_v = tf.concat(masked_v, axis=0)
                assert self.masked_v.get_shape() == [cfg.batch_size, 1, 16, 1]
            # Method 2. masking with true label, default mode
            else:
                # self.masked_v = tf.matmul(tf.squeeze(self.caps2), tf.reshape(self.Y, (-1, 10, 1)), transpose_a=True)
                self.masked_v = tf.multiply(tf.squeeze(self.caps2),
                                            tf.reshape(self.Y, (-1, 10, 1)))
                self.v_length = tf.sqrt(
                    reduce_sum(tf.square(self.caps2), axis=2, keepdims=True) +
                    epsilon)

        # 2. Reconstruct the MNIST images with 3 FC layers
        # [batch_size, 1, 16, 1] => [batch_size, 16] => [batch_size, 512]
        with tf.variable_scope('Decoder'):
            vector_j = tf.reshape(self.masked_v, shape=(cfg.batch_size, -1))
            fc1 = tf.contrib.layers.fully_connected(vector_j, num_outputs=512)
            assert fc1.get_shape() == [cfg.batch_size, 512]
            fc2 = tf.contrib.layers.fully_connected(fc1, num_outputs=1024)
            assert fc2.get_shape() == [cfg.batch_size, 1024]
            self.decoded = tf.contrib.layers.fully_connected(
                fc2, num_outputs=784, activation_fn=tf.sigmoid)
Example #23
    def build_arch(self):
        with tf.variable_scope('Conv1_layer'):
            # Conv1, return tensor with shape [batch_size, 20, 20, 256]
            '''
            Layer 1: convolutional layer
                Input: 28x28 image (single channel)
                Output: 20x20x256 tensor
                Parameters: 20,992
            The convolutional layer detects basic 2D image features. In CapsNet,
            it has 256 9x9x1 kernels with stride 1 and ReLU activation.
            '''
            conv1 = tf.contrib.layers.conv2d(self.X,
                                             num_outputs=256,
                                             kernel_size=9,
                                             stride=1,
                                             padding='VALID')
            # print("第一次cnn",conv1)

        with tf.variable_scope('PrimaryCaps_layer'):
            # Primary Capsules layer, return tensor with shape [batch_size, 1152, 8, 1]
            '''
            Layer 2: PrimaryCaps layer
                Input: 20x20x256 tensor
                Output: 6x6x8x32 tensor
                Parameters: 5,308,672
            This layer contains 32 primary capsules that take the basic features
            detected by the convolutional layer and build combinations of them.
            The 32 primary capsules are essentially similar to the convolutional
            layer: each capsule applies eight 9x9x256 convolutional kernels to
            the 20x20x256 input tensor, producing a 6x6x8 output tensor. With 32
            capsules in total, the output is a 6x6x8x32 tensor.
            '''
            primaryCaps = CapsLayer(num_outputs=32,
                                    vec_len=8,
                                    with_routing=False,
                                    layer_type='CONV')
            caps1 = primaryCaps(conv1, kernel_size=9, stride=2)
            # print("第二层 PrimaryCaps层",caps1)

        with tf.variable_scope('DigitCaps_layer'):
            # DigitCaps layer, return shape [batch_size, 10, 16, 1]
            '''
            Layer 3: DigitCaps layer
                Input:  6x6x8x32 tensor
                Output: 16x10 matrix
                Parameters: 1,497,600
            This layer contains 10 digit capsules, one per digit. Each capsule takes
            a 6x6x8x32 tensor as input, which can be viewed as 1152 (6x6x32)
            8-dimensional vectors. Inside the capsule, each input vector is mapped
            from the 8-dimensional input space to the 16-dimensional capsule output
            space by an 8x16 weight matrix.
            '''
            digitCaps = CapsLayer(num_outputs=self.num_label,
                                  vec_len=16,
                                  with_routing=True,
                                  layer_type='FC')
            self.caps2 = digitCaps(caps1)
            # print("第三层 DigitCaps层",self.caps2)

        # Decoder structure in Fig. 2
        # 1. Do masking, how:
        with tf.variable_scope('Masking'):
            # a). calc ||v_c||, then do softmax(||v_c||)
            # [batch_size, 10, 16, 1] => [batch_size, 10, 1, 1]
            self.v_length = tf.sqrt(
                reduce_sum(tf.square(self.caps2), axis=2, keepdims=True) +
                epsilon)
            # print("self.v_length",self.v_length)
            #计算 v 向量的模
            self.softmax_v = softmax(self.v_length, axis=1)
            # print("self.softmax_v",self.softmax_v)
            # 对每个低层胶囊i而言,所有权重cij的总和等于1。
            # assert self.softmax_v.get_shape() == [cfg.batch_size, self.num_label, 1, 1]

            # b). pick out the index of max softmax val of the 10 caps
            # [batch_size, 10, 1, 1] => [batch_size] (index)
            self.argmax_idx = tf.to_int32(tf.argmax(self.softmax_v, axis=1))
            # print("self.argmax_idx",self.argmax_idx)
            # get the index of the best prediction
            # assert self.argmax_idx.get_shape() == [cfg.batch_size, 1, 1]
            self.argmax_idx = tf.reshape(self.argmax_idx,
                                         shape=(cfg.batch_size, ))
            # print("self.argmax_idx",self.argmax_idx)

            # Method 1.
            if not cfg.mask_with_y:
                # c). indexing
                # It's not easy to visualize the indexing process with argmax_idx,
                # as we are 3-dim animals
                masked_v = []
                for batch_size in range(cfg.batch_size):
                    v = self.caps2[batch_size][self.argmax_idx[batch_size], :]
                    # print("v",v)
                    masked_v.append(tf.reshape(v, shape=(1, 1, 16, 1)))

                self.masked_v = tf.concat(masked_v, axis=0)
                # print("self.masked_v",self.masked_v )
                assert self.masked_v.get_shape() == [cfg.batch_size, 1, 16, 1]
            # Method 2. masking with true label, default mode
            else:
                self.masked_v = tf.multiply(
                    tf.squeeze(self.caps2),
                    tf.reshape(self.Y, (-1, self.num_label, 1)))
                self.v_length = tf.sqrt(
                    reduce_sum(tf.square(self.caps2), axis=2, keepdims=True) +
                    epsilon)
                # print("self.masked_v2",self.masked_v)
                # print("self.v_length2",self.v_length)

        # 2. Reconstruct the MNIST images with 3 FC layers
        # [batch_size, 1, 16, 1] => [batch_size, 16] => [batch_size, 512]
        with tf.variable_scope('Decoder'):
            vector_j = tf.reshape(self.masked_v, shape=(cfg.batch_size, -1))
            fc1 = tf.contrib.layers.fully_connected(vector_j, num_outputs=512)
            '''
            Layer 4: first fully connected layer
                Input:  16x10
                Output: 512
                Parameters: 82,432
            Each output of the lower layer is weighted and fed into every neuron of
            the fully connected layer; each neuron also has a bias term. All 16x10
            inputs go to each of this layer's 512 neurons.
            '''
            fc2 = tf.contrib.layers.fully_connected(fc1, num_outputs=1024)
            '''
            Layer 5: second fully connected layer
                Input:  512
                Output: 1024
                Parameters: 525,312
            '''
            self.decoded = tf.contrib.layers.fully_connected(
                fc2,
                num_outputs=self.height * self.width * self.channels,
                activation_fn=tf.sigmoid)
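The parameter counts quoted in the layer comments above can be checked with
quick arithmetic (an added sketch; note the DigitCaps figure of 1,497,600 also
counts the 1152x10 routing entries b_ij and c_ij on top of the 8x16 weight
matrices):

conv1_params = 9 * 9 * 1 * 256 + 256                # 20,992
primary_params = 32 * (9 * 9 * 256 * 8 + 8)         # 5,308,672
digit_params = 1152 * 10 * 8 * 16 + 2 * 1152 * 10   # 1,497,600
fc1_params = 16 * 10 * 512 + 512                    # 82,432
fc2_params = 512 * 1024 + 1024                      # 525,312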
Exemple #24
    def build_arch(self):
        with tf.variable_scope('Conv1_layer'):
            # Conv1, [batch_size, 20, 20, 256]
            self.W = tf.get_variable(
                'W',
                shape=[9, 9, 1, 256],
                initializer=tf.contrib.layers.xavier_initializer())
            self.W = fix(self.W)
            self.biases = tf.get_variable('biases',
                                          shape=[256],
                                          initializer=tf.zeros_initializer())
            self.biases = fix(self.biases)

            self.conv1 = tf.nn.relu(
                tf.nn.conv2d(
                    self.X, self.W, strides=[1, 1, 1, 1], padding='VALID') +
                self.biases)
            self.conv1 = fix(self.conv1)

            assert self.conv1.get_shape() == [cfg.batch_size, 20, 20, 256]

        # Primary Capsules layer, return [batch_size, 1152, 8, 1]
        with tf.variable_scope('PrimaryCaps_layer'):
            self.primaryCaps = CapsLayer(num_outputs=32,
                                         vec_len=8,
                                         with_routing=False,
                                         layer_type='CONV')
            self.caps1 = self.primaryCaps(self.conv1, kernel_size=9, stride=2)
            assert self.caps1.get_shape() == [cfg.batch_size, 1152, 8, 1]

        # DigitCaps layer, return [batch_size, 10, 16, 1]
        with tf.variable_scope('DigitCaps_layer'):
            self.digitCaps = CapsLayer(num_outputs=10,
                                       vec_len=16,
                                       with_routing=True,
                                       layer_type='FC')
            self.caps2 = self.digitCaps(self.caps1)

        # Decoder structure in Fig. 2
        # 1. Do masking, how:
        with tf.variable_scope('Masking'):
            # a). calc ||v_c||, then do softmax(||v_c||)
            # [batch_size, 10, 16, 1] => [batch_size, 10, 1, 1]
            self.v_length = tf.sqrt(
                reduce_sum(tf.square(self.caps2), axis=2, keepdims=True) +
                epsilon)
            self.softmax_v = softmax(self.v_length, axis=1)
            assert self.softmax_v.get_shape() == [cfg.batch_size, 10, 1, 1]

            # b). pick out the index of max softmax val of the 10 caps
            # [batch_size, 10, 1, 1] => [batch_size] (index)
            self.argmax_idx = tf.to_int32(tf.argmax(self.softmax_v, axis=1))
            assert self.argmax_idx.get_shape() == [cfg.batch_size, 1, 1]
            self.argmax_idx = tf.reshape(self.argmax_idx,
                                         shape=(cfg.batch_size, ))

            # Method 1.
            if not cfg.mask_with_y:
                # c). indexing
                # It's not easy to visualize the indexing process with argmax_idx,
                # as we are 3-dim animals
                masked_v = []
                for batch_size in range(cfg.batch_size):
                    v = self.caps2[batch_size][self.argmax_idx[batch_size], :]
                    masked_v.append(tf.reshape(v, shape=(1, 1, 16, 1)))

                self.masked_v = tf.concat(masked_v, axis=0)
                assert self.masked_v.get_shape() == [cfg.batch_size, 1, 16, 1]
            # Method 2. masking with true label, default mode
            else:
                # self.masked_v = tf.matmul(tf.squeeze(self.caps2), tf.reshape(self.Y, (-1, 10, 1)), transpose_a=True)
                self.masked_v = tf.multiply(tf.squeeze(self.caps2),
                                            tf.reshape(self.Y, (-1, 10, 1)))
                self.v_length = tf.sqrt(
                    reduce_sum(tf.square(self.caps2), axis=2, keepdims=True) +
                    epsilon)

        # 2. Reconstruct the MNIST images with 3 FC layers
        # [batch_size, 1, 16, 1] => [batch_size, 16] => [batch_size, 512]
        with tf.variable_scope('Decoder'):
            vector_j = tf.reshape(self.masked_v, shape=(cfg.batch_size, -1))
            self.fc1 = tf.contrib.layers.fully_connected(vector_j,
                                                         num_outputs=512)
            assert self.fc1.get_shape() == [cfg.batch_size, 512]
            self.fc2 = tf.contrib.layers.fully_connected(self.fc1,
                                                         num_outputs=1024)
            assert self.fc2.get_shape() == [cfg.batch_size, 1024]
            self.decoded = tf.contrib.layers.fully_connected(
                self.fc2, num_outputs=784, activation_fn=tf.sigmoid)
def routing(vote,
            activation=None,
            num_outputs=32,
            out_caps_shape=[4, 4],
            method='EMRouting',
            num_iter=3,
            regularizer=None):
    ''' Routing-by-agreement algorithm.
    Args:
        alias H = out_caps_shape[0]*out_caps_shape[1].

        vote: [batch_size, num_inputs, num_outputs, H].
        activation: [batch_size, num_inputs, 1, 1].
        num_outputs: ...
        out_caps_shape: ...
        method: the method for updating the coupling coefficients between vote
                and pose, one of ['EMRouting', 'DynamicRouting'].
        num_iter: the number of routing iterations.
        regularizer: A (Tensor -> Tensor or None) function; the result of applying it on a newly created variable
                will be added to the collection tf.GraphKeys.REGULARIZATION_LOSSES and can be used for regularization.

    Returns:
        pose: [batch_size, 1, 1, num_outputs] + out_caps_shape.
        activation: [batch_size, 1, 1, num_outputs].
    '''
    if num_iter == 0:  # no dynamic routing
        s = reduce_sum(vote, axis=1, keepdims=True)
        pose = squash(s)
        return pose, activation

    vote_stopped = tf.stop_gradient(vote, name="stop_gradient")
    batch_size = vote.shape[0].value
    if method == 'EMRouting':
        shape = vote.get_shape().as_list()[:3] + [1]
        # R: [batch_size, num_inputs, num_outputs, 1]
        R = tf.constant(np.ones(shape, dtype=np.float32) / num_outputs)
        for t_iter in range(num_iter):
            with tf.variable_scope('M-STEP') as scope:
                if t_iter > 0:
                    scope.reuse_variables()
                # There is no need to do the `E-STEP` in the last iteration
                if t_iter == num_iter - 1:
                    pose, stddev, activation_prime = M_step(
                        R, activation, vote)
                    break
                else:
                    pose, stddev, activation_prime = M_step(
                        R, activation, vote_stopped)
            with tf.variable_scope('E-STEP'):
                R = E_step(pose, stddev, activation_prime, vote_stopped)
        pose = tf.reshape(pose,
                          shape=[batch_size, 1, 1, num_outputs] +
                          out_caps_shape)
        activation = tf.reshape(activation_prime, shape=[batch_size, 1, 1, -1])
        return (pose, activation)
    elif method == 'DynamicRouting':
        B = tf.constant(
            np.zeros([batch_size, vote.shape[1].value, num_outputs, 1, 1],
                     dtype=np.float32))
        for r_iter in range(num_iter):
            with tf.variable_scope('iter_' + str(r_iter)):
                coef = softmax(B, axis=2)
                if r_iter == num_iter - 1:
                    s = reduce_sum(tf.multiply(coef, vote),
                                   axis=1,
                                   keepdims=True)
                    pose = squash(s)
                else:
                    s = reduce_sum(tf.multiply(coef, vote_stopped),
                                   axis=1,
                                   keepdims=True)
                    pose = squash(s)
                    shape = [batch_size, vote.shape[1].value, num_outputs
                             ] + out_caps_shape
                    # broadcast (tile) pose up to the votes' shape by
                    # multiplying with an all-ones constant
                    pose = tf.multiply(pose, tf.constant(1., shape=shape))
                    B += tf.matmul(vote_stopped, pose, transpose_a=True)
        return (pose, activation)

    else:
        raise Exception('Invalid routing method!', method)
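The `squash` nonlinearity used by the num_iter == 0 shortcut and the
DynamicRouting branch above is not defined in this excerpt; a minimal sketch
following Eq. 1 of Sabour et al., squashing along the capsule-vector axis (the
default axis is an assumption here):

def squash(s, axis=-2, epsilon=1e-9):
    # v = (||s||^2 / (1 + ||s||^2)) * (s / ||s||)
    squared_norm = reduce_sum(tf.square(s), axis=axis, keepdims=True)
    scale = squared_norm / (1. + squared_norm) / tf.sqrt(squared_norm + epsilon)
    return scale * s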
    def build_arch(self):
        with tf.variable_scope('Conv1_layer'):
            # Conv1, [batch_size, 20, 20, 256]
            conv1 = tf.contrib.layers.conv2d(self.X,
                                             num_outputs=256,
                                             kernel_size=9,
                                             stride=1,
                                             padding='VALID')
            assert conv1.get_shape() == [cfg.batch_size, 20, 20, 256]

        # Primary Capsules layer, return [batch_size, 1152, 8, 1]
        with tf.variable_scope('PrimaryCaps_layer'):
            primaryCaps = CapsLayer(num_outputs=32,
                                    vec_len=8,
                                    with_routing=False,
                                    layer_type='CONV')
            caps1 = primaryCaps(conv1, kernel_size=9, stride=2)
            assert caps1.get_shape() == [cfg.batch_size, 1152, 8, 1]

        # DigitCaps layer, return [batch_size, 10, 16, 1]
        with tf.variable_scope('DigitCaps_layer'):
            digitCaps = CapsLayer(num_outputs=10,
                                  vec_len=16,
                                  with_routing=True,
                                  layer_type='FC')
            self.caps2 = digitCaps(caps1)
        with tf.variable_scope('Masking'):
            # a). calc ||v_c||, then do softmax(||v_c||)
            # [batch_size, 10, 16, 1] => [batch_size, 10, 1, 1]
            self.v_length = tf.sqrt(
                reduce_sum(tf.square(self.caps2), axis=2, keepdims=True) +
                epsilon)
            self.softmax_v = softmax(self.v_length, axis=1)
            assert self.softmax_v.get_shape() == [cfg.batch_size, 10, 1, 1]

            # b). pick out the index of max softmax val of the 10 caps
            # [batch_size, 10, 1, 1] => [batch_size] (index)
            self.argmax_idx = tf.to_int32(tf.argmax(self.softmax_v, axis=1))
            assert self.argmax_idx.get_shape() == [cfg.batch_size, 1, 1]
            self.argmax_idx = tf.reshape(self.argmax_idx,
                                         shape=(cfg.batch_size, ))

            # Method 1.
            if not cfg.mask_with_y:
                # c). indexing
                # It's not easy to visualize the indexing process with argmax_idx,
                # as we are 3-dim animals
                # NOTE: this variant has no decoder, so the masked vector is never
                # consumed; the masking steps below are left disabled.
                masked_v = []
                for batch_size in range(cfg.batch_size):
                    v = self.caps2[batch_size][self.argmax_idx[batch_size], :]
                    # masked_v.append(tf.reshape(v, shape=(1, 1, 16, 1)))

                # self.masked_v = tf.concat(masked_v, axis=0)
                # assert self.masked_v.get_shape() == [cfg.batch_size, 1, 16, 1]
            # Method 2. masking with true label, default mode
            else:
                # self.masked_v = tf.matmul(tf.squeeze(self.caps2), tf.reshape(self.Y, (-1, 10, 1)), transpose_a=True)
                # self.masked_v = tf.multiply(tf.squeeze(self.caps2), tf.reshape(self.Y, (-1, 10, 1)))
                self.v_length = tf.sqrt(
                    reduce_sum(tf.square(self.caps2), axis=2, keepdims=True) +
                    epsilon)
        with tf.variable_scope('acc'):
            self.labels = tf.to_int32(tf.argmax(self.Y, axis=1))
            correct_prediction = tf.equal(tf.to_int32(self.labels),
                                          self.argmax_idx)
            self.accuracy = tf.reduce_mean(
                tf.cast(correct_prediction, tf.float32)) * 100
Exemple #27
def routing(l_input, b_IJ, num_outputs=10, num_dims=16):
    """
    :param l_input:  A Tensor with [batch_size, num_caps_l=1152, 1, length(u_i)=8, 1] shape,
                    num_caps_l是前一层输出的capsule的数量
    :param b_IJ:   A Tensor whth [batch_size,num_caps_l,num_caps_l_plus_1,1,1] shape,
                    代表两层的capsule的关系,是不是向量的方向?
    :param num_outputs: 本层输出的capsule的数量
    :param num_dims:    capsule的维度
    :return:
            A Tensor of shape [batch_size, num_caps_l,num_caps_l_plus_1, length(v_j)=16, 1]
            representing the vector output `v_j` in the layer l+1
    Notes:
        u_i represents the vector output of capsule i in the layer l
        v_j represents the vector output of capsule j in the layer l+1.

        矩阵相乘操作tf.matmul比较耗费时间,可以用一系列操作代替。[a,b]@[b,c]等同于以下操作:
        (1)[a,b]--->[a*c,b],用np.tile或tp.tile实现
        (2)[b,c]--->[b,c*a]--->转置成[c*a,b]
        (3)[a*c,b]*[c*a,b]
        (4)reduce_sum at axis = 1
        (5) reshape to [a,c]
    """
    input_shape = get_shape(l_input)
    W = tf.get_variable(
        'Weight',
        shape=[1, input_shape[1], num_dims * num_outputs] + input_shape[-2:],
        dtype=tf.float32,
        initializer=tf.random_normal_initializer(stddev=cfg.stddev))
    biases = tf.get_variable('bias', shape=(1, 1, num_outputs, num_dims, 1))
    l_input = tf.tile(l_input, [1, 1, num_dims * num_outputs, 1, 1])
    """
    W的形状是[1,1152,160,8,1],代表它要表达每张图片1152个输入capsule与160个输出capsule的向量值的关系
    input的形状是[128,1152,1,8,1],代表的是128张图片,每张图片输出1152个capsule,每个capsule的维数的长度是8
        input记录第l层的每个capsule的具体取值
    u_hat的形状是[128,1152,160,1,1]或者[128,1152,10,16,1],
        代表128张图片,每张图片中,第l层的每个capsule对应第l+1层的capsule的向量值,只记录第l层的capsule的个数,不记录取值
    """
    u_hat = reduce_sum(W * l_input, axis=3, keepdims=True)
    assert u_hat.get_shape() == [128, 1152, 160, 1, 1]

    u_hat = tf.reshape(u_hat,
                       shape=[-1, input_shape[1], num_outputs, num_dims, 1])
    assert u_hat.get_shape() == [128, 1152, 10, 16, 1]

    # assert u_hat.get_shape() == [cfg.batch_size, 1152, 10, 16, 1]

    # In forward, u_hat_stopped = u_hat; in backward, no gradient passed back from u_hat_stopped to u_hat
    u_hat_stopped = tf.stop_gradient(u_hat, name='stop_gradient')

    # line 3: for r iterations do
    for r_iter in range(cfg.iter_routing):
        with tf.variable_scope('iter_' + str(r_iter)):
            # line 4:
            # => [batch_size, 1152, 10, 1, 1]
            c_IJ = softmax(b_IJ, axis=2)

            # At last iteration, use `u_hat` in order to receive gradients from the following graph
            if r_iter == cfg.iter_routing - 1:
                # line 5:
                # weighting u_hat with c_IJ, element-wise in the last two dims
                # => [batch_size, 1152, 10, 16, 1]
                s_J = tf.multiply(c_IJ, u_hat)
                # then sum in the second dim, resulting in [batch_size, 1, 10, 16, 1]
                s_J = reduce_sum(s_J, axis=1, keepdims=True) + biases
                # assert s_J.get_shape() == [cfg.batch_size, 1, num_outputs, num_dims, 1]

                # line 6:
                # squash using Eq.1,
                v_J = squash(s_J)
                # assert v_J.get_shape() == [cfg.batch_size, 1, 10, 16, 1]
            elif r_iter < cfg.iter_routing - 1:  # Inner iterations, do not apply backpropagation
                s_J = tf.multiply(c_IJ, u_hat_stopped)
                s_J = reduce_sum(s_J, axis=1, keepdims=True) + biases
                v_J = squash(s_J)

                # line 7:
                # reshape & tile v_j from [batch_size, 1, 10, 16, 1] to [batch_size, 1152, 10, 16, 1]
                # then matmul in the last two dims: [16, 1].T x [16, 1] => [1, 1],
                # resulting in [batch_size, 1152, 10, 1, 1]
                v_J_tiled = tf.tile(v_J, [1, input_shape[1], 1, 1, 1])
                u_produce_v = reduce_sum(u_hat_stopped * v_J_tiled,
                                         axis=3,
                                         keepdims=True)
                # assert u_produce_v.get_shape() == [cfg.batch_size, 1152, 10, 1, 1]

                # b_IJ += tf.reduce_sum(u_produce_v, axis=0, keep_dims=True)
                b_IJ += u_produce_v

    return (v_J)
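A quick check of the tile/multiply/reduce_sum replacement for matmul described
in the docstring above (an added NumPy sketch; the row pairing is made explicit
with a broadcast axis, which is what the extra dimensions in the W * l_input
pattern above achieve):

import numpy as np

a, b, c = 4, 8, 5
A = np.random.rand(a, b)
B = np.random.rand(b, c)

# [a, 1, b] * [1, c, b] -> [a, c, b]; sum over the shared b axis -> [a, c]
prod = A[:, None, :] * B.T[None, :, :]
out = prod.sum(axis=2)
assert np.allclose(out, A @ B)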
Exemple #28
def routing(input, b_IJ):
    ''' The routing algorithm.

    Args:
        input: A Tensor with [batch_size, num_caps_l=128, 1, length(u_i)=8, 1]
               shape, num_caps_l meaning the number of capsules in the layer l.
    Returns:
        A Tensor of shape [batch_size, num_caps_l_plus_1, length(v_j)=16, 1]
        representing the vector output `v_j` in the layer l+1
    Notes:
        u_i represents the vector output of capsule i in the layer l, and
        v_j the vector output of capsule j in the layer l+1.
     '''

    # W: [1, num_caps_i, num_caps_j * len_v_j, len_u_j, 1]
    W = tf.get_variable('Weight', shape=(1, 128, 160, 8, 1), dtype=tf.float32,
                        initializer=tf.random_normal_initializer(stddev=cfg.stddev))
    biases = tf.get_variable('bias', shape=(1, 1, 10, 16, 1))

    # Eq.2, calc u_hat
    # Since tf.matmul is a time-consuming op, a better solution is to use
    # element-wise multiply, reduce_sum and reshape ops instead. Matmul
    # [a, b] x [b, c] is equivalent to a series of ops: element-wise multiply
    # [a*c, b] * [a*c, b], reduce_sum at axis=1, and reshape to [a, c]
    input = tf.tile(input, [1, 1, 160, 1, 1])
    #assert input.get_shape() == [cfg.batch_size, 128, 160, 8, 1]

    u_hat = reduce_sum(W * input, axis=3, keepdims=True)
    u_hat = tf.reshape(u_hat, shape=[-1, 128, 10, 16, 1])
    assert u_hat.get_shape() == [cfg.batch_size, 128, 10, 16, 1]

    # In forward, u_hat_stopped = u_hat; in backward, no gradient passed back from u_hat_stopped to u_hat
    u_hat_stopped = tf.stop_gradient(u_hat, name='stop_gradient')

    # line 3: for r iterations do
    for r_iter in range(cfg.iter_routing):
        with tf.variable_scope('iter_' + str(r_iter)):
            # line 4:
            # => [batch_size, 128, 10, 1, 1]
            c_IJ = softmax(b_IJ, axis=2)

            # At last iteration, use `u_hat` in order to receive gradients from the following graph
            if r_iter == cfg.iter_routing - 1:
                # line 5:
                # weighting u_hat with c_IJ, element-wise in the last two dims
                # => [batch_size, 128, 10, 16, 1]
                s_J = tf.multiply(c_IJ, u_hat)
                # then sum in the second dim, resulting in [batch_size, 1, 10, 16, 1]
                s_J = reduce_sum(s_J, axis=1, keepdims=True) + biases
                assert s_J.get_shape() == [cfg.batch_size, 1, 10, 16, 1]

                # line 6:
                # squash using Eq.1,
                v_J = squash(s_J)
                assert v_J.get_shape() == [cfg.batch_size, 1, 10, 16, 1]
            elif r_iter < cfg.iter_routing - 1:  # Inner iterations, do not apply backpropagation
                s_J = tf.multiply(c_IJ, u_hat_stopped)
                s_J = reduce_sum(s_J, axis=1, keepdims=True) + biases
                v_J = squash(s_J)

                # line 7:
                # reshape & tile v_j from [batch_size, 1, 10, 16, 1] to [batch_size, 128, 10, 16, 1]
                # then matmul in the last two dims: [16, 1].T x [16, 1] => [1, 1],
                # resulting in [batch_size, 128, 10, 1, 1]
                v_J_tiled = tf.tile(v_J, [1, 128, 1, 1, 1])
                u_produce_v = reduce_sum(u_hat_stopped * v_J_tiled, axis=3, keepdims=True)
                assert u_produce_v.get_shape() == [cfg.batch_size, 128, 10, 1, 1]

                # b_IJ += tf.reduce_sum(u_produce_v, axis=0, keep_dims=True)
                b_IJ += u_produce_v

    return(v_J)
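A usage sketch for the routing function above (an added example; the `cfg`
names and capsule shapes follow the surrounding code, and the routing logits
b_IJ start at zero as in Procedure 1 of Sabour et al.):

caps1 = tf.placeholder(tf.float32, shape=[cfg.batch_size, 128, 1, 8, 1])
b_IJ = tf.zeros([cfg.batch_size, 128, 10, 1, 1], dtype=tf.float32)
with tf.variable_scope('routing'):
    v_J = routing(caps1, b_IJ)   # [batch_size, 1, 10, 16, 1]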
Exemple #29
    def cnn(self):
        """CNN模型"""
        embedding_inputs = self.input_embedding()
        filter_sizes = [[1, 300], [2, 300], [3, 300], [5, 300]]
        global all_conv
        for i, filter_size in enumerate(filter_sizes):
            with tf.name_scope("cnn%s" % filter_size[0]):
                # filter_shape=[filter_size[0],cfg.embedding_dim,1,cfg.num_filters]
                filter_shape = [filter_size[0], cfg.embedding_dim, 1, filter_size[1]]
                W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name='W')
                conv = tf.nn.conv2d(
                    embedding_inputs,
                    W,
                    strides=[1, 1, 1, 1],
                    padding="VALID",
                    name="conv")
                conv = tf.reshape(conv, shape=[-1, filter_size[1], conv.shape[1], 1])
                if i == 0:
                    all_conv = conv
                else:
                    all_conv = tf.concat([all_conv, conv], axis=2)

        digitCaps = CapsLayer(num_outputs=cfg.num_classes, vec_len=cfg.vec_len, with_routing=True, layer_type='FC')
        self.caps2 = digitCaps(all_conv)
        print("self.caps2",self.caps2)

        # self.cap_flatten = tf.reshape(self.caps2, [-1, cfg.num_classes*cfg.vec_len])    # flatten into a num_filters_total-dim feature vector
        # print("self.cap_flatten", self.cap_flatten.shape)

        with tf.variable_scope('Masking'):
            # a). calc ||v_c||, then do softmax(||v_c||)
            # [batch_size, 10, 16, 1] => [batch_size, 10, 1, 1]
            self.v_length = tf.sqrt(reduce_sum(tf.square(self.caps2),
                                               axis=2, keepdims=True) + epsilon)
            # print("self.v_length",self.v_length)
            # 计算 v 向量的模
            self.softmax_v = softmax(self.v_length, axis=1)
            # print("self.softmax_v",self.softmax_v)
            # 对每个低层胶囊i而言,所有权重cij的总和等于1。
            # assert self.softmax_v.get_shape() == [cfg.batch_size, self.num_label, 1, 1]

            # b). pick out the index of max softmax val of the 10 caps
            # [batch_size, 10, 1, 1] => [batch_size] (index)
            self.argmax_idx = tf.to_int32(tf.argmax(self.softmax_v, axis=1))
            # print("self.argmax_idx",self.argmax_idx)
            # get the index of the best prediction
            # assert self.argmax_idx.get_shape() == [cfg.batch_size, 1, 1]
            self.argmax_idx = tf.reshape(self.argmax_idx, shape=(cfg.batch_size,))
            # print("self.argmax_idx",self.argmax_idx)
            # Method 1.
            if not cfg.mask_with_y:
                self.masked_v = tf.reshape(self.caps2, (-1, cfg.num_classes, cfg.vec_len))
                # # c). indexing
                # # It's not easy to understand the indexing process with argmax_idx
                # # as we are 3-dim animal
                # masked_v = []
                # for batch_size in range(cfg.batch_size):
                #     v = self.caps2[batch_size][self.argmax_idx[batch_size], :]
                #     # print("v",v)
                #     masked_v.append(tf.reshape(v, shape=(1, 1, 16, 1)))
                #
                # self.masked_v = tf.concat(masked_v, axis=0)
                # # print("self.masked_v",self.masked_v )
                # assert self.masked_v.get_shape() == [cfg.batch_size, 1, 16, 1]
            # Method 2. masking with true label, default mode
            else:
                self.masked_v = tf.multiply(tf.squeeze(self.caps2), tf.reshape(self.input_y, (-1, cfg.num_classes, 1)))
                '''
                Note: during training only the correct DigitCap vector is used;
                the incorrect DigitCaps are masked out.
                '''
                self.v_length = tf.sqrt(reduce_sum(tf.square(self.caps2), axis=2, keepdims=True) + epsilon)
                print("self.masked_v2", self.masked_v)
                # print("self.v_length2",self.v_length)

        # 2. Reconstruct the MNIST images with 3 FC layers
        # [batch_size, 1, 16, 1] => [batch_size, 16] => [batch_size, 512]

        with tf.name_scope("score"):
            vector_j = tf.reshape(self.masked_v, shape=(cfg.batch_size, -1))
            self.logits = tf.layers.dense(vector_j, cfg.num_classes, name='fc2')
            # self.y_pred = tf.contrib.layers.fully_connected(vector_j,
            #                                                 num_outputs=cfg.num_classes,
            #                                                 activation_fn=tf.sigmoid)

            # output layer (classifier)
        # self.logits = tf.layers.dense(cur_layer, cfg.num_classes, name='fc2')
        self.logits_softmax = tf.nn.softmax(self.logits)
        # self.logits1 = tf.nn.local_response_normalization(self.logits,dim = 0)
        # print("self.logits", self.logits.shape)
        self.y_pred = tf.argmax(self.logits_softmax, 1)  # predicted class
        # print("self.y_pred",self.y_pred.shape)

        with tf.name_scope("loss"):
            # loss function (margin loss; a cross-entropy alternative is commented out below)
            # 1. The margin loss

            # [batch_size, 10, 1, 1]
            # max_l = max(0, m_plus-||v_c||)^2
            max_l = tf.square(tf.maximum(0., cfg.m_plus - self.v_length))
            # max_r = max(0, ||v_c||-m_minus)^2
            max_r = tf.square(tf.maximum(0., self.v_length - cfg.m_minus))
            '''
            When the correct DigitCap predicts the true label with a probability
            above 0.9, the loss is zero; below 0.9 it is non-zero.
            '''
            assert max_l.get_shape() == [cfg.batch_size, cfg.num_classes, 1, 1]

            # reshape: [batch_size, 10, 1, 1] => [batch_size, 10]
            max_l = tf.reshape(max_l, shape=(cfg.batch_size, -1))
            max_r = tf.reshape(max_r, shape=(cfg.batch_size, -1))

            # calc T_c: [batch_size, 10]
            # T_c = Y, is my understanding correct? Try it.
            T_c = self.input_y
            # [batch_size, 10], element-wise multiply
            L_c = T_c * max_l + cfg.lambda_val * (1 - T_c) * max_r

            self.margin_loss = tf.reduce_mean(tf.reduce_sum(L_c, axis=1))
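            # Worked example (an added sketch): with m_plus=0.9, m_minus=0.1 and
            # lambda_val=0.5, a correct-class capsule of length 0.95 contributes
            # max(0, 0.9 - 0.95)^2 = 0 to L_c, while a wrong-class capsule of
            # length 0.3 contributes 0.5 * max(0, 0.3 - 0.1)^2 = 0.02.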

            # 2. The reconstruction loss
            # print("self.input_y", self.input_y)
            # orgin = tf.reshape(self.input_y, shape=(cfg.batch_size, -1))
            # print("self.y_pred",self.y_pred)
            # print("orgin",orgin)
            squared = tf.square(self.logits_softmax - self.input_y)
            self.reconstruction_err = tf.reduce_mean(squared)

            # 3. Total loss
            # The paper uses sum of squared error as reconstruction error, but we
            # have used reduce_mean in `# 2 The reconstruction loss` to calculate
            # mean squared error. In order to keep in line with the paper, the
            # regularization scale should be 0.0005*10=0.005
            self.loss = self.margin_loss + cfg.regularization_scale * self.reconstruction_err
            # cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=self.logits, labels=self.input_y)
            # self.loss = tf.reduce_mean(cross_entropy)
        with tf.name_scope("optimize"):
            # optimizer
            self.optim = tf.train.AdamOptimizer(learning_rate=cfg.learning_rate).minimize(self.loss)
        with tf.name_scope("accuracy"):
            correct_pred = tf.equal(self.y_pred, tf.argmax(self.input_y, 1))
            self.acc = tf.reduce_mean(tf.cast(correct_pred, "float"), name="accuracy")
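Shape trace for the cnn() feature extractor above (an added note; seq_len is
the assumed sequence-length dimension of embedding_inputs):

# embedding_inputs: [batch, seq_len, embedding_dim, 1]
# conv (VALID, filter [h, embedding_dim, 1, 300]): [batch, seq_len-h+1, 1, 300]
# reshape: [batch, 300, seq_len-h+1, 1], then concat over axis=2 for h in {1, 2, 3, 5}
# all_conv: [batch, 300, sum_h (seq_len-h+1), 1], fed to CapsLayer as the FC input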
Exemple #30
    def build_arch(self):

        with tf.variable_scope('Test'):
            self.testConst = tf.constant(1.0, name='testConst')

        with tf.variable_scope('Conv1_layer'):
            # Conv1, [batch_size, 20, 20, 256]
            print('shape of self.X: ', self.X.shape)
            conv1 = tf.contrib.layers.conv2d(self.X,
                                             num_outputs=256,
                                             kernel_size=cfg.image_size - 19,
                                             stride=1,
                                             padding='VALID')
            print('conv1 shape: ', conv1.get_shape())
            assert conv1.get_shape() == [cfg.batch_size, 20, 20, 256]

        # Primary Capsules layer, return [batch_size, 1152, 8, 1]
        with tf.variable_scope('PrimaryCaps_layer'):
            primaryCaps = CapsLayer(num_outputs=32,
                                    vec_len=8,
                                    with_routing=False,
                                    layer_type='CONV')
            caps1 = primaryCaps(conv1, kernel_size=9, stride=2)
            assert caps1.get_shape() == [cfg.batch_size, 1152, 8, 1]

        # DigitCaps layer, return [batch_size, 10, 16, 1]
        with tf.variable_scope('DigitCaps_layer'):
            digitCaps = CapsLayer(num_outputs=10,
                                  vec_len=16,
                                  with_routing=True,
                                  layer_type='FC')
            self.caps2 = digitCaps(caps1)

            #### expose routing internals for inspection ####
            self.v_J = digitCaps.v_J
            self.W = digitCaps.W
            self.b_IJ = digitCaps.b_IJ
            self.s_J = digitCaps.s_J
            self.c_IJ = digitCaps.c_IJ
            self.u_hat = digitCaps.u_hat
            self.biases = digitCaps.biases
            #### end exposed internals ####

        # Decoder structure in Fig. 2
        # 1. Do masking, how:
        with tf.variable_scope('Masking'):
            # a). calc ||v_c||, then do softmax(||v_c||)
            # [batch_size, 10, 16, 1] => [batch_size, 10, 1, 1]
            self.v_length = tf.sqrt(
                reduce_sum(tf.square(self.caps2), axis=2, keepdims=True) +
                epsilon)
            self.softmax_v = softmax(self.v_length, axis=1)
            assert self.softmax_v.get_shape() == [cfg.batch_size, 10, 1, 1]

            # b). pick out the index of max softmax val of the 10 caps
            # [batch_size, 10, 1, 1] => [batch_size] (index)
            self.argmax_idx = tf.to_int32(tf.argmax(self.softmax_v, axis=1))
            assert self.argmax_idx.get_shape() == [cfg.batch_size, 1, 1]
            self.argmax_idx = tf.reshape(self.argmax_idx,
                                         shape=(cfg.batch_size, ))

            # Method 1.
            if not cfg.mask_with_y:
                # c). indexing
                # It's not easy to visualize the indexing process with argmax_idx,
                # as we are 3-dim animals
                masked_v = []
                for batch_size in range(cfg.batch_size):
                    v = self.caps2[batch_size][self.argmax_idx[batch_size], :]
                    masked_v.append(tf.reshape(v, shape=(1, 1, 16, 1)))

                self.masked_v = tf.concat(masked_v, axis=0)
                assert self.masked_v.get_shape() == [cfg.batch_size, 1, 16, 1]
            # Method 2. masking with true label, default mode
            else:
                # self.masked_v = tf.matmul(tf.squeeze(self.caps2), tf.reshape(self.Y, (-1, 10, 1)), transpose_a=True)
                self.masked_v = tf.multiply(tf.squeeze(self.caps2),
                                            tf.reshape(self.Y, (-1, 10, 1)))
                self.v_length = tf.sqrt(
                    reduce_sum(tf.square(self.caps2), axis=2, keepdims=True) +
                    epsilon)

        # 2. Reconstruct the MNIST images with 3 FC layers
        # [batch_size, 1, 16, 1] => [batch_size, 16] => [batch_size, 512]
        with tf.variable_scope('Decoder'):
            vector_j = tf.reshape(self.masked_v, shape=(cfg.batch_size, -1))
            fc1 = tf.contrib.layers.fully_connected(vector_j, num_outputs=512)
            assert fc1.get_shape() == [cfg.batch_size, 512]
            fc2 = tf.contrib.layers.fully_connected(fc1, num_outputs=1024)
            assert fc2.get_shape() == [cfg.batch_size, 1024]
            self.decoded = tf.contrib.layers.fully_connected(
                fc2,
                num_outputs=cfg.image_size_flatten,
                activation_fn=tf.sigmoid)
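The loss for this architecture is not shown in this excerpt; a minimal sketch
of the reconstruction term the decoder feeds, mirroring the pattern used in the
text-classification example above (flattening self.X into `orgin` is an
assumption about the input placeholder):

orgin = tf.reshape(self.X, shape=(cfg.batch_size, -1))
squared = tf.square(self.decoded - orgin)
reconstruction_err = tf.reduce_mean(squared)
# total loss: margin loss (computed from self.v_length) plus the scaled-down
# reconstruction error
total_loss = margin_loss + cfg.regularization_scale * reconstruction_err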
Exemple #31
    def forward(self, b, f, mask=None):
        """
        :param b: Input feature for match (background) - known region.
        :param f: Input feature to match (foreground) - missing region.
        :param mask: Input mask for b, indicating patches not available.
        :return:
        """

        # get shapes
        f_shape_raw = list(f.size())  # batch_size * c * h * w
        b_shape_raw = list(b.size())  # batch_size * c * h * w

        kernel_size = 2 * self.rate

        # extract patches from background with stride, padding and dilation
        # raw_w is extracted for reconstruction
        raw_w = self.extract_patches(b,
                                     kernel_size,
                                     self.rate * self.stride,
                                     self.dilation,
                                     padding='valid')  # [batch_size, C*k*k, L]

        raw_w = raw_w.view(b_shape_raw[0], b_shape_raw[1], kernel_size,
                           kernel_size, -1)
        raw_w = raw_w.permute(0, 4, 1, 2,
                              3)  # b_weights shape: [batch_size, L, C, k, k]

        # tuple of tensors with size [L, C, k, k] with len = batch_size
        raw_w_groups = torch.split(raw_w, 1, dim=0)

        # downscaling option: downscale both foreground and background for
        # matching, and use the original background for reconstruction.
        f = F.interpolate(f, scale_factor=1. / self.rate, mode='nearest')
        b = F.interpolate(b, scale_factor=1. / self.rate, mode='nearest')

        f_shape = list(f.size())  # b*c*h*w
        b_shape = list(b.size())

        # split tensors along the batch dimension
        # tuple of tensors with size [C, h, w] with len = batch_size
        f_groups = torch.split(
            f, 1, dim=0)  # split tensors along the batch dimension

        # w shape: [N, C*k*k, L]
        w = self.extract_patches(b, self.ksize, self.stride, 1, padding='same')

        # w shape: [N, C, k, k, L]
        w = w.view(b_shape[0], b_shape[1], self.ksize, self.ksize, -1)
        w = w.permute(0, 4, 1, 2, 3)  # w shape: [N, L, C, k, k]
        w_groups = torch.split(w, 1, dim=0)

        if mask is None:
            mask = torch.zeros(f_shape[0], 1, f_shape[2], f_shape[3])
            if self.device is not None:
                mask = mask.to(self.device)
        else:
            mask_scale = mask.size()[3] // f_shape[3]

            # downscale to match f shape
            mask = F.interpolate(mask,
                                 scale_factor=1 / mask_scale,
                                 mode='nearest')
            # mask = F.avg_pool2d(mask, kernel_size=4, stride=mask_scale)

        m_shape = list(mask.size())  # batch_size * c * h * w
        m = self.extract_patches(mask,
                                 self.ksize,
                                 self.stride,
                                 1,
                                 padding='same')  # [batch_size, k*k, L]

        m = m.view(m_shape[0], m_shape[1], self.ksize, self.ksize,
                   -1)  # [batch_size, 1, k, k, L]
        m = m.permute(0, 4, 1, 2, 3)  # m shape: [batch_size, L, C, k, k]
        # m = m[0]  # m shape: [L, C, k, k]

        # mm is 1 for patches whose mask values are all 1 (mean exactly 1),
        # 0 otherwise
        # mm shape: [batch_size, L, 1, 1, 1]

        mm = (reduce_mean(m, axis=[2, 3, 4],
                          keepdim=True) == 1.).to(torch.float32)
        # mm shape: [batch_size, 1, L, 1, 1]
        mm = mm.permute(0, 2, 1, 3, 4)

        y = []
        offsets = []
        k = self.fuse_k
        scale = self.softmax_scale  # to fit the PyTorch tensor image value range
        # Diagonal matrix with shape k * k
        fuse_weight = torch.eye(k).view(1, 1, k, k)  # 1*1*k*k
        if self.device:
            fuse_weight = fuse_weight.to(self.device)
        EPS = torch.FloatTensor([1e-4]).to(self.device)
        for xi, wi, raw_wi, mi in zip(f_groups, w_groups, raw_w_groups, mm):
            """
            O => output channel as a conv filter
            I => input channel as a conv filter
            xi : separated tensor along batch dimension of front; (B=1, C=128, H=32, W=32)
            wi : separated patch tensor along batch dimension of back; (B=1, O=32*32, I=128, KH=3, KW=3)
            raw_wi : separated tensor along batch dimension of back; (B=1, I=32*32, O=128, KH=4, KW=4)
            """
            # Normalizing weight tensor

            wi = wi.squeeze(0)
            wi_norm = torch.sqrt(
                reduce_sum(torch.pow(wi, 2) + EPS,
                           axis=[1, 2, 3],
                           keepdim=True))
            wi_normed = wi / wi_norm

            # xi shape: [1, C, H, W], yi shape: [1, L, H, W]
            xi_pad = same_padding(xi.shape[0], xi.shape[1],
                                  [self.ksize, self.ksize], [1, 1], [1, 1])
            yi = F.conv2d(xi, wi_normed, stride=1,
                          padding=xi_pad)  # [1, L, H, W]

            # conv implementation for fuse scores to encourage large patches
            if self.fuse:
                # make all of depth to spatial resolution
                # Convolution with diagonal shaped kernel №1
                yi = yi.view(1, 1, b_shape[2] * b_shape[3], f_shape[2] *
                             f_shape[3])  # (B=1, I=1, H=32*32, W=32*32)
                yi_pad = same_padding(yi.shape[0], yi.shape[1], [k, k], [1, 1],
                                      [1, 1])
                yi = F.conv2d(yi, fuse_weight, stride=1,
                              padding=yi_pad)  # (B=1, C=1, H=32*32, W=32*32)

                # Convolution with diagonal shaped kernel №2
                yi = yi.contiguous().view(1, b_shape[2], b_shape[3],
                                          f_shape[2],
                                          f_shape[3])  # (B=1, 32, 32, 32, 32)
                yi = yi.permute(0, 2, 1, 4, 3)
                yi = yi.contiguous().view(1, 1, b_shape[2] * b_shape[3],
                                          f_shape[2] * f_shape[3])
                yi_pad = same_padding(yi.shape[0], yi.shape[1], [k, k], [1, 1],
                                      [1, 1])
                yi = F.conv2d(yi, fuse_weight, stride=1, padding=yi_pad)

                yi = yi.contiguous().view(1, b_shape[3], b_shape[2],
                                          f_shape[3], f_shape[2])
                yi = yi.permute(0, 2, 1, 4, 3).contiguous()

            yi = yi.view(1, b_shape[2] * b_shape[3], f_shape[2],
                         f_shape[3])  # (B=1, C=32*32, H=32, W=32)
            # softmax to match
            yi = yi * mi
            yi = F.softmax(yi * scale, dim=1)
            yi = yi * mi  # [1, L, H, W]
            offset = torch.argmax(yi, dim=1, keepdim=True)  # 1*1*H*W
            if b_shape != f_shape:
                # Normalize the offset value to match foreground dimension
                times = float(f_shape[2] * f_shape[3]) / float(
                    b_shape[2] * b_shape[3])
                offset = ((offset + 1).float() * times - 1).to(torch.int64)
            # decompose the flat index into (row, col) in foreground space
            offset = torch.cat([offset // f_shape[3], offset % f_shape[3]],
                               dim=1)  # 1*2*H*W
            # deconv for patch pasting
            wi_center = raw_wi[0]

            # yi = F.pad(yi, [0, 1, 0, 1])    # here may need conv_transpose same padding
            yi = F.conv_transpose2d(yi, wi_center, stride=self.rate,
                                    padding=1) / 4.  # (B=1, C=128, H=64, W=64)
            y.append(yi)
            offsets.append(offset)

        y = torch.cat(y, dim=0)  # back to the mini-batch
        y = y.contiguous().view(f_shape_raw)  # back to [batch_size, C, H, W]

        offsets = torch.cat(offsets, dim=0)
        offsets = offsets.view(f_shape[0], 2, *f_shape[2:])

        # case1: visualize optical flow: minus current position
        h_add = torch.arange(f_shape[2]).view([1, 1, f_shape[2], 1]).expand(
            f_shape[0], -1, -1, f_shape[3])
        w_add = torch.arange(f_shape[3]).view([1, 1, 1, f_shape[3]]).expand(
            f_shape[0], -1, f_shape[2], -1)
        ref_coordinate = torch.cat([h_add, w_add], dim=1)
        ref_coordinate = ref_coordinate.to(self.device)

        offsets = offsets - ref_coordinate
        flow = torch.from_numpy(
            self.flow_to_image(offsets.permute(0, 2, 3,
                                               1).cpu().data.numpy())) / 255.
        flow = flow.permute(0, 3, 1, 2)
        flow = flow.to(self.device)

        if self.rate != 1:
            flow = F.interpolate(flow,
                                 scale_factor=self.rate * 4,
                                 mode='nearest')

        return y, flow
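The `extract_patches` helper used throughout forward() is not shown in this
excerpt; a minimal sketch built on F.unfold, written as a free function under
the same argument order (kernel size, stride, dilation, padding mode); the
'same' padding formula is an assumption:

def extract_patches(x, ksize, stride, dilation, padding='same'):
    # x: [batch_size, C, H, W] -> patches: [batch_size, C*ksize*ksize, L]
    pad = dilation * (ksize - 1) // 2 if padding == 'same' else 0
    return F.unfold(x, kernel_size=ksize, stride=stride,
                    dilation=dilation, padding=pad)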