Example #1
from mxnet import nd, autograd


def f_fc(a, b, weight, bias):
    # Elementwise product of the two inputs, followed by a fully connected layer.
    x = a * b
    fc = nd.FullyConnected(x, weight, bias, num_hidden=32)
    return fc
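

# A minimal usage sketch for f_fc above (reuses `nd` imported at the top).
# The shapes are illustrative assumptions, not taken from the original code:
# weight must be (num_hidden, input_dim) and bias (num_hidden,) to match
# num_hidden=32.
a = nd.random.uniform(shape=(4, 128))        # (batch_size, input_dim)
b = nd.random.uniform(shape=(4, 128))
weight = nd.random.uniform(shape=(32, 128))  # (num_hidden, input_dim)
bias = nd.zeros((32,))                       # (num_hidden,)
out = f_fc(a, b, weight, bias)
print(out.shape)  # (4, 32)
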
    def backward(self):
        """Run backward on the current executor."""
        # softmax: turn the globally summed exp(logits) into probabilities.
        self.get_each_gpu_label()

        self.logit = nd.exp(self.fc_output)[:]
        self.logit /= self.global_sum_fc.reshape((self.batchsize, 1))[:]
        self.grad[:] = self.logit[:]
        # Only the rows whose labels live on this GPU get the cross-entropy
        # correction (subtract 1 at the target class) and a loss value.
        if self.data_of_cur_gpu.size > 0:
            self.grad[self.data_of_cur_gpu, self.label_of_cur_gpu] -= 1.0
            self.loss[self.data_of_cur_gpu] = -nd.log(
                nd.maximum(
                    self.logit[self.data_of_cur_gpu, self.label_of_cur_gpu],
                    1e-32))[:]

        # margin: re-run the margin transform under autograd so its gradient
        # w.r.t. the picked target-class logits can be chained into self.grad.
        if self.data_of_cur_gpu.size > 0:
            grad_fc = self.pick_fc_of_cur_gpu
            grad_fc.attach_grad()
            with autograd.record():
                s = self.margin_loss(grad_fc)
            s.backward(self.grad[self.data_of_cur_gpu, self.label_of_cur_gpu])
            self.grad[self.data_of_cur_gpu,
                      self.label_of_cur_gpu] = grad_fc.grad.copy()
        self.pick_fc_of_cur_gpu = None

        # fc: replay the fully connected layer under autograd and backpropagate
        # the softmax/margin gradient through it to the features and weights.
        self.data_batch.attach_grad()
        self.weight_norm.attach_grad()
        self.bias.attach_grad()
        with autograd.record():
            no_bias = True  # flip to False to include self.bias in the layer
            if no_bias:
                nd.FullyConnected(data=self.data_batch,
                                  weight=self.weight_norm,
                                  no_bias=True,
                                  num_hidden=self.classes,
                                  out=self.fc_output)
            else:
                nd.FullyConnected(data=self.data_batch,
                                  weight=self.weight_norm,
                                  bias=self.bias,
                                  num_hidden=self.classes,
                                  out=self.fc_output)
        self.fc_output.backward(self.grad)
        self.return_feature_grad = self.data_batch.grad.copy()

        self.weight_temp_grad[:] = self.weight_norm.grad[:]

        # allreduce grad: sum the feature gradients across all ranks, then
        # slice out the part belonging to this rank's sub-batch.
        self.return_feature_grad = self.allreduce('return_feature_grad',
                                                  self.return_feature_grad)
        assert len(self.return_feature_grad), "rank:{}, grad".format(self.rank)
        self.return_each_gpu_grad = self.return_feature_grad[
            self.each_gpu_batchsize * self.rank:self.each_gpu_batchsize *
            (self.rank + 1)]

        # l2-norm: backpropagate through the weight normalization to get the
        # gradient w.r.t. the raw (unnormalized) weight.
        self.weight.attach_grad()
        with autograd.record():
            s2 = nd.L2Normalization(self.weight, mode='instance')
        s2.backward(self.weight_temp_grad)
        self.weight_grad += self.weight.grad
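
The softmax block at the top of backward() applies the standard cross-entropy
gradient by hand: grad = softmax(logits), then subtract 1 at the target class.
A self-contained sketch (toy shapes, single device, no model parallelism or
margin term) checking that identity against MXNet autograd:

from mxnet import nd, autograd

logits = nd.random.normal(shape=(4, 10))
labels = nd.array([1, 3, 5, 7])

logits.attach_grad()
with autograd.record():
    loss = -nd.pick(nd.log_softmax(logits), labels).sum()
loss.backward()

# Manual gradient: softmax probabilities minus the one-hot labels, which is
# what backward() above does in place via "grad[rows, labels] -= 1.0".
manual = nd.softmax(logits) - nd.one_hot(labels, 10)
print(nd.abs(manual - logits.grad).max())  # close to 0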
Example #3
    def network(
        X,
        drop_rate=0.0
    ):  # formula: output_size = ((input - kernel + 2*padding) / stride) + 1
        # input data sizes:
        # MNIST, FashionMNIST = (batch_size, 1, 28, 28)
        # CIFAR = (batch_size, 3, 32, 32)

        # The built-in BatchNorm's moving_mean / moving_var do not work here.
        C_H1 = nd.Activation(
            data=nd.BatchNorm(data=nd.Convolution(data=X,
                                                  weight=W1,
                                                  bias=B1,
                                                  kernel=(3, 3),
                                                  stride=(1, 1),
                                                  num_filter=60),
                              gamma=gamma1,
                              beta=beta1,
                              moving_mean=ma1,
                              moving_var=mv1,
                              momentum=0.9,
                              fix_gamma=False,
                              use_global_stats=True),
            act_type="relu"
        )  # MNIST : result = ( batch size , 60 , 26 , 26) , CIFAR10 : result = ( batch size , 60 , 30 , 30)
        P_H1 = nd.Pooling(
            data=C_H1, pool_type="avg", kernel=(2, 2), stride=(2, 2)
        )  # MNIST : result = (batch size , 60 , 13 , 13) , CIFAR10 : result = (batch size , 60 , 15 , 15)
        C_H2 = nd.Activation(
            data=nd.BatchNorm(data=nd.Convolution(data=P_H1,
                                                  weight=W2,
                                                  bias=B2,
                                                  kernel=(6, 6),
                                                  stride=(1, 1),
                                                  num_filter=30),
                              gamma=gamma2,
                              beta=beta2,
                              moving_mean=ma2,
                              moving_var=mv2,
                              momentum=0.9,
                              fix_gamma=False,
                              use_global_stats=True),
            act_type="relu"
        )  # MNIST :  result = ( batch size , 30 , 8 , 8), CIFAR10 :  result = ( batch size , 30 , 10 , 10)
        P_H2 = nd.Pooling(
            data=C_H2, pool_type="avg", kernel=(2, 2), stride=(2, 2)
        )  # MNIST : result = (batch size , 30 , 4 , 4) , CIFAR10 : result = (batch size , 30 , 5 , 5)
        P_H2 = nd.flatten(data=P_H2)
        '''FullyConnected parameter
        • data: (batch_size, input_dim)
        • weight: (num_hidden, input_dim)
        • bias: (num_hidden,)
        • out: (batch_size, num_hidden)
        '''
        F_H1 = nd.Activation(nd.BatchNorm(data=nd.FullyConnected(
            data=P_H2, weight=W3, bias=B3, num_hidden=120),
                                          gamma=gamma3,
                                          beta=beta3,
                                          moving_mean=ma3,
                                          moving_var=mv3,
                                          momentum=0.9,
                                          fix_gamma=False,
                                          use_global_stats=True),
                             act_type="relu")
        F_H1 = nd.Dropout(data=F_H1, p=drop_rate)
        F_H2 = nd.Activation(nd.BatchNorm(data=nd.FullyConnected(
            data=F_H1, weight=W4, bias=B4, num_hidden=64),
                                          gamma=gamma4,
                                          beta=beta4,
                                          moving_mean=ma4,
                                          moving_var=mv4,
                                          momentum=0.9,
                                          fix_gamma=False,
                                          use_global_stats=True),
                             act_type="relu")
        F_H2 = nd.Dropout(data=F_H2, p=drop_rate)
        # Return raw logits (no softmax here); Example #4 below applies
        # nd.softmax to the same final layer instead.
        out = nd.FullyConnected(data=F_H2, weight=W5, bias=B5, num_hidden=10)
        return out
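
The output_size formula noted at the top of network() can be checked directly.
A small sketch (the helper name conv_out_size is ours, not from the original)
reproducing the MNIST shape comments layer by layer:

def conv_out_size(input_size, kernel, stride=1, padding=0):
    # output_size = ((input - kernel + 2*padding) / stride) + 1
    return (input_size - kernel + 2 * padding) // stride + 1

s = conv_out_size(28, 3)    # conv 3x3, stride 1     -> 26
s = conv_out_size(s, 2, 2)  # avg-pool 2x2, stride 2 -> 13
s = conv_out_size(s, 6)     # conv 6x6, stride 1     -> 8
s = conv_out_size(s, 2, 2)  # avg-pool 2x2, stride 2 -> 4
print(s)  # 4, matching the (batch size, 30, 4, 4) comment above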
Example #4
    def network(
        X,
        is_training=True,
        drop_rate=0.0
    ):  # formula: output_size = ((input - kernel + 2*padding) / stride) + 1
        # input data sizes:
        # MNIST, FashionMNIST = (batch_size, 1, 28, 28)
        # CIFAR = (batch_size, 3, 32, 32)

        C_H1 = nd.Activation(
            data=BatchNorm(nd.Convolution(data=X,
                                          weight=W1,
                                          bias=B1,
                                          kernel=(3, 3),
                                          stride=(1, 1),
                                          num_filter=60),
                           gamma1,
                           beta1,
                           scope_name=0,
                           is_training=is_training),
            act_type="relu"
        )  # MNIST : result = ( batch size , 60 , 26 , 26) , CIFAR10 : result = ( batch size , 60 , 30 , 30)
        P_H1 = nd.Pooling(
            data=C_H1, pool_type="avg", kernel=(2, 2), stride=(2, 2)
        )  # MNIST : result = (batch size , 60 , 13 , 13) , CIFAR10 : result = (batch size , 60 , 15 , 15)
        C_H2 = nd.Activation(
            data=BatchNorm(nd.Convolution(data=P_H1,
                                          weight=W2,
                                          bias=B2,
                                          kernel=(6, 6),
                                          stride=(1, 1),
                                          num_filter=30),
                           gamma2,
                           beta2,
                           scope_name=1,
                           is_training=is_training),
            act_type="relu"
        )  # MNIST :  result = ( batch size , 30 , 8 , 8), CIFAR10 :  result = ( batch size , 30 , 10 , 10)
        P_H2 = nd.Pooling(
            data=C_H2, pool_type="avg", kernel=(2, 2), stride=(2, 2)
        )  # MNIST : result = (batch size , 30 , 4 , 4) , CIFAR10 : result = (batch size , 30 , 5 , 5)
        P_H2 = nd.flatten(data=P_H2)
        '''FullyConnected parameter
        • data: (batch_size, input_dim)
        • weight: (num_hidden, input_dim)
        • bias: (num_hidden,)
        • out: (batch_size, num_hidden)
        '''
        F_H1 = nd.Activation(BatchNorm(nd.FullyConnected(data=P_H2,
                                                         weight=W3,
                                                         bias=B3,
                                                         num_hidden=120),
                                       gamma3,
                                       beta3,
                                       scope_name=2,
                                       is_training=is_training),
                             act_type="relu")
        F_H1 = nd.Dropout(data=F_H1, p=drop_rate)
        F_H2 = nd.Activation(BatchNorm(nd.FullyConnected(data=F_H1,
                                                         weight=W4,
                                                         bias=B4,
                                                         num_hidden=64),
                                       gamma4,
                                       beta4,
                                       scope_name=3,
                                       is_training=is_training),
                             act_type="relu")
        F_H2 = nd.Dropout(data=F_H2, p=drop_rate)
        softmax_Y = nd.softmax(
            nd.FullyConnected(data=F_H2, weight=W5, bias=B5, num_hidden=10))
        return softmax_Y
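
Unlike Example #3, this version routes every layer through a user-defined
BatchNorm(data, gamma, beta, scope_name, is_training) helper whose body is not
shown here. A minimal sketch of what such a helper could look like, assuming it
keeps moving statistics in a module-level dict keyed by scope_name (everything
below is an assumption for illustration, not the original implementation):

from mxnet import nd

_moving_stats = {}  # assumed storage: scope_name -> (moving_mean, moving_var)

def BatchNorm(X, gamma, beta, scope_name, is_training, eps=1e-5, momentum=0.9):
    # Normalize over every axis except the channel axis (axis 1), so the same
    # helper works for (N, C, H, W) conv outputs and (N, C) FC outputs.
    axes = tuple(i for i in range(len(X.shape)) if i != 1)
    if is_training:
        mean = X.mean(axis=axes, keepdims=True)
        var = ((X - mean) ** 2).mean(axis=axes, keepdims=True)
        if scope_name in _moving_stats:
            m_mean, m_var = _moving_stats[scope_name]
            m_mean = momentum * m_mean + (1 - momentum) * mean
            m_var = momentum * m_var + (1 - momentum) * var
        else:
            m_mean, m_var = mean, var
        _moving_stats[scope_name] = (m_mean, m_var)
    else:
        # At inference time fall back to the stored moving statistics.
        mean, var = _moving_stats[scope_name]
    X_hat = (X - mean) / nd.sqrt(var + eps)
    # gamma/beta are per-channel, so reshape them to broadcast over X.
    shape = [1] * len(X.shape)
    shape[1] = X.shape[1]
    return gamma.reshape(shape) * X_hat + beta.reshape(shape)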
Example #5
import numpy as np