Example 1
    def __init__(self, class_num):
        self.train = True
        self.loss = 0
        self.conv1 = ConvolutionLayer(kernel=(3, 3),
                                      channels=32,
                                      stride=(1, 1),
                                      padding=(0, 0))
        self.conv2 = ConvolutionLayer(kernel=(3, 3),
                                      channels=64,
                                      stride=(2, 2),
                                      padding=(1, 1))
        self.conv3 = ConvolutionLayer(kernel=(3, 3),
                                      channels=64,
                                      stride=(1, 1),
                                      padding=(0, 0))
        self.conv4 = ConvolutionLayer(kernel=(3, 3),
                                      channels=128,
                                      stride=(2, 2),
                                      padding=(1, 1))
        self.conv5 = ConvolutionLayer(kernel=(3, 3),
                                      channels=128,
                                      stride=(1, 1),
                                      padding=(0, 0))
        self.fc1 = FullyConnectLayer(channels=(3200, 100))
        self.fc2 = FullyConnectLayer(channels=(100, class_num))
        self.bn1 = BatchNormalization2d(32)
        self.bn2 = BatchNormalization2d(64)
        self.bn3 = BatchNormalization2d(128)
        self.dropout1 = DropOut()
Example 2
    def __init__(self, input_dim, features_nonezero, adj, hidden1=32, embedding_dim=16):
        super(GAE, self).__init__()

        self.input_dim = input_dim
        self.hidden1 = hidden1
        self.embedding_dim = embedding_dim
        self.features_nonezero = features_nonezero
        indices = np.array(adj[0])
        values = np.array(adj[1])
        dense_shape = np.array(adj[2])
        sparse_adj = tf.SparseTensor(indices=indices,
                                     values=values,
                                     dense_shape=dense_shape)
        self.adj = tf.cast(sparse_adj, dtype=tf.float32)
        # GAE encoder: one sparse convolution layer followed by one convolution layer
        self.conv1 = ConvolutionSparseLayer(self.input_dim,
                                            self.hidden1,
                                            self.adj,
                                            self.features_nonezero,
                                            dropout=0, activation='relu',
                                            input_shape=(None, self.input_dim))
        self.conv2 = ConvolutionLayer(self.hidden1,
                                      self.embedding_dim,
                                      self.adj,
                                      dropout=0, activation='relu')
        
        # GAE decoder
        self.reconstruct = InnerProductDecoder(self.embedding_dim, dropout=0, act=tf.nn.sigmoid)
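
For context: the `adj` argument is unpacked above as (indices, values, dense_shape), i.e. the COO triple that `tf.SparseTensor` accepts. A minimal sketch of producing such a triple from a SciPy sparse matrix (the `sparse_to_tuple` helper name is illustrative, not part of the example):

import numpy as np
import scipy.sparse as sp

def sparse_to_tuple(m):
    # convert any SciPy sparse matrix to the (indices, values, dense_shape)
    # triple expected by tf.SparseTensor and by the constructor above
    m = m.tocoo()
    indices = np.vstack((m.row, m.col)).transpose()
    return indices, m.data, np.array(m.shape)

adj = sparse_to_tuple(sp.identity(4))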
Example 3
    def get_network(self):
        self._read_config()

        input_layer = None
        layers = []

        prev_layer = None
        for data in self._layers:
            if data["type"] == "input":
                input_size = self._input_size * self._input_size
                output_size = int(data["output_size"])
                layer = InputLayer(input_size, output_size)
            elif data["type"] == "dense":
                if "output_size" in data:
                    output_size = int(data["output_size"])
                else:
                    output_size = self._output_size
                activation_function_str = data["af"]
                activation_function = self._lookup_activation_function(
                    activation_function_str)
                activation_function_d = self._lookup_activation_function_d(
                    activation_function_str)
                learning_rate = float(data["la"])
                layer = DenseLayer(prev_layer.get_output_shape(), output_size,
                                   activation_function, activation_function_d,
                                   learning_rate)
            elif data["type"] == "convolution":
                if prev_layer is None:
                    input_shape = (self._input_size, self._input_size, 1)
                else:
                    input_shape = prev_layer.get_output_shape()
                kernel_n = int(data["kernel_n"])
                kernel_m = int(data["kernel_m"])
                channels_out = int(data["channels"])
                output_shape = (kernel_n, kernel_m, channels_out)
                v_stride = int(data["stride_n"])
                h_stride = int(data["stride_m"])
                padding = int(data["padding"])
                la = float(data["la"])
                layer = ConvolutionLayer(input_shape, output_shape, h_stride,
                                         v_stride, padding, la)
            else:
                raise ValueError("unknown layer type: " + data["type"])
            if input_layer is None:
                input_layer = layer
            else:
                layers.append(layer)
            prev_layer = layer

        network = Network(input_layer, layers)
        return network
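
For reference, `get_network` assumes `self._layers` (populated by `_read_config`) is a list of dicts keyed by "type". A configuration exercising all three branches could look like the sketch below; only the key names are taken from the code above, the values are illustrative:

self._layers = [
    {"type": "input", "output_size": "784"},
    {"type": "convolution", "kernel_n": "3", "kernel_m": "3", "channels": "8",
     "stride_n": "1", "stride_m": "1", "padding": "1", "la": "0.01"},
    {"type": "dense", "af": "relu", "la": "0.01"},  # no "output_size": falls back to self._output_size
]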
Example 4
    def __init__(self, layers, decay=0.001, learning_rate=0.01):
        mapping = {
            "input": lambda x: InputLayer(x),
            "fc": lambda x: FullyConnectedLayer(x),
            "convolution": lambda x: ConvolutionLayer(x),
            "pool": lambda x: PoolingLayer(x),
            "squaredloss": lambda x: SquaredLossLayer(x),
            "softmax": lambda x: SoftmaxLossLayer(x),
            "relu": lambda x: ReLuLayer(x),
        }

        self.layers = []
        self.decay = decay

        self.learning_rate = learning_rate
        prev_layer = None

        for layer in layers:
            layer["input_shape"] = layer.get("input_shape",
                                             None) or prev_layer.output_shape
            layer["decay"] = self.decay
            layer = mapping[layer["type"]](layer)
            self.layers.append(layer)
            prev_layer = layer
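
The `layers` argument here is likewise a list of dicts: each entry needs a "type" key matching the mapping, and only the first needs an explicit "input_shape", since later entries inherit prev_layer.output_shape. A sketch of a plausible spec (key names beyond "type", "input_shape" and "decay" depend on the individual layer classes and are assumptions, as is the enclosing class name Network):

net = Network(layers=[
    {"type": "input", "input_shape": (28, 28, 1)},
    {"type": "convolution", "kernel": (3, 3), "channels": 8},
    {"type": "relu"},
    {"type": "pool", "size": (2, 2)},
    {"type": "fc", "output_size": 10},
    {"type": "softmax"},
])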
Example 5
    def __init__(self,
                 input_size=(1, 28, 28),
                 activation_type=ActivationType.ReLU,
                 hidden_size=50,
                 output_size=10):

        # basic parameters
        self.__activation_type__ = activation_type
        self.params = {}
        self.layers = []

        # set layers
        channel_num = input_size[0]
        for i, param in enumerate([
            {
                'filter_num': 16,
                'filter_size': 3,
                'pad': 1,
                'stride': 1
            },
            {
                'filter_num': 16,
                'filter_size': 3,
                'pad': 1,
                'stride': 1
            },
            {
                'filter_num': 32,
                'filter_size': 3,
                'pad': 1,
                'stride': 1
            },
            {
                'filter_num': 32,
                'filter_size': 3,
                'pad': 2,
                'stride': 1
            },
            {
                'filter_num': 64,
                'filter_size': 3,
                'pad': 1,
                'stride': 1
            },
            {
                'filter_num': 64,
                'filter_size': 3,
                'pad': 1,
                'stride': 1
            },
            {
                'pre_node_num': 64 * 4 * 4,
                'next_node_num': hidden_size
            },
            {
                'pre_node_num': hidden_size,
                'next_node_num': output_size
            },
        ]):

            # layers 1-6: convolution layer followed by an activation layer
            if 1 <= i + 1 <= 6:
                # create convolution layer
                convolution_layer = ConvolutionLayer(
                    index=i + 1,
                    activation_type=self.__activation_type__,
                    filter_num=param['filter_num'],
                    channel_num=channel_num,
                    filter_size=param['filter_size'],
                    stride=param['stride'],
                    padding=param['pad'])
                self.layers.append(convolution_layer)
                self.layers.append(
                    self.activationLayerFromType(activation_type, index=i + 1))
                # layer 2, 4, 6 Pooling Layer
                if i + 1 in (2, 4, 6):
                    self.layers.append(
                        PoolingLayer(index=i + 1, pool_h=2, pool_w=2,
                                     stride=2))
                # update next channel num
                channel_num = convolution_layer.filter_num
                layer = convolution_layer

            # layer 7, 8 Hidden Layer & ReLU Layer & Dropout Layer
            if i + 1 in (7, 8):
                hidden_layer = HiddenLayer(
                    index=i + 1,
                    activation_type=self.__activation_type__,
                    pre_node_num=param['pre_node_num'],
                    next_node_num=param['next_node_num'])
                self.layers.append(hidden_layer)
                if i + 1 == 7:
                    self.layers.append(
                        self.activationLayerFromType(activation_type,
                                                     index=i + 1))
                self.layers.append(DropoutLayer(index=i + 1,
                                                dropout_ratio=0.5))
                layer = hidden_layer

            # set W,b
            self.params['W{}'.format(i + 1)] = layer.W
            self.params['b{}'.format(i + 1)] = layer.b

            print('layer {} created'.format(i + 1))

            if Config.IS_DEBUG:
                print('W{} shape : {}'.format(
                    i + 1, self.params['W{}'.format(i + 1)].shape))
                print('b{} shape : {}'.format(
                    i + 1, self.params['b{}'.format(i + 1)].shape))

        # output created layer structures
        for layer in self.layers:
            print(layer.name)

        # keep weight required layer indexes
        self.weight_layer_indexes = []
        for j, layer in enumerate(self.layers):
            if isinstance(layer, (ConvolutionLayer, HiddenLayer)):
                self.weight_layer_indexes.append(j)
        self.debug('weight_layer_indexes {}'.format(self.weight_layer_indexes))

        print('{} layers created'.format(len(self.layers)))
        # last layer SoftmaxWithLoss Layer
        self.lastLayer = SoftmaxWithLossLayer()
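
A note on the 64 * 4 * 4 flatten size in layer 7: starting from 28x28 inputs, every 3x3 convolution with pad 1 and stride 1 preserves the spatial size, except conv 4 (pad 2), which grows it by 2; the 2x2/stride-2 pools after layers 2, 4 and 6 then give 28 -> 14, then 14 -> 16 (after conv 4) -> 8, then 8 -> 4, so the last pooling layer emits 64 channels of 4x4 maps, i.e. 1024 inputs for the first hidden layer.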
Example 6
    def __init__(self,
                 input_size=(1, 28, 28),
                 activation_type=ActivationType.ReLU,
                 filter_num=30,
                 filter_size=5,
                 filter_padding=0,
                 filter_stride=1,
                 hidden_size=100,
                 output_size=10):

        self.__activation_type__ = activation_type

        convolution_output_size = \
            (input_size[1] - filter_size + 2 * filter_padding) // \
            filter_stride + 1
        pooling_output_size = \
            int(filter_num *
                (convolution_output_size // 2) * (convolution_output_size // 2))

        convolution_layer_1 = ConvolutionLayer(
            index=1,
            activation_type=self.__activation_type__,
            filter_num=filter_num,
            channel_num=input_size[0],
            filter_size=filter_size)

        # set params
        self.params = {}
        hidden_layer_1 = HiddenLayer(index=1,
                                     activation_type=self.__activation_type__,
                                     pre_node_num=pooling_output_size,
                                     next_node_num=hidden_size)
        hidden_layer_2 = HiddenLayer(index=2,
                                     activation_type=self.__activation_type__,
                                     pre_node_num=hidden_size,
                                     next_node_num=output_size)

        self.params['W1'] = convolution_layer_1.W
        self.params['b1'] = convolution_layer_1.b
        self.params['W2'] = hidden_layer_1.W
        self.params['b2'] = hidden_layer_1.b
        self.params['W3'] = hidden_layer_2.W
        self.params['b3'] = hidden_layer_2.b

        # save init weights
        self.init_weights = []
        self.init_weights.append(self.params['W1'])
        self.init_weights.append(self.params['W2'])
        self.init_weights.append(self.params['W3'])

        # set layers
        self.layers = OrderedDict()
        self.layers['Convolution1'] = convolution_layer_1
        self.layers['ReLU1'] = self.activationLayer(index=1)
        self.layers['Pooling1'] = PoolingLayer(index=1,
                                               pool_h=2,
                                               pool_w=2,
                                               stride=2)
        self.layers['Hidden1'] = hidden_layer_1
        self.layers['ReLU2'] = self.activationLayer(index=2)
        self.layers['Hidden2'] = hidden_layer_2
        self.lastLayer = SoftmaxWithLossLayer()
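
With the default arguments the size computation above works out as (28 - 5 + 2*0) / 1 + 1 = 24 for the convolution output, and after 2x2 pooling with stride 2 the flattened size is 30 * 12 * 12 = 4320, which is the pre_node_num fed into hidden_layer_1.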
Example 7
    def train(self, train_data, dev_data, test_data, maxlen):
        # tr = tracker.SummaryTracker()
        rng = np.random.RandomState(3435)
        train_x, train_y = self.shared_dataset(train_data)
        dev_x, dev_y = self.shared_dataset(dev_data)
        test_x, test_y = self.shared_dataset(test_data)
        test_len = len(test_data[0])
        n_train_batches = len(train_data[0]) // self.batch_size
        n_val_batches = len(dev_data[0]) // self.batch_size
        n_test_batches = test_len // self.batch_size
        input_width = self.hidden_sizes[0]
        x = T.matrix('x')
        y = T.ivector('y')
        index = T.lscalar()
        Words = theano.shared(value=self.word_vectors,
                              name="Words",
                              borrow=True)
        layer0_input = Words[T.cast(x.flatten(), dtype="int32")].reshape(
            (self.batch_size, maxlen, input_width))
        lstm = LSTM(dim=input_width,
                    batch_size=self.batch_size,
                    number_step=maxlen,
                    params=self.lstm_params)
        layer0_output = lstm.feed_foward(layer0_input)
        conv_outputs = list()
        conv_nnets = list()
        params = list()
        output = T.cast(layer0_input.flatten(), dtype=floatX)
        conv_input = output.reshape((self.batch_size, 1, maxlen, input_width))
        for fter in self.filter_sizes:
            pheight = maxlen - fter + 1
            conv = ConvolutionLayer(rng=rng,
                                    filter_shape=(self.kernel, 1, fter,
                                                  input_width),
                                    input_shape=(self.batch_size, 1, maxlen,
                                                 input_width),
                                    poolsize=(pheight, 1),
                                    name="conv" + str(fter))
            # => batch_size x 1 x maxlen x input_width
            output = conv.predict(conv_input)
            layer1_input = output.flatten(2)
            params += conv.params
            conv_outputs.append(layer1_input)
            conv_nnets.append(conv)
        conv_nnets_output = T.concatenate(conv_outputs, axis=1)
        # lstm.mean_pooling_input(layer0_output)
        hidden_layer = HiddenLayer(
            rng,
            hidden_sizes=[self.kernel * 3, self.hidden_sizes[0]],
            input_vectors=conv_nnets_output,
            activation=utils.Tanh,
            name="Hidden_Tanh")
        hidden_layer.predict()
        hidden_layer_relu = HiddenLayer(
            rng,
            hidden_sizes=[self.hidden_sizes[0], self.hidden_sizes[0]],
            input_vectors=hidden_layer.output)
        hidden_layer_relu.predict()
        # hidden_layer_dropout = HiddenLayerDropout(rng, hidden_sizes=self.hidden_sizes[:2], input_vectors=lstm.output, W=hidden_layer.W, b=hidden_layer.b)
        full_connect = FullConnectLayer(
            rng,
            layers_size=[self.hidden_sizes[0], self.hidden_sizes[-1]],
            input_vector=hidden_layer_relu.output)
        full_connect.predict()

        cost = full_connect.negative_log_likelihood(y)
        params += hidden_layer.params + hidden_layer_relu.params + full_connect.params
        # params = hidden_layer.params + hidden_layer_relu.params + full_connect.params
        params_length = len(params)
        # initial values for e_grad, e_delta_prev and delta at time 0
        e_grad, e_delta_prev, delta = self.init_hyper_values(params_length)
        # e_grad_d, e_delta_prev_d, delta_d = self.init_hyper_values(params_length, name="D")
        # compute gradients of the cost w.r.t. all parameters
        grads = T.grad(cost, params)
        #dropout hidden layer
        # hidden_layer_dropout.dropout()
        # hidden_layer_dropout.predict()
        # full_connect.setInput(hidden_layer_dropout.output)
        # full_connect.predict()
        # cost_d = full_connect.negative_log_likelihood(y)
        #apply gradient to cost_d
        e_grad, e_delta_prev, delta = self.adadelta(grads, e_grad,
                                                    e_delta_prev)
        # e_grad_d, e_delta_prev_d, delta_d = self.adadelta(grads_d, e_grad_d, e_delta_prev_d, delta_d)
        # grads_d = T.grad(cost_d, params)
        grads = delta  # AdaDelta returns the update steps; reuse the name
        # grad_d = delta_d
        updates = [(p, p - d) for p, d in zip(params, grads)]
        # updates = [(p, p - d - d_) for p, d, d_ in zip(params, grads, grads_d)]
        # updates = [(p, p - self.learning_rate * d) for p, d in zip(params, grads)]
        train_model = theano.function(
            [index],
            cost,
            updates=updates,
            givens={
                x:
                train_x[(index * self.batch_size):((index + 1) *
                                                   self.batch_size)],
                y:
                train_y[(index * self.batch_size):((index + 1) *
                                                   self.batch_size)]
            })
        val_model = theano.function(
            [index],
            full_connect.errors(y),
            givens={
                x:
                dev_x[index * self.batch_size:(index + 1) * self.batch_size],
                y:
                dev_y[index * self.batch_size:(index + 1) * self.batch_size],
            })
        test_model = theano.function(
            inputs=[index],
            outputs=full_connect.errors(y),
            givens={
                x:
                test_x[index * self.batch_size:(index + 1) * self.batch_size],
                y:
                test_y[index * self.batch_size:(index + 1) * self.batch_size]
            })
        validation_frequency = min(n_train_batches, self.patience // 2)
        val_batch_lost = 1.
        best_batch_lost = 1.
        best_test_lost = 1.
        stop_count = 0
        epoch = 0
        done_loop = False
        current_time_step = 0
        improve_threshold = 0.995
        iter_list = list(range(n_train_batches))  # materialize so it can be shuffled in place
        while epoch < self.epochs and not done_loop:
            epoch_cost_train = 0.
            epoch += 1
            batch_train = 0
            print("Start epoch: %i" % epoch)
            start = time.time()
            random.shuffle(iter_list)
            for mini_batch, m_b_i in zip(iter_list, xrange(n_train_batches)):
                current_time_step = (epoch - 1) * n_train_batches + m_b_i
                epoch_cost_train += train_model(mini_batch)
                batch_train += 1
                if (current_time_step + 1) % validation_frequency == 0:
                    val_losses = [val_model(i) for i in xrange(n_val_batches)]
                    val_losses = np.array(val_losses)
                    val_batch_lost = np.mean(val_losses)
                    if val_batch_lost < best_batch_lost:
                        if best_batch_lost * improve_threshold > val_batch_lost:
                            self.patience = max(
                                self.patience,
                                current_time_step * self.patience_frq)
                            best_batch_lost = val_batch_lost
                            # test it on the test set
                            test_losses = [
                                test_model(i) for i in range(n_test_batches)
                            ]
                            current_test_lost = np.mean(test_losses)
                            print((
                                'epoch %i minibatch %i: test accuracy on %i examples is %.5f'
                            ) % (epoch, m_b_i, test_len,
                                 (1 - current_test_lost) * 100.))
                            if best_test_lost > current_test_lost:
                                best_test_lost = current_test_lost
                if self.patience <= current_time_step:
                    print(self.patience)
                    done_loop = True
                    break
            print('epoch: %i, training time: %.2f secs; with avg cost: %.5f' %
                  (epoch, time.time() - start, epoch_cost_train / batch_train))
        print('Best test accuracy is: %.5f' % (1 - best_test_lost))
        utils.save_layer_params(lstm, 'lstm_cb')
        utils.save_layer_params(hidden_layer, 'hidden_cb')
        utils.save_layer_params(hidden_layer_relu, 'hidden_relu_cb')
        utils.save_layer_params(full_connect, 'full_connect_cb')
        for index, conv in enumerate(conv_nnets):
            utils.save_layer_params(conv, 'convolution_%s' % index)
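
For reference, the adadelta helper presumably implements the standard AdaDelta recurrences (Zeiler, 2012): E[g^2]_t = rho * E[g^2]_{t-1} + (1 - rho) * g_t^2 and E[dx^2]_t = rho * E[dx^2]_{t-1} + (1 - rho) * dx_t^2, with the per-parameter step dx_t = (RMS(dx)_{t-1} / RMS(g)_t) * g_t, where RMS(v) = sqrt(E[v^2] + eps). The returned delta is then applied as p <- p - dx_t by the `updates` list, which matches the sign convention above.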
Example 8
    def build_test_model(self, data):
        rng = np.random.RandomState(3435)
        lstm_params, hidden_params, hidden_relu_params, full_connect_params, \
            convs = self.load_trained_params()
        data_x, data_y, maxlen = data
        test_len = len(data_x)
        n_test_batches = test_len // self.batch_size
        x = T.matrix('x')
        y = T.ivector('y')
        index = T.lscalar()
        Words = theano.shared(value=self.word_vectors,
                              name="Words",
                              borrow=True)
        input_width = self.hidden_sizes[0]
        layer0_input = Words[T.cast(x.flatten(), dtype="int32")].reshape(
            (self.batch_size, maxlen, input_width))
        lstm = LSTM(dim=input_width,
                    batch_size=self.batch_size,
                    number_step=maxlen,
                    params=lstm_params)
        layer0_output = lstm.feed_foward(layer0_input)
        conv_outputs = list()
        conv_nnets = list()
        params = list()
        output = T.cast(layer0_input.flatten(), dtype=floatX)
        conv_input = output.reshape((self.batch_size, 1, maxlen, input_width))
        for it, p_conv in enumerate(convs):
            pheight = maxlen - self.filter_sizes[it] + 1
            conv = ConvolutionLayer(rng=rng,
                                    filter_shape=(self.kernel, 1,
                                                  self.filter_sizes[it],
                                                  input_width),
                                    input_shape=(self.batch_size, 1, maxlen,
                                                 input_width),
                                    poolsize=(pheight, 1),
                                    name="conv" + str(self.filter_sizes[it]),
                                    W=p_conv[0],
                                    b=p_conv[1])
            # => batch_size x 1 x maxlen x input_width
            output = conv.predict(conv_input)
            layer1_input = output.flatten(2)
            params += conv.params
            conv_outputs.append(layer1_input)
            conv_nnets.append(conv)
        conv_nnets_output = T.concatenate(conv_outputs, axis=1)
        hidden_layer = HiddenLayer(
            rng,
            hidden_sizes=[self.kernel * 3, self.hidden_sizes[0]],
            input_vectors=conv_nnets_output,
            activation=utils.Tanh,
            name="Hidden_Tanh",
            W=hidden_params[0],
            b=hidden_params[1])
        hidden_layer.predict()
        hidden_layer_relu = HiddenLayer(
            rng,
            hidden_sizes=[self.hidden_sizes[0], self.hidden_sizes[0]],
            input_vectors=hidden_layer.output,
            W=hidden_relu_params[0],
            b=hidden_relu_params[1])
        hidden_layer_relu.predict()
        # hidden_layer_dropout = HiddenLayerDropout(rng, hidden_sizes=self.hidden_sizes[:2], input_vectors=lstm.output, W=hidden_layer.W, b=hidden_layer.b)
        full_connect = FullConnectLayer(
            rng,
            layers_size=[self.hidden_sizes[0], self.hidden_sizes[-1]],
            input_vector=hidden_layer_relu.output,
            W=full_connect_params[0],
            b=full_connect_params[1])
        full_connect.predict()
        test_data_x = theano.shared(np.asarray(data_x, dtype=floatX),
                                    borrow=True)
        test_data_y = theano.shared(np.asarray(data_y, dtype='int32'),
                                    borrow=True)

        errors = 0.
        if test_len == 1:
            test_model = theano.function(
                [index],
                outputs=full_connect.get_predict(),
                on_unused_input='ignore',
                givens={
                    x:
                    test_data_x[index * self.batch_size:(index + 1) *
                                self.batch_size],
                    y:
                    test_data_y[index * self.batch_size:(index + 1) *
                                self.batch_size]
                })
            index = 0
            avg_errors = test_model(index)
        else:
            test_model = theano.function(
                [index],
                outputs=full_connect.errors(y),
                givens={
                    x:
                    test_data_x[index * self.batch_size:(index + 1) *
                                self.batch_size],
                    y:
                    test_data_y[index * self.batch_size:(index + 1) *
                                self.batch_size]
                })
            for i in xrange(n_test_batches):
                errors += test_model(i)
            avg_errors = errors / n_test_batches
        return avg_errors
Example 9
import numpy as np
from math import log


class CNN:
    def __init__(self, class_num):
        self.train = True
        self.loss = 0
        self.conv1 = ConvolutionLayer(kernel=(3, 3),
                                      channels=32,
                                      stride=(1, 1),
                                      padding=(0, 0))
        self.conv2 = ConvolutionLayer(kernel=(3, 3),
                                      channels=64,
                                      stride=(2, 2),
                                      padding=(1, 1))
        self.conv3 = ConvolutionLayer(kernel=(3, 3),
                                      channels=64,
                                      stride=(1, 1),
                                      padding=(0, 0))
        self.conv4 = ConvolutionLayer(kernel=(3, 3),
                                      channels=128,
                                      stride=(2, 2),
                                      padding=(1, 1))
        self.conv5 = ConvolutionLayer(kernel=(3, 3),
                                      channels=128,
                                      stride=(1, 1),
                                      padding=(0, 0))
        self.fc1 = FullyConnectLayer(channels=(3200, 100))
        self.fc2 = FullyConnectLayer(channels=(100, class_num))
        self.bn1 = BatchNormalization2d(32)
        self.bn2 = BatchNormalization2d(64)
        self.bn3 = BatchNormalization2d(128)
        self.dropout1 = DropOut()

    def forward(self, x):
        batch = x.shape[0]
        for layer in [self.bn1, self.bn2, self.bn3, self.dropout1]:
            layer.train = self.train

        self.block1 = [self.conv1.forward, self.bn1.forward, self.relu]
        self.block2 = [
            self.conv2.forward, self.conv3.forward, self.bn2.forward,
            self.relu, self.dropout1.forward
        ]
        self.block3 = [
            self.conv4.forward, self.conv5.forward, self.bn3.forward, self.relu
        ]
        self.block4 = [self.fc1.forward, self.relu, self.fc2.forward]

        for block in [self.block1, self.block2, self.block3]:
            for item in block:
                x = item(x)
                # print(x[0].reshape(-1))
        x = x.reshape(batch, -1)
        for item in self.block4:
            x = item(x)
            # print(x[0].reshape(-1))
        return x

    def backward(self, err):
        err = self.fc2.backward(err)
        err *= np.where(self.fc2.inputs > 0, 1.0, 0.0)
        err = self.fc1.backward(err)
        err *= np.where(self.fc1.inputs > 0, 1.0, 0.0)
        err = err.reshape(128, 128, 5, 5)  # hard-coded: batch 128, 128 channels, 5x5 maps
        err = self.bn3.backward(err)
        err = self.conv5.backward(err)
        err = self.conv4.backward(err)
        err = self.dropout1.backward(err)
        err *= np.where(self.conv4.inputs > 0, 1.0, 0.0)
        err = self.bn2.backward(err)
        err = self.conv3.backward(err)
        err = self.conv2.backward(err)
        err *= np.where(self.conv2.inputs > 0, 1.0, 0.0)
        err = self.bn1.backward(err)
        _ = self.conv1.backward(err)

    def save(self, saved_dir):
        np.save(saved_dir + 'conv1.npy', self.conv1.weight)
        np.save(saved_dir + 'conv2.npy', self.conv2.weight)
        np.save(saved_dir + 'conv3.npy', self.conv3.weight)
        np.save(saved_dir + 'conv4.npy', self.conv4.weight)
        np.save(saved_dir + 'conv5.npy', self.conv5.weight)
        np.save(saved_dir + 'fc1.npy', self.fc1.weight)
        np.save(saved_dir + 'fc2.npy', self.fc2.weight)
        np.save(saved_dir + 'bn.npy',
                np.array([self.bn1.gamma, self.bn1.beta,
                          self.bn2.gamma, self.bn2.beta,
                          self.bn3.gamma, self.bn3.beta]))

    def load(self, load_dir):
        self.conv1.weight = np.load(load_dir + 'conv1.npy', allow_pickle=True)
        self.conv2.weight = np.load(load_dir + 'conv2.npy', allow_pickle=True)
        self.conv3.weight = np.load(load_dir + 'conv3.npy', allow_pickle=True)
        self.conv4.weight = np.load(load_dir + 'conv4.npy', allow_pickle=True)
        self.conv5.weight = np.load(load_dir + 'conv5.npy', allow_pickle=True)
        self.fc1.weight = np.load(load_dir + 'fc1.npy', allow_pickle=True)
        self.fc2.weight = np.load(load_dir + 'fc2.npy', allow_pickle=True)
        bn_paras = np.load(load_dir + 'bn.npy', allow_pickle=True)
        self.bn1.gamma = bn_paras[0]
        self.bn1.beta = bn_paras[1]
        self.bn2.gamma = bn_paras[2]
        self.bn2.beta = bn_paras[3]
        self.bn3.gamma = bn_paras[4]
        self.bn3.beta = bn_paras[5]

    @staticmethod
    def relu(x):
        return np.where(x > 0, x, 0)

    @staticmethod
    def maxpooling(x):
        res = np.zeros(
            (x.shape[0], x.shape[1], x.shape[2] // 2, x.shape[3] // 2))
        for a in range(res.shape[0]):
            for b in range(res.shape[1]):
                for c in range(res.shape[2]):
                    for d in range(res.shape[3]):
                        res[a, b, c, d] = np.max(x[a, b, c * 2:c * 2 + 2,
                                                   d * 2:d * 2 + 2])
        return res

    @staticmethod
    def loss_(vector, label):
        # row-wise softmax (vector_), a gradient-style signal for backward
        # (_loss) and the cross-entropy value (loss_c) for each sample
        _loss = np.zeros_like(vector)
        vector_ = np.zeros_like(vector)
        loss_c = np.zeros_like(vector)
        for i in range(vector.shape[0]):
            for j in range(vector.shape[1]):
                vector_[i, j] = np.exp(vector[i, j]) / sum(np.exp(vector[i]))
                _loss[i, j] += -(vector_[i, j])
            try:
                _loss[i, int(label[i])] += (1.0 + 2 *
                                            (vector_[i, int(label[i])]))
                loss_c[i, int(label[i])] += -log(vector_[i, int(label[i])])
            except ValueError:
                print(vector[i])
                exit()
        return _loss, loss_c
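
A minimal usage sketch for the CNN class, assuming ConvolutionLayer, FullyConnectLayer, BatchNormalization2d and DropOut are available in the same module with the interfaces used above. The input shape is chosen so the 3200-unit flatten and the hard-coded reshape in backward line up (batch 128, 32x32 inputs -> 128 channels of 5x5 maps); all shapes are illustrative:

net = CNN(class_num=10)

x = np.random.randn(128, 3, 32, 32)        # dummy NCHW batch
labels = np.random.randint(0, 10, 128)

net.train = True
logits = net.forward(x)                    # -> (128, 10)
grad, loss = CNN.loss_(logits, labels)     # gradient signal and cross-entropy
net.backward(grad)

net.train = False                          # BN/dropout switch to inference mode
predictions = net.forward(x).argmax(axis=1)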