Example #1
    def forward(self, X, train=False):
        gamma1, gamma2 = self.model['gamma1'], self.model['gamma2']
        beta1, beta2 = self.model['beta1'], self.model['beta2']

        u1, u2 = None, None
        bn1_cache, bn2_cache = None, None

        # First layer
        h1, h1_cache = l.fc_forward(X, self.model['W1'], self.model['b1'])
        bn1_cache = (self.bn_caches['bn1_mean'], self.bn_caches['bn1_var'])
        h1, bn1_cache, run_mean, run_var = l.bn_forward(h1, gamma1, beta1, bn1_cache, train=train)
        h1, nl_cache1 = self.forward_nonlin(h1)

        self.bn_caches['bn1_mean'], self.bn_caches['bn1_var'] = run_mean, run_var

        if train:
            h1, u1 = l.dropout_forward(h1, self.p_dropout)

        # Second layer
        h2, h2_cache = l.fc_forward(h1, self.model['W2'], self.model['b2'])
        bn2_cache = (self.bn_caches['bn2_mean'], self.bn_caches['bn2_var'])
        h2, bn2_cache, run_mean, run_var = l.bn_forward(h2, gamma2, beta2, bn2_cache, train=train)
        h2, nl_cache2 = self.forward_nonlin(h2)

        self.bn_caches['bn2_mean'], self.bn_caches['bn2_var'] = run_mean, run_var

        if train:
            h2, u2 = l.dropout_forward(h2, self.p_dropout)

        # Third layer
        score, score_cache = l.fc_forward(h2, self.model['W3'], self.model['b3'])

        cache = (X, h1_cache, h2_cache, score_cache, nl_cache1, nl_cache2, u1, u2, bn1_cache, bn2_cache)

        return score, cache
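
These fully-connected examples thread the batch-norm running statistics through `self.bn_caches` by hand. For reference, a minimal sketch of a `bn_forward` with this calling convention; the 0.9 momentum, the epsilon, and the internal cache layout are assumptions, not taken from the examples:

    import numpy as np

    def bn_forward(h, gamma, beta, cache, train=True, momentum=.9, eps=1e-8):
        run_mean, run_var = cache
        if train:
            mu, var = h.mean(axis=0), h.var(axis=0)
            # Update the running statistics that will be used at test time
            run_mean = momentum * run_mean + (1. - momentum) * mu
            run_var = momentum * run_var + (1. - momentum) * var
        else:
            mu, var = run_mean, run_var
        h_norm = (h - mu) / np.sqrt(var + eps)
        out = gamma * h_norm + beta
        bn_cache = (h, h_norm, mu, var, gamma, beta)
        return out, bn_cache, run_mean, run_var
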
Example #2
    def forward(self, X, train=False):

        # Quantize these parameters to 8 fractional bits of fixed-point
        # precision (floor quantization); b1 and W3 are left at full precision.
        self.model['W1'] = np.floor(self.model['W1'] * (2 ** 8)) / (2 ** 8)
        self.model['W2'] = np.floor(self.model['W2'] * (2 ** 8)) / (2 ** 8)
        self.model['b3'] = np.floor(self.model['b3'] * (2 ** 8)) / (2 ** 8)
        self.model['b2'] = np.floor(self.model['b2'] * (2 ** 8)) / (2 ** 8)
        # Conv-1
        h1, h1_cache = l.conv_forward(X, self.model['W1'], self.model['b1'])
        h1, nl_cache1 = l.relu_forward(h1)

        # Pool-1
        hpool, hpool_cache = l.maxpool_forward(h1)

        # Move channels last, then flatten to one row per sample
        hpool = np.transpose(hpool, [0, 2, 3, 1])
        h2 = hpool.ravel().reshape(X.shape[0], -1)

        # FC-7
        h3, h3_cache = l.fc_forward(h2, self.model['W2'], self.model['b2'])
        h3, nl_cache3 = l.relu_forward(h3)

        # Softmax
        score, score_cache = l.fc_forward(h3, self.model['W3'], self.model['b3'])

        return score, (X, h1_cache, h3_cache, score_cache, hpool_cache, hpool, nl_cache1, nl_cache3)
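
The four `np.floor` lines emulate 8-fractional-bit fixed-point weights: each value is rounded down to the nearest multiple of 2^-8. A hypothetical helper (not part of the example's codebase) makes the intent explicit:

    import numpy as np

    def quantize(w, frac_bits=8):
        # Floor-quantize to `frac_bits` fractional bits: w -> floor(w * 2^f) / 2^f
        scale = 2. ** frac_bits
        return np.floor(w * scale) / scale

With it, the preamble collapses to a loop over ('W1', 'W2', 'b2', 'b3'). Note that flooring, unlike round-to-nearest, biases every parameter slightly downward.
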
Example #3
    def forward(self, X, train=True):
        # Batch norm and dropout are disabled in this variant; only the
        # conv -> relu -> pool -> fc -> relu -> fc path is active.

        # Conv-1
        h1, h1_cache = l.conv_forward(X, self.model['W1'], self.model['b1'])
        h1, nl_cache1 = l.relu_forward(h1)

        # Pool-1
        hpool, hpool_cache = l.maxpool_forward(h1)
        h2 = hpool.ravel().reshape(X.shape[0], -1)

        # FC-7
        h3, h3_cache = l.fc_forward(h2, self.model['W2'], self.model['b2'])
        h3, nl_cache3 = l.relu_forward(h3)

        # Softmax (fourth layer)
        score, score_cache = l.fc_forward(h3, self.model['W3'],
                                          self.model['b3'])

        return score, (X, h1_cache, h3_cache, score_cache, hpool_cache, hpool,
                       nl_cache1, nl_cache3)
Example #4
    def forward(self, X, h_old, train=True):
        m = self.model
        Wz, Wr, Wh, Wy = m['Wz'], m['Wr'], m['Wh'], m['Wy']
        bz, br, bh, by = m['bz'], m['br'], m['bh'], m['by']

        X_one_hot = np.zeros(self.D)
        X_one_hot[X] = 1.
        X_one_hot = X_one_hot.reshape(1, -1)

        X = np.column_stack((h_old, X_one_hot))

        hz, hz_cache = l.fc_forward(X, Wz, bz)
        hz, hz_sigm_cache = l.sigmoid_forward(hz)

        hr, hr_cache = l.fc_forward(X, Wr, br)
        hr, hr_sigm_cache = l.sigmoid_forward(hr)

        X_prime = np.column_stack((hr * h_old, X_one_hot))
        hh, hh_cache = l.fc_forward(X_prime, Wh, bh)
        hh, hh_tanh_cache = l.tanh_forward(hh)

        h = (1. - hz) * h_old + hz * hh

        y, y_cache = l.fc_forward(h, Wy, by)

        cache = (
            X, X_prime, h_old, hz, hz_cache, hz_sigm_cache, hr, hr_cache, hr_sigm_cache,
            hh, hh_cache, hh_tanh_cache, h, y_cache
        )

        if not train:
            y = util.softmax(y)

        return y, h, cache
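
This is a standard GRU cell: `hz` is the update gate, `hr` the reset gate, and `hh` the candidate state, combined as h = (1 - z) * h_old + z * h_cand. The same step in plain numpy, without the layer library (a sketch; `gru_step` and `sigmoid` are illustrative names):

    import numpy as np

    def sigmoid(u):
        return 1. / (1. + np.exp(-u))

    def gru_step(x, h_old, Wz, bz, Wr, br, Wh, bh):
        z = sigmoid(np.column_stack((h_old, x)) @ Wz + bz)   # update gate
        r = sigmoid(np.column_stack((h_old, x)) @ Wr + br)   # reset gate
        h_cand = np.tanh(np.column_stack((r * h_old, x)) @ Wh + bh)
        # Interpolate between the old state and the candidate
        return (1. - z) * h_old + z * h_cand
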
Example #5
    def forward(self, X, train=False):
        # Conv-1
        h1, h1_cache = l.conv_forward(X, self.model['W1'], self.model['b1'])
        h1, nl_cache1 = l.relu_forward(h1)

        # Pool-1
        hpool, hpool_cache = l.maxpool_forward(h1)

        # Move channels last, then flatten to one row per sample
        hpool = np.transpose(hpool, [0, 2, 3, 1])
        h2 = hpool.ravel().reshape(X.shape[0], -1)

        # FC-7
        h3, h3_cache = l.fc_forward(h2, self.model['W2'], self.model['b2'])
        h3, nl_cache3 = l.relu_forward(h3)

        # Softmax
        score, score_cache = l.fc_forward(h3, self.model['W3'],
                                          self.model['b3'])

        return score, (X, h1_cache, h3_cache, score_cache, hpool_cache, hpool,
                       nl_cache1, nl_cache3)
Example #6
    def forward(self, X, train=True):
        # Only the fully-connected layers are batch-normalized, so only
        # gamma4/beta4 and gamma5/beta5 are needed.
        gamma4, gamma5 = self.model['gamma4'], self.model['gamma5']
        beta4, beta5 = self.model['beta4'], self.model['beta5']

        # Conv-1
        h1, h1_cache = l.conv_forward(X, self.model['W1'], self.model['b1'])
        h1, nl_cache1 = l.relu_forward(h1)

        # Pool-1
        hpool1, hpool1_cache = l.maxpool_forward(h1)

        # Conv-2
        h2, h2_cache = l.conv_forward(hpool1, self.model['W2'], self.model['b2'])
        h2, nl_cache2 = l.relu_forward(h2)

        # Pool-2
        hpool2, hpool2_cache = l.maxpool_forward(h2)

        # Flatten for the fully-connected layers
        hpool2_ = hpool2.ravel().reshape(X.shape[0], -1)

        # FC-1
        h4, h4_cache = l.fc_forward(hpool2_, self.model['W4'], self.model['b4'])
        bn4_cache = (self.bn_caches['bn4_mean'], self.bn_caches['bn4_var'])
        h4, bn4_cache, run_mean, run_var = l.bn_forward(h4, gamma4, beta4, bn4_cache, train=train)
        h4, nl_cache4 = l.relu_forward(h4)
        self.bn_caches['bn4_mean'], self.bn_caches['bn4_var'] = run_mean, run_var

        # FC-2
        h5, h5_cache = l.fc_forward(h4, self.model['W5'], self.model['b5'])
        bn5_cache = (self.bn_caches['bn5_mean'], self.bn_caches['bn5_var'])
        h5, bn5_cache, run_mean, run_var = l.bn_forward(h5, gamma5, beta5, bn5_cache, train=train)
        h5, nl_cache5 = l.relu_forward(h5)
        self.bn_caches['bn5_mean'], self.bn_caches['bn5_var'] = run_mean, run_var

        # Output layer
        score, score_cache = l.fc_forward(h5, self.model['W6'], self.model['b6'])
        return score, (X, h1_cache, h2_cache, h4_cache, h5_cache, score_cache,
                       hpool1_cache, hpool1, hpool2_cache, hpool2,
                       nl_cache1, nl_cache2, nl_cache4, nl_cache5,
                       bn4_cache, bn5_cache)
Example #7
    def forward(self, X, iter, train=False):
        # Multilevel training: every multi_step iterations, and at most
        # multi_times times, grow the network by doubling its layers.
        if self.multilevel:
            if iter != 0 and iter % self.multi_step == 0 and iter < self.multi_step * self.multi_times + 1:
                self.doubleLayers()
        cache = dict(X=X)
        h = X
        prev = 0
        h, cache['h_caches'], cache['nl_caches'] = \
            l.fcrelu_forward(h, self.model['Ws'], self.model['bs'], hypo=self.hypo)
        if train and self.doDropout:
            h, cache['u1'] = l.dropout_forward(h, self.p_dropout)

        for i in range(1, self.num_layers + 1):
            temp = h  # h before the update becomes `prev` for the next leapfrog step
            if self.leapfrog:
                h, cache['h_cache'+str(i)], cache['nl_cache'+str(i)] = \
                    l.leap_forward(h, prev, self.model['W'+str(i)], self.model['b'+str(i)], self.hypo, i == 1)
            else:
                h, cache['h_cache'+str(i)], cache['nl_cache'+str(i)] = \
                    l.fcrelu_forward(h, self.model['W'+str(i)], self.model['b'+str(i)], hypo=self.hypo)
            prev = temp

        cache['finalh'] = h
        score, cache['score_cache'] = l.fc_forward(h, self.model['Wf'],
                                                   self.model['bf'])

        return score, cache
Example #8
    def forward(self, X, train=False):
        # Conv-1
        h1, h1_cache = l.conv_forward(X, self.model['W1'], self.model['b1'])
        h1, nl_cache1 = l.relu_forward(h1)

        # Pool-1
        hpool, hpool_cache = l.maxpool_forward(h1)
        h2 = hpool.ravel().reshape(X.shape[0], -1)

        # FC-7
        h3, h3_cache = l.fc_forward(h2, self.model['W2'], self.model['b2'])
        h3, nl_cache3 = l.relu_forward(h3)

        # Softmax
        score, score_cache = l.fc_forward(h3, self.model['W3'], self.model['b3'])

        return score, (X, h1_cache, h3_cache, score_cache, hpool_cache, hpool, nl_cache1, nl_cache3)
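
In these conv examples the pooled feature map of shape (N, C, H, W) is flattened to one row per sample before the fully-connected layer; `ravel().reshape(N, -1)` is a single `reshape` in disguise. A quick check with illustrative shapes:

    import numpy as np

    hpool = np.arange(64 * 16 * 14 * 14, dtype=float).reshape(64, 16, 14, 14)
    h2 = hpool.ravel().reshape(hpool.shape[0], -1)
    assert h2.shape == (64, 16 * 14 * 14)
    assert np.array_equal(h2, hpool.reshape(64, -1))
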
Example #9
    def forward(self, X, state, train=True):
        m = self.model
        Wf, Wi, Wc, Wo, Wy = m['Wf'], m['Wi'], m['Wc'], m['Wo'], m['Wy']
        bf, bi, bc, bo, by = m['bf'], m['bi'], m['bc'], m['bo'], m['by']

        h_old, c_old = state

        X_one_hot = np.zeros(self.D)
        X_one_hot[X] = 1.
        X_one_hot = X_one_hot.reshape(1, -1)

        X = np.column_stack((h_old, X_one_hot))

        hf, hf_cache = l.fc_forward(X, Wf, bf)
        hf, hf_sigm_cache = l.sigmoid_forward(hf)

        hi, hi_cache = l.fc_forward(X, Wi, bi)
        hi, hi_sigm_cache = l.sigmoid_forward(hi)

        ho, ho_cache = l.fc_forward(X, Wo, bo)
        ho, ho_sigm_cache = l.sigmoid_forward(ho)

        hc, hc_cache = l.fc_forward(X, Wc, bc)
        hc, hc_tanh_cache = l.tanh_forward(hc)

        c = hf * c_old + hi * hc
        c, c_tanh_cache = l.tanh_forward(c)

        h = ho * c

        y, y_cache = l.fc_forward(h, Wy, by)

        cache = (
            X, hf, hi, ho, hc, hf_cache, hf_sigm_cache, hi_cache, hi_sigm_cache, ho_cache,
            ho_sigm_cache, hc_cache, hc_tanh_cache, c_old, c, c_tanh_cache, y_cache
        )

        if not train:
            y = util.softmax(y)

        return y, (h, c), cache
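
This follows the usual LSTM gating (forget `hf`, input `hi`, output `ho`, candidate `hc`), with one wrinkle: the cell state is tanh-squashed in place, so the squashed value, not the raw `hf * c_old + hi * hc`, is carried forward in `state`. For comparison, a sketch of the textbook update, which keeps the raw cell state and squashes only when producing the hidden output:

    import numpy as np

    def lstm_cell_update(hf, hi, ho, hc, c_old):
        # Textbook variant: carry the *raw* cell state forward and apply
        # tanh only to compute the hidden output.
        c_new = hf * c_old + hi * hc
        h_new = ho * np.tanh(c_new)
        return h_new, c_new
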
Example #10
    def forward(self, X, train=False):
        u3 = None
        
        # Conv-1
        h1, h1_cache = l.conv_forward(X, self.model['W1'], self.model['b1'])
        h1, nl_cache1 = l.relu_forward(h1)

        # Pool-1
        hpool, hpool_cache = l.maxpool_forward(h1)
        h2 = hpool.ravel().reshape(X.shape[0], -1)

        # FC-7
        h3, h3_cache = l.fc_forward(h2, self.model['W2'], self.model['b2'])
        h3, nl_cache3 = l.relu_forward(h3)
        
        # Dropout on the fully-connected layer (training only)
        if train:
            h3, u3 = l.dropout_forward(h3, self.p_dropout[0])

        # Softmax
        score, score_cache = l.fc_forward(h3, self.model['W3'], self.model['b3'])

        return score, (X, h1_cache, h3_cache, score_cache, hpool_cache, hpool, nl_cache1, nl_cache3, u3)
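
The dropout mask `u3` is returned alongside the activations and cached so the backward pass can reuse it. A minimal sketch of an inverted-dropout forward with this signature (assuming `p_dropout` is the drop probability; not taken from the library):

    import numpy as np

    def dropout_forward(h, p_dropout):
        # Inverted dropout: zero each unit with probability p_dropout and
        # rescale the survivors, so no rescaling is needed at test time.
        u = (np.random.rand(*h.shape) >= p_dropout) / (1. - p_dropout)
        return h * u, u
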
Example #11
    def forward(self, X, h, train=True):
        Wxh, Whh, Why = self.model['Wxh'], self.model['Whh'], self.model['Why']
        bh, by = self.model['bh'], self.model['by']

        X_one_hot = np.zeros(self.D)
        X_one_hot[X] = 1.
        X_one_hot = X_one_hot.reshape(1, -1)

        hprev = h.copy()

        h, h_cache = l.tanh_forward(X_one_hot @ Wxh + hprev @ Whh + bh)
        y, y_cache = l.fc_forward(h, Why, by)

        cache = (X_one_hot, Whh, h, hprev, y, h_cache, y_cache)

        if not train:
            y = util.softmax(y)

        return y, h, cache
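
The recurrence is the classic Elman RNN step, written with the same names as above:

    h = tanh(X_one_hot @ Wxh + hprev @ Whh + bh)
    y = h @ Why + by

At sampling time (`train=False`) the logits are pushed through a softmax so `y` is a probability distribution over the `D` symbols; during training the softmax is presumably folded into the loss.
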
Example #12
    def forward(self, X, train=False):
        if self.nlayer == 2:
            gamma1 = self.model['gamma1']
            beta1 = self.model['beta1']
            
            u1, bn1_cache = None, None
            
            # First layer
            h1, h1_cache = l.fc_forward(X, self.model['W1'], self.model['b1'])
            bn1_cache = (self.bn_caches['bn1_mean'], self.bn_caches['bn1_var'])
            h1, bn1_cache, run_mean, run_var = l.bn_forward(h1, gamma1, beta1, bn1_cache, train=train)
            h1, nl_cache1 = self.forward_nonlin(h1)

            # Store the updated running statistics, as the other branches do
            self.bn_caches['bn1_mean'], self.bn_caches['bn1_var'] = run_mean, run_var
            
            if train:
                h1, u1 = l.dropout_forward(h1, self.p_dropout[0])
            
            # Last layer
            score, score_cache = l.fc_forward(h1, self.model['W4'], self.model['b4'])

            cache = (X, h1_cache, score_cache, nl_cache1, u1, bn1_cache)
            
        elif self.nlayer == 3:
            gamma1, gamma2 = self.model['gamma1'], self.model['gamma2']
            beta1, beta2 = self.model['beta1'], self.model['beta2']

            u1, u2 = None, None
            bn1_cache, bn2_cache = None, None

            # First layer
            h1, h1_cache = l.fc_forward(X, self.model['W1'], self.model['b1'])
            bn1_cache = (self.bn_caches['bn1_mean'], self.bn_caches['bn1_var'])
            h1, bn1_cache, run_mean, run_var = l.bn_forward(h1, gamma1, beta1, bn1_cache, train=train)
            h1, nl_cache1 = self.forward_nonlin(h1)

            self.bn_caches['bn1_mean'], self.bn_caches['bn1_var'] = run_mean, run_var
            
            if train:
                h1, u1 = l.dropout_forward(h1, self.p_dropout[0])

            # Second layer
            h2, h2_cache = l.fc_forward(h1, self.model['W2'], self.model['b2'])
            bn2_cache = (self.bn_caches['bn2_mean'], self.bn_caches['bn2_var'])
            h2, bn2_cache, run_mean, run_var = l.bn_forward(h2, gamma2, beta2, bn2_cache, train=train)
            h2, nl_cache2 = self.forward_nonlin(h2)

            self.bn_caches['bn2_mean'], self.bn_caches['bn2_var'] = run_mean, run_var

            if train:
                h2, u2 = l.dropout_forward(h2, self.p_dropout[1])

            # Third layer
            score, score_cache = l.fc_forward(h2, self.model['W4'], self.model['b4'])

            cache = (X, h1_cache, h2_cache, score_cache, nl_cache1, nl_cache2, u1, u2, bn1_cache, bn2_cache)
            
        elif self.nlayer == 4:
            gamma1, gamma2, gamma3 = self.model['gamma1'], self.model['gamma2'], self.model['gamma3']
            beta1, beta2, beta3 = self.model['beta1'], self.model['beta2'], self.model['beta3']

            u1, u2, u3 = None, None, None
            bn1_cache, bn2_cache, bn3_cache = None, None, None

            # First layer
            h1, h1_cache = l.fc_forward(X, self.model['W1'], self.model['b1'])
            bn1_cache = (self.bn_caches['bn1_mean'], self.bn_caches['bn1_var'])
            h1, bn1_cache, run_mean, run_var = l.bn_forward(h1, gamma1, beta1, bn1_cache, train=train)
            h1, nl_cache1 = self.forward_nonlin(h1)

            self.bn_caches['bn1_mean'], self.bn_caches['bn1_var'] = run_mean, run_var
            
            if train:
                h1, u1 = l.dropout_forward(h1, self.p_dropout[0])

            # Second layer
            h2, h2_cache = l.fc_forward(h1, self.model['W2'], self.model['b2'])
            bn2_cache = (self.bn_caches['bn2_mean'], self.bn_caches['bn2_var'])
            h2, bn2_cache, run_mean, run_var = l.bn_forward(h2, gamma2, beta2, bn2_cache, train=train)
            h2, nl_cache2 = self.forward_nonlin(h2)

            self.bn_caches['bn2_mean'], self.bn_caches['bn2_var'] = run_mean, run_var

            if train:
                h2, u2 = l.dropout_forward(h2, self.p_dropout[1])
            
            # Third layer
            h3, h3_cache = l.fc_forward(h2, self.model['W3'], self.model['b3'])
            bn3_cache = (self.bn_caches['bn3_mean'], self.bn_caches['bn3_var'])
            h3, bn3_cache, run_mean, run_var = l.bn_forward(h3, gamma3, beta3, bn3_cache, train=train)
            h3, nl_cache3 = self.forward_nonlin(h3)

            self.bn_caches['bn3_mean'], self.bn_caches['bn3_var'] = run_mean, run_var

            if train:
                h3, u3 = l.dropout_forward(h3, self.p_dropout[2])

            # Last layer
            score, score_cache = l.fc_forward(h3, self.model['W4'], self.model['b4'])

            cache = (X, h1_cache, h2_cache, h3_cache, score_cache, nl_cache1, nl_cache2, nl_cache3, u1, u2, u3, bn1_cache, bn2_cache, bn3_cache)

        return score, cache