def backward(self, y_pred, y_train, cache):
    X, h1_cache, h2_cache, score_cache, nl_cache1, nl_cache2, u1, u2, bn1_cache, bn2_cache = cache

    # Output layer
    grad_y = self.dloss_funs[self.loss](y_pred, y_train)

    # Third layer
    dh2, dW3, db3 = l.fc_backward(grad_y, score_cache)
    dW3 += reg.dl2_reg(self.model['W3'], self.lam)
    dh2 = self.backward_nonlin(dh2, nl_cache2)
    dh2 = l.dropout_backward(dh2, u2)
    dh2, dgamma2, dbeta2 = l.bn_backward(dh2, bn2_cache)

    # Second layer
    dh1, dW2, db2 = l.fc_backward(dh2, h2_cache)
    dW2 += reg.dl2_reg(self.model['W2'], self.lam)
    dh1 = self.backward_nonlin(dh1, nl_cache1)
    dh1 = l.dropout_backward(dh1, u1)
    dh1, dgamma1, dbeta1 = l.bn_backward(dh1, bn1_cache)

    # First layer
    _, dW1, db1 = l.fc_backward(dh1, h1_cache)
    dW1 += reg.dl2_reg(self.model['W1'], self.lam)

    grad = dict(
        W1=dW1, W2=dW2, W3=dW3,
        b1=db1, b2=db2, b3=db3,
        gamma1=dgamma1, gamma2=dgamma2,
        beta1=dbeta1, beta2=dbeta2
    )

    return grad
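# A minimal sketch of the fc_backward convention assumed throughout these backward
# passes: the helper takes the upstream gradient and the cache saved by the forward
# pass and returns (d_input, d_weight, d_bias). The cache layout (W, X) and the name
# _fc_backward_sketch are illustrative assumptions, not the actual l.fc_backward
# implementation.
import numpy as np

def _fc_backward_sketch(dout, cache):
    W, X = cache                   # assumed cache: weight matrix and layer input
    dX = dout @ W.T                # gradient w.r.t. the layer input
    dW = X.T @ dout                # gradient w.r.t. the weights
    db = np.sum(dout, axis=0)      # gradient w.r.t. the bias
    return dX, dW, db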
def backward(self, y_pred, y_train, cache):
    X, h1_cache, h3_cache, score_cache, hpool_cache, hpool, nl_cache1, nl_cache3, u3 = cache

    # Output layer
    grad_y = self.dloss_funs[self.loss](y_pred, y_train)

    # FC-7
    dh3, dW3, db3 = l.fc_backward(grad_y, score_cache)
    dh3 = self.backward_nonlin(dh3, nl_cache3)
    dh3 = l.dropout_backward(dh3, u3)

    dh2, dW2, db2 = l.fc_backward(dh3, h3_cache)
    dh2 = dh2.ravel().reshape(hpool.shape)

    # Pool-1
    dpool = l.maxpool_backward(dh2, hpool_cache)

    # Conv-1
    dh1 = self.backward_nonlin(dpool, nl_cache1)
    dX, dW1, db1 = l.conv_backward(dh1, h1_cache)

    grad = dict(
        W1=dW1, W2=dW2, W3=dW3,
        b1=db1, b2=db2, b3=db3
    )

    return grad
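# The line dh2 = dh2.ravel().reshape(hpool.shape) undoes the flattening that the
# forward pass presumably applies between the pooling layer and the first FC layer,
# so the gradient is back in spatial form before maxpool_backward. A tiny
# self-contained example of that round trip; the shapes below are made up for
# illustration.
import numpy as np

hpool_demo = np.random.randn(8, 16, 7, 7)                  # (N, C, H, W) pooled feature maps
h_flat = hpool_demo.reshape(hpool_demo.shape[0], -1)        # flatten to (N, C*H*W) for the FC layer
dh_flat = np.random.randn(*h_flat.shape)                    # upstream gradient in flattened shape
dh_spatial = dh_flat.ravel().reshape(hpool_demo.shape)      # restore (N, C, H, W) for maxpool_backward
assert dh_spatial.shape == hpool_demo.shape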
def backward(self, y_pred, y_train, cache, iter):
    num_layers = self.num_layers

    # Output layer
    grad_y = self.dloss_funs[self.loss](y_pred, y_train)

    # Fourth layer
    dh, dW, db = l.fc_backward(grad_y, cache['score_cache'])

    grad = dict()
    grad['Wf'] = dW + reg.dl2_reg(self.model['Wf'], self.lam)
    grad['bf'] = db

    dprevH = 0
    for i in range(num_layers, 0, -1):
        if self.leapfrog:
            dh, dprevH, dW, db = l.leap_backward(
                dh, dprevH,
                cache['h_cache' + str(i)],
                cache['nl_cache' + str(i)],
                i == num_layers,
                self.hypo)
        else:
            dh, dW, db = l.fcrelu_backward(
                dh,
                cache['h_cache' + str(i)],
                cache['nl_cache' + str(i)],
                antisymmetric=self.antisymmetric,
                hypo=self.hypo)

        if not self.antisymmetric and not self.leapfrog:
            dW += reg.dl2_reg(self.model['W' + str(i)], self.lam)

        grad['W' + str(i)] = dW
        grad['b' + str(i)] = db

    if self.doDropout:
        dh = l.dropout_backward(dh, cache['u1'])

    dh, dW, db = l.fcrelu_backward(dh, cache['h_caches'], cache['nl_caches'],
                                   antisymmetric=self.antisymmetric, hypo=self.hypo)
    grad['Ws'] = dW + reg.dl2_reg(self.model['Ws'], self.lam)
    grad['bs'] = db

    # dh, dW, db = l.conv_backward(dh, cache['c_cache'])
    # grad['Wc'] = dW
    # grad['bs'] = db

    if self.freezeLastLayer or self.freezeClassificationLayer:
        grad['Wf'] = 0
        grad['bf'] = 0

    if self.weights_fixed:
        grad['Wf'] = 0
        grad['bf'] = 0
        grad['Ws'] = 0
        grad['bs'] = 0

    return grad
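# A common way to validate hand-written backward passes like the ones above is a
# centered-difference gradient check on a tiny batch. The sketch below assumes a
# net object with forward(X, train=True) -> (y_pred, cache), a model dict of
# parameters, and loss_funs/loss attributes; those names, and the grad_check helper
# itself, are assumptions for illustration. For models whose backward also takes an
# iteration argument, pass it explicitly. With dropout or batch norm active, the
# check is only approximate unless the random masks and statistics are held fixed.
import numpy as np

def grad_check(net, X, y, param_name, eps=1e-5, n_checks=5):
    y_pred, cache = net.forward(X, train=True)
    grads = net.backward(y_pred, y, cache)
    W = net.model[param_name]
    for _ in range(n_checks):
        idx = tuple(np.random.randint(s) for s in W.shape)  # random entry of the parameter
        old = W[idx]
        W[idx] = old + eps
        loss_plus = net.loss_funs[net.loss](net.forward(X, train=True)[0], y)
        W[idx] = old - eps
        loss_minus = net.loss_funs[net.loss](net.forward(X, train=True)[0], y)
        W[idx] = old                                         # restore the parameter
        num_grad = (loss_plus - loss_minus) / (2 * eps)      # numerical gradient estimate
        print(param_name, idx, num_grad, grads[param_name][idx])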