def backprop(self, dLdA, A, X, M, Ws=None):
    """Run backprop for the activation gradients in dLdA.

    The lengths (i.e. len()) of the lists of arrays dLdA, A, and M should
    all be self.layer_count. The shapes of dLdA[i] and A[i] should be the
    same for all i. The shape of M[i] should match the shape of A[i-1] for
    i from 1 to (self.layer_count - 1). The shape of M[0] should match the
    shape of X. Weight array list Ws defaults to self.layer_weights().
    """
    if not Ws:
        Ws = self.layer_weights()
    dLdWs = []
    dLdX = []
    for i in range(self.layer_count - 1, -1, -1):
        if i == 0:
            # First layer receives X as input
            Xi = M[i] * lnf.bias(X, self.bias_val)
        else:
            # Other layers receive previous layer's activations as input
            Xi = M[i] * lnf.bias(A[i-1], self.bias_val)
        # BP current grads onto current layer's weights and inputs
        Bi = self.layers[i].backprop(dLdA[i], A[i], Xi, Ws[i])
        # Rescale BP-ed input grads to account for dropout mask
        Bi['dLdX'] = M[i] * Bi['dLdX']
        if i == 0:
            # BP-ed input grads at first layer are grads on X
            dLdX = lnf.unbias(Bi['dLdX'])
        else:
            # BP-ed input grads at other layers should be added to
            # whatever grads were already there (e.g. DEV gradients)
            dLdA[i-1] = dLdA[i-1] + lnf.unbias(Bi['dLdX'])
        # Record the BP-ed gradients on current layer's inbound weights
        dLdWs.append(Bi['dLdW'])
    dLdWs.reverse()
    return {'dLdWs': dLdWs, 'dLdX': dLdX}