def backprop(self, dLdA, A, X, M, Ws=None):
    """Run backprop for the activation gradients in dLdA.

    The lengths (i.e. len()) of the lists of arrays dLdA, A, and M should
    all be self.layer_count. The shapes of dLdA[i] and A[i] should be the
    same for all i. The shape of M[i] should match the shape of A[i-1] for
    i from 1 to (self.layer_count - 1). The shape of M[0] should match the
    shape of X. Weight array list Ws defaults to self.layer_weights().

    Returns a dict with:
      'dLdWs': list of gradients on each layer's inbound weights, ordered
               first layer to last.
      'dLdX':  gradients on the (unbiased) network input X.

    NOTE: dLdA is modified in place — BP-ed input grads for each layer are
    accumulated into dLdA[i-1] (e.g. on top of DEV gradients).
    """
    # None (not a mutable []) as the default; an empty/None Ws means
    # "use this network's current per-layer weights".
    if not Ws:
        Ws = self.layer_weights()
    dLdWs = []
    dLdX = []
    # Walk the layers from last to first, pushing gradients backward.
    for i in range((self.layer_count - 1), -1, -1):
        if i == 0:
            # First layer receives X as input
            Xi = M[i] * lnf.bias(X, self.bias_val)
        else:
            # Other layers receive previous layer's activations as input
            Xi = M[i] * lnf.bias(A[i - 1], self.bias_val)
        # BP current grads onto current layer's weights and inputs
        Bi = self.layers[i].backprop(dLdA[i], A[i], Xi, Ws[i])
        # Rescale BP-ed input grads to account for dropout mask
        Bi['dLdX'] = M[i] * Bi['dLdX']
        if i == 0:
            # BP-ed input grads at first layer are grads on X
            dLdX = lnf.unbias(Bi['dLdX'])
        else:
            # BP-ed input grads at other layers should be added to
            # whatever grads were already there (e.g. DEV gradients)
            dLdA[i - 1] = dLdA[i - 1] + lnf.unbias(Bi['dLdX'])
        # Record the BP-ed gradients on current layer's inbound weights
        dLdWs.append(Bi['dLdW'])
    # Gradients were collected last-to-first; flip to layer order.
    dLdWs.reverse()
    return {'dLdWs': dLdWs, 'dLdX': dLdX}
def feedforward(self, X, M=None, Ws=None):
    """Feedforward for inputs X with drop masks M and layer weights Ws.

    Parameters:
      X:  input array for the first layer (observations in rows —
          presumably, since X.shape[0] sizes the drop masks).
      M:  list of self.layer_count dropout masks, one per layer; an
          empty/None M defaults to drop-free masks from
          self.get_drop_masks(X.shape[0], 0, 0).
      Ws: list of self.layer_count weight arrays, one per layer; an
          empty/None Ws defaults to self.layer_weights().

    Returns the list A of each layer's post-activation outputs, ordered
    first layer to last.
    """
    # None (not a mutable []) as the defaults; empty/None means
    # "use drop-free masks" / "use this network's current weights".
    if not M:
        # If no masks are given, use drop-free feedforward
        M = self.get_drop_masks(X.shape[0], 0, 0)
    if not Ws:
        # Default to this network's current per-layer weights
        Ws = self.layer_weights()
    A = []
    for i in range(self.layer_count):
        if i == 0:
            # First layer receives X as input
            Xi = M[i] * lnf.bias(X, self.bias_val)
        else:
            # Other layers receive previous layer's activations as input
            Xi = M[i] * lnf.bias(A[i - 1], self.bias_val)
        # Perform feedforward through the i'th network layer
        Ai = self.layers[i].feedforward(Xi, Ws[i])
        A.append(Ai['post'])
    return A