def dev_loss(self, X, Y, M, Ws=[]):
    """Compute DEV-regularized loss for inputs X with target outputs Y.

    This loss function computes a combination of standard output loss
    (e.g. for classification/regression) and Dropout Ensemble Variance
    regularization loss. X should be a list of 'dev_reps' input arrays,
    where 'dev_reps' is the number of times each input will be pushed
    through a droppy network when computing the DEV regularizer. M should
    be a list of lists of per-layer dropout masks, matched to the sizes of
    the input arrays in X. Y should contain the target outputs for X[0],
    whose inputs will be pushed through a drop-free network.
    """
    if (len(Ws) == 0):
        Ws = self.layer_weights()
    dev_reps = len(X)
    # Compute activations for the observations in each array in X
    A = [self.feedforward(X[i], M[i], Ws) for i in range(dev_reps)]
    # Compute loss and gradient for the output-layer activations of the
    # (should be) drop-free feedforward of X[0]
    O = self.out_loss(A[0][-1], Y)
    # Make a list of per-layer activation gradients for each DEV rep
    dLdA = [[gp.zeros(Aj.shape) for Aj in A[0]]
            for i in range(dev_reps)]
    dLdA[0][-1] = O['dL']
    # Compute DEV regularizer loss and gradients
    Ld = 0.0
    for i in range(self.layer_count):
        dev_type = self.dev_types[i]
        dev_lam = self.dev_lams[i]
        if (dev_lam > 0.0000001):
            Ai = [A[j][i] for j in range(dev_reps)]
            Di = lnf.dev_loss(Ai, dev_type, 0)
            Ld = Ld + (dev_lam * Di['L'])
            for j in range(dev_reps):
                dLdA[j][i] = dLdA[j][i] + (dev_lam * Di['dLdA'][j])
    # Backpropagate gradients for each DEV rep and accumulate them
    B = {'dLdWs': [gp.zeros(W.shape) for W in Ws]}
    for i in range(dev_reps):
        Bi = self.backprop(dLdA[i], A[i], X[i], M[i], Ws)
        for j in range(self.layer_count):
            B['dLdWs'][j] = B['dLdWs'][j] + Bi['dLdWs'][j]
    # Compute parameter regularization loss and gradients
    R = self.reg_loss(Ws)
    # Combine output loss, DEV loss, and regularization loss
    L = [O['L'], Ld, R['L']]
    # Combine backpropagated gradients and regularization gradients
    dLdWs = [(dWb + dWr) for (dWb, dWr) in zip(B['dLdWs'], R['dLdWs'])]
    return {'L': L, 'dLdWs': dLdWs}
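
    # Illustrative usage sketch (not part of the original method). The names
    # 'net', 'get_minibatch', 'sample_drop_masks', and 'learn_rate' are
    # hypothetical stand-ins assumed for this example; the structure of X and
    # M follows the docstring above, with X[0] paired with drop-free (all-ones)
    # masks so the output loss is computed on a clean feedforward pass.
    #
    #   dev_reps = 2
    #   Xb, Yb = get_minibatch()
    #   X = [Xb for _ in range(dev_reps)]    # same inputs for every DEV rep
    #   M = [sample_drop_masks(net, drop_free=(i == 0))
    #        for i in range(dev_reps)]
    #   res = net.dev_loss(X, Yb, M)
    #   # res['L'] holds [output loss, DEV loss, parameter regularization loss];
    #   # a simple gradient step, assuming layer_weights() returns references
    #   # to the live parameter arrays:
    #   for (W, dW) in zip(net.layer_weights(), res['dLdWs']):
    #       W -= learn_rate * dW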