Example #1
    def dev_loss(self, X, Y, M, Ws=[]):
        """Compute DEV-regularized loss for inputs X with target outputs Y.

        This loss function computes a combination of standard output loss
        (e.g. for classification/regression) and Dropout Ensemble Variance
        regularization loss. X should be a list of 'dev_reps' input arrays,
        where 'dev_reps' is the number of times each input will be pushed
        through a droppy network when computing the DEV regularizer. M should
        be a list of lists of per-layer dropout masks, matched to size of the
        input arrays in X. Y should contain the target outputs for X[0], for
        which inputs will be pushed through a drop-free network.
        """
        if (len(Ws) == 0):
            Ws = self.layer_weights()
        dev_reps = len(X)
        # Compute activations for observations in X
        A = [self.feedforward(X[i], M[i], Ws) for i in range(dev_reps)]
        # Compute loss and gradient for output-layer activations, for the
        # (should be) drop free feedforward of X[0].
        O = self.out_loss(A[0][-1], Y)
        # Initialize per-rep, per-layer activation gradients to zero; only
        # the (drop-free) first rep receives the output-loss gradient.
        dLdA = [[gp.zeros(Aj.shape) for Aj in A[0]] \
                for i in range(dev_reps)]
        dLdA[0][-1] = O['dL']
        # Compute DEV regularizer loss and gradients
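        # For each layer with a nonzero DEV weight, lnf.dev_loss measures the
        # disagreement (e.g. variance) among that layer's activations across
        # the dropout replicates, returning the penalty in 'L' and its
        # gradients w.r.t. each replicate's activations in 'dLdA'.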
        Ld = 0.0
        for i in range(self.layer_count):
            dev_type = self.dev_types[i]
            dev_lam = self.dev_lams[i]
            if (dev_lam > 0.0000001):
                Ai = [A[j][i] for j in range(dev_reps)]
                Di = lnf.dev_loss(Ai, dev_type, 0)
                Ld = Ld + (dev_lam * Di['L'])
                for j in range(dev_reps):
                    dLdA[j][i] = dLdA[j][i] + (dev_lam * Di['dLdA'][j])
        # Backpropagate gradients for each DEV rep
        B = {'dLdWs': [gp.zeros(W.shape) for W in Ws]}
        for i in range(dev_reps):
            Bi = self.backprop(dLdA[i], A[i], X[i], M[i], Ws)
            for j in range(self.layer_count):
                B['dLdWs'][j] = B['dLdWs'][j] + Bi['dLdWs'][j]
        # Compute parameter regularization loss and gradients
        R = self.reg_loss(Ws)
        # Collect the output loss, DEV loss, and regularization loss
        L = [O['L'], Ld, R['L']]
        # Combine backpropagated (output + DEV) weight gradients with the
        # regularization gradients
        dLdWs = [(dWb + dWr) for (dWb, dWr) in zip(B['dLdWs'], R['dLdWs'])]
        return {'L': L, 'dLdWs': dLdWs}
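
    # ------------------------------------------------------------------
    # Usage sketch (illustrative only): the names 'net', 'load_batch', and
    # 'make_drop_masks' below are assumptions for illustration; only
    # dev_loss itself and the list-of-reps layout of X and M come from the
    # method above.
    # ------------------------------------------------------------------
    #
    #   dev_reps = 4
    #   Xb, Yb = load_batch()                    # hypothetical batch loader
    #   X = [Xb for _ in range(dev_reps)]        # same batch, repeated
    #   # M[0] should apply no dropout, since Yb is paired with the drop-free
    #   # rep X[0]; the remaining reps get randomly sampled per-layer masks.
    #   M = [make_drop_masks(net, Xb.shape[0], drop_free=(i == 0))
    #        for i in range(dev_reps)]
    #   result = net.dev_loss(X, Yb, M)
    #   out_loss, dev_loss_val, reg_loss = result['L']
    #   grads = result['dLdWs']                  # summed per-layer gradients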