Example #1
    def backprop(self, dLdA, A, X, M, Ws=[]):
        """Run backprop for the activation gradients in dLdA.

        The lengths (i.e. len()) of the lists of arrays dLdA, A, and M should
        all be self.layer_count. The shapes of dLdA[i] and A[i] should be the
        same for all i. The shape of M[i] should match the shape of A[i-1] for
        i from 1 to (self.layer_count - 1). The shape of M[0] should match the
        shape of X. Weight array list Ws defaults to self.layer_weights().
        """
        if (len(Ws) == 0):
            Ws = self.layer_weights()
        dLdWs = []
        dLdX = []
        for i in range((self.layer_count-1),-1,-1):
            if (i == 0):
                # First layer receives X as input
                Xi = M[i] * lnf.bias(X, self.bias_val)
            else:
                # Other layers receive previous layer's activations as input
                Xi = M[i] * lnf.bias(A[i-1], self.bias_val)
            # BP current grads onto current layer's weights and inputs
            Bi = self.layers[i].backprop(dLdA[i], A[i], Xi, Ws[i])
            # Rescale BP-ed input grads to account for dropout mask
            Bi['dLdX'] = M[i] * Bi['dLdX']
            if (i == 0):
                # BP-ed input grads at first layer are grads on X
                dLdX = lnf.unbias(Bi['dLdX'])
            else:
                # BP-ed input grads at other layers should be added to
                # whatever grads were already there (e.g. DEV gradients)
                dLdA[i-1] = dLdA[i-1] + lnf.unbias(Bi['dLdX'])
            # Record the BP-ed gradients on current layer's inbound weights
            dLdWs.append(Bi['dLdW'])
        dLdWs.reverse()
        return {'dLdWs': dLdWs, 'dLdX': dLdX}
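
The backprop code above leans on two helpers from the lnf module: bias(), which appends a constant bias column to its input, and unbias(), which drops the matching column from the back-propagated gradients. Their definitions are not part of this example, so the following is only a minimal sketch of what they are assumed to do, inferred from how they are called above (the bodies are guesses, not the module's actual code):

import numpy as np

def bias(X, bias_val=1.0):
    """Append a constant column of bias_val to the right of X."""
    b = bias_val * np.ones((X.shape[0], 1))
    return np.hstack([X, b])

def unbias(dLdXb):
    """Strip the gradient column corresponding to the appended bias column."""
    return dLdXb[:, :-1]

Under this reading, unbias(Bi['dLdX']) returns gradients with the same column count as the layer's unbiased input, which is what lets them be accumulated into dLdA[i-1] in the loop above.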
Example #2
    def feedforward(self, X, M=[], Ws=[]):
        """Feedforward for inputs X with drop masks M and layer weights Ws."""
        if (len(M) == 0):
            # If no masks are given, use drop-free feedforward
            M = self.get_drop_masks(X.shape[0], 0, 0)
        if (len(Ws) == 0):
            # Default to this network's current per-layer weights
            Ws = self.layer_weights()
        A = []
        for i in range(self.layer_count):
            if (i == 0):
                # First layer receives X as input
                Xi = M[i] * lnf.bias(X, self.bias_val)
            else:
                # Other layers receive previous layer's activations as input
                Xi = M[i] * lnf.bias(A[i-1], self.bias_val)
            # Perform feedforward through the i'th network layer
            Ai = self.layers[i].feedforward(Xi, Ws[i])
            A.append(Ai['post'])
        return A
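
Read together, the two methods cover one training step: feedforward collects the per-layer activations A, the loss gradient at the output layer seeds dLdA, and backprop converts that into per-layer weight gradients. The sketch below shows such a step; it assumes net is an instance of the class these methods belong to, Y holds targets for a squared-error loss, and the non-zero drop rates follow the (count, rate, rate) argument order seen in the drop-free get_drop_masks call above. None of these names or rates come from the original code:

import numpy as np

def train_step(net, X, Y, learn_rate=0.01):
    """One illustrative SGD step using the feedforward/backprop pair above."""
    # Sample drop masks; the 0.2/0.5 rates are placeholders, not values from the source.
    M = net.get_drop_masks(X.shape[0], 0.2, 0.5)
    A = net.feedforward(X, M)
    # Seed output-layer gradients with a mean squared error loss; other layers start at zero.
    dLdA = [np.zeros_like(Ai) for Ai in A]
    dLdA[-1] = 2.0 * (A[-1] - Y) / X.shape[0]
    grads = net.backprop(dLdA, A, X, M)
    # Plain in-place SGD update on the network's current weight arrays.
    for W, dLdW in zip(net.layer_weights(), grads['dLdWs']):
        W -= learn_rate * dLdW
    return np.mean((A[-1] - Y)**2)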