def forward_propagate_dropout(self, inputBatch, upToLayer=None):
    """
    Perform a (possibly partial) forward pass through the network.
    Updates self.state which, on a full forward pass, holds the input
    followed by each hidden layer's activation and finally the net input
    incident on the output layer. For a full forward pass, we return the
    actual output unit activations. In a partial forward pass we return
    the activations of the layer indexed by upToLayer.
    Reference: "Improving Deep Neural Networks for LVCSR Using Rectified
    Linear Units and Dropout".
    """
    if upToLayer is None:
        # work through all layers
        upToLayer = len(self.weights)
    self.state = [inputBatch]
    for i in range(min(len(self.weights) - 1, upToLayer)):
        if self.dropouts[i] > 0:
            # drop individual weights with probability self.dropouts[i] and
            # rescale the input so the expected net input is unchanged
            mask = gp.rand(*self.weights[i].shape) > self.dropouts[i]
            dropoutMultiplier = 1.0 / (1.0 - self.dropouts[i])
            curActs = self.hidden_activation_functions[i].activate(
                gp.dot(dropoutMultiplier * self.state[-1], mask * self.weights[i]) + self.biases[i])
        else:
            curActs = self.hidden_activation_functions[i].activate(
                gp.dot(self.state[-1], self.weights[i]) + self.biases[i])
        # apply dropout on hidden units (disabled; the weight mask above is used instead)
        # if self.dropouts[i+1] > 0:
        #     curActs = curActs * (gp.rand(*curActs.shape) > self.dropouts[i+1])
        self.state.append(curActs)
    if upToLayer >= len(self.weights):
        self.state.append(gp.dot(self.state[-1], self.weights[-1]) + self.biases[-1])
        self.acts = self.output_activation_function.activate(self.state[-1])
        return self.acts
    # we didn't reach the output units; to return the first set of hidden
    # activations, set upToLayer to 1
    return self.state[upToLayer]

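
# A minimal sanity-check sketch (illustration only, not part of the class above).
# It verifies the inverted-dropout idea forward_propagate_dropout relies on:
# masking weights with keep probability (1 - p) while rescaling the input by
# 1/(1 - p) leaves the *expected* net input unchanged. Plain numpy is used here
# instead of gnumpy, and the function name and sizes are hypothetical.
import numpy as np

def _expected_net_input_check(p=0.5, trials=2000, seed=0):
    rng = np.random.RandomState(seed)
    x = rng.rand(1, 20)                        # one input row
    W = rng.randn(20, 10)                      # one weight matrix
    plain = np.dot(x, W)                       # net input with no dropout
    acc = np.zeros_like(plain)
    for _ in range(trials):
        mask = rng.rand(*W.shape) > p          # same masking rule as above
        acc += np.dot(x / (1.0 - p), mask * W) # same rescaling as above
    # the average over many masks approaches the undropped net input
    return np.abs(acc / trials - plain).max()
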
def CD1(visiable, weights, visible_bias, hidden_bias, visible_unit=Binary(), hidden_unit=Binary(), dropout=None):
    """
    Using Gaussian hidden units hasn't been tested. By assuming the
    visible units are Binary, ReLU, or Gaussian and the hidden units are
    Binary or ReLU, this function becomes quite simple.
    """
    # Positive phase
    hidden = hidden_unit.activate(gp.dot(visiable, weights) + hidden_bias)
    hidden_sampled = hidden_unit.sampleStates(hidden)
    # Negative phase
    v2 = visible_unit.activate(gp.dot(hidden_sampled, weights.T) + visible_bias)
    h2 = hidden_unit.activate(gp.dot(v2, weights) + hidden_bias)
    # calculate gradients
    gw = gp.dot(visiable.T, hidden) - gp.dot(v2.T, h2)
    gv = visiable.sum(axis=0) - v2.sum(axis=0)
    gh = hidden.sum(axis=0) - h2.sum(axis=0)
    return gw, gh, gv, v2

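
# A hedged usage sketch for CD1: one stochastic-gradient update of an RBM from a
# single minibatch. `batch`, `learning_rate`, and the parameter arrays are
# assumptions for illustration, and note that CD1's `dropout` argument is not
# used inside CD1 itself (CD1_dropout handles that case). Because CD1 returns
# (positive - negative) statistics, we add them, scaled by the learning rate.
def rbm_update_sketch(batch, weights, visible_bias, hidden_bias, learning_rate=0.01):
    gw, gh, gv, v2 = CD1(batch, weights, visible_bias, hidden_bias)
    n = batch.shape[0]                       # average the summed statistics
    weights += learning_rate * gw / n
    hidden_bias += learning_rate * gh / n
    visible_bias += learning_rate * gv / n
    # squared reconstruction error is a rough but common progress measure
    return ((batch - v2) ** 2).sum() / n
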
def gradients(self, forward_propagateState, errSignals):
    """
    Lazily generate (negative) gradients for the weights and biases given
    the result of forward_propagate (forward_propagateState) and the
    result of backward_propagate (errSignals).
    """
    assert len(forward_propagateState) == len(self.weights) + 1
    assert len(errSignals) == len(self.weights) == len(self.biases)
    for i in range(len(self.weights)):
        yield gp.dot(forward_propagateState[i].T, errSignals[i]), errSignals[i].sum(axis=0)

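
# A small numpy check (illustration only): the weight gradient
# gp.dot(forward_propagateState[i].T, errSignals[i]) yielded above is exactly the
# sum over the batch of per-example outer products, and the bias gradient is the
# column sum of the error signals. Names and sizes here are hypothetical.
import numpy as np

def _gradient_identity_check(seed=0):
    rng = np.random.RandomState(seed)
    acts = rng.randn(5, 3)   # activations feeding the layer: 5 examples, 3 units
    errs = rng.randn(5, 4)   # error signals for the 4 units of the next layer
    batched = np.dot(acts.T, errs)
    summed = sum(np.outer(acts[n], errs[n]) for n in range(acts.shape[0]))
    return np.allclose(batched, summed)   # True
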
def CD1_dropout(visiable, weights, visible_bias, hidden_bias, visible_unit=Binary(), hidden_unit=Binary(), dropout=0.0):
    """
    Using Gaussian hidden units hasn't been tested. By assuming the
    visible units are Binary, ReLU, or Gaussian and the hidden units are
    Binary or ReLU, this function becomes quite simple.
    """
    # Positive phase
    if dropout == 0:
        hidden = hidden_unit.activate(gp.dot(visiable, weights) + hidden_bias)
    else:
        # drop individual weights with probability `dropout` and rescale the
        # visible input so the expected net input is unchanged
        mask = gp.rand(*weights.shape) > dropout
        dropoutMultiplier = 1.0 / (1.0 - dropout)
        hidden = hidden_unit.activate(gp.dot(dropoutMultiplier * visiable, mask * weights) + hidden_bias)
    hidden_sampled = hidden_unit.sampleStates(hidden)
    # Negative phase
    v2 = visible_unit.activate(gp.dot(hidden_sampled, weights.T) + visible_bias)
    h2 = hidden_unit.activate(gp.dot(v2, weights) + hidden_bias)
    # calculate gradients
    gw = gp.dot(visiable.T, hidden) - gp.dot(v2.T, h2)
    gv = visiable.sum(axis=0) - v2.sum(axis=0)
    gh = hidden.sum(axis=0) - h2.sum(axis=0)
    return gw, gh, gv, v2

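
# A hedged sketch of greedy layer-wise pretraining that calls CD1_dropout when a
# nonzero dropout rate is configured for a layer and plain CD1 otherwise. The
# `layers` list of dicts (keys 'weights', 'visible_bias', 'hidden_bias',
# 'dropout') is an assumption for illustration, not a structure defined above.
def pretrain_sketch(data, layers, epochs=10, learning_rate=0.01):
    inputs = data
    for layer in layers:
        for _ in range(epochs):
            if layer['dropout'] > 0:
                gw, gh, gv, v2 = CD1_dropout(inputs, layer['weights'],
                                             layer['visible_bias'], layer['hidden_bias'],
                                             dropout=layer['dropout'])
            else:
                gw, gh, gv, v2 = CD1(inputs, layer['weights'],
                                     layer['visible_bias'], layer['hidden_bias'])
            n = inputs.shape[0]
            layer['weights'] += learning_rate * gw / n
            layer['hidden_bias'] += learning_rate * gh / n
            layer['visible_bias'] += learning_rate * gv / n
        # feed the next layer with this layer's (undropped) hidden activations
        inputs = Binary().activate(gp.dot(inputs, layer['weights']) + layer['hidden_bias'])
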
def forward_propagate(self, inputBatch, upToLayer=None):
    """
    Perform a (possibly partial) forward pass through the network.
    Updates self.state which, on a full forward pass, holds the input
    followed by each hidden layer's activation and finally the net input
    incident on the output layer. For a full forward pass, we return the
    actual output unit activations. In a partial forward pass we return
    the activations of the layer indexed by upToLayer.
    """
    if upToLayer is None:
        # work through all layers
        upToLayer = len(self.weights)
    # self.state holds everything before the output nonlinearity,
    # including the net input to the output units
    self.state = [inputBatch]
    for i in range(min(len(self.weights) - 1, upToLayer)):
        curActs = self.hidden_activation_functions[i].activate(
            gp.dot(self.state[-1], self.weights[i]) + self.biases[i])
        self.state.append(curActs)
    if upToLayer >= len(self.weights):
        self.state.append(gp.dot(self.state[-1], self.weights[-1]) + self.biases[-1])
        self.acts = self.output_activation_function.activate(self.state[-1])
        return self.acts
    # we didn't reach the output units; to return the first set of hidden
    # activations, set upToLayer to 1
    return self.state[upToLayer]

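
# A hedged usage sketch for forward_propagate: a full pass returns the output
# activations, while upToLayer=1 returns the first hidden layer's activations,
# which is useful when treating a trained network as a feature extractor. `net`
# and `batch` are assumptions for illustration.
def forward_pass_sketch(net, batch):
    outputs = net.forward_propagate(batch)                # full pass
    hidden1 = net.forward_propagate(batch, upToLayer=1)   # partial pass
    return outputs, hidden1
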
def backward_propagate(self, outputErrSignal, forward_propagateState=None):
    """
    Perform a backward pass through the network. forward_propagateState
    defaults to self.state (set during forward_propagate) and
    outputErrSignal should be self.output_activation_function.dErrordNetInput(...).
    """
    if forward_propagateState is None:
        forward_propagateState = self.state
    assert len(forward_propagateState) == len(self.weights) + 1

    errSignals = [None for i in range(len(self.weights))]
    errSignals[-1] = outputErrSignal
    for i in reversed(range(len(self.weights) - 1)):
        errSignals[i] = gp.dot(errSignals[i + 1], self.weights[i + 1].T) * \
            self.hidden_activation_functions[i].dEdNetInput(forward_propagateState[i + 1])
    return errSignals

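
# A hedged end-to-end supervised training step tying forward_propagate,
# backward_propagate, and gradients together. The exact signature of
# output_activation_function.dErrordNetInput is not shown above, so the call
# below (targets, net input, activations) is an assumption for illustration;
# because gradients yields negative error gradients, adding them scaled by the
# learning rate performs gradient descent on the error.
def train_step_sketch(net, batch, targets, learning_rate=0.01):
    net.forward_propagate(batch)
    output_err = net.output_activation_function.dErrordNetInput(
        targets, net.state[-1], net.acts)    # assumed signature
    err_signals = net.backward_propagate(output_err)
    n = batch.shape[0]
    for i, (gW, gb) in enumerate(net.gradients(net.state, err_signals)):
        net.weights[i] += learning_rate * gW / n
        net.biases[i] += learning_rate * gb / n
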
def visible_expectation(self, hidden, bias=0.):
    '''Given hidden states, return the expected visible unit values.'''
    visible = gp.dot(hidden, self.weights.T) + self.visible_bias + bias
    return self.visible_unittype.activate(visible)

def hidden_expectation(self, visible, bias=0.):
    '''Given visible data, return the expected hidden unit values.'''
    hidden = gp.dot(visible, self.weights) + self.hidden_bias + bias
    return self.hidden_unittype.activate(hidden)
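
# A hedged sketch of one mean-field "up-down" reconstruction built from
# hidden_expectation and visible_expectation; watching this reconstruction error
# shrink is a common quick check that an RBM is learning. `rbm` and `batch` are
# assumptions for illustration.
def reconstruction_error_sketch(rbm, batch):
    hidden = rbm.hidden_expectation(batch)    # E[h | v]
    recon = rbm.visible_expectation(hidden)   # mean-field reconstruction of v
    return ((batch - recon) ** 2).sum() / batch.shape[0]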