def CD1_dropout(visiable, weights, visible_bias, hidden_bias,
                visible_unit=None, hidden_unit=None, dropout=0.0):
    """One step of contrastive divergence (CD-1) with optional weight dropout.

    Using Gaussian hidden units hasn't been tested. By assuming the visible
    units are Binary, ReLU, or Gaussian and the hidden units are Binary or
    ReLU this function becomes quite simple.

    Parameters
    ----------
    visiable : array
        Mini-batch of visible-unit data, one row per case.
        (Name kept as-is, typo included, for backward compatibility.)
    weights : array
        Visible-to-hidden weight matrix.
    visible_bias, hidden_bias : array
        Bias vectors for the visible and hidden layers.
    visible_unit, hidden_unit : unit objects, optional
        Activation-function objects providing ``activate`` (and, for the
        hidden unit, ``sampleStates``). Default to fresh ``Binary()``
        instances per call.
    dropout : float
        Probability of dropping each individual weight during the positive
        phase; 0.0 disables dropout.

    Returns
    -------
    (gw, gh, gv, v2) : tuple
        Weight, hidden-bias, and visible-bias gradients, plus the
        reconstructed visible activities.
    """
    # Avoid mutable default arguments (a shared Binary() instance would be
    # reused across every call of this function): build fresh units here.
    if visible_unit is None:
        visible_unit = Binary()
    if hidden_unit is None:
        hidden_unit = Binary()

    # Positive phase: hidden activations driven by the data.
    if dropout == 0:
        hidden = hidden_unit.activate(gp.dot(visiable, weights) + hidden_bias)
    else:
        # Drop individual weights; rescale the input by 1/(1-p) so the
        # expected net input matches the no-dropout case.
        mask = gp.rand(*weights.shape) > dropout
        dropout_multiplier = 1.0 / (1.0 - dropout)
        hidden = hidden_unit.activate(
            gp.dot(dropout_multiplier * visiable, mask * weights) + hidden_bias)
    hidden_sampled = hidden_unit.sampleStates(hidden)

    # Negative phase: one reconstruction step (the "1" in CD-1).
    v2 = visible_unit.activate(gp.dot(hidden_sampled, weights.T) + visible_bias)
    h2 = hidden_unit.activate(gp.dot(v2, weights) + hidden_bias)

    # Gradients: positive-phase statistics minus negative-phase statistics.
    gw = gp.dot(visiable.T, hidden) - gp.dot(v2.T, h2)
    gv = visiable.sum(axis=0) - v2.sum(axis=0)
    gh = hidden.sum(axis=0) - h2.sum(axis=0)
    return gw, gh, gv, v2
def forward_propagate_dropout(self, inputBatch, upToLayer=None):
    """Perform a (possibly partial) forward pass through the network.

    Updates ``self.state`` which, on a full forward pass, holds the input
    followed by each hidden layer's activation and finally the net input
    incident on the output layer. For a full forward pass, we return the
    actual output unit activations. In a partial forward pass we return
    None (i.e. the intermediate state for ``upToLayer``).

    Reference: "Improving Deep Neural Networks for LVCSR Using Rectified
    Linear Units and Dropout".

    Parameters
    ----------
    inputBatch : array
        Mini-batch of inputs, one row per case.
    upToLayer : int, optional
        Stop after this many layers; ``None`` means a full pass.
    """
    if upToLayer is None:
        # Work through all layers.
        upToLayer = len(self.weights)
    self.state = [inputBatch]
    for i in range(min(len(self.weights) - 1, upToLayer)):
        if self.dropouts[i] > 0:
            # Drop individual weights for this layer; scale the incoming
            # activations by 1/(1-p) to keep the expected net input equal
            # to the no-dropout case.
            mask = gp.rand(*self.weights[i].shape) > self.dropouts[i]
            dropoutMultiplier = 1.0 / (1.0 - self.dropouts[i])
            curActs = self.hidden_activation_functions[i].activate(
                gp.dot(dropoutMultiplier * self.state[-1],
                       mask * self.weights[i]) + self.biases[i])
        else:
            curActs = self.hidden_activation_functions[i].activate(
                gp.dot(self.state[-1], self.weights[i]) + self.biases[i])
        # NOTE(review): dropout on the hidden activations themselves was
        # disabled in the original source — intent unconfirmed, kept off:
        # if self.dropouts[i+1] > 0:
        #     curActs = curActs * (gp.rand(*curActs.shape) > self.dropouts[i+1])
        self.state.append(curActs)
    if upToLayer >= len(self.weights):
        # Full pass: append the net input to the output layer, then apply
        # the output activation function.
        self.state.append(
            gp.dot(self.state[-1], self.weights[-1]) + self.biases[-1])
        self.acts = self.output_activation_function.activate(self.state[-1])
        return self.acts
    # We didn't reach the output units. To return the first set of hidden
    # activations, we would set upToLayer to 1.
    return self.state[upToLayer]
def sampleStates(self, acts):
    """Stochastically binarize *acts*.

    Each unit is turned on with probability equal to its activation,
    using one independent uniform draw per unit.
    """
    noise = gp.rand(*acts.shape)
    return noise <= acts