class DNNDropout(object):

    def __init__(self, np_rng, hidden_layers_sizes, n_ins, n_outs,
                 theano_rng=None, dnn_shared=None, shared_layers=None,
                 input_dropout_factor=0.1, dropout_factor=0.5):
        if shared_layers is None:
            shared_layers = []
        self.layers = []
        self.dropout_layers = []
        self.params = []
        self.delta_params = []

        self.max_col_norm = 2.0
        self.n_ins = n_ins
        self.n_outs = n_outs
        self.hidden_layers_sizes = hidden_layers_sizes
        self.hidden_layers_number = len(self.hidden_layers_sizes)
        self.input_dropout_factor = input_dropout_factor
        self.dropout_factor = dropout_factor

        # sometimes you need a theano rng and not a numpy one
        if not theano_rng:
            theano_rng = RandomStreams(np_rng.randint(2 ** 30))

        # allocate symbolic variables for the data
        self.x = T.matrix('x')
        self.y = T.ivector('y')

        for i in xrange(self.hidden_layers_number):
            # construct the hidden layer
            if i == 0:
                input_size = self.n_ins
                layer_input = self.x
                if self.input_dropout_factor > 0.0:
                    dropout_layer_input = _dropout_from_layer(
                        theano_rng, self.x, self.input_dropout_factor)
                else:
                    dropout_layer_input = self.x
            else:
                input_size = self.hidden_layers_sizes[i - 1]
                # the dropout-free stack scales the previous layer's output by
                # (1 - dropout_factor) to approximate the averaged network
                layer_input = (1 - self.dropout_factor) * self.layers[-1].output
                dropout_layer_input = self.dropout_layers[-1].dropout_output

            # optionally reuse weights from a shared (pre-trained) network
            W = None
            b = None
            if i in shared_layers:
                W = dnn_shared.layers[i].W
                b = dnn_shared.layers[i].b

            # the dropout layer and the plain layer share the same W and b
            dropout_layer = DropoutHiddenLayer(rng=np_rng,
                                               input=dropout_layer_input,
                                               n_in=input_size,
                                               n_out=self.hidden_layers_sizes[i],
                                               W=W, b=b,
                                               dropout_factor=self.dropout_factor)
            hidden_layer = HiddenLayer(rng=np_rng,
                                       input=layer_input,
                                       n_in=input_size,
                                       n_out=self.hidden_layers_sizes[i],
                                       W=dropout_layer.W,
                                       b=dropout_layer.b)

            # add the layer to our list of layers
            self.layers.append(hidden_layer)
            self.dropout_layers.append(dropout_layer)
            self.params.extend(dropout_layer.params)
            self.delta_params.extend(dropout_layer.delta_params)

        # we now need to add a logistic layer on top of the MLP
        self.dropout_logLayer = LogisticRegression(
            input=self.dropout_layers[-1].dropout_output,
            n_in=self.hidden_layers_sizes[-1], n_out=self.n_outs)

        self.logLayer = LogisticRegression(
            input=(1 - self.dropout_factor) * self.layers[-1].output,
            n_in=self.hidden_layers_sizes[-1], n_out=self.n_outs,
            W=self.dropout_logLayer.W, b=self.dropout_logLayer.b)

        self.dropout_layers.append(self.dropout_logLayer)
        self.layers.append(self.logLayer)
        self.params.extend(self.dropout_logLayer.params)
        self.delta_params.extend(self.dropout_logLayer.delta_params)

        # the fine-tuning cost is computed on the dropout network; the error
        # rate is computed on the rescaled, dropout-free network
        self.finetune_cost = self.dropout_logLayer.negative_log_likelihood(self.y)
        self.errors = self.logLayer.errors(self.y)

    def build_functions(self, train_shared_xy, valid_shared_xy, test_shared_xy,
                        batch_size, onlyTrain=False):
        (train_set_x, train_set_y) = train_shared_xy
        if not onlyTrain:
            (valid_set_x, valid_set_y) = valid_shared_xy
            (test_set_x, test_set_y) = test_shared_xy

        # wrap the data in shared variables; features are cast to float32 and
        # scaled by 1/256, labels are cast to int32
        train_set_x = theano.shared(value=train_set_x.astype(np.float32, copy=False)) / 256
        train_set_y = theano.shared(value=train_set_y.astype(np.int32, copy=False))
        if not onlyTrain:
            valid_set_x = theano.shared(value=valid_set_x.astype(np.float32, copy=False)) / 256
            valid_set_y = theano.shared(value=valid_set_y.astype(np.int32, copy=False))
            test_set_x = theano.shared(value=test_set_x.astype(np.float32, copy=False)) / 256
            test_set_y = theano.shared(value=test_set_y.astype(np.int32, copy=False))

        index = T.lscalar('index')  # index to a [mini]batch
        learning_rate = T.fscalar('learning_rate')
        momentum = T.fscalar('momentum')

        # compute the gradients with respect to the model parameters
        gparams = T.grad(self.finetune_cost, self.params)

        # compute the fine-tuning updates: classical momentum, with the
        # velocity stored in delta_params
        updates = collections.OrderedDict()
        for dparam, gparam in zip(self.delta_params, gparams):
            updates[dparam] = momentum * dparam - gparam * learning_rate
        for dparam, param in zip(self.delta_params, self.params):
            updates[param] = param + updates[dparam]

        # constrain the column norms of each hidden weight matrix to at most
        # max_col_norm, rescaling the updated weights when they exceed it
        for i in xrange(self.hidden_layers_number):
            W = self.layers[i].W
            if W in updates:
                updated_W = updates[W]
                col_norms = T.sqrt(T.sum(T.sqr(updated_W), axis=0))
                desired_norms = T.clip(col_norms, 0, self.max_col_norm)
                updates[W] = updated_W * (desired_norms / (1e-7 + col_norms))

        train_fn = theano.function(
            inputs=[index, theano.Param(learning_rate, default=0.1),
                    theano.Param(momentum, default=0.5)],
            outputs=[self.errors, self.finetune_cost],
            updates=updates,
            givens={
                self.x: train_set_x[index * batch_size: (index + 1) * batch_size],
                self.y: train_set_y[index * batch_size: (index + 1) * batch_size]})

        if not onlyTrain:
            valid_fn = theano.function(
                inputs=[index],
                outputs=self.errors,
                givens={
                    self.x: valid_set_x[index * batch_size: (index + 1) * batch_size],
                    self.y: valid_set_y[index * batch_size: (index + 1) * batch_size]})
            test_fn = theano.function(
                inputs=[index],
                outputs=self.errors,
                givens={
                    self.x: test_set_x[index * batch_size: (index + 1) * batch_size],
                    self.y: test_set_y[index * batch_size: (index + 1) * batch_size]})
            return train_fn, valid_fn, test_fn
        return train_fn, None, None
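

# The constructor above calls a helper `_dropout_from_layer` that is defined
# elsewhere in this package (alongside HiddenLayer / DropoutHiddenLayer).
# The function below is only a minimal sketch of what such a helper typically
# does, under the assumption that `dropout_factor` is the probability of
# *dropping* a unit; it is named `_dropout_from_layer_sketch` so it does not
# shadow the real helper, and it may differ from the actual implementation.
def _dropout_from_layer_sketch(theano_rng, layer_output, dropout_factor):
    # sample a binary mask with P(keep) = 1 - dropout_factor and apply it
    mask = theano_rng.binomial(n=1, p=1 - dropout_factor,
                               size=layer_output.shape,
                               dtype=theano.config.floatX)
    return layer_output * mask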
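

# A minimal, hypothetical usage sketch (not part of the original module). It
# assumes 10-dimensional random features, 3 target classes, and that the layer
# classes and imports used by this module (theano, T, np, RandomStreams, etc.)
# are available. Feature and label arrays are passed in the raw numpy form
# that build_functions expects before it wraps them in shared variables.
if __name__ == '__main__':
    import numpy as np  # may already be imported at module level

    np_rng = np.random.RandomState(1234)
    dnn = DNNDropout(np_rng=np_rng,
                     hidden_layers_sizes=[64, 64],
                     n_ins=10, n_outs=3,
                     input_dropout_factor=0.1,
                     dropout_factor=0.5)

    # toy data: 256 frames of 10-dim features with integer labels in {0, 1, 2}
    feats = np_rng.rand(256, 10).astype(np.float32) * 255
    labels = np_rng.randint(0, 3, size=256).astype(np.int32)

    train_fn, valid_fn, test_fn = dnn.build_functions(
        train_shared_xy=(feats, labels),
        valid_shared_xy=None, test_shared_xy=None,
        batch_size=32, onlyTrain=True)

    # one pass over the training data with the default learning rate / momentum
    for batch_index in xrange(256 // 32):
        errors, cost = train_fn(batch_index)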