def __init__(self, numpy_rng, theano_rng=None, n_ins=784,
             hidden_layers_sizes=[500, 500], n_outs=1,
             corruption_levels=[0.1, 0.1]):
    """ This class is made to support a variable number of layers.

    :type numpy_rng: numpy.random.RandomState
    :param numpy_rng: numpy random number generator used to draw initial
                      weights

    :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
    :param theano_rng: Theano random generator; if None is given one is
                       generated based on a seed drawn from `numpy_rng`

    :type n_ins: int
    :param n_ins: dimension of the input to the SdA

    :type hidden_layers_sizes: list of ints
    :param hidden_layers_sizes: intermediate layers sizes, must contain
                                at least one value

    :type n_outs: int
    :param n_outs: dimension of the output of the network

    :type corruption_levels: list of float
    :param corruption_levels: amount of corruption to use for each layer
    """
    self.sigmoid_layers = []
    self.dA_layers = []
    self.params = []
    self.n_layers = len(hidden_layers_sizes)

    assert self.n_layers > 0

    if not theano_rng:
        theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

    # allocate symbolic variables for the data
    self.x = T.matrix('x')  # the data is presented as rasterized images
    self.y = T.matrix('y')  # the targets are presented as a matrix of
                            # real values (one row per example)
    # self.y = T.ivector('y')

    # The SdA is an MLP in which the weights of each intermediate layer
    # are shared with a different denoising autoencoder.
    # We will first construct the SdA as a deep multilayer perceptron,
    # and when constructing each sigmoidal layer we also construct a
    # denoising autoencoder that shares weights with that layer.
    # During pretraining we will train these autoencoders (which will
    # lead to changing the weights of the MLP as well).
    # During finetuning we will finish training the SdA by doing
    # stochastic gradient descent on the MLP.
    for i in xrange(self.n_layers):
        # construct the sigmoidal layer

        # the size of the input is either the number of hidden units of
        # the layer below or the input size if we are on the first layer
        if i == 0:
            input_size = n_ins
        else:
            input_size = hidden_layers_sizes[i - 1]

        # the input to this layer is either the activation of the hidden
        # layer below or the input of the SdA if we are on the first
        # layer
        if i == 0:
            layer_input = self.x
        else:
            layer_input = self.sigmoid_layers[-1].output

        sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                    input=layer_input,
                                    n_in=input_size,
                                    n_out=hidden_layers_sizes[i],
                                    activation=T.nnet.sigmoid)
        # add the layer to our list of layers
        self.sigmoid_layers.append(sigmoid_layer)
        # it's arguably a philosophical question...
        # but we are going to only declare that the parameters of the
        # sigmoid_layers are parameters of the StackedDAA;
        # the visible biases in the dA are parameters of those dAs,
        # but not of the SdA
        self.params.extend(sigmoid_layer.params)

        # Construct a denoising autoencoder that shares weights with
        # this layer
        dA_layer = dA(numpy_rng=numpy_rng,
                      theano_rng=theano_rng,
                      input=layer_input,
                      n_visible=input_size,
                      n_hidden=hidden_layers_sizes[i],
                      W=sigmoid_layer.W,
                      bhid=sigmoid_layer.b)
        self.dA_layers.append(dA_layer)

    # We now add a recurrent regression layer on top of the MLP
    self.rnn = RNN(input=self.sigmoid_layers[-1].output,
                   n_in=hidden_layers_sizes[-1],
                   n_hidden=5000,
                   n_out=n_outs,
                   activation=T.tanh,
                   output_type='real',
                   use_symbolic_softmax=False)

    # the logistic-regression top layer is kept for reference:
    # self.logLayer = LogisticRegression(
    #     input=self.sigmoid_layers[-1].output,
    #     n_in=hidden_layers_sizes[-1], n_out=n_outs)
    # self.get_prediction = theano.function(
    #     inputs=[self.x],
    #     outputs=[self.logLayer.y_pred]
    # )
    # self.params.extend(self.logLayer.params)
    # self.finetune_cost = self.logLayer.squared_error(self.y)
    # self.errors = self.logLayer.errors(self.y)

    self.get_prediction = theano.function(
        inputs=[self.x],
        outputs=T.round(self.rnn.y_pred)
    )
    self.params.extend(self.rnn.params)

    self.finetune_cost = self.rnn.mse(self.y)
    self.errors = self.rnn.errors(self.y)
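# A hedged usage sketch of the RNN-topped variant above. The enclosing
# class name (`SdA` here) is not shown in this excerpt and is assumed,
# as are the data shapes and the random seed.
import numpy
import theano

numpy_rng = numpy.random.RandomState(89677)
sda = SdA(numpy_rng=numpy_rng, n_ins=784,
          hidden_layers_sizes=[500, 500], n_outs=1,
          corruption_levels=[0.1, 0.1])

# get_prediction is a compiled Theano function: it maps a design matrix
# (one example per row) to the rounded output of the RNN top layer
test_x = numpy.random.rand(20, 784).astype(theano.config.floatX)
preds = sda.get_prediction(test_x)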
def __init__(self, numpy_rng, theano_rng=None, n_ins=784,
             hidden_layers_sizes=[500, 500], n_outs=1,
             corruption_levels=[0.1, 0.1]):
    """ This class is made to support a variable number of layers.

    :type numpy_rng: numpy.random.RandomState
    :param numpy_rng: numpy random number generator used to draw initial
                      weights

    :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
    :param theano_rng: Theano random generator; if None is given one is
                       generated based on a seed drawn from `numpy_rng`

    :type n_ins: int
    :param n_ins: dimension of the input to the SdA

    :type hidden_layers_sizes: list of ints
    :param hidden_layers_sizes: intermediate layers sizes, must contain
                                at least one value

    :type n_outs: int
    :param n_outs: dimension of the output of the network

    :type corruption_levels: list of float
    :param corruption_levels: amount of corruption to use for each layer
    """
    self.sigmoid_layers = []
    self.dA_layers = []
    self.params = []
    self.n_layers = len(hidden_layers_sizes)

    assert self.n_layers > 0

    if not theano_rng:
        theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

    # allocate symbolic variables for the data
    self.x = T.matrix('x')  # the data is presented as rasterized images
    self.y = T.matrix('y')  # the targets are presented as a matrix of
                            # real values (one row per example)

    # The SdA is an MLP in which the weights of each intermediate layer
    # are shared with a different denoising autoencoder.
    # We will first construct the SdA as a deep multilayer perceptron,
    # and when constructing each sigmoidal layer we also construct a
    # denoising autoencoder that shares weights with that layer.
    # During pretraining we will train these autoencoders (which will
    # lead to changing the weights of the MLP as well).
    # During finetuning we will finish training the SdA by doing
    # stochastic gradient descent on the MLP.
    for i in xrange(self.n_layers):
        # construct the sigmoidal layer

        # the size of the input is either the number of hidden units of
        # the layer below or the input size if we are on the first layer
        if i == 0:
            input_size = n_ins
        else:
            input_size = hidden_layers_sizes[i - 1]

        # the input to this layer is either the activation of the hidden
        # layer below or the input of the SdA if we are on the first
        # layer
        if i == 0:
            layer_input = self.x
        else:
            layer_input = self.sigmoid_layers[-1].output

        sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                    input=layer_input,
                                    n_in=input_size,
                                    n_out=hidden_layers_sizes[i],
                                    activation=T.nnet.sigmoid)
        # add the layer to our list of layers
        self.sigmoid_layers.append(sigmoid_layer)
        # it's arguably a philosophical question...
        # but we are going to only declare that the parameters of the
        # sigmoid_layers are parameters of the StackedDAA;
        # the visible biases in the dA are parameters of those dAs,
        # but not of the SdA
        self.params.extend(sigmoid_layer.params)

        # Construct a denoising autoencoder that shares weights with
        # this layer
        dA_layer = dA(numpy_rng=numpy_rng,
                      theano_rng=theano_rng,
                      input=layer_input,
                      n_visible=input_size,
                      n_hidden=hidden_layers_sizes[i],
                      W=sigmoid_layer.W,
                      bhid=sigmoid_layer.b)
        self.dA_layers.append(dA_layer)

    # We now need to add a logistic layer on top of the MLP
    self.logLayer = LogisticRegression(
        input=self.sigmoid_layers[-1].output,
        n_in=hidden_layers_sizes[-1], n_out=n_outs)

    self.get_prediction = theano.function(inputs=[self.x],
                                          outputs=[self.logLayer.y_pred])

    self.params.extend(self.logLayer.params)

    # compute the cost for the second phase of training; the negative
    # log likelihood is kept for reference, but the squared error is
    # used since the targets are real-valued
    # self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
    self.finetune_cost = self.logLayer.squared_error(self.y)

    # symbolic variable that points to the number of errors made on the
    # minibatch given by self.x and self.y
    self.errors = self.logLayer.errors(self.y)
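# The "one step of finetuning" that the constructor leaves for the
# caller is not shown in this excerpt; below is a minimal sketch of the
# usual Theano pattern, assuming `sda` is an instance of the enclosing
# class and `train_x`, `train_y` are Theano shared variables holding the
# training set. Learning rate and batch size are assumed values.
import theano
import theano.tensor as T

gparams = T.grad(sda.finetune_cost, sda.params)
learning_rate = 0.1  # assumed
updates = [(param, param - learning_rate * gparam)
           for param, gparam in zip(sda.params, gparams)]

index = T.lscalar('index')  # minibatch index
batch_size = 20             # assumed
train_fn = theano.function(
    inputs=[index],
    outputs=sda.finetune_cost,
    updates=updates,
    givens={sda.x: train_x[index * batch_size: (index + 1) * batch_size],
            sda.y: train_y[index * batch_size: (index + 1) * batch_size]})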
def __init__(self, numpy_rng, theano_rng=None, n_ins=784,
             hidden_layers_sizes=[500, 500], n_outs=1,
             corruption_levels=[0.1, 0.1], y_type=1, sparse_weight=0,
             recurrent=False, dropout=False):
    """ This class is made to support a variable number of layers.

    :type numpy_rng: numpy.random.RandomState
    :param numpy_rng: numpy random number generator used to draw initial
                      weights

    :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
    :param theano_rng: Theano random generator; if None is given one is
                       generated based on a seed drawn from `numpy_rng`

    :type n_ins: int
    :param n_ins: dimension of the input to the SdA

    :type hidden_layers_sizes: list of ints
    :param hidden_layers_sizes: intermediate layers sizes; may be empty,
                                in which case the output layer sits
                                directly on the input

    :type n_outs: int
    :param n_outs: dimension of the output of the network

    :type corruption_levels: list of float
    :param corruption_levels: amount of corruption to use for each layer

    :type y_type: int
    :param y_type: 0 for real-valued targets (squared error), otherwise
                   integer class labels (negative log likelihood)

    :type sparse_weight: float
    :param sparse_weight: if nonzero, use SparseAutoencoder layers with
                          this regularization weight instead of plain dA

    :type recurrent: bool
    :param recurrent: if True, put an RNN on top of the MLP instead of a
                      logistic regression layer

    :type dropout: bool
    :param dropout: if True, use DropoutHiddenLayer instead of HiddenLayer
    """
    self.sigmoid_layers = []
    self.dA_layers = []
    self.params = []
    self.n_layers = len(hidden_layers_sizes)

    # unlike the variants above, zero hidden layers are allowed here
    # assert self.n_layers > 0

    if not theano_rng:
        theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

    # allocate symbolic variables for the data
    self.x = T.matrix('x')  # the data is presented as rasterized images
    if y_type == 0:
        self.y = T.matrix('y')  # the targets are real values
    else:
        if recurrent:
            self.y = T.matrix(name='y', dtype='int32')
        else:
            self.y = T.ivector('y')  # the labels are presented as a 1D
                                     # vector of [int] labels

    # The SdA is an MLP in which the weights of each intermediate layer
    # are shared with a different denoising autoencoder.
    # We will first construct the SdA as a deep multilayer perceptron,
    # and when constructing each sigmoidal layer we also construct a
    # denoising autoencoder that shares weights with that layer.
    # During pretraining we will train these autoencoders (which will
    # lead to changing the weights of the MLP as well).
    # During finetuning we will finish training the SdA by doing
    # stochastic gradient descent on the MLP.
    for i in xrange(self.n_layers):
        # the size of the input is either the number of hidden units of
        # the layer below or the input size if we are on the first layer
        if i == 0:
            input_size = n_ins
        else:
            input_size = hidden_layers_sizes[i - 1]

        # the input to this layer is either the activation of the hidden
        # layer below or the input of the SdA if we are on the first
        # layer
        if i == 0:
            layer_input = self.x
        else:
            layer_input = self.sigmoid_layers[-1].output

        # construct the sigmoidal layer
        if dropout:
            print 'Dropout'
            sigmoid_layer = DropoutHiddenLayer(rng=numpy_rng,
                                               input=layer_input,
                                               n_in=input_size,
                                               n_out=hidden_layers_sizes[i],
                                               activation=T.nnet.sigmoid)
        else:
            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=T.nnet.sigmoid)
        # add the layer to our list of layers
        self.sigmoid_layers.append(sigmoid_layer)
        # it's arguably a philosophical question...
        # but we are going to only declare that the parameters of the
        # sigmoid_layers are parameters of the StackedDAA;
        # the visible biases in the dA are parameters of those dAs,
        # but not of the SdA
        self.params.extend(sigmoid_layer.params)

        # Construct a denoising autoencoder that shares weights with
        # this layer
        if sparse_weight == 0:
            dA_layer = dA(numpy_rng=numpy_rng,
                          theano_rng=theano_rng,
                          input=layer_input,
                          n_visible=input_size,
                          n_hidden=hidden_layers_sizes[i],
                          W=sigmoid_layer.W,
                          bhid=sigmoid_layer.b)
        else:
            print 'SparseAutoencoder used'
            dA_layer = SparseAutoencoder(input=layer_input,
                                         n_visible=input_size,
                                         n_hidden=hidden_layers_sizes[i],
                                         W=sigmoid_layer.W,
                                         bhid=sigmoid_layer.b,
                                         reg_weight=sparse_weight,
                                         corruption_level=corruption_levels[i])
        self.dA_layers.append(dA_layer)

    # We now need to add an output layer on top of the MLP
    if y_type == 0:
        output_type = 'real'
    else:
        output_type = 'binary'

    if self.sigmoid_layers:
        top_input = self.sigmoid_layers[-1].output
        top_n_in = hidden_layers_sizes[-1]
    else:
        # no hidden layers: the output layer sits directly on the input
        top_input = self.x
        top_n_in = n_ins

    if recurrent:
        self.logLayer = RNN(input=top_input,
                            n_in=top_n_in,
                            n_hidden=500,
                            n_out=1,
                            activation=T.tanh,
                            output_type=output_type,
                            use_symbolic_softmax=False)
    else:
        self.logLayer = LogisticRegression(input=top_input,
                                           n_in=top_n_in,
                                           n_out=n_outs,
                                           y_type=y_type)

    if recurrent:
        if y_type == 0:
            outputs = T.round(self.logLayer.y_pred)
        else:
            outputs = T.round(self.logLayer.p_y_given_x)
        self.get_prediction = theano.function(
            inputs=[self.x],
            outputs=outputs
        )
    else:
        self.get_prediction = theano.function(
            inputs=[self.x],
            outputs=[self.logLayer.y_pred]
        )

    if self.sigmoid_layers:
        self.get_lastlayer_output = theano.function(
            inputs=[self.x],
            outputs=[self.sigmoid_layers[-1].output]
        )

    self.params.extend(self.logLayer.params)

    # compute the cost for the second phase of training (finetuning);
    # the plain negative log likelihood is kept for reference
    # self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
    if recurrent:
        self.finetune_cost = self.logLayer.loss(self.y)
    else:
        if y_type == 0:
            self.finetune_cost = self.logLayer.squared_error(self.y)
        else:
            self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)

    # symbolic variable that points to the number of errors made on the
    # minibatch given by self.x and self.y
    self.errors = self.logLayer.errors(self.y)
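# A hedged construction example for the configurable variant above; the
# enclosing class name (`SdA`) and all argument values are illustrative
# assumptions, not values from the source.
import numpy

numpy_rng = numpy.random.RandomState(89677)
sda = SdA(numpy_rng=numpy_rng, n_ins=784,
          hidden_layers_sizes=[500, 500], n_outs=1,
          corruption_levels=[0.1, 0.1],
          y_type=0,           # real-valued targets -> squared error
          sparse_weight=0.1,  # nonzero -> SparseAutoencoder layers
          recurrent=False,    # True -> RNN output layer
          dropout=True)       # True -> DropoutHiddenLayer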