Example #1
    def __init__(self, numpy_rng, theano_rng=None, n_ins=784,
                 hidden_layers_sizes=[500, 500], n_outs=1,
                 corruption_levels=[0.1, 0.1]):
        """ This class is made to support a variable number of layers.

        :type numpy_rng: numpy.random.RandomState
        :param numpy_rng: numpy random number generator used to draw initial
                    weights

        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
        :param theano_rng: Theano random generator; if None is given, one is
                           generated based on a seed drawn from `numpy_rng`

        :type n_ins: int
        :param n_ins: dimension of the input to the sdA

        :type hidden_layers_sizes: list of ints
        :param hidden_layers_sizes: intermediate layer sizes; must contain
                                    at least one value

        :type n_outs: int
        :param n_outs: dimension of the output of the network

        :type corruption_levels: list of float
        :param corruption_levels: amount of corruption to use for each
                                  layer
        """

        self.sigmoid_layers = []
        self.dA_layers = []
        self.params = []
        self.n_layers = len(hidden_layers_sizes)

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
        # allocate symbolic variables for the data
        self.x = T.matrix('x')  # the data is presented as rasterized images
        self.y = T.matrix('y')  # the targets are presented as a matrix of
                                # real values
        # self.y = T.ivector('y')  # alternative: a 1D vector of [int] labels

        # The SdA is an MLP whose intermediate-layer weights are shared with
        # different denoising autoencoders.
        # We will first construct the SdA as a deep multilayer perceptron,
        # and when constructing each sigmoidal layer we also construct a
        # denoising autoencoder that shares weights with that layer.
        # During pretraining we will train these autoencoders (which will
        # lead to changing the weights of the MLP as well).
        # During finetuning we will finish training the SdA by doing
        # stochastic gradient descent on the MLP.

        for i in xrange(self.n_layers):
            # construct the sigmoidal layer

            # the size of the input is either the number of hidden units of
            # the layer below or the input size if we are on the first layer
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layers_sizes[i - 1]

            # the input to this layer is either the activation of the hidden
            # layer below or the input of the SdA if you are on the first
            # layer
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[-1].output

            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=T.nnet.sigmoid)
            # add the layer to our list of layers
            self.sigmoid_layers.append(sigmoid_layer)
            # it's arguably a philosophical question...
            # but we are going to only declare that the parameters of the
            # sigmoid_layers are parameters of the StackedDAA
            # the visible biases in the dA are parameters of those
            # dA, but not the SdA
            self.params.extend(sigmoid_layer.params)

            # Construct a denoising autoencoder that shares weights with this
            # layer
            dA_layer = dA(numpy_rng=numpy_rng,
                          theano_rng=theano_rng,
                          input=layer_input,
                          n_visible=input_size,
                          n_hidden=hidden_layers_sizes[i],
                          W=sigmoid_layer.W,
                          bhid=sigmoid_layer.b)
            self.dA_layers.append(dA_layer)

        # We now add an RNN regression layer with real-valued output on top
        # of the MLP (in place of the logistic layer used elsewhere)
        self.rnn = RNN(input=self.sigmoid_layers[-1].output,
                       n_in=hidden_layers_sizes[-1],
                       n_hidden=5000, n_out=n_outs,
                       activation=T.tanh, output_type='real',
                       use_symbolic_softmax=False)

        # self.logLayer = LogisticRegression(
        #                  input=self.sigmoid_layers[-1].output,
        #                  n_in=hidden_layers_sizes[-1], n_out=n_outs)

        # self.get_prediction = theano.function(
        #     inputs=[self.x],
        #     outputs=[self.logLayer.y_pred]
        #     )
        
        # self.params.extend(self.logLayer.params)
        # self.finetune_cost = self.logLayer.squared_error(self.y)
        # self.errors = self.logLayer.errors(self.y)

        self.get_prediction = theano.function(
            inputs=[self.x],
            outputs=T.round(self.rnn.y_pred)
            )
        self.params.extend(self.rnn.params)
        self.finetune_cost = self.rnn.mse(self.y)
        self.errors = self.rnn.errors(self.y)
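
For orientation, here is a minimal, hypothetical usage sketch of this variant. The class name `SdA` and the seed value are assumptions (the snippet only shows the constructor); the keyword arguments mirror the signature above, and `get_prediction` is the Theano function compiled at the end of `__init__`.

    import numpy
    import theano

    # seed numpy's RNG so the weight initialization is reproducible
    numpy_rng = numpy.random.RandomState(89677)

    # assumed class name: 784 inputs, two hidden layers, one real-valued output
    sda = SdA(numpy_rng=numpy_rng, n_ins=784,
              hidden_layers_sizes=[500, 500], n_outs=1,
              corruption_levels=[0.1, 0.1])

    # get_prediction maps a (batch_size, 784) matrix of rasterized images
    # to the rounded output of the RNN regression layer
    batch = numpy.asarray(numpy_rng.rand(20, 784), dtype=theano.config.floatX)
    predictions = sda.get_prediction(batch)
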
Example #2
    def __init__(self,
                 numpy_rng,
                 theano_rng=None,
                 n_ins=784,
                 hidden_layers_sizes=[500, 500],
                 n_outs=1,
                 corruption_levels=[0.1, 0.1]):
        """ This class is made to support a variable number of layers.

        :type numpy_rng: numpy.random.RandomState
        :param numpy_rng: numpy random number generator used to draw initial
                    weights

        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
        :param theano_rng: Theano random generator; if None is given, one is
                           generated based on a seed drawn from `numpy_rng`

        :type n_ins: int
        :param n_ins: dimension of the input to the sdA

        :type hidden_layers_sizes: list of ints
        :param hidden_layers_sizes: intermediate layer sizes; must contain
                                    at least one value

        :type n_outs: int
        :param n_outs: dimension of the output of the network

        :type corruption_levels: list of float
        :param corruption_levels: amount of corruption to use for each
                                  layer
        """

        self.sigmoid_layers = []
        self.dA_layers = []
        self.params = []
        self.n_layers = len(hidden_layers_sizes)

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2**30))
        # allocate symbolic variables for the data
        self.x = T.matrix('x')  # the data is presented as rasterized images
        self.y = T.matrix('y')  # the targets are presented as a matrix of
                                # real values (the squared error is used below)

        # The SdA is an MLP whose intermediate-layer weights are shared with
        # different denoising autoencoders.
        # We will first construct the SdA as a deep multilayer perceptron,
        # and when constructing each sigmoidal layer we also construct a
        # denoising autoencoder that shares weights with that layer.
        # During pretraining we will train these autoencoders (which will
        # lead to changing the weights of the MLP as well).
        # During finetuning we will finish training the SdA by doing
        # stochastic gradient descent on the MLP.

        for i in xrange(self.n_layers):
            # construct the sigmoidal layer

            # the size of the input is either the number of hidden units of
            # the layer below or the input size if we are on the first layer
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layers_sizes[i - 1]

            # the input to this layer is either the activation of the hidden
            # layer below or the input of the SdA if you are on the first
            # layer
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[-1].output

            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=T.nnet.sigmoid)
            # add the layer to our list of layers
            self.sigmoid_layers.append(sigmoid_layer)
            # it's arguably a philosophical question...
            # but we are going to only declare that the parameters of the
            # sigmoid_layers are parameters of the StackedDAA
            # the visible biases in the dA are parameters of those
            # dA, but not the SdA
            self.params.extend(sigmoid_layer.params)

            # Construct a denoising autoencoder that shares weights with this
            # layer
            dA_layer = dA(numpy_rng=numpy_rng,
                          theano_rng=theano_rng,
                          input=layer_input,
                          n_visible=input_size,
                          n_hidden=hidden_layers_sizes[i],
                          W=sigmoid_layer.W,
                          bhid=sigmoid_layer.b)
            self.dA_layers.append(dA_layer)

        # We now need to add a logistic layer on top of the MLP
        self.logLayer = LogisticRegression(
            input=self.sigmoid_layers[-1].output,
            n_in=hidden_layers_sizes[-1],
            n_out=n_outs)
        self.get_prediction = theano.function(inputs=[self.x],
                                              outputs=[self.logLayer.y_pred])
        self.params.extend(self.logLayer.params)
        # construct a function that implements one step of finetuning

        # compute the cost for the second phase of training; the commented-out
        # negative log likelihood is replaced here by the squared error
        # self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
        self.finetune_cost = self.logLayer.squared_error(self.y)
        # compute the gradients with respect to the model parameters
        # symbolic variable that points to the number of errors made on the
        # minibatch given by self.x and self.y
        self.errors = self.logLayer.errors(self.y)
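
The comments in this example refer to a greedy pretraining phase in which each dA is trained on the output of the layer below. A sketch of how per-layer pretraining functions could be compiled is given below; it assumes the tutorial-style interface in which each dA exposes a `get_cost_updates(corruption_level, learning_rate)` method returning a symbolic cost and an update list, and that `train_set_x` is a Theano shared variable. Both assumptions go beyond what the snippet shows.

    import theano
    import theano.tensor as T

    def build_pretraining_functions(sda, train_set_x, batch_size):
        index = T.lscalar('index')          # minibatch index
        corruption_level = T.scalar('corruption')
        learning_rate = T.scalar('lr')

        pretrain_fns = []
        for dA_layer in sda.dA_layers:
            # symbolic reconstruction cost and SGD updates for this layer's
            # autoencoder (assumed get_cost_updates interface)
            cost, updates = dA_layer.get_cost_updates(corruption_level,
                                                      learning_rate)
            fn = theano.function(
                inputs=[index, corruption_level, learning_rate],
                outputs=cost,
                updates=updates,
                givens={sda.x: train_set_x[index * batch_size:
                                           (index + 1) * batch_size]})
            pretrain_fns.append(fn)
        return pretrain_fns
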
Example #3
    def __init__(self, numpy_rng, theano_rng=None, n_ins=784,
                 hidden_layers_sizes=[500, 500], n_outs=1,
                 corruption_levels=[0.1, 0.1], y_type=1, sparse_weight=0,
                 recurrent=False, dropout=False):
        """ This class is made to support a variable number of layers.

        :type numpy_rng: numpy.random.RandomState
        :param numpy_rng: numpy random number generator used to draw initial
                    weights

        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
        :param theano_rng: Theano random generator; if None is given, one is
                           generated based on a seed drawn from `numpy_rng`

        :type n_ins: int
        :param n_ins: dimension of the input to the sdA

        :type hidden_layers_sizes: list of ints
        :param hidden_layers_sizes: intermediate layer sizes; must contain
                                    at least one value

        :type n_outs: int
        :param n_outs: dimension of the output of the network

        :type corruption_levels: list of float
        :param corruption_levels: amount of corruption to use for each
                                  layer

        :type y_type: int
        :param y_type: 0 for real-valued targets trained with the squared
                       error; any other value for integer class labels
                       trained with the negative log likelihood

        :type sparse_weight: float
        :param sparse_weight: if non-zero, each layer uses a SparseAutoencoder
                              with this regularization weight instead of a dA

        :type recurrent: bool
        :param recurrent: if True, an RNN is used as the output layer instead
                          of logistic regression

        :type dropout: bool
        :param dropout: if True, DropoutHiddenLayer is used for the hidden
                        layers
        """

        self.sigmoid_layers = []
        self.dA_layers = []
        self.params = []
        self.n_layers = len(hidden_layers_sizes)

        # assert self.n_layers > 0

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
        # allocate symbolic variables for the data
        self.x = T.matrix('x')  # the data is presented as rasterized images
        if y_type == 0:
            self.y = T.matrix('y')  # real-valued targets as a matrix
        elif recurrent:
            self.y = T.matrix(name='y', dtype='int32')  # int labels for the RNN
        else:
            self.y = T.ivector('y')  # the labels are presented as a 1D
                                     # vector of [int] labels


        # The SdA is an MLP whose intermediate-layer weights are shared with
        # different denoising autoencoders.
        # We will first construct the SdA as a deep multilayer perceptron,
        # and when constructing each sigmoidal layer we also construct a
        # denoising autoencoder that shares weights with that layer.
        # During pretraining we will train these autoencoders (which will
        # lead to changing the weights of the MLP as well).
        # During finetuning we will finish training the SdA by doing
        # stochastic gradient descent on the MLP.

        for i in xrange(self.n_layers):
            # construct the sigmoidal layer

            # the size of the input is either the number of hidden units of
            # the layer below or the input size if we are on the first layer
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layers_sizes[i - 1]

            # the input to this layer is either the activation of the hidden
            # layer below or the input of the SdA if you are on the first
            # layer
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[-1].output
            if dropout:
                print 'Dropout'
                sigmoid_layer = DropoutHiddenLayer(rng=numpy_rng,
                                                    input=layer_input,
                                                    n_in=input_size,
                                                    n_out=hidden_layers_sizes[i],
                                                    activation=T.nnet.sigmoid)
            else:
                sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                            input=layer_input,
                                            n_in=input_size,
                                            n_out=hidden_layers_sizes[i],
                                            activation=T.nnet.sigmoid)
            # add the layer to our list of layers
            self.sigmoid_layers.append(sigmoid_layer)
            # it's arguably a philosophical question...
            # but we are going to only declare that the parameters of the
            # sigmoid_layers are parameters of the StackedDAA
            # the visible biases in the dA are parameters of those
            # dA, but not the SdA
            self.params.extend(sigmoid_layer.params)

            # Construct a denoising autoencoder that shares weights with this
            # layer
            if sparse_weight == 0:
                dA_layer = dA(numpy_rng=numpy_rng,
                              theano_rng=theano_rng,
                              input=layer_input,
                              n_visible=input_size,
                              n_hidden=hidden_layers_sizes[i],
                              W=sigmoid_layer.W,
                              bhid=sigmoid_layer.b)
                self.dA_layers.append(dA_layer)
            else:
                print 'SparseAutoencoder used'
                dA_layer = SparseAutoencoder(input=layer_input,
                                             n_visible=input_size,
                                             n_hidden=hidden_layers_sizes[i],
                                             W=sigmoid_layer.W,
                                             bhid=sigmoid_layer.b,
                                             reg_weight=sparse_weight,
                                             corruption_level=corruption_levels[i]
                                             )
                self.dA_layers.append(dA_layer)

        # We now add an output layer on top of the MLP: an RNN if `recurrent`
        # is set, otherwise a logistic regression layer
        if y_type == 0:
            output_type = 'real'
        else:
            output_type = 'binary'
        if len(self.sigmoid_layers) > 0:
            if recurrent:
                self.logLayer = RNN(input=self.sigmoid_layers[-1].output,
                                    n_in=hidden_layers_sizes[-1],
                                    n_hidden=500, n_out=1,
                                    activation=T.tanh, output_type=output_type,
                                    use_symbolic_softmax=False)
            else:
                self.logLayer = LogisticRegression(
                    input=self.sigmoid_layers[-1].output,
                    n_in=hidden_layers_sizes[-1], n_out=n_outs,
                    y_type=y_type)
        else:
            # with no hidden layers, the output layer sits directly on the input
            if recurrent:
                self.logLayer = RNN(input=self.x, n_in=n_ins,
                                    n_hidden=500, n_out=1,
                                    activation=T.tanh, output_type=output_type,
                                    use_symbolic_softmax=False)
            else:
                self.logLayer = LogisticRegression(
                    input=self.x,
                    n_in=n_ins, n_out=n_outs,
                    y_type=y_type)
        if recurrent:
            if y_type == 0:
                outputs = T.round(self.logLayer.y_pred)
            else:
                outputs = T.round(self.logLayer.p_y_given_x)
            self.get_prediction = theano.function(
                inputs=[self.x],
                outputs=outputs
                )

        else:
            self.get_prediction = theano.function(
                inputs=[self.x],
                outputs=[self.logLayer.y_pred]
                )
        self.get_lastlayer_output = theano.function(
            inputs=[self.x],
            outputs=[self.sigmoid_layers[-1].output]
            )
        self.params.extend(self.logLayer.params)
        # construct a function that implements one step of finetuning

        # compute the cost for the second phase of training: the RNN loss,
        # the squared error, or the negative log likelihood, depending on
        # the configuration chosen below
        # self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)

        if recurrent:
            self.finetune_cost = self.logLayer.loss(self.y)
        elif y_type == 0:
            self.finetune_cost = self.logLayer.squared_error(self.y)
        else:
            self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
        # compute the gradients with respect to the model parameters
        # symbolic variable that points to the number of errors made on the
        # minibatch given by self.x and self.y
        self.errors = self.logLayer.errors(self.y)
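
Once `self.finetune_cost` and `self.params` are assembled as above, the SGD finetuning step mentioned in the comments can be compiled with plain Theano. The helper below is a minimal sketch, not part of the snippet; it assumes `train_set_x` and `train_set_y` are Theano shared variables holding the training data.

    import theano
    import theano.tensor as T

    def build_finetune_function(sda, train_set_x, train_set_y,
                                batch_size, learning_rate=0.1):
        index = T.lscalar('index')  # minibatch index

        # gradients of the finetuning cost w.r.t. every shared parameter
        gparams = T.grad(sda.finetune_cost, sda.params)

        # one plain SGD update per parameter
        updates = [(param, param - learning_rate * gparam)
                   for param, gparam in zip(sda.params, gparams)]

        train_fn = theano.function(
            inputs=[index],
            outputs=sda.finetune_cost,
            updates=updates,
            givens={sda.x: train_set_x[index * batch_size:
                                       (index + 1) * batch_size],
                    sda.y: train_set_y[index * batch_size:
                                       (index + 1) * batch_size]})
        return train_fn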