Example #1
    def __init__(self,
                 numpy_rng,
                 f_load_MLP=None,
                 f_load_SDA=None,
                 theano_rng=None,
                 n_ins=784,
                 hidden_layers_sizes=[500, 500],
                 n_outs=10,
                 corruption_levels=[0.1, 0.1],
                 name_appendage='',
                 xtropy_fraction=0):
        """ This class is made to support a variable number of layers.

        :type numpy_rng: numpy.random.RandomState
        :param numpy_rng: numpy random number generator used to draw initial
                    weights

        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
        :param theano_rng: Theano random generator; if None is given, one is
                           generated based on a seed drawn from `numpy_rng`

        :type n_ins: int
        :param n_ins: dimension of the input to the sdA

        :type hidden_layers_sizes: list of ints
        :param hidden_layers_sizes: sizes of the intermediate layers; must
                                    contain at least one value

        :type n_outs: int
        :param n_outs: dimension of the output of the network

        :type corruption_levels: list of float
        :param corruption_levels: amount of corruption to use for each
                                  layer

        :type f_load_MLP: file-like object
        :param f_load_MLP: if given, a pretrained MLP prediction layer is
                           loaded from it

        :type f_load_SDA: file-like object
        :param f_load_SDA: if given, a pretrained SdA prediction layer is
                           loaded from it

        :type name_appendage: string
        :param name_appendage: suffix used when naming the constructed
                               layers

        :type xtropy_fraction: float
        :param xtropy_fraction: weight of the MSE reconstruction term in
                                the fine-tuning cost; the classification
                                log-loss receives weight
                                (1 - xtropy_fraction)
        """

        self.sigmoid_layers = []
        self.out_sigmoid_layers = []
        self.dA_layers = []
        self.params = []
        self.n_layers = len(hidden_layers_sizes)

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2**30))
        # allocate symbolic variables for the data
        self.x = T.matrix('x')  # the data is presented as rasterized images
        self.y = T.ivector('y')  # the labels are presented as a 1D vector
                                 # of [int] labels

        for i in xrange(self.n_layers):
            # the size of the input is either the number of hidden units of
            # the layer below or the input size if we are on the first layer
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layers_sizes[i - 1]

            # the input to this layer is either the activation of the hidden
            # layer below or the input of the SdA if you are on the first
            # layer
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[-1].output

            sigmoid_layer = HiddenLayer_ReLU(rng=numpy_rng,
                                             input=layer_input,
                                             n_in=input_size,
                                             n_out=hidden_layers_sizes[i],
                                             name_appendage=name_appendage +
                                             '_sigmoid_' + str(i))

            # add the layer to our list of layers
            self.sigmoid_layers.append(sigmoid_layer)

            # it's arguably a philosophical question...
            # but we are only going to declare that the parameters of the
            # sigmoid_layers are parameters of the StackedDAA;
            # the visible biases in the dA are parameters of those
            # dA, but not of the SdA
            self.params.extend(sigmoid_layer.params)

        for i in xrange(self.n_layers):
            all_layers = self.sigmoid_layers + self.out_sigmoid_layers

            input_size = all_layers[-1].n_out

            output_size = self.sigmoid_layers[-i - 1].n_in

            # the input to the inverse sigmoid layer is always the activation
            # of the sigmoid layer behind it (the last forward sigmoid layer
            # if it's the first inverse layer)
            layer_input = all_layers[-1].output

            out_sigmoid_layer = HiddenLayer_ReLU(
                rng=numpy_rng,
                input=layer_input,
                n_in=input_size,
                n_out=output_size,
                name_appendage=name_appendage + '_outsigmoid_' + str(i))

            self.out_sigmoid_layers.append(out_sigmoid_layer)
            self.params.extend(out_sigmoid_layer.params)

        for i in xrange(self.n_layers):
            sigmoid_layer = self.sigmoid_layers[i]
            # Construct a denoising autoencoder that shares weights with this
            # layer
            dA_layer = dA(numpy_rng=numpy_rng,
                          theano_rng=theano_rng,
                          input=sigmoid_layer.input,
                          n_visible=sigmoid_layer.n_in,
                          n_hidden=sigmoid_layer.n_out,
                          W=sigmoid_layer.W,
                          bhid=sigmoid_layer.b,
                          name_appendage=name_appendage + '_dA_' + str(i))
            self.dA_layers.append(dA_layer)

        if f_load_MLP is not None:
            self.predictLayer = MLP(rng=numpy_rng,
                                    input=self.out_sigmoid_layers[-1].output,
                                    f_load=f_load_MLP,
                                    name_appendage=name_appendage +
                                    '_MLPLayer')
        elif f_load_SDA is not None:
            self.predictLayer = SdA(numpy_rng=numpy_rng,
                                    n_ins=28 * 28,
                                    hidden_layers_sizes=[1000, 1000, 1000],
                                    n_outs=10,
                                    input=self.out_sigmoid_layers[-1].output)
            self.predictLayer.load(f_load_SDA)

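        # Note: the cost and error expressions below reference
        # self.predictLayer, so either f_load_MLP or f_load_SDA must be
        # provided when this class is constructed.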
        self.xtropy_cost = -T.mean(
            self.x * T.log(self.out_sigmoid_layers[-1].output) +
            (1 - self.x) * T.log(1 - self.out_sigmoid_layers[-1].output))
        self.mse_cost = T.mean(
            (self.x - self.out_sigmoid_layers[-1].output)**2)
        self.logloss_cost = self.predictLayer.logLayer.negative_log_likelihood(
            self.y)
        # fine-tuning cost: a convex combination of the MSE reconstruction
        # term and the classification log-loss, weighted by xtropy_fraction
        self.finetune_cost = xtropy_fraction * self.mse_cost + (
            1 - xtropy_fraction) * self.logloss_cost

        self.errors = self.predictLayer.logLayer.errors(self.y)
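
Below is a minimal usage sketch, not taken from the original source. It assumes the enclosing class is named StackedDAA (as the inline comments suggest), that it and its dependencies (HiddenLayer_ReLU, dA, MLP, SdA) are importable from the surrounding project, and that 'pretrained_mlp.pkl' is a hypothetical file accepted by the MLP loader. Either f_load_MLP or f_load_SDA must be supplied, otherwise self.predictLayer is never created and the cost expressions above fail.

import numpy

numpy_rng = numpy.random.RandomState(89677)

# 'pretrained_mlp.pkl' is a hypothetical file name; pass whatever file
# object the MLP loader actually expects.
with open('pretrained_mlp.pkl', 'rb') as f_mlp:
    stacked_daa = StackedDAA(numpy_rng=numpy_rng,
                             f_load_MLP=f_mlp,
                             n_ins=28 * 28,
                             hidden_layers_sizes=[500, 500],
                             n_outs=10,
                             corruption_levels=[0.1, 0.1],
                             xtropy_fraction=0.5)

# the symbolic attributes (finetune_cost, errors, dA_layers, ...) can then
# be used to compile Theano pretraining and fine-tuning functions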