def __init__(self, numpy_rng, f_load_MLP=None, f_load_SDA=None,
             theano_rng=None, n_ins=784,
             hidden_layers_sizes=[500, 500], n_outs=10,
             corruption_levels=[0.1, 0.1], name_appendage='',
             xtropy_fraction=0):
    """ This class is made to support a variable number of layers.

    :type numpy_rng: numpy.random.RandomState
    :param numpy_rng: numpy random number generator used to draw
                      initial weights

    :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
    :param theano_rng: Theano random generator; if None is given, one is
                       generated based on a seed drawn from `numpy_rng`

    :type n_ins: int
    :param n_ins: dimension of the input to the SdA

    :type hidden_layers_sizes: list of ints
    :param hidden_layers_sizes: sizes of the intermediate layers;
                                must contain at least one value

    :type n_outs: int
    :param n_outs: dimension of the output of the network

    :type corruption_levels: list of float
    :param corruption_levels: amount of corruption to use for each layer

    :param f_load_MLP: if given, source from which a pretrained MLP
                       prediction layer is loaded and attached on top
                       of the reconstruction

    :param f_load_SDA: if given (and f_load_MLP is None), source from
                       which a pretrained SdA prediction layer is
                       loaded instead

    :type name_appendage: str
    :param name_appendage: suffix appended to the names of all layers

    :type xtropy_fraction: float
    :param xtropy_fraction: weight of the MSE reconstruction term in
                            finetune_cost; the classification log-loss
                            term is weighted by (1 - xtropy_fraction)
    """
    self.sigmoid_layers = []
    self.out_sigmoid_layers = []
    self.dA_layers = []
    self.params = []
    self.n_layers = len(hidden_layers_sizes)

    assert self.n_layers > 0

    if not theano_rng:
        theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

    # allocate symbolic variables for the data
    self.x = T.matrix('x')   # the data is presented as rasterized images
    self.y = T.ivector('y')  # the labels are presented as a 1D vector
                             # of [int] labels

    # build the forward (encoder) stack
    for i in xrange(self.n_layers):
        # the size of the input is either the number of hidden units of
        # the layer below or the input size if we are on the first layer
        if i == 0:
            input_size = n_ins
        else:
            input_size = hidden_layers_sizes[i - 1]

        # the input to this layer is either the activation of the hidden
        # layer below or the input of the SdA if we are on the first layer
        if i == 0:
            layer_input = self.x
        else:
            layer_input = self.sigmoid_layers[-1].output

        sigmoid_layer = HiddenLayer_ReLU(
            rng=numpy_rng,
            input=layer_input,
            n_in=input_size,
            n_out=hidden_layers_sizes[i],
            name_appendage=name_appendage + '_sigmoid_' + str(i))

        # add the layer to our list of layers
        self.sigmoid_layers.append(sigmoid_layer)

        # it's arguably a philosophical question, but we are going to
        # only declare that the parameters of the sigmoid_layers are
        # parameters of the StackedDAA; the visible biases in the dA
        # are parameters of those dA, but not of the SdA
        self.params.extend(sigmoid_layer.params)
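    # Worked shape sketch, assuming the default arguments above: with
    # n_ins=784 and hidden_layers_sizes=[500, 500], the forward stack
    # maps 784 -> 500 -> 500; the inverse stack built below mirrors it
    # back as 500 -> 500 -> 784, so the last inverse layer's output has
    # the same shape as self.x and can be compared against it in the
    # reconstruction costs further down.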
    # build the inverse (decoder) stack, mirroring the encoder
    for i in xrange(self.n_layers):
        all_layers = self.sigmoid_layers + self.out_sigmoid_layers
        input_size = all_layers[-1].n_out
        output_size = self.sigmoid_layers[-i - 1].n_in

        # the input to the inverse sigmoid layer is always the activation
        # of the layer behind it (the last forward sigmoid layer if it's
        # the first inverse layer)
        layer_input = all_layers[-1].output

        out_sigmoid_layer = HiddenLayer_ReLU(
            rng=numpy_rng,
            input=layer_input,
            n_in=input_size,
            n_out=output_size,
            name_appendage=name_appendage + '_outsigmoid_' + str(i))

        self.out_sigmoid_layers.append(out_sigmoid_layer)
        self.params.extend(out_sigmoid_layer.params)

    # construct a denoising autoencoder that shares weights with each
    # forward layer
    for i in xrange(self.n_layers):
        sigmoid_layer = self.sigmoid_layers[i]
        dA_layer = dA(numpy_rng=numpy_rng,
                      theano_rng=theano_rng,
                      input=sigmoid_layer.input,
                      n_visible=sigmoid_layer.n_in,
                      n_hidden=sigmoid_layer.n_out,
                      W=sigmoid_layer.W,
                      bhid=sigmoid_layer.b,
                      name_appendage=name_appendage + '_dA_' + str(i))
        self.dA_layers.append(dA_layer)

    # attach a pretrained prediction network on top of the
    # reconstruction; one of f_load_MLP / f_load_SDA must be provided,
    # since logloss_cost below references self.predictLayer
    if f_load_MLP is not None:
        self.predictLayer = MLP(rng=numpy_rng,
                                input=self.out_sigmoid_layers[-1].output,
                                f_load=f_load_MLP,
                                name_appendage=name_appendage + '_MLPLayer')
    elif f_load_SDA is not None:
        self.predictLayer = SdA(numpy_rng=numpy_rng,
                                n_ins=28 * 28,
                                hidden_layers_sizes=[1000, 1000, 1000],
                                n_outs=10,
                                input=self.out_sigmoid_layers[-1].output)
        self.predictLayer.load(f_load_SDA)

    # reconstruction costs: cross-entropy and mean squared error between
    # the input and the output of the last inverse layer
    self.xtropy_cost = -T.mean(
        self.x * T.log(self.out_sigmoid_layers[-1].output) +
        (1 - self.x) * T.log(1 - self.out_sigmoid_layers[-1].output))
    self.mse_cost = T.mean(
        (self.x - self.out_sigmoid_layers[-1].output) ** 2)

    # classification cost from the prediction network
    self.logloss_cost = self.predictLayer.logLayer.negative_log_likelihood(
        self.y)

    # the finetuning cost blends the MSE reconstruction term with the
    # classification log-loss; note that despite its name,
    # xtropy_fraction weights the MSE term here, not xtropy_cost
    self.finetune_cost = (xtropy_fraction * self.mse_cost +
                          (1 - xtropy_fraction) * self.logloss_cost)

    self.errors = self.predictLayer.logLayer.errors(self.y)
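    # Minimal usage sketch. The enclosing class name does not appear in
    # this excerpt, so `StackedDAA` below is an assumption taken from
    # the comment in the constructor, and 'mlp_params.pkl' is a
    # hypothetical file of pretrained MLP parameters:
    #
    #     import numpy
    #     numpy_rng = numpy.random.RandomState(89677)
    #     net = StackedDAA(numpy_rng=numpy_rng,
    #                      n_ins=28 * 28,
    #                      hidden_layers_sizes=[500, 500],
    #                      n_outs=10,
    #                      corruption_levels=[0.1, 0.1],
    #                      f_load_MLP=open('mlp_params.pkl', 'rb'),
    #                      xtropy_fraction=0.5)
    #
    # With xtropy_fraction=0.5, net.finetune_cost weights the MSE
    # reconstruction term and the classification log-loss equally.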