Esempio n. 1
0
class DNN_Dropout(nnet):

    def __init__(self, numpy_rng, theano_rng=None, n_ins=784,
                 hidden_layers_sizes=[500, 500], n_outs=10,
                 activation = T.nnet.sigmoid, input_dropout_factor = 0,
                 dropout_factor = [0.2,0.2,0.2,0.2,0.2,0.2,0.2],
                 adv_activation = None, max_col_norm = None,
                 l1_reg = None, l2_reg = None):

        super(DNN_Dropout, self).__init__()

        self.layers = []
        self.dropout_layers = []
        self.n_layers = len(hidden_layers_sizes)

        self.max_col_norm = max_col_norm
        self.l1_reg = l1_reg
        self.l2_reg = l2_reg

        self.input_dropout_factor = input_dropout_factor
        self.dropout_factor = dropout_factor

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
        # allocate symbolic variables for the data
        self.x = T.matrix('x') 
        self.y = T.ivector('y')
		
        for i in xrange(self.n_layers):
            # construct the sigmoidal layer
            if i == 0:
                input_size = n_ins
                layer_input = self.x
                if input_dropout_factor > 0.0:
                    dropout_layer_input = _dropout_from_layer(theano_rng, self.x, input_dropout_factor)
                else:
                    dropout_layer_input = self.x
            else:
                input_size = hidden_layers_sizes[i - 1]
                layer_input = (1 - self.dropout_factor[i - 1]) * self.layers[-1].output
                dropout_layer_input = self.dropout_layers[-1].dropout_output
			
            if not adv_activation  is None:
                dropout_layer = DropoutHiddenLayer(rng=numpy_rng,
                                        input=dropout_layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i] * adv_activation['pool_size'],
                                        activation= activation,
                                        adv_activation_method = adv_activation['method'],
                                        pool_size = adv_activation['pool_size'],
                                        pnorm_order = adv_activation['pnorm_order'],
                                        dropout_factor=self.dropout_factor[i])
                sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i] * adv_activation['pool_size'],
                                        activation=activation,
                                        adv_activation_method = adv_activation['method'],
                                        pool_size = adv_activation['pool_size'],
                                        pnorm_order = adv_activation['pnorm_order'],
                                        W=dropout_layer.W, b=dropout_layer.b)
            else:
                dropout_layer = DropoutHiddenLayer(rng=numpy_rng,
                                        input=dropout_layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation= activation,
                                        dropout_factor=self.dropout_factor[i])
                sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i] ,
                                        activation= activation,
                                        W=dropout_layer.W, b=dropout_layer.b)
                                        
            # add the layer to our list of layers
            self.layers.append(sigmoid_layer)
            self.dropout_layers.append(dropout_layer)
            self.params.extend(dropout_layer.params)
            self.delta_params.extend(dropout_layer.delta_params)
            
        # We now need to add a logistic layer on top of the MLP
        self.dropout_logLayer = LogisticRegression(
                                 input=self.dropout_layers[-1].dropout_output,
                                 n_in=hidden_layers_sizes[-1], n_out=n_outs)

        self.logLayer = LogisticRegression(
                         input=(1 - self.dropout_factor[-1]) * self.layers[-1].output,
                         n_in=hidden_layers_sizes[-1], n_out=n_outs,
                         W=self.dropout_logLayer.W, b=self.dropout_logLayer.b)

        self.dropout_layers.append(self.dropout_logLayer)
        self.layers.append(self.logLayer)
        self.params.extend(self.dropout_logLayer.params)
        self.delta_params.extend(self.dropout_logLayer.delta_params)

        # compute the cost
        self.finetune_cost = self.dropout_logLayer.negative_log_likelihood(self.y)
        self.errors = self.logLayer.errors(self.y)

        self.output = self.logLayer.prediction();
        self.features = self.layers[-2].output;
        self.features_dim = self.layers[-2].n_out

        if self.l1_reg is not None:
            self.__l1Regularization__();

        if self.l2_reg is not None:
            self.__l2Regularization__();


    def save(self,filename,start_layer = 0,max_layer_num = -1,withfinal=True):
        nnet_dict = {}
        if max_layer_num == -1:
           max_layer_num = self.n_layers

        for i in range(start_layer, max_layer_num):
           dict_a = str(i) + ' W'
           if i == 0:
               nnet_dict[dict_a] = _array2string((1.0 - self.input_dropout_factor) * (
                self.layers[i].params[0].get_value()))
           else:
               nnet_dict[dict_a] = _array2string((1.0 - self.dropout_factor[i - 1])* (
                self.layers[i].params[0].get_value()))
           dict_a = str(i) + ' b'
           nnet_dict[dict_a] = _array2string(self.layers[i].params[1].get_value())

        if withfinal: 
            dict_a = 'logreg W'
            nnet_dict[dict_a] = _array2string((1.0 - self.dropout_factor[-1])* (
                self.logLayer.params[0].get_value()))
            dict_a = 'logreg b'
            nnet_dict[dict_a] = _array2string(self.logLayer.params[1].get_value())
   
        with open(filename, 'wb') as fp:
            json.dump(nnet_dict, fp, indent=2, sort_keys = True)
            fp.flush()

    def load(self,filename,start_layer = 0,max_layer_num = -1,withfinal=True):
        nnet_dict = {}
        if max_layer_num == -1:
            max_layer_num = self.n_layers

        with open(filename, 'rb') as fp:
            nnet_dict = json.load(fp)
        
        for i in xrange(max_layer_num):
            dict_key = str(i) + ' W'
            self.layers[i].params[0].set_value(numpy.asarray(_string2array(nnet_dict[dict_key]),
                dtype=theano.config.floatX))
            dict_key = str(i) + ' b' 
            self.layers[i].params[1].set_value(numpy.asarray(_string2array(nnet_dict[dict_key]),
                dtype=theano.config.floatX))

        if withfinal:
            dict_key = 'logreg W'
            self.logLayer.params[0].set_value(numpy.asarray(_string2array(nnet_dict[dict_key]),
                dtype=theano.config.floatX))
            dict_key = 'logreg b'
            self.logLayer.params[1].set_value(numpy.asarray(_string2array(nnet_dict[dict_key]),
                dtype=theano.config.floatX))
Esempio n. 2
0
    def __init__(self, numpy_rng, theano_rng=None, n_ins=784,
                 hidden_layers_sizes=[500, 500], n_outs=10,
                 activation = T.nnet.sigmoid, input_dropout_factor = 0,
                 dropout_factor = [0.2,0.2,0.2,0.2,0.2,0.2,0.2],
                 adv_activation = None, max_col_norm = None,
                 l1_reg = None, l2_reg = None):

        super(DNN_Dropout, self).__init__()

        self.layers = []
        self.dropout_layers = []
        self.n_layers = len(hidden_layers_sizes)

        self.max_col_norm = max_col_norm
        self.l1_reg = l1_reg
        self.l2_reg = l2_reg

        self.input_dropout_factor = input_dropout_factor
        self.dropout_factor = dropout_factor

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
        # allocate symbolic variables for the data
        self.x = T.matrix('x') 
        self.y = T.ivector('y')
		
        for i in xrange(self.n_layers):
            # construct the sigmoidal layer
            if i == 0:
                input_size = n_ins
                layer_input = self.x
                if input_dropout_factor > 0.0:
                    dropout_layer_input = _dropout_from_layer(theano_rng, self.x, input_dropout_factor)
                else:
                    dropout_layer_input = self.x
            else:
                input_size = hidden_layers_sizes[i - 1]
                layer_input = (1 - self.dropout_factor[i - 1]) * self.layers[-1].output
                dropout_layer_input = self.dropout_layers[-1].dropout_output
			
            if not adv_activation  is None:
                dropout_layer = DropoutHiddenLayer(rng=numpy_rng,
                                        input=dropout_layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i] * adv_activation['pool_size'],
                                        activation= activation,
                                        adv_activation_method = adv_activation['method'],
                                        pool_size = adv_activation['pool_size'],
                                        pnorm_order = adv_activation['pnorm_order'],
                                        dropout_factor=self.dropout_factor[i])
                sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i] * adv_activation['pool_size'],
                                        activation=activation,
                                        adv_activation_method = adv_activation['method'],
                                        pool_size = adv_activation['pool_size'],
                                        pnorm_order = adv_activation['pnorm_order'],
                                        W=dropout_layer.W, b=dropout_layer.b)
            else:
                dropout_layer = DropoutHiddenLayer(rng=numpy_rng,
                                        input=dropout_layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation= activation,
                                        dropout_factor=self.dropout_factor[i])
                sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i] ,
                                        activation= activation,
                                        W=dropout_layer.W, b=dropout_layer.b)
                                        
            # add the layer to our list of layers
            self.layers.append(sigmoid_layer)
            self.dropout_layers.append(dropout_layer)
            self.params.extend(dropout_layer.params)
            self.delta_params.extend(dropout_layer.delta_params)
            
        # We now need to add a logistic layer on top of the MLP
        self.dropout_logLayer = LogisticRegression(
                                 input=self.dropout_layers[-1].dropout_output,
                                 n_in=hidden_layers_sizes[-1], n_out=n_outs)

        self.logLayer = LogisticRegression(
                         input=(1 - self.dropout_factor[-1]) * self.layers[-1].output,
                         n_in=hidden_layers_sizes[-1], n_out=n_outs,
                         W=self.dropout_logLayer.W, b=self.dropout_logLayer.b)

        self.dropout_layers.append(self.dropout_logLayer)
        self.layers.append(self.logLayer)
        self.params.extend(self.dropout_logLayer.params)
        self.delta_params.extend(self.dropout_logLayer.delta_params)

        # compute the cost
        self.finetune_cost = self.dropout_logLayer.negative_log_likelihood(self.y)
        self.errors = self.logLayer.errors(self.y)

        self.output = self.logLayer.prediction();
        self.features = self.layers[-2].output;
        self.features_dim = self.layers[-2].n_out

        if self.l1_reg is not None:
            self.__l1Regularization__();

        if self.l2_reg is not None:
            self.__l2Regularization__();
Esempio n. 3
0
	def __init__(self, numpy_rng, theano_rng, batch_size, n_outs,conv_layer_configs, hidden_layer_configs, 
			use_fast=False,hidden_activation = T.nnet.sigmoid,l1_reg=None,l2_reg=None,
			max_col_norm=None,input_dropout_factor=0.0):

		super(DropoutCNN, self).__init__(conv_layer_configs,hidden_layer_configs,l1_reg,l2_reg,max_col_norm)
		self.input_dropout_factor = input_dropout_factor;
		
		self.dropout_layers = []
		if not theano_rng:
			theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
            
		for i in xrange(self.conv_layer_num):		# construct the convolution layer
			if i == 0:  							#is_input layer
				conv_input = self.x
				if self.input_dropout_factor > 0.0:
					dropout_conv_input = _dropout_from_layer(theano_rng, self.x,self.input_dropout_factor)
				else:
					dropout_conv_input = self.x;	
			else:
				conv_input = (1-conv_layer_configs[i-1]['dropout_factor'])*self.layers[-1].output #output of previous layer
				dropout_conv_input = self.dropout_layers[-1].dropout_output;
				
			config = conv_layer_configs[i]
			conv_activation= parse_activation(config['activation'])
			dropout_conv_layer = DropoutConvLayer(numpy_rng=numpy_rng, input=dropout_conv_input,
				input_shape=config['input_shape'],filter_shape=config['filter_shape'],poolsize=config['poolsize'],
				activation = conv_activation, use_fast = use_fast,dropout_factor=conv_layer_configs[i]['dropout_factor'])
			
			conv_layer = ConvLayer(numpy_rng=numpy_rng, input=conv_input,input_shape=config['input_shape'],
				filter_shape=config['filter_shape'],poolsize=config['poolsize'],activation = conv_activation,
				use_fast = use_fast, W = dropout_conv_layer.W, b = dropout_conv_layer.b)
			
				
			self.dropout_layers.append(dropout_conv_layer);
			self.layers.append(conv_layer)
			self.conv_layers.append(conv_layer)
			
			if config['update']==True:	# only few layers of convolution layer are considered for updation
				self.params.extend(dropout_conv_layer.params)
				self.delta_params.extend(dropout_conv_layer.delta_params)

		hidden_layers = hidden_layer_configs['hidden_layers'];
		self.conv_output_dim = config['output_shape'][1] * config['output_shape'][2] * config['output_shape'][3]
		adv_activation_configs = hidden_layer_configs['adv_activation'] 
		
		#flattening the last convolution output layer
		self.dropout_features = self.dropout_layers[-1].dropout_output.flatten(2);
		self.features = self.conv_layers[-1].output.flatten(2);
		self.features_dim = self.conv_output_dim;

		self.dropout_layers = [];
		self.dropout_factor = hidden_layer_configs['dropout_factor'];
		
		for i in xrange(self.hidden_layer_num):		# construct the hidden layer
			if i == 0:								# is first sigmoidal layer
				input_size = self.conv_output_dim
				dropout_layer_input = self.dropout_features
				layer_input = self.features
			else:
				input_size = hidden_layers[i - 1]	# number of hidden neurons in previous layers
				dropout_layer_input = self.dropout_layers[-1].dropout_output			
				layer_input = (1 - self.dropout_factor[i-1]) * self.layers[-1].output
				
			if adv_activation_configs is None:
				dropout_sigmoid_layer = DropoutHiddenLayer(rng=numpy_rng, input=layer_input,n_in=input_size, 
						n_out = hidden_layers[i], activation=hidden_activation,
						dropout_factor = self.dropout_factor[i]);
						
				sigmoid_layer = HiddenLayer(rng=numpy_rng, input=layer_input,n_in=input_size, 
						n_out = hidden_layers[i], activation=hidden_activation,
						W=dropout_sigmoid_layer.W, b=dropout_sigmoid_layer.b);
										
						
			else:
				dropout_sigmoid_layer = DropoutHiddenLayer(rng=numpy_rng, input=layer_input,n_in=input_size, 
						n_out = hidden_layers[i]*adv_activation_configs['pool_size'], activation=hidden_activation,
						adv_activation_method = adv_activation_configs['method'],
						pool_size = adv_activation_configs['pool_size'],
						pnorm_order = adv_activation_configs['pnorm_order'],
						dropout_factor = self.dropout_factor[i]);
						
				sigmoid_layer = HiddenLayer(rng=numpy_rng, input=layer_input,n_in=input_size, 
						n_out = hidden_layers[i]*adv_activation_configs['pool_size'], activation=hidden_activation,
						adv_activation_method = adv_activation_configs['method'],
						pool_size = adv_activation_configs['pool_size'],
						pnorm_order = adv_activation_configs['pnorm_order'],
						W=dropout_sigmoid_layer.W, b=dropout_sigmoid_layer.b);
						
			self.layers.append(sigmoid_layer)
			self.dropout_layers.append(dropout_sigmoid_layer)
			self.mlp_layers.append(sigmoid_layer)

			if config['update']==True:	# only few layers of hidden layer are considered for updation
						self.params.extend(dropout_sigmoid_layer.params)
						self.delta_params.extend(dropout_sigmoid_layer.delta_params)

		self.dropout_logLayer = LogisticRegression(input=self.dropout_layers[-1].dropout_output,n_in=hidden_layers[-1],n_out=n_outs)
		self.logLayer = LogisticRegression(
							input=(1 - self.dropout_factor[-1]) * self.layers[-1].output,
							n_in=hidden_layers[-1],n_out=n_outs,
							W=self.dropout_logLayer.W, b=self.dropout_logLayer.b)
		
		self.dropout_layers.append(self.dropout_logLayer)
		self.layers.append(self.logLayer)
		self.params.extend(self.dropout_logLayer.params)
		self.delta_params.extend(self.dropout_logLayer.delta_params)
		
		self.finetune_cost = self.dropout_logLayer.negative_log_likelihood(self.y)
		self.errors = self.logLayer.errors(self.y)
		self.output = self.logLayer.prediction()
		
		#regularization
		if self.l1_reg is not None:
			self.__l1Regularization__(self.hidden_layer_num*2);
		if self.l2_reg is not None:
			self.__l2Regularization__(self.hidden_layer_num*2);
Esempio n. 4
0
	def __init__(self, numpy_rng, theano_rng, batch_size, n_outs,conv_layer_configs, hidden_layer_configs,
		hidden_activation = T.nnet.sigmoid,l1_reg=None,l2_reg=None,max_col_norm=None):

		super(CNN3D, self).__init__(conv_layer_configs, hidden_layer_configs,l1_reg,l2_reg,max_col_norm)
		if not theano_rng:
			theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
			            
		for i in xrange(self.conv_layer_num):		# construct the convolution layer
			if i == 0:  				#is_input layer
				input = self.x
			else:
				input = self.layers[-1].output #output of previous layer
			
			config = conv_layer_configs[i]
			conv_activation = parse_activation(config['activation']);
			conv_layer = ConvLayer(numpy_rng=numpy_rng, input=input,input_shape=config['input_shape'],
				filter_shape=config['filter_shape'],poolsize=config['poolsize'],
				activation = conv_activation)
			self.layers.append(conv_layer)
			self.conv_layers.append(conv_layer)
			if config['update']==True:	# only few layers of convolution layer are considered for updation
				self.params.extend(conv_layer.params)
				self.delta_params.extend(conv_layer.delta_params)

		hidden_layers = hidden_layer_configs['hidden_layers'];
		self.conv_output_dim = numpy.prod(config['output_shape'][1:])
		adv_activation_configs = hidden_layer_configs['adv_activation'] 
		
		#flattening the last convolution output layer
		self.features = self.conv_layers[-1].output.flatten(2);
		self.features_dim = self.conv_output_dim;
		
		for i in xrange(self.hidden_layer_num):		# construct the hidden layer
			if i == 0:				# is first sigmoidla layer
				input_size = self.conv_output_dim
				layer_input = self.features
			else:
				input_size = hidden_layers[i - 1]	# number of hidden neurons in previous layers
				layer_input = self.layers[-1].output
			
			
			if adv_activation_configs is None:
				sigmoid_layer = HiddenLayer(rng=numpy_rng, input=layer_input,n_in=input_size, 
						n_out = hidden_layers[i], activation=hidden_activation);
						
			else:
				sigmoid_layer = HiddenLayer(rng=numpy_rng, input=layer_input,n_in=input_size, 
						n_out = hidden_layers[i]*adv_activation_configs['pool_size'], activation=hidden_activation,
						adv_activation_method = adv_activation_configs['method'],
						pool_size = adv_activation_configs['pool_size'],
						pnorm_order = adv_activation_configs['pnorm_order']);
						
						
			self.layers.append(sigmoid_layer)
			self.mlp_layers.append(sigmoid_layer)

			if config['update']==True:	# only few layers of hidden layer are considered for updation
                		self.params.extend(sigmoid_layer.params)
                		self.delta_params.extend(sigmoid_layer.delta_params)
           

		self.logLayer = LogisticRegression(input=self.layers[-1].output,n_in=hidden_layers[-1],n_out=n_outs)
		
		self.layers.append(self.logLayer)
		self.params.extend(self.logLayer.params)
		self.delta_params.extend(self.logLayer.delta_params)
		
		self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
		self.errors = self.logLayer.errors(self.y)
		self.output = self.logLayer.prediction()
		
		#regularization
		if self.l1_reg is not None:
			self.__l1Regularization__(self.hidden_layer_num*2);
		if self.l2_reg is not None:
			self.__l2Regularization__(self.hidden_layer_num*2);
Esempio n. 5
0
    def __init__(self,
                 numpy_rng,
                 theano_rng=None,
                 n_ins=784,
                 hidden_layers_sizes=[500, 500],
                 n_outs=10,
                 first_layer_gb=True,
                 pretrainedLayers=None,
                 activation=T.nnet.sigmoid):
        """This class is made to support a variable number of layers.

        :type numpy_rng: numpy.random.RandomState
        :param numpy_rng: numpy random number generator used to draw initial
                    weights

        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
        :param theano_rng: Theano random generator; if None is given one is
                           generated based on a seed drawn from `rng`

        :type n_ins: int
        :param n_ins: dimension of the input to the DBN

        :type hidden_layers_sizes: list of ints
        :param hidden_layers_sizes: intermediate layers size, must contain
                               at least one value

        :type n_outs: int
        :param n_outs: dimension of the output of the network

        :type first_layer_gb: bool
        :param first_layer_gb: wether first layer is gausian-bernolli or 
                                bernolli-bernolli
        """
        super(DBN, self).__init__()
        self.layers = []
        self.rbm_layers = []
        self.n_layers = len(hidden_layers_sizes)

        if pretrainedLayers == None:
            self.nPreTrainLayers = n_layers
        else:
            self.nPreTrainLayers = pretrainedLayers

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2**30))

        # allocate symbolic variables for the data
        self.x = T.matrix('x')  # the data is presented as rasterized images
        self.y = T.ivector('y')  # the labels are presented as 1D vector
        # of [int] labels

        # The DBN is an MLP, for which all weights of intermediate
        # layers are shared with a different RBM.  We will first
        # construct the DBN as a deep multilayer perceptron, and when
        # constructing each sigmoidal layer we also construct an RBM
        # that shares weights with that layer. During pretraining we
        # will train these RBMs (which will lead to chainging the
        # weights of the MLP as well) During finetuning we will finish
        # training the DBN by doing stochastic gradient descent on the
        # MLP.

        for i in xrange(self.n_layers):
            # construct the sigmoidal layer

            # the size of the input is either the number of hidden
            # units of the layer below or the input size if we are on
            # the first layer
            # the input to this layer is either the activation of the
            # hidden layer below or the input of the DBN if you are on
            # the first layer
            if i == 0:
                input_size = n_ins
                layer_input = self.x
            else:
                input_size = hidden_layers_sizes[i - 1]
                layer_input = self.layers[-1].output

            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=activation)

            # add the layer to our list of layers
            self.layers.append(sigmoid_layer)

            # the parameters of the sigmoid_layers are parameters of the DBN.
            # The visible biases in the RBM are parameters of those RBMs,
            # but not of the DBN.
            self.params.extend(sigmoid_layer.params)
            self.delta_params.extend(sigmoid_layer.delta_params)

            # Construct an RBM that shared weights with this layer
            # the first layer could be Gaussian-Bernoulli RBM
            # other layers are Bernoulli-Bernoulli RBMs
            if i == 0 and first_layer_gb:
                rbm_layer = GBRBM(numpy_rng=numpy_rng,
                                  theano_rng=theano_rng,
                                  input=layer_input,
                                  n_visible=input_size,
                                  n_hidden=hidden_layers_sizes[i],
                                  W=sigmoid_layer.W,
                                  hbias=sigmoid_layer.b,
                                  activation=activation)
            else:
                rbm_layer = RBM(numpy_rng=numpy_rng,
                                theano_rng=theano_rng,
                                input=layer_input,
                                n_visible=input_size,
                                n_hidden=hidden_layers_sizes[i],
                                W=sigmoid_layer.W,
                                hbias=sigmoid_layer.b,
                                activation=activation)
            self.rbm_layers.append(rbm_layer)

        # We now need to add a logistic layer on top of the MLP
        self.logLayer = LogisticRegression(input=self.layers[-1].output,
                                           n_in=hidden_layers_sizes[-1],
                                           n_out=n_outs)
        self.layers.append(self.logLayer)
        self.params.extend(self.logLayer.params)
        self.delta_params.extend(self.logLayer.delta_params)

        # compute the cost for second phase of training, defined as the
        # negative log likelihood of the logistic regression (output) layer
        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)

        # compute the gradients with respect to the model parameters
        # symbolic variable that points to the number of errors made on the
        # minibatch given by self.x and self.y
        self.errors = self.logLayer.errors(self.y)

        self.output = self.logLayer.prediction()
        self.features = self.layers[-2].output
        self.features_dim = self.layers[-2].n_out
Esempio n. 6
0
class CNN(CNNBase):
	""" Instantiation of Convolution neural network ... """
	def __init__(self, numpy_rng, theano_rng, batch_size, n_outs,conv_layer_configs, hidden_layer_configs, 
			use_fast=False,hidden_activation = T.nnet.sigmoid,l1_reg=None,l2_reg=None,max_col_norm=None):

		super(CNN, self).__init__(conv_layer_configs, hidden_layer_configs,l1_reg,l2_reg,max_col_norm)
		if not theano_rng:
			theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
			            
		for i in xrange(self.conv_layer_num):		# construct the convolution layer
			if i == 0:  				#is_input layer
				input = self.x
			else:
				input = self.layers[-1].output #output of previous layer
			
			config = conv_layer_configs[i]
			conv_activation= parse_activation(config['activation'])
			conv_layer = ConvLayer(numpy_rng=numpy_rng, input=input,input_shape=config['input_shape'],
				filter_shape=config['filter_shape'],poolsize=config['poolsize'],
				activation = conv_activation, use_fast = use_fast)
			self.layers.append(conv_layer)
			self.conv_layers.append(conv_layer)
			if config['update']==True:	# only few layers of convolution layer are considered for updation
				self.params.extend(conv_layer.params)
				self.delta_params.extend(conv_layer.delta_params)

		hidden_layers = hidden_layer_configs['hidden_layers'];
		self.conv_output_dim = config['output_shape'][1] * config['output_shape'][2] * config['output_shape'][3]
		adv_activation_configs = hidden_layer_configs['adv_activation'] 
		#flattening the last convolution output layer
		self.features = self.conv_layers[-1].output.flatten(2);
		self.features_dim = self.conv_output_dim;
		
		for i in xrange(self.hidden_layer_num):		# construct the hidden layer
			if i == 0:				# is first sigmoidla layer
				input_size = self.conv_output_dim
				layer_input = self.features
			else:
				input_size = hidden_layers[i - 1]	# number of hidden neurons in previous layers
				layer_input = self.layers[-1].output
			
			
			if adv_activation_configs is None:
				sigmoid_layer = HiddenLayer(rng=numpy_rng, input=layer_input,n_in=input_size, 
						n_out = hidden_layers[i], activation=hidden_activation);
						
			else:
				sigmoid_layer = HiddenLayer(rng=numpy_rng, input=layer_input,n_in=input_size, 
						n_out = hidden_layers[i]*adv_activation_configs['pool_size'], activation=hidden_activation,
						adv_activation_method = adv_activation_configs['method'],
						pool_size = adv_activation_configs['pool_size'],
						pnorm_order = adv_activation_configs['pnorm_order']);
						
						
			self.layers.append(sigmoid_layer)
			self.mlp_layers.append(sigmoid_layer)

			if config['update']==True:	# only few layers of hidden layer are considered for updation
                		self.params.extend(sigmoid_layer.params)
                		self.delta_params.extend(sigmoid_layer.delta_params)
           

		self.logLayer = LogisticRegression(input=self.layers[-1].output,n_in=hidden_layers[-1],n_out=n_outs)
		
		self.layers.append(self.logLayer)
		self.params.extend(self.logLayer.params)
		self.delta_params.extend(self.logLayer.delta_params)
		
		self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
		self.errors = self.logLayer.errors(self.y)
		self.output = self.logLayer.prediction()
		
		#regularization
		if self.l1_reg is not None:
			self.__l1Regularization__(self.hidden_layer_num*2);
		if self.l2_reg is not None:
			self.__l2Regularization__(self.hidden_layer_num*2);
		
		
	def save_mlp2dict(self,withfinal=True,max_layer_num=-1):
		if max_layer_num == -1:
		   max_layer_num = self.hidden_layer_num
		mlp_dict = {}
		for i in range(max_layer_num):
			dict_a = str(i) +' W'
			mlp_dict[dict_a] = _array2string(self.mlp_layers[i].params[0].get_value())
			dict_a = str(i) + ' b'
			mlp_dict[dict_a] = _array2string(self.mlp_layers[i].params[1].get_value())

		if withfinal: 
			dict_a = 'logreg W'
			mlp_dict[dict_a] = _array2string(self.logLayer.params[0].get_value())
			dict_a = 'logreg b'
			mlp_dict[dict_a] = _array2string(self.logLayer.params[1].get_value())
		return mlp_dict
Esempio n. 7
0
    def __init__(self,
                 numpy_rng,
                 theano_rng,
                 batch_size,
                 n_outs,
                 conv_layer_configs,
                 hidden_layer_configs,
                 hidden_activation=T.nnet.sigmoid,
                 l1_reg=None,
                 l2_reg=None,
                 max_col_norm=None):

        super(CNN3D, self).__init__(conv_layer_configs, hidden_layer_configs,
                                    l1_reg, l2_reg, max_col_norm)
        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2**30))

        for i in xrange(
                self.conv_layer_num):  # construct the convolution layer
            if i == 0:  #is_input layer
                input = self.x
            else:
                input = self.layers[-1].output  #output of previous layer

            config = conv_layer_configs[i]
            conv_activation = parse_activation(config['activation'])
            conv_layer = ConvLayer(numpy_rng=numpy_rng,
                                   input=input,
                                   input_shape=config['input_shape'],
                                   filter_shape=config['filter_shape'],
                                   poolsize=config['poolsize'],
                                   activation=conv_activation)
            self.layers.append(conv_layer)
            self.conv_layers.append(conv_layer)
            if config[
                    'update'] == True:  # only few layers of convolution layer are considered for updation
                self.params.extend(conv_layer.params)
                self.delta_params.extend(conv_layer.delta_params)

        hidden_layers = hidden_layer_configs['hidden_layers']
        self.conv_output_dim = numpy.prod(config['output_shape'][1:])
        adv_activation_configs = hidden_layer_configs['adv_activation']

        #flattening the last convolution output layer
        self.features = self.conv_layers[-1].output.flatten(2)
        self.features_dim = self.conv_output_dim

        for i in xrange(self.hidden_layer_num):  # construct the hidden layer
            if i == 0:  # is first sigmoidla layer
                input_size = self.conv_output_dim
                layer_input = self.features
            else:
                input_size = hidden_layers[
                    i - 1]  # number of hidden neurons in previous layers
                layer_input = self.layers[-1].output

            if adv_activation_configs is None:
                sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                            input=layer_input,
                                            n_in=input_size,
                                            n_out=hidden_layers[i],
                                            activation=hidden_activation)

            else:
                sigmoid_layer = HiddenLayer(
                    rng=numpy_rng,
                    input=layer_input,
                    n_in=input_size,
                    n_out=hidden_layers[i] *
                    adv_activation_configs['pool_size'],
                    activation=hidden_activation,
                    adv_activation_method=adv_activation_configs['method'],
                    pool_size=adv_activation_configs['pool_size'],
                    pnorm_order=adv_activation_configs['pnorm_order'])

            self.layers.append(sigmoid_layer)
            self.mlp_layers.append(sigmoid_layer)

            if config[
                    'update'] == True:  # only few layers of hidden layer are considered for updation
                self.params.extend(sigmoid_layer.params)
                self.delta_params.extend(sigmoid_layer.delta_params)

        self.logLayer = LogisticRegression(input=self.layers[-1].output,
                                           n_in=hidden_layers[-1],
                                           n_out=n_outs)

        self.layers.append(self.logLayer)
        self.params.extend(self.logLayer.params)
        self.delta_params.extend(self.logLayer.delta_params)

        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
        self.errors = self.logLayer.errors(self.y)
        self.output = self.logLayer.prediction()

        #regularization
        if self.l1_reg is not None:
            self.__l1Regularization__(self.hidden_layer_num * 2)
        if self.l2_reg is not None:
            self.__l2Regularization__(self.hidden_layer_num * 2)
Esempio n. 8
0
class DBN(nnet):
    """Deep Belief Network

    A deep belief network is obtained by stacking several RBMs on top of each
    other. The hidden layer of the RBM at layer `i` becomes the input of the
    RBM at layer `i+1`. The first layer RBM gets as input the input of the
    network, and the hidden layer of the last RBM represents the output. When
    used for classification, the DBN is treated as a MLP, by adding a logistic
    regression layer on top.
    """
    def __init__(self,
                 numpy_rng,
                 theano_rng=None,
                 n_ins=784,
                 hidden_layers_sizes=[500, 500],
                 n_outs=10,
                 first_layer_gb=True,
                 pretrainedLayers=None,
                 activation=T.nnet.sigmoid):
        """This class is made to support a variable number of layers.

        :type numpy_rng: numpy.random.RandomState
        :param numpy_rng: numpy random number generator used to draw initial
                    weights

        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
        :param theano_rng: Theano random generator; if None is given one is
                           generated based on a seed drawn from `rng`

        :type n_ins: int
        :param n_ins: dimension of the input to the DBN

        :type hidden_layers_sizes: list of ints
        :param hidden_layers_sizes: intermediate layers size, must contain
                               at least one value

        :type n_outs: int
        :param n_outs: dimension of the output of the network

        :type first_layer_gb: bool
        :param first_layer_gb: wether first layer is gausian-bernolli or 
                                bernolli-bernolli
        """
        super(DBN, self).__init__()
        self.layers = []
        self.rbm_layers = []
        self.n_layers = len(hidden_layers_sizes)

        if pretrainedLayers == None:
            self.nPreTrainLayers = n_layers
        else:
            self.nPreTrainLayers = pretrainedLayers

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2**30))

        # allocate symbolic variables for the data
        self.x = T.matrix('x')  # the data is presented as rasterized images
        self.y = T.ivector('y')  # the labels are presented as 1D vector
        # of [int] labels

        # The DBN is an MLP, for which all weights of intermediate
        # layers are shared with a different RBM.  We will first
        # construct the DBN as a deep multilayer perceptron, and when
        # constructing each sigmoidal layer we also construct an RBM
        # that shares weights with that layer. During pretraining we
        # will train these RBMs (which will lead to chainging the
        # weights of the MLP as well) During finetuning we will finish
        # training the DBN by doing stochastic gradient descent on the
        # MLP.

        for i in xrange(self.n_layers):
            # construct the sigmoidal layer

            # the size of the input is either the number of hidden
            # units of the layer below or the input size if we are on
            # the first layer
            # the input to this layer is either the activation of the
            # hidden layer below or the input of the DBN if you are on
            # the first layer
            if i == 0:
                input_size = n_ins
                layer_input = self.x
            else:
                input_size = hidden_layers_sizes[i - 1]
                layer_input = self.layers[-1].output

            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=activation)

            # add the layer to our list of layers
            self.layers.append(sigmoid_layer)

            # the parameters of the sigmoid_layers are parameters of the DBN.
            # The visible biases in the RBM are parameters of those RBMs,
            # but not of the DBN.
            self.params.extend(sigmoid_layer.params)
            self.delta_params.extend(sigmoid_layer.delta_params)

            # Construct an RBM that shared weights with this layer
            # the first layer could be Gaussian-Bernoulli RBM
            # other layers are Bernoulli-Bernoulli RBMs
            if i == 0 and first_layer_gb:
                rbm_layer = GBRBM(numpy_rng=numpy_rng,
                                  theano_rng=theano_rng,
                                  input=layer_input,
                                  n_visible=input_size,
                                  n_hidden=hidden_layers_sizes[i],
                                  W=sigmoid_layer.W,
                                  hbias=sigmoid_layer.b,
                                  activation=activation)
            else:
                rbm_layer = RBM(numpy_rng=numpy_rng,
                                theano_rng=theano_rng,
                                input=layer_input,
                                n_visible=input_size,
                                n_hidden=hidden_layers_sizes[i],
                                W=sigmoid_layer.W,
                                hbias=sigmoid_layer.b,
                                activation=activation)
            self.rbm_layers.append(rbm_layer)

        # We now need to add a logistic layer on top of the MLP
        self.logLayer = LogisticRegression(input=self.layers[-1].output,
                                           n_in=hidden_layers_sizes[-1],
                                           n_out=n_outs)
        self.layers.append(self.logLayer)
        self.params.extend(self.logLayer.params)
        self.delta_params.extend(self.logLayer.delta_params)

        # compute the cost for second phase of training, defined as the
        # negative log likelihood of the logistic regression (output) layer
        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)

        # compute the gradients with respect to the model parameters
        # symbolic variable that points to the number of errors made on the
        # minibatch given by self.x and self.y
        self.errors = self.logLayer.errors(self.y)

        self.output = self.logLayer.prediction()
        self.features = self.layers[-2].output
        self.features_dim = self.layers[-2].n_out

    def pretraining_functions(self, train_set_x, batch_size, weight_cost):
        '''Generates a list of functions, for performing one step of
        gradient descent at a given layer. The function will require
        as input the minibatch index, and to train an RBM you just
        need to iterate, calling the corresponding function on all
        minibatch indexes.

        :type train_set_x: theano.tensor.TensorType
        :param train_set_x: Shared var. that contains all datapoints used
                            for training the RBM
        :type batch_size: int
        :param batch_size: size of a [mini]batch
        :param weight_cost: weigth cost

        '''

        # index to a [mini]batch
        index = T.lscalar('index')  # index to a minibatch
        momentum = T.scalar('momentum')
        learning_rate = T.scalar('lr')  # learning rate to use

        # number of batches
        n_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
        # begining of a batch, given `index`
        batch_begin = index * batch_size
        # ending of a batch given `index`
        batch_end = batch_begin + batch_size

        pretrain_fns = []
        for rbm in self.rbm_layers:

            # get the cost and the updates list
            # using CD-k here (persisent=None,k=1) for training each RBM.
            r_cost, fe_cost, updates = rbm.get_cost_updates(
                batch_size, learning_rate, momentum, weight_cost)
            # compile the theano function
            fn = theano.function(
                inputs=[
                    index,
                    theano.Param(learning_rate, default=0.0001),
                    theano.Param(momentum, default=0.5)
                ],
                outputs=[r_cost, fe_cost],
                updates=updates,
                givens={self.x: train_set_x[batch_begin:batch_end]})
            # append function to the list of functions
            pretrain_fns.append(fn)

        return pretrain_fns
Esempio n. 9
0
    def __init__(self, numpy_rng, theano_rng=None, n_ins=784,
                 hidden_layers_sizes=[500, 500], n_outs=10,
                 first_layer_gb = True,pretrainedLayers=None,activation=T.nnet.sigmoid):
        """This class is made to support a variable number of layers.

        :type numpy_rng: numpy.random.RandomState
        :param numpy_rng: numpy random number generator used to draw initial
                    weights

        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
        :param theano_rng: Theano random generator; if None is given one is
                           generated based on a seed drawn from `rng`

        :type n_ins: int
        :param n_ins: dimension of the input to the DBN

        :type hidden_layers_sizes: list of ints
        :param hidden_layers_sizes: intermediate layers size, must contain
                               at least one value

        :type n_outs: int
        :param n_outs: dimension of the output of the network

        :type first_layer_gb: bool
        :param first_layer_gb: wether first layer is gausian-bernolli or 
                                bernolli-bernolli
        """
        super(DBN, self).__init__()
        self.layers = []
        self.rbm_layers = []
        self.n_layers = len(hidden_layers_sizes)

        if pretrainedLayers == None:
            self.nPreTrainLayers = n_layers
        else :
            self.nPreTrainLayers = pretrainedLayers

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

        # allocate symbolic variables for the data
        self.x = T.matrix('x')  # the data is presented as rasterized images
        self.y = T.ivector('y')  # the labels are presented as 1D vector
                                 # of [int] labels

        # The DBN is an MLP, for which all weights of intermediate
        # layers are shared with a different RBM.  We will first
        # construct the DBN as a deep multilayer perceptron, and when
        # constructing each sigmoidal layer we also construct an RBM
        # that shares weights with that layer. During pretraining we
        # will train these RBMs (which will lead to chainging the
        # weights of the MLP as well) During finetuning we will finish
        # training the DBN by doing stochastic gradient descent on the
        # MLP.

        for i in xrange(self.n_layers):
            # construct the sigmoidal layer

            # the size of the input is either the number of hidden
            # units of the layer below or the input size if we are on
            # the first layer
            # the input to this layer is either the activation of the
            # hidden layer below or the input of the DBN if you are on
            # the first layer
            if i == 0:
                input_size = n_ins
                layer_input = self.x
            else:
                input_size = hidden_layers_sizes[i - 1]
                layer_input = self.layers[-1].output


            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=activation)

            # add the layer to our list of layers
            self.layers.append(sigmoid_layer)

            # the parameters of the sigmoid_layers are parameters of the DBN. 
            # The visible biases in the RBM are parameters of those RBMs, 
            # but not of the DBN.
            self.params.extend(sigmoid_layer.params)
            self.delta_params.extend(sigmoid_layer.delta_params)

            # Construct an RBM that shared weights with this layer
            # the first layer could be Gaussian-Bernoulli RBM
            # other layers are Bernoulli-Bernoulli RBMs
            if i == 0 and first_layer_gb:
                rbm_layer = GBRBM(numpy_rng=numpy_rng,
                              theano_rng=theano_rng,
                              input=layer_input,
                              n_visible=input_size,
                              n_hidden=hidden_layers_sizes[i],
                              W=sigmoid_layer.W,
                              hbias=sigmoid_layer.b,
                              activation=activation)
            else:
                rbm_layer = RBM(numpy_rng=numpy_rng,
                              theano_rng=theano_rng,
                              input=layer_input,
                              n_visible=input_size,
                              n_hidden=hidden_layers_sizes[i],
                              W=sigmoid_layer.W,
                              hbias=sigmoid_layer.b,
                              activation=activation)
            self.rbm_layers.append(rbm_layer)            

        # We now need to add a logistic layer on top of the MLP
        self.logLayer = LogisticRegression(
            input=self.layers[-1].output,
            n_in=hidden_layers_sizes[-1],
            n_out=n_outs)
        self.layers.append(self.logLayer)
        self.params.extend(self.logLayer.params)
        self.delta_params.extend(self.logLayer.delta_params)

        # compute the cost for second phase of training, defined as the
        # negative log likelihood of the logistic regression (output) layer
        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)

        # compute the gradients with respect to the model parameters
        # symbolic variable that points to the number of errors made on the
        # minibatch given by self.x and self.y
        self.errors = self.logLayer.errors(self.y)

        self.output = self.logLayer.prediction();
        self.features = self.layers[-2].output;
        self.features_dim = self.layers[-2].n_out
Esempio n. 10
0
class DBN(nnet):
    """Deep Belief Network

    A deep belief network is obtained by stacking several RBMs on top of each
    other. The hidden layer of the RBM at layer `i` becomes the input of the
    RBM at layer `i+1`. The first layer RBM gets as input the input of the
    network, and the hidden layer of the last RBM represents the output. When
    used for classification, the DBN is treated as a MLP, by adding a logistic
    regression layer on top.
    """

    def __init__(self, numpy_rng, theano_rng=None, n_ins=784,
                 hidden_layers_sizes=[500, 500], n_outs=10,
                 first_layer_gb = True,pretrainedLayers=None,activation=T.nnet.sigmoid):
        """This class is made to support a variable number of layers.

        :type numpy_rng: numpy.random.RandomState
        :param numpy_rng: numpy random number generator used to draw initial
                    weights

        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
        :param theano_rng: Theano random generator; if None is given one is
                           generated based on a seed drawn from `rng`

        :type n_ins: int
        :param n_ins: dimension of the input to the DBN

        :type hidden_layers_sizes: list of ints
        :param hidden_layers_sizes: intermediate layers size, must contain
                               at least one value

        :type n_outs: int
        :param n_outs: dimension of the output of the network

        :type first_layer_gb: bool
        :param first_layer_gb: wether first layer is gausian-bernolli or 
                                bernolli-bernolli
        """
        super(DBN, self).__init__()
        self.layers = []
        self.rbm_layers = []
        self.n_layers = len(hidden_layers_sizes)

        if pretrainedLayers == None:
            self.nPreTrainLayers = n_layers
        else :
            self.nPreTrainLayers = pretrainedLayers

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

        # allocate symbolic variables for the data
        self.x = T.matrix('x')  # the data is presented as rasterized images
        self.y = T.ivector('y')  # the labels are presented as 1D vector
                                 # of [int] labels

        # The DBN is an MLP, for which all weights of intermediate
        # layers are shared with a different RBM.  We will first
        # construct the DBN as a deep multilayer perceptron, and when
        # constructing each sigmoidal layer we also construct an RBM
        # that shares weights with that layer. During pretraining we
        # will train these RBMs (which will lead to chainging the
        # weights of the MLP as well) During finetuning we will finish
        # training the DBN by doing stochastic gradient descent on the
        # MLP.

        for i in xrange(self.n_layers):
            # construct the sigmoidal layer

            # the size of the input is either the number of hidden
            # units of the layer below or the input size if we are on
            # the first layer
            # the input to this layer is either the activation of the
            # hidden layer below or the input of the DBN if you are on
            # the first layer
            if i == 0:
                input_size = n_ins
                layer_input = self.x
            else:
                input_size = hidden_layers_sizes[i - 1]
                layer_input = self.layers[-1].output


            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=activation)

            # add the layer to our list of layers
            self.layers.append(sigmoid_layer)

            # the parameters of the sigmoid_layers are parameters of the DBN. 
            # The visible biases in the RBM are parameters of those RBMs, 
            # but not of the DBN.
            self.params.extend(sigmoid_layer.params)
            self.delta_params.extend(sigmoid_layer.delta_params)

            # Construct an RBM that shared weights with this layer
            # the first layer could be Gaussian-Bernoulli RBM
            # other layers are Bernoulli-Bernoulli RBMs
            if i == 0 and first_layer_gb:
                rbm_layer = GBRBM(numpy_rng=numpy_rng,
                              theano_rng=theano_rng,
                              input=layer_input,
                              n_visible=input_size,
                              n_hidden=hidden_layers_sizes[i],
                              W=sigmoid_layer.W,
                              hbias=sigmoid_layer.b,
                              activation=activation)
            else:
                rbm_layer = RBM(numpy_rng=numpy_rng,
                              theano_rng=theano_rng,
                              input=layer_input,
                              n_visible=input_size,
                              n_hidden=hidden_layers_sizes[i],
                              W=sigmoid_layer.W,
                              hbias=sigmoid_layer.b,
                              activation=activation)
            self.rbm_layers.append(rbm_layer)            

        # We now need to add a logistic layer on top of the MLP
        self.logLayer = LogisticRegression(
            input=self.layers[-1].output,
            n_in=hidden_layers_sizes[-1],
            n_out=n_outs)
        self.layers.append(self.logLayer)
        self.params.extend(self.logLayer.params)
        self.delta_params.extend(self.logLayer.delta_params)

        # compute the cost for second phase of training, defined as the
        # negative log likelihood of the logistic regression (output) layer
        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)

        # compute the gradients with respect to the model parameters
        # symbolic variable that points to the number of errors made on the
        # minibatch given by self.x and self.y
        self.errors = self.logLayer.errors(self.y)

        self.output = self.logLayer.prediction();
        self.features = self.layers[-2].output;
        self.features_dim = self.layers[-2].n_out

    def pretraining_functions(self, train_set_x, batch_size, weight_cost):
        '''Generates a list of functions, for performing one step of
        gradient descent at a given layer. The function will require
        as input the minibatch index, and to train an RBM you just
        need to iterate, calling the corresponding function on all
        minibatch indexes.

        :type train_set_x: theano.tensor.TensorType
        :param train_set_x: Shared var. that contains all datapoints used
                            for training the RBM
        :type batch_size: int
        :param batch_size: size of a [mini]batch
        :param weight_cost: weigth cost

        '''

        # index to a [mini]batch
        index = T.lscalar('index')  # index to a minibatch
        momentum = T.scalar('momentum')
        learning_rate = T.scalar('lr')  # learning rate to use

        # number of batches
        n_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
        # begining of a batch, given `index`
        batch_begin = index * batch_size
        # ending of a batch given `index`
        batch_end = batch_begin + batch_size

        pretrain_fns = []
        for rbm in self.rbm_layers:

            # get the cost and the updates list
            # using CD-k here (persisent=None,k=1) for training each RBM.
            r_cost, fe_cost, updates = rbm.get_cost_updates(batch_size, learning_rate,
                                                            momentum, weight_cost)
            # compile the theano function
            fn = theano.function(inputs=[index,
                              theano.Param(learning_rate, default=0.0001),
                              theano.Param(momentum, default=0.5)],
                              outputs= [r_cost, fe_cost],
                              updates=updates,
                              givens={self.x: train_set_x[batch_begin:batch_end]})
            # append function to the list of functions
            pretrain_fns.append(fn)

        return pretrain_fns
Esempio n. 11
0
    def __init__(self, numpy_rng, theano_rng=None, n_ins=784,
                 hidden_layers_sizes=[500, 500], n_outs=10,
                 corruption_levels=[0.1, 0.1],activation=T.nnet.sigmoid):
        """ This class is made to support a variable number of layers.

        :type numpy_rng: numpy.random.RandomState
        :param numpy_rng: numpy random number generator used to draw initial
                    weights

        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
        :param theano_rng: Theano random generator; if None is given one is
                           generated based on a seed drawn from `rng`

        :type n_ins: int
        :param n_ins: dimension of the input to the sdA

        :type n_layers_sizes: list of ints
        :param n_layers_sizes: intermediate layers size, must contain
                               at least one value

        :type n_outs: int
        :param n_outs: dimension of the output of the network

        :type corruption_levels: list of float
        :param corruption_levels: amount of corruption to use for each
                                  layer
        """
        super(SDA, self).__init__()
        
        self.layers = []
        self.dA_layers = []
        self.n_layers = len(hidden_layers_sizes)

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
        # allocate symbolic variables for the data
        self.x = T.matrix('x')  # the data is presented as rasterized images
        self.y = T.ivector('y')  # the labels are presented as 1D vector of
                                 # [int] labels

        # The SdA is an MLP, for which all weights of intermediate layers
        # are shared with a different denoising autoencoders
        # We will first construct the SdA as a deep multilayer perceptron,
        # and when constructing each sigmoidal layer we also construct a
        # denoising autoencoder that shares weights with that layer
        # During pretraining we will train these autoencoders (which will
        # lead to chainging the weights of the MLP as well)
        # During finetunining we will finish training the SdA by doing
        # stochastich gradient descent on the MLP

        for i in xrange(self.n_layers):
            # construct the sigmoidal layer

            # the size of the input is either the number of hidden units of
            # the layer below or the input size if we are on the first layer
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layers_sizes[i - 1]

            # the input to this layer is either the activation of the hidden
            # layer below or the input of the SdA if you are on the first
            # layer
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.layers[-1].output

            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=T.nnet.sigmoid)
            # add the layer to our list of layers
            self.layers.append(sigmoid_layer)
            # its arguably a philosophical question...
            # but we are going to only declare that the parameters of the
            # sigmoid_layers are parameters of the StackedDAA
            # the visible biases in the dA are parameters of those
            # dA, but not the SdA
            self.params.extend(sigmoid_layer.params)
            self.delta_params.extend(sigmoid_layer.delta_params)

            # Construct a denoising autoencoder that shared weights with this
            # layer
            dA_layer = dA(numpy_rng=numpy_rng,
                          theano_rng=theano_rng,
                          input=layer_input,
                          n_visible=input_size,
                          n_hidden=hidden_layers_sizes[i],
                          W=sigmoid_layer.W,
                          bhid=sigmoid_layer.b,
                          activation=T.nnet.sigmoid)
            self.dA_layers.append(dA_layer)

        # We now need to add a logistic layer on top of the MLP
        self.logLayer = LogisticRegression(
                         input=self.layers[-1].output,
                         n_in=hidden_layers_sizes[-1], n_out=n_outs)
        self.layers.append(self.logLayer)
        self.params.extend(self.logLayer.params)
        self.delta_params.extend(self.logLayer.delta_params)
        # construct a function that implements one step of finetunining

        # compute the cost for second phase of training,
        # defined as the negative log likelihood
        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
        # compute the gradients with respect to the model parameters
        # symbolic variable that points to the number of errors made on the
        # minibatch given by self.x and self.y
        self.errors = self.logLayer.errors(self.y)

        self.output = self.logLayer.prediction();
        self.features = self.layers[-2].output;
        self.features_dim = self.layers[-2].n_out
Esempio n. 12
0
class SDA(nnet):
    """Stacked denoising auto-encoder class (SdA)

    A stacked denoising autoencoder model is obtained by stacking several
    dAs. The hidden layer of the dA at layer `i` becomes the input of
    the dA at layer `i+1`. The first layer dA gets as input the input of
    the SdA, and the hidden layer of the last dA represents the output.
    Note that after pretraining, the SdA is dealt with as a normal MLP,
    the dAs are only used to initialize the weights.
    """

    def __init__(self, numpy_rng, theano_rng=None, n_ins=784,
                 hidden_layers_sizes=[500, 500], n_outs=10,
                 corruption_levels=[0.1, 0.1],activation=T.nnet.sigmoid):
        """ This class is made to support a variable number of layers.

        :type numpy_rng: numpy.random.RandomState
        :param numpy_rng: numpy random number generator used to draw initial
                    weights

        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
        :param theano_rng: Theano random generator; if None is given one is
                           generated based on a seed drawn from `rng`

        :type n_ins: int
        :param n_ins: dimension of the input to the sdA

        :type n_layers_sizes: list of ints
        :param n_layers_sizes: intermediate layers size, must contain
                               at least one value

        :type n_outs: int
        :param n_outs: dimension of the output of the network

        :type corruption_levels: list of float
        :param corruption_levels: amount of corruption to use for each
                                  layer
        """
        super(SDA, self).__init__()
        
        self.layers = []
        self.dA_layers = []
        self.n_layers = len(hidden_layers_sizes)

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
        # allocate symbolic variables for the data
        self.x = T.matrix('x')  # the data is presented as rasterized images
        self.y = T.ivector('y')  # the labels are presented as 1D vector of
                                 # [int] labels

        # The SdA is an MLP, for which all weights of intermediate layers
        # are shared with a different denoising autoencoders
        # We will first construct the SdA as a deep multilayer perceptron,
        # and when constructing each sigmoidal layer we also construct a
        # denoising autoencoder that shares weights with that layer
        # During pretraining we will train these autoencoders (which will
        # lead to chainging the weights of the MLP as well)
        # During finetunining we will finish training the SdA by doing
        # stochastich gradient descent on the MLP

        for i in xrange(self.n_layers):
            # construct the sigmoidal layer

            # the size of the input is either the number of hidden units of
            # the layer below or the input size if we are on the first layer
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layers_sizes[i - 1]

            # the input to this layer is either the activation of the hidden
            # layer below or the input of the SdA if you are on the first
            # layer
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.layers[-1].output

            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=T.nnet.sigmoid)
            # add the layer to our list of layers
            self.layers.append(sigmoid_layer)
            # its arguably a philosophical question...
            # but we are going to only declare that the parameters of the
            # sigmoid_layers are parameters of the StackedDAA
            # the visible biases in the dA are parameters of those
            # dA, but not the SdA
            self.params.extend(sigmoid_layer.params)
            self.delta_params.extend(sigmoid_layer.delta_params)

            # Construct a denoising autoencoder that shared weights with this
            # layer
            dA_layer = dA(numpy_rng=numpy_rng,
                          theano_rng=theano_rng,
                          input=layer_input,
                          n_visible=input_size,
                          n_hidden=hidden_layers_sizes[i],
                          W=sigmoid_layer.W,
                          bhid=sigmoid_layer.b,
                          activation=T.nnet.sigmoid)
            self.dA_layers.append(dA_layer)

        # We now need to add a logistic layer on top of the MLP
        self.logLayer = LogisticRegression(
                         input=self.layers[-1].output,
                         n_in=hidden_layers_sizes[-1], n_out=n_outs)
        self.layers.append(self.logLayer)
        self.params.extend(self.logLayer.params)
        self.delta_params.extend(self.logLayer.delta_params)
        # construct a function that implements one step of finetunining

        # compute the cost for second phase of training,
        # defined as the negative log likelihood
        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
        # compute the gradients with respect to the model parameters
        # symbolic variable that points to the number of errors made on the
        # minibatch given by self.x and self.y
        self.errors = self.logLayer.errors(self.y)

        self.output = self.logLayer.prediction();
        self.features = self.layers[-2].output;
        self.features_dim = self.layers[-2].n_out

    def pretraining_functions(self, train_x, batch_size):
        ''' Generates a list of functions, each of them implementing one
        step in trainnig the dA corresponding to the layer with same index.
        The function will require as input the minibatch index, and to train
        a dA you just need to iterate, calling the corresponding function on
        all minibatch indexes.

        :type train_x: theano.tensor.TensorType
        :param train_x: Shared variable that contains all datapoints used
                            for training the dA

        :type batch_size: int
        :param batch_size: size of a [mini]batch
        '''

        # index to a [mini]batch
        index = T.lscalar('index')  # index to a minibatch
        corruption_level = T.scalar('corruption')  # % of corruption to use
        learning_rate = T.scalar('lr')  # learning rate to use
        # number of batches
        n_batches = train_x.get_value(borrow=True).shape[0] / batch_size
        # begining of a batch, given `index`
        batch_begin = index * batch_size
        # ending of a batch given `index`
        batch_end = batch_begin + batch_size

        pretrain_fns = []
        for dA in self.dA_layers:
            # get the cost and the updates list
            cost, updates = dA.get_cost_updates(corruption_level,
                                                learning_rate)
            # compile the theano function
            fn = theano.function(inputs=[index,
                              theano.Param(corruption_level, default=0.2),
                              theano.Param(learning_rate, default=0.1)],
                                 outputs=cost,
                                 updates=updates,
                                 givens={self.x: train_x[batch_begin:
                                                             batch_end]})
            # append `fn` to the list of functions
            pretrain_fns.append(fn)

        return pretrain_fns
Esempio n. 13
0
    def __init__(self, numpy_rng, theano_rng=None, n_ins=784,
                 hidden_layers_sizes=[500, 500], n_outs=10,
                 activation = T.nnet.sigmoid, adv_activation = None,
                 max_col_norm = None, l1_reg = None, l2_reg = None):

        super(DNN, self).__init__()
        
        self.layers = []
        self.n_layers = len(hidden_layers_sizes)

        self.max_col_norm = max_col_norm
        self.l1_reg = l1_reg
        self.l2_reg = l2_reg

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
        # allocate symbolic variables for the data
        self.x = T.matrix('x') 
        self.y = T.ivector('y')

        for i in xrange(self.n_layers):
            # construct the sigmoidal layer
            if i == 0:
                input_size = n_ins
                layer_input = self.x
            else:
                input_size = hidden_layers_sizes[i - 1]
                layer_input = self.layers[-1].output

            if not adv_activation is  None:
                sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i] * pool_size,
                                        activation = activation,
                                        adv_activation_method = adv_activation['method'],
                                        pool_size = adv_activation['pool_size'],
                                        pnorm_order = adv_activation['pnorm_order'])
            else:
                sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=activation)
                                        
            # add the layer to our list of layers
            self.layers.append(sigmoid_layer)
            self.params.extend(sigmoid_layer.params)
            self.delta_params.extend(sigmoid_layer.delta_params)
            
        # We now need to add a logistic layer on top of the MLP
        self.logLayer = LogisticRegression(
                         input=self.layers[-1].output,
                         n_in=hidden_layers_sizes[-1], n_out=n_outs)

        self.layers.append(self.logLayer)
        self.params.extend(self.logLayer.params)
        self.delta_params.extend(self.logLayer.delta_params)
       
        # construct a function that implements one step of finetunining

        # compute the cost for second phase of training,
        # defined as the negative log likelihood
        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
        self.errors = self.logLayer.errors(self.y)

        if self.l1_reg is not None:
            self.__l1Regularization__();

        if self.l2_reg is not None:
            self.__l2Regularization__();

        self.output = self.logLayer.prediction();
        self.features = self.layers[-2].output;
        self.features_dim = self.layers[-2].n_out