def __init__(self, numpy_rng, theano_rng=None, n_ins=784,
             hidden_layers_sizes=[500, 500], n_outs=10,
             activation=T.nnet.sigmoid, input_dropout_factor=0,
             dropout_factor=[0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2],
             adv_activation=None, max_col_norm=None,
             l1_reg=None, l2_reg=None):
    super(DNN_Dropout, self).__init__()
    self.layers = []
    self.dropout_layers = []
    self.n_layers = len(hidden_layers_sizes)
    self.max_col_norm = max_col_norm
    self.l1_reg = l1_reg
    self.l2_reg = l2_reg
    self.input_dropout_factor = input_dropout_factor
    self.dropout_factor = dropout_factor
    assert self.n_layers > 0

    if not theano_rng:
        theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

    # allocate symbolic variables for the data
    self.x = T.matrix('x')
    self.y = T.ivector('y')

    for i in xrange(self.n_layers):
        # construct the sigmoidal layer
        if i == 0:
            input_size = n_ins
            layer_input = self.x
            if input_dropout_factor > 0.0:
                dropout_layer_input = _dropout_from_layer(theano_rng, self.x,
                                                          input_dropout_factor)
            else:
                dropout_layer_input = self.x
        else:
            input_size = hidden_layers_sizes[i - 1]
            layer_input = (1 - self.dropout_factor[i - 1]) * self.layers[-1].output
            dropout_layer_input = self.dropout_layers[-1].dropout_output

        if adv_activation is not None:
            dropout_layer = DropoutHiddenLayer(rng=numpy_rng,
                                input=dropout_layer_input,
                                n_in=input_size,
                                n_out=hidden_layers_sizes[i] * adv_activation['pool_size'],
                                activation=activation,
                                adv_activation_method=adv_activation['method'],
                                pool_size=adv_activation['pool_size'],
                                pnorm_order=adv_activation['pnorm_order'],
                                dropout_factor=self.dropout_factor[i])
            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                input=layer_input,
                                n_in=input_size,
                                n_out=hidden_layers_sizes[i] * adv_activation['pool_size'],
                                activation=activation,
                                adv_activation_method=adv_activation['method'],
                                pool_size=adv_activation['pool_size'],
                                pnorm_order=adv_activation['pnorm_order'],
                                W=dropout_layer.W, b=dropout_layer.b)
        else:
            dropout_layer = DropoutHiddenLayer(rng=numpy_rng,
                                input=dropout_layer_input,
                                n_in=input_size,
                                n_out=hidden_layers_sizes[i],
                                activation=activation,
                                dropout_factor=self.dropout_factor[i])
            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                input=layer_input,
                                n_in=input_size,
                                n_out=hidden_layers_sizes[i],
                                activation=activation,
                                W=dropout_layer.W, b=dropout_layer.b)

        # add the layer to our list of layers
        self.layers.append(sigmoid_layer)
        self.dropout_layers.append(dropout_layer)
        self.params.extend(dropout_layer.params)
        self.delta_params.extend(dropout_layer.delta_params)

    # We now need to add a logistic layer on top of the MLP
    self.dropout_logLayer = LogisticRegression(
        input=self.dropout_layers[-1].dropout_output,
        n_in=hidden_layers_sizes[-1], n_out=n_outs)
    self.logLayer = LogisticRegression(
        input=(1 - self.dropout_factor[-1]) * self.layers[-1].output,
        n_in=hidden_layers_sizes[-1], n_out=n_outs,
        W=self.dropout_logLayer.W, b=self.dropout_logLayer.b)

    self.dropout_layers.append(self.dropout_logLayer)
    self.layers.append(self.logLayer)
    self.params.extend(self.dropout_logLayer.params)
    self.delta_params.extend(self.dropout_logLayer.delta_params)

    # compute the cost
    self.finetune_cost = self.dropout_logLayer.negative_log_likelihood(self.y)
    self.errors = self.logLayer.errors(self.y)
    self.output = self.logLayer.prediction()
    self.features = self.layers[-2].output
    self.features_dim = self.layers[-2].n_out

    if self.l1_reg is not None:
        self.__l1Regularization__()
    if self.l2_reg is not None:
        self.__l2Regularization__()
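# Usage sketch (not from the source; hypothetical values, and it assumes the
# usual `import numpy`, `import theano.tensor as T`, and the RandomStreams /
# layer classes used above are already imported in this module):
#
#   rng = numpy.random.RandomState(1234)
#   dnn = DNN_Dropout(numpy_rng=rng, n_ins=784,
#                     hidden_layers_sizes=[1024, 1024, 1024], n_outs=10,
#                     input_dropout_factor=0.2,
#                     dropout_factor=[0.5, 0.5, 0.5])
#
# `dnn.finetune_cost` is the dropout network's negative log likelihood, while
# `dnn.errors` / `dnn.output` come from the weight-sharing non-dropout copy,
# whose layer inputs are rescaled by (1 - dropout_factor).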
def __init__(self, numpy_rng, theano_rng, batch_size, n_outs,
             conv_layer_configs, hidden_layer_configs, use_fast=False,
             hidden_activation=T.nnet.sigmoid, l1_reg=None, l2_reg=None,
             max_col_norm=None, input_dropout_factor=0.0):
    super(DropoutCNN, self).__init__(conv_layer_configs, hidden_layer_configs,
                                     l1_reg, l2_reg, max_col_norm)
    self.input_dropout_factor = input_dropout_factor
    self.dropout_layers = []

    if not theano_rng:
        theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

    for i in xrange(self.conv_layer_num):
        # construct the convolution layer
        if i == 0:  # input layer
            conv_input = self.x
            if self.input_dropout_factor > 0.0:
                dropout_conv_input = _dropout_from_layer(theano_rng, self.x,
                                                         self.input_dropout_factor)
            else:
                dropout_conv_input = self.x
        else:
            # output of the previous layer, rescaled by its dropout factor
            conv_input = (1 - conv_layer_configs[i - 1]['dropout_factor']) * self.layers[-1].output
            dropout_conv_input = self.dropout_layers[-1].dropout_output

        config = conv_layer_configs[i]
        conv_activation = parse_activation(config['activation'])
        dropout_conv_layer = DropoutConvLayer(numpy_rng=numpy_rng,
                                input=dropout_conv_input,
                                input_shape=config['input_shape'],
                                filter_shape=config['filter_shape'],
                                poolsize=config['poolsize'],
                                activation=conv_activation,
                                use_fast=use_fast,
                                dropout_factor=conv_layer_configs[i]['dropout_factor'])
        conv_layer = ConvLayer(numpy_rng=numpy_rng,
                               input=conv_input,
                               input_shape=config['input_shape'],
                               filter_shape=config['filter_shape'],
                               poolsize=config['poolsize'],
                               activation=conv_activation,
                               use_fast=use_fast,
                               W=dropout_conv_layer.W, b=dropout_conv_layer.b)
        self.dropout_layers.append(dropout_conv_layer)
        self.layers.append(conv_layer)
        self.conv_layers.append(conv_layer)
        if config['update'] == True:
            # only the convolution layers flagged for update are fine-tuned
            self.params.extend(dropout_conv_layer.params)
            self.delta_params.extend(dropout_conv_layer.delta_params)

    hidden_layers = hidden_layer_configs['hidden_layers']
    self.conv_output_dim = (config['output_shape'][1] *
                            config['output_shape'][2] *
                            config['output_shape'][3])
    adv_activation_configs = hidden_layer_configs['adv_activation']

    # flatten the output of the last convolution layer
    self.dropout_features = self.dropout_layers[-1].dropout_output.flatten(2)
    self.features = self.conv_layers[-1].output.flatten(2)
    self.features_dim = self.conv_output_dim

    self.dropout_layers = []
    self.dropout_factor = hidden_layer_configs['dropout_factor']

    for i in xrange(self.hidden_layer_num):
        # construct the hidden layer
        if i == 0:  # the first fully-connected layer takes the flattened conv features
            input_size = self.conv_output_dim
            dropout_layer_input = self.dropout_features
            layer_input = self.features
        else:
            input_size = hidden_layers[i - 1]  # number of hidden units in the previous layer
            dropout_layer_input = self.dropout_layers[-1].dropout_output
            layer_input = (1 - self.dropout_factor[i - 1]) * self.layers[-1].output

        if adv_activation_configs is None:
            dropout_sigmoid_layer = DropoutHiddenLayer(rng=numpy_rng,
                                        input=dropout_layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers[i],
                                        activation=hidden_activation,
                                        dropout_factor=self.dropout_factor[i])
            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers[i],
                                        activation=hidden_activation,
                                        W=dropout_sigmoid_layer.W,
                                        b=dropout_sigmoid_layer.b)
        else:
            dropout_sigmoid_layer = DropoutHiddenLayer(rng=numpy_rng,
                                        input=dropout_layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers[i] * adv_activation_configs['pool_size'],
                                        activation=hidden_activation,
                                        adv_activation_method=adv_activation_configs['method'],
                                        pool_size=adv_activation_configs['pool_size'],
                                        pnorm_order=adv_activation_configs['pnorm_order'],
                                        dropout_factor=self.dropout_factor[i])
            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers[i] * adv_activation_configs['pool_size'],
                                        activation=hidden_activation,
                                        adv_activation_method=adv_activation_configs['method'],
                                        pool_size=adv_activation_configs['pool_size'],
                                        pnorm_order=adv_activation_configs['pnorm_order'],
                                        W=dropout_sigmoid_layer.W,
                                        b=dropout_sigmoid_layer.b)

        self.layers.append(sigmoid_layer)
        self.dropout_layers.append(dropout_sigmoid_layer)
        self.mlp_layers.append(sigmoid_layer)
        if config['update'] == True:  # note: reuses the last conv layer's 'update' flag
            self.params.extend(dropout_sigmoid_layer.params)
            self.delta_params.extend(dropout_sigmoid_layer.delta_params)

    self.dropout_logLayer = LogisticRegression(
        input=self.dropout_layers[-1].dropout_output,
        n_in=hidden_layers[-1], n_out=n_outs)
    self.logLayer = LogisticRegression(
        input=(1 - self.dropout_factor[-1]) * self.layers[-1].output,
        n_in=hidden_layers[-1], n_out=n_outs,
        W=self.dropout_logLayer.W, b=self.dropout_logLayer.b)

    self.dropout_layers.append(self.dropout_logLayer)
    self.layers.append(self.logLayer)
    self.params.extend(self.dropout_logLayer.params)
    self.delta_params.extend(self.dropout_logLayer.delta_params)

    self.finetune_cost = self.dropout_logLayer.negative_log_likelihood(self.y)
    self.errors = self.logLayer.errors(self.y)
    self.output = self.logLayer.prediction()

    # regularization
    if self.l1_reg is not None:
        self.__l1Regularization__(self.hidden_layer_num * 2)
    if self.l2_reg is not None:
        self.__l2Regularization__(self.hidden_layer_num * 2)
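# Usage sketch (not from the source; the config dictionaries are hypothetical
# and only illustrate the keys this constructor reads):
#
#   conv_layer_configs = [{'input_shape': ..., 'filter_shape': ...,
#                          'poolsize': ..., 'activation': 'sigmoid',
#                          'output_shape': ..., 'dropout_factor': 0.0,
#                          'update': True}, ...]
#   hidden_layer_configs = {'hidden_layers': [1024, 1024],
#                           'dropout_factor': [0.5, 0.5],
#                           'adv_activation': None}
#   cnn = DropoutCNN(numpy_rng, theano_rng, batch_size=256, n_outs=10,
#                    conv_layer_configs=conv_layer_configs,
#                    hidden_layer_configs=hidden_layer_configs,
#                    input_dropout_factor=0.1)
#
# The fully-connected dropout path is rebuilt from the flattened convolution
# features, and the softmax layer shares W/b between the dropout and
# non-dropout copies, just as in DNN_Dropout above.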
def __init__(self, numpy_rng, theano_rng=None, n_ins=784,
             hidden_layers_sizes=[500, 500], n_outs=10,
             first_layer_gb=True, pretrainedLayers=None,
             activation=T.nnet.sigmoid):
    """This class is made to support a variable number of layers.

    :type numpy_rng: numpy.random.RandomState
    :param numpy_rng: numpy random number generator used to draw initial weights

    :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
    :param theano_rng: Theano random generator; if None is given one is
                       generated based on a seed drawn from `rng`

    :type n_ins: int
    :param n_ins: dimension of the input to the DBN

    :type hidden_layers_sizes: list of ints
    :param hidden_layers_sizes: intermediate layers sizes, must contain
                                at least one value

    :type n_outs: int
    :param n_outs: dimension of the output of the network

    :type first_layer_gb: bool
    :param first_layer_gb: whether the first layer is Gaussian-Bernoulli
                           or Bernoulli-Bernoulli
    """
    super(DBN, self).__init__()
    self.layers = []
    self.rbm_layers = []
    self.n_layers = len(hidden_layers_sizes)
    if pretrainedLayers is None:
        self.nPreTrainLayers = self.n_layers
    else:
        self.nPreTrainLayers = pretrainedLayers
    assert self.n_layers > 0

    if not theano_rng:
        theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

    # allocate symbolic variables for the data
    self.x = T.matrix('x')    # the data is presented as rasterized images
    self.y = T.ivector('y')   # the labels are presented as a 1D vector of [int] labels

    # The DBN is an MLP, for which all weights of intermediate layers are
    # shared with a different RBM. We will first construct the DBN as a
    # deep multilayer perceptron, and when constructing each sigmoidal
    # layer we also construct an RBM that shares weights with that layer.
    # During pretraining we will train these RBMs (which will lead to
    # changing the weights of the MLP as well). During finetuning we will
    # finish training the DBN by doing stochastic gradient descent on the MLP.
    for i in xrange(self.n_layers):
        # construct the sigmoidal layer
        # the size of the input is either the number of hidden units of the
        # layer below or the input size if we are on the first layer; the
        # input to this layer is either the activation of the hidden layer
        # below or the input of the DBN if we are on the first layer
        if i == 0:
            input_size = n_ins
            layer_input = self.x
        else:
            input_size = hidden_layers_sizes[i - 1]
            layer_input = self.layers[-1].output

        sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                    input=layer_input,
                                    n_in=input_size,
                                    n_out=hidden_layers_sizes[i],
                                    activation=activation)
        # add the layer to our list of layers
        self.layers.append(sigmoid_layer)

        # the parameters of the sigmoid layers are parameters of the DBN;
        # the visible biases in the RBMs are parameters of those RBMs,
        # but not of the DBN
        self.params.extend(sigmoid_layer.params)
        self.delta_params.extend(sigmoid_layer.delta_params)

        # construct an RBM that shares weights with this layer;
        # the first layer may be a Gaussian-Bernoulli RBM,
        # the other layers are Bernoulli-Bernoulli RBMs
        if i == 0 and first_layer_gb:
            rbm_layer = GBRBM(numpy_rng=numpy_rng, theano_rng=theano_rng,
                              input=layer_input,
                              n_visible=input_size,
                              n_hidden=hidden_layers_sizes[i],
                              W=sigmoid_layer.W, hbias=sigmoid_layer.b,
                              activation=activation)
        else:
            rbm_layer = RBM(numpy_rng=numpy_rng, theano_rng=theano_rng,
                            input=layer_input,
                            n_visible=input_size,
                            n_hidden=hidden_layers_sizes[i],
                            W=sigmoid_layer.W, hbias=sigmoid_layer.b,
                            activation=activation)
        self.rbm_layers.append(rbm_layer)

    # We now need to add a logistic layer on top of the MLP
    self.logLayer = LogisticRegression(input=self.layers[-1].output,
                                       n_in=hidden_layers_sizes[-1],
                                       n_out=n_outs)
    self.layers.append(self.logLayer)
    self.params.extend(self.logLayer.params)
    self.delta_params.extend(self.logLayer.delta_params)

    # compute the cost for the second phase of training, defined as the
    # negative log likelihood of the logistic regression (output) layer
    self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
    # symbolic variable that points to the number of errors made on the
    # minibatch given by self.x and self.y
    self.errors = self.logLayer.errors(self.y)
    self.output = self.logLayer.prediction()
    self.features = self.layers[-2].output
    self.features_dim = self.layers[-2].n_out
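# Usage sketch (not from the source; hypothetical values, assuming the usual
# `import numpy` / `import theano.tensor as T` module-level imports):
#
#   rng = numpy.random.RandomState(1234)
#   dbn = DBN(numpy_rng=rng, n_ins=784,
#             hidden_layers_sizes=[1024, 1024, 1024], n_outs=10,
#             first_layer_gb=True)
#
# Pretraining then operates on dbn.rbm_layers (the first being a GBRBM when
# first_layer_gb is True), and finetuning minimizes dbn.finetune_cost with
# respect to dbn.params; both paths share the same weights.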
def __init__(self, numpy_rng, theano_rng, batch_size, n_outs,
             conv_layer_configs, hidden_layer_configs,
             hidden_activation=T.nnet.sigmoid, l1_reg=None, l2_reg=None,
             max_col_norm=None):
    super(CNN3D, self).__init__(conv_layer_configs, hidden_layer_configs,
                                l1_reg, l2_reg, max_col_norm)

    if not theano_rng:
        theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

    for i in xrange(self.conv_layer_num):
        # construct the convolution layer
        if i == 0:  # input layer
            input = self.x
        else:
            input = self.layers[-1].output  # output of previous layer

        config = conv_layer_configs[i]
        conv_activation = parse_activation(config['activation'])
        conv_layer = ConvLayer(numpy_rng=numpy_rng,
                               input=input,
                               input_shape=config['input_shape'],
                               filter_shape=config['filter_shape'],
                               poolsize=config['poolsize'],
                               activation=conv_activation)
        self.layers.append(conv_layer)
        self.conv_layers.append(conv_layer)
        if config['update'] == True:
            # only the convolution layers flagged for update are fine-tuned
            self.params.extend(conv_layer.params)
            self.delta_params.extend(conv_layer.delta_params)

    hidden_layers = hidden_layer_configs['hidden_layers']
    self.conv_output_dim = numpy.prod(config['output_shape'][1:])
    adv_activation_configs = hidden_layer_configs['adv_activation']

    # flatten the output of the last convolution layer
    self.features = self.conv_layers[-1].output.flatten(2)
    self.features_dim = self.conv_output_dim

    for i in xrange(self.hidden_layer_num):
        # construct the hidden layer
        if i == 0:  # the first sigmoidal layer takes the flattened conv features
            input_size = self.conv_output_dim
            layer_input = self.features
        else:
            input_size = hidden_layers[i - 1]  # number of hidden units in the previous layer
            layer_input = self.layers[-1].output

        if adv_activation_configs is None:
            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers[i],
                                        activation=hidden_activation)
        else:
            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers[i] * adv_activation_configs['pool_size'],
                                        activation=hidden_activation,
                                        adv_activation_method=adv_activation_configs['method'],
                                        pool_size=adv_activation_configs['pool_size'],
                                        pnorm_order=adv_activation_configs['pnorm_order'])
        self.layers.append(sigmoid_layer)
        self.mlp_layers.append(sigmoid_layer)
        if config['update'] == True:  # note: reuses the last conv layer's 'update' flag
            self.params.extend(sigmoid_layer.params)
            self.delta_params.extend(sigmoid_layer.delta_params)

    self.logLayer = LogisticRegression(input=self.layers[-1].output,
                                       n_in=hidden_layers[-1],
                                       n_out=n_outs)
    self.layers.append(self.logLayer)
    self.params.extend(self.logLayer.params)
    self.delta_params.extend(self.logLayer.delta_params)

    self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
    self.errors = self.logLayer.errors(self.y)
    self.output = self.logLayer.prediction()

    # regularization
    if self.l1_reg is not None:
        self.__l1Regularization__(self.hidden_layer_num * 2)
    if self.l2_reg is not None:
        self.__l2Regularization__(self.hidden_layer_num * 2)
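# Usage sketch (not from the source; the configs are hypothetical and only
# illustrate the keys read above; unlike DropoutCNN there is no dropout path):
#
#   hidden_layer_configs = {'hidden_layers': [1024, 1024],
#                           'adv_activation': None}
#   cnn = CNN3D(numpy_rng, theano_rng, batch_size=256, n_outs=10,
#               conv_layer_configs=conv_layer_configs,
#               hidden_layer_configs=hidden_layer_configs)
#
# cnn.features exposes the flattened output of the last convolution layer
# (dimension cnn.features_dim), which the first hidden layer consumes.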
def __init__(self, numpy_rng, theano_rng=None, n_ins=784,
             hidden_layers_sizes=[500, 500], n_outs=10,
             corruption_levels=[0.1, 0.1], activation=T.nnet.sigmoid):
    """This class is made to support a variable number of layers.

    :type numpy_rng: numpy.random.RandomState
    :param numpy_rng: numpy random number generator used to draw initial weights

    :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
    :param theano_rng: Theano random generator; if None is given one is
                       generated based on a seed drawn from `rng`

    :type n_ins: int
    :param n_ins: dimension of the input to the SdA

    :type hidden_layers_sizes: list of ints
    :param hidden_layers_sizes: intermediate layers sizes, must contain
                                at least one value

    :type n_outs: int
    :param n_outs: dimension of the output of the network

    :type corruption_levels: list of float
    :param corruption_levels: amount of corruption to use for each layer
    """
    super(SDA, self).__init__()
    self.layers = []
    self.dA_layers = []
    self.n_layers = len(hidden_layers_sizes)
    assert self.n_layers > 0

    if not theano_rng:
        theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

    # allocate symbolic variables for the data
    self.x = T.matrix('x')    # the data is presented as rasterized images
    self.y = T.ivector('y')   # the labels are presented as a 1D vector of [int] labels

    # The SdA is an MLP, for which all weights of intermediate layers are
    # shared with a different denoising autoencoder. We will first
    # construct the SdA as a deep multilayer perceptron, and when
    # constructing each sigmoidal layer we also construct a denoising
    # autoencoder that shares weights with that layer. During pretraining
    # we will train these autoencoders (which will lead to changing the
    # weights of the MLP as well). During finetuning we will finish
    # training the SdA by doing stochastic gradient descent on the MLP.
    for i in xrange(self.n_layers):
        # construct the sigmoidal layer
        # the size of the input is either the number of hidden units of the
        # layer below or the input size if we are on the first layer
        if i == 0:
            input_size = n_ins
        else:
            input_size = hidden_layers_sizes[i - 1]
        # the input to this layer is either the activation of the hidden
        # layer below or the input of the SdA if we are on the first layer
        if i == 0:
            layer_input = self.x
        else:
            layer_input = self.layers[-1].output

        sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                    input=layer_input,
                                    n_in=input_size,
                                    n_out=hidden_layers_sizes[i],
                                    activation=T.nnet.sigmoid)
        # add the layer to our list of layers
        self.layers.append(sigmoid_layer)

        # it's arguably a philosophical question, but we only declare that
        # the parameters of the sigmoid layers are parameters of the
        # StackedDAA; the visible biases in the dA are parameters of those
        # dA, but not of the SdA
        self.params.extend(sigmoid_layer.params)
        self.delta_params.extend(sigmoid_layer.delta_params)

        # construct a denoising autoencoder that shares weights with this layer
        dA_layer = dA(numpy_rng=numpy_rng, theano_rng=theano_rng,
                      input=layer_input,
                      n_visible=input_size,
                      n_hidden=hidden_layers_sizes[i],
                      W=sigmoid_layer.W, bhid=sigmoid_layer.b,
                      activation=T.nnet.sigmoid)
        self.dA_layers.append(dA_layer)

    # We now need to add a logistic layer on top of the MLP
    self.logLayer = LogisticRegression(input=self.layers[-1].output,
                                       n_in=hidden_layers_sizes[-1],
                                       n_out=n_outs)
    self.layers.append(self.logLayer)
    self.params.extend(self.logLayer.params)
    self.delta_params.extend(self.logLayer.delta_params)

    # construct a function that implements one step of finetuning:
    # compute the cost for the second phase of training,
    # defined as the negative log likelihood
    self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
    # symbolic variable that points to the number of errors made on the
    # minibatch given by self.x and self.y
    self.errors = self.logLayer.errors(self.y)
    self.output = self.logLayer.prediction()
    self.features = self.layers[-2].output
    self.features_dim = self.layers[-2].n_out
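# Usage sketch (not from the source; hypothetical values):
#
#   rng = numpy.random.RandomState(1234)
#   sda = SDA(numpy_rng=rng, n_ins=784,
#             hidden_layers_sizes=[1024, 1024], n_outs=10,
#             corruption_levels=[0.2, 0.3])
#
# Each entry of sda.dA_layers shares its W and hidden bias with the matching
# entry of sda.layers, so denoising pretraining directly initializes the MLP
# that is later finetuned through sda.finetune_cost.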
def __init__(self, numpy_rng, theano_rng=None, n_ins=784,
             hidden_layers_sizes=[500, 500], n_outs=10,
             activation=T.nnet.sigmoid, adv_activation=None,
             max_col_norm=None, l1_reg=None, l2_reg=None):
    super(DNN, self).__init__()
    self.layers = []
    self.n_layers = len(hidden_layers_sizes)
    self.max_col_norm = max_col_norm
    self.l1_reg = l1_reg
    self.l2_reg = l2_reg
    assert self.n_layers > 0

    if not theano_rng:
        theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

    # allocate symbolic variables for the data
    self.x = T.matrix('x')
    self.y = T.ivector('y')

    for i in xrange(self.n_layers):
        # construct the sigmoidal layer
        if i == 0:
            input_size = n_ins
            layer_input = self.x
        else:
            input_size = hidden_layers_sizes[i - 1]
            layer_input = self.layers[-1].output

        if adv_activation is not None:
            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                input=layer_input,
                                n_in=input_size,
                                n_out=hidden_layers_sizes[i] * adv_activation['pool_size'],
                                activation=activation,
                                adv_activation_method=adv_activation['method'],
                                pool_size=adv_activation['pool_size'],
                                pnorm_order=adv_activation['pnorm_order'])
        else:
            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                input=layer_input,
                                n_in=input_size,
                                n_out=hidden_layers_sizes[i],
                                activation=activation)
        # add the layer to our list of layers
        self.layers.append(sigmoid_layer)
        self.params.extend(sigmoid_layer.params)
        self.delta_params.extend(sigmoid_layer.delta_params)

    # We now need to add a logistic layer on top of the MLP
    self.logLayer = LogisticRegression(input=self.layers[-1].output,
                                       n_in=hidden_layers_sizes[-1],
                                       n_out=n_outs)
    self.layers.append(self.logLayer)
    self.params.extend(self.logLayer.params)
    self.delta_params.extend(self.logLayer.delta_params)

    # construct a function that implements one step of finetuning:
    # compute the cost for the second phase of training,
    # defined as the negative log likelihood
    self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
    self.errors = self.logLayer.errors(self.y)

    if self.l1_reg is not None:
        self.__l1Regularization__()
    if self.l2_reg is not None:
        self.__l2Regularization__()

    self.output = self.logLayer.prediction()
    self.features = self.layers[-2].output
    self.features_dim = self.layers[-2].n_out
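# Usage sketch (not from the source; hypothetical values; the adv_activation
# dictionary only illustrates the keys read above, and the 'maxout' method
# string is an assumption, not a documented value):
#
#   rng = numpy.random.RandomState(1234)
#   dnn = DNN(numpy_rng=rng, n_ins=784,
#             hidden_layers_sizes=[1024, 1024], n_outs=10,
#             adv_activation={'method': 'maxout', 'pool_size': 3,
#                             'pnorm_order': 1},
#             l2_reg=0.0001)
#
# With adv_activation set, each hidden layer allocates
# hidden_layers_sizes[i] * pool_size units before pooling, so the layer still
# presents hidden_layers_sizes[i] outputs to the layer above.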