def __init__(self, numpy_rng, n_in, hidden_layer_sizes, n_out):
    self.sigmoid_layers = []
    self.cA_layers = []
    self.params = []
    self.n_layers = len(hidden_layer_sizes)
    assert self.n_layers > 0

    self.input = T.matrix('input')

    for i in xrange(self.n_layers):
        if i == 0:
            input_size = n_in
        else:
            input_size = hidden_layer_sizes[i - 1]

        if i == 0:
            layer_input = self.input
        else:
            layer_input = self.sigmoid_layers[-1].output

        sigmoid_layer = HiddenLayer(
            rng=numpy_rng,
            input=layer_input,
            n_in=input_size,
            n_out=hidden_layer_sizes[i],
            activation=T.nnet.sigmoid
        )
        self.sigmoid_layers.append(sigmoid_layer)
        self.params.extend(sigmoid_layer.params)

        cA_layer = cA(
            numpy_rng=numpy_rng,
            input=layer_input,
            W=sigmoid_layer.W,
            b=sigmoid_layer.b,
            n_visible=input_size,
            n_hidden=hidden_layer_sizes[i],
            n_class=n_out
        )
        self.cA_layers.append(cA_layer)

    self.logLayer = OneSidedCostRegressor(
        input=self.sigmoid_layers[-1].output,
        n_in=hidden_layer_sizes[-1],
        n_out=n_out
    )
    self.params.extend(self.logLayer.params)
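# ----------------------------------------------------------------------
# Illustration (not part of the original class): a minimal NumPy sketch of
# the forward pass computed by the stacked sigmoid layers above, where each
# layer's output becomes the next layer's input. The weight matrices and
# biases below are hypothetical stand-ins for sigmoid_layer.W / sigmoid_layer.b.
import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def stacked_forward(x, weights, biases):
    """Propagate a batch x through a stack of sigmoid layers."""
    h = x
    for W, b in zip(weights, biases):
        h = sigmoid(np.dot(h, W) + b)   # output of layer i feeds layer i+1
    return h

# hypothetical sizes matching n_in=6, hidden_layer_sizes=[4, 3]
rng = np.random.RandomState(0)
Ws = [rng.randn(6, 4) * 0.1, rng.randn(4, 3) * 0.1]
bs = [np.zeros(4), np.zeros(3)]
print(stacked_forward(rng.randn(5, 6), Ws, bs).shape)   # (5, 3)
# ----------------------------------------------------------------------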
def __init__(self, rng, n_in=784, n_hidden=[500, 500], n_out=10,
             lambda_reg=0.001, alpha_reg=0.001, batch_size=100):
    """ This class is made to support a variable number of layers.

    :type rng: numpy.random.RandomState
    :param rng: numpy random number generator used to draw initial weights

    :type n_in: int
    :param n_in: dimension of the input to the ScA

    :type n_hidden: list of ints
    :param n_hidden: intermediate layer sizes, must contain at least one value

    :type n_out: int
    :param n_out: dimension of the output of the network

    :type lambda_reg: float
    :param lambda_reg: parameter to control the sparsity of weights by the l_1 norm.
                       The regularization term is
                       lambda_reg( (1-alpha_reg)/2 * ||W||_2^2 + alpha_reg ||W||_1 ).
                       Thus, the larger lambda_reg is, the sparser the weights are.

    :type alpha_reg: float
    :param alpha_reg: parameter from the interval [0,1] to control the smoothness of
                      weights by the squared l_2 norm. The regularization term is
                      lambda_reg( (1-alpha_reg)/2 * ||W||_2^2 + alpha_reg ||W||_1 ).
                      Thus, the smaller alpha_reg is, the smoother the weights are.

    :type batch_size: int
    :param batch_size: minibatch size
    """
    self.hidden_layers = []
    self.cA_layers = []
    self.params = []
    self.n_layers = len(n_hidden)
    assert self.n_layers > 0

    # allocate symbolic variables for the data
    self.x = T.matrix('x')    # the data, each row of which is a sample
    self.y = T.ivector('y')   # the labels are presented as a 1D vector of [int] labels

    for i in xrange(self.n_layers):
        # construct the sigmoidal layer
        # the size of the input is either the number of hidden units of
        # the layer below or the input size if we are on the first layer
        if i == 0:
            input_size = n_in
        else:
            input_size = n_hidden[i - 1]

        # the input to this layer is either the activation of the hidden
        # layer below or the input of the ScA if we are on the first layer
        if i == 0:
            layer_input = self.x
        else:
            layer_input = self.hidden_layers[-1].output

        sigmoid_layer = HiddenLayer(rng=rng,
                                    input=layer_input,
                                    n_in=input_size,
                                    n_out=n_hidden[i],
                                    activation=T.nnet.sigmoid)
        # add the layer to our list of layers
        self.hidden_layers.append(sigmoid_layer)
        self.params.extend(sigmoid_layer.params)

        # construct a contractive autoencoder that shares weights with this layer
        cA_layer = cA(numpy_rng=rng,
                      input=layer_input,
                      n_visible=input_size,
                      n_hidden=n_hidden[i],
                      n_batchsize=batch_size,
                      W=sigmoid_layer.W,
                      bhid=sigmoid_layer.b)
        self.cA_layers.append(cA_layer)

    # we now need to add a logistic layer on top of the MLP
    if self.n_layers > 0:
        self.logRegressionLayer = LogisticRegression(
            input=self.hidden_layers[-1].output,
            n_in=n_hidden[-1],
            n_out=n_out)
    else:
        self.logRegressionLayer = LogisticRegression(
            input=self.x,
            n_in=input_size,
            n_out=n_out)
    self.params.extend(self.logRegressionLayer.params)

    # regularization
    L1s = []
    L2_sqrs = []
    for i in range(self.n_layers):
        L1s.append(abs(self.hidden_layers[i].W).sum())
        L2_sqrs.append((self.hidden_layers[i].W ** 2).sum())
    L1s.append(abs(self.logRegressionLayer.W).sum())
    L2_sqrs.append((self.logRegressionLayer.W ** 2).sum())
    self.L1 = T.sum(L1s)
    self.L2_sqr = T.sum(L2_sqrs)

    # compute the cost for the second phase of training, defined as the
    # negative log likelihood plus the regularization term
    self.negative_log_likelihood = self.logRegressionLayer.negative_log_likelihood(self.y)
    self.cost = self.negative_log_likelihood + \
        lambda_reg * ((1.0 - alpha_reg) * 0.5 * self.L2_sqr + alpha_reg * self.L1)

    # symbolic variable that points to the number of errors made on the
    # minibatch given by self.x and self.y
    self.errors = self.logRegressionLayer.errors(self.y)
    self.y_pred = self.logRegressionLayer.y_pred
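# ----------------------------------------------------------------------
# Illustration (not part of the original class): a small NumPy sketch of the
# regularizer that is added to the negative log likelihood above,
#     lambda_reg * ( (1 - alpha_reg)/2 * ||W||_2^2 + alpha_reg * ||W||_1 ),
# summed over the hidden-layer and logistic-regression weight matrices.
# The function name and the example shapes are hypothetical.
import numpy as np

def elastic_net_penalty(weight_matrices, lambda_reg=0.001, alpha_reg=0.001):
    l1 = sum(np.abs(W).sum() for W in weight_matrices)        # ||W||_1 terms
    l2_sqr = sum((W ** 2).sum() for W in weight_matrices)     # ||W||_2^2 terms
    return lambda_reg * ((1.0 - alpha_reg) * 0.5 * l2_sqr + alpha_reg * l1)

rng = np.random.RandomState(0)
print(elastic_net_penalty([rng.randn(784, 500), rng.randn(500, 10)]))
# ----------------------------------------------------------------------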
def __init__(self, numpy_rng, theano_rng=None, n_ins=784,
             hidden_layers_sizes=[500, 500], n_outs=10,
             corruption_levels=[0.1, 0.1]):
    self.sigmoid_layers = []
    self.dA_layers = []
    self.params = []
    self.n_layers = len(hidden_layers_sizes)
    assert self.n_layers > 0

    if not theano_rng:
        theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

    self.x = T.matrix('x')
    self.y = T.ivector('y')

    for i in xrange(self.n_layers):
        if i == 0:
            input_size = n_ins
        else:
            input_size = hidden_layers_sizes[i - 1]

        if i == 0:
            layer_input = self.x
        else:
            layer_input = self.sigmoid_layers[-1].output

        sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                    input=layer_input,
                                    n_in=input_size,
                                    n_out=hidden_layers_sizes[i],
                                    activation=T.nnet.sigmoid)
        self.sigmoid_layers.append(sigmoid_layer)
        self.params.extend(sigmoid_layer.params)

        dA_layer = cA(numpy_rng=numpy_rng,
                      theano_rng=theano_rng,
                      input=layer_input,
                      n_visible=input_size,
                      n_hidden=hidden_layers_sizes[i],
                      W=sigmoid_layer.W,
                      bhid=sigmoid_layer.b)
        self.dA_layers.append(dA_layer)

    self.logLayer = LogisticRegression(
        input=self.sigmoid_layers[-1].output,
        n_in=hidden_layers_sizes[-1],
        n_out=n_outs
    )
    self.params.extend(self.logLayer.params)

    self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
    self.errors = self.logLayer.errors(self.y)
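# ----------------------------------------------------------------------
# Illustration (not part of the original class): a NumPy sketch of the standard
# contraction penalty of a contractive autoencoder, the kind of term the cA
# layers above are built to penalize,
#     ||J(x)||_F^2   with   J_ij = h_j * (1 - h_j) * W_ij
# for a sigmoid encoder h = sigmoid(x W + b). This is an assumption-laden
# illustration of the idea, not the cA class's actual implementation; all
# names and shapes are hypothetical.
import numpy as np

def contraction_penalty(x, W, b):
    h = 1.0 / (1.0 + np.exp(-(np.dot(x, W) + b)))     # hidden activations, shape (n_hidden,)
    jacobian = (h * (1.0 - h))[np.newaxis, :] * W     # dh/dx, shape (n_visible, n_hidden)
    return (jacobian ** 2).sum()                      # squared Frobenius norm

rng = np.random.RandomState(0)
W = rng.randn(784, 500) * 0.01
print(contraction_penalty(rng.randn(784), W, np.zeros(500)))
# ----------------------------------------------------------------------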
def __init__(self, rng, n_in=784, n_hidden=[500, 500], n_out=10,
             activation=T.nnet.sigmoid,
             lambda1=0, lambda2=0, alpha1=0, alpha2=0, batch_size=100):
    """ Initialize the parameters for the DFL class.

    :type rng: numpy.random.RandomState
    :param rng: a random number generator used to initialize weights

    :type n_in: int
    :param n_in: number of input units, the dimension of the space in which
                 the datapoints lie

    :type n_hidden: list of ints
    :param n_hidden: numbers of hidden units per layer

    :type n_out: int
    :param n_out: number of output units, the dimension of the space in which
                  the labels lie

    activation: activation function, from {T.tanh, T.nnet.sigmoid (default)}

    lambda1: float scalar, controls the sparsity of the input weights.
             The regularization term is
             lambda1( (1-lambda2)/2 * ||w||_2^2 + lambda2 * ||w||_1 ).
             Thus, the larger lambda1 is, the sparser the input weights are.

    lambda2: float scalar, controls the smoothness of the input weights.
             The regularization term is
             lambda1( (1-lambda2)/2 * ||w||_2^2 + lambda2 * ||w||_1 ).
             Thus, the smaller lambda2 is, the smoother the input weights are.

    alpha1: float scalar, controls the sparsity of the weight matrices in the MLP.
            The regularization term is
            alpha1( (1-alpha2)/2 * \sum||W_i||_2^2 + alpha2 \sum||W_i||_1 ).
            Thus, the larger alpha1 is, the sparser the MLP weights are.

    alpha2: float scalar, controls the smoothness of the weight matrices in the MLP.
            The regularization term is
            alpha1( (1-alpha2)/2 * \sum||W_i||_2^2 + alpha2 \sum||W_i||_1 ).
            Thus, the smaller alpha2 is, the smoother the MLP weights are.

    batch_size: int, minibatch size.
    """
    self.hidden_layers = []
    self.cA_layers = []
    self.params = []
    self.n_layers = len(n_hidden)
    assert self.n_layers > 0

    # allocate symbolic variables for the data
    self.x = T.matrix('x')    # the data, each row is a sample
    self.y = T.ivector('y')   # the labels are presented as a 1D vector of [int] labels

    # input layer
    input_layer = InputLayer(input=self.x, n_in=n_in)
    self.params.extend(input_layer.params)
    self.input_layer = input_layer

    # hidden layers
    for i in range(len(n_hidden)):
        if i == 0:
            input_hidden = self.input_layer.output
            n_in_hidden = n_in
        else:
            input_hidden = self.hidden_layers[i - 1].output
            n_in_hidden = n_hidden[i - 1]
        hd = HiddenLayer(rng=rng, input=input_hidden, n_in=n_in_hidden,
                         n_out=n_hidden[i], activation=activation)
        self.hidden_layers.append(hd)
        self.params.extend(hd.params)
        cA_layer = cA(numpy_rng=rng,
                      input=input_hidden,
                      n_visible=n_in_hidden,
                      n_hidden=n_hidden[i],
                      n_batchsize=batch_size,
                      W=hd.W,
                      bhid=hd.b)
        self.cA_layers.append(cA_layer)

    # the logistic regression layer gets as input the hidden units
    # of the last hidden layer
    if len(n_hidden) <= 0:
        self.logRegressionLayer = LogisticRegression(
            input=self.input_layer.output,
            n_in=n_in,
            n_out=n_out)
    else:
        self.logRegressionLayer = LogisticRegression(
            input=self.hidden_layers[-1].output,
            n_in=n_hidden[-1],
            n_out=n_out)
    self.params.extend(self.logRegressionLayer.params)

    # regularization terms on coefficients of the input layer
    self.L1_input = abs(self.input_layer.w).sum()
    self.L2_input = (self.input_layer.w ** 2).sum()
    # self.hinge_loss_neg = (T.maximum(0, -self.input_layer.w)).sum()  # penalize negative values
    # self.hinge_loss_pos = (T.maximum(0, self.input_layer.w)).sum()   # penalize positive values

    # regularization terms on weights of hidden layers
    L1s = []
    L2_sqrs = []
    for i in range(len(n_hidden)):
        L1s.append(abs(self.hidden_layers[i].W).sum())
        L2_sqrs.append((self.hidden_layers[i].W ** 2).sum())
    L1s.append(abs(self.logRegressionLayer.W).sum())
    L2_sqrs.append((self.logRegressionLayer.W ** 2).sum())
    self.L1 = T.sum(L1s)
    self.L2_sqr = T.sum(L2_sqrs)

    # negative log likelihood of the MLP is given by the negative
    # log likelihood of the output of the model, computed in the
    # logistic regression layer
    self.negative_log_likelihood = self.logRegressionLayer.negative_log_likelihood
    # same holds for the function computing the number of errors
    self.errors = self.logRegressionLayer.errors(self.y)

    # self.cost = self.negative_log_likelihood(self.y) \
    #     + lambda1*(1.0-lambda2)*0.5*self.L2_input \
    #     + lambda1*lambda2*(1.0-lambda3)*self.hinge_loss_pos \
    #     + lambda1*lambda2*lambda3*self.hinge_loss_neg \
    #     + alpha1*(1.0-alpha2)*0.5*self.L2_sqr + alpha1*alpha2*self.L1
    self.cost = self.negative_log_likelihood(self.y) \
        + lambda1 * (1.0 - lambda2) * 0.5 * self.L2_input \
        + lambda1 * lambda2 * self.L1_input \
        + alpha1 * (1.0 - alpha2) * 0.5 * self.L2_sqr + alpha1 * alpha2 * self.L1

    self.y_pred = self.logRegressionLayer.y_pred
    self.y_pred_prob = self.logRegressionLayer.y_pred_prob
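# ----------------------------------------------------------------------
# Illustration (not part of the original class): a NumPy sketch of the two
# regularization groups combined in self.cost above,
#     lambda1*( (1-lambda2)/2*||w||_2^2 + lambda2*||w||_1 )                 on the input weights w
#   + alpha1*( (1-alpha2)/2*sum||W_i||_2^2 + alpha2*sum||W_i||_1 )          on the MLP weights.
# The function name, hyperparameter values, and shapes are hypothetical.
import numpy as np

def dfl_regularizer(w_input, mlp_weights, lambda1=0.01, lambda2=0.5, alpha1=0.001, alpha2=0.5):
    input_term = lambda1 * ((1.0 - lambda2) * 0.5 * (w_input ** 2).sum()
                            + lambda2 * np.abs(w_input).sum())
    mlp_term = alpha1 * ((1.0 - alpha2) * 0.5 * sum((W ** 2).sum() for W in mlp_weights)
                         + alpha2 * sum(np.abs(W).sum() for W in mlp_weights))
    return input_term + mlp_term

rng = np.random.RandomState(0)
print(dfl_regularizer(rng.randn(784), [rng.randn(784, 500), rng.randn(500, 10)]))
# ----------------------------------------------------------------------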
print "#############" print "## configs ##" print "#############" print configs print "instantiating and training model" numpy_rng = np.random.RandomState(2355) theano_rng = RandomStreams(2355) models = [] h_in, h_out = zip([trtrainset.shape[1]]+configs['hid'],configs['hid'])[i] print h_in,h_out model = cA.cA(numpy_rng=numpy_rng, theano_rng=theano_rng, numvis=h_in, numhid=h_out, activation=T.tanh, vistype="real", contraction=configs['contract'][i]) sgd.train(trtrainset,trvalidset,model, batch_size=configs['batch_size'], wait_for=20, learning_rate=configs['lr'][i], epochs=training_epochs, epsylon=configs['epsylon'][i], aug=1.01) X = T.matrix() encoding = model.hiddens(X) f = theano.function([X],encoding) trtrainset = np.vstack(sgd.iterate([trtrainset],[f],configs['batch_size'])[0]) trvalidset = np.vstack(sgd.iterate([trvalidset],[f],configs['batch_size'])[0])