def __init__(self, input_dim, nb_experts, output_dim):
    self.nb_experts = nb_experts
    self.output_dim = output_dim
    self.gates = LogisticRegression(input_dim, nb_experts)
    self.experts = [
        LogisticRegression(input_dim, output_dim) for k in range(nb_experts)
    ]
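# A minimal forward-pass sketch for the mixture-of-experts constructor above.
# This is a hedged illustration: it assumes the gating network and each expert
# expose a `predict_proba(x)` method returning class probabilities, which the
# snippet above does not show.
import numpy as np

def moe_predict(model, x):
    gate_probs = model.gates.predict_proba(x)  # shape (batch, nb_experts)
    # stack expert outputs: shape (nb_experts, batch, output_dim)
    expert_out = np.stack([e.predict_proba(x) for e in model.experts])
    # weight each expert's output by its gate probability and sum over experts
    return np.einsum('bk,kbo->bo', gate_probs, expert_out)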
def __init__(self, rng, input, n_hidden_out, n_out, nkerns, batch_size):
    self.layer0 = LeNetConvPoolLayer(
        rng,
        input=input.reshape((batch_size, 1, 28, 28)),
        image_shape=(batch_size, 1, 28, 28),
        filter_shape=(nkerns[0], 1, 5, 5),
        poolsize=(2, 2))
    self.layer1 = LeNetConvPoolLayer(
        rng,
        input=self.layer0.output,
        image_shape=(batch_size, nkerns[0], 12, 12),
        filter_shape=(nkerns[1], nkerns[0], 5, 5),
        poolsize=(2, 2))
    self.layer2 = HiddenLayer(
        rng,
        input=self.layer1.output.flatten(2),
        n_in=nkerns[1] * 4 * 4,
        n_out=n_hidden_out,
        activation=T.tanh)
    self.logRegressionLayer = LogisticRegression(
        input=self.layer2.output, n_in=n_hidden_out, n_out=n_out)
    self.negative_log_likelihood = (
        self.logRegressionLayer.negative_log_likelihood)
    self.errors = self.logRegressionLayer.errors
    self.params = (self.layer0.params + self.layer1.params +
                   self.layer2.params + self.logRegressionLayer.params)
    self.input = input
def __init__(self, rng, input, n_in, n_hidden, n_out):
    self.hiddenLayer = HiddenLayer(
        rng=rng,
        input=input,
        n_in=n_in,
        n_out=n_hidden,
        activation=T.tanh
    )
    self.logRegressionLayer = LogisticRegression(
        input=self.hiddenLayer.output,
        n_in=n_hidden,
        n_out=n_out
    )
    self.L1 = (
        abs(self.hiddenLayer.W).sum() + abs(self.logRegressionLayer.W).sum()
    )
    self.L2_sqr = (
        (self.hiddenLayer.W ** 2).sum() + (self.logRegressionLayer.W ** 2).sum()
    )
    self.negative_log_likelihood = (
        self.logRegressionLayer.negative_log_likelihood
    )
    self.errors = self.logRegressionLayer.errors
    self.params = self.hiddenLayer.params + self.logRegressionLayer.params
    self.input = input
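# A hedged usage sketch for the MLP above: the cost typically minimized during
# training combines the NLL with the two regularizers built in the constructor.
# `classifier`, `y`, `L1_reg`, and `L2_reg` are assumed names supplied by the
# caller, not taken from the snippet itself.
cost = (classifier.negative_log_likelihood(y)
        + L1_reg * classifier.L1
        + L2_reg * classifier.L2_sqr)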
def get_by_name(name: str, dataset: AbstractDataset) -> nn.Module:
    name = name.lower()
    if name == ModelType.LOGISTIC.name.lower():
        return LogisticRegression(dataset)
    elif name == ModelType.MLP.name.lower():
        return MLP(dataset)
    elif name == ModelType.VGG.name.lower():
        return Vgg(dataset)
    # fail loudly instead of silently returning None for unknown names
    raise ValueError(f"unknown model name: {name!r}")
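# Hypothetical dispatch example for the factory above (the dataset object and
# the "mlp" key are illustrative):
model = get_by_name("mlp", dataset)  # returns an MLP configured for `dataset`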
def __init__(self, input, n_in, n_hidden, n_out, n_layers, n_total, batch,
             mask):
    # adjust the input
    input = input.dimshuffle(1, 0, 2)
    # hidden layers
    self.params = []
    self.hiddenLayers = []
    self.velo = []
    input_list = []
    input_list.append(input)
    input_list.append(input[::-1])
    self.hiddenLayers.append(
        HiddenLayer(input_list=input_list, n_in=n_in, n_out=n_hidden,
                    BATCH=batch))
    self.params.extend(self.hiddenLayers[0].params)
    self.velo.extend(self.hiddenLayers[0].velo)
    for i in range(1, n_layers):
        self.hiddenLayers.append(
            HiddenLayer(input_list=self.hiddenLayers[i - 1].output_list,
                        n_in=n_hidden, n_out=n_hidden, BATCH=batch))
        self.params.extend(self.hiddenLayers[i].params)
        self.velo.extend(self.hiddenLayers[i].velo)
    # output layer
    self.logRegressionLayer = LogisticRegression(
        input_list=self.hiddenLayers[n_layers - 1].output_list,
        n_in=n_hidden,
        n_out=n_out,
        n_total=n_total,
        mask=mask,
        batch=batch)
    self.params.extend(self.logRegressionLayer.params)
    self.velo.extend(self.logRegressionLayer.velo)
    # L1 regularization
    l1_sum = 0
    for layer in self.hiddenLayers:
        l1_sum += (abs(layer.W2).sum() + abs(layer.W1).sum() +
                   abs(layer.U1).sum() + abs(layer.U2).sum())
    self.L1 = l1_sum + abs(self.logRegressionLayer.W).sum()
    # L2 squared regularization
    l2_sum = 0
    for layer in self.hiddenLayers:
        l2_sum += ((layer.W2 ** 2).sum() + (layer.W1 ** 2).sum() +
                   (layer.U1 ** 2).sum() + (layer.U2 ** 2).sum())
    self.L2_sqr = l2_sum + (self.logRegressionLayer.W ** 2).sum() + (
        self.logRegressionLayer.M ** 2).sum()
    # negative log likelihood
    self.negative_log_likelihood = self.logRegressionLayer.negative_log_likelihood
    # errors
    self.errors = self.logRegressionLayer.errors
    # predict
    self.y_pred = self.logRegressionLayer.y_pred
def __init__(self, rng, _input, n_in, n_hidden, n_out):
    """Initialize the parameters for the multilayer perceptron

    :type rng: numpy.random.RandomState
    :param rng: a random number generator used to initialize weights

    :type _input: theano.tensor.TensorType
    :param _input: symbolic variable that describes the input of the
    architecture (one minibatch)

    :type n_in: int
    :param n_in: number of input units, the dimension of the space in
    which the datapoints lie

    :type n_hidden: int
    :param n_hidden: number of hidden units

    :type n_out: int
    :param n_out: number of output units, the dimension of the space in
    which the labels lie
    """
    # Since we are dealing with a one hidden layer MLP, this will
    # translate into a TanhLayer connected to the LogisticRegression
    # layer; this can be replaced by a SigmoidalLayer, or a layer
    # implementing any other nonlinearity
    self.hiddenLayer = HiddenLayer(rng=rng, input=_input, n_in=n_in,
                                   n_out=n_hidden, activation=T.tanh)
    # The logistic regression layer gets as input the hidden units
    # of the hidden layer
    self.logRegressionLayer = LogisticRegression(
        _input=self.hiddenLayer.output, n_in=n_hidden, n_out=n_out)
    # L1 norm; one regularization option is to enforce L1 norm to
    # be small
    self.L1 = abs(self.hiddenLayer.W).sum() \
        + abs(self.logRegressionLayer.W).sum()
    # square of L2 norm; one regularization option is to enforce
    # square of L2 norm to be small
    self.L2_sqr = (self.hiddenLayer.W ** 2).sum() \
        + (self.logRegressionLayer.W ** 2).sum()
    # negative log likelihood of the MLP is given by the negative
    # log likelihood of the output of the model, computed in the
    # logistic regression layer
    self.negative_log_likelihood = self.logRegressionLayer.negative_log_likelihood
    # same holds for the function computing the number of errors
    self.errors = self.logRegressionLayer.errors
    # the parameters of the model are the parameters of the two layers it is
    # made out of
    self.params = self.hiddenLayer.params + self.logRegressionLayer.params
def __init__(self, numpy_rng, theano_rng=None, n_ins=784,
             hidden_layers_sizes=[500, 500], n_outs=10):
    self.sigmoid_layers = []
    self.rbm_layers = []
    self.params = []
    self.n_layers = len(hidden_layers_sizes)
    assert self.n_layers > 0
    if not theano_rng:
        theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
    self.x = T.matrix('x')
    self.y = T.ivector('y')
    for i in range(self.n_layers):
        if i == 0:
            input_size = n_ins
        else:
            input_size = hidden_layers_sizes[i - 1]
        if i == 0:
            layer_input = self.x
        else:
            layer_input = self.sigmoid_layers[-1].output
        sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                    input=layer_input,
                                    n_in=input_size,
                                    n_out=hidden_layers_sizes[i],
                                    activation=T.nnet.sigmoid)
        self.sigmoid_layers.append(sigmoid_layer)
        self.params.extend(sigmoid_layer.params)
        rbm_layer = RBM(numpy_rng=numpy_rng,
                        theano_rng=theano_rng,
                        input=layer_input,
                        n_visible=input_size,
                        n_hidden=hidden_layers_sizes[i],
                        W=sigmoid_layer.W,
                        hbias=sigmoid_layer.b)
        self.rbm_layers.append(rbm_layer)
    self.logLayer = LogisticRegression(
        input=self.sigmoid_layers[-1].output,
        n_in=hidden_layers_sizes[-1],
        n_out=n_outs)
    self.params.extend(self.logLayer.params)
    self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
    self.errors = self.logLayer.errors(self.y)
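# A hedged sketch of greedy layer-wise pretraining for the DBN above, assuming
# each RBM exposes get_cost_updates(lr, persistent, k) as in the Theano DBN
# tutorial; `theano`, `T`, and `numpy` come from the surrounding module, and
# `train_set_x`/`batch_size` are assumed caller-supplied shared data.
def pretrain(dbn, train_set_x, batch_size, epochs=15, lr=0.01, k=1):
    index = T.lscalar('index')
    n_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    for rbm in dbn.rbm_layers:
        # CD-k cost and updates for this layer's RBM
        cost, updates = rbm.get_cost_updates(lr=lr, persistent=None, k=k)
        step = theano.function(
            [index], cost, updates=updates,
            givens={dbn.x: train_set_x[index * batch_size:
                                       (index + 1) * batch_size]})
        for epoch in range(epochs):
            mean_cost = numpy.mean([step(i) for i in range(n_batches)])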
def main():
    X_train, y_train, X_test, y_test = load_income('./income.csv')
    lr = LogisticRegression(C=1000, lr_decay='step').fit(X_train, y_train)
    # lr.score(X_train, y_train)
    # lr.score(X_test, y_test)
    y_pred = lr.predict(X_train)
    print('\n==> train:\n', classification_report(y_train, y_pred))
    y_pred = lr.predict(X_test)
    print('\n==> test:\n', classification_report(y_test, y_pred))
def __init__(self, input, rng, n_in, n_out, n_hidden):
    self.hidden = HiddenLayer(
        input=input,
        rng=rng,
        n_in=n_in,
        n_out=n_hidden,
    )
    self.logistic_reg = LogisticRegression(input=self.hidden.output,
                                           n_in=n_hidden, n_out=n_out)
def test_lr_newton_method():
    X, y = read_data()
    lr_clf = LogisticRegression(solver="newton_method")
    lr_clf.fit(X, y)
    # test intercept
    intercept = lr_clf.intercept_
    assert abs(intercept - (-2.618)) < 0.01
    # test coefficients
    coef = lr_clf.coef_
    assert abs(coef[0] - 0.76) < 0.01
    assert abs(coef[1] - 1.17) < 0.01
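# A hedged sketch of one Newton step for logistic regression, the update a
# "newton_method" solver like the one tested above typically iterates. `theta`
# stacks intercept and coefficients, and X is assumed to carry a bias column
# (names illustrative, not taken from the tested class).
import numpy as np

def newton_step(theta, X, y):
    p = 1.0 / (1.0 + np.exp(-X @ theta))     # sigmoid predictions
    grad = X.T @ (p - y)                     # gradient of the NLL
    W = np.diag(p * (1.0 - p))               # per-sample curvature weights
    H = X.T @ W @ X                          # Hessian of the NLL
    return theta - np.linalg.solve(H, grad)  # theta <- theta - H^{-1} grad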
def __init__(self, rng, input, n_in, n_h, n_out):
    self.hidden_layer = HiddenLayer(rng, input=input, n_in=n_in, n_out=n_h)
    self.output_layer = LogisticRegression(input=self.hidden_layer.output,
                                           n_in=n_h, n_out=n_out)
    # regularization
    self.L1 = abs(self.hidden_layer.w).sum() + abs(self.output_layer.w).sum()
    self.L2 = (self.hidden_layer.w ** 2).sum() + (self.output_layer.w ** 2).sum()
    # negative log likelihood
    self.neg_log_likelihood = self.output_layer.neg_log_likelihood
    # errors function
    self.errors = self.output_layer.errors
    # params
    self.params = self.hidden_layer.params + self.output_layer.params
    self.input = input
def test_lr_stochastic_gradient_descent():
    X, y = read_data()
    lr_clf = LogisticRegression(learning_rate=0.001, max_iter=10000,
                                solver="stochastic_gradient_descent")
    lr_clf.fit(X, y)
    # test intercept
    intercept = lr_clf.intercept_
    assert abs(intercept - (-2.618)) < 0.01
    # test coefficients
    coef = lr_clf.coef_
    assert abs(coef[0] - 0.76) < 0.01
    assert abs(coef[1] - 1.17) < 0.01
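# A hedged sketch of the per-sample update a stochastic-gradient solver like
# the one tested above typically iterates; as in the Newton sketch, `theta`
# stacks intercept and coefficients and X is assumed to carry a bias column.
import numpy as np

def sgd_step(theta, X, y, learning_rate=0.001):
    i = np.random.randint(len(y))                        # draw one example
    p_i = 1.0 / (1.0 + np.exp(-X[i] @ theta))            # predicted probability
    return theta - learning_rate * (p_i - y[i]) * X[i]   # single gradient step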
def __init__(self, input, n_in, n_hidden, n_out, n_layers):
    # hidden layers
    self.params = []
    self.hiddenLayers = []
    self.velo = []
    self.hiddenLayers.append(
        HiddenLayer(input=input, n_in=n_in, n_out=n_hidden,
                    activation=a.relu))
    self.params.extend(self.hiddenLayers[0].params)
    self.velo.extend(self.hiddenLayers[0].velo)
    for i in range(1, n_layers):
        self.hiddenLayers.append(
            HiddenLayer(input=self.hiddenLayers[i - 1].output,
                        n_in=n_hidden, n_out=n_hidden, activation=a.relu))
        self.params.extend(self.hiddenLayers[i].params)
        self.velo.extend(self.hiddenLayers[i].velo)
    # output layer
    self.logRegressionLayer = LogisticRegression(
        input=self.hiddenLayers[n_layers - 1].output,
        n_in=n_hidden,
        n_out=n_out)
    self.params.extend(self.logRegressionLayer.params)
    self.velo.extend(self.logRegressionLayer.velo)
    # L1 regularization
    l1_sum = 0
    for layer in self.hiddenLayers:
        l1_sum += abs(layer.W).sum()
    self.L1 = l1_sum + abs(self.logRegressionLayer.W).sum()
    # L2 squared regularization
    l2_sum = 0
    for layer in self.hiddenLayers:
        l2_sum += (layer.W ** 2).sum()
    self.L2_sqr = l2_sum + (self.logRegressionLayer.W ** 2).sum()
    # negative log likelihood
    self.negative_log_likelihood = (
        self.logRegressionLayer.negative_log_likelihood)
    # errors
    self.errors = self.logRegressionLayer.errors
    # predict
    self.y_pred = self.logRegressionLayer.y_pred
    self.output = self.logRegressionLayer.y.T
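# A hedged sketch of the momentum update the `velo` buffers above appear to be
# allocated for (Theano-style updates list; `cost`, `momentum`, and
# `learning_rate` are assumed caller-supplied, and `T` is theano.tensor from
# the surrounding module):
grads = T.grad(cost, classifier.params)
updates = []
for p, v, g in zip(classifier.params, classifier.velo, grads):
    v_new = momentum * v - learning_rate * g  # decay velocity, add gradient step
    updates.append((v, v_new))
    updates.append((p, p + v_new))            # move parameter along velocity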
def __init__(self, input=None, Cparams=None, Mparams=None):
    c_w0, c_b0, c_w1, c_b1, c_w2, c_b2, c_w3, c_b3 = Cparams
    m_w1, m_b1, o_w1, o_b1 = Mparams
    c1_layer0 = LeNetConvPoolLayer(input=input,
                                   filter_shape=filter_shape0,
                                   image_shape=image_shape0,
                                   W=c_w0, b=c_b0,
                                   poolsize=poolsize0)
    c1_layer1 = LeNetConvPoolLayer(input=c1_layer0.output,
                                   filter_shape=filter_shape1,
                                   image_shape=image_shape1,
                                   W=c_w1, b=c_b1,
                                   poolsize=poolsize1)
    c1_layer2 = LeNetConvPoolLayer(input=c1_layer1.output,
                                   filter_shape=filter_shape2,
                                   image_shape=image_shape2,
                                   W=c_w2, b=c_b2,
                                   poolsize=poolsize2)
    c1_layer3 = LeNetConvPoolLayer(input=c1_layer2.output,
                                   filter_shape=filter_shape3,
                                   image_shape=image_shape3,
                                   W=c_w3, b=c_b3,
                                   poolsize=poolsize3)
    m_input = c1_layer3.output
    m_input = m_input.flatten(2)
    m_layer1 = HiddenLayer(m_input, W=m_w1, b=m_b1)
    s_layer = LogisticRegression(m_layer1.output, W=o_w1, b=o_b1)
    # self.y_pred = s_layer.getlabel()
    self.y_pred = c1_layer3.output.flatten(1)
def __init__(self, input=None, y=None, Cparams=None, Mparams=None):
    c_w0, c_b0, c_w1, c_b1, c_w2, c_b2, c_w3, c_b3 = Cparams
    m_w1, m_b1, o_w1, o_b1 = Mparams
    c_layer0 = LeNetConvPoolLayer(input=input,
                                  filter_shape=filter_shape0,
                                  image_shape=image_shape0,
                                  W=c_w0, b=c_b0,
                                  poolsize=poolsize0)
    c_layer1 = LeNetConvPoolLayer(input=c_layer0.output,
                                  filter_shape=filter_shape1,
                                  image_shape=image_shape1,
                                  W=c_w1, b=c_b1,
                                  poolsize=poolsize1)
    c_layer2 = LeNetConvPoolLayer(input=c_layer1.output,
                                  filter_shape=filter_shape2,
                                  image_shape=image_shape2,
                                  W=c_w2, b=c_b2,
                                  poolsize=poolsize2)
    c_layer3 = LeNetConvPoolLayer(input=c_layer2.output,
                                  filter_shape=filter_shape3,
                                  image_shape=image_shape3,
                                  W=c_w3, b=c_b3,
                                  poolsize=poolsize3)
    m_input = c_layer3.output
    m_input = m_input.flatten(2)
    m_layer1 = HiddenLayer(m_input, W=m_w1, b=m_b1)
    s_layer = LogisticRegression(m_layer1.output, W=o_w1, b=o_b1)
    self.cost = s_layer.negative_log_likelihood(y)
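# Hedged usage sketch: turning the cost defined above into a gradient step.
# `model`, `x`, `y`, `params`, and `learning_rate` are assumed to be provided
# by the caller (the shared variables in Cparams/Mparams would make up `params`).
grads = T.grad(model.cost, params)
updates = [(p, p - learning_rate * g) for p, g in zip(params, grads)]
train_step = theano.function([x, y], model.cost, updates=updates)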
from mnist import MNIST

mndata = MNIST('./MNIST')
trImg, trLab = mndata.load_training()
teImg, teLab = mndata.load_testing()
trImg = np.asanyarray(trImg)
trLab = np.asanyarray(trLab)
teImg = np.asanyarray(teImg)
teLab = np.asanyarray(teLab)
usps = LoadUSPS.LoadUSPS('proj3_images.zip')
uspsImg, uspsLab = usps.load()

# 1> logistic regression
logistic = LogisticRegression(28 * 28, 10)
logistic.train(trImg, trLab, lr=0.3)
accuracy = logistic.test(teImg, teLab)
uspsacc = logistic.test(uspsImg, uspsLab)
print('logisticregression accuracy :', accuracy, uspsacc)

# grid search for the best learning rate
# for lr in [0.5, 0.3, 0.1, 0.05, 0.01]:
#     logistic.train(trImg, trLab, lr=lr)
#     accuracy = logistic.test(teImg, teLab)
#     print(lr, accuracy)

# 2> multilayer perceptron implementation using tensorflow
mlp = MLP.MLP()
def test_cnn(trainpath, trainlist, validset, dumppath, learning_rate=0.01, n_epochs=200, batch_size=100, earlystop=True): """ :type learning_rate: float :param learning_rate: learning rate used (factor for the stochastic gradient) :type n_epochs: int :param n_epochs: maximal number of epochs to run the optimizer :type dataset: string :param dataset: path to the dataset used for training /testing (MNIST here) :type nkerns: list of ints :param nkerns: number of kernels on each layer """ rng = numpy.random.RandomState(123) # datasets = load_data(dataset) datasets = loadmat(trainpath=trainpath, trainlist=trainlist, validset=validset, shuffle=shuffle, datasel=datasel, scaling=scaling, robust=robust) train_set_x, train_set_y = datasets[0] valid_set_x, valid_set_y = datasets[1] # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.get_value(borrow=True).shape[0] n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] n_train_batches /= batch_size n_valid_batches /= batch_size # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch # start-snippet-1 x = T.matrix('x') # the data is presented as rasterized images y = T.ivector('y') # the labels are presented as 1D vector of # [int] labels ###################### # BUILD ACTUAL MODEL # ###################### print '... building the model' # H - height; W - width # when the input is note salience matrix # idim0_H = 42 # idim0_W = 36 # fdim0_H = 6 # fdim0_W = 6 # when the input is chromagram idim0_H = 12 idim0_W = 12 fdim0_H = 2 fdim0_W = 2 pdim0_H = 2 pdim0_W = 2 idim1_H = (idim0_H - fdim0_H + 1) / pdim0_H idim1_W = (idim0_W - fdim0_W + 1) / pdim0_W fdim1_H = 2 fdim1_W = 2 pdim1_H = 2 pdim1_W = 2 idim2_H = (idim1_H - fdim1_H + 1) / pdim1_H idim2_W = (idim1_W - fdim1_W + 1) / pdim1_W fdim2 = 800 nkerns = [20, 20] # the below comments are examples of using this cnn to deal with chromagram with input feature size 144 = 12*12 # Reshape matrix of rasterized images of shape (batch_size, 12 * 12) # to a 4D tensor, compatible with our ConvPoolLayer # (12, 12) is the size of MNIST images. layer0_input = x.reshape((batch_size, 1, idim0_H, idim0_W)) # Construct the first convolutional pooling layer: # filtering reduces the image size to (12-2+1 , 12-2+1) = (11, 11) # maxpooling reduces this further to (11/2, 11/2) = (5, 5) # 4D output tensor is thus of shape (batch_size, nkerns[0], 5, 5) layer0 = ConvPoolLayer(rng, input=layer0_input, input_shape=(batch_size, 1, idim0_H, idim0_W), filter_shape=(nkerns[0], 1, fdim0_H, fdim0_W), poolsize=(pdim0_H, pdim0_W)) # Construct the second convolutional pooling layer # filtering reduces the image size to (5-2+1, 5-2+1) = (4, 4) # maxpooling reduces this further to (4/2, 4/2) = (2, 2) # 4D output tensor is thus of shape (batch_size, nkerns[1], 2, 2) layer1 = ConvPoolLayer(rng, input=layer0.output, input_shape=(batch_size, nkerns[0], idim1_H, idim1_W), filter_shape=(nkerns[1], nkerns[0], fdim1_H, fdim1_W), poolsize=(pdim1_H, pdim1_W)) # the HiddenLayer being fully-connected, it operates on 2D matrices of # shape (batch_size, num_pixels) (i.e matrix of rasterized images). # This will generate a matrix of shape (batch_size, nkerns[1] * 2 * 2), # or (500, 50 * 4 * 4) = (500, 800) with the default values. 
layer2_input = layer1.output.flatten(2) # construct a fully-connected sigmoidal layer layer2 = HiddenLayer(rng, input=layer2_input, n_in=nkerns[1] * idim2_H * idim2_W, n_out=fdim2, activation=T.nnet.relu) # classify the values of the fully-connected sigmoidal layer nclass = max(train_set_y.eval()) + 1 layer3 = LogisticRegression(input=layer2.output, n_in=fdim2, n_out=nclass) # the cost we minimize during training is the NLL of the model cost = layer3.negative_log_likelihood(y) # create a function to compute the mistakes that are made by the model validate_model = theano.function( [index], layer3.errors(y), givens={ x: valid_set_x[index * batch_size:(index + 1) * batch_size], y: valid_set_y[index * batch_size:(index + 1) * batch_size] }) train_score = theano.function( [index], layer3.errors(y), givens={ x: train_set_x[index * batch_size:(index + 1) * batch_size], y: train_set_y[index * batch_size:(index + 1) * batch_size] }) # create a list of all model parameters to be fit by gradient descent params = layer3.params + layer2.params + layer1.params + layer0.params # create a list of gradients for all model parameters grads = T.grad(cost, params) # train_model is a function that updates the model parameters by # SGD Since this model has many parameters, it would be tedious to # manually create an update rule for each model parameter. We thus # create the updates list by automatically looping over all # (params[i], grads[i]) pairs. updates = [(param_i, param_i - learning_rate * grad_i) for param_i, grad_i in zip(params, grads)] train_model = theano.function( [index], cost, updates=updates, givens={ x: train_set_x[index * batch_size:(index + 1) * batch_size], y: train_set_y[index * batch_size:(index + 1) * batch_size] }) # end-snippet-1 ############### # TRAIN MODEL # ############### print '... 
training' # early-stopping parameters patience = 10 * n_train_batches # look as this many examples regardless patience_increase = 2 # wait this much longer when a new best is # found improvement_threshold = 0.996 # a relative improvement of this much is # considered significant validation_frequency = min(n_train_batches, patience / 2) # go through this many # minibatche before checking the network # on the validation set; in this case we # check every epoch best_validation_loss = numpy.inf best_iter = 0 training_history = [] start_time = timeit.default_timer() epoch = 0 done_looping = False while (epoch < n_epochs): if earlystop and done_looping: print 'early-stopping' break epoch = epoch + 1 for minibatch_index in xrange(n_train_batches): iter = (epoch - 1) * n_train_batches + minibatch_index cost_ij = train_model(minibatch_index) if (iter + 1) % validation_frequency == 0: # compute zero-one loss on validation set validation_losses = [ validate_model(i) for i in xrange(n_valid_batches) ] #training_losses = [train_score(i) for i in xrange(n_valid_batches)] this_validation_loss = numpy.mean(validation_losses) #this_training_loss = numpy.mean(training_losses) #training_history.append([iter,this_training_loss,this_validation_loss]) training_history.append([iter, this_validation_loss]) # print('epoch %i, minibatch %i/%i, training error %f %%' % # (epoch, minibatch_index + 1, n_train_batches, # this_training_loss * 100.)) print('epoch %i, minibatch %i/%i, validation error %f %%' % (epoch, minibatch_index + 1, n_train_batches, this_validation_loss * 100.)) print('iter = %d' % iter) print('patience = %d' % patience) # if we got the best validation score until now if this_validation_loss < best_validation_loss: #improve patience if loss improvement is good enough if this_validation_loss < best_validation_loss * improvement_threshold: patience = max(patience, iter * patience_increase) numpy.savez(dumppath, model=params, training_history=training_history, best_validation_loss=best_validation_loss) # save best validation score and iteration number best_validation_loss = this_validation_loss best_iter = iter print('best_validation_loss %f' % best_validation_loss) if patience <= iter: done_looping = True if earlystop: break end_time = timeit.default_timer() # final save numpy.savez(dumppath, model=params, training_history=training_history, best_validation_loss=best_validation_loss) print(('Optimization complete with best validation score of %f %%, ' 'obtained at iteration %i, ') % (best_validation_loss * 100., best_iter + 1)) print >> sys.stderr, ('The fine tuning code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.))
def __init__(self, rng, input, n_in, hidden_layers_sizes, n_out, model=None): """Initialize the parameters for the multilayer perceptron :type rng: numpy.random.RandomState :param rng: a random number generator used to initialize weights :type input: theano.tensor.TensorType :param input: symbolic variable that describes the input of the architecture (one minibatch) :type n_in: int :param n_in: number of input units, the dimension of the space in which the datapoints lie :type hidden_layers_sizes: int list :param n_hidden: number of hidden units in each hidden layer :type n_out: int :param n_out: number of output units, the dimension of the space in which the labels lie """ self.n_layers = len(hidden_layers_sizes) self.hiddenlayers = [] self.params = [] self.L1 = 0 self.L2_sqr = 0 # Since we are dealing with a one hidden layer MLP, this will translate # into a HiddenLayer with a tanh activation function connected to the # LogisticRegression layer; the activation function can be replaced by # sigmoid or any other nonlinear function for i in xrange(self.n_layers): if i == 0: input_size = n_in else: input_size = hidden_layers_sizes[i - 1] # the input to this layer is either the activation of the # hidden layer below or the input of the DBN if you are on # the first layer if i == 0: layer_input = input else: layer_input = self.hiddenlayers[i - 1].output if model is None: W = None b = None else: W = model[i * 2] b = model[i * 2 + 1] hiddenLayer = HiddenLayer(rng=rng, input=layer_input, n_in=input_size, n_out=hidden_layers_sizes[i], W=W, b=b, activation=T.nnet.sigmoid) self.hiddenlayers.append(hiddenLayer) self.params.extend(hiddenLayer.params) self.L1 += (abs(hiddenLayer.W).sum()) self.L2_sqr += ((hiddenLayer.W**2).sum()) # The logistic regression layer gets as input the hidden units # of the hidden layer if model is None: W = None b = None else: W = model[-2] b = model[-1] self.logRegressionLayer = LogisticRegression( input=self.hiddenlayers[-1].output, n_in=hidden_layers_sizes[-1], W=W, b=b, n_out=n_out) # end-snippet-2 start-snippet-3 # L1 norm ; one regularization option is to enforce L1 norm to # be small self.L1 += (abs(self.logRegressionLayer.W).sum()) self.L2_sqr += ((self.logRegressionLayer.W**2).sum()) # negative log likelihood of the MLP is given by the negative # log likelihood of the output of the model, computed in the # logistic regression layer self.negative_log_likelihood = ( self.logRegressionLayer.negative_log_likelihood) # same holds for the function computing the number of errors self.errors = self.logRegressionLayer.errors self.predprobs = self.logRegressionLayer.p_y_given_x self.preds = self.logRegressionLayer.y_pred # the parameters of the model are the parameters of the two layer it is # made out of self.params.extend(self.logRegressionLayer.params) # end-snippet-3 # keep track of model input self.input = input
        features.append((float(col_1), float(col_2), float(col_3),
                         float(col_4), float(col_5), float(col_6)))
        categories.append(int(col_7))
    return features, categories


if __name__ == "__main__":
    train_x, train_y = getInput('./dataForTrainingLogistic.txt')
    test_x, test_y = getInput('./dataForTestingLogistic.txt')
    train_x = np.hstack((np.array(train_x), np.ones((len(train_x), 1))))
    test_x = np.hstack((np.array(test_x), np.ones((len(test_x), 1))))
    train_y = np.array(train_y).reshape(len(train_y))
    test_y = np.array(test_y).reshape(len(test_y))
    lr = LogisticRegression(learning_rate=0.00015,
                            initial_w=np.zeros(train_x.shape[1]))
    # batch gradient descent
    # history_loss, history_test_loss, history_score, _ = lr.train_gradient_descent(
    #     epoch=150000, epoch_per_round=10000, train_x=train_x, train_y=train_y,
    #     test_x=test_x, test_y=test_y)
    # stochastic gradient descent
    history_loss, history_test_loss, history_score, _ = lr.train_stochastic_gradient_descent(
        iteration_num=500000, iter_per_round=100, batch_size=1,
        train_x=train_x, train_y=train_y, test_x=test_x, test_y=test_y)
    print('Coefficient:', lr.w)
    variable_x = range(100, 500001, 100)
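    # A hedged follow-up: plotting the recorded loss histories against
    # variable_x (matplotlib assumed; with iter_per_round=100 over 500000
    # iterations, both histories should hold the same 5000 points as variable_x).
    import matplotlib.pyplot as plt

    plt.plot(variable_x, history_loss, label='train loss')
    plt.plot(variable_x, history_test_loss, label='test loss')
    plt.xlabel('iteration')
    plt.legend()
    plt.show()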
def evaluate_lenet5(learning_rate=0.1, n_epochs=200, dataset='../data/mnist.pkl.gz', nkerns=[20, 50], batch_size=500): """ Demonstrates lenet on MNIST dataset :type learning_rate: float :param learning_rate: learning rate used (factor for the stochastic gradient) :type n_epochs: int :param n_epochs: maximal number of epochs to run the optimizer :type dataset: string :param dataset: path to the dataset used for training /testing (MNIST here) :type nkerns: list of ints :param nkerns: number of kernels on each layer """ rng = numpy.random.RandomState(23455) datasets = load_data(dataset) train_set_x, train_set_y = datasets[0] valid_set_x, valid_set_y = datasets[1] test_set_x, test_set_y = datasets[2] # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.get_value(borrow=True).shape[0] n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] n_test_batches = test_set_x.get_value(borrow=True).shape[0] n_train_batches /= batch_size n_valid_batches /= batch_size n_test_batches /= batch_size # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch x = T.matrix('x') # the data is presented as rasterized images y = T.ivector('y') # the labels are presented as 1D vector of # [int] labels ishape = (28, 28) # this is the size of MNIST images ###################### # BUILD ACTUAL MODEL # ###################### print '... building the model' # Reshape matrix of rasterized images of shape (batch_size,28*28) # to a 4D tensor, compatible with our LeNetConvPoolLayer layer0_input = x.reshape((batch_size, 1, 28, 28)) # Construct the first convolutional pooling layer: # filtering reduces the image size to (28-5+1,28-5+1)=(24,24) # maxpooling reduces this further to (24/2,24/2) = (12,12) # 4D output tensor is thus of shape (batch_size,nkerns[0],12,12) layer0 = LeNetConvPoolLayer(rng, input=layer0_input, image_shape=(batch_size, 1, 28, 28), filter_shape=(nkerns[0], 1, 5, 5), poolsize=(2, 2)) # Construct the second convolutional pooling layer # filtering reduces the image size to (12-5+1,12-5+1)=(8,8) # maxpooling reduces this further to (8/2,8/2) = (4,4) # 4D output tensor is thus of shape (nkerns[0],nkerns[1],4,4) layer1 = LeNetConvPoolLayer(rng, input=layer0.output, image_shape=(batch_size, nkerns[0], 12, 12), filter_shape=(nkerns[1], nkerns[0], 5, 5), poolsize=(2, 2)) # the TanhLayer being fully-connected, it operates on 2D matrices of # shape (batch_size,num_pixels) (i.e matrix of rasterized images). 
# This will generate a matrix of shape (20,32*4*4) = (20,512) layer2_input = layer1.output.flatten(2) # construct a fully-connected sigmoidal layer layer2 = HiddenLayer(rng, input=layer2_input, n_in=nkerns[1] * 4 * 4, n_out=500, activation=T.tanh) # classify the values of the fully-connected sigmoidal layer layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10) # the cost we minimize during training is the NLL of the model cost = layer3.negative_log_likelihood(y) # create a function to compute the mistakes that are made by the model test_model = theano.function( [index], layer3.errors(y), givens={ x: test_set_x[index * batch_size:(index + 1) * batch_size], y: test_set_y[index * batch_size:(index + 1) * batch_size] }) validate_model = theano.function( [index], layer3.errors(y), givens={ x: valid_set_x[index * batch_size:(index + 1) * batch_size], y: valid_set_y[index * batch_size:(index + 1) * batch_size] }) # create a list of all model parameters to be fit by gradient descent params = layer3.params + layer2.params + layer1.params + layer0.params # create a list of gradients for all model parameters grads = T.grad(cost, params) # train_model is a function that updates the model parameters by # SGD Since this model has many parameters, it would be tedious to # manually create an update rule for each model parameter. We thus # create the updates list by automatically looping over all # (params[i],grads[i]) pairs. updates = [] for param_i, grad_i in zip(params, grads): updates.append((param_i, param_i - learning_rate * grad_i)) train_model = theano.function( [index], cost, updates=updates, givens={ x: train_set_x[index * batch_size:(index + 1) * batch_size], y: train_set_y[index * batch_size:(index + 1) * batch_size] }) ############### # TRAIN MODEL # ############### print '... training' # early-stopping parameters patience = 10000 # look as this many examples regardless patience_increase = 2 # wait this much longer when a new best is # found improvement_threshold = 0.995 # a relative improvement of this much is # considered significant validation_frequency = min(n_train_batches, patience / 2) # go through this many # minibatche before checking the network # on the validation set; in this case we # check every epoch best_params = None best_validation_loss = numpy.inf best_iter = 0 test_score = 0. 
start_time = time.clock() epoch = 0 done_looping = False while (epoch < n_epochs) and (not done_looping): epoch = epoch + 1 for minibatch_index in xrange(n_train_batches): iter = (epoch - 1) * n_train_batches + minibatch_index if iter % 100 == 0: print 'training @ iter = ', iter cost_ij = train_model(minibatch_index) if (iter + 1) % validation_frequency == 0: # compute zero-one loss on validation set validation_losses = [ validate_model(i) for i in xrange(n_valid_batches) ] this_validation_loss = numpy.mean(validation_losses) print('epoch %i, minibatch %i/%i, validation error %f %%' % \ (epoch, minibatch_index + 1, n_train_batches, \ this_validation_loss * 100.)) # if we got the best validation score until now if this_validation_loss < best_validation_loss: #improve patience if loss improvement is good enough if this_validation_loss < best_validation_loss * \ improvement_threshold: patience = max(patience, iter * patience_increase) # save best validation score and iteration number best_validation_loss = this_validation_loss best_iter = iter # test it on the test set test_losses = [ test_model(i) for i in xrange(n_test_batches) ] test_score = numpy.mean(test_losses) print( (' epoch %i, minibatch %i/%i, test error of best ' 'model %f %%') % (epoch, minibatch_index + 1, n_train_batches, test_score * 100.)) if patience <= iter: done_looping = True break end_time = time.clock() print('Optimization complete.') print('Best validation score of %f %% obtained at iteration %i,'\ 'with test performance %f %%' % (best_validation_loss * 100., best_iter + 1, test_score * 100.)) print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.))
    # Store predictions at the right positions in the result vector.
    predictions[cat_indices_te] = predictions_cat.reshape(
        predictions[cat_indices_te].shape)
    return predictions


if __name__ == "__main__":
    # Import data
    y_train, x_train, ids_train = helper.load_csv_data('train.csv')
    y_test, x_test, ids_test = helper.load_csv_data('test.csv')
    y_train[y_train < 0] = 0
    # Define 1 model per category
    models = [
        LogisticRegression(degree=3, gamma=0.1),
        LogisticRegression(degree=6, gamma=0.1),
        LogisticRegression(degree=6, gamma=0.1),
        LogisticRegression(degree=6, gamma=0.1)
    ]
    # Train and predict
    predictions = train_predict_categories(y_train, x_train, x_test, *models)
    # Prepare for export
    predictions[predictions == 0] = -1
    # Export results
    helper.create_csv_submission(ids_test, predictions, 'predictions.csv')
def sgd_optimization_mnist(learning_rate=0.13, n_epochs=1000, dataset='data/mnist.pkl.gz', batch_size=600): training_set, validation_set, testing_set, = data_loader.load(dataset) training_set_x, training_set_y = training_set validation_set_x, validation_set_y = validation_set testing_set_x, testing_set_y = testing_set # compute number of minibatches for training, validation and testing n_train_batches = training_set_x.get_value( borrow=True).shape[0] / batch_size n_valid_batches = validation_set_x.get_value( borrow=True).shape[0] / batch_size n_test_batches = testing_set_x.get_value(borrow=True).shape[0] / batch_size ###################### # BUILD ACTUAL MODEL # ###################### print '... building the model' # allocate symbolic variables for the data index = tensor.lscalar() # generate symbolic variables for input (x and y represent a # minibatch) x = tensor.matrix('x') y = tensor.ivector('y') classifier = LogisticRegression(input=x, n_in=28 * 28, n_out=10) cost = classifier.negative_log_likelihood(y) # compiling a Theano function that computes the mistakes that are made by # the model on a minibatch test_model = theano.function( inputs=[index], outputs=classifier.errors(y), givens={ x: testing_set_x[index * batch_size:(index + 1) * batch_size], y: testing_set_y[index * batch_size:(index + 1) * batch_size] }) validate_model = theano.function( inputs=[index], outputs=classifier.errors(y), givens={ x: validation_set_x[index * batch_size:(index + 1) * batch_size], y: validation_set_y[index * batch_size:(index + 1) * batch_size] }) # compute the gradient of cost with respect to theta = (W,b) g_W = tensor.grad(cost=cost, wrt=classifier.W) g_b = tensor.grad(cost=cost, wrt=classifier.b) # update the parameters of the model updates = [(classifier.W, classifier.W - learning_rate * g_W), (classifier.b, classifier.b - learning_rate * g_b)] # compiling a Theano function `train_model` that returns the cost, but in # the same time updates the parameter of the model based on the rules # defined in `updates` train_model = theano.function( inputs=[index], outputs=cost, updates=updates, givens={ x: training_set_x[index * batch_size:(index + 1) * batch_size], y: training_set_y[index * batch_size:(index + 1) * batch_size] }) ############### # TRAIN MODEL # ############### print '... training the model' # early-stopping parameters patience = 5000 # look as this many examples regardless patience_increase = 2 # wait this much longer when a new best is # found improvement_threshold = 0.995 # a relative improvement of this much is considered significant validation_frequency = 5 * n_train_batches # requency of training best_validation_loss = numpy.inf test_score = 0. 
start_time = timeit.default_timer() done_looping = False epoch = 0 while (epoch < n_epochs) and (not done_looping): for minibatch_index in xrange(n_train_batches): minibatch_avg_cost = train_model(minibatch_index) # iter: number of minibatches used) iter = epoch * n_train_batches + minibatch_index if (iter + 1) % validation_frequency == 0: # compute zero-one loss on validation set validation_losses = [ validate_model(i) for i in xrange(n_valid_batches) ] this_validation_loss = numpy.mean(validation_losses) print('epoch %i, minibatch %i/%i, validation error %f %%' % (epoch, minibatch_index + 1, n_train_batches, this_validation_loss * 100.)) # if we got the best validation score until now if this_validation_loss < best_validation_loss: #improve patience if loss improvement is good enough if this_validation_loss < best_validation_loss * improvement_threshold: patience = max(patience, iter * patience_increase) # update best_validation_loss best_validation_loss = this_validation_loss # test it on the test set test_losses = [ test_model(i) for i in xrange(n_test_batches) ] test_score = numpy.mean(test_losses) print((' epoch %i, minibatch %i/%i, test error of' ' best model %f %%') % (epoch, minibatch_index + 1, n_train_batches, test_score * 100.)) # save the best model with open('best_model.pkl', 'w') as f: cPickle.dump(classifier, f) if patience <= iter: done_looping = True break epoch = epoch + 1 end_time = timeit.default_timer() print(('Optimization complete with best validation score of %f %%,' 'with test performance %f %%') % (best_validation_loss * 100., test_score * 100.)) print 'The code run for %d epochs, with %f epochs/sec' % ( epoch, 1. * epoch / (end_time - start_time))
X_test = None
Y_test = None
print("Non 0-1 labels removed from testing dataset!")

print("\nTraining SVM on MNIST dataset...")
svm = SupportVectorMachine()
svm.train(X, Y, 1)
print("SVM trained!")

print("\nTraining Linear Regression on MNIST dataset...")
linear = LinearRegression()
linear.train(X, Y)
print("Linear regression trained!")

print("\nTraining Logistic Regression on MNIST dataset...")
logistic = LogisticRegression()
logistic.train(X, Y)
print("Logistic regression trained!")

# Test SVM
print("\nRunning SVM on test data...")
misclassified = svm.test(X2, Y2)
print("Generalization Error:", round(misclassified / Y2.size, 3))
print("Misclassified:", misclassified, "/", Y2.size)
print("Accuracy (on test data):",
      round((1 - (misclassified / Y2.size)) * 100, 3), '%')

# Test Linear Regression
print("\nRunning Linear Regression on test data...")
misclassified = linear.test(X2, Y2)
print("Generalization Error:", round(misclassified / Y2.size, 3))
print("Misclassified:", misclassified, "/", Y2.size)
def solve_CNN(datapath, batch=500, n_hidden=5, n_out=10, n_epoch=3,
              learning_rate=0.54):
    x = T.dmatrix('x')
    y = T.ivector('y')
    index = T.iscalar('index')
    kernal = (50, 30)
    cifar_data = upload()
    train, test = cifar_data
    print 'data being converted to theano-shared............ '
    train_x, train_y = to_shared(train)
    test_x, test_y = to_shared(test)
    n_train_batch = train[0].shape[0] // batch
    n_valid_batch = test[0].shape[0] // batch
    rng = np.random.RandomState(123)
    layer0_input = x.reshape((batch, 3, 32, 32))
    layer0 = ConvPoolLayer(
        input=layer0_input,
        rng=rng,
        filter_shape=(kernal[0], 3, 5, 5),
    )
    layer1 = ConvPoolLayer(input=layer0.output,
                           rng=rng,
                           filter_shape=(kernal[1], kernal[0], 5, 5))
    layer2_input = layer1.output.flatten(2)
    layer2 = HiddenLayer(
        input=layer2_input,
        rng=rng,
        n_out=n_hidden,
        n_in=kernal[1] * 5 * 5,
    )
    layer3 = LogisticRegression(input=layer2.output, n_in=n_hidden,
                                n_out=n_out)
    fun_valid = theano.function(
        inputs=[index],
        outputs=layer3.errors(y),
        givens=[(x, test_x[index * batch:(index + 1) * batch, :]),
                (y, test_y[index * batch:(index + 1) * batch])])
    cost = layer3.negative_log_likelihood(y)
    params = layer0.params + layer1.params + layer2.params + layer3.params
    grad_all = T.grad(cost=cost, wrt=params)
    updates = [(param_i, param_i - learning_rate * grad_i)
               for param_i, grad_i in zip(params, grad_all)]
    fun_train = theano.function(
        inputs=[index],
        outputs=[],
        updates=updates,
        givens=[(x, train_x[index * batch:(index + 1) * batch, :]),
                (y, train_y[index * batch:(index + 1) * batch])])

    ################
    #TRAINING MODEL#
    ################
    print 'training starts now -->'
    patience = 5000
    patience_increase = 2
    improvement = 0.995
    validation_frequency = min(n_train_batch, patience // 2)
    least_error = np.Inf
    epoch = 0
    done_looping = False
    this_error = 0
    save = {}  # validation error recorded per epoch
    start_time = timeit.default_timer()
    print 'EPOCH counting .....'
    while epoch < n_epoch and (not done_looping):
        for current_batch in range(n_train_batch):
            total_batches = epoch * n_train_batch + current_batch
            fun_train(current_batch)
            if (total_batches + 1) % validation_frequency == 0:
                this_error = [fun_valid(n) for n in range(n_valid_batch)]
                this_error = np.mean(this_error)
                print this_error
                if this_error < least_error * improvement:
                    least_error = this_error
                    patience = max(patience, total_batches * patience_increase)
                    # with open('/home/sameer/best_model_neural_filters.pkl', 'wb') as f:
                    #     pickle.dump(layer0.params, f)
                    #     f.close()
            if total_batches > patience:
                done_looping = True
        epoch += 1
        if total_batches != 0:
            # print 'the convergence ratio is %f' % (patience / float(total_batches))
            print this_error
            print epoch
        save[epoch] = this_error
    print 'the error is %f' % least_error
    print 'the total number of epoch %d' % epoch
    end_time = timeit.default_timer()
    t = end_time - start_time
    print 'total time = %f sec' % t
    print 'time per epoch = %f sec/epoch' % (t / epoch)
import numpy as np
import matplotlib.pyplot as plt
from logistic import LogisticRegression

# read data
X = np.loadtxt('logistic_x.txt')
y = np.loadtxt('logistic_y.txt')

# build model
lr = LogisticRegression()
lr.fit(X, y)
y_ = lr.predict(X)

# create a mesh to plot in
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
h = 0.1  # step size
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
data = np.vstack((xx.ravel(), yy.ravel())).T
labels = lr.predict(data)

# plot
fig, ax = plt.subplots()
ax.scatter(data[:, 0], data[:, 1],
           c=np.where(labels == 1, 'green', 'red'), alpha=0.01)
plt.title('Decision Boundary of Logistic Regression')
ax.scatter(X[y == 1, 0], X[y == 1, 1], c='green',
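# A hedged alternative to the mesh scatter above: drawing the same boundary
# analytically, assuming the fitted model exposes a weight vector `lr.w` laid
# out as (w1, w2, intercept) -- attribute name and layout are hypothetical.
xs = np.array([x_min, x_max])
ys = -(lr.w[0] * xs + lr.w[2]) / lr.w[1]  # solve w1*x + w2*y + b = 0 for y
ax.plot(xs, ys, 'k--')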
def __init__(self, numpy_rng, theano_rng=None, n_ins=784, hidden_layers_sizes=[500, 500], n_outs=10, nkerns=[20, 50]): """This class is made to support a variable number of layers. :type numpy_rng: numpy.random.RandomState :param numpy_rng: numpy random number generator used to draw initial weights :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams :param theano_rng: Theano random generator; if None is given one is generated based on a seed drawn from `rng` :type n_ins: int :param n_ins: dimension of the input to the DBN :type n_layers_sizes: list of ints :param n_layers_sizes: intermediate layers size, must contain at least one value :type n_outs: int :param n_outs: dimension of the output of the network """ self.sigmoid_layers = [] self.rbm_layers = [] self.params = [] self.n_layers = len(hidden_layers_sizes) assert self.n_layers > 0 if not theano_rng: theano_rng = RandomStreams(numpy_rng.randint(2**30)) # allocate symbolic variables for the data self.x = T.matrix('x') # the data is presented as rasterized images self.y = T.ivector('y') # the labels are presented as 1D vector # of [int] labels batch_size = 500 # The DBN is an MLP, for which all weights of intermediate # layers are shared with a different RBM. We will first # construct the DBN as a deep multilayer perceptron, and when # constructing each sigmoidal layer we also construct an RBM # that shares weights with that layer. During pretraining we # will train these RBMs (which will lead to chainging the # weights of the MLP as well) During finetuning we will finish # training the DBN by doing stochastic gradient descent on the # MLP. rng = numpy.random.RandomState(23455) self.layer0_input = self.x.reshape((batch_size, 1, 28, 28)) # Construct the first convolutional pooling layer: # filtering reduces the image size to (28-5+1,28-5+1)=(24,24) # maxpooling reduces this further to (24/2,24/2) = (12,12) # 4D output tensor is thus of shape (batch_size,nkerns[0],12,12) self.layer0 = LeNetConvPoolLayer(rng, input=self.layer0_input, image_shape=(batch_size, 1, 28, 28), filter_shape=(nkerns[0], 1, 5, 5), poolsize=(2, 2)) # Construct the second convolutional pooling layer # filtering reduces the image size to (12-5+1,12-5+1)=(8,8) # maxpooling reduces this further to (8/2,8/2) = (4,4) # 4D output tensor is thus of shape (nkerns[0],nkerns[1],4,4) self.layer1 = LeNetConvPoolLayer(rng, input=self.layer0.output, image_shape=(batch_size, nkerns[0], 12, 12), filter_shape=(nkerns[1], nkerns[0], 5, 5), poolsize=(2, 2)) # the TanhLayer being fully-connected, it operates on 2D matrices of # shape (batch_size,num_pixels) (i.e matrix of rasterized images). 
# This will generate a matrix of shape (20,32*4*4) = (20,512) self.layer2_input = self.layer1.output.flatten(2) self.layer2 = HiddenLayer(rng, input=self.layer2_input, n_in=nkerns[1] * 4 * 4, n_out=500, activation=T.tanh) for i in xrange(self.n_layers): # construct the sigmoidal layer # the size of the input is either the number of hidden # units of the layer below or the input size if we are on # the first layer if i == 0: input_size = 500 else: input_size = hidden_layers_sizes[i - 1] # the input to this layer is either the activation of the # hidden layer below or the input of the DBN if you are on # the first layer if i == 0: layer_input = self.layer2.output else: layer_input = self.sigmoid_layers[-1].output sigmoid_layer = HiddenLayer(rng=numpy_rng, input=layer_input, n_in=input_size, n_out=hidden_layers_sizes[i], activation=T.nnet.sigmoid) # add the layer to our list of layers self.sigmoid_layers.append(sigmoid_layer) # its arguably a philosophical question... but we are # going to only declare that the parameters of the # sigmoid_layers are parameters of the DBN. The visible # biases in the RBM are parameters of those RBMs, but not # of the DBN. self.params.extend(sigmoid_layer.params) # Construct an RBM that shared weights with this layer rbm_layer = RBM(numpy_rng=numpy_rng, theano_rng=theano_rng, input=layer_input, n_visible=input_size, n_hidden=hidden_layers_sizes[i], W=sigmoid_layer.W, hbias=sigmoid_layer.b) self.rbm_layers.append(rbm_layer) # We now need to add a logistic layer on top of the MLP self.logLayer = LogisticRegression( input=self.sigmoid_layers[-1].output, n_in=hidden_layers_sizes[-1], n_out=n_outs) self.params.extend(self.logLayer.params) # compute the cost for second phase of training, defined as the # negative log likelihood of the logistic regression (output) layer self.finetune_cost = self.logLayer.negative_log_likelihood(self.y) # compute the gradients with respect to the model parameters # symbolic variable that points to the number of errors made on the # minibatch given by self.x and self.y self.errors = self.logLayer.errors(self.y)
def __init__(self, numpy_rng, theano_rng=None, n_ins=784, hidden_layers_sizes=[500, 500], n_outs=10, L1_reg=0, L2_reg=0, first_layer='grbm', model=None): """This class is made to support a variable number of layers. :type numpy_rng: numpy.random.RandomState :param numpy_rng: numpy random number generator used to draw initial weights :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams :param theano_rng: Theano random generator; if None is given one is generated based on a seed drawn from `rng` :type n_ins: int :param n_ins: dimension of the input to the DBN :type hidden_layers_sizes: list of ints :param hidden_layers_sizes: intermediate layers size, must contain at least one value :type n_outs: int :param n_outs: dimension of the output of the network """ self.sigmoid_layers = [] self.rbm_layers = [] self.params = [] self.n_layers = len(hidden_layers_sizes) self.L1 = 0 self.L2_sqr = 0 assert self.n_layers > 0 if not theano_rng: theano_rng = MRG_RandomStreams(numpy_rng.randint(2**30)) # allocate symbolic variables for the data self.x = T.matrix('x') # the data is presented as rasterized images self.y = T.ivector('y') # the labels are presented as 1D vector # of [int] labels # end-snippet-1 # The DBN is an MLP, for which all weights of intermediate # layers are shared with a different RBM. We will first # construct the DBN as a deep multilayer perceptron, and when # constructing each sigmoidal layer we also construct an RBM # that shares weights with that layer. During pretraining we # will train these RBMs (which will lead to chainging the # weights of the MLP as well) During finetuning we will finish # training the DBN by doing stochastic gradient descent on the # MLP. for i in xrange(self.n_layers): # construct the sigmoidal layer # the size of the input is either the number of hidden # units of the layer below or the input size if we are on # the first layer if i == 0: input_size = n_ins else: input_size = hidden_layers_sizes[i - 1] # the input to this layer is either the activation of the # hidden layer below or the input of the DBN if you are on # the first layer if i == 0: layer_input = self.x else: layer_input = self.sigmoid_layers[i - 1].output if model is None: W = None b = None else: W = model[i * 2] b = model[i * 2 + 1] sigmoid_layer = HiddenLayer(rng=numpy_rng, input=layer_input, n_in=input_size, n_out=hidden_layers_sizes[i], W=W, b=b, activation=T.nnet.sigmoid) # add the layer to our list of layers self.sigmoid_layers.append(sigmoid_layer) self.L1 += (abs(sigmoid_layer.W).sum()) self.L2_sqr += ((sigmoid_layer.W**2).sum()) # its arguably a philosophical question... but we are # going to only declare that the parameters of the # sigmoid_layers are parameters of the DBN. The visible # biases in the RBM are parameters of those RBMs, but not # of the DBN. 
self.params.extend(sigmoid_layer.params) # Construct an RBM that shares weights with this layer if i == 0: # first layer GBRBM - dealing with continuous values if first_layer == 'grbm': rbm_layer = GRBM(numpy_rng=numpy_rng, theano_rng=theano_rng, input=layer_input, n_visible=input_size, n_hidden=hidden_layers_sizes[i], W=sigmoid_layer.W, hbias=sigmoid_layer.b) if first_layer == 'rbm': rbm_layer = RBM(numpy_rng=numpy_rng, theano_rng=theano_rng, input=layer_input, n_visible=input_size, n_hidden=hidden_layers_sizes[i], W=sigmoid_layer.W, hbias=sigmoid_layer.b) # elif i == self.n_layers-1: # last layer GGRBM # rbm_layer = GRBM(numpy_rng=numpy_rng, # theano_rng=theano_rng, # input=layer_input, # n_visible=input_size, # n_hidden=hidden_layers_sizes[i], # W=sigmoid_layer.W, # hbias=sigmoid_layer.b) else: # subsequent layers BBRBM - binary RBM to cope with regularization rbm_layer = RBM(numpy_rng=numpy_rng, theano_rng=theano_rng, input=layer_input, n_visible=input_size, n_hidden=hidden_layers_sizes[i], W=sigmoid_layer.W, hbias=sigmoid_layer.b) self.rbm_layers.append(rbm_layer) # We now need to add a logistic layer on top of the MLP if model is None: W = None b = None else: W = model[-2] b = model[-1] self.logLayer = LogisticRegression( input=self.sigmoid_layers[-1].output, n_in=hidden_layers_sizes[-1], W=W, b=b, n_out=n_outs) self.params.extend(self.logLayer.params) self.L1 += (abs(self.logLayer.W).sum()) self.L2_sqr += ((self.logLayer.W**2).sum()) # compute the cost for second phase of training, defined as the # negative log likelihood of the logistic regression (output) layer self.finetune_cost = (self.logLayer.negative_log_likelihood(self.y) + L1_reg * self.L1 + L2_reg * self.L2_sqr) # compute the gradients with respect to the model parameters # symbolic variable that points to the number of errors made on the # minibatch given by self.x and self.y self.errors = self.logLayer.errors(self.y) self.predprobs = self.logLayer.p_y_given_x self.preds = self.logLayer.y_pred
def main(): bodies, stances, index, body_IDs, stance_IDs, labels = generateSentences( 'train_bodies.csv', 'train_stances.csv') encoder = LabelEncoder() encoder.fit(label_headers) encoded_labels = encoder.transform(labels) combined = matchStance(bodies, stances, body_IDs, stance_IDs) sorted_bodies = linkBodies(body_IDs, stance_IDs, bodies) training_bodies = sorted_bodies[0:index + 1] training_stances = stances[0:index + 1] training_labels = encoded_labels[0:index + 1] cv, tfidf = vectorise(training_bodies, training_stances, training_labels) # b_cv = cv.transform(training_bodies) # s_cv = cv.transform(training_stances) # b_tf = tfidf.transform(b_cv) # s_tf = tfidf.transform(s_cv) # cosineSim(training_bodies,training_stances,cv,training_labels,'plots/CS-vect.png') # cosineSim(b_tf,s_tf,tfidf,training_labels,'plots/CS-tfidf.png') # # kldivergence(b_cv.toarray(),s_cv.toarray(),training_labels,'plots/KL-vect.png') # kldivergence(b_tf.toarray(),s_tf.toarray(),training_labels,'plots/KL-tfidf.png') # valid_bodies = sorted_bodies[index+1:len(sorted_bodies)] # valid_stances = stances[index+1:len(stances)] valid_labels = encoded_labels[index + 1:len(encoded_labels)] valid_b_cv = cv.transform(sorted_bodies) valid_s_cv = cv.transform(stances) valid_b_tf = list(tfidf.transform(valid_b_cv).toarray()) valid_s_tf = list(tfidf.transform(valid_s_cv).toarray()) dists = calcDistances(valid_b_tf, valid_s_tf) distanceShow(dists[0:index + 1], training_labels) linear_dists = [ dists[i][len(dists[i]) - 5:len(dists[i])] for i in range(0, len(dists)) ] test_b, test_s, test_index, test_b_ids, test_s_ids, test_labels = generateSentences( 'competition_test_bodies.csv', 'competition_test_stances.csv') encoded_test = encoder.transform(test_labels) test_sorted_b = linkBodies(test_b_ids, test_s_ids, test_b) test_b_tf = list(tfidf.transform(cv.transform(test_sorted_b)).toarray()) test_s_tf = list(tfidf.transform(cv.transform(test_s)).toarray()) test_dists = calcDistances(test_b_tf, test_s_tf) test_linear_dists = [ test_dists[i][len(test_dists[i]) - 5:len(test_dists[i])] for i in range(0, len(test_dists)) ] lrs = [0.0005, 0.001, 0.005, 0.01, 0.05, 0.1] for i in range(0, len(lrs)): logistic = LogisticRegression(lr=lrs[i], steps=10000) logistic.fit(input=dists[0:index + 1], labels=training_labels) y_pred = logistic.predict(test_dists) linear = LinearRegression(lr=lrs[i], steps=50) linear.fit(input=linear_dists[0:index + 1], labels=training_labels) y_pred2 = linear.predict(test_linear_dists) print "Logistic Classification, LR: {}".format(lrs[i]) print( classification_report(y_true=list(encoded_test), y_pred=list(y_pred))) print(matthews_corrcoef(encoded_test, y_pred)) print "Linear Classification, LR: {}".format(lrs[i]) print( classification_report(y_true=list(encoded_test), y_pred=list(y_pred2))) print(matthews_corrcoef(encoded_test, y_pred2))