def test():
    train = read_data('./data/data2.csv')
    test_x = read_data('./data/test2.csv')

    # Generate training set and test set.
    train_x = train[:, 1::]
    train_y = train[:, 0]
    train_x = map_feature(train_x)
    test_x = map_feature(test_x)

    # Feature scaling.
    train_x, mu, sigma = scale_data(train_x)
    test_x = (test_x - mu) / sigma

    clf = LogisticRegression(train_x, train_y, 0.1)
    clf.learn()
    output = clf.predict(test_x)

    # Write results to file.
    seedling = open("./data/logistic_regression.csv", "wb")
    lr_csv = csv.writer(seedling)
    lr_csv.writerow(['PassengerId', 'Survived'])
    for i in range(len(output)):
        row = [str(i + 892), output[i].astype(np.uint8)]
        lr_csv.writerow(row)
    seedling.close()
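# Hypothetical sketch (not the original helpers): `map_feature` and `scale_data`
# are defined elsewhere in this project; a minimal version consistent with how
# they are called above could look like this.
import numpy as np

def map_feature(x, degree=2):
    # Expand each column with polynomial terms up to `degree` (assumed behaviour).
    cols = [x]
    for d in range(2, degree + 1):
        cols.append(x ** d)
    return np.hstack(cols)

def scale_data(x):
    # Standardize columns to zero mean and unit variance, returning the
    # statistics so the test set can be scaled with the same mu/sigma.
    mu = x.mean(axis=0)
    sigma = x.std(axis=0)
    sigma[sigma == 0] = 1.0
    return (x - mu) / sigma, mu, sigma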
def runML(meth, itrs, data_train, data_test, labels_train, labels_test):
    print meth, datetime.now().time()
    model = LogisticRegression(method=meth, max_iters=itrs)
    model.fit(data_train, labels_train)
    print datetime.now().time()
    prediction = model.predict(data_test)
    tagscores = LogisticRegression.tagAccuracy(labels_test, prediction)
    score = np.mean(tagscores)
    print " score tags: mean: {}, max: {}, min: {}".format(score, max(tagscores), min(tagscores))
    print " error rate: {}".format(1 - score)
    print datetime.now().time()
def standard_lr(x_train, y_train, x_valid, y_valid):
    from sklearn.linear_model import LogisticRegression
    lr = LogisticRegression(penalty='l2', max_iter=500, solver='sag', multi_class='ovr')
    lr.fit(x_train, y_train)
    pre = lr.predict(x_valid)
    correct = 0
    for i in range(len(y_valid)):
        if pre[i] == y_valid[i]:
            correct += 1
    print correct * 1.0 / len(y_valid)
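# Usage sketch for standard_lr (an assumption: X and y are already-loaded NumPy
# feature and label arrays). Uses sklearn's train_test_split for the hold-out set.
from sklearn.model_selection import train_test_split

x_tr, x_va, y_tr, y_va = train_test_split(X, y, test_size=0.2, random_state=0)
standard_lr(x_tr, y_tr, x_va, y_va)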
def __init__(self):
    LogisticRegression.__init__(self)
    # Array whose elements are 0 for numerical features and 1 for categorical features.
    self.x_types = None
    # Array holding each feature's index within the numerical / categorical groups.
    self.x_types_index = None
    self.band_width_vector = None
    self.numerical_index = None
    self.categorical_index = None
    self.feature_vectors_for_numeric = None
    self.feature_vectors_for_category = None
    self.max_values = None
    self.min_values = None
    self.num_of_bins = 0
    self.bin_length = None
def test_lr(x_train, y_train, x_valid, y_valid):
    # Append a bias column of ones to the training data.
    a = np.array([1.0 for i in range(len(x_train))])
    x_train = np.column_stack((x_train, a))
    lr = LogisticRegression(alpha=0.01, regularization='', num_iters=3000)
    theta, cost = lr.train(x_train, y_train, verbose=True, optimizer="sgd")

    # Append the same bias column to the validation data.
    a = np.array([1.0 for i in range(len(x_valid))])
    x_valid = np.column_stack((x_valid, a))
    correct = 0
    for i in range(len(x_valid)):
        label = lr.classify(x_valid[i], theta)
        if label == y_valid[i]:
            correct += 1
    print "accuracy:", correct * 1.0 / len(x_valid)
def __init__(self):
    import theano
    import util
    from theano import tensor as T
    from logistic_regression import LogisticRegression

    self.index = T.iscalar('index')
    self.BATCH_SIZE = 100
    self.LEARNING_RATE = 0.12
    self.dataSets = util.loadMnistData("mnist.pkl.gz")
    self.x = T.dmatrix('x')
    self.y = T.ivector('y')
    self.classifier = LogisticRegression(input=self.x, nIn=28 * 28, nOut=10)
    self.cost = self.classifier.negativeLogLikelihood(self.y)
    self.gW = T.grad(cost=self.cost, wrt=self.classifier.W)
    self.gB = T.grad(cost=self.cost, wrt=self.classifier.b)
    self.trainSet, self.validSet, self.testSet = self.dataSets
    self.nTrainSet, self.nValidSet, self.nTestSet = map(self.numBatches, self.dataSets)
    updates = [
        (self.classifier.W, self.classifier.W - self.LEARNING_RATE * self.gW),
        (self.classifier.b, self.classifier.b - self.LEARNING_RATE * self.gB)
    ]

    def makeGivens(data):
        return {
            self.x: data[0][self.index * self.BATCH_SIZE:(self.index + 1) * self.BATCH_SIZE],
            self.y: data[1][self.index * self.BATCH_SIZE:(self.index + 1) * self.BATCH_SIZE]
        }

    self.testModel = theano.function(
        inputs=[self.index],
        outputs=self.classifier.errors(self.y),
        givens=makeGivens(self.dataSets[2])
    )
    self.validationModel = theano.function(
        inputs=[self.index],
        outputs=self.classifier.errors(self.y),
        givens=makeGivens(self.dataSets[1])
    )
    self.trainModel = theano.function(
        inputs=[self.index],
        outputs=self.cost,
        updates=updates,
        givens=makeGivens(self.dataSets[0])
    )
def sgd(mus, rates, decays, data, labels, data_train, labels_train,
        data_valid, labels_valid, data_test, labels_test):
    print "starting grid search for SGD"
    validation_results = {}
    dicts = []
    for mu in mus:
        for rate in rates:
            for decay in decays:
                print "trying mu={} rate={} decay={}".format(mu, rate, decay)
                model = LogisticRegression(method="sgd", mu=mu, rate=rate,
                                           decay=decay, random_state=0)
                model.fit(data_train, labels_train)
                prediction = model.predict(data_valid)
                score = accuracy_score(labels_valid, prediction)
                validation_results[(mu, rate, decay)] = score
                print " score: {}".format(score)
                print " error rate: {}".format(1 - score)
                d = dict(method="sgd", mu=mu, rate=rate, decay=decay, score=score,
                         lcl=model.lcl_, rlcl=model.rlcl_, test=False)
                dicts.append(d)

    print "evaluating on test set"
    # get hyperparameters for highest accuracy on validation set
    mu, rate, decay = max(validation_results, key=validation_results.get)
    print "Using mu={} rate={} decay={}".format(mu, rate, decay)

    # train on entire train set and predict on test set
    model = LogisticRegression(method="sgd", mu=mu, rate=rate, decay=decay, random_state=0)
    model.fit(data, labels)
    prediction = model.predict(data_test)
    score = accuracy_score(labels_test, prediction)
    print "SGD test score: {}, error rate: {}".format(score, 1 - score)
    d = dict(method="sgd", mu=mu, rate=rate, decay=decay, score=score,
             lcl=model.lcl_, rlcl=model.rlcl_, test=True)
    dicts.append(d)
    return pd.DataFrame(dicts)
def cross_valid():
    x = read_data()

    # Generates training set and cross validation set.
    y = x[:, 0]
    x = x[:, 1::]
    x = map_feature(x)
    num = int(x.shape[0] * .7)
    x_cv = x[num::, :]
    y_cv = y[num::]
    x = x[0:num, :]
    y = y[0:num]

    # Feature scaling.
    x, mu, sigma = scale_data(x)
    x_cv = (x_cv - mu) / sigma

    # Use cross validation set to find the best lambda for regularization.
    C_candidates = [0, 0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30]
    lambda_ = 0
    best_accuracy = 0
    for C in C_candidates:
        clf = LogisticRegression(x, y, C)
        clf.learn()
        p_cv = clf.predict(x_cv)
        accuracy = (p_cv == y_cv).mean()
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            lambda_ = C
    print 'Best regularization parameter lambda: %f' % lambda_

    clf = LogisticRegression(x, y, lambda_)
    clf.learn()
    p = clf.predict(x)
    p_cv = clf.predict(x_cv)
    print 'Accuracy in training set: %f' % (p == y).mean()
    print 'Accuracy in cv: %f' % (p_cv == y_cv).mean()
def lbfgs(mus, data, labels, data_train, labels_train, data_valid, labels_valid,
          data_test, labels_test):
    print "starting grid search for L-BFGS"
    validation_results = {}
    dicts = []
    for mu in mus:
        print "trying mu={}".format(mu)
        model = LogisticRegression(method="lbfgs", mu=mu)
        model.fit(data_train, labels_train)
        prediction = model.predict(data_valid)
        score = accuracy_score(labels_valid, prediction)
        validation_results[mu] = score
        print " score: {}".format(score)
        print " error rate: {}".format(1 - score)
        d = dict(method="lbfgs", mu=mu, rate=-1, decay=-1, score=score,
                 lcl=model.lcl_, rlcl=model.rlcl_, test=False)
        dicts.append(d)

    print "evaluating on test set"
    # get hyperparameters for highest accuracy on validation set
    mu = max(validation_results, key=validation_results.get)
    print "Using mu of {}".format(mu)

    # train on entire train set and predict on test set
    model = LogisticRegression(method="lbfgs", mu=mu)
    model.fit(data, labels)
    prediction = model.predict(data_test)
    score = accuracy_score(labels_test, prediction)
    print "L-BFGS test score: {}, error rate: {}".format(score, 1 - score)
    d = dict(method="lbfgs", mu=mu, rate=-1, decay=-1, score=score,
             lcl=model.lcl_, rlcl=model.rlcl_, test=True)
    dicts.append(d)
    return pd.DataFrame(dicts)
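# Possible driver for the two grid searches above (an assumption about how they
# are invoked; the data splits and hyperparameter grids are defined at the call
# site). It simply stacks the per-configuration result frames for comparison.
results = pd.concat([
    sgd(mus, rates, decays, data, labels, data_train, labels_train,
        data_valid, labels_valid, data_test, labels_test),
    lbfgs(mus, data, labels, data_train, labels_train,
          data_valid, labels_valid, data_test, labels_test),
], ignore_index=True)
results.to_csv("grid_search_results.csv", index=False)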
def __init__(self, numpyRng, theanoRng=None, nIn=28*28,
             hiddenLayerSizes=[500, 500], nOut=10):
    self.nLayers = len(hiddenLayerSizes)
    if not theanoRng:
        theanoRng = theano.tensor.shared_randomstreams.RandomStreams(numpyRng.randint(2 ** 30))
    self.x = T.matrix('x')
    self.y = T.ivector('y')

    def makeSigmoidLayer(lastLayer, lastLayerSize, size):
        return Layer(rng=numpyRng, input=lastLayer, nIn=lastLayerSize, nOut=size,
                     activation=T.nnet.sigmoid)

    def makeDALayer(lastLayer, lastLayerSize, size, sigmoidLayer):
        return DenoisingAutoEncoder(
            numpyRng=numpyRng, theanoRng=theanoRng, input=lastLayer,
            nVisible=lastLayerSize, nHidden=size,
            W=sigmoidLayer.W, bHidden=sigmoidLayer.b)

    def makeLayers(lastLayer, lastInputSize, nextLayerSizes):
        if nextLayerSizes:
            newList = list(nextLayerSizes)
            size = newList.pop()
            sigmoidLayer = makeSigmoidLayer(lastLayer, lastInputSize, size)
            daLayer = makeDALayer(lastLayer, lastInputSize, size, sigmoidLayer)
            yield (sigmoidLayer, daLayer)
            for layer in makeLayers(sigmoidLayer.output, size, newList):
                yield layer

    self.sigmoidLayers, self.dALayers = zip(*makeLayers(self.x, nIn, reversed(hiddenLayerSizes)))
    print "created sda with layer shapes below."
    for da in self.dALayers:
        print "layersize:", da.W.get_value().shape
    self.logLayer = LogisticRegression(self.sigmoidLayers[-1].output, hiddenLayerSizes[-1], nOut)
    self.params = [l.params for l in self.sigmoidLayers] + [self.logLayer.params]
    self.fineTuneCost = self.logLayer.negativeLogLikelihood(self.y)
    self.errors = self.logLayer.errors(self.y)
def __init__(self, rng, input, n_in, n_hidden, n_out):
    """Initialize the parameters for the multilayer perceptron

    :type rng: numpy.random.RandomState
    :param rng: a random number generator used to initialize weights

    :type input: theano.tensor.TensorType
    :param input: symbolic variable that describes the input of the
    architecture (one minibatch)

    :type n_in: int
    :param n_in: number of input units, the dimension of the space in
    which the datapoints lie

    :type n_hidden: int
    :param n_hidden: number of hidden units

    :type n_out: int
    :param n_out: number of output units, the dimension of the space in
    which the labels lie
    """

    # Since we are dealing with a one hidden layer MLP, this will translate
    # into a HiddenLayer with a tanh activation function connected to the
    # LogisticRegression layer; the activation function can be replaced by
    # sigmoid or any other nonlinear function
    self.hiddenLayer = HiddenLayer(
        rng=rng,
        input=input,
        n_in=n_in,
        n_out=n_hidden,
        activation=T.tanh
    )

    # The logistic regression layer gets as input the hidden units
    # of the hidden layer
    self.logRegressionLayer = LogisticRegression(
        input=self.hiddenLayer.output,
        n_in=n_hidden,
        n_out=n_out
    )

    # Enforce L1 norm to be small
    self.L1 = (
        abs(self.hiddenLayer.W).sum()
        + abs(self.logRegressionLayer.W).sum()
    )

    # Enforce square of L2 norm to be small
    self.L2_sqr = (
        (self.hiddenLayer.W ** 2).sum()
        + (self.logRegressionLayer.W ** 2).sum()
    )

    # negative log likelihood of MLP is negative log likelihood of model
    # which is NLL of LR layer
    self.negative_log_likelihood = (
        self.logRegressionLayer.negative_log_likelihood
    )

    self.errors = self.logRegressionLayer.errors

    self.params = self.hiddenLayer.params + self.logRegressionLayer.params

    self.input = input
X, y = bc.data, bc.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1234)

print(X_train.shape)
print(X_train[0])
print(y_train.shape)
print(y_train[0])

from logistic_regression import LogisticRegression

regressor = LogisticRegression(lr=0.0001, n_iters=1000)
regressor.fit(X_train, y_train)
predicted = regressor.predict(X_test)

def accuracy(y_true, y_pred):
    accuracy = np.sum(y_true == y_pred) / len(y_true)
    return accuracy

print("LR classification accuracy:", accuracy(y_test, predicted))

cmap = ListedColormap(["#FF0000", "#00FF00"])
fig = plt.figure(figsize=(8, 6))
plt.scatter(X[:, 0], X[:, 1], c=y, cmap=cmap, edgecolors="k", s=20)
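# Minimal sketch of what the imported from-scratch LogisticRegression might look
# like (an assumption; the real `logistic_regression` module may differ): batch
# gradient descent on the cross-entropy loss, with fit/predict matching the
# calls above.
import numpy as np

class LogisticRegressionSketch:
    def __init__(self, lr=0.001, n_iters=1000):
        self.lr = lr
        self.n_iters = n_iters
        self.weights = None
        self.bias = 0.0

    def _sigmoid(self, z):
        return 1.0 / (1.0 + np.exp(-z))

    def fit(self, X, y):
        n_samples, n_features = X.shape
        self.weights = np.zeros(n_features)
        self.bias = 0.0
        for _ in range(self.n_iters):
            # Gradient of the average cross-entropy loss w.r.t. weights and bias.
            y_hat = self._sigmoid(X @ self.weights + self.bias)
            dw = (X.T @ (y_hat - y)) / n_samples
            db = np.sum(y_hat - y) / n_samples
            self.weights -= self.lr * dw
            self.bias -= self.lr * db

    def predict(self, X):
        y_hat = self._sigmoid(X @ self.weights + self.bias)
        return np.where(y_hat >= 0.5, 1, 0)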
import numpy as np
import sys

sys.path.append('../../../')
from logistic_regression import LogisticRegression
from model import Data
from utilities import *

train_data = np.load('data/train_data.npy')
test_data = np.load('data/test_data.npy')

train_inputs, train_target = Data.normalize(
    train_data[:, :-1]), train_data[:, -1:].astype(int).flatten()
test_inputs, test_target = Data.normalize(
    test_data[:, :-1]), test_data[:, -1:].astype(int).flatten()

model = LogisticRegression(input_dim=7, num_classes=3, batch_size=8,
                           epochs=50, learning_rate=1e-3)
model.train(train_inputs, train_target)

print('After training the model accuracy is about ',
      accuracy(model.predict(test_inputs), test_target))
confusion_plot(model, test_inputs, test_target, outfile='plots/confusion_matrix')
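# The `accuracy` helper comes from `utilities` above; a minimal version
# consistent with that call (an assumption about the real implementation):
def accuracy(predictions, targets):
    # Fraction of predictions that match the integer class targets.
    return np.mean(np.asarray(predictions) == np.asarray(targets))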
def evaluate_convnet(learning_rate=0.1, n_epochs=1, dataset='mnist.pkl.gz', nkerns=[20, 50], batch_size=500): """ Demonstrates lenet on MNIST dataset :type learning_rate: float :param learning_rate: learning rate used (factor for the stochastic gradient) :type n_epochs: int :param n_epochs: maximal number of epochs to run the optimizer :type dataset: string :param dataset: path to the dataset used for training /testing (MNIST here) :type nkerns: list of ints :param nkerns: number of kernels on each layer """ rng = numpy.random.RandomState(23455) datasets = load_data(dataset) train_set_x, train_set_y = datasets[0] valid_set_x, valid_set_y = datasets[1] test_set_x, test_set_y = datasets[2] # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.get_value(borrow=True).shape[0] n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] n_test_batches = test_set_x.get_value(borrow=True).shape[0] n_train_batches /= batch_size n_valid_batches /= batch_size n_test_batches /= batch_size # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch # start-snippet-1 x = T.matrix('x') # the data is presented as rasterized images y = T.ivector('y') # the labels are presented as 1D vector of # [int] labels ###################### # BUILD ACTUAL MODEL # ###################### print '... building the model' # Reshape matrix of rasterized images of shape (batch_size, 28 * 28) # to a 4D tensor, compatible with our LeNetConvPoolLayer # (28, 28) is the size of MNIST images. layer0_input = x.reshape((batch_size, 1, 28, 28)) # Construct the first convolutional pooling layer: # filtering reduces the image size to (28-5+1 , 28-5+1) = (24, 24) # maxpooling reduces this further to (24/2, 24/2) = (12, 12) # 4D output tensor is thus of shape (batch_size, nkerns[0], 12, 12) layer0 = ConvPoolLayer( rng, input=layer0_input, image_shape=(batch_size, 1, 28, 28), filter_shape=(nkerns[0], 1, 5, 5), poolsize=(2, 2) ) # Construct the second convolutional pooling layer # filtering reduces the image size to (12-5+1, 12-5+1) = (8, 8) # maxpooling reduces this further to (8/2, 8/2) = (4, 4) # 4D output tensor is thus of shape (nkerns[0], nkerns[1], 4, 4) layer1 = ConvPoolLayer( rng, input=layer0.output, image_shape=(batch_size, nkerns[0], 12, 12), filter_shape=(nkerns[1], nkerns[0], 5, 5), poolsize=(2, 2) ) # the HiddenLayer being fully-connected, it operates on 2D matrices of # shape (batch_size, num_pixels) (i.e matrix of rasterized images). # This will generate a matrix of shape (batch_size, nkerns[1] * 4 * 4), # or (500, 50 * 4 * 4) = (500, 800) with the default values. 
layer2_input = layer1.output.flatten(2) # construct a fully-connected sigmoidal layer layer2 = HiddenLayer( rng, input=layer2_input, n_in=nkerns[1] * 4 * 4, n_out=500, activation=T.tanh ) # classify the values of the fully-connected sigmoidal layer layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10) # the cost we minimize during training is the NLL of the model cost = layer3.negative_log_likelihood(y) # create a function to compute the mistakes that are made by the model test_model = theano.function( [index], layer3.errors(y), givens={ x: test_set_x[index * batch_size: (index + 1) * batch_size], y: test_set_y[index * batch_size: (index + 1) * batch_size] } ) validate_model = theano.function( [index], layer3.errors(y), givens={ x: valid_set_x[index * batch_size: (index + 1) * batch_size], y: valid_set_y[index * batch_size: (index + 1) * batch_size] } ) # create a list of all model parameters to be fit by gradient descent params = layer3.params + layer2.params + layer1.params + layer0.params # create a list of gradients for all model parameters grads = T.grad(cost, params) # train_model is a function that updates the model parameters by # SGD Since this model has many parameters, it would be tedious to # manually create an update rule for each model parameter. We thus # create the updates list by automatically looping over all # (params[i], grads[i]) pairs. updates = [ (param_i, param_i - learning_rate * grad_i) for param_i, grad_i in zip(params, grads) ] train_model = theano.function( [index], cost, # This is the negative-log-likelihood of the Logisitc Regression layer updates=updates, givens={ x: train_set_x[index * batch_size: (index + 1) * batch_size], y: train_set_y[index * batch_size: (index + 1) * batch_size] } ) # end-snippet-1 ############### # TRAIN MODEL # ############### print '... training' # early-stopping parameters patience = 10000 # look as this many examples regardless patience_increase = 2 # wait this much longer when a new best is # found improvement_threshold = 0.995 # a relative improvement of this much is # considered significant validation_frequency = min(n_train_batches, patience / 2) # go through this many # minibatche before checking the network # on the validation set; in this case we # check every epoch best_validation_loss = numpy.inf best_iter = 0 test_score = 0. 
start_time = time.clock() epoch = 0 done_looping = False while (epoch < n_epochs) and (not done_looping): epoch = epoch + 1 for minibatch_index in xrange(n_train_batches): iter = (epoch - 1) * n_train_batches + minibatch_index if iter % 100 == 0: print 'training @ iter = ', iter cost_ij = train_model(minibatch_index) if (iter + 1) % validation_frequency == 0: # compute zero-one loss on validation set validation_losses = [validate_model(i) for i in xrange(n_valid_batches)] this_validation_loss = numpy.mean(validation_losses) print('epoch %i, minibatch %i/%i, validation error %f %%' % (epoch, minibatch_index + 1, n_train_batches, this_validation_loss * 100.)) # if we got the best validation score until now if this_validation_loss < best_validation_loss: #improve patience if loss improvement is good enough if this_validation_loss < best_validation_loss * \ improvement_threshold: patience = max(patience, iter * patience_increase) # save best validation score and iteration number best_validation_loss = this_validation_loss best_iter = iter # test it on the test set test_losses = [ test_model(i) for i in xrange(n_test_batches) ] test_score = numpy.mean(test_losses) print((' epoch %i, minibatch %i/%i, test error of ' 'best model %f %%') % (epoch, minibatch_index + 1, n_train_batches, test_score * 100.)) if patience <= iter: done_looping = True break end_time = time.clock() print('Optimization complete.') print('Best validation score of %f %% obtained at iteration %i, ' 'with test performance %f %%' % (best_validation_loss * 100., best_iter + 1, test_score * 100.)) print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.))
audio_features = ['danceability', 'energy', 'speechiness', 'acousticness',
                  'instrumentalness', 'valence']
all_playlists = data_trans[audio_features].describe().T

rap = data_trans.loc[data_trans['playlist_id'] == 0][audio_features].describe().T
rap.rename(columns={'mean': 'mean_rap'}, inplace=True)
jazz = data_trans.loc[data_trans['playlist_id'] == 1][audio_features].describe().T
jazz.rename(columns={'mean': 'mean_jazz'}, inplace=True)

df1 = rap['mean_rap']
df2 = jazz['mean_jazz']
df3 = all_playlists['mean']
r = pd.concat([df1, df2, df3], axis=1)
r.plot(kind='bar', figsize=(8, 5), title='Audio feature average value per playlist',
       colormap='viridis', rot=20);

features = data_trans.loc[:, 'danceability':'valence'].values
targets = data_trans.loc[:, 'playlist_id'].values
x_train, x_test, y_train, y_test = train_test_split(features, targets,
                                                    test_size=0.30, random_state=100)

lr = LogisticRegression(iterations=15000, learning_rate=0.10)
pred_y = lr.fit(x_train, y_train).predict(x_test)
accuracy_score(pred_y, y_test)
confusion_matrix(y_test, pred_y)

gnb = GaussianNaiveBayes()
pred_y = gnb.fit(x_train, y_train).predict(x_test)
accuracy_score(y_test, pred_y)
confusion_matrix(y_test, pred_y)
def optimize_cnn_lenet(learning_rate=0.01, n_epochs=200, dataset='data/mnist.pkl.gz',
                       batch_size=500, n_hidden=500, nkerns=[20, 50],
                       rng=np.random.RandomState(23455)):
    print '... load training set'
    datasets = load_data(dataset)
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size

    # Minibatch index.
    index = T.lscalar()
    # Data symbol.
    x = T.matrix('x')
    # Label symbol.
    y = T.ivector('y')

    print '... building the model'

    # Reshape the rasterized (batch_size, 28*28) matrix into a 4D tensor so it
    # matches what the convolutional layer expects. The added 1 is the number of
    # channels; these are grayscale images, so there is a single channel.
    layer0_input = x.reshape((batch_size, 1, 28, 28))

    # nkerns[0] = 20 filters.
    layer0 = ConvLayer(rng, input=layer0_input,
                       image_shape=(batch_size, 1, 28, 28),
                       filter_shape=(nkerns[0], 1, 5, 5))
    layer1 = PoolLayer(layer0.output, poolsize=(2, 2))

    # nkerns[1] = 50 filters.
    layer2 = ConvLayer(rng, input=layer1.output,
                       image_shape=(batch_size, nkerns[0], 12, 12),
                       filter_shape=(nkerns[1], nkerns[0], 5, 5))
    layer3 = PoolLayer(layer2.output, poolsize=(2, 2))

    # layer3 outputs 50 channels of 4x4 images as a 4D tensor, which cannot be fed
    # to the multilayer perceptron directly; flatten (batch_size, 50, 4, 4) into
    # (batch_size, 800), i.e. a 4*4*50 = 800-dimensional vector per example.
    layer4_input = layer3.output.flatten(2)

    # Hidden layer with 500 units; n_in is the size of the flattened input vector.
    layer4 = HiddenLayer(rng, input=layer4_input, n_in=nkerns[1] * 4 * 4,
                         n_out=n_hidden, activation=T.tanh)

    # Output layer on top of the 500 hidden units.
    layer5 = LogisticRegression(input=layer4.output, n_in=n_hidden, n_out=10)

    # Cost (a plain multilayer perceptron needs a regularization term, but the
    # CNN structure itself already has a regularizing effect).
    cost = layer5.negative_log_likelihood(y)

    # Test model: compute layer5.errors on the minibatch selected by `index`
    # through the givens mapping.
    test_model = theano.function([index], layer5.errors(y),
                                 givens={x: test_set_x[index * batch_size: (index + 1) * batch_size],
                                         y: test_set_y[index * batch_size: (index + 1) * batch_size]})

    # Validation model.
    validate_model = theano.function([index], layer5.errors(y),
                                     givens={x: valid_set_x[index * batch_size: (index + 1) * batch_size],
                                             y: valid_set_y[index * batch_size: (index + 1) * batch_size]})

    # Parameters to differentiate (the pooling layers have no parameters).
    params = layer5.params + layer4.params + layer2.params + layer0.params

    # Gradients of the cost with respect to the parameters.
    grads = T.grad(cost, params)

    # Parameter updates.
    updates = [(param_i, param_i - learning_rate * grad_i)
               for param_i, grad_i in zip(params, grads)]

    # Train model.
    train_model = theano.function(inputs=[index], outputs=cost, updates=updates,
                                  givens={x: train_set_x[index * batch_size: (index + 1) * batch_size],
                                          y: train_set_y[index * batch_size: (index + 1) * batch_size]})

    # optimize
    print "train model ..."
    patience = 10000
    patience_increase = 2
    improvement_threshold = 0.995
    validation_frequency = min(n_train_batches, patience / 2)

    best_validation_loss = np.inf
    best_iter = 0
    test_score = 0
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    fp1 = open('log/lenet_validation_error.txt', 'w')
    fp2 = open('log/lenet_test_error.txt', 'w')

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):
            minibatch_avg_cost = train_model(minibatch_index)
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                # Pass each validation index to the function that computes the
                # validation error rate and collect the results in a list.
                validation_losses = [validate_model(i) for i in xrange(n_valid_batches)]
                # Average them into a single score.
                this_validation_loss = np.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f ' %
                      (epoch, minibatch_index + 1, n_train_batches, this_validation_loss * 100.))
                fp1.write("%d\t%f\n" % (epoch, this_validation_loss * 100))

                if this_validation_loss < best_validation_loss:
                    if (this_validation_loss < best_validation_loss * improvement_threshold):
                        patience = max(patience, iter * patience_increase)
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # Pass each test index to the function that computes the test
                    # error rate and collect the results in a list.
                    test_losses = [test_model(i) for i in xrange(n_test_batches)]
                    # Average them into a single score.
                    test_score = np.mean(test_losses)
                    print('epoch %i, minibatch %i/%i, test error %f ' %
                          (epoch, minibatch_index + 1, n_train_batches, test_score * 100.))
                    fp2.write("%d\t%f\n" % (epoch, test_score * 100))

            if patience <= iter:
                done_looping = True
                break

    fp1.close()
    fp2.close()
    end_time = timeit.default_timer()
    print(('optimization complete. Best validation score of %f obtained at iteration %i, '
           'with test performance %f') %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('This code for file ' + os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))

    import cPickle
    cPickle.dump(layer0, open("model/cnn_layer0.pkl", "wb"))
    cPickle.dump(layer2, open("model/cnn_layer2.pkl", "wb"))
    cPickle.dump(layer4, open("model/cnn_layer4.pkl", "wb"))
    cPickle.dump(layer5, open("model/cnn_layer5.pkl", "wb"))
def test_regression_model_mnist(dataset_name='mnist.pkl.gz', learning_rate=0.13, n_epochs=1000, batch_size=600): # Set up the dataset dataset = load_data(dataset_name) # Split the data into a training, validation and test set train_data, train_labels = dataset[0] test_data, test_labels = dataset[1] validation_data, validation_labels = dataset[2] # Compute number of minibatches for each set n_train_batches = train_data.get_value(borrow=True).shape[0] / batch_size n_valid_batches = validation_data.get_value(borrow=True).shape[0] / batch_size n_test_batches = test_data.get_value(borrow=True).shape[0] / batch_size data_dim = (28, 28) # The dimension of each image in the dataset data_classes = 10 # The number of classes within the data # Build the model # --------------- # Allocate symbolic variables for data index = T.lscalar() # This is the index to a minibatch x = T.matrix('x') # Data (rasterized images) y = T.ivector('y') # Labels (1d vector of ints) # Construct logistic regression class classifier = LogisticRegression(input=x, n_in=data_dim[0]*data_dim[1], n_out=data_classes) # Cost to minimize during training cost = classifier.negative_log_likelihood(y) # Compile a Theano function that computes mistakes made by the model on a minibatch test_model = th.function(inputs=[index], # This function is for the test data outputs=classifier.errors(y), givens={x: test_data[index * batch_size: (index + 1) * batch_size], y: test_labels[index * batch_size: (index + 1) * batch_size]}) validate_model = th.function(inputs=[index], # This function is for the validation data outputs=classifier.errors(y), givens={x: validation_data[index * batch_size: (index + 1) * batch_size], y: validation_labels[index * batch_size: (index + 1) * batch_size]}) # Compute the gradient of cost with respect to theta = (W,b) grad_W = T.grad(cost=cost, wrt=classifier.W) grad_b = T.grad(cost=cost, wrt=classifier.b) # Specify how to update model parameters as a list of (variable, update expression) pairs updates = [(classifier.W, classifier.W - learning_rate * grad_W), (classifier.b, classifier.b - learning_rate * grad_b)] # Compile Theano function that returns the cost and updates parameters of model based on update rules train_model = th.function(inputs=[index], # Index in minibatch that defines x with label y outputs=cost, # Cost/loss associated with x,y updates=updates, givens={x: train_data[index * batch_size: (index + 1) * batch_size], y: train_labels[index * batch_size: (index + 1) * batch_size]}) # Train the model # --------------- # Setup the early-stopping parameters patience = 5000 # Minimum number of examples to examine patience_increase = 2 # How much longer to wait once a new best is found improvement_threshold = 0.995 # Value of a significant relative improvement validation_frequency = min(n_train_batches, patience / 2) # Number of minibatches before validating best_validation_loss = np.inf test_score = 0 start_time = time.clock() # Setup the training loop done_looping = False epoch = 0 while (epoch < n_epochs) and (not done_looping): epoch = epoch + 1 for minibatch_index in xrange(n_train_batches): minibatch_avg_cost = train_model(minibatch_index) # Set the iteration number iter = (epoch - 1) * n_train_batches + minibatch_index if (iter + 1) % validation_frequency == 0: # Compute the zero-one loss on the validation set validation_losses = [validate_model(i) for i in xrange(n_valid_batches)] this_validation_loss = np.mean(validation_losses) print('epoch %i, minibatch %i/%i, validation error %f %%' % (epoch, 
minibatch_index + 1, n_train_batches, this_validation_loss * 100.)) # Check if current validation score is the best if this_validation_loss < best_validation_loss: # Improve the patience is loss improvement is good enough if this_validation_loss < best_validation_loss * improvement_threshold: patience = max(patience, iter * patience_increase) best_validation_loss = this_validation_loss # Test on test set test_losses = [test_model(i) for i in xrange(n_test_batches)] test_score = np.mean(test_losses) print('epoch %i, minibatch %i/%i, test error of best model %f %%' % (epoch, minibatch_index + 1, n_train_batches, test_score * 100.)) # Stop the loop if we have exhausted our patience if patience <= iter: done_looping = True break; # The loop has ended so record the time it took end_time = time.clock() # Print out results and timing information print('Optimization complete with best validation score of %f %%, with test performance %f %%' % (best_validation_loss * 100., test_score * 100.)) print 'The code ran for %d epochs with %f epochs/sec' % (epoch, 1. * epoch / (end_time - start_time)) print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] + ' ran for %.1fs' % ((end_time - start_time)))
# ..........................
#  TRAIN / TEST SPLIT
# ..........................
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)
# Rescale label for Adaboost to {-1, 1}
rescaled_y_train = 2 * y_train - np.ones(np.shape(y_train))
rescaled_y_test = 2 * y_test - np.ones(np.shape(y_test))

# .......
#  SETUP
# .......
adaboost = Adaboost(n_clf=8)
naive_bayes = NaiveBayes()
knn = KNN(k=4)
logistic_regression = LogisticRegression()
mlp = MultilayerPerceptron(n_hidden=20)
perceptron = Perceptron()
decision_tree = DecisionTree()
random_forest = RandomForest(n_estimators=150)
support_vector_machine = SupportVectorMachine(C=1, kernel=rbf_kernel)

# ........
#  TRAIN
# ........
print "Training:"
print "\tAdaboost"
adaboost.fit(X_train, rescaled_y_train)
print "\tNaive Bayes"
naive_bayes.fit(X_train, y_train)
print "\tLogistic Regression"
class CNN(object): ''' Convolutional Neural Network with 2 convolutional pooling layers The default parameters are for the MNIST dataset NOTE: Dataset is required to be 28x28 images with three sub data sets ''' def __init__(self, datasets, batch_size=500, nkerns=[20, 50], img_size=(28, 28), learning_rate=0.1): train_set_x, train_set_y = datasets[0] valid_set_x, valid_set_y = datasets[1] test_set_x, test_set_y = datasets[2] self.batch_size = batch_size # compute number of minibatches for training, validation and testing self.n_train_batches = train_set_x.get_value(borrow=True).shape[0] self.n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] self.n_test_batches = test_set_x.get_value(borrow=True).shape[0] self.n_train_batches /= batch_size self.n_valid_batches /= batch_size self.n_test_batches /= batch_size # allocate symbolic variables for the data self.index = T.lscalar() # index to a [mini]batch self.x = T.matrix('x') self.y = T.ivector('y') rng = np.random.RandomState(23455) layer0_input = self.x.reshape((batch_size, 1, img_size[0], img_size[1])) # Create the two convolutional layers that also perform downsampling using maxpooling self.layer0 = ConvPoolLayer(rng, input=layer0_input, image_shape=(batch_size, 1, img_size[0], img_size[1]), filter_shape=(nkerns[0], 1, 5, 5), poolsize=(2,2)) self.layer1 = ConvPoolLayer(rng, input=self.layer0.output, image_shape=(batch_size, nkerns[0], 12, 12), filter_shape=(nkerns[1], nkerns[0], 5, 5), poolsize=(2,2)) layer2_input = self.layer1.output.flatten(2) # Create the hidden layer of the MLP self.layer2 = HiddenLayer(rng, input=layer2_input, n_in=nkerns[1] * 4 * 4, n_out=500, activation=T.tanh) # Create the logistic regression layer for classifiying the results self.layer3 = LogisticRegression(input=self.layer2.output, n_in=500, n_out=10) self.cost = self.layer3.negative_log_likelihood(self.y) self.params = self.layer3.params + self.layer2.params + self.layer1.params + self.layer0.params self.grads = T.grad(self.cost, self.params) # Update list for the paramters to be used when training the model updates = [(param_i, param_i - learning_rate * grad_i) for param_i, grad_i in zip(self.params, self.grads)] # This function updates the model parameters using Stochastic Gradient Descent self.train_model = th.function([self.index], self.cost, # This is the negative-log-likelihood of the Logistic Regression layer updates=updates, givens={self.x: test_set_x[self.index * batch_size: (self.index + 1) * batch_size], self.y: test_set_y[self.index * batch_size: (self.index + 1) * batch_size]}) # These are Theano functions for testing performance on our test and validation datasets self.test_model = th.function([self.index], self.layer3.errors(self.y), givens={self.x: test_set_x[self.index * batch_size: (self.index + 1) * batch_size], self.y: test_set_y[self.index * batch_size: (self.index + 1) * batch_size]}) self.validate_model = th.function([self.index], self.layer3.errors(self.y), givens={self.x: valid_set_x[self.index * batch_size: (self.index + 1) * batch_size], self.y: valid_set_y[self.index * batch_size: (self.index + 1) * batch_size]}) def train(self, n_epochs, patience=10000, patience_increase=2, improvement_threshold=0.995): ''' Train the CNN on the training data for a defined number of epochs ''' # Setup the variables for training the model n_train_batches = self.n_train_batches n_valid_batches = self.n_valid_batches n_test_batches = self.n_test_batches validation_frequency = min(n_train_batches, patience / 2) best_validation_loss = np.inf 
best_iter = 0 best_score = 0. epoch = 0 done_looping = False # Train the CNN for a defined number of epochs while (epoch < n_epochs) and (not done_looping): epoch = epoch + 1 for minibatch_index in xrange(n_train_batches): iter = (epoch - 1) * n_train_batches + minibatch_index # Every 100 iterations if iter % 100 == 0: print 'Training iteration ', iter cost_ij = self.train_model(minibatch_index) if (iter + 1) % validation_frequency == 0: # Compute zero-one loss on validation set validation_losses = [self.validate_model(i) for i in xrange(n_valid_batches)] this_validation_loss = np.mean(validation_losses) print('epoch %i, minibatch %i/%i, validation error %f %%' % (epoch, minibatch_index + 1, n_train_batches, this_validation_loss * 100.)) # Check if current validation loss is best so far if this_validation_loss < best_validation_loss: # Improve patience if loss improvement is good enough if this_validation_loss < best_validation_loss * improvement_threshold: patience = max(patience, iter * patience_increase) # Save best validation score and iteration number best_validation_loss = this_validation_loss best_iter = iter if patience <= iter: done_looping = True break print 'Optimization complete.' print('Best validation score of %f %% obtained at iteration %i' % (best_validation_loss * 100., best_iter + 1)) def test(self, set_x, set_y): ''' Test data sets and return the test score ''' # allocate symbolic variables for the data n_test_batches = set_x.get_value(borrow=True).shape[0] n_test_batches /= self.batch_size test_model = th.function(inputs=[self.index], outputs=self.layer3.errors(self.y), givens={self.x: set_x[self.index * self.batch_size: (self.index + 1) * self.batch_size], self.y: set_y[self.index * self.batch_size: (self.index + 1) * self.batch_size]}) test_losses = [test_model(i) for i in xrange(n_test_batches)] test_score = np.mean(test_losses) return test_score def classify(self, set): ''' Return the labels for the given set NOTE: The batch size must be the same as the training set ''' n_test_batches = set.get_value(borrow=True).shape[0] n_test_batches /= self.batch_size classify_data = th.function(inputs=[self.index], # Input to this function is a mini-batch at index outputs=self.layer3.y_pred, # Output the y_predictions givens={self.x: set[self.index * batch_size: (self.index + 1) * batch_size]}) # Generate labels for the given data labels = [classify_data(i) for i in xrange(n_test_batches)] return np.array(labels)
from sklearn.cross_validation import train_test_split

# Read the training data
f = open("../data/train.csv")
reader = csv.reader(f)
next(reader, None)  # skip header
data = [data for data in reader]
f.close()

X = np.asarray([x[1:] for x in data], dtype=np.int16)
y = np.asarray([x[0] for x in data], dtype=np.int16)
X = np.true_divide(X, 255)  # normalize image data to 0-1
del data  # free up the memory
print("loaded training data")

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3,
                                                    random_state=RandomState())

lr = LogisticRegression(C=0.35)
lr.fit(X_train, y_train, 10)
guesses = lr.predict(X_test)

score = 0.0
for g in range(guesses.shape[0]):
    if guesses[g] == y_test[g]:
        score += 1
print("Score: ", score / len(guesses))
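# A possible follow-up (not in the original snippet): score the held-out Kaggle
# test file with the same model and write an ImageId/Label submission CSV. The
# file name and column headers here are assumptions.
with open("../data/test.csv") as f_test:
    test_reader = csv.reader(f_test)
    next(test_reader, None)  # skip header
    test_rows = [row for row in test_reader]
X_submit = np.true_divide(np.asarray(test_rows, dtype=np.int16), 255)
submit_guesses = lr.predict(X_submit)

with open("submission.csv", "w") as out:
    writer = csv.writer(out)
    writer.writerow(["ImageId", "Label"])
    for i, label in enumerate(submit_guesses):
        writer.writerow([i + 1, int(label)])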
from sklearn.cross_validation import train_test_split
from sklearn.metrics import accuracy_score

if __name__ == '__main__':
    raw_data = pd.read_csv('../data/train_binary.csv', header=0)
    data = raw_data.values

    imgs = data[0::, 1::]
    labels = data[::, 0]

    test_time = 10
    p = Perceptron()
    lr = LogisticRegression()
    writer = csv.writer(file('result.csv', 'wb'))

    for time in xrange(test_time):
        print 'iteration %d' % time
        train_features, test_features, train_labels, test_labels = train_test_split(
            imgs, labels, test_size=0.33, random_state=23323)

        p.train(train_features, train_labels)
        lr.train(train_features, train_labels)

        p_predict = p.predict(test_features)
        lr_predict = lr.predict(test_features)
def __init__(self, random_generator, theano_random_generator=None,
             x_dim=28 * 28, y_dim=10, hidden_layer_sizes=[500, 500],
             corruption_levels=[0.1, 0.1]):
    """ """
    # Declare empty sigmoid layer array for MLP
    self.sigmoid_layers = []

    # Declare an empty array of DenoisingAutoEncoder
    self.autoencoder_layers = []

    self.params = []
    self.n_layers = len(hidden_layer_sizes)

    if theano_random_generator == None:
        self.theano_random_generator = RandomStreams(
            random_generator.randint(2**30))
    else:
        self.theano_random_generator = theano_random_generator

    # Inputs using Theano
    self.x = T.matrix("x")
    self.y = T.ivector("y")

    # Initialize all parameters
    for i in range(self.n_layers):
        # Define x and y dimensions
        if i == 0:
            internal_x_dim = x_dim
        else:
            internal_x_dim = hidden_layer_sizes[i - 1]
        internal_y_dim = hidden_layer_sizes[i]

        # Find inputs
        if i == 0:
            internal_input = self.x
        else:
            internal_input = self.sigmoid_layers[i - 1].output

        # Define Sigmoid Layer
        self.sigmoid_layers.append(
            HiddenLayer(internal_input, internal_x_dim, internal_y_dim,
                        random_generator, activation=T.nnet.sigmoid))

        # Define input
        self.autoencoder_layers.append(
            DenoisingAutoEncoder(random_generator, theano_random_generator,
                                 internal_x_dim, internal_y_dim, internal_input,
                                 W=self.sigmoid_layers[i].W,
                                 b=self.sigmoid_layers[i].b))

        # Update parameters
        self.params.extend(self.sigmoid_layers[i].params)

    # Finally add logistic layer
    self.logistic_layer = LogisticRegression(
        self.sigmoid_layers[-1].output, hidden_layer_sizes[-1], y_dim)

    self.params.extend(self.logistic_layer.params)

    # These are two important costs
    # Finetuning after pretraining individual AutoEncoders
    self.finetune_cost = self.logistic_layer.negative_log_likelihood(self.y)

    # Error from prediction
    self.error = self.logistic_layer.error(self.y)
'''
# initialise the model
solver = 'svrg-sgd'
batchsize = 128
n_svrg_updates = 128 if solver == 'svrg-sgd' or solver == 'svrg-adagrad' or solver == 'svrg-rmsprop' else 1
n_epochs = 20
n_updates = int(np.ceil(n_epochs * n_train / batchsize / n_svrg_updates))
eval_freq = n_svrg_updates
learning_rate = 2**(-5)

# train the model
print('Train Logistic Regression using %s with the optimal learning rate of %f.'
      % (solver, learning_rate))
model = LogisticRegression(np.random.normal(0, 1, X_train.shape[1]),
                           solver=solver, batchsize=batchsize)
_, eval_log = model.fit(X_train, y_train,
                        n_updates=n_updates,
                        learning_rate=learning_rate,
                        n_svrg_updates=n_svrg_updates,
                        eval_freq=eval_freq,
                        eval_fn=partial(model.predict, X_test, y_test, False, False),
                        debug=False)
eval_log_filename = './results/lr_eval_%s_lr%f' % (dataset, learning_rate)

# save results to files
np.save(eval_log_filename, eval_log)
def __init__(self, numpy_rng, theano_rng=None, n_ins=784, hidden_layers_sizes=[500, 500], n_outs=10): """This class is made to support a variable number of layers. :type numpy_rng: numpy.random.RandomState :param numpy_rng: numpy random number generator used to draw initial weights :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams :param theano_rng: Theano random generator; if None is given one is generated based on a seed drawn from `rng` :type n_ins: int :param n_ins: dimension of the input to the DBN :type hidden_layers_sizes: list of ints :param hidden_layers_sizes: intermediate layers size, must contain at least one value :type n_outs: int :param n_outs: dimension of the output of the network """ self.sigmoid_layers = [] self.rbm_layers = [] self.params = [] self.n_layers = len(hidden_layers_sizes) assert self.n_layers > 0 if not theano_rng: theano_rng = RandomStreams(numpy_rng.randint(2**30)) # allocate symbolic variables for the data self.x = T.matrix('x') # the data is presented as rasterized images self.y = T.ivector('y') # the labels are presented as 1D vector # of [int] labels # end-snippet-1 # The DBN is an MLP, for which all weights of intermediate # layers are shared with a different RBM. We will first # construct the DBN as a deep multilayer perceptron, and when # constructing each sigmoidal layer we also construct an RBM # that shares weights with that layer. During pretraining we # will train these RBMs (which will lead to chainging the # weights of the MLP as well) During finetuning we will finish # training the DBN by doing stochastic gradient descent on the # MLP. for i in xrange(self.n_layers): # construct the sigmoidal layer # the size of the input is either the number of hidden # units of the layer below or the input size if we are on # the first layer if i == 0: input_size = n_ins else: input_size = hidden_layers_sizes[i - 1] # the input to this layer is either the activation of the # hidden layer below or the input of the DBN if you are on # the first layer if i == 0: layer_input = self.x else: layer_input = self.sigmoid_layers[-1].output sigmoid_layer = HiddenLayer(rng=numpy_rng, input=layer_input, n_in=input_size, n_out=hidden_layers_sizes[i], activation=T.nnet.sigmoid) # add the layer to our list of layers self.sigmoid_layers.append(sigmoid_layer) # its arguably a philosophical question... but we are # going to only declare that the parameters of the # sigmoid_layers are parameters of the DBN. The visible # biases in the RBM are parameters of those RBMs, but not # of the DBN. self.params.extend(sigmoid_layer.params) # Construct an RBM that shared weights with this layer rbm_layer = RBM(numpy_rng=numpy_rng, theano_rng=theano_rng, input=layer_input, n_visible=input_size, n_hidden=hidden_layers_sizes[i], W=sigmoid_layer.W, hbias=sigmoid_layer.b) self.rbm_layers.append(rbm_layer) # We now need to add a logistic layer on top of the MLP self.logLayer = LogisticRegression( input=self.sigmoid_layers[-1].output, n_in=hidden_layers_sizes[-1], n_out=n_outs) self.params.extend(self.logLayer.params) # compute the cost for second phase of training, defined as the # negative log likelihood of the logistic regression (output) layer self.finetune_cost = self.logLayer.negative_log_likelihood(self.y) # compute the gradients with respect to the model parameters # symbolic variable that points to the number of errors made on the # minibatch given by self.x and self.y self.errors = self.logLayer.errors(self.y)
def __init__(self, rng, input, n_in, n_hidden, n_out, \ W_hid=None, b_hid=None, W_out=None, b_out=None): """Initialize the parameters for the multilayer perceptron :type rng: numpy.random.RandomState :param rng: a random number generator used to initialize weights :type input: theano.tensor.TensorType :param input: symbolic variable that describes the input of the architecture (one minibatch) :type n_in: int :param n_in: number of input units, the dimension of the space in which the datapoints lie :type n_hidden: int :param n_hidden: number of hidden units :type n_out: int :param n_out: number of output units, the dimension of the space in which the labels lie """ if rng is None: rng = numpy.random.RandomState() # Since we are dealing with a one hidden layer MLP, this will # translate into a TanhLayer connected to the LogisticRegression # layer; this can be replaced by a SigmoidalLayer, or a layer # implementing any other nonlinearity self.hiddenLayer = HiddenLayer(rng=rng, input=input, n_in=n_in, n_out=n_hidden, W_values=W_hid, b_values=b_hid, activation=theano.tensor.nnet.sigmoid) # The logistic regression layer gets as input the hidden units # of the hidden layer self.logRegressionLayer = LogisticRegression( rng=rng, input=self.hiddenLayer.output, n_in=n_hidden, n_out=n_out, W_values=None, b_values=None) # L1 norm ; one regularization option is to enforce L1 norm to # be small self.L1 = abs(self.hiddenLayer.W).sum() \ + abs(self.logRegressionLayer.W).sum() #+ abs(self.hiddenLayer2.W).sum() # square of L2 norm ; one regularization option is to enforce # square of L2 norm to be small self.L2_sqr = (self.hiddenLayer.W**2).sum() \ + (self.logRegressionLayer.W**2).sum() #+ (self.hiddenLayer2.W**2).sum() # negative log likelihood of the MLP is given by the negative # log likelihood of the output of the model, computed in the # logistic regression layer self.negative_log_likelihood = self.logRegressionLayer.negative_log_likelihood # takes logs of the last softmax layer self.log_posteriors = self.logRegressionLayer.log_posteriors # same holds for the function computing the number of errors self.errors = self.logRegressionLayer.errors # returns the labels and predictions self.log_error_results = self.logRegressionLayer.log_error_results self.cost = self.negative_log_likelihood self.sum = self.logRegressionLayer.negative_log_likelihood_sum #self.delta_params = self.hiddenLayer.delta_params + self.logRegressionLayer.delta_params #self.params = self.hiddenLayer1.params + self.logRegressionLayer.params + self.hiddenLayer2.params self.params = self.logRegressionLayer.params + self.hiddenLayer.params self.delta_params = self.logRegressionLayer.delta_params + self.hiddenLayer.delta_params
def __init__(self, numpy_rng, theano_rng=None, n_ins=N_FEATURES * N_FRAMES, hidden_layers_sizes=[1024, 1024], n_phn=62 * 3, n_spkr=1, rho=0.90, eps=1.E-6): """This class is made to support a variable number of layers. :type numpy_rng: numpy.random.RandomState :param numpy_rng: numpy random number generator used to draw initial weights :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams :param theano_rng: Theano random generator; if None is given one is generated based on a seed drawn from `rng` :type n_ins: int :param n_ins: dimension of the input to the DBN :type n_layers_sizes: list of ints :param n_layers_sizes: intermediate layers size, must contain at least one value :type n_outs: int :param n_outs: dimension of the output of the network """ self.sigmoid_layers = [] self.rbm_layers = [] self.params = [] self.n_layers = len(hidden_layers_sizes) #self._rho = shared(numpy.cast['float32'](rho), name='rho') # for adadelta #self._eps = shared(numpy.cast['float32'](eps), name='eps') # for adadelta self._rho = rho self._eps = eps self._accugrads = [] # for adadelta self._accudeltas = [] # for adadelta assert self.n_layers > 0 if not theano_rng: theano_rng = RandomStreams(numpy_rng.randint(2 ** 30)) # allocate symbolic variables for the data self.x = T.fmatrix('x') # the data is presented as rasterized images self.y_phn = T.ivector('y_phn') # the labels are presented as 1D vector # of [int] labels self.y_spkr = T.ivector('y_spkr') # the labels are presented as 1D vector # of [int] labels # The DBN is an MLP, for which all weights of intermediate # layers are shared with a different RBM. We will first # construct the DBN as a deep multilayer perceptron, and when # constructing each sigmoidal layer we also construct an RBM # that shares weights with that layer. During pretraining we # will train these RBMs (which will lead to chainging the # weights of the MLP as well) During finetuning we will finish # training the DBN by doing stochastic gradient descent on the # MLP. for i in xrange(self.n_layers): # construct the sigmoidal layer # the size of the input is either the number of hidden # units of the layer below or the input size if we are on # the first layer if i == 0: input_size = n_ins else: input_size = hidden_layers_sizes[i - 1] # the input to this layer is either the activation of the # hidden layer below or the input of the DBN if you are on # the first layer if i == 0: layer_input = self.x else: layer_input = self.sigmoid_layers[-1].output sigmoid_layer = HiddenLayer(rng=numpy_rng, input=layer_input, n_in=input_size, n_out=hidden_layers_sizes[i], activation=T.nnet.sigmoid) # add the layer to our list of layers self.sigmoid_layers.append(sigmoid_layer) # its arguably a philosophical question... but we are # going to only declare that the parameters of the # sigmoid_layers are parameters of the DBN. The visible # biases in the RBM are parameters of those RBMs, but not # of the DBN. 
self.params.extend(sigmoid_layer.params) self._accugrads.extend([shared(value=numpy.zeros((input_size, hidden_layers_sizes[i]), dtype='float32'), name='accugrad_W', borrow=True), shared(value=numpy.zeros((hidden_layers_sizes[i], ), dtype='float32'), name='accugrad_b', borrow=True)]) # TODO self._accudeltas.extend([shared(value=numpy.zeros((input_size, hidden_layers_sizes[i]), dtype='float32'), name='accudelta_W', borrow=True), shared(value=numpy.zeros((hidden_layers_sizes[i], ), dtype='float32'), name='accudelta_b', borrow=True)]) # TODO # Construct an RBM that shared weights with this layer if i == 0: rbm_layer = GRBM(numpy_rng=numpy_rng, theano_rng=theano_rng, input=layer_input, n_visible=input_size, n_hidden=hidden_layers_sizes[i], W=sigmoid_layer.W, hbias=sigmoid_layer.b) else: rbm_layer = RBM(numpy_rng=numpy_rng, theano_rng=theano_rng, input=layer_input, n_visible=input_size, n_hidden=hidden_layers_sizes[i], W=sigmoid_layer.W, hbias=sigmoid_layer.b) self.rbm_layers.append(rbm_layer) # We now need to add a logistic layer on top of the MLP self.logLayerPhn = LogisticRegression( input=self.sigmoid_layers[-1].output, n_in=hidden_layers_sizes[-1], n_out=n_phn) self.params.extend(self.logLayerPhn.params) self._accugrads.extend([shared(value=numpy.zeros((hidden_layers_sizes[-1], n_phn), dtype='float32'), name='accugrad_W', borrow=True), shared(value=numpy.zeros((n_phn, ), dtype='float32'), name='accugrad_b', borrow=True)]) # TODO self._accudeltas.extend([shared(value=numpy.zeros((hidden_layers_sizes[-1], n_phn), dtype='float32'), name='accudelta_W', borrow=True), shared(value=numpy.zeros((n_phn, ), dtype='float32'), name='accudelta_b', borrow=True)]) # TODO self.logLayerSpkr = LogisticRegression( input=self.sigmoid_layers[-1].output, n_in=hidden_layers_sizes[-1], n_out=n_spkr) self.params.extend(self.logLayerSpkr.params) self._accugrads.extend([shared(value=numpy.zeros((hidden_layers_sizes[-1], n_spkr), dtype='float32'), name='accugrad_W', borrow=True), shared(value=numpy.zeros((n_spkr, ), dtype='float32'), name='accugrad_b', borrow=True)]) # TODO self._accudeltas.extend([shared(value=numpy.zeros((hidden_layers_sizes[-1], n_spkr), dtype='float32'), name='accudelta_W', borrow=True), shared(value=numpy.zeros((n_spkr, ), dtype='float32'), name='accudelta_b', borrow=True)]) # TODO self.finetune_cost_sum_phn = self.logLayerPhn.negative_log_likelihood_sum(self.y_phn) self.finetune_cost_sum_spkr = self.logLayerSpkr.negative_log_likelihood_sum(self.y_spkr) self.finetune_cost_phn = self.logLayerPhn.negative_log_likelihood(self.y_phn) self.finetune_cost_spkr = self.logLayerSpkr.negative_log_likelihood(self.y_spkr) self.errors_phn = self.logLayerPhn.errors(self.y_phn) self.errors_spkr = self.logLayerSpkr.errors(self.y_spkr)
# Draw a scatter plot for each iris type (x_axis vs. y_axis).
for iris_type in iris_types:
    plt.scatter(data[x_axis][data['class'] == iris_type],
                data[y_axis][data['class'] == iris_type],
                label=iris_type)
plt.show()

num_examples = data.shape[0]
x_train = data[[x_axis, y_axis]].values.reshape((num_examples, 2))
y_train = data['class'].values.reshape((num_examples, 1))

max_iterations = 1000
polynomial_degree = 0
sinusoid_degree = 0

logistic_regression = LogisticRegression(x_train, y_train, polynomial_degree, sinusoid_degree)
thetas, loss_histories = logistic_regression.train(max_iterations)
labels = logistic_regression.unique_labels

# Plot the training loss history for each class.
plt.plot(range(len(loss_histories[0])), loss_histories[0], label=labels[0])
plt.plot(range(len(loss_histories[1])), loss_histories[1], label=labels[1])
plt.plot(range(len(loss_histories[2])), loss_histories[2], label=labels[2])
plt.show()

y_train_predictions = logistic_regression.predict(x_train)
precision = np.sum(y_train_predictions == y_train) / y_train.shape[0] * 100
print('precision: ' + str(precision) + "%")

# Generate grid data for plotting the decision boundary.
x_min = np.min(x_train[:, 0])
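# A possible continuation (an assumption, not the original code): evaluate the
# trained model on a dense grid over the two selected features and scatter the
# predicted class of each grid point to visualize the decision boundaries. The
# indexing below assumes predict returns an array of class labels per row.
x_max = np.max(x_train[:, 0])
y_min = np.min(x_train[:, 1])
y_max = np.max(x_train[:, 1])
samples = 150
xs = np.linspace(x_min, x_max, samples)
ys = np.linspace(y_min, y_max, samples)
grid = np.array([[xg, yg] for xg in xs for yg in ys])
grid_predictions = logistic_regression.predict(grid)

for iris_type in iris_types:
    mask = (grid_predictions == iris_type).flatten()
    plt.scatter(grid[mask, 0], grid[mask, 1], s=2, label=iris_type)
plt.legend()
plt.show()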
def fit(n_windows, win_width, rand_state, data_set, data_labels, filename="LR_weights.pkl"): # Permuting data rng = np.random.RandomState(8000) indices = rng.permutation(len(data_set)) data_set = np.array(data_set) data_labels = np.array(data_labels) data_set, data_labels = data_set[indices], data_labels[indices] print str(len(data_set)) + " all samples" train_len = int(len(data_set) * 9.0 / 10.0) valid_len = len(data_set) - train_len print "Train: " + str(train_len) print "Validate: " + str(valid_len) # Splitting fs train_dir = fs.File("LR_training.hdf5", "a") train_data = train_dir.create_dataset("LR_train_data", shape=((train_len + 1) * n_windows, 41, 41), dtype="i") train_labels = train_dir.create_dataset("LR_train_labels", shape=((train_len + 1) * n_windows,), dtype="i") valid_dir = fs.File("LR_validating.hdf5", "a") valid_data = valid_dir.create_dataset("LR_valid_data", shape=((valid_len + 1) * n_windows, 41, 41), dtype="i") valid_labels = valid_dir.create_dataset("LR_valid_labels", shape=((valid_len + 1) * n_windows,), dtype="i") counter = 0 next_counter = 0 for iter, data_sample in enumerate(data_set): if iter % 10000 == 0: print iter windows = WinExt.get_windows(data_sample, n_windows, win_width, rand_state) for window in windows: # First windows part for training # Second part for validation if iter < train_len: train_data[counter] = window train_labels[counter] = data_labels[iter] counter += 1 else: valid_data[next_counter] = window valid_labels[next_counter] = data_labels[iter] next_counter += 1 # Setting real length train_len = counter valid_len = next_counter print "Size of train is " + str(train_len) print "Size of valid is " + str(valid_len) print "Extracting has finished its work..." batch_size = 500 if train_len % batch_size != 0: # if the last batch is not full, just don't use the remainder whole = (train_len / batch_size) * batch_size train_len = whole if valid_len % batch_size != 0: whole = (valid_len / batch_size) * batch_size valid_len = whole n_train_batches = train_len / batch_size n_valid_batches = valid_len / batch_size data_tr = theano.shared( np.asarray(np.zeros((batch_size, 41, 41), dtype=np.int), dtype=theano.config.floatX), borrow=True ) labels_tr = theano.shared(np.asarray(np.zeros(batch_size, dtype=np.int), dtype="int32"), borrow=True) data_val = theano.shared( np.asarray(np.zeros((batch_size, 41, 41), dtype=np.int), dtype=theano.config.floatX), borrow=True ) labels_val = theano.shared(np.asarray(np.zeros(batch_size, dtype=np.int), dtype="int32"), borrow=True) print "Building logistic regression classifier..." x = T.dtensor3("x") # dtensor3 for 3d array y = T.ivector("y") # the labels are presented as 1D vector of [int] labels rng = np.random.RandomState(8000) classifier = LogisticRegression(input=x.flatten(2), n_in=41 * 41, n_out=2) cost = classifier.negative_log_likelihood(y) learning_rate = 0.03 # 0.3 / float(n_train_batches) g_W = T.grad(cost=cost, wrt=classifier.W) g_b = T.grad(cost=cost, wrt=classifier.b) # start-snippet-3 # specify how to update the parameters of the model as a list of # (variable, update expression) pairs. updates = [(classifier.W, classifier.W - learning_rate * g_W), (classifier.b, classifier.b - learning_rate * g_b)] validate_model = theano.function(inputs=[], outputs=classifier.errors(y), givens={x: data_val, y: labels_val}) # indices - for random shuffle train_model = theano.function( inputs=[], outputs=classifier.errors(y), updates=updates, givens={x: data_tr, y: labels_tr} ) print "Training..." 
# GDM with batches epoch = 0 n_epochs = 30 min_error = 100.0 errors = [] indices = rng.permutation(train_len) while epoch < n_epochs: print "================= " + str(epoch + 1) + " epoch =============== " for minibatch_index in range(n_train_batches): if minibatch_index % 50 == 0: print str(minibatch_index) + " batch" data_tr.set_value( np.array([train_data[indices[minibatch_index * batch_size + i]] for i in range(batch_size)]), borrow=True, ) labels_tr.set_value( np.array([train_labels[indices[minibatch_index * batch_size + i]] for i in range(batch_size)]), borrow=True, ) train_model() # compute zero-one loss on validation set validation_losses = [] for i in range(n_valid_batches): data_val.set_value(np.array(valid_data[i * batch_size : (i + 1) * batch_size]), borrow=True) labels_val.set_value(np.array(valid_labels[i * batch_size : (i + 1) * batch_size]), borrow=True) validation_losses.append(validate_model()) this_validation_loss = np.mean(validation_losses) * 100 errors.append(this_validation_loss) if this_validation_loss < min_error: print str(this_validation_loss) + "% error" min_error = this_validation_loss save_parameters(classifier, filename) epoch += 1 print "Shuffling..." indices = rng.permutation(train_len) show_errors(errors, "LogReg: 4 windows, h=41") # Cleaning data train_dir.clear() valid_dir.clear() train_dir.close() valid_dir.close()
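A hedged usage sketch for fit() above; the sample array, labels, and window settings below are illustrative assumptions (the 4-window, 41-pixel configuration only mirrors the plot title used above), not values taken from the original code.

import numpy as np

# Hypothetical inputs: `samples` stands in for the raw grayscale images and
# `labels` for their binary classes; both are placeholders, not real data.
samples = np.random.randint(0, 256, size=(1000, 100, 100))
labels = np.random.randint(0, 2, size=1000)
fit(n_windows=4, win_width=41, rand_state=np.random.RandomState(8000),
    data_set=samples, data_labels=labels, filename="LR_weights.pkl")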
import numpy as np from sklearn.model_selection import train_test_split from sklearn import datasets import matplotlib.pyplot as plt from logistic_regression import LogisticRegression #from regression import LogisticRegression def accuracy(y_true, y_pred): accuracy = np.sum(y_true == y_pred) / len(y_true) return accuracy bc = datasets.load_breast_cancer() X, y = bc.data, bc.target X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1234) regressor = LogisticRegression(learning_rate=0.0001, n_iters=1000) regressor.fit(X_train, y_train) predictions = regressor.predict(X_test) print("LR classification accuracy:", accuracy(y_test, predictions))
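The snippet above scores a from-scratch LogisticRegression on a breast-cancer split; a quick, hedged sanity check is to run scikit-learn's own estimator on the same split. The alias below avoids the name clash with the custom class, and the max_iter value is just an illustrative choice.

# Hedged baseline comparison on the identical train/test split defined above.
from sklearn.linear_model import LogisticRegression as SklearnLogisticRegression

baseline = SklearnLogisticRegression(max_iter=10000)
baseline.fit(X_train, y_train)
print("sklearn baseline accuracy:", accuracy(y_test, baseline.predict(X_test)))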
def main(): rng = np.random.RandomState(23455) datasets = load_data() train_set_x, train_set_y = datasets[0] valid_set_x, valid_set_y = datasets[1] test_set_x, test_set_y = datasets[2] # compute number of minibatches for training, validation and testing batch_size = 500 n_train_batches = train_set_x.get_value(borrow=True).shape[0] n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] n_test_batches = test_set_x.get_value(borrow=True).shape[0] n_train_batches //= batch_size n_valid_batches //= batch_size n_test_batches //= batch_size nkerns = [20, 50] # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch # start-snippet-1 x = T.matrix('x') # the data is presented as rasterized images y = T.ivector('y') # the labels are presented as 1D vector of [int] labels layer0_input = x.reshape((batch_size, 1, 28, 28)) layer0 = LeNetConvPoolLayer(rng, layer0_input, filter_shape=(nkerns[0], 1, 5, 5), image_shape=(batch_size, 1, 28, 28), poolsize=(2, 2)) layer1 = LeNetConvPoolLayer(rng, input=layer0.output, filter_shape=(nkerns[1], nkerns[0], 5, 5), image_shape=(batch_size, nkerns[0], 12, 12), poolsize=(2, 2)) layer2_input = layer1.output.flatten(2) layer2 = HiddenLayer(rng, layer2_input, n_in=nkerns[1] * 4 * 4, n_out=500) layer3 = LogisticRegression(layer2.output, n_in=500, n_out=10) cost = layer3.negative_log_likelihood(y) test_model = theano.function([index], layer3.errors(y), givens={ x: test_set_x[index * batch_size: (index + 1) * batch_size], y: test_set_y[index * batch_size: (index + 1) * batch_size] }) validate_model = theano.function([index], layer3.errors(y), givens={ x: valid_set_x[index * batch_size: (index + 1) * batch_size], y: valid_set_y[index * batch_size: (index + 1) * batch_size] }) params = layer3.params + layer2.params + layer1.params + layer0.params grads = T.grad(cost, params) learning_rate = 0.1 updates = [(param_i, param_i - learning_rate * grad_i) for param_i, grad_i in zip(params, grads)] train_model = theano.function([index], cost, updates=updates, givens={ x: train_set_x[index * batch_size: (index + 1) * batch_size], y: train_set_y[index * batch_size: (index + 1) * batch_size] }) print "Start training..." patience = 10000 patience_increase = 2 improvement_threshold = 0.995 n_epochs = 200 validation_frequency = min(n_train_batches, patience // 2) best_validation_loss = np.inf test_score = 0.
epoch = 0 done_looping = False while (epoch < n_epochs) and (not done_looping): epoch = epoch + 1 for minibatch_index in range(n_train_batches): iter = (epoch - 1) * n_train_batches + minibatch_index if iter % 100 == 0: print('training @ iter = ', iter) cost_ij = train_model(minibatch_index) # NOQA if (iter + 1) % validation_frequency == 0: # compute zero-one loss on validation set validation_losses = [validate_model(i) for i in range(n_valid_batches)] this_validation_loss = np.mean(validation_losses) print('epoch %i, minibatch %i/%i, validation error %f %%' % (epoch, minibatch_index + 1, n_train_batches, this_validation_loss * 100.)) # if we got the best validation score until now if this_validation_loss < best_validation_loss: # improve patience if loss improvement is good enough if this_validation_loss < best_validation_loss * \ improvement_threshold: patience = max(patience, iter * patience_increase) # save best validation score and iteration number best_validation_loss = this_validation_loss # test it on the test set test_losses = [ test_model(i) for i in range(n_test_batches) ] test_score = np.mean(test_losses) print((' epoch %i, minibatch %i/%i, test error of ' 'best model %f %%') % (epoch, minibatch_index + 1, n_train_batches, test_score * 100.)) if patience <= iter: done_looping = True break
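For concreteness, the early-stopping bookkeeping above can be traced with the values already in the code. Assuming load_data() returns the standard 50,000-image MNIST training split, batch_size = 500 gives 100 training batches, so validation_frequency = min(100, 5000) = 100 and validation runs once per epoch; patience only grows once a sufficiently better score appears after iteration 5,000, since max(10000, 2 * iter) exceeds 10,000 only then. A small hedged restatement of the rule in isolation:

# Hedged, self-contained restatement of the patience update used above.
def should_extend_patience(patience, iteration, new_loss, best_loss,
                           patience_increase=2, improvement_threshold=0.995):
    """Return the updated patience after one validation check."""
    if new_loss < best_loss * improvement_threshold:
        return max(patience, iteration * patience_increase)
    return patience

# A clear improvement at iteration 6000 pushes patience from 10000 to 12000.
print(should_extend_patience(10000, 6000, 0.018, 0.020))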
import matplotlib.pyplot as plt import numpy as np from sklearn.datasets import make_classification from logistic_regression import LogisticRegression def sigmoid(X): ''' Computes the Sigmoid function of the input argument X. ''' return 1.0 / (1 + np.exp(-X)) lr = LogisticRegression() X, y = make_classification(n_features=2, n_redundant=0, n_informative=2, random_state=1, n_clusters_per_class=1) lr.fit(X, y) H = lr.predict(X) print("Training Accuracy : " + str(float(np.sum(H == y)) / y.shape[0])) #Plot data plt.scatter(X[y == 1, 0], X[y == 1, 1], marker='o', c='b') #positive samples plt.scatter(X[y == 0, 0], X[y == 0, 1], marker='x', c='r') #negative samples #Plot Decision Boundary u = np.linspace(-2, 2, 50) v = np.linspace(-2, 2, 50) z = np.zeros(shape=(len(u), len(v))) for i in range(len(u)):
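    # Hedged completion of the truncated decision-boundary loop above: classify
    # each grid point (u[i], v[j]), assuming lr.predict accepts a 2-column array
    # exactly as it does for X earlier in this snippet.
    for j in range(len(v)):
        z[i, j] = lr.predict(np.array([[u[i], v[j]]]))

plt.contour(u, v, z.T, levels=[0.5])  # the 0.5 level traces the decision boundary
plt.show()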
def stochastic_gradient_descent_mnist( learning_rate=0.13, n_epochs=1000, path='/home/tao/Projects/machine-learning/data/mnist.pkl.gz', batch_size=600): datasets = load_data(path) train_set_data, train_set_label = datasets[0] validation_set_data, validation_set_label = datasets[1] test_set_data, test_set_label = datasets[2] # compute number of minibatches for training, validation and testing n_train_batches = train_set_data.get_value( borrow=True).shape[0] // batch_size n_valid_batches = validation_set_data.get_value( borrow=True).shape[0] // batch_size n_test_batches = test_set_data.get_value( borrow=True).shape[0] // batch_size print('... building the model') index = T.lscalar() # index to a [mini]batch data = T.matrix('x') # data, presented as rasterized images label = T.ivector('y') # labels, presented as 1D vector of [int] labels classifier = LogisticRegression(input=data, input_dim=28 * 28, output_dim=10) objective_function = classifier.negative_log_likelihood(label) # testing model test_model = theano.function( inputs=[index], outputs=classifier.errors(label), givens={ data: test_set_data[index * batch_size:(index + 1) * batch_size], label: test_set_label[index * batch_size:(index + 1) * batch_size] }) # validation model validate_model = theano.function( inputs=[index], outputs=classifier.errors(label), givens={ data: validation_set_data[index * batch_size:(index + 1) * batch_size], label: validation_set_label[index * batch_size:(index + 1) * batch_size] }) # gradients g_W = T.grad(cost=objective_function, wrt=classifier.W) g_b = T.grad(cost=objective_function, wrt=classifier.b) # update rule updates = [(classifier.W, classifier.W - learning_rate * g_W), (classifier.b, classifier.b - learning_rate * g_b)] # training model train_model = theano.function( inputs=[index], outputs=objective_function, updates=updates, givens={ data: train_set_data[index * batch_size:(index + 1) * batch_size], label: train_set_label[index * batch_size:(index + 1) * batch_size] }) print('... training the model') # early-stopping parameters patience = 5000 # look as this many examples regardless patience_increase = 2 # wait this much longer when a new best is found improvement_threshold = 0.995 # a relative improvement of this much is considered significant # go through this many minibatche before checking the network on the validation set; in this case we check every epoch validation_frequency = min(n_train_batches, patience // 2) best_validation_loss = numpy.inf test_score = 0. 
start_time = timeit.default_timer() done_looping = False epoch = 0 while (epoch < n_epochs) and (not done_looping): epoch = epoch + 1 for minibatch_index in range(n_train_batches): minibatch_avg_cost = train_model(minibatch_index) # iteration number iter = (epoch - 1) * n_train_batches + minibatch_index if (iter + 1) % validation_frequency == 0: # compute zero-one loss on validation set validation_losses = [ validate_model(i) for i in range(n_valid_batches) ] # grammar sugar this_validation_loss = numpy.mean(validation_losses) print('epoch %i, minibatch %i/%i, validation error %f %%' % (epoch, minibatch_index + 1, n_train_batches, this_validation_loss * 100.)) # if we got the best validation score until now if this_validation_loss < best_validation_loss: #improve patience if loss improvement is good enough if this_validation_loss < best_validation_loss * improvement_threshold: patience = max(patience, iter * patience_increase) best_validation_loss = this_validation_loss # test it on the test set test_losses = [ test_model(i) for i in range(n_test_batches) ] test_score = numpy.mean(test_losses) print(( ' epoch %i, minibatch %i/%i, test error of best model %f %%' ) % (epoch, minibatch_index + 1, n_train_batches, test_score * 100.)) with open('best_model.pkl', 'wb') as f: pickle.dump(classifier, f) if patience <= iter: done_looping = True break end_time = timeit.default_timer() print( 'Optimization complete with best validation score of %f %%, with test performance %f %%' % (best_validation_loss * 100., test_score * 100.)) print('The code run for %d epochs, with %f epochs/sec' % (epoch, 1. * epoch / (end_time - start_time))) print( ('The code for file ' + os.path.split(__file__)[1] + ' ran for %.1fs' % ((end_time - start_time))), file=sys.stderr)
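The loop above pickles the best classifier to best_model.pkl; a hedged sketch of reusing it afterwards is shown below. It only assumes that classifier.W and classifier.b are Theano shared variables (they are used that way in the gradient updates above); new_data is a hypothetical placeholder for real 28x28 images flattened to 784 features.

import pickle
import numpy

# Hedged usage sketch: reload the pickled model and score new inputs with plain
# numpy, avoiding further assumptions about the LogisticRegression class.
with open('best_model.pkl', 'rb') as f:
    best_classifier = pickle.load(f)

W = best_classifier.W.get_value()          # (784, 10) weight matrix
b = best_classifier.b.get_value()          # (10,) bias vector
new_data = numpy.random.rand(5, 28 * 28)   # hypothetical stand-in for real images
predicted_labels = numpy.argmax(numpy.dot(new_data, W) + b, axis=1)
print(predicted_labels)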
def train_CNN_mini_batch(learning_rate, n_epochs, num_kernels, batch_size, filter_size, is_multi_scale, num_of_classes, height, width, use_interpolation, use_hidden_layer): train_set_x_by_1, train_set_y, valid_set_x_by_1, valid_set_y, test_set_x_by_1, test_set_y, train_set_x_by_2, \ train_set_x_by_4, valid_set_x_by_2, valid_set_x_by_4, test_set_x_by_2, test_set_x_by_4 \ = load_processed_img_data() n_train_batches = train_set_x_by_1.get_value(borrow=True).shape[0] n_valid_batches = valid_set_x_by_1.get_value(borrow=True).shape[0] n_test_batches = test_set_x_by_1.get_value(borrow=True).shape[0] n_train_batches /= batch_size n_valid_batches /= batch_size n_test_batches /= batch_size index = theano.tensor.lscalar() x_by_1 = theano.tensor.ftensor4('x_by_1') x_by_2 = theano.tensor.ftensor4('x_by_2') x_by_4 = theano.tensor.ftensor4('x_by_4') y = theano.tensor.ivector('y') print '... initialize the model' cnn_dir = 'models/CNN_' if is_multi_scale is True: cnn_dir += 'M_' else: cnn_dir += 'S_' if use_hidden_layer is True: cnn_dir += 'H_' else: cnn_dir += 'L_' if use_interpolation is True: cnn_dir += 'I_' else: cnn_dir += 'N_' cnn_dir = cnn_dir + str(num_kernels[0]) + '_' + str( num_kernels[1]) + '_' + str( num_kernels[2]) + '_' + str(batch_size) + '_' curr_date = str(datetime.date.today()) curr_date = curr_date.replace('-', '_') cnn_dir = cnn_dir + curr_date + str(time.strftime('_%H_%M_%S')) print 'CNN model is ', cnn_dir if not os.path.exists(cnn_dir): os.makedirs(cnn_dir) class Logger(object): def __init__(self): self.terminal = sys.stdout self.log = open(cnn_dir + '/log.txt', 'w') def write(self, message): self.terminal.write(message) self.log.write(message) sys.stdout = Logger() layer0 = CNN_Layer( name='Layer_0', W=None, b=None, filter_shape=(num_kernels[0], 3, filter_size, filter_size), ) layer1 = CNN_Layer( name='Layer_1', W=None, b=None, filter_shape=(num_kernels[1], num_kernels[0], filter_size, filter_size), ) layer2 = CNN_Layer( name='Layer_2', W=None, b=None, filter_shape=(num_kernels[2], num_kernels[1], filter_size, filter_size), ) layer3 = HiddenLayer(name='Layer_3', W=None, b=None, n_in=num_kernels[2] * 3 if is_multi_scale is True else num_kernels[2], n_out=num_kernels[2] * 4 if is_multi_scale is True else num_kernels[2] * 2, activation=theano.tensor.tanh) if is_multi_scale and use_hidden_layer: layer4_in = num_kernels[2] * 4 elif is_multi_scale and not use_hidden_layer: layer4_in = num_kernels[2] * 3 elif not is_multi_scale and use_hidden_layer: layer4_in = num_kernels[2] * 2 else: layer4_in = num_kernels[2] layer4 = LogisticRegression( name='Layer_4', W=None, b=None, n_in=layer4_in, n_out=num_of_classes, ) forward_propagation(layer0=layer0, layer1=layer1, layer2=layer2, layer3=layer3, layer4=layer4, x_by_1=x_by_1, x_by_2=x_by_2, x_by_4=x_by_4, num_kernels=num_kernels, batch_size=batch_size, filter_size=filter_size, is_multi_scale=is_multi_scale, height=height, width=width, use_interpolation=use_interpolation, use_hidden_layer=use_hidden_layer) if use_hidden_layer is True: L2_norm = (layer4.W**2).sum() + (layer3.W**2).sum() + ( layer2.W**2).sum() + (layer1.W**2).sum() + (layer0.W**2).sum() else: L2_norm = (layer4.W**2).sum() + (layer2.W**2).sum() + ( layer1.W**2).sum() + (layer0.W**2).sum() regularization = 0.00001 cost = layer4.negative_log_likelihood(y) + (regularization * L2_norm) if is_multi_scale is True: test_model = theano.function( [index], layer4.errors(y), givens={ x_by_1: test_set_x_by_1[index * batch_size:(index + 1) * batch_size], x_by_2: test_set_x_by_2[index * 
batch_size:(index + 1) * batch_size], x_by_4: test_set_x_by_4[index * batch_size:(index + 1) * batch_size], y: test_set_y[index * batch_size * height * width:(index + 1) * batch_size * height * width] }) else: test_model = theano.function( [index], layer4.errors(y), givens={ x_by_1: test_set_x_by_1[index * batch_size:(index + 1) * batch_size], y: test_set_y[index * batch_size * height * width:(index + 1) * batch_size * height * width] }) if is_multi_scale is True: validate_model = theano.function( [index], layer4.errors(y), givens={ x_by_1: valid_set_x_by_1[index * batch_size:(index + 1) * batch_size], x_by_2: valid_set_x_by_2[index * batch_size:(index + 1) * batch_size], x_by_4: valid_set_x_by_4[index * batch_size:(index + 1) * batch_size], y: valid_set_y[index * batch_size * height * width:(index + 1) * batch_size * height * width] }) else: validate_model = theano.function( [index], layer4.errors(y), givens={ x_by_1: valid_set_x_by_1[index * batch_size:(index + 1) * batch_size], y: valid_set_y[index * batch_size * height * width:(index + 1) * batch_size * height * width] }) if use_hidden_layer is True: params = layer4.params + layer3.params + layer2.params + layer1.params + layer0.params else: params = layer4.params + layer2.params + layer1.params + layer0.params grads = theano.tensor.grad(cost, params) updates = [(param_i, param_i - learning_rate * grad_i) for param_i, grad_i in zip(params, grads)] if is_multi_scale is True: train_model = theano.function( [index], cost, updates=updates, givens={ x_by_1: train_set_x_by_1[index * batch_size:(index + 1) * batch_size], x_by_2: train_set_x_by_2[index * batch_size:(index + 1) * batch_size], x_by_4: train_set_x_by_4[index * batch_size:(index + 1) * batch_size], y: train_set_y[index * batch_size * width * height:(index + 1) * batch_size * width * height] }) else: train_model = theano.function( [index], cost, updates=updates, givens={ x_by_1: train_set_x_by_1[index * batch_size:(index + 1) * batch_size], y: train_set_y[index * batch_size * width * height:(index + 1) * batch_size * width * height] }) print '... training the model' patience = 10000 # look as this many examples regardless patience_increase = 2 # wait this much longer when a new best is found improvement_threshold = 0.995 # a relative improvement of this much is considered significant validation_frequency = min(n_train_batches, patience / 2) best_layer_0_W = numpy.zeros_like(layer0.W.get_value()) best_layer_0_b = numpy.zeros_like(layer0.b.get_value()) best_layer_1_W = numpy.zeros_like(layer1.W.get_value()) best_layer_1_b = numpy.zeros_like(layer1.b.get_value()) best_layer_2_W = numpy.zeros_like(layer2.W.get_value()) best_layer_2_b = numpy.zeros_like(layer2.b.get_value()) best_layer_3_W = numpy.zeros_like(layer3.W.get_value()) best_layer_3_b = numpy.zeros_like(layer3.b.get_value()) best_layer_4_W = numpy.zeros_like(layer4.W.get_value()) best_layer_4_b = numpy.zeros_like(layer4.b.get_value()) best_validation_loss = numpy.inf best_iter = 0 test_score = 0. 
start_time = time.clock() epoch = 0 done_looping = False while (epoch < n_epochs) and (not done_looping): epoch += 1 for mini_batch_index in xrange(n_train_batches): start = time.clock() iter = (epoch - 1) * n_train_batches + mini_batch_index cost_ij = train_model(mini_batch_index) if (iter + 1) % validation_frequency == 0: # compute zero-one loss on validation set validation_losses = [ validate_model(i) for i in xrange(n_valid_batches) ] this_validation_loss = numpy.mean(validation_losses) print('epoch %i, mini-batch %i/%i, validation error %f %%' % (epoch, mini_batch_index + 1, n_train_batches, this_validation_loss * 100.)) # if we got the best validation score until now if this_validation_loss < best_validation_loss: # improve patience if loss improvement is good enough if this_validation_loss < best_validation_loss * \ improvement_threshold: patience = max(patience, iter * patience_increase) # save best validation score and iteration number best_validation_loss = this_validation_loss best_iter = iter # save best filters best_layer_0_W = layer0.W.get_value() best_layer_0_b = layer0.b.get_value() best_layer_1_W = layer1.W.get_value() best_layer_1_b = layer1.b.get_value() best_layer_2_W = layer2.W.get_value() best_layer_2_b = layer2.b.get_value() best_layer_3_W = layer3.W.get_value() best_layer_3_b = layer3.b.get_value() best_layer_4_W = layer4.W.get_value() best_layer_4_b = layer4.b.get_value() # test it on the test set test_losses = [ test_model(i) for i in xrange(n_test_batches) ] test_score = numpy.mean(test_losses) print((' epoch %i, mini-batch %i/%i, test error of ' 'best model %f %%') % (epoch, mini_batch_index + 1, n_train_batches, test_score * 100.)) if patience <= iter: done_looping = True break print 'training @ iter = %d, time taken = %f' % (iter, (time.clock() - start)) end_time = time.clock() print('Optimization complete.') print( 'Best validation score of %f %% obtained at iteration %i, ' 'with test performance %f %%' % (best_validation_loss * 100., best_iter + 1, test_score * 100.)) print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.)) if not os.path.exists(cnn_dir + '/params'): os.makedirs(cnn_dir + '/params') numpy.save(cnn_dir + '/params/layer_0_W.npy', best_layer_0_W) numpy.save(cnn_dir + '/params/layer_0_b.npy', best_layer_0_b) numpy.save(cnn_dir + '/params/layer_1_W.npy', best_layer_1_W) numpy.save(cnn_dir + '/params/layer_1_b.npy', best_layer_1_b) numpy.save(cnn_dir + '/params/layer_2_W.npy', best_layer_2_W) numpy.save(cnn_dir + '/params/layer_2_b.npy', best_layer_2_b) numpy.save(cnn_dir + '/params/layer_3_W.npy', best_layer_3_W) numpy.save(cnn_dir + '/params/layer_3_b.npy', best_layer_3_b) numpy.save(cnn_dir + '/params/layer_4_W.npy', best_layer_4_W) numpy.save(cnn_dir + '/params/layer_4_b.npy', best_layer_4_b) numpy.save(cnn_dir + '/params/filer_kernels.npy', num_kernels) numpy.save(cnn_dir + '/params/filter_size.npy', filter_size) return cnn_dir
plt.ylabel(y_axis) plt.title('Microchips Tests') plt.legend() plt.show() num_examples = data.shape[0] x_train = data[[x_axis, y_axis]].values.reshape((num_examples, 2)) y_train = data['validity'].values.reshape((num_examples, 1)) # Training parameters max_iterations = 100000 regularization_param = 0 polynomial_degree = 5 sinusoid_degree = 0 # Logistic regression model logistic_regression = LogisticRegression(x_train, y_train, polynomial_degree, sinusoid_degree) # Train (thetas, costs) = logistic_regression.train(max_iterations) columns = [] for theta_index in range(0, thetas.shape[1]): columns.append('Theta ' + str(theta_index)) # Plot the training cost curves labels = logistic_regression.unique_labels plt.plot(range(len(costs[0])), costs[0], label=labels[0]) plt.plot(range(len(costs[1])), costs[1], label=labels[1]) plt.xlabel('Gradient Steps')
def generate_segmented_image_tensors(img_by_1, img_by_2, img_by_4, model_dir, batch_size, height, width, num_of_classes): layer_0_W = numpy.load(model_dir + '/params/layer_0_W.npy') layer_0_b = numpy.load(model_dir + '/params/layer_0_b.npy') layer_1_W = numpy.load(model_dir + '/params/layer_1_W.npy') layer_1_b = numpy.load(model_dir + '/params/layer_1_b.npy') layer_2_W = numpy.load(model_dir + '/params/layer_2_W.npy') layer_2_b = numpy.load(model_dir + '/params/layer_2_b.npy') layer_3_W = numpy.load(model_dir + '/params/layer_3_W.npy') layer_3_b = numpy.load(model_dir + '/params/layer_3_b.npy') layer_4_W = numpy.load(model_dir + '/params/layer_4_W.npy') layer_4_b = numpy.load(model_dir + '/params/layer_4_b.npy') num_kernels = numpy.load(model_dir + '/params/filer_kernels.npy') filter_size = numpy.load(model_dir + '/params/filter_size.npy') if model_dir[11] == 'M': is_multi_scale = True elif model_dir[11] == 'S': is_multi_scale = False else: return NotImplemented if model_dir[13] == 'H': use_hidden_layer = True elif model_dir[13] == 'L': use_hidden_layer = False else: return NotImplemented if model_dir[15] == 'I': use_interpolation = True elif model_dir[15] == 'N': use_interpolation = False else: return NotImplemented layer0 = CNN_Layer( name='Layer_0', W=layer_0_W, b=layer_0_b, filter_shape=(num_kernels[0], 3, filter_size, filter_size), ) layer1 = CNN_Layer( name='Layer_1', W=layer_1_W, b=layer_1_b, filter_shape=(num_kernels[1], num_kernels[0], filter_size, filter_size), ) layer2 = CNN_Layer( name='Layer_2', W=layer_2_W, b=layer_2_b, filter_shape=(num_kernels[2], num_kernels[1], filter_size, filter_size), ) layer3 = HiddenLayer(name='Layer_3', W=layer_3_W, b=layer_3_b, n_in=num_kernels[2] * 3 if is_multi_scale is True else num_kernels[2], n_out=num_kernels[2] * 4 if is_multi_scale is True else num_kernels[2] * 2, activation=theano.tensor.tanh) layer4 = LogisticRegression( name='Layer_4', W=layer_4_W, b=layer_4_b, n_in=num_kernels[2] * 4 if is_multi_scale is True else num_kernels[2] * 2, n_out=num_of_classes, ) x_by_1 = theano.tensor.ftensor4('x_by_1') x_by_2 = theano.tensor.ftensor4('x_by_2') x_by_4 = theano.tensor.ftensor4('x_by_4') forward_propagation( layer0=layer0, layer1=layer1, layer2=layer2, layer3=layer3, layer4=layer4, x_by_1=x_by_1, x_by_2=x_by_2, x_by_4=x_by_4, num_kernels=num_kernels, batch_size=batch_size, filter_size=filter_size, is_multi_scale=is_multi_scale, height=height, width=width, use_interpolation=use_interpolation, use_hidden_layer=use_hidden_layer, ) # compile a function that outputs the model's per-pixel class predictions if is_multi_scale is True: test_model = theano.function([x_by_1, x_by_2, x_by_4], layer4.y_prediction) else: test_model = theano.function([x_by_1], layer4.y_prediction) if is_multi_scale is True: op = test_model(img_by_1, img_by_2, img_by_4) else: op = test_model(img_by_1) y = theano.tensor.reshape(op, (batch_size, height, width)) return y.eval()
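Taken together, train_CNN_mini_batch and generate_segmented_image_tensors appear to form a train-then-segment pipeline; the hedged sketch below wires them up. Every concrete hyper-parameter value and the img_by_* arrays are illustrative assumptions, not values from the original code.

# Hedged pipeline sketch; all literals below are assumed placeholders.
model_dir = train_CNN_mini_batch(learning_rate=0.01, n_epochs=30,
                                 num_kernels=[16, 64, 256], batch_size=1,
                                 filter_size=7, is_multi_scale=True,
                                 num_of_classes=9, height=240, width=320,
                                 use_interpolation=True, use_hidden_layer=True)

# img_by_1 / img_by_2 / img_by_4 are assumed to be the same image at full,
# half and quarter scale, shaped as float32 4-D tensors matching the
# ftensor4 inputs declared above.
segmented = generate_segmented_image_tensors(img_by_1, img_by_2, img_by_4,
                                              model_dir, batch_size=1,
                                              height=240, width=320,
                                              num_of_classes=9)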
class StackedDenoisingAutoencoder: def __init__(self, numpyRng, theanoRng=None, nIn=28*28, hiddenLayerSizes=[500,500], nOut=10): self.nLayers = len(hiddenLayerSizes) if not theanoRng: theanoRng = theano.tensor.shared_randomstreams.RandomStreams(numpyRng.randint(2 ** 30)) self.x = T.matrix('x') self.y = T.ivector('y') def makeSigmoidLayer(lastLayer,lastLayerSize,size): return Layer(rng=numpyRng,input=lastLayer,nIn=lastLayerSize,nOut=size,activation=T.nnet.sigmoid) def makeDALayer(lastLayer,lastLayerSize,size,sigmoidLayer): return DenoisingAutoEncoder( numpyRng=numpyRng,theanoRng=theanoRng,input=lastLayer, nVisible=lastLayerSize, nHidden=size, W=sigmoidLayer.W, bHidden=sigmoidLayer.b) def makeLayers(lastLayer,lastInputSize,nextLayerSizes): if nextLayerSizes: newList = list(nextLayerSizes) size = newList.pop() sigmoidLayer = makeSigmoidLayer(lastLayer,lastInputSize,size) daLayer = makeDALayer(lastLayer,lastInputSize,size,sigmoidLayer) yield (sigmoidLayer,daLayer) for layer in makeLayers(sigmoidLayer.output,size,newList): yield layer self.sigmoidLayers,self.dALayers = zip(*makeLayers(self.x,nIn,reversed(hiddenLayerSizes))) print "created sda with layer shapes below." for da in self.dALayers: print "layersize:", da.W.get_value().shape self.logLayer = LogisticRegression(self.sigmoidLayers[-1].output,hiddenLayerSizes[-1],nOut) # flat list of trainable parameters: hidden sigmoid layers plus the logistic output layer self.params = [param for layer in self.sigmoidLayers for param in layer.params] + self.logLayer.params self.fineTuneCost = self.logLayer.negativeLogLikelihood(self.y) self.errors = self.logLayer.errors(self.y) def pretrainingFunctions(self,trainSetX,batchSize): index = T.lscalar("index") corruptionLevel = T.scalar('corruption') learningRate = T.scalar("learning") batchBegin = batchSize * index batchEnd = batchBegin + batchSize for dA in self.dALayers: cost,updates = dA.costFunctionAndUpdates(corruptionLevel,learningRate) f = theano.function( inputs=[ index, theano.Param(corruptionLevel,default=0.2), theano.Param(learningRate,default=0.1) ], outputs=cost, updates=updates, givens={self.x:trainSetX[batchBegin:batchEnd]}, ) yield f def pretrainingFunctionsWithOptimizer(self,trainSetX,batchSize,optimizer): """ with optimizer. 
optimizer(params,grads) """ index = T.lscalar("index") corruptionLevel = T.scalar('corruption') learningRate = T.scalar("learning") batchBegin = batchSize * index batchEnd = batchBegin + batchSize for dA in self.dALayers: #cost,updates = dA.costFunctionAndUpdates(corruptionLevel,learningRate) cost, param, grads = dA.costParamGrads(corruptionLevel) updates = optimizer(param,grads) f = theano.function( inputs=[ index, theano.Param(corruptionLevel,default=0.2), ], outputs=cost, updates=updates, givens={self.x:trainSetX[batchBegin:batchEnd]}, ) yield f def fineTuneFunctions(self,datasets,batchSize,learningRate): index = T.lscalar('i') trainSetX,trainSetY = datasets[0] validSetX,validSetY = datasets[1] testSetX,testSetY = datasets[2] gparams = T.grad(self.fineTuneCost,self.params) updates = [ (param,param-gparam*learningRate) for param,gparam in zip(self.params,gparams) ] def makeGivens(x,y): return {self.x:x[index*batchSize:(index+1)*batchSize], self.y:y[index*batchSize:(index+1)*batchSize]} trainer = theano.function( inputs=[index], outputs=self.fineTuneCost, updates=updates, givens=makeGivens(trainSetX,trainSetY), name='train' ) testScoreI=theano.function( inputs=[index], outputs=self.errors, givens=makeGivens(testSetX,testSetY), name='test' ) validScoreI=theano.function( inputs=[index], outputs=self.errors, givens=makeGivens(validSetX,validSetY), name='valid' ) def validationScore(): return [validScoreI(i) for i in xrange(validSetX.get_value(borrow=True).shape[0]/batchSize)] def testScore(): return [testScoreI(i) for i in xrange(testSetX.get_value(borrow=True).shape[0]/batchSize)] return trainer,validationScore,testScore def preTrain(self, data, batchSize=20, preLearningRate=0.1, corruptionLevels=(.1,.2,.3)): import numpy,util preTrainer = list(self.pretrainingFunctions(data,batchSize=batchSize)) assert len(corruptionLevels) == len(preTrainer) , "given corruption levels do not correspond to the layers!!!" for i,(trainer,corruptionLevel) in enumerate(zip(preTrainer,corruptionLevels)): for epoch in xrange(15): print 'Pre-training layer %i, epoch %d start' % (i,epoch) trainScores = [trainer(batchIndex,corruptionLevel,preLearningRate) for batchIndex in xrange(data.get_value(borrow=True).shape[0]/batchSize)] print 'Pre-training layer %i, epoch %d, cost ' % (i, epoch),numpy.mean(trainScores)
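A hedged usage sketch for the class above: pretrain the stacked denoising autoencoders, then build the fine-tuning functions. The `datasets` triple of Theano shared variables is an assumption (the usual ((trainX, trainY), (validX, validY), (testX, testY)) layout), and the corruption levels are chosen to match the two hidden layers.

import numpy

# Hedged usage sketch; `datasets` is an assumed MNIST-style triple of shared variables.
rng = numpy.random.RandomState(1234)
sda = StackedDenoisingAutoencoder(numpyRng=rng, nIn=28 * 28,
                                  hiddenLayerSizes=[500, 500], nOut=10)

trainSetX = datasets[0][0]
sda.preTrain(trainSetX, batchSize=20, preLearningRate=0.1,
             corruptionLevels=(0.1, 0.2))  # one corruption level per hidden layer

trainer, validationScore, testScore = sda.fineTuneFunctions(
    datasets, batchSize=20, learningRate=0.1)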
class DBN(object): """Deep Belief Network A deep belief network is obtained by stacking several RBMs on top of each other. The hidden layer of the RBM at layer `i` becomes the input of the RBM at layer `i+1`. The first layer RBM gets as input the input of the network, and the hidden layer of the last RBM represents the output. When used for classification, the DBN is treated as a MLP, by adding a logistic regression layer on top. """ def __init__(self, numpy_rng, theano_rng=None, n_ins=N_FEATURES * N_FRAMES, hidden_layers_sizes=[1024, 1024], n_outs=62 * 3, rho=0.90, eps=1.E-6): """This class is made to support a variable number of layers. :type numpy_rng: numpy.random.RandomState :param numpy_rng: numpy random number generator used to draw initial weights :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams :param theano_rng: Theano random generator; if None is given one is generated based on a seed drawn from `rng` :type n_ins: int :param n_ins: dimension of the input to the DBN :type n_layers_sizes: list of ints :param n_layers_sizes: intermediate layers size, must contain at least one value :type n_outs: int :param n_outs: dimension of the output of the network """ self.sigmoid_layers = [] self.rbm_layers = [] self.params = [] self.n_layers = len(hidden_layers_sizes) #self._rho = shared(numpy.cast['float32'](rho), name='rho') # for adadelta #self._eps = shared(numpy.cast['float32'](eps), name='eps') # for adadelta self._rho = rho self._eps = eps self._accugrads = [] # for adadelta self._accudeltas = [] # for adadelta assert self.n_layers > 0 if not theano_rng: theano_rng = RandomStreams(numpy_rng.randint(2 ** 30)) # allocate symbolic variables for the data self.x = T.fmatrix('x') # the data is presented as rasterized images self.y = T.ivector('y') # the labels are presented as 1D vector # of [int] labels # The DBN is an MLP, for which all weights of intermediate # layers are shared with a different RBM. We will first # construct the DBN as a deep multilayer perceptron, and when # constructing each sigmoidal layer we also construct an RBM # that shares weights with that layer. During pretraining we # will train these RBMs (which will lead to chainging the # weights of the MLP as well) During finetuning we will finish # training the DBN by doing stochastic gradient descent on the # MLP. for i in xrange(self.n_layers): # construct the sigmoidal layer # the size of the input is either the number of hidden # units of the layer below or the input size if we are on # the first layer if i == 0: input_size = n_ins else: input_size = hidden_layers_sizes[i - 1] # the input to this layer is either the activation of the # hidden layer below or the input of the DBN if you are on # the first layer if i == 0: layer_input = self.x else: layer_input = self.sigmoid_layers[-1].output sigmoid_layer = HiddenLayer(rng=numpy_rng, input=layer_input, n_in=input_size, n_out=hidden_layers_sizes[i], activation=T.nnet.sigmoid) # add the layer to our list of layers self.sigmoid_layers.append(sigmoid_layer) # its arguably a philosophical question... but we are # going to only declare that the parameters of the # sigmoid_layers are parameters of the DBN. The visible # biases in the RBM are parameters of those RBMs, but not # of the DBN. 
self.params.extend(sigmoid_layer.params) self._accugrads.extend([shared(value=numpy.zeros((input_size, hidden_layers_sizes[i]), dtype='float32'), name='accugrad_W', borrow=True), shared(value=numpy.zeros((hidden_layers_sizes[i], ), dtype='float32'), name='accugrad_b', borrow=True)]) # TODO self._accudeltas.extend([shared(value=numpy.zeros((input_size, hidden_layers_sizes[i]), dtype='float32'), name='accudelta_W', borrow=True), shared(value=numpy.zeros((hidden_layers_sizes[i], ), dtype='float32'), name='accudelta_b', borrow=True)]) # TODO # Construct an RBM that shared weights with this layer if i == 0: rbm_layer = GRBM(numpy_rng=numpy_rng, theano_rng=theano_rng, input=layer_input, n_visible=input_size, n_hidden=hidden_layers_sizes[i], W=sigmoid_layer.W, hbias=sigmoid_layer.b) else: rbm_layer = RBM(numpy_rng=numpy_rng, theano_rng=theano_rng, input=layer_input, n_visible=input_size, n_hidden=hidden_layers_sizes[i], W=sigmoid_layer.W, hbias=sigmoid_layer.b) self.rbm_layers.append(rbm_layer) # We now need to add a logistic layer on top of the MLP self.logLayer = LogisticRegression( input=self.sigmoid_layers[-1].output, n_in=hidden_layers_sizes[-1], n_out=n_outs) self.params.extend(self.logLayer.params) self._accugrads.extend([shared(value=numpy.zeros((hidden_layers_sizes[-1], n_outs), dtype='float32'), name='accugrad_W', borrow=True), shared(value=numpy.zeros((n_outs, ), dtype='float32'), name='accugrad_b', borrow=True)]) # TODO self._accudeltas.extend([shared(value=numpy.zeros((hidden_layers_sizes[-1], n_outs), dtype='float32'), name='accudelta_W', borrow=True), shared(value=numpy.zeros((n_outs, ), dtype='float32'), name='accudelta_b', borrow=True)]) # TODO # compute the cost for second phase of training, defined as the # negative log likelihood of the logistic regression (output) layer self.finetune_cost = self.logLayer.negative_log_likelihood(self.y) self.finetune_cost_sum = self.logLayer.negative_log_likelihood_sum(self.y) # compute the gradients with respect to the model parameters # symbolic variable that points to the number of errors made on the # minibatch given by self.x and self.y self.errors = self.logLayer.errors(self.y) def pretraining_functions(self, k): batch_x = T.fmatrix('batch_x') learning_rate = T.scalar('lr') # learning rate to use pretrain_fns = [] for rbm in self.rbm_layers: # get the cost and the updates list # using CD-k here (persisent=None) for training each RBM. # TODO: change cost function to reconstruction error #markov_chain = shared(numpy.empty((batch_size, rbm.n_hidden), dtype='float32'), borrow=True) markov_chain = None cost, updates = rbm.get_cost_updates(learning_rate, persistent=markov_chain, k=k) # compile the theano function fn = theano.function(inputs=[batch_x, theano.Param(learning_rate, default=0.1)], outputs=cost, updates=updates, givens={self.x: batch_x}) # append `fn` to the list of functions pretrain_fns.append(fn) return pretrain_fns def get_SGD_trainer(self): """ Returns a plain SGD minibatch trainer with learning rate as param. 
""" batch_x = T.fmatrix('batch_x') batch_y = T.ivector('batch_y') learning_rate = T.fscalar('lr') # learning rate to use cost = self.finetune_cost_sum # compute the gradients with respect to the model parameters gparams = T.grad(cost, self.params) # compute list of fine-tuning updates updates = OrderedDict() for param, gparam in zip(self.params, gparams): updates[param] = param - gparam * learning_rate train_fn = theano.function(inputs=[theano.Param(batch_x), theano.Param(batch_y), theano.Param(learning_rate)], outputs=cost, updates=updates, givens={self.x: batch_x, self.y: batch_y}) return train_fn def get_adadelta_trainer(self): """ Returns an Adadelta (Zeiler 2012) trainer using self._rho and self._eps params. """ batch_x = T.fmatrix('batch_x') batch_y = T.ivector('batch_y') cost = self.finetune_cost_sum # compute the gradients with respect to the model parameters gparams = T.grad(cost, self.params) # compute list of fine-tuning updates updates = OrderedDict() for accugrad, accudelta, param, gparam in zip(self._accugrads, self._accudeltas, self.params, gparams): # c.f. Algorithm 1 in the Adadelta paper (Zeiler 2012) agrad = self._rho * accugrad + (1 - self._rho) * gparam * gparam dx = - T.sqrt((accudelta + self._eps) / (agrad + self._eps)) * gparam updates[accudelta] = self._rho * accudelta + (1 - self._rho) * dx * dx updates[param] = param + dx updates[accugrad] = agrad train_fn = theano.function(inputs=[theano.Param(batch_x), theano.Param(batch_y)], outputs=cost, updates=updates, givens={self.x: batch_x, self.y: batch_y}) return train_fn def get_adagrad_trainer(self): """ Returns an Adagrad (Duchi et al. 2010) trainer using a learning rate. """ batch_x = T.fmatrix('batch_x') batch_y = T.ivector('batch_y') learning_rate = T.fscalar('lr') # learning rate to use cost = self.finetune_cost_sum # compute the gradients with respect to the model parameters gparams = T.grad(cost, self.params) # compute list of fine-tuning updates updates = OrderedDict() for accugrad, param, gparam in zip(self._accugrads, self.params, gparams): # c.f. Algorithm 1 in the Adadelta paper (Zeiler 2012) agrad = accugrad + gparam * gparam dx = - (learning_rate / T.sqrt(agrad + self._eps)) * gparam updates[param] = param + dx updates[accugrad] = agrad train_fn = theano.function(inputs=[theano.Param(batch_x), theano.Param(batch_y), theano.Param(learning_rate)], outputs=cost, updates=updates, givens={self.x: batch_x, self.y: batch_y}) return train_fn def get_SAG_trainer(self): """ Returns a Stochastic Averaged Gradient (Bach & Moulines 2011) trainer. This is based on Bach 2013 slides: PRavg(theta_n) = Polyak-Ruppert averaging = (1+n)^{-1} * \sum_{k=0}^n theta_k theta_n = theta_{n-1} - gamma [ f'_n(PR_avg(theta_{n-1})) + f''_n(PR_avg( theta_{n-1})) * (theta_{n-1} - PR_avg(theta_{n-1}))] That returns two trainers: one for the first epoch, one for subsequent epochs. We use self._accudeltas to but the Polyak-Ruppert averaging, and self._accugrads for the number of iterations (updates). """ print "UNFINISHED, see TODO in get_SAG_trainer()" sys.exit(-1) batch_x = T.fmatrix('batch_x') batch_y = T.ivector('batch_y') learning_rate = T.fscalar('lr') # learning rate to use cost = self.finetune_cost_sum # First trainer: gparams = T.grad(cost, self.params) updates = OrderedDict() for accudelta, accugrad, param, gparam in zip(self._accudeltas, self._accugrads, self.params, gparams): theta = param - gparam * learning_rate updates[accudelta] = (theta + accudelta * accugrad) / (accugrad + 1.) 
updates[param] = theta updates[accugrad] = accugrad + 1. train_fn_init = theano.function(inputs=[theano.Param(batch_x), theano.Param(batch_y), theano.Param(learning_rate)], outputs=cost, updates=updates, givens={self.x: batch_x, self.y: batch_y}) # Second trainer: gparams = T.grad(cost, self._accudeltas) # TODO recreate the network with # (TODO) self._accudeltas instead of self.params so that we can compute the cost hparams = T.grad(cost, gparams) # compute list of fine-tuning updates updates = OrderedDict() for accudelta, accugrad, param, gparam, hparam in zip(self._accudeltas, self._accugrads, self.params, gparams, hparams): theta = param - learning_rate * (gparam + hparam * (param - accudelta)) updates[accudelta] = (theta + accudelta * accugrad) / (accugrad + 1.) updates[param] = theta updates[accugrad] = accugrad + 1. train_fn = theano.function(inputs=[theano.Param(batch_x), theano.Param(batch_y), theano.Param(learning_rate)], outputs=cost, updates=updates, givens={self.x: batch_x, self.y: batch_y}) return train_fn_init, train_fn def get_SGD_ld_trainer(self): """ Returns an SGD-ld trainer (Schaul et al. 2012). """ print "UNFINISHED, see TODO in get_SGD_ld_trainer()" sys.exit(-1) batch_x = T.fmatrix('batch_x') batch_y = T.ivector('batch_y') cost = self.finetune_cost_sum # compute the gradients with respect to the model parameters gparams = T.grad(cost, self.params) # INIT TODO # compute list of fine-tuning updates updates = OrderedDict() for accugrad, accudelta, accuhess, param, gparam in zip(self._accugrads, self._accudeltas, self._accuhess, self.params, gparams): pass # TODO # TODO # TODO train_fn = theano.function(inputs=[theano.Param(batch_x), theano.Param(batch_y)], outputs=cost, updates=updates, givens={self.x: batch_x, self.y: batch_y}) return train_fn def score_classif(self, given_set): """ Returns functions to get current classification scores. """ batch_x = T.fmatrix('batch_x') batch_y = T.ivector('batch_y') score = theano.function(inputs=[theano.Param(batch_x), theano.Param(batch_y)], outputs=self.errors, givens={self.x: batch_x, self.y: batch_y}) # Create a function that scans the entire set given as input def scoref(): return [score(batch_x, batch_y) for batch_x, batch_y in given_set] return scoref
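A hedged usage sketch for the DBN above: CD-1 pretraining of each RBM followed by Adadelta fine-tuning. The batch iterators and the learning rate are illustrative assumptions; the trainers themselves take raw float32/int32 minibatches, as their theano.Param inputs show, and the constructor defaults are left untouched.

import numpy

# Hedged usage sketch; `train_batches` and `valid_batches` are hypothetical
# iterables of (float32 features, int32 labels) minibatches.
numpy_rng = numpy.random.RandomState(123)
dbn = DBN(numpy_rng=numpy_rng, hidden_layers_sizes=[1024, 1024])

pretrain_fns = dbn.pretraining_functions(k=1)      # one CD-1 update function per RBM
for fn in pretrain_fns:
    for batch_x, _ in train_batches:
        fn(batch_x, 0.01)                          # (minibatch, learning rate)

train_fn = dbn.get_adadelta_trainer()              # fine-tuning step: train_fn(batch_x, batch_y)
for batch_x, batch_y in train_batches:
    train_fn(batch_x, batch_y)

score_valid = dbn.score_classif(valid_batches)     # returns a callable that scans the whole set
print numpy.mean(score_valid())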
class StackedDenoisingAutoEncoders(object): def __init__(self, random_generator, theano_random_generator=None, x_dim=28 * 28, y_dim=10, hidden_layer_sizes=[500, 500], corruption_levels=[0.1, 0.1]): """ """ # Declare empty sigmoid layer array for MLP self.sigmoid_layers = [] # Declare an empty array of DenoisingAutoEncoder self.autoencoder_layers = [] self.params = [] self.n_layers = len(hidden_layer_sizes) if theano_random_generator == None: self.theano_random_generator = RandomStreams( random_generator.randint(2**30)) else: self.theano_random_generator = theano_random_generator # Inputs using Theano self.x = T.matrix("x") self.y = T.ivector("y") # Initialize all parameters for i in range(self.n_layers): # Define x and y dimensions if i == 0: internal_x_dim = x_dim else: internal_x_dim = hidden_layer_sizes[i - 1] internal_y_dim = hidden_layer_sizes[i] # Find inputs if i == 0: internal_input = self.x else: internal_input = self.sigmoid_layers[i - 1].output # Define Sigmoid Layer self.sigmoid_layers.append( HiddenLayer(internal_input, internal_x_dim, internal_y_dim, random_generator, activation=T.nnet.sigmoid)) # Define input self.autoencoder_layers.append( DenoisingAutoEncoder(random_generator, theano_random_generator, internal_x_dim, internal_y_dim, internal_input, W=self.sigmoid_layers[i].W, b=self.sigmoid_layers[i].b)) # Uppdate parameters self.params.extend(self.sigmoid_layers[i].params) # Finally add logistic layer self.logistic_layer = LogisticRegression( self.sigmoid_layers[-1].output, hidden_layer_sizes[-1], y_dim) self.params.extend(self.logistic_layer.params) # These are two important costs # Finetuning after pretraining individual AutoEncoders self.finetune_cost = self.logistic_layer.negative_log_likelihood( self.y) # Error from prediction self.error = self.logistic_layer.error(self.y) def pretrain(self, train_x, batch_size): """Generates a list of functions, each of them implementing one step in trainnig the dA corresponding to the layer with same index. The function will require as input the minibatch index, and to train a dA you just need to iterate, calling the corresponding function on all minibatch indexes. 
:type train_set_x: theano.tensor.TensorType :param train_set_x: Shared variable that contains all datapoints used for training the dA :type batch_size: int :param batch_size: size of a [mini]batch :type learning_rate: float :param learning_rate: learning rate used during training for any of the dA layer """ index = T.iscalar("index") corruption_level = T.scalar("corruption_level") learning_rate = T.scalar("learning_rate") pretrain_functions = [] for autoencoder in self.autoencoder_layers: # Find cost and updates for the layer cost, updates = autoencoder.cost_updates(corruption_level, learning_rate) f = theano.function(inputs=[ index, theano.Param(corruption_level, default=0.2), theano.Param(learning_rate, default=0.1) ], outputs=cost, updates=updates, givens={ self.x: train_x[index * batch_size:(index + 1) * batch_size] }) pretrain_functions.append(f) return pretrain_functions def finetune(self, train_x, train_y, valid_x, valid_y, test_x, test_y, batch_size, learning_rate): """Generates a function `train` that implements one step of finetuning, a function `validate` that computes the error on a batch from the validation set, and a function `test` that computes the error on a batch from the testing set :type batch_size: int :param batch_size: size of a minibatch :type learning_rate: float :param learning_rate: learning rate used during finetune stage """ # Define index index = T.iscalar("index") # Cost and updates in SGD grad = T.grad(self.finetune_cost, wrt=self.params) updates = list() for i in range(len(self.params)): updates.append( (self.params[i], self.params[i] - learning_rate * grad[i])) # Define train, valid and test models train_model = theano.function( inputs=[index], outputs=self.finetune_cost, updates=updates, givens={ self.x: train_x[index * batch_size:(index + 1) * batch_size], self.y: train_y[index * batch_size:(index + 1) * batch_size] }) valid_model = theano.function( inputs=[index], outputs=self.error, givens={ self.x: valid_x[index * batch_size:(index + 1) * batch_size], self.y: valid_y[index * batch_size:(index + 1) * batch_size] }) test_model = theano.function( inputs=[index], outputs=self.error, givens={ self.x: test_x[index * batch_size:(index + 1) * batch_size], self.y: test_y[index * batch_size:(index + 1) * batch_size] }) return (train_model, valid_model, test_model)
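Unlike the earlier stack, finetune above returns index-based Theano functions over shared datasets; a hedged sketch of driving them is below. The train_x/train_y (and valid/test) shared variables are assumptions, as is the epoch count.

import numpy

# Hedged fine-tuning loop; train_x, train_y, valid_x, valid_y, test_x, test_y are
# assumed Theano shared variables already holding the data.
sda = StackedDenoisingAutoEncoders(numpy.random.RandomState(89677))
batch_size = 20
n_train_batches = train_x.get_value(borrow=True).shape[0] // batch_size

train_model, valid_model, test_model = sda.finetune(
    train_x, train_y, valid_x, valid_y, test_x, test_y,
    batch_size=batch_size, learning_rate=0.1)

for epoch in range(15):
    costs = [train_model(i) for i in range(n_train_batches)]
    print('epoch %d, mean finetune cost %f' % (epoch, float(numpy.mean(costs))))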
def sgd_optimize(learning_rate=0.1, n_epochs=200, batch_size=500, nkerns=[20, 50]): # Load input train, valid, test = util.load() print "loading 0 - ", train[0].shape[0], " train inputs in gpu memory" train_x, train_y = util.create_theano_shared(train) print "loading 0 - ", valid[0].shape[0], " validation inputs in gpu memory" valid_x, valid_y = util.create_theano_shared(valid) print "loading 0 - ", test[0].shape[0], " test inputs in gpu memory" test_x, test_y = util.create_theano_shared(test) # Define symbolic input matrices print "Building Model..." index = T.iscalar() x = T.matrix("x") y = T.ivector("y") random_generator = numpy.random.RandomState(1) # Create Layer0 of Lenet Model layer0_input = x.reshape( (batch_size, 1, 28, 28) ) filter_shape0 = (nkerns[0], 1, 5, 5) image_shape0 = (batch_size, 1, 28, 28) layer0 = LeNetConvPoolLayer(layer0_input, filter_shape0, image_shape0, random_generator) # Create Layer1 of Lenet model filter_shape1 = (nkerns[1], nkerns[0], 5, 5) image_shape1 = (batch_size, nkerns[0], 12, 12) layer1 = LeNetConvPoolLayer(layer0.output, filter_shape1, image_shape1, random_generator) # Create Layer2 which is a simple MLP hidden layer layer2_input = layer1.output.flatten(2) layer2 = HiddenLayer(layer2_input, nkerns[1] * 4 * 4, 500, random_generator) # Finally, Layer3 is LogisticRegression layer layer3 = LogisticRegression(layer2.output, 500, 10) # Define error error = layer3.error(y) # Create cost function cost = layer3.negative_log_likelihood(y) # Gradient and update functions params = layer3.params + layer2.params + layer1.params + layer0.params grads = T.grad(cost, wrt=params) updates = list() for i in range(len(params)): updates.append( (params[i], params[i] - learning_rate * grads[i]) ) # Train model train_model = theano.function( inputs=[index], outputs=cost, updates=updates, givens = { x: train_x[index*batch_size : (index+1)*batch_size], y: train_y[index*batch_size : (index+1)*batch_size] }) # Valid model valid_model = theano.function( inputs=[index], outputs=error, givens = { x: valid_x[index*batch_size : (index+1)*batch_size], y: valid_y[index*batch_size : (index+1)*batch_size] }) # Test Model test_model = theano.function( inputs=[index], outputs=error, givens={ x: test_x[index*batch_size : (index+1)*batch_size], y: test_y[index*batch_size : (index+1)*batch_size] }) # Create number of minibatches n_train_batches = train[0].shape[0] / batch_size n_valid_batches = valid[0].shape[0] / batch_size n_test_batches = test[0].shape[0] / batch_size # Finally, main loop for training util.train_test_model(n_epochs, train_model, valid_model, test_model, n_train_batches, n_valid_batches, n_test_batches)
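# A NumPy-only illustration of the index-based minibatch slicing that the Theano
# givens dictionaries above express; the arrays here are random stand-ins for the
# shared MNIST variables and are purely an assumption for the example.
import numpy as np

batch_size = 500
train_x = np.random.rand(2000, 784).astype('float32')
train_y = np.random.randint(0, 10, size=2000).astype('int32')

n_train_batches = train_x.shape[0] // batch_size
for index in range(n_train_batches):
    # Same slice as train_x[index * batch_size:(index + 1) * batch_size] in the givens.
    x_batch = train_x[index * batch_size:(index + 1) * batch_size]
    y_batch = train_y[index * batch_size:(index + 1) * batch_size]
    print(index, x_batch.shape, y_batch.shape)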
import numpy as np from sklearn.model_selection import train_test_split from sklearn import datasets import matplotlib.pyplot as plt from logistic_regression import LogisticRegression bc = datasets.load_breast_cancer() X, y = bc.data, bc.target X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=1234) def accuracy(y_true, y_pred): accuracy = np.sum(y_true == y_pred) / len(y_true) return accuracy re = LogisticRegression(lr=0.0001, n_iters=1000) re.fit(X_train, y_train) prediction = re.predict(X_test) print('accuracy: ', accuracy(y_test, prediction)) for i in range(len(prediction)): print(y_test[i], prediction[i])
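# The logistic_regression module imported above is not shown in this document.
# Below is a minimal sketch of what a model with the same lr / n_iters constructor
# and fit / predict interface might look like; treat it as an assumption, not the
# actual implementation used above.
import numpy as np

class LogisticRegressionSketch(object):
    def __init__(self, lr=0.0001, n_iters=1000):
        self.lr = lr
        self.n_iters = n_iters
        self.weights = None
        self.bias = 0.0

    def fit(self, X, y):
        # Batch gradient descent on the binary cross-entropy loss.
        n_samples, n_features = X.shape
        self.weights = np.zeros(n_features)
        for _ in range(self.n_iters):
            linear = X.dot(self.weights) + self.bias
            y_hat = 1.0 / (1.0 + np.exp(-linear))
            dw = X.T.dot(y_hat - y) / n_samples
            db = np.sum(y_hat - y) / n_samples
            self.weights -= self.lr * dw
            self.bias -= self.lr * db

    def predict(self, X):
        linear = X.dot(self.weights) + self.bias
        return (1.0 / (1.0 + np.exp(-linear)) >= 0.5).astype(int)

# Tiny usage example on toy data.
X_toy = np.array([[0.0], [1.0], [2.0], [3.0]])
y_toy = np.array([0, 0, 1, 1])
sketch = LogisticRegressionSketch(lr=0.1, n_iters=5000)
sketch.fit(X_toy, y_toy)
print(sketch.predict(X_toy))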
class DBN(object): """Deep Belief Network A deep belief network is obtained by stacking several RBMs on top of each other. The hidden layer of the RBM at layer `i` becomes the input of the RBM at layer `i+1`. The first layer RBM gets as input the input of the network, and the hidden layer of the last RBM represents the output. When used for classification, the DBN is treated as a MLP, by adding a logistic regression layer on top. """ def __init__(self, numpy_rng, theano_rng=None, n_ins=784, hidden_layers_sizes=[500, 500], n_outs=10): """This class is made to support a variable number of layers. :type numpy_rng: numpy.random.RandomState :param numpy_rng: numpy random number generator used to draw initial weights :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams :param theano_rng: Theano random generator; if None is given one is generated based on a seed drawn from `rng` :type n_ins: int :param n_ins: dimension of the input to the DBN :type hidden_layers_sizes: list of ints :param hidden_layers_sizes: intermediate layers size, must contain at least one value :type n_outs: int :param n_outs: dimension of the output of the network """ self.sigmoid_layers = [] self.rbm_layers = [] self.params = [] self.n_layers = len(hidden_layers_sizes) assert self.n_layers > 0 if not theano_rng: theano_rng = RandomStreams(numpy_rng.randint(2**30)) # allocate symbolic variables for the data self.x = T.matrix('x') # the data is presented as rasterized images self.y = T.ivector('y') # the labels are presented as 1D vector # of [int] labels # end-snippet-1 # The DBN is an MLP, for which all weights of intermediate # layers are shared with a different RBM. We will first # construct the DBN as a deep multilayer perceptron, and when # constructing each sigmoidal layer we also construct an RBM # that shares weights with that layer. During pretraining we # will train these RBMs (which will lead to chainging the # weights of the MLP as well) During finetuning we will finish # training the DBN by doing stochastic gradient descent on the # MLP. for i in xrange(self.n_layers): # construct the sigmoidal layer # the size of the input is either the number of hidden # units of the layer below or the input size if we are on # the first layer if i == 0: input_size = n_ins else: input_size = hidden_layers_sizes[i - 1] # the input to this layer is either the activation of the # hidden layer below or the input of the DBN if you are on # the first layer if i == 0: layer_input = self.x else: layer_input = self.sigmoid_layers[-1].output sigmoid_layer = HiddenLayer(rng=numpy_rng, input=layer_input, n_in=input_size, n_out=hidden_layers_sizes[i], activation=T.nnet.sigmoid) # add the layer to our list of layers self.sigmoid_layers.append(sigmoid_layer) # its arguably a philosophical question... but we are # going to only declare that the parameters of the # sigmoid_layers are parameters of the DBN. The visible # biases in the RBM are parameters of those RBMs, but not # of the DBN. 
self.params.extend(sigmoid_layer.params) # Construct an RBM that shared weights with this layer rbm_layer = RBM(numpy_rng=numpy_rng, theano_rng=theano_rng, input=layer_input, n_visible=input_size, n_hidden=hidden_layers_sizes[i], W=sigmoid_layer.W, hbias=sigmoid_layer.b) self.rbm_layers.append(rbm_layer) # We now need to add a logistic layer on top of the MLP self.logLayer = LogisticRegression( input=self.sigmoid_layers[-1].output, n_in=hidden_layers_sizes[-1], n_out=n_outs) self.params.extend(self.logLayer.params) # compute the cost for second phase of training, defined as the # negative log likelihood of the logistic regression (output) layer self.finetune_cost = self.logLayer.negative_log_likelihood(self.y) # compute the gradients with respect to the model parameters # symbolic variable that points to the number of errors made on the # minibatch given by self.x and self.y self.errors = self.logLayer.errors(self.y) def pretraining_functions(self, train_set_x, batch_size, k): '''Generates a list of functions, for performing one step of gradient descent at a given layer. The function will require as input the minibatch index, and to train an RBM you just need to iterate, calling the corresponding function on all minibatch indexes. :type train_set_x: theano.tensor.TensorType :param train_set_x: Shared var. that contains all datapoints used for training the RBM :type batch_size: int :param batch_size: size of a [mini]batch :param k: number of Gibbs steps to do in CD-k / PCD-k ''' # index to a [mini]batch index = T.lscalar('index') # index to a minibatch learning_rate = T.scalar('lr') # learning rate to use # number of batches n_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size # begining of a batch, given `index` batch_begin = index * batch_size # ending of a batch given `index` batch_end = batch_begin + batch_size pretrain_fns = [] for rbm in self.rbm_layers: # get the cost and the updates list # using CD-k here (persisent=None) for training each RBM. 
# TODO: change cost function to reconstruction error cost, updates = rbm.get_cost_updates(learning_rate, persistent=None, k=k) # compile the theano function fn = theano.function( inputs=[index, theano.Param(learning_rate, default=0.1)], outputs=cost, updates=updates, givens={self.x: train_set_x[batch_begin:batch_end]}) # append `fn` to the list of functions pretrain_fns.append(fn) return pretrain_fns def build_finetune_functions(self, datasets, batch_size): '''Generates a function `train` that implements one step of finetuning, a function `validate` that computes the error on a batch from the validation set, and a function `test` that computes the error on a batch from the testing set :type datasets: list of pairs of theano.tensor.TensorType :param datasets: It is a list that contain all the datasets; the has to contain three pairs, `train`, `valid`, `test` in this order, where each pair is formed of two Theano variables, one for the datapoints, the other for the labels :type batch_size: int :param batch_size: size of a minibatch ''' (train_set_x, train_set_y) = datasets[0] (valid_set_x, valid_set_y) = datasets[1] (test_set_x, test_set_y) = datasets[2] # compute number of minibatches for training, validation and testing n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] n_valid_batches /= batch_size n_test_batches = test_set_x.get_value(borrow=True).shape[0] n_test_batches /= batch_size index = T.lscalar('index') # index to a [mini]batch learning_rate = T.scalar('lr') # learning rate to used # compute the gradients with respect to the model parameters gparams = T.grad(self.finetune_cost, self.params) # compute list of fine-tuning updates updates = [] for param, gparam in zip(self.params, gparams): updates.append( (param, param - gparam * T.cast(learning_rate, dtype=theano.config.floatX))) train_fn = theano.function( inputs=[index, theano.Param(learning_rate, default=0.1)], outputs=self.finetune_cost, updates=updates, givens={ self.x: train_set_x[index * batch_size:(index + 1) * batch_size], self.y: train_set_y[index * batch_size:(index + 1) * batch_size] }) test_score_i = theano.function( [index], self.errors, givens={ self.x: test_set_x[index * batch_size:(index + 1) * batch_size], self.y: test_set_y[index * batch_size:(index + 1) * batch_size] }) valid_score_i = theano.function( [index], self.errors, givens={ self.x: valid_set_x[index * batch_size:(index + 1) * batch_size], self.y: valid_set_y[index * batch_size:(index + 1) * batch_size] }) # Create a function that scans the entire validation set def valid_score(): return [valid_score_i(i) for i in xrange(n_valid_batches)] # Create a function that scans the entire test set def test_score(): return [test_score_i(i) for i in xrange(n_test_batches)] return train_fn, valid_score, test_score
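# The fine-tuning updates above are plain SGD: each parameter moves against its
# gradient, scaled by the learning rate. A NumPy sketch of that rule on a toy
# quadratic cost 0.5 * ||params||^2 (whose gradient is simply params); all names
# are illustrative assumptions.
import numpy as np

def sgd_finetune_sketch(learning_rate=0.1, n_steps=100):
    params = np.array([3.0, -2.0, 0.5])
    for _ in range(n_steps):
        gparams = params                      # gradient of the toy cost
        params = params - learning_rate * gparams
    return params

print(sgd_finetune_sketch())                  # approaches the zero vector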
count = np.zeros((len(X), VOCAB_SIZE)) for i, indices in enumerate(X): for idx in indices: count[i, idx] += 1 print("%.2f secs ==> Document-Term Matrix" % (time.time() - t0)) t0 = time.time() X = tfidf.fit_transform(count) print("%.2f secs ==> TF-IDF transform" % (time.time() - t0)) return X if __name__ == '__main__': (X_train, y_train), (X_test, y_test) = tf.keras.datasets.imdb.load_data( num_words=VOCAB_SIZE) tfidf = TfidfTransformer() X_train = transform(X_train, tfidf) X_test = transform(X_test, tfidf) model = LogisticRegression(VOCAB_SIZE, 2) model.fit(X_train, y_train, n_epoch=2, batch_size=32, val_data=(X_test, y_test)) y_pred = model.predict(X_test) final_acc = (y_pred == y_test).mean() print("final testing accuracy: %.4f" % final_acc)
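# A standalone, toy-sized version of the count-matrix construction above: each
# document is a list of word indices, the double loop accumulates a document-term
# matrix, and TfidfTransformer reweights it. VOCAB_SIZE and the documents here are
# assumptions made for the example.
import numpy as np
from sklearn.feature_extraction.text import TfidfTransformer

VOCAB_SIZE = 6
docs = [[0, 2, 2, 5], [1, 1, 3], [4, 5, 5, 0]]

count = np.zeros((len(docs), VOCAB_SIZE))
for i, indices in enumerate(docs):
    for idx in indices:
        count[i, idx] += 1

tfidf = TfidfTransformer()
X = tfidf.fit_transform(count)
print(X.toarray().round(3))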
def evaluate_model(learning_rate=0.001, n_epochs=100, nkerns=[16, 40, 50, 60], batch_size=20): """ Network for classification of MNIST database :type learning_rate: float :param learning_rate: this is the initial learning rate used (factor for the stochastic gradient) :type n_epochs: int :param n_epochs: maximal number of epochs to run the optimizer :type nkerns: list of ints :param nkerns: number of kernels on each layer :type batch_size: int :param batch_size: the batch size for training """ print("Evaluating model") rng = numpy.random.RandomState(23455) # loading the data1 datasets = load_test_data(3) valid_set_x, valid_set_y = datasets[0] test_set_x, test_set_y = datasets[1] # compute number of minibatches for training, validation and testing n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] n_test_batches = test_set_x.get_value(borrow=True).shape[0] n_valid_batches //= batch_size n_test_batches //= batch_size # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch # start-snippet-1 x = T.matrix('x') # the data is presented as rasterized images y = T.ivector('y') # the labels are presented as 1D vector of # [int] labels loaded_params = numpy.load('../saved_models/model3.npy') layer4_W, layer4_b, layer3_W, layer3_b, layer2_W, layer2_b, layer1_W, layer1_b, layer0_W, layer0_b = loaded_params ###################### # BUILD ACTUAL MODEL # ###################### print('Building the model...') # Reshape matrix of rasterized images of shape (batch_size, 32 * 32) # to a 4D tensor, compatible with our LeNetConvPoolLayer # (32, 32) is the size of MNIST images. layer0_input = x.reshape((batch_size, 1, 64, 88)) # Construct the first convolutional pooling layer: # filtering does not reduce the layer size because we use padding # maxpooling reduces the size to (32/2, 32/2) = (16, 16) # 4D output tensor is thus of shape (batch_size, nkerns[0], 16, 16) layer0 = MyConvPoolLayer(rng, input=layer0_input, image_shape=(batch_size, 1, 64, 88), p1=2, p2=2, filter_shape=(nkerns[0], 1, 5, 5), poolsize=(2, 2), W=layer0_W, b=layer0_b) # Construct the second convolutional pooling layer: # filtering does not reduce the layer size because we use padding # maxpooling reduces the size to (16/2, 16/2) = (8, 8) # 4D output tensor is thus of shape (batch_size, nkerns[1], 5, 5) layer1 = MyConvPoolLayer(rng, input=layer0.output, image_shape=(batch_size, nkerns[0], 32, 44), p1=2, p2=2, filter_shape=(nkerns[1], nkerns[0], 5, 5), poolsize=(2, 2), W=layer1_W, b=layer1_b) # Construct the third convolutional pooling layer # filtering does not reduce the layer size because we use padding # maxpooling reduces the size to (8/2, 8/2) = (4, 4) # 4D output tensor is thus of shape (batch_size, nkerns[2], 4, 4) layer2 = MyConvPoolLayer(rng, input=layer1.output, image_shape=(batch_size, nkerns[1], 16, 22), p1=2, p2=2, filter_shape=(nkerns[2], nkerns[1], 5, 5), poolsize=(2, 2), W=layer2_W, b=layer2_b) # the HiddenLayer being fully-connected, it operates on 2D matrices of # shape (batch_size, num_pixels) (i.e matrix of rasterized images). # This will generate a matrix of shape (batch_size, nkerns[2] * 4 * 4), # or (500, 20 * 4 * 4) = (500, 320) with the default values. 
layer3_input = layer2.output.flatten(2) # construct a fully-connected sigmoidal layer layer3 = HiddenLayer(rng, input=layer3_input, n_in=nkerns[2] * 8 * 11, n_out=800, activation=T.tanh, W=layer3_W, b=layer3_b) # classify the values of the fully-connected sigmoidal layer layer4 = LogisticRegression(input=layer3.output, n_in=800, n_out=6, W=layer4_W, b=layer4_b) cost = layer4.negative_log_likelihood(y) predicted_output = layer4.y_pred # create a function to compute the mistakes that are made by the model test_model = theano.function( [index], layer4.errors(y), givens={ x: test_set_x[index * batch_size:(index + 1) * batch_size], y: test_set_y[index * batch_size:(index + 1) * batch_size] }) val_model_preds = theano.function( [index], layer4.prediction(), givens={ x: valid_set_x[index * batch_size:(index + 1) * batch_size], }) validate_model = theano.function( [index], layer4.errors(y), givens={ x: valid_set_x[index * batch_size:(index + 1) * batch_size], y: valid_set_y[index * batch_size:(index + 1) * batch_size] }) # create a list of all model parameters to be fit by gradient descent params = layer4.params + layer3.params + layer2.params + layer1.params + layer0.params val_preds = [val_model_preds(i) for i in range(n_valid_batches)] #print(val_preds) #preds = numpy(val_preds) preds = [] for pred in val_preds: for p in pred: preds.append(p) #preds = val_preds.reshape(valid_set_x.get_value(borrow=True).shape[0]) actual_labels = load_test_data(2, 2) n = len(actual_labels) confusion_matrix = numpy.zeros((6, 6)) for i in range(n): confusion_matrix[int(actual_labels[i])][preds[i]] += 1 print(confusion_matrix) correct = 0.0 for i in range(n): if (preds[i] == int(actual_labels[i])): correct += 1.0 accuracy = correct / n print("Number of correctly classified : ", correct) print("Test accuracy is", accuracy * 100)
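# A compact version of the confusion-matrix and accuracy computation above
# (rows index the actual class, columns the predicted class); the toy labels
# are illustrative only.
import numpy as np

def confusion_and_accuracy(actual_labels, preds, n_classes=6):
    confusion_matrix = np.zeros((n_classes, n_classes))
    for a, p in zip(actual_labels, preds):
        confusion_matrix[int(a)][int(p)] += 1
    accuracy = np.trace(confusion_matrix) / float(len(actual_labels))
    return confusion_matrix, accuracy

actual = [0, 1, 2, 2, 5, 3]
pred = [0, 1, 2, 1, 5, 3]
cm, acc = confusion_and_accuracy(actual, pred)
print(cm)
print("Test accuracy is", acc * 100)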
file_num = 80 motif_num = 10 data_size = feature_list.shape[0] input_size = feature_list.shape[1] output_size = motif_num W = load_features(data_path, 'W_1.txt') makeFolder() # label = numpy.zeros((data_size, output_size)) # for i in xrange(data_size): # index = i / file_num # label[i][index] = 1 # LR = LogisticRegression(feature_list, label, input_size, output_size, data_size, fine_tune_lr) LR = LogisticRegression(feature_list, None, input_size, output_size, data_size, fine_tune_lr) LR.W = W for i in xrange(fine_tune_epoch): print 'epoch: ' + str(i) LR.fine_tune() # output_list = LR.predict(feature_list) # output_list = LR.predict_direct(feature_list) output_list = LR.predict_sigmoid(feature_list) saveW(LR.getW(), 'LR_after_train') saveFeatures(output_list, 'LR_judge.txt') # saveFeatures(label, 'label.txt')
# .......................... # TRAIN / TEST SPLIT # .......................... X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5) # Rescaled labels {-1, 1} rescaled_y_train = 2 * y_train - np.ones(np.shape(y_train)) rescaled_y_test = 2 * y_test - np.ones(np.shape(y_test)) # ....... # SETUP # ....... adaboost = Adaboost(n_clf=8) naive_bayes = NaiveBayes() knn = KNN(k=4) logistic_regression = LogisticRegression() mlp = MultilayerPerceptron(n_hidden=20) perceptron = Perceptron() decision_tree = DecisionTree() random_forest = RandomForest(n_estimators=150) support_vector_machine = SupportVectorMachine(C=1, kernel=rbf_kernel) lda = LDA() # ........ # TRAIN # ........ print "Training:" print "\tAdaboost" adaboost.fit(X_train, rescaled_y_train) print "\tNaive Bayes" naive_bayes.fit(X_train, y_train)
def evaluate_cnn(image_shape=[32], channels=3, nkerns=[64, 128], filter_shapes=[5, 5], hidden_layer=[1024], outputs=10, pools=[2, 2], dropouts=[0.1, 0.25, 0.5], learning_rate=0.1, momentum=0.5, n_epochs=2000, minibatch_size=1024): rng = np.random.RandomState(12345) # calculate shapes at each CNN layer for i in range(len(filter_shapes)): if (image_shape[-1] - filter_shapes[i] + 1) % pools[i] != 0: return -1 image_shape = image_shape + [ (image_shape[-1] - filter_shapes[i] + 1) // pools[i] ] # specify shape of filters shapes = [(nkerns[0], channels, filter_shapes[0], filter_shapes[0]), (nkerns[1], nkerns[0], filter_shapes[1], filter_shapes[1]), (nkerns[1] * image_shape[-1]**2, hidden_layer[0]), (hidden_layer[0], outputs)] # load parameters paramDataManager = ParamDataManager(image_shape, channels, nkerns, filter_shapes, hidden_layer, outputs, pools, dropouts, momentum, learning_rate, n_epochs, minibatch_size) toLoadParameters = False # Not loading parameters now toSaveParameters = True paramData = [None] * 8 if toLoadParameters: paramData, shapeData = paramDataManager.loadData() shapeMatched = True for i in range(len(shapes)): if (shapes[-i - 1] != shapeData[2 * i]): paramData[2 * i] = None paramData[2 * i + 1] = None print(".. Shape problem for %d .." % (2 * i), shapes[-i], shapeData[2 * i]) shapeMatched = False else: print('... Data loaded for layer %d ...' % i) if (shapeMatched == False): print('... Shape did not match ...') ####################### # Variables for model # ####################### x = T.matrix('x') y = T.ivector('y') ###################### # BUILD ACTUAL MODEL # ###################### print('... building the model') layer0_input = x.reshape( (minibatch_size, channels, image_shape[0], image_shape[0])) ###################### # TRAIN AREA # ###################### # Construct the first convolutional pooling layer: layer0 = ConvPoolLayer(rng, input=layer0_input, image_shape=(minibatch_size, channels, image_shape[0], image_shape[0]), filter_shape=shapes[0], poolsize=(pools[0], pools[0]), activation=T.nnet.relu, dropout=dropouts[0], W=paramData[6], b=paramData[7]) # Construct the second convolutional pooling layer layer1 = ConvPoolLayer(rng, input=layer0.output, image_shape=(minibatch_size, nkerns[0], image_shape[1], image_shape[1]), filter_shape=shapes[1], poolsize=(pools[1], pools[1]), activation=T.nnet.relu, dropout=dropouts[1], W=paramData[4], b=paramData[5]) # the HiddenLayer being fully-connected, it operates on 2D matrices of layer2_input = layer1.output.flatten(2) # shape = (7*7*128 , 64) # construct a fully-connected sigmoidal layer layer2 = HiddenLayer(rng, input=layer2_input, n_in=shapes[2][0], n_out=shapes[2][1], activation=T.nnet.relu, dropout=dropouts[2], W=paramData[2], b=paramData[3]) # classify the values of the fully-connected sigmoidal layer layer3 = LogisticRegression(input=layer2.output, n_in=shapes[3][0], n_out=shapes[3][1], W=paramData[0], b=paramData[1]) # the cost we minimize during training is the NLL of the model cost = layer3.negative_log_likelihood(y) # create a list of all model parameters to be fit by gradient descent params = layer3.params + layer2.params + layer1.params + layer0.params velocity = [] for i in range(len(params)): velocity = velocity + [ theano.shared(T.zeros_like(params[i]).eval(), borrow=True) ] # create a list of gradients for all model parameters grads = T.grad(cost, params) # train_model is a function that updates the model parameters by # SGD Since this model has many parameters, it would be tedious to # manually create an 
update rule for each model parameter. We thus # create the updates list by automatically looping over all # (params[i], grads[i]) pairs. updates = [(velocity_i, momentum * velocity_i + learning_rate * grad_i) for velocity_i, grad_i in zip(velocity, grads)] updates = updates + [(param_i, param_i - velocity_i) for param_i, velocity_i in zip(params, velocity)] train_model = theano.function( [x, y], cost, updates=updates, ) ###################### # TEST AREA # ###################### # Test layer 0 layer0_test_input = x.reshape( (minibatch_size, channels, image_shape[0], image_shape[0])) # Test layer 0 layer0_test_output = convPoolLayerTest( input=layer0_test_input, image_shape=(minibatch_size, channels, image_shape[0], image_shape[0]), filter_shape=shapes[0], poolsize=(pools[0], pools[0]), activation=T.nnet.relu, W=layer0.params[0], b=layer0.params[1]) # Test layer 1 layer1_test_output = convPoolLayerTest( input=layer0_test_output, image_shape=(minibatch_size, nkerns[0], image_shape[1], image_shape[1]), filter_shape=shapes[1], poolsize=(pools[1], pools[1]), activation=T.nnet.relu, W=layer1.params[0], b=layer1.params[1]) # the test HiddenLayer layer2_test_input = layer1_test_output.flatten(2) # test fully-connected sigmoidal layer layer2_test_output = hiddenLayerTest(input=layer2_test_input, activation=T.nnet.relu, W=layer2.params[0], b=layer2.params[1]) # test the fully-connected sigmoidal layer y_pred = logisticRegressionTest(input=layer2_test_output, W=layer3.params[0], b=layer3.params[1]) # function to validation scores validate_model = theano.function([x, y], classificationErrors(y_pred, y)) # create a function to compute test scores test_model = theano.function([x, y], classificationErrors(y_pred, y)) ######################### # TRAIN CONFIGURATION # ######################### patience = 10000 patience_increase = 2 improvement_threshold = 0.995 momentum_limit = 0.9 # Initialize training variables epoch = 0 done_looping = False minibatch_iteration = 0 best_validation_loss = np.inf best_iter = 0 test_score = 0. start_time = timeit.default_timer() # Initialize sample loader for loading train, val, test samples sampleLoader = SampleLoader() validation_frequency = min(sampleLoader.n_train_batches, patience // 2) #validation_frequency = 10 ############ # TRAINING # ############ print "Training ..." 
while (epoch < n_epochs) and (not done_looping): #sys.stdout.flush() epoch = epoch + 1 learning_rate = learning_rate * 0.99 momentum = momentum + (momentum_limit - momentum) / 32 print('Learning rate = %f, Momentum = %f' % (learning_rate, momentum)) train_batch_data = sampleLoader.loadNextTrainBatch() print train_batch_data[0].shape.eval() while train_batch_data is not None: train_x, train_y = train_batch_data train_x = train_x.get_value() train_y = train_y.eval() n_minibatches = train_x.shape[0] / minibatch_size print type(n_minibatches) for minibatch_index in range(n_minibatches): minibatch_iteration += 1 x = train_x[minibatch_index * minibatch_size:(minibatch_index + 1) * minibatch_size].reshape((-1, train_x.shape[-1])) y = train_y[minibatch_index * minibatch_size:(minibatch_index + 1) * minibatch_size] print "minibatch_iteration ", minibatch_iteration cost_minibatch = train_model(x, y) print cost_minibatch # Validate with a frequency of validation_frequency if minibatch_iteration % validation_frequency == 0: validation_loss = get_validation_loss( sampleLoader, validate_model, minibatch_size) # if we got the best validation score until now print "validation_loss: ", validation_loss, " validation_loss: ", best_validation_loss if validation_loss < best_validation_loss: #improve patience if loss improvement is good enough if validation_loss < best_validation_loss * improvement_threshold: patience = max( patience, minibatch_iteration * patience_increase) # save best validation score and iteration number best_validation_loss = validation_loss best_iter = minibatch_iteration """ Check for overfitting logic here """ # compute test loss test_loss = get_test_loss(sampleLoader, test_model, minibatch_size) print print "validation loss improved!" print print "validation_loss: ", validation_loss, " test_loss: ", test_loss if toSaveParameters: paramDataManager.saveData(params) if patience <= minibatch_iteration: done_looping = True break train_batch_data = sampleLoader.loadNextTrainBatch() end_time = timeit.default_timer() print "Training complete." print "Best Validation Score: ", best_validation_loss, " obtained at ", best_iter, " With test score ", test_score print "Program ran for ", ((end_time - start_time) / 60), "m" return (best_validation_loss, test_score, paramDataManager.getParamDataAddress())
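# The updates list built earlier in evaluate_cnn implements classical momentum:
# velocity <- momentum * velocity + learning_rate * grad, then param <- param - velocity.
# A NumPy sketch of that rule on a toy quadratic cost 0.5 * ||param||^2; the names
# are illustrative assumptions.
import numpy as np

def momentum_sgd_sketch(learning_rate=0.1, momentum=0.5, n_steps=200):
    param = np.array([4.0, -3.0])
    velocity = np.zeros_like(param)
    for _ in range(n_steps):
        grad = param                                  # gradient of the toy cost
        velocity = momentum * velocity + learning_rate * grad
        param = param - velocity
    return param

print(momentum_sgd_sketch())                          # close to [0, 0]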
def run_models_with_cross_validation(num_classes=2, learning_rate=0.5): #GET DATA #- expect data_0 ... data_4 data_groups = list() data_type = 'int' data_groups.append(FileManager.get_csv_file_data_array( 'data_0', data_type)) data_groups.append(FileManager.get_csv_file_data_array( 'data_1', data_type)) data_groups.append(FileManager.get_csv_file_data_array( 'data_2', data_type)) data_groups.append(FileManager.get_csv_file_data_array( 'data_3', data_type)) data_groups.append(FileManager.get_csv_file_data_array( 'data_4', data_type)) NUM_GROUPS = len(data_groups) #For each data_group, train on all others and test on me model1_culminating_result = 0 model2_culminating_result = 0 model1_final_average_result = 0 model2_final_average_result = 0 for test_group_id in range(NUM_GROUPS): print() #Form training data as 4/5 data train_data = list() for train_group_id in range(len(data_groups)): if (train_group_id != test_group_id): #Initialize train_data if necessary if (len(train_data) == 0): train_data = data_groups[train_group_id] else: train_data = train_data + data_groups[train_group_id] print('train_data group', str(test_group_id), 'length: ', len(train_data)) #print(train_data) test_data = data_groups[test_group_id] model1_result = 0 model2_result = 0 model1 = NaiveBayes(num_classes) model2 = LogisticRegression(pd.DataFrame(train_data)) model1.train(train_data) model2.train(pd.DataFrame(train_data), learning_rate) print_classifications = False if (test_group_id == 0 ): #Required to print classifications for one fold print_classifications = True model1_result = model1.test( test_data, print_classifications) # returns (attempts, fails, success) #print('result:', result) model1_accuracy = (model1_result[2] / model1_result[0]) * 100 print('Naive Bayes Accuracy (%):', model1_accuracy) model2_result = model2.test( pd.DataFrame(test_data), print_classifications) # returns (% accuracy) print('Logistic Regression Accuracy (%):', model2_result) model1_culminating_result = model1_culminating_result + model1_accuracy model2_culminating_result = model2_culminating_result + model2_result model1_final_average_result = model1_culminating_result / NUM_GROUPS model2_final_average_result = model2_culminating_result / NUM_GROUPS #print() #print('final average result:') #print(final_average_result) #print() return (model1_final_average_result, model2_final_average_result)
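# A short sketch of the 5-fold rotation performed above: each group takes a turn
# as the test set while the remaining groups form the training set. Index-based
# and NumPy-only; the sample count is an assumption for the example.
import numpy as np

def five_fold_indices(n_samples, n_groups=5, seed=0):
    rng = np.random.RandomState(seed)
    indices = rng.permutation(n_samples)
    folds = np.array_split(indices, n_groups)
    for test_group_id in range(n_groups):
        test_idx = folds[test_group_id]
        train_idx = np.concatenate(
            [folds[g] for g in range(n_groups) if g != test_group_id])
        yield train_idx, test_idx

for train_idx, test_idx in five_fold_indices(20):
    print(len(train_idx), len(test_idx))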
def test_weight_dimension(): from logistic_regression import LogisticRegression model = LogisticRegression(input_dimensions=2) assert model.weights.ndim == 2 and model.weights.shape[0] == 3 and model.weights.shape[1] == 1
from logistic_regression import LogisticRegression import numpy as np from sklearn import svm, datasets # import some data to play with iris = datasets.load_iris() # Take the first two features. We could avoid this by using a two-dim dataset X = iris.data[:, :2] y = iris.target lr = LogisticRegression(method='OneVsAll') lr.fit(X, y) H = lr.predict(X) print("Training Accuracy : " + str(float(np.sum(H == y)) / y.shape[0]))
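# A sketch of the one-vs-all scheme that method='OneVsAll' presumably selects:
# fit one binary logistic model per class and predict by the argmax of the
# per-class scores. scikit-learn's binary LogisticRegression is used here only
# as a stand-in, since the imported module is not shown.
import numpy as np
from sklearn import datasets
from sklearn.linear_model import LogisticRegression as SkLogistic

iris = datasets.load_iris()
X, y = iris.data[:, :2], iris.target

classes = np.unique(y)
scores = np.zeros((X.shape[0], len(classes)))
for k in classes:
    clf = SkLogistic(max_iter=1000)
    clf.fit(X, (y == k).astype(int))          # class k vs. the rest
    scores[:, k] = clf.decision_function(X)

H = classes[np.argmax(scores, axis=1)]
print("Training Accuracy : " + str(float(np.sum(H == y)) / y.shape[0]))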
pos_label = 'Admitted' neg_label = 'Not Admitted' xlabel = 'Exam 1 Score' ylabel = 'Exam 2 Score' title = 'Admission Based on Exam Scores' data = load_data('ex2data1.txt') X = data.iloc[:, :-1] y = data.iloc[:, -1] plot_data(data, xlabel, ylabel, title, pos_label, neg_label) X.insert(0, 'ones', 1) X = X.to_numpy() y = y.to_numpy().reshape((100, 1)) theta = np.zeros((X.shape[1], 1)) iterations = 2000 alpha = 0.00001 classifier = LogisticRegression() gradient, cost_history = classifier.gradient_descent( X, y, theta, iterations, alpha) plot_computeCost(cost_history, iterations) predictions = classifier.predict(X, gradient) correct = [ 1 if ((a == 1 and b == 1) or (a == 0 and b == 0)) else 0 for (a, b) in zip(predictions, y) ] accuracy = sum(correct) / len(correct) * 100 print('Accuracy: {0}%'.format(accuracy))
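# The classifier module used above is not shown; the sketch below is one plausible
# implementation of its gradient_descent call (batch gradient descent on the
# logistic cost, returning the fitted parameters and the cost history). Treat the
# function and the toy data as assumptions.
import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def gradient_descent_sketch(X, y, theta, iterations, alpha):
    m = len(y)
    cost_history = []
    for _ in range(iterations):
        h = sigmoid(X.dot(theta))
        theta = theta - (alpha / m) * X.T.dot(h - y)
        eps = 1e-12
        cost = -(1.0 / m) * np.sum(y * np.log(h + eps) + (1 - y) * np.log(1 - h + eps))
        cost_history.append(cost)
    return theta, cost_history

# Two features plus an intercept column of ones, as in the snippet above.
X_demo = np.column_stack((np.ones(4), np.array([[1, 2], [2, 1], [3, 4], [4, 3]])))
y_demo = np.array([[0], [0], [1], [1]])
theta_demo = np.zeros((3, 1))
theta_fit, costs = gradient_descent_sketch(X_demo, y_demo, theta_demo, 500, 0.1)
print(costs[0], costs[-1])                    # the cost should decrease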
class DBN(object): """Deep Belief Network A deep belief network is obtained by stacking several RBMs on top of each other. The hidden layer of the RBM at layer `i` becomes the input of the RBM at layer `i+1`. The first layer RBM gets as input the input of the network, and the hidden layer of the last RBM represents the output. When used for classification, the DBN is treated as a MLP, by adding a logistic regression layer on top. """ def __init__(self, numpy_rng, theano_rng=None, n_ins=N_FEATURES * N_FRAMES, hidden_layers_sizes=[1024, 1024], n_phn=62 * 3, n_spkr=1, rho=0.90, eps=1.E-6): """This class is made to support a variable number of layers. :type numpy_rng: numpy.random.RandomState :param numpy_rng: numpy random number generator used to draw initial weights :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams :param theano_rng: Theano random generator; if None is given one is generated based on a seed drawn from `rng` :type n_ins: int :param n_ins: dimension of the input to the DBN :type n_layers_sizes: list of ints :param n_layers_sizes: intermediate layers size, must contain at least one value :type n_outs: int :param n_outs: dimension of the output of the network """ self.sigmoid_layers = [] self.rbm_layers = [] self.params = [] self.n_layers = len(hidden_layers_sizes) #self._rho = shared(numpy.cast['float32'](rho), name='rho') # for adadelta #self._eps = shared(numpy.cast['float32'](eps), name='eps') # for adadelta self._rho = rho self._eps = eps self._accugrads = [] # for adadelta self._accudeltas = [] # for adadelta assert self.n_layers > 0 if not theano_rng: theano_rng = RandomStreams(numpy_rng.randint(2 ** 30)) # allocate symbolic variables for the data self.x = T.fmatrix('x') # the data is presented as rasterized images self.y_phn = T.ivector('y_phn') # the labels are presented as 1D vector # of [int] labels self.y_spkr = T.ivector('y_spkr') # the labels are presented as 1D vector # of [int] labels # The DBN is an MLP, for which all weights of intermediate # layers are shared with a different RBM. We will first # construct the DBN as a deep multilayer perceptron, and when # constructing each sigmoidal layer we also construct an RBM # that shares weights with that layer. During pretraining we # will train these RBMs (which will lead to chainging the # weights of the MLP as well) During finetuning we will finish # training the DBN by doing stochastic gradient descent on the # MLP. for i in xrange(self.n_layers): # construct the sigmoidal layer # the size of the input is either the number of hidden # units of the layer below or the input size if we are on # the first layer if i == 0: input_size = n_ins else: input_size = hidden_layers_sizes[i - 1] # the input to this layer is either the activation of the # hidden layer below or the input of the DBN if you are on # the first layer if i == 0: layer_input = self.x else: layer_input = self.sigmoid_layers[-1].output sigmoid_layer = HiddenLayer(rng=numpy_rng, input=layer_input, n_in=input_size, n_out=hidden_layers_sizes[i], activation=T.nnet.sigmoid) # add the layer to our list of layers self.sigmoid_layers.append(sigmoid_layer) # its arguably a philosophical question... but we are # going to only declare that the parameters of the # sigmoid_layers are parameters of the DBN. The visible # biases in the RBM are parameters of those RBMs, but not # of the DBN. 
self.params.extend(sigmoid_layer.params) self._accugrads.extend([shared(value=numpy.zeros((input_size, hidden_layers_sizes[i]), dtype='float32'), name='accugrad_W', borrow=True), shared(value=numpy.zeros((hidden_layers_sizes[i], ), dtype='float32'), name='accugrad_b', borrow=True)]) # TODO self._accudeltas.extend([shared(value=numpy.zeros((input_size, hidden_layers_sizes[i]), dtype='float32'), name='accudelta_W', borrow=True), shared(value=numpy.zeros((hidden_layers_sizes[i], ), dtype='float32'), name='accudelta_b', borrow=True)]) # TODO # Construct an RBM that shared weights with this layer if i == 0: rbm_layer = GRBM(numpy_rng=numpy_rng, theano_rng=theano_rng, input=layer_input, n_visible=input_size, n_hidden=hidden_layers_sizes[i], W=sigmoid_layer.W, hbias=sigmoid_layer.b) else: rbm_layer = RBM(numpy_rng=numpy_rng, theano_rng=theano_rng, input=layer_input, n_visible=input_size, n_hidden=hidden_layers_sizes[i], W=sigmoid_layer.W, hbias=sigmoid_layer.b) self.rbm_layers.append(rbm_layer) # We now need to add a logistic layer on top of the MLP self.logLayerPhn = LogisticRegression( input=self.sigmoid_layers[-1].output, n_in=hidden_layers_sizes[-1], n_out=n_phn) self.params.extend(self.logLayerPhn.params) self._accugrads.extend([shared(value=numpy.zeros((hidden_layers_sizes[-1], n_phn), dtype='float32'), name='accugrad_W', borrow=True), shared(value=numpy.zeros((n_phn, ), dtype='float32'), name='accugrad_b', borrow=True)]) # TODO self._accudeltas.extend([shared(value=numpy.zeros((hidden_layers_sizes[-1], n_phn), dtype='float32'), name='accudelta_W', borrow=True), shared(value=numpy.zeros((n_phn, ), dtype='float32'), name='accudelta_b', borrow=True)]) # TODO self.logLayerSpkr = LogisticRegression( input=self.sigmoid_layers[-1].output, n_in=hidden_layers_sizes[-1], n_out=n_spkr) self.params.extend(self.logLayerSpkr.params) self._accugrads.extend([shared(value=numpy.zeros((hidden_layers_sizes[-1], n_spkr), dtype='float32'), name='accugrad_W', borrow=True), shared(value=numpy.zeros((n_spkr, ), dtype='float32'), name='accugrad_b', borrow=True)]) # TODO self._accudeltas.extend([shared(value=numpy.zeros((hidden_layers_sizes[-1], n_spkr), dtype='float32'), name='accudelta_W', borrow=True), shared(value=numpy.zeros((n_spkr, ), dtype='float32'), name='accudelta_b', borrow=True)]) # TODO self.finetune_cost_sum_phn = self.logLayerPhn.negative_log_likelihood_sum(self.y_phn) self.finetune_cost_sum_spkr = self.logLayerSpkr.negative_log_likelihood_sum(self.y_spkr) self.finetune_cost_phn = self.logLayerPhn.negative_log_likelihood(self.y_phn) self.finetune_cost_spkr = self.logLayerSpkr.negative_log_likelihood(self.y_spkr) self.errors_phn = self.logLayerPhn.errors(self.y_phn) self.errors_spkr = self.logLayerSpkr.errors(self.y_spkr) def get_SGD_trainer(self): """ Returns a plain SGD minibatch trainer with learning rate as param. 
""" batch_x = T.fmatrix('batch_x') batch_y = T.ivector('batch_y') learning_rate = T.fscalar('lr') # learning rate to use cost = self.finetune_cost_sum # compute the gradients with respect to the model parameters gparams = T.grad(cost, self.params) # compute list of fine-tuning updates updates = OrderedDict() for param, gparam in zip(self.params, gparams): updates[param] = param - gparam * learning_rate train_fn = theano.function(inputs=[theano.Param(batch_x), theano.Param(batch_y), theano.Param(learning_rate)], outputs=cost, updates=updates, givens={self.x: batch_x, self.y: batch_y}) return train_fn def get_adadelta_trainer(self): """ Returns an Adadelta (Zeiler 2012) trainer using self._rho and self._eps params. """ batch_x = T.fmatrix('batch_x') batch_y_phn = T.ivector('batch_y_phn') batch_y_spkr = T.ivector('batch_y_spkr') cost_phn = self.finetune_cost_sum_phn cost_spkr = self.finetune_cost_sum_spkr # compute the gradients with respect to the model parameters gparams_phn = T.grad(cost_phn, self.params[:-2]) gparams_spkr = T.grad(cost_spkr, self.params[:-4] + self.params[-2:]) # compute list of fine-tuning updates updates = OrderedDict() for accugrad, accudelta, param, gparam in zip(self._accugrads[:-2], self._accudeltas[:-2], self.params[:-2], gparams_phn): # c.f. Algorithm 1 in the Adadelta paper (Zeiler 2012) agrad = self._rho * accugrad + (1 - self._rho) * gparam * gparam dx = - T.sqrt((accudelta + self._eps) / (agrad + self._eps)) * gparam updates[accudelta] = self._rho * accudelta + (1 - self._rho) * dx * dx updates[param] = param + dx updates[accugrad] = agrad for accugrad, accudelta, param, gparam in zip(self._accugrads[:-4] + self._accugrads[-2:], self._accudeltas[:-4] + self._accudeltas[-2:], self.params[:-4] + self.params[-2:], gparams_spkr): # c.f. Algorithm 1 in the Adadelta paper (Zeiler 2012) agrad = self._rho * accugrad + (1 - self._rho) * gparam * gparam dx = - T.sqrt((accudelta + self._eps) / (agrad + self._eps)) * gparam updates[accudelta] = self._rho * accudelta + (1 - self._rho) * dx * dx updates[param] = param + dx updates[accugrad] = agrad train_fn = theano.function(inputs=[theano.Param(batch_x), theano.Param(batch_y_phn), theano.Param(batch_y_spkr)], outputs=(cost_phn, cost_spkr), updates=updates, givens={self.x: batch_x, self.y_phn: batch_y_phn, self.y_spkr: batch_y_spkr}) return train_fn def get_adadelta_trainers(self): """ Returns an Adadelta (Zeiler 2012) trainer using self._rho and self._eps params. """ batch_x = T.fmatrix('batch_x') batch_y_phn = T.ivector('batch_y_phn') batch_y_spkr = T.ivector('batch_y_spkr') #cost_phn = self.finetune_cost_sum_phn cost_phn = self.finetune_cost_phn #cost_spkr = self.finetune_cost_sum_spkr cost_spkr = self.finetune_cost_spkr # compute the gradients with respect to the model parameters gparams_phn = T.grad(cost_phn, self.params[:-2]) gparams_spkr = T.grad(cost_spkr, self.params[:-4] + self.params[-2:]) # compute list of fine-tuning updates updates = OrderedDict() for accugrad, accudelta, param, gparam in zip(self._accugrads[:-2], self._accudeltas[:-2], self.params[:-2], gparams_phn): # c.f. 
Algorithm 1 in the Adadelta paper (Zeiler 2012) agrad = self._rho * accugrad + (1 - self._rho) * gparam * gparam dx = - T.sqrt((accudelta + self._eps) / (agrad + self._eps)) * gparam updates[accudelta] = self._rho * accudelta + (1 - self._rho) * dx * dx updates[param] = param + dx updates[accugrad] = agrad train_fn_phn = theano.function(inputs=[theano.Param(batch_x), theano.Param(batch_y_phn)], outputs=cost_phn, updates=updates, givens={self.x: batch_x, self.y_phn: batch_y_phn}) updates = OrderedDict() for accugrad, accudelta, param, gparam in zip(self._accugrads[:-4] + self._accugrads[-2:], self._accudeltas[:-4] + self._accudeltas[-2:], self.params[:-4] + self.params[-2:], gparams_spkr): # c.f. Algorithm 1 in the Adadelta paper (Zeiler 2012) agrad = self._rho * accugrad + (1 - self._rho) * gparam * gparam dx = - T.sqrt((accudelta + self._eps) / (agrad + self._eps)) * gparam updates[accudelta] = self._rho * accudelta + (1 - self._rho) * dx * dx updates[param] = param + dx updates[accugrad] = agrad train_fn_spkr = theano.function(inputs=[theano.Param(batch_x), theano.Param(batch_y_spkr)], outputs=cost_spkr, updates=updates, #givens={self.x: batch_x[20:24,:], self.y_spkr: batch_y_spkr[20:24]}) givens={self.x: batch_x, self.y_spkr: batch_y_spkr}) return train_fn_phn, train_fn_spkr def train_only_classif(self): batch_x = T.fmatrix('batch_x') batch_y_phn = T.ivector('batch_y_phn') batch_y_spkr = T.ivector('batch_y_spkr') #cost_phn = self.finetune_cost_sum_phn cost_phn = self.finetune_cost_phn #cost_spkr = self.finetune_cost_sum_spkr cost_spkr = self.finetune_cost_spkr # compute the gradients with respect to the model parameters gparams_phn = T.grad(cost_phn, self.params[-4:-2]) gparams_spkr = T.grad(cost_spkr, self.params[-2:]) # compute list of fine-tuning updates updates = OrderedDict() for accugrad, accudelta, param, gparam in zip(self._accugrads[-4:-2], self._accudeltas[-4:-2], self.params[-4:-2], gparams_phn): # c.f. Algorithm 1 in the Adadelta paper (Zeiler 2012) agrad = self._rho * accugrad + (1 - self._rho) * gparam * gparam dx = - T.sqrt((accudelta + self._eps) / (agrad + self._eps)) * gparam updates[accudelta] = self._rho * accudelta + (1 - self._rho) * dx * dx updates[param] = param + dx updates[accugrad] = agrad train_fn_phn = theano.function(inputs=[theano.Param(batch_x), theano.Param(batch_y_phn)], outputs=cost_phn, updates=updates, givens={self.x: batch_x, self.y_phn: batch_y_phn}) updates = OrderedDict() for accugrad, accudelta, param, gparam in zip(self._accugrads[-2:], self._accudeltas[-2:], self.params[-2:], gparams_spkr): # c.f. Algorithm 1 in the Adadelta paper (Zeiler 2012) agrad = self._rho * accugrad + (1 - self._rho) * gparam * gparam dx = - T.sqrt((accudelta + self._eps) / (agrad + self._eps)) * gparam updates[accudelta] = self._rho * accudelta + (1 - self._rho) * dx * dx updates[param] = param + dx updates[accugrad] = agrad train_fn_spkr = theano.function(inputs=[theano.Param(batch_x), theano.Param(batch_y_spkr)], outputs=cost_spkr, updates=updates, #givens={self.x: batch_x[20:24,:], self.y_spkr: batch_y_spkr[20:24]}) givens={self.x: batch_x, self.y_spkr: batch_y_spkr}) return train_fn_phn, train_fn_spkr def score_classif(self, given_set): """ Returns functions to get current classification scores. 
""" batch_x = T.fmatrix('batch_x') batch_y_phn = T.ivector('batch_y_phn') batch_y_spkr = T.ivector('batch_y_spkr') score = theano.function(inputs=[theano.Param(batch_x), theano.Param(batch_y_phn), theano.Param(batch_y_spkr)], outputs=(self.errors_phn, self.errors_spkr), givens={self.x: batch_x, self.y_phn: batch_y_phn, self.y_spkr: batch_y_spkr}) # Create a function that scans the entire set given as input def scoref(): return [score(batch_x, batch_y_phn, batch_y_spkr) for batch_x, batch_y_phn, batch_y_spkr in given_set] return scoref
batch_size=batch_size, shuffle=False) # * Display an image from its pixels # plt.imshow(features[100].reshape(28,28)) # plt.axis("off") # plt.savefig('graph.png') # plt.show() # ! Build the neural network # * The input consists of 28 * 28 pixels as features input_dim = 28 * 28 # * The output is the probability of each of the 10 classes output_dim = 10 # * Initialize the model model = LogisticRegression(input_dim, output_dim) # * Cross entropy loss function error = nn.CrossEntropyLoss() # * SGD learning_rate = 0.001 optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate) # * Run the training loop over the training set to optimize the model i = 0 loss_list = [] for epoch in range(epochs): for it, (feature, label) in enumerate(train_loader): train = Variable(feature.view(-1, 28 * 28)) label = Variable(label) optimizer.zero_grad() predict = model(train)
@author: sandicalhoun """ import numpy as np from util import read_file import ffs import tags from logistic_regression import LogisticRegression """Import a small sample dataset and run calcgis. Export the output to a csv.""" data_sample, labels_sample = read_file('sample') lr = LogisticRegression(method="collins", max_iters=1) labels_proc = lr.preproclabels(labels_sample) i = int(np.random.rand() * len(data_sample)) n = len(data_sample[i]) ws = np.random.rand(ffs.numJ) x = data_sample[i] y = labels_proc[i] #lr.calcgis(ws, x, n) print data_sample[i] print labels_sample[i],y print ws lr.calcAs(x, n)
class RRNN(object): """Recurrent ReLU Neural Network """ def __init__(self, numpy_rng, theano_rng=None, n_ins=N_FEATURES * N_FRAMES, relu_layers_sizes=[1024, 1024, 1024], recurrent_connections=[2], # layer(s), can only be i^t -> i^{t+1} n_outs=62 * 3, rho=0.9, eps=1.E-6): """ TODO """ self.relu_layers = [] self.params = [] self.n_layers = len(relu_layers_sizes) self._rho = rho # ``momentum'' for adadelta self._eps = eps # epsilon for adadelta self._accugrads = [] # for adadelta self._accudeltas = [] # for adadelta self.n_outs = n_outs assert self.n_layers > 0 if not theano_rng: theano_rng = RandomStreams(numpy_rng.randint(2 ** 30)) self.x = T.fmatrix('x') self.y = T.ivector('y') for i in xrange(self.n_layers): if i == 0: input_size = n_ins else: input_size = relu_layers_sizes[i-1] if i == 0: layer_input = self.x else: layer_input = self.relu_layers[-1].output if i in recurrent_connections: inputr_size = relu_layers_sizes[i] previous_output = T.fmatrix('previous_output') relu_layer = RecurrentReLU(rng=numpy_rng, input=layer_input, in_stack=previous_output, n_in=input_size, n_in_stack=inputr_size, n_out=inputr_size) #relu_layer.in_stack = relu_layer.output # TODO TODO TODO self.params.extend(relu_layer.params) self._accugrads.extend([shared(value=numpy.zeros((n_ins, relu_layers_sizes[0]), dtype='float32'), name='accugrad_W', borrow=True), shared(value=numpy.zeros((relu_layers_sizes[0], ), dtype='float32'), name='accugrad_b', borrow=True), shared(value=numpy.zeros((n_outs, relu_layers_sizes[0]), dtype='float32'), name='accugrad_Ws', borrow=True)]) self._accudeltas.extend([shared(value=numpy.zeros((n_ins, relu_layers_sizes[0]), dtype='float32'), name='accudelta_W', borrow=True), shared(value=numpy.zeros((relu_layers_sizes[0], ), dtype='float32'), name='accudelta_b', borrow=True), shared(value=numpy.zeros((n_outs, relu_layers_sizes[0]), dtype='float32'), name='accudelta_Ws', borrow=True)]) else: relu_layer = ReLU(rng=numpy_rng, input=layer_input, n_in=input_size, n_out=relu_layers_sizes[i]) self.params.extend(relu_layer.params) self._accugrads.extend([shared(value=numpy.zeros((input_size, relu_layers_sizes[i]), dtype='float32'), name='accugrad_W', borrow=True), shared(value=numpy.zeros((relu_layers_sizes[i], ), dtype='float32'), name='accugrad_b', borrow=True)]) self._accudeltas.extend([shared(value=numpy.zeros((input_size, relu_layers_sizes[i]), dtype='float32'), name='accudelta_W', borrow=True), shared(value=numpy.zeros((relu_layers_sizes[i], ), dtype='float32'), name='accudelta_b', borrow=True)]) self.relu_layers.append(relu_layer) # We now need to add a logistic layer on top of the MLP self.logLayer = LogisticRegression( input=self.relu_layers[-1].output, n_in=relu_layers_sizes[-1], n_out=n_outs) self.params.extend(self.logLayer.params) self._accugrads.extend([shared(value=numpy.zeros((relu_layers_sizes[-1], n_outs), dtype='float32'), name='accugrad_W', borrow=True), shared(value=numpy.zeros((n_outs, ), dtype='float32'), name='accugrad_b', borrow=True)]) self._accudeltas.extend([shared(value=numpy.zeros((relu_layers_sizes[-1], n_outs), dtype='float32'), name='accudelta_W', borrow=True), shared(value=numpy.zeros((n_outs, ), dtype='float32'), name='accudelta_b', borrow=True)]) # compute the cost for second phase of training, defined as the # negative log likelihood of the logistic regression (output) layer self.finetune_cost = self.logLayer.negative_log_likelihood(self.y) self.finetune_cost_sum = self.logLayer.negative_log_likelihood_sum(self.y) # compute the gradients with respect to the 
model parameters # symbolic variable that points to the number of errors made on the # minibatch given by self.x and self.y self.errors = self.logLayer.errors(self.y) def get_SGD_trainer(self): """ Returns a plain SGD minibatch trainer with learning rate as param. """ batch_x = T.fmatrix('batch_x') batch_y = T.ivector('batch_y') learning_rate = T.fscalar('lr') # learning rate to use cost = self.finetune_cost_sum # compute the gradients with respect to the model parameters gparams = T.grad(cost, self.params) # compute list of fine-tuning updates updates = OrderedDict() for param, gparam in zip(self.params, gparams): updates[param] = param - gparam * learning_rate train_fn = theano.function(inputs=[theano.Param(batch_x), theano.Param(batch_y), theano.Param(learning_rate)], outputs=cost, updates=updates, givens={self.x: batch_x, self.y: batch_y}) return train_fn def get_adadelta_trainer(self): """ Returns an Adadelta (Zeiler 2012) trainer using self._rho and self._eps params. """ batch_x = T.fmatrix('batch_x') batch_y = T.ivector('batch_y') cost = self.finetune_cost_sum # compute the gradients with respect to the model parameters gparams = T.grad(cost, self.params) # compute list of fine-tuning updates updates = OrderedDict() for accugrad, accudelta, param, gparam in zip(self._accugrads, self._accudeltas, self.params, gparams): # c.f. Algorithm 1 in the Adadelta paper (Zeiler 2012) agrad = self._rho * accugrad + (1 - self._rho) * gparam * gparam dx = - T.sqrt((accudelta + self._eps) / (agrad + self._eps)) * gparam updates[accudelta] = self._rho * accudelta + (1 - self._rho) * dx * dx updates[param] = param + dx updates[accugrad] = agrad train_fn = theano.function(inputs=[theano.Param(batch_x), theano.Param(batch_y)], outputs=cost, updates=updates, givens={self.x: batch_x, self.y: batch_y}) return train_fn def get_adagrad_trainer(self): """ Returns an Adagrad (Duchi et al. 2010) trainer using a learning rate. """ batch_x = T.fmatrix('batch_x') batch_y = T.ivector('batch_y') learning_rate = T.fscalar('lr') # learning rate to use cost = self.finetune_cost_sum # compute the gradients with respect to the model parameters gparams = T.grad(cost, self.params) # compute list of fine-tuning updates updates = OrderedDict() for accugrad, param, gparam in zip(self._accugrads, self.params, gparams): # c.f. Algorithm 1 in the Adadelta paper (Zeiler 2012) agrad = accugrad + gparam * gparam dx = - (learning_rate / T.sqrt(agrad + self._eps)) * gparam updates[param] = param + dx updates[accugrad] = agrad train_fn = theano.function(inputs=[theano.Param(batch_x), theano.Param(batch_y), theano.Param(learning_rate)], outputs=cost, updates=updates, givens={self.x: batch_x, self.y: batch_y}) return train_fn def score_classif(self, given_set): """ Returns functions to get current classification scores. """ batch_x = T.fmatrix('batch_x') batch_y = T.ivector('batch_y') score = theano.function(inputs=[theano.Param(batch_x), theano.Param(batch_y)], outputs=self.errors, givens={self.x: batch_x, self.y: batch_y}) # Create a function that scans the entire set given as input def scoref(): return [score(batch_x, batch_y) for batch_x, batch_y in given_set] return scoref
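# One Adagrad step (Duchi et al. 2010), as in get_adagrad_trainer above: squared
# gradients accumulate and divide the learning rate, so the step size shrinks over
# time. Demonstrated on a toy quadratic cost 0.5 * ||param||^2; names are illustrative.
import numpy as np

def adagrad_step(param, grad, accugrad, learning_rate=0.1, eps=1.E-6):
    accugrad = accugrad + grad * grad
    dx = -(learning_rate / np.sqrt(accugrad + eps)) * grad
    return param + dx, accugrad

param = np.array([2.0, -1.5])
accugrad = np.zeros_like(param)
for _ in range(500):
    param, accugrad = adagrad_step(param, param, accugrad)
print(param)                                   # decays toward zero as steps shrink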
from matplotlib import pyplot as pp from sklearn.model_selection import train_test_split # sklearn.cross_validation on older scikit-learn releases from sklearn.metrics import accuracy_score from util import read_file from logistic_regression import LogisticRegression data, labels = read_file('../1571/train.txt') data_train, data_valid, labels_train, labels_valid = \ train_test_split(data, labels, test_size=0.3, random_state=0) mus = list(10 ** x for x in range(-8, 2)) sgd_scores = [] for mu in mus: sgd_model = LogisticRegression(method="sgd", mu=mu, rate=0.1, decay=0.6, random_state=0) sgd_model.fit(data_train, labels_train) predicted = sgd_model.predict(data_valid) sgd_scores.append(accuracy_score(labels_valid, predicted)) pp.figure() pp.xscale('log') pp.scatter(mus, sgd_scores) pp.xlabel('regularization strength') pp.ylabel('accuracy') pp.savefig('./sgd_regularization.png') lbfgs_scores = [] for mu in mus: sgd_model = LogisticRegression(method="lbfgs", mu=mu, rate=0.1,
def __init__(self, numpy_rng, theano_rng=None, n_ins=N_FEATURES * N_FRAMES, relu_layers_sizes=[1024, 1024, 1024], recurrent_connections=[2], # layer(s), can only be i^t -> i^{t+1} n_outs=62 * 3, rho=0.9, eps=1.E-6): """ TODO """ self.relu_layers = [] self.params = [] self.n_layers = len(relu_layers_sizes) self._rho = rho # ``momentum'' for adadelta self._eps = eps # epsilon for adadelta self._accugrads = [] # for adadelta self._accudeltas = [] # for adadelta self.n_outs = n_outs assert self.n_layers > 0 if not theano_rng: theano_rng = RandomStreams(numpy_rng.randint(2 ** 30)) self.x = T.fmatrix('x') self.y = T.ivector('y') for i in xrange(self.n_layers): if i == 0: input_size = n_ins else: input_size = relu_layers_sizes[i-1] if i == 0: layer_input = self.x else: layer_input = self.relu_layers[-1].output if i in recurrent_connections: inputr_size = relu_layers_sizes[i] previous_output = T.fmatrix('previous_output') relu_layer = RecurrentReLU(rng=numpy_rng, input=layer_input, in_stack=previous_output, n_in=input_size, n_in_stack=inputr_size, n_out=inputr_size) #relu_layer.in_stack = relu_layer.output # TODO TODO TODO self.params.extend(relu_layer.params) self._accugrads.extend([shared(value=numpy.zeros((n_ins, relu_layers_sizes[0]), dtype='float32'), name='accugrad_W', borrow=True), shared(value=numpy.zeros((relu_layers_sizes[0], ), dtype='float32'), name='accugrad_b', borrow=True), shared(value=numpy.zeros((n_outs, relu_layers_sizes[0]), dtype='float32'), name='accugrad_Ws', borrow=True)]) self._accudeltas.extend([shared(value=numpy.zeros((n_ins, relu_layers_sizes[0]), dtype='float32'), name='accudelta_W', borrow=True), shared(value=numpy.zeros((relu_layers_sizes[0], ), dtype='float32'), name='accudelta_b', borrow=True), shared(value=numpy.zeros((n_outs, relu_layers_sizes[0]), dtype='float32'), name='accudelta_Ws', borrow=True)]) else: relu_layer = ReLU(rng=numpy_rng, input=layer_input, n_in=input_size, n_out=relu_layers_sizes[i]) self.params.extend(relu_layer.params) self._accugrads.extend([shared(value=numpy.zeros((input_size, relu_layers_sizes[i]), dtype='float32'), name='accugrad_W', borrow=True), shared(value=numpy.zeros((relu_layers_sizes[i], ), dtype='float32'), name='accugrad_b', borrow=True)]) self._accudeltas.extend([shared(value=numpy.zeros((input_size, relu_layers_sizes[i]), dtype='float32'), name='accudelta_W', borrow=True), shared(value=numpy.zeros((relu_layers_sizes[i], ), dtype='float32'), name='accudelta_b', borrow=True)]) self.relu_layers.append(relu_layer) # We now need to add a logistic layer on top of the MLP self.logLayer = LogisticRegression( input=self.relu_layers[-1].output, n_in=relu_layers_sizes[-1], n_out=n_outs) self.params.extend(self.logLayer.params) self._accugrads.extend([shared(value=numpy.zeros((relu_layers_sizes[-1], n_outs), dtype='float32'), name='accugrad_W', borrow=True), shared(value=numpy.zeros((n_outs, ), dtype='float32'), name='accugrad_b', borrow=True)]) self._accudeltas.extend([shared(value=numpy.zeros((relu_layers_sizes[-1], n_outs), dtype='float32'), name='accudelta_W', borrow=True), shared(value=numpy.zeros((n_outs, ), dtype='float32'), name='accudelta_b', borrow=True)]) # compute the cost for second phase of training, defined as the # negative log likelihood of the logistic regression (output) layer self.finetune_cost = self.logLayer.negative_log_likelihood(self.y) self.finetune_cost_sum = self.logLayer.negative_log_likelihood_sum(self.y) # compute the gradients with respect to the model parameters # symbolic variable that points to the 
number of errors made on the # minibatch given by self.x and self.y self.errors = self.logLayer.errors(self.y)
num = int(x.shape[0] * .7) x_cv = x[num : :, :] y_cv = y[num : :] x = x[0 : num, :] y = y[0 : num] # Feature scaling. x, mu, sigma = scale_data(x) x_cv = (x_cv - mu) / sigma # Use cross validation set to find the best lambda for regularization. C_candidates = [0, 0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30] lambda_ = 0 best_accuracy = 0 for C in C_candidates: clf = LogisticRegression(x, y, C) clf.learn() p_cv = clf.predict(x_cv) accuracy = (p_cv == y_cv).mean() if accuracy > best_accuracy: best_accuracy = accuracy lambda_ = C print 'Best regularization parameter lambda: %f' % lambda_ clf = LogisticRegression(x, y, lambda_) clf.learn() p = clf.predict(x) p_cv = clf.predict(x_cv) print 'Accuracy in training set: %f'% (p == y).mean() print 'Accuracy in cv: %f' % (p_cv == y_cv).mean()
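# The scale_data helper used above is not shown in this section. A likely sketch:
# standardize each column to zero mean and unit variance, and return mu and sigma
# so a held-out set can be scaled with the same statistics, exactly as x_cv is
# above. Treat this as an assumption about the helper, not its actual code.
import numpy as np

def scale_data_sketch(x):
    mu = x.mean(axis=0)
    sigma = x.std(axis=0)
    sigma[sigma == 0] = 1.0                    # guard against constant columns
    return (x - mu) / sigma, mu, sigma

x_demo = np.array([[1.0, 10.0], [2.0, 20.0], [3.0, 30.0]])
x_scaled, mu, sigma = scale_data_sketch(x_demo)
x_cv_demo = np.array([[2.5, 25.0]])
print(x_scaled)
print((x_cv_demo - mu) / sigma)                # same transform applied to the cv set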