Example #1
def test():
    train = read_data('./data/data2.csv')
    test_x = read_data('./data/test2.csv')

    # Generates training set and test set.
    train_x = train[:, 1 : :]
    train_y = train[:, 0]

    train_x = map_feature(train_x)
    test_x = map_feature(test_x)

    # Feature scaling.
    train_x, mu, sigma = scale_data(train_x)
    test_x = (test_x - mu) / sigma

    clf = LogisticRegression(train_x, train_y, 0.1)
    clf.learn()
    output = clf.predict(test_x)

    # Write results to file.
    seedling=open("./data/logistic_regression.csv", "wb")
    lr_csv = csv.writer(seedling)
    lr_csv.writerow(['PassengerId','Survived'])
    for i in range(len(output)):
        row = [str(i+892), output[i].astype(uint8)]
        lr_csv.writerow(row)
    seedling.close()
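A note on the CSV writing above: opening the file in binary mode matches Python 2's csv module; under Python 3 the same loop would use text mode with newline=''. A minimal sketch of that variant, assuming output is an array of 0/1 predictions (the helper name is hypothetical):

import csv

def write_predictions(output, path="./data/logistic_regression.csv", first_id=892):
    # Hypothetical helper: one (PassengerId, Survived) row per prediction, Python 3 csv style.
    with open(path, "w", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(["PassengerId", "Survived"])
        for i, value in enumerate(output):
            writer.writerow([first_id + i, int(value)])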
Example #2
def runML(meth, itrs, data_train, data_test, labels_train, labels_test):
    print meth,datetime.now().time()
    model = LogisticRegression(method=meth,max_iters=itrs)
    model.fit(data_train, labels_train)
    print datetime.now().time()
    prediction = model.predict(data_test)
    tagscores = LogisticRegression.tagAccuracy(labels_test, prediction)
    score = np.mean(tagscores)
    print "  score tags: mean: {}, max: {}, min: {}".format(score,max(tagscores),min(tagscores))
    print "  error rate: {}".format(1 - score)
    print datetime.now().time()
Example #3
def standard_lr(x_train, y_train, x_valid, y_valid):
    from sklearn.linear_model import LogisticRegression
    lr = LogisticRegression(penalty='l2', max_iter=500, solver='sag', multi_class='ovr')
    lr.fit(x_train, y_train)
    pre = lr.predict(x_valid)

    correct = 0
    for i in range(len(y_valid)):
        if pre[i] == y_valid[i]:
            correct += 1
    print correct*1.0/len(y_valid)
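Since this example already imports scikit-learn, the manual counting loop above can usually be replaced by the estimator's score method or by sklearn.metrics.accuracy_score; a short equivalent using the same variable names from the function above:

from sklearn.metrics import accuracy_score

# Both lines report the same fraction of correct predictions as the loop above.
print(lr.score(x_valid, y_valid))
print(accuracy_score(y_valid, pre))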
 def __init__(self):
     LogisticRegression.__init__(self)
     # Array with one entry per feature: 0 if the feature is numerical, 1 if it is categorical
     self.x_types = None
     # Array holding each feature's index within the numerical or categorical features, respectively
     self.x_types_index = None
     self.band_width_vector = None
     self.numerical_index = None
     self.categorical_index = None
     self.feature_vectors_for_numeric = None
     self.feature_vectors_for_category = None
     self.max_values = None
     self.min_values = None
     self.num_of_bins = 0
     self.bin_length = None
Example #5
def test_lr(x_train, y_train, x_valid, y_valid):
    a = np.array([1.0 for i in range(len(x_train))])
    x_train = np.column_stack((x_train, a))

    lr = LogisticRegression(alpha=0.01, regularization='', num_iters=3000)
    theta, cost = lr.train(x_train, y_train, verbose=True, optimizer="sgd")

    a = np.array([1.0 for i in range(len(x_valid))])
    x_valid = np.column_stack((x_valid, a))

    correct = 0
    for i in range(len(x_valid)):
        label = lr.classify(x_valid[i], theta)
        if label == y_valid[i]:
            correct += 1
    print "accuracy:", correct*1.0/len(x_valid)
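The bias-column blocks above build the column of ones with a list comprehension; np.ones does the same thing directly. A small sketch (the helper name is hypothetical):

import numpy as np

def add_bias_column(x):
    # Append a column of ones so the model can learn an intercept term.
    return np.column_stack((x, np.ones(len(x))))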
Example #6
    def __init__(self):
        import theano
        import util
        from theano import tensor as T
        from logistic_regression import LogisticRegression
        self.index = T.iscalar('index')
        self.BATCH_SIZE = 100
        self.LEARNING_RATE = 0.12
        self.dataSets = util.loadMnistData("mnist.pkl.gz")
        self.x = T.dmatrix('x')
        self.y = T.ivector('y')
        self.index = T.iscalar('index')
        self.classifier = LogisticRegression(input=self.x, nIn=28 * 28, nOut=10)
        self.cost = self.classifier.negativeLogLikelihood(self.y)
        self.gW = T.grad(cost=self.cost, wrt=self.classifier.W)
        self.gB = T.grad(cost=self.cost, wrt=self.classifier.b)
        self.trainSet, self.validSet, self.testSet = self.dataSets
        self.nTrainSet, self.nValidSet, self.nTestSet = map(self.numBatches, self.dataSets)
        updates = [
            (self.classifier.W, self.classifier.W - self.LEARNING_RATE * self.gW),
            (self.classifier.b, self.classifier.b - self.LEARNING_RATE * self.gB)
        ]

        def makeGivens(data):
            return {
                self.x: data[0][self.index * self.BATCH_SIZE:(self.index + 1) * self.BATCH_SIZE],
                self.y: data[1][self.index * self.BATCH_SIZE:(self.index + 1) * self.BATCH_SIZE]
            }

        self.testModel = theano.function(
            inputs=[self.index],
            outputs=self.classifier.errors(self.y),
            givens=makeGivens(self.dataSets[2])
        )
        self.validationModel = theano.function(
            inputs=[self.index],
            outputs=self.classifier.errors(self.y),
            givens=makeGivens(self.dataSets[1])
        )
        self.trainModel = theano.function(
            inputs=[self.index],
            outputs=self.cost,
            updates=updates,
            givens=makeGivens(self.dataSets[0])
        )
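The class above only compiles the Theano functions; no driving loop is shown. A minimal sketch of how trainModel and validationModel are typically iterated, assuming an instance named trainer of the class above (the function name run_epochs is hypothetical):

import numpy as np

def run_epochs(trainer, n_epochs=10):
    # Sweep the training minibatches each epoch, then report the mean validation error.
    for epoch in range(n_epochs):
        for batch_index in range(trainer.nTrainSet):
            trainer.trainModel(batch_index)
        valid_errors = [trainer.validationModel(i) for i in range(trainer.nValidSet)]
        print('epoch %d, validation error %f' % (epoch + 1, np.mean(valid_errors)))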
Example #7
def sgd(mus, rates, decays, data, labels, data_train, labels_train,
        data_valid, labels_valid, data_test, labels_test):
    print "starting grid search for SGD"
    validation_results = {}
    dicts = []
    for mu in mus:
        for rate in rates:
            for decay in decays:
                print "trying mu={} rate={} decay={}".format(mu, rate, decay)
                model = LogisticRegression(method="sgd", mu=mu,
                                           rate=rate, decay=decay,
                                           random_state=0)
                model.fit(data_train, labels_train)
                prediction = model.predict(data_valid)
                score = accuracy_score(labels_valid, prediction)
                validation_results[(mu, rate, decay)] = score
                print "  score: {}".format(score)
                print "  error rate: {}".format(1 - score)

                d = dict(method="sgd", mu=mu, rate=rate, decay=decay,
                         score=score, lcl=model.lcl_,
                         rlcl=model.rlcl_, test=False)
                dicts.append(d)

    print "evaluating on test set"
    # get hyperparameters for highest accuracy on validation set
    mu, rate, decay = max(validation_results, key=validation_results.get)
    print "Using mu={} rate={} decay={}".format(mu, rate, decay)

    # train on entire train set and predict on test set
    model = LogisticRegression(method="sgd", mu=mu, rate=rate,
                               decay=decay, random_state=0)
    model.fit(data, labels)
    prediction = model.predict(data_test)
    score = accuracy_score(labels_test, prediction)

    print "SGD test score: {}, error rate: {}".format(score, 1 - score)

    d = dict(method="sgd", mu=mu, rate=rate, decay=decay, score=score,
             lcl=model.lcl_, rlcl=model.rlcl_, test=True)
    dicts.append(d)
    return pd.DataFrame(dicts)
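The three nested loops over mus, rates and decays can be flattened with itertools.product, which keeps the body at one indentation level; a sketch with placeholder grids (the values shown are not from the original):

from itertools import product

mus, rates, decays = [0.0, 0.1], [1e-2, 1e-3], [0.9, 0.99]  # placeholder grids
# product yields the same (mu, rate, decay) combinations as the nested loops above.
for mu, rate, decay in product(mus, rates, decays):
    print("mu={} rate={} decay={}".format(mu, rate, decay))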
Example #8
def cross_valid():
    x = read_data()

    # Generates training set and cross validation set.
    y = x[:, 0]
    x = x[:, 1 : :]
    x = map_feature(x)
    num = int(x.shape[0] * .7)
    x_cv = x[num : :, :]
    y_cv = y[num : :]
    x = x[0 : num, :]
    y = y[0 : num]

    # Feature scaling.
    x, mu, sigma = scale_data(x)
    x_cv = (x_cv - mu) / sigma

    # Use cross validation set to find the best lambda for regularization.
    C_candidates = [0, 0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30]
    lambda_ = 0
    best_accuracy = 0
    for C in C_candidates:
        clf = LogisticRegression(x, y, C)
        clf.learn()
        p_cv = clf.predict(x_cv)
        accuracy = (p_cv == y_cv).mean()
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            lambda_ = C
    print 'Best regularization parameter lambda: %f' % lambda_

    clf = LogisticRegression(x, y, lambda_)
    clf.learn()
    p = clf.predict(x)
    p_cv = clf.predict(x_cv)
    print 'Accuracy in training set: %f'% (p == y).mean()
    print 'Accuracy in cv: %f' %  (p_cv == y_cv).mean()
Example #9
def lbfgs(mus, data, labels, data_train, labels_train,
          data_valid, labels_valid, data_test, labels_test):
    print "starting grid search for L-BFGS"
    validation_results = {}
    dicts = []
    for mu in mus:
        print "trying mu={}".format(mu)
        model = LogisticRegression(method="lbfgs", mu=mu)
        model.fit(data_train, labels_train)
        prediction = model.predict(data_valid)
        score = accuracy_score(labels_valid, prediction)
        validation_results[mu] = score
        print "  score: {}".format(score)
        print "  error rate: {}".format(1 - score)

        d = dict(method="lbfgs", mu=mu, rate=-1, decay=-1,
                 score=score, lcl=model.lcl_, rlcl=model.rlcl_,
                 test=False)
        dicts.append(d)

    print "evaluating on test set"

    # get hyperparameters for highest accuracy on validation set
    mu = max(validation_results, key=validation_results.get)

    print "Using mu of {}".format(mu)

    # train on entire train set and predict on test set
    model = LogisticRegression(method="lbfgs", mu=mu)
    model.fit(data, labels)
    prediction = model.predict(data_test)
    score = accuracy_score(labels_test, prediction)

    print "L-BFGS test score: {}, error rate: {}".format(score, 1 - score)

    d = dict(method="lbfgs", mu=mu, rate=-1, decay=-1,
             score=score, lcl=model.lcl_, rlcl=model.rlcl_, test=True)
    dicts.append(d)
    return pd.DataFrame(dicts)
Example #10
 def __init__(self,
              numpyRng,
              theanoRng=None,
              nIn=28*28,
              hiddenLayerSizes=[500,500],
              nOut=10):
     self.nLayers = len(hiddenLayerSizes)
     if not theanoRng:
         theanoRng = theano.tensor.shared_randomstreams.RandomStreams(numpyRng.randint(2 ** 30))
     self.x = T.matrix('x')
     self.y = T.ivector('y')
     def makeSigmoidLayer(lastLayer,lastLayerSize,size):
         return Layer(rng=numpyRng,input=lastLayer,nIn=lastLayerSize,nOut=size,activation=T.nnet.sigmoid)
     def makeDALayer(lastLayer,lastLayerSize,size,sigmoidLayer):
         return DenoisingAutoEncoder(
             numpyRng=numpyRng,theanoRng=theanoRng,input=lastLayer,
             nVisible=lastLayerSize,
             nHidden=size,
             W=sigmoidLayer.W,
             bHidden=sigmoidLayer.b)
     def makeLayers(lastLayer,lastInputSize,nextLayerSizes):
         if nextLayerSizes:
             newList = list(nextLayerSizes)
             size = newList.pop()
             sigmoidLayer = makeSigmoidLayer(lastLayer,lastInputSize,size)
             daLayer = makeDALayer(lastLayer,lastInputSize,size,sigmoidLayer)
             yield (sigmoidLayer,daLayer)
             for layer in makeLayers(sigmoidLayer.output,size,newList):
                 yield layer
     self.sigmoidLayers,self.dALayers = zip(*makeLayers(self.x,nIn,reversed(hiddenLayerSizes)))
     print "created sda with layer shapes below."
     for da in self.dALayers:
         
         print "layersize:", da.W.get_value().shape
     self.logLayer = LogisticRegression(self.sigmoidLayers[-1].output,hiddenLayerSizes[-1],nOut)
     # Collect the parameters of the sigmoid layers and of the logistic regression output layer
     self.params = [l.params for l in self.sigmoidLayers] + [self.logLayer.params]
     self.fineTuneCost = self.logLayer.negativeLogLikelihood(self.y)
     self.errors = self.logLayer.errors(self.y)
    def __init__(self, rng, input, n_in, n_hidden, n_out):
        """Initialize the parameters for the multilayer perceptron

        :type rng: numpy.random.RandomState
        :param rng: a random number generator used to initialize weights

        :type input: theano.tensor.TensorType
        :param input: symbolic variable that describes the input of the
        architecture (one minibatch)

        :type n_in: int
        :param n_in: number of input units, the dimension of the space in
        which the datapoints lie

        :type n_hidden: int
        :param n_hidden: number of hidden units

        :type n_out: int
        :param n_out: number of output units, the dimension of the space in
        which the labels lie

        """

        # Since we are dealing with a one hidden layer MLP, this will translate
        # into a HiddenLayer with a tanh activation function connected to the
        # LogisticRegression layer; the activation function can be replaced by
        # sigmoid or any other nonlinear function
        self.hiddenLayer = HiddenLayer(
            rng=rng,
            input=input,
            n_in=n_in,
            n_out=n_hidden,
            activation=T.tanh
        )

        # The logistic regression layer gets as input the hidden units
        # of the hidden layer
        self.logRegressionLayer = LogisticRegression(
            input=self.hiddenLayer.output,
            n_in=n_hidden,
            n_out=n_out
        )

        # Enforce L1 norm to be small
        self.L1 = (
            abs(self.hiddenLayer.W).sum()
            + abs(self.logRegressionLayer.W).sum()
        )

        # Enforce square of L2 norm to be small
        self.L2_sqr = (
            (self.hiddenLayer.W ** 2).sum()
            + (self.logRegressionLayer.W ** 2).sum()
        )

        # negative log likelihood of MLP is negative log likelihood of model
        # which is NLL of LR layer
        self.negative_log_likelihood = (
            self.logRegressionLayer.negative_log_likelihood
        )

        self.errors = self.logRegressionLayer.errors

        self.params = self.hiddenLayer.params + self.logRegressionLayer.params

        self.input = input
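The MLP above defines L1 and L2_sqr but does not show how they enter training; in the usual Theano setup they are added to the negative log likelihood to form the cost that is differentiated. A sketch of that combination, assuming an instance named classifier, a label vector y, and regularisation weights L1_reg and L2_reg (all of these names are assumptions):

# Regularised training cost: data term plus weighted L1 and squared-L2 penalties.
cost = (
    classifier.negative_log_likelihood(y)
    + L1_reg * classifier.L1
    + L2_reg * classifier.L2_sqr
)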
# Assumed setup for this snippet (its imports are not shown): scikit-learn's breast cancer dataset and the usual helpers.
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn import datasets
from sklearn.model_selection import train_test_split

bc = datasets.load_breast_cancer()
X, y = bc.data, bc.target
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=1234)

print(X_train.shape)
print(X_train[0])

print(y_train.shape)
print(y_train[0])

from logistic_regression import LogisticRegression

regressor = LogisticRegression(lr=0.0001, n_iters=1000)
regressor.fit(X_train, y_train)
predicted = regressor.predict(X_test)


def accuracy(y_true, y_pred):
    accuracy = np.sum(y_true == y_pred) / len(y_true)
    return accuracy


print("LR classification accuracy:", accuracy(y_test, predicted))

cmap = ListedColormap(["#FF0000", "#00FF00"])
fig = plt.figure(figsize=(8, 6))

plt.scatter(X[:, 0], X[:, 1], c=y, cmap=cmap, edgecolors="k", s=20)
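The logistic_regression module imported above is not shown. A minimal sketch of a from-scratch classifier with the same constructor arguments (lr, n_iters) and the same fit/predict interface, assuming plain batch gradient descent on a sigmoid model; this is a stand-in, not the original implementation:

import numpy as np

class LogisticRegression:
    # Hypothetical stand-in for the imported class: batch gradient descent on a
    # sigmoid model, with predictions thresholded at 0.5.
    def __init__(self, lr=0.001, n_iters=1000):
        self.lr = lr
        self.n_iters = n_iters
        self.weights = None
        self.bias = None

    def fit(self, X, y):
        n_samples, n_features = X.shape
        self.weights = np.zeros(n_features)
        self.bias = 0.0
        for _ in range(self.n_iters):
            linear = X.dot(self.weights) + self.bias
            predictions = 1.0 / (1.0 + np.exp(-linear))
            # Gradients of the mean cross-entropy loss with respect to weights and bias.
            dw = X.T.dot(predictions - y) / n_samples
            db = np.sum(predictions - y) / n_samples
            self.weights -= self.lr * dw
            self.bias -= self.lr * db

    def predict(self, X):
        linear = X.dot(self.weights) + self.bias
        probabilities = 1.0 / (1.0 + np.exp(-linear))
        return np.where(probabilities > 0.5, 1, 0)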
Example #13
import numpy as np
import sys
sys.path.append('../../../')
from logistic_regression import LogisticRegression
from model import Data
from utilities import *

train_data = np.load('data/train_data.npy')
test_data = np.load('data/test_data.npy')

train_inputs, train_target = Data.normalize(
    train_data[:, :-1]), train_data[:, -1:].astype(int).flatten()
test_inputs, test_target = Data.normalize(
    test_data[:, :-1]), test_data[:, -1:].astype(int).flatten()

model = LogisticRegression(input_dim=7,
                           num_classes=3,
                           batch_size=8,
                           epochs=50,
                           learning_rate=1e-3)
model.train(train_inputs, train_target)

print('After training the model accuracy is about ',
      accuracy(model.predict(test_inputs), test_target))
confusion_plot(model,
               test_inputs,
               test_target,
               outfile='plots/confusion_matrix')
def evaluate_convnet(learning_rate=0.1, n_epochs=1,
                    dataset='mnist.pkl.gz',
                    nkerns=[20, 50], batch_size=500):
    """ Demonstrates lenet on MNIST dataset

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: path to the dataset used for training /testing (MNIST here)

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer
    """

    rng = numpy.random.RandomState(23455)

    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches /= batch_size
    n_valid_batches /= batch_size
    n_test_batches /= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # start-snippet-1
    x = T.matrix('x')   # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # Reshape matrix of rasterized images of shape (batch_size, 28 * 28)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    # (28, 28) is the size of MNIST images.
    layer0_input = x.reshape((batch_size, 1, 28, 28))

    # Construct the first convolutional pooling layer:
    # filtering reduces the image size to (28-5+1 , 28-5+1) = (24, 24)
    # maxpooling reduces this further to (24/2, 24/2) = (12, 12)
    # 4D output tensor is thus of shape (batch_size, nkerns[0], 12, 12)
    layer0 = ConvPoolLayer(
        rng,
        input=layer0_input,
        image_shape=(batch_size, 1, 28, 28),
        filter_shape=(nkerns[0], 1, 5, 5),
        poolsize=(2, 2)
    )

    # Construct the second convolutional pooling layer
    # filtering reduces the image size to (12-5+1, 12-5+1) = (8, 8)
    # maxpooling reduces this further to (8/2, 8/2) = (4, 4)
    # 4D output tensor is thus of shape (nkerns[0], nkerns[1], 4, 4)
    layer1 = ConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, nkerns[0], 12, 12),
        filter_shape=(nkerns[1], nkerns[0], 5, 5),
        poolsize=(2, 2)
    )

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e matrix of rasterized images).
    # This will generate a matrix of shape (batch_size, nkerns[1] * 4 * 4),
    # or (500, 50 * 4 * 4) = (500, 800) with the default values.
    layer2_input = layer1.output.flatten(2)

    # construct a fully-connected sigmoidal layer
    layer2 = HiddenLayer(
        rng,
        input=layer2_input,
        n_in=nkerns[1] * 4 * 4,
        n_out=500,
        activation=T.tanh
    )

    # classify the values of the fully-connected sigmoidal layer
    layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # create a list of all model parameters to be fit by gradient descent
    params = layer3.params + layer2.params + layer1.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by
    # SGD Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i], grads[i]) pairs.
    updates = [
        (param_i, param_i - learning_rate * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]

    train_model = theano.function(
        [index],
        cost, # This is the negative log-likelihood of the Logistic Regression layer
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )
    # end-snippet-1

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'
    # early-stopping parameters
    patience = 10000  # look as this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
                           # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
                                  # go through this many
                                  # minibatches before checking the network
                                  # on the validation set; in this case we
                                  # check every epoch

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = time.clock()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            iter = (epoch - 1) * n_train_batches + minibatch_index

            if iter % 100 == 0:
                print 'training @ iter = ', iter
            cost_ij = train_model(minibatch_index)
            
            if (iter + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [validate_model(i) for i
                                     in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = [
                        test_model(i)
                        for i in xrange(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = time.clock()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
Example #15
audio_features = ['danceability', 'energy', 'speechiness', 'acousticness','instrumentalness', 'valence']
all_playlists = data_trans[audio_features].describe().T

rap = data_trans.loc[data_trans['playlist_id'] == 0][audio_features].describe().T
rap.rename(columns={'mean':'mean_rap'}, inplace=True)

jazz = data_trans.loc[data_trans['playlist_id'] == 1][audio_features].describe().T
jazz.rename(columns={'mean':'mean_jazz'}, inplace=True)

df1 = rap['mean_rap']
df2 = jazz['mean_jazz']
df3 = all_playlists['mean']

r = pd.concat([df1, df2, df3], axis=1)
r.plot(kind='bar', figsize=(8,5), title='Audio feature average value per playlist', colormap='viridis', rot=20);

features = data_trans.loc[:, 'danceability':'valence'].values
targets = data_trans.loc[:, 'playlist_id'].values
x_train, x_test, y_train, y_test = train_test_split(features, targets, test_size=0.30, random_state=100)

lr = LogisticRegression(iterations=15000, learning_rate=0.10)
pred_y = lr.fit(x_train, y_train).predict(x_test)
accuracy_score(pred_y, y_test)

confusion_matrix(y_test, pred_y)

gnb = GaussianNaiveBayes()
pred_y = gnb.fit(x_train, y_train).predict(x_test)
accuracy_score(y_test, pred_y)

confusion_matrix(y_test, pred_y)
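One detail worth noting in this example: accuracy_score is called as accuracy_score(pred_y, y_test) for the logistic regression model but as accuracy_score(y_test, pred_y) for the naive Bayes model. If accuracy_score here is scikit-learn's, its documented order is accuracy_score(y_true, y_pred); the value is the same for plain accuracy, but a consistent order avoids surprises with metrics that are not symmetric:

from sklearn.metrics import accuracy_score, confusion_matrix

# Consistent (y_true, y_pred) ordering for both models.
print(accuracy_score(y_test, pred_y))
print(confusion_matrix(y_test, pred_y))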
Example #16
def optimize_cnn_lenet(learning_rate=0.01, n_epochs=200, dataset='data/mnist.pkl.gz', batch_size=500, n_hidden=500, nkerns=[20, 50], rng=np.random.RandomState(23455)):
    print '... load training set'
    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]
    
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size

    # minibatch index
    index = T.lscalar()

    # symbolic variable for the data
    x = T.matrix('x')
    # symbolic variable for the labels
    y = T.ivector('y')

    print '... building the model'
    # Reshape the matrix of rasterized images of shape (batch_size, 28*28) into a 4D tensor so it is compatible with LeNetConvPoolLayer
    # The added 1 is the number of channels
    # The images are grayscale here, so there is a single channel
    layer0_input = x.reshape((batch_size, 1, 28, 28))

    # nkerns[0] = 20 filters in the first convolutional layer
    layer0 = ConvLayer(rng, input=layer0_input, image_shape=(batch_size, 1, 28, 28), filter_shape=(nkerns[0], 1, 5, 5))
    
    layer1 = PoolLayer(layer0.output, poolsize=(2, 2))

    # nkerns[1] = 50 filters in the second convolutional layer
    layer2 = ConvLayer(rng, input=layer1.output, image_shape=(batch_size, nkerns[0], 12, 12), filter_shape=(nkerns[1], nkerns[0], 5, 5))

    layer3 = PoolLayer(layer2.output, poolsize=(2, 2))

    # layer4_input
    # The previous pooling layer outputs a 4D tensor (50 channels of 4x4 pixel images), which cannot be fed directly to the multilayer perceptron
    # Flatten it into a 4x4x50 = 800-dimensional vector: from (batch_size, 50, 4, 4) to (batch_size, 800)
    layer4_input = layer3.output.flatten(2)

    # Hidden layer with n_hidden (500) units
    # n_in is the size of the flattened input vector built above
    layer4 = HiddenLayer(rng, input=layer4_input, n_in=nkerns[1]*4*4, n_out=n_hidden, activation=T.tanh)

    # Logistic regression output layer: n_hidden inputs, 10 output classes
    layer5 = LogisticRegression(input=layer4.output, n_in=n_hidden, n_out=10)
    
    # Cost (an ordinary multilayer perceptron needs an explicit regularization term, but the CNN structure itself has a regularizing effect)
    cost = layer5.negative_log_likelihood(y)

    # test model
    # Uses givens to select the minibatch for the given index and computes layer5.errors on it
    test_model = theano.function([index], layer5.errors(y), givens={x:test_set_x[index*batch_size : (index + 1)*batch_size], y: test_set_y[index*batch_size : (index + 1)*batch_size]})

    # validation model
    validate_model = theano.function([index], layer5.errors(y), givens={x:valid_set_x[index*batch_size : (index + 1)*batch_size], y: valid_set_y[index*batch_size : (index + 1)*batch_size]})

    # Parameters to differentiate with respect to (the pooling layers have no parameters)
    params = layer5.params + layer4.params + layer2.params + layer0.params

    # Gradients of the cost function with respect to the parameters
    grads = T.grad(cost, params)

    # Parameter updates
    updates = [(param_i, param_i - learning_rate * grad_i) for param_i, grad_i in zip(params, grads)]

    # train model
    train_model = theano.function(inputs=[index], outputs=cost, updates=updates, givens={x: train_set_x[index*batch_size : (index + 1)*batch_size], y:train_set_y[index*batch_size : (index+1)*batch_size]})

    # optimize
    print "train model ..."
    patience = 10000
    patience_increase = 2
    improvement_threshold = 0.995
    validation_frequency = min(n_train_batches, patience/2)

    best_validation_loss = np.inf
    best_iter = 0
    test_score = 0
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    fp1 = open('log/lenet_validation_error.txt', 'w')
    fp2 = open('log/lenet_test_error.txt', 'w')

    while(epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):
            minibatch_avg_cost = train_model(minibatch_index)
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                ## Pass each validation minibatch index to the function that computes the validation error, collecting the results in a list
                validation_losses = [validate_model(i) for i in xrange(n_valid_batches)]
                # Average them to get the score
                this_validation_loss = np.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f ' % (epoch, minibatch_index+1, n_train_batches, this_validation_loss*100.))
                fp1.write("%d\t%f\n" % (epoch, this_validation_loss*100))         

                if this_validation_loss < best_validation_loss:
                    if(this_validation_loss < best_validation_loss * improvement_threshold):
                        patience = max(patience, iter*patience_increase)

                    best_validation_loss = this_validation_loss
                    best_iter = iter
                    ## Pass each test minibatch index to the function that computes the test error, collecting the results in a list
                    test_losses = [test_model(i) for i in xrange(n_test_batches)]

                    ## Average them to get the score
                    test_score = np.mean(test_losses)
                    print('epoch %i, minibatch %i/%i, test error %f ' % (epoch, minibatch_index+1, n_train_batches, test_score*100.))
                    fp2.write("%d\t%f\n" % (epoch, test_score*100))
            if patience <= iter:
                done_looping = True
                break
    fp1.close()
    fp2.close()        
    end_time = timeit.default_timer()
    print(('optimization complete. Best validation score of %f obtained at iteration %i, with test performance %f') % (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr,('This code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time)/60.))

    import cPickle
    cPickle.dump(layer0, open("model/cnn_layer0.pkl", "wb"))
    cPickle.dump(layer2, open("model/cnn_layer2.pkl", "wb"))
    cPickle.dump(layer4, open("model/cnn_layer4.pkl", "wb"))
    cPickle.dump(layer5, open("model/cnn_layer5.pkl", "wb"))
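The layers pickled at the end of this example can be reloaded the same way, provided the layer classes are importable when unpickling; a short sketch using the same file names:

import cPickle

# Reload the trained layers to reuse the network without retraining.
layer0 = cPickle.load(open("model/cnn_layer0.pkl", "rb"))
layer2 = cPickle.load(open("model/cnn_layer2.pkl", "rb"))
layer4 = cPickle.load(open("model/cnn_layer4.pkl", "rb"))
layer5 = cPickle.load(open("model/cnn_layer5.pkl", "rb"))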
def test_regression_model_mnist(dataset_name='mnist.pkl.gz',
                     learning_rate=0.13,
                     n_epochs=1000,
                     batch_size=600):
    # Set up the dataset
    dataset = load_data(dataset_name)
    # Split the data into a training, validation and test set
    train_data, train_labels = dataset[0]
    validation_data, validation_labels = dataset[1]
    test_data, test_labels = dataset[2]
    # Compute number of minibatches for each set
    n_train_batches = train_data.get_value(borrow=True).shape[0] / batch_size
    n_valid_batches = validation_data.get_value(borrow=True).shape[0] / batch_size
    n_test_batches = test_data.get_value(borrow=True).shape[0] / batch_size
    data_dim = (28, 28) # The dimension of each image in the dataset
    data_classes = 10 # The number of classes within the data
    
    # Build the model
    # ---------------

    # Allocate symbolic variables for data
    index = T.lscalar() # This is the index to a minibatch
    x = T.matrix('x') # Data (rasterized images)
    y = T.ivector('y') # Labels (1d vector of ints)

    # Construct logistic regression class
    classifier = LogisticRegression(input=x, n_in=data_dim[0]*data_dim[1], n_out=data_classes)

    # Cost to minimize during training
    cost = classifier.negative_log_likelihood(y)

    # Compile a Theano function that computes mistakes made by the model on a minibatch
    test_model = th.function(inputs=[index], # This function is for the test data   
                             outputs=classifier.errors(y),
                             givens={x: test_data[index * batch_size: (index + 1) * batch_size],
                                     y: test_labels[index * batch_size: (index + 1) * batch_size]})
    validate_model = th.function(inputs=[index], # This function is for the validation data    
                                 outputs=classifier.errors(y),
                                 givens={x: validation_data[index * batch_size: (index + 1) * batch_size],
                                         y: validation_labels[index * batch_size: (index + 1) * batch_size]})
    # Compute the gradient of cost with respect to theta = (W,b)
    grad_W = T.grad(cost=cost, wrt=classifier.W)
    grad_b = T.grad(cost=cost, wrt=classifier.b)

    # Specify how to update model parameters as a list of (variable, update expression) pairs
    updates = [(classifier.W, classifier.W - learning_rate * grad_W),
               (classifier.b, classifier.b - learning_rate * grad_b)]

    # Compile Theano function that returns the cost and updates parameters of model based on update rules
    train_model = th.function(inputs=[index], # Index in minibatch that defines x with label y   
                             outputs=cost, # Cost/loss associated with x,y
                             updates=updates,
                             givens={x: train_data[index * batch_size: (index + 1) * batch_size],
                                     y: train_labels[index * batch_size: (index + 1) * batch_size]})

    # Train the model
    # ---------------

    # Setup the early-stopping parameters
    patience = 5000 # Minimum number of examples to examine
    patience_increase = 2 # How much longer to wait once a new best is found
    improvement_threshold = 0.995 # Value of a significant relative improvement
    validation_frequency = min(n_train_batches, patience / 2) # Number of minibatches before validating
    best_validation_loss = np.inf
    test_score = 0
    start_time = time.clock()

    # Setup the training loop
    done_looping = False
    epoch = 0
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):
            minibatch_avg_cost = train_model(minibatch_index)
            # Set the iteration number
            iter = (epoch - 1) * n_train_batches + minibatch_index
            if (iter + 1) % validation_frequency == 0:
                # Compute the zero-one loss on the validation set
                validation_losses = [validate_model(i) for i in xrange(n_valid_batches)]
                this_validation_loss = np.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' % (epoch,
                                                                             minibatch_index + 1,
                                                                             n_train_batches,
                                                                             this_validation_loss * 100.))
                # Check if current validation score is the best
                if this_validation_loss < best_validation_loss:
                    # Improve the patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * improvement_threshold:
                        patience = max(patience, iter * patience_increase)
                    best_validation_loss = this_validation_loss
                    # Test on test set
                    test_losses = [test_model(i) for i in xrange(n_test_batches)]
                    test_score = np.mean(test_losses)
                    print('epoch %i, minibatch %i/%i, test error of best model %f %%' % (epoch,
                                                                                         minibatch_index + 1,
                                                                                         n_train_batches,
                                                                                         test_score * 100.))
            # Stop the loop if we have exhausted our patience
            if patience <= iter:
                done_looping = True
                break
    # The loop has ended so record the time it took
    end_time = time.clock()
    # Print out results and timing information
    print('Optimization complete with best validation score of %f %%, with test performance %f %%' % (best_validation_loss * 100.,
                                                                                                      test_score * 100.)) 
    print 'The code ran for %d epochs with %f epochs/sec' % (epoch, 1. * epoch / (end_time - start_time))
    print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] + ' ran for %.1fs' % ((end_time - start_time)))
Example #18
# ..........................
#  TRAIN / TEST SPLIT
# ..........................
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)
# Rescale label for Adaboost to {-1, 1}
rescaled_y_train = 2*y_train - np.ones(np.shape(y_train))
rescaled_y_test = 2*y_test - np.ones(np.shape(y_test))

# .......
#  SETUP
# .......
adaboost = Adaboost(n_clf = 8)
naive_bayes = NaiveBayes()
knn = KNN(k=4)
logistic_regression = LogisticRegression()
mlp = MultilayerPerceptron(n_hidden=20)
perceptron = Perceptron()
decision_tree = DecisionTree()
random_forest = RandomForest(n_estimators=150)
support_vector_machine = SupportVectorMachine(C=1, kernel=rbf_kernel)

# ........
#  TRAIN
# ........
print "Training:"
print "\tAdaboost"
adaboost.fit(X_train, rescaled_y_train)
print "\tNaive Bayes"
naive_bayes.fit(X_train, y_train)
print "\tLogistic Regression"
    def __init__(self, datasets, batch_size=500, nkerns=[20, 50], img_size=(28, 28), learning_rate=0.1):
        
        train_set_x, train_set_y = datasets[0]
        valid_set_x, valid_set_y = datasets[1]
        test_set_x, test_set_y = datasets[2]
        
        self.batch_size = batch_size
        # compute number of minibatches for training, validation and testing
        self.n_train_batches = train_set_x.get_value(borrow=True).shape[0]
        self.n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
        self.n_test_batches = test_set_x.get_value(borrow=True).shape[0]
        self.n_train_batches /= batch_size
        self.n_valid_batches /= batch_size
        self.n_test_batches /= batch_size

        # allocate symbolic variables for the data
        self.index = T.lscalar()  # index to a [mini]batch
        self.x = T.matrix('x')
        self.y = T.ivector('y')

        rng = np.random.RandomState(23455)
        
        layer0_input = self.x.reshape((batch_size, 1, img_size[0], img_size[1]))
        
        # Create the two convolutional layers that also perform downsampling using maxpooling
        self.layer0 = ConvPoolLayer(rng,
                                    input=layer0_input,
                                    image_shape=(batch_size, 1, img_size[0], img_size[1]),
                                    filter_shape=(nkerns[0], 1, 5, 5), 
                                    poolsize=(2,2))

        self.layer1 = ConvPoolLayer(rng,
                                    input=self.layer0.output,
                                    image_shape=(batch_size, nkerns[0], 12, 12),
                                    filter_shape=(nkerns[1], nkerns[0], 5, 5), 
                                    poolsize=(2,2))

        layer2_input = self.layer1.output.flatten(2)
       
        # Create the hidden layer of the MLP
        self.layer2 = HiddenLayer(rng,
                                  input=layer2_input,
                                  n_in=nkerns[1] * 4 * 4,
                                  n_out=500,
                                  activation=T.tanh)

        # Create the logistic regression layer for classifying the results
        self.layer3 = LogisticRegression(input=self.layer2.output, n_in=500, n_out=10)

        self.cost = self.layer3.negative_log_likelihood(self.y)

        self.params = self.layer3.params + self.layer2.params + self.layer1.params + self.layer0.params

        self.grads = T.grad(self.cost, self.params)

        # Update list for the parameters to be used when training the model
        updates = [(param_i, param_i - learning_rate * grad_i)
                   for param_i, grad_i in zip(self.params, self.grads)]

        # This function updates the model parameters using Stochastic Gradient Descent
        self.train_model = th.function([self.index],
                                       self.cost, # This is the negative-log-likelihood of the Logistic Regression layer
                                       updates=updates,
                                       givens={self.x: train_set_x[self.index * batch_size: (self.index + 1) * batch_size],
                                               self.y: train_set_y[self.index * batch_size: (self.index + 1) * batch_size]})
                                     
        # These are Theano functions for testing performance on our test and validation datasets
        self.test_model = th.function([self.index],
                                      self.layer3.errors(self.y),
                                      givens={self.x: test_set_x[self.index * batch_size: (self.index + 1) * batch_size],
                                              self.y: test_set_y[self.index * batch_size: (self.index + 1) * batch_size]})

        self.validate_model = th.function([self.index],
                                          self.layer3.errors(self.y),
                                          givens={self.x: valid_set_x[self.index * batch_size: (self.index + 1) * batch_size],
                                                  self.y: valid_set_y[self.index * batch_size: (self.index + 1) * batch_size]})
class CNN(object):
    ''' 
        Convolutional Neural Network with 2 convolutional pooling layers
        The default parameters are for the MNIST dataset
        NOTE: Dataset is required to be 28x28 images with three sub data sets 
    '''
    def __init__(self, datasets, batch_size=500, nkerns=[20, 50], img_size=(28, 28), learning_rate=0.1):
        
        train_set_x, train_set_y = datasets[0]
        valid_set_x, valid_set_y = datasets[1]
        test_set_x, test_set_y = datasets[2]
        
        self.batch_size = batch_size
        # compute number of minibatches for training, validation and testing
        self.n_train_batches = train_set_x.get_value(borrow=True).shape[0]
        self.n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
        self.n_test_batches = test_set_x.get_value(borrow=True).shape[0]
        self.n_train_batches /= batch_size
        self.n_valid_batches /= batch_size
        self.n_test_batches /= batch_size

        # allocate symbolic variables for the data
        self.index = T.lscalar()  # index to a [mini]batch
        self.x = T.matrix('x')
        self.y = T.ivector('y')

        rng = np.random.RandomState(23455)
        
        layer0_input = self.x.reshape((batch_size, 1, img_size[0], img_size[1]))
        
        # Create the two convolutional layers that also perform downsampling using maxpooling
        self.layer0 = ConvPoolLayer(rng,
                                    input=layer0_input,
                                    image_shape=(batch_size, 1, img_size[0], img_size[1]),
                                    filter_shape=(nkerns[0], 1, 5, 5), 
                                    poolsize=(2,2))

        self.layer1 = ConvPoolLayer(rng,
                                    input=self.layer0.output,
                                    image_shape=(batch_size, nkerns[0], 12, 12),
                                    filter_shape=(nkerns[1], nkerns[0], 5, 5), 
                                    poolsize=(2,2))

        layer2_input = self.layer1.output.flatten(2)
       
        # Create the hidden layer of the MLP
        self.layer2 = HiddenLayer(rng,
                                  input=layer2_input,
                                  n_in=nkerns[1] * 4 * 4,
                                  n_out=500,
                                  activation=T.tanh)

        # Create the logistic regression layer for classifying the results
        self.layer3 = LogisticRegression(input=self.layer2.output, n_in=500, n_out=10)

        self.cost = self.layer3.negative_log_likelihood(self.y)

        self.params = self.layer3.params + self.layer2.params + self.layer1.params + self.layer0.params

        self.grads = T.grad(self.cost, self.params)

        # Update list for the parameters to be used when training the model
        updates = [(param_i, param_i - learning_rate * grad_i)
                   for param_i, grad_i in zip(self.params, self.grads)]

        # This function updates the model parameters using Stochastic Gradient Descent
        self.train_model = th.function([self.index],
                                       self.cost, # This is the negative-log-likelihood of the Logistic Regression layer
                                       updates=updates,
                                       givens={self.x: train_set_x[self.index * batch_size: (self.index + 1) * batch_size],
                                               self.y: train_set_y[self.index * batch_size: (self.index + 1) * batch_size]})
                                     
        # These are Theano functions for testing performance on our test and validation datasets
        self.test_model = th.function([self.index],
                                      self.layer3.errors(self.y),
                                      givens={self.x: test_set_x[self.index * batch_size: (self.index + 1) * batch_size],
                                              self.y: test_set_y[self.index * batch_size: (self.index + 1) * batch_size]})

        self.validate_model = th.function([self.index],
                                          self.layer3.errors(self.y),
                                          givens={self.x: valid_set_x[self.index * batch_size: (self.index + 1) * batch_size],
                                                  self.y: valid_set_y[self.index * batch_size: (self.index + 1) * batch_size]})

    def train(self, n_epochs, patience=10000, patience_increase=2, improvement_threshold=0.995):
        ''' Train the CNN on the training data for a defined number of epochs '''
        # Setup the variables for training the model
        n_train_batches = self.n_train_batches
        n_valid_batches = self.n_valid_batches
        n_test_batches = self.n_test_batches
        validation_frequency = min(n_train_batches, patience / 2)
        best_validation_loss = np.inf
        best_iter = 0
        best_score = 0.
        epoch = 0
        done_looping = False
        # Train the CNN for a defined number of epochs
        while (epoch < n_epochs) and (not done_looping):
            epoch = epoch + 1
            for minibatch_index in xrange(n_train_batches):
                iter = (epoch - 1) * n_train_batches + minibatch_index
                # Every 100 iterations
                if iter % 100 == 0:
                    print 'Training iteration ', iter
                cost_ij = self.train_model(minibatch_index)

                if (iter + 1) % validation_frequency == 0:
                    # Compute zero-one loss on validation set
                    validation_losses = [self.validate_model(i) for i
                                         in xrange(n_valid_batches)]
                    this_validation_loss = np.mean(validation_losses)
                    print('epoch %i, minibatch %i/%i, validation error %f %%' %
                          (epoch, minibatch_index + 1, n_train_batches,
                           this_validation_loss * 100.))

                    # Check if current validation loss is best so far
                    if this_validation_loss < best_validation_loss:
                        # Improve patience if loss improvement is good enough
                        if this_validation_loss < best_validation_loss * improvement_threshold:
                            patience = max(patience, iter * patience_increase)
                        # Save best validation score and iteration number
                        best_validation_loss = this_validation_loss
                        best_iter = iter
                if patience <= iter:
                    done_looping = True
                    break
        print 'Optimization complete.'
        print('Best validation score of %f %% obtained at iteration %i' %
              (best_validation_loss * 100., best_iter + 1))

    def test(self, set_x, set_y):
        ''' Test data sets and return the test score '''
        # allocate symbolic variables for the data
        n_test_batches = set_x.get_value(borrow=True).shape[0]
        n_test_batches /= self.batch_size
        test_model = th.function(inputs=[self.index],
                                 outputs=self.layer3.errors(self.y),
                                 givens={self.x: set_x[self.index * self.batch_size: (self.index + 1) * self.batch_size],
                                         self.y: set_y[self.index * self.batch_size: (self.index + 1) * self.batch_size]})
        test_losses = [test_model(i)
                       for i in xrange(n_test_batches)]
        test_score = np.mean(test_losses)
        return test_score

    def classify(self, set):
        ''' 
           Return the labels for the given set
           NOTE: The batch size must be the same as the training set  
        '''
        n_test_batches = set.get_value(borrow=True).shape[0]
        n_test_batches /= self.batch_size
        classify_data = th.function(inputs=[self.index], # Input to this function is a mini-batch at index
                                    outputs=self.layer3.y_pred, # Output the y_predictions
                                    givens={self.x: set[self.index * self.batch_size: (self.index + 1) * self.batch_size]})
        # Generate labels for the given data
        labels = [classify_data(i)
                  for i in xrange(n_test_batches)]
        return np.array(labels)
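A short usage sketch for the CNN class above, assuming the load_data helper used by the other MNIST examples in this collection; the argument values mirror the class defaults and are illustrative only:

# Build the network from the three MNIST splits, train it, then score the held-out test set.
datasets = load_data('mnist.pkl.gz')
cnn = CNN(datasets, batch_size=500)
cnn.train(n_epochs=200)
test_set_x, test_set_y = datasets[2]
print('test error: %f' % cnn.test(test_set_x, test_set_y))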
Example #21
from sklearn.cross_validation import train_test_split

# Read the training data
f = open("../data/train.csv")
reader = csv.reader(f)
next(reader, None)  # skip header
data = [data for data in reader]
f.close()

X = np.asarray([x[1:] for x in data], dtype=np.int16)
y = np.asarray([x[0] for x in data], dtype=np.int16)

X = np.true_divide(X, 255)
# normalize image data to 0-1

del data  # free up the memory
print("loaded training data")

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=RandomState())

lr = LogisticRegression(C=0.35)
lr.fit(X_train, y_train, 10)
guesses = lr.predict(X_test)

score = 0.0
for g in range(guesses.shape[0]):
    if guesses[g] == y_test[g]:
        score += 1

print("Score: ", score / len(guesses))
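The scoring loop above can be collapsed into a single vectorised comparison; a one-line equivalent using the same guesses and y_test arrays:

import numpy as np

# Fraction of predictions that match the held-out labels.
print("Score:", np.mean(guesses == y_test))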
from sklearn.cross_validation import train_test_split
from sklearn.metrics import accuracy_score

if __name__ == '__main__':

    raw_data = pd.read_csv('../data/train_binary.csv', header=0)
    data = raw_data.values

    imgs = data[0::, 1::]
    labels = data[::, 0]

    test_time = 10

    p = Perceptron()
    lr = LogisticRegression()

    writer = csv.writer(file('result.csv', 'wb'))

    for time in xrange(test_time):
        print 'iteration %d' % time

        train_features, test_features, train_labels, test_labels = train_test_split(
            imgs, labels, test_size=0.33, random_state=23323)

        p.train(train_features, train_labels)
        lr.train(train_features, train_labels)

        p_predict = p.predict(test_features)
        lr_predict = lr.predict(test_features)
    def __init__(self,
                 random_generator,
                 theano_random_generator=None,
                 x_dim=28 * 28,
                 y_dim=10,
                 hidden_layer_sizes=[500, 500],
                 corruption_levels=[0.1, 0.1]):
        """
        """
        # Declare empty sigmoid layer array for MLP
        self.sigmoid_layers = []

        # Declare an empty array of DenoisingAutoEncoder
        self.autoencoder_layers = []

        self.params = []
        self.n_layers = len(hidden_layer_sizes)

        if theano_random_generator == None:
            self.theano_random_generator = RandomStreams(
                random_generator.randint(2**30))
        else:
            self.theano_random_generator = theano_random_generator

        # Inputs using Theano
        self.x = T.matrix("x")
        self.y = T.ivector("y")

        # Initialize all parameters
        for i in range(self.n_layers):
            # Define x and y dimensions
            if i == 0:
                internal_x_dim = x_dim
            else:
                internal_x_dim = hidden_layer_sizes[i - 1]
            internal_y_dim = hidden_layer_sizes[i]

            # Find inputs
            if i == 0:
                internal_input = self.x
            else:
                internal_input = self.sigmoid_layers[i - 1].output

            # Define Sigmoid Layer
            self.sigmoid_layers.append(
                HiddenLayer(internal_input,
                            internal_x_dim,
                            internal_y_dim,
                            random_generator,
                            activation=T.nnet.sigmoid))

            # Define input
            self.autoencoder_layers.append(
                DenoisingAutoEncoder(random_generator,
                                     theano_random_generator,
                                     internal_x_dim,
                                     internal_y_dim,
                                     internal_input,
                                     W=self.sigmoid_layers[i].W,
                                     b=self.sigmoid_layers[i].b))

            # Update parameters
            self.params.extend(self.sigmoid_layers[i].params)

        # Finally add logistic layer
        self.logistic_layer = LogisticRegression(
            self.sigmoid_layers[-1].output, hidden_layer_sizes[-1], y_dim)

        self.params.extend(self.logistic_layer.params)

        # These are two important costs
        # Finetuning after pretraining individual AutoEncoders
        self.finetune_cost = self.logistic_layer.negative_log_likelihood(
            self.y)

        # Error from prediction
        self.error = self.logistic_layer.error(self.y)
Example #24
'''
# initialise the model
solver = 'svrg-sgd'
batchsize = 128
n_svrg_updates = 128 if solver == 'svrg-sgd' or solver == 'svrg-adagrad' or solver == 'svrg-rmsprop' else 1
n_epochs = 20
n_updates = int(np.ceil(n_epochs * n_train / batchsize / n_svrg_updates))
eval_freq = n_svrg_updates
learning_rate = 2**(-5)

# train the model
print(
    'Train Logistic Regression using %s with the optimal learning rate of %f.'
    % (solver, learning_rate))
model = LogisticRegression(np.random.normal(0, 1, X_train.shape[1]),
                           solver=solver,
                           batchsize=batchsize)
_, eval_log = model.fit(X_train,
                        y_train,
                        n_updates=n_updates,
                        learning_rate=learning_rate,
                        n_svrg_updates=n_svrg_updates,
                        eval_freq=eval_freq,
                        eval_fn=partial(model.predict, X_test, y_test, False,
                                        False),
                        debug=False)

eval_log_filename = './results/lr_eval_%s_lr%f' % (dataset, learning_rate)

# save results to files
np.save(eval_log_filename, eval_log)
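The evaluation log saved with np.save can be read back with np.load; note that np.save appends the .npy extension when the target name has none, so the reload path differs slightly from eval_log_filename:

import numpy as np

# Reload the saved evaluation log for later analysis.
eval_log = np.load(eval_log_filename + '.npy')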
Example #25
    def __init__(self,
                 numpy_rng,
                 theano_rng=None,
                 n_ins=784,
                 hidden_layers_sizes=[500, 500],
                 n_outs=10):
        """This class is made to support a variable number of layers.

        :type numpy_rng: numpy.random.RandomState
        :param numpy_rng: numpy random number generator used to draw initial
                    weights

        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
        :param theano_rng: Theano random generator; if None is given one is
                           generated based on a seed drawn from `rng`

        :type n_ins: int
        :param n_ins: dimension of the input to the DBN

        :type hidden_layers_sizes: list of ints
        :param hidden_layers_sizes: intermediate layers size, must contain
                               at least one value

        :type n_outs: int
        :param n_outs: dimension of the output of the network
        """

        self.sigmoid_layers = []
        self.rbm_layers = []
        self.params = []
        self.n_layers = len(hidden_layers_sizes)

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2**30))

        # allocate symbolic variables for the data
        self.x = T.matrix('x')  # the data is presented as rasterized images
        self.y = T.ivector('y')  # the labels are presented as 1D vector
        # of [int] labels
        # end-snippet-1
        # The DBN is an MLP, for which all weights of intermediate
        # layers are shared with a different RBM.  We will first
        # construct the DBN as a deep multilayer perceptron, and when
        # constructing each sigmoidal layer we also construct an RBM
        # that shares weights with that layer. During pretraining we
        # will train these RBMs (which will lead to changing the
        # weights of the MLP as well). During finetuning we will finish
        # training the DBN by doing stochastic gradient descent on the
        # MLP.

        for i in xrange(self.n_layers):
            # construct the sigmoidal layer

            # the size of the input is either the number of hidden
            # units of the layer below or the input size if we are on
            # the first layer
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layers_sizes[i - 1]

            # the input to this layer is either the activation of the
            # hidden layer below or the input of the DBN if you are on
            # the first layer
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[-1].output

            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=T.nnet.sigmoid)

            # add the layer to our list of layers
            self.sigmoid_layers.append(sigmoid_layer)

            # it's arguably a philosophical question...  but we are
            # going to only declare that the parameters of the
            # sigmoid_layers are parameters of the DBN. The visible
            # biases in the RBM are parameters of those RBMs, but not
            # of the DBN.
            self.params.extend(sigmoid_layer.params)

            # Construct an RBM that shared weights with this layer
            rbm_layer = RBM(numpy_rng=numpy_rng,
                            theano_rng=theano_rng,
                            input=layer_input,
                            n_visible=input_size,
                            n_hidden=hidden_layers_sizes[i],
                            W=sigmoid_layer.W,
                            hbias=sigmoid_layer.b)
            self.rbm_layers.append(rbm_layer)

        # We now need to add a logistic layer on top of the MLP
        self.logLayer = LogisticRegression(
            input=self.sigmoid_layers[-1].output,
            n_in=hidden_layers_sizes[-1],
            n_out=n_outs)
        self.params.extend(self.logLayer.params)

        # compute the cost for second phase of training, defined as the
        # negative log likelihood of the logistic regression (output) layer
        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)

        # compute the gradients with respect to the model parameters
        # symbolic variable that points to the number of errors made on the
        # minibatch given by self.x and self.y
        self.errors = self.logLayer.errors(self.y)
Beispiel #26
0
    def __init__(self, rng, input, n_in, n_hidden, n_out, \
                  W_hid=None, b_hid=None, W_out=None, b_out=None):
        """Initialize the parameters for the multilayer perceptron

        :type rng: numpy.random.RandomState
        :param rng: a random number generator used to initialize weights

        :type input: theano.tensor.TensorType
        :param input: symbolic variable that describes the input of the 
        architecture (one minibatch)

        :type n_in: int
        :param n_in: number of input units, the dimension of the space in 
        which the datapoints lie

        :type n_hidden: int
        :param n_hidden: number of hidden units 

        :type n_out: int
        :param n_out: number of output units, the dimension of the space in 
        which the labels lie

        """

        if rng is None:
            rng = numpy.random.RandomState()

        # Since we are dealing with a one-hidden-layer MLP, this will
        # translate into a sigmoid HiddenLayer connected to the
        # LogisticRegression layer; the activation can be swapped for tanh
        # or any other nonlinearity
        self.hiddenLayer = HiddenLayer(rng=rng,
                                       input=input,
                                       n_in=n_in,
                                       n_out=n_hidden,
                                       W_values=W_hid,
                                       b_values=b_hid,
                                       activation=theano.tensor.nnet.sigmoid)

        # The logistic regression layer gets as input the hidden units
        # of the hidden layer
        self.logRegressionLayer = LogisticRegression(
            rng=rng,
            input=self.hiddenLayer.output,
            n_in=n_hidden,
            n_out=n_out,
            W_values=None,
            b_values=None)

        # L1 norm ; one regularization option is to enforce L1 norm to
        # be small
        self.L1 = abs(self.hiddenLayer.W).sum() \
                + abs(self.logRegressionLayer.W).sum() #+ abs(self.hiddenLayer2.W).sum()

        # square of L2 norm ; one regularization option is to enforce
        # square of L2 norm to be small
        self.L2_sqr = (self.hiddenLayer.W**2).sum() \
                    + (self.logRegressionLayer.W**2).sum() #+ (self.hiddenLayer2.W**2).sum()

        # negative log likelihood of the MLP is given by the negative
        # log likelihood of the output of the model, computed in the
        # logistic regression layer
        self.negative_log_likelihood = self.logRegressionLayer.negative_log_likelihood
        # log of the posteriors produced by the final softmax layer
        self.log_posteriors = self.logRegressionLayer.log_posteriors
        # same holds for the function computing the number of errors
        self.errors = self.logRegressionLayer.errors
        # returns the labels and predictions
        self.log_error_results = self.logRegressionLayer.log_error_results

        self.cost = self.negative_log_likelihood

        self.sum = self.logRegressionLayer.negative_log_likelihood_sum

        #self.delta_params = self.hiddenLayer.delta_params + self.logRegressionLayer.delta_params
        #self.params = self.hiddenLayer1.params + self.logRegressionLayer.params + self.hiddenLayer2.params
        self.params = self.logRegressionLayer.params + self.hiddenLayer.params
        self.delta_params = self.logRegressionLayer.delta_params + self.hiddenLayer.delta_params
Beispiel #27
0
    def __init__(self, numpy_rng, theano_rng=None, n_ins=N_FEATURES * N_FRAMES,
                 hidden_layers_sizes=[1024, 1024], n_phn=62 * 3, n_spkr=1,
                 rho=0.90, eps=1.E-6):
        """This class is made to support a variable number of layers.

        :type numpy_rng: numpy.random.RandomState
        :param numpy_rng: numpy random number generator used to draw initial
                    weights

        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
        :param theano_rng: Theano random generator; if None is given one is
                           generated based on a seed drawn from `rng`

        :type n_ins: int
        :param n_ins: dimension of the input to the DBN

        :type hidden_layers_sizes: list of ints
        :param hidden_layers_sizes: intermediate layers size, must contain
                               at least one value

        :type n_phn: int
        :param n_phn: number of phoneme output classes

        :type n_spkr: int
        :param n_spkr: number of speaker output classes
        """

        self.sigmoid_layers = []
        self.rbm_layers = []
        self.params = []
        self.n_layers = len(hidden_layers_sizes)
        #self._rho = shared(numpy.cast['float32'](rho), name='rho')  # for adadelta
        #self._eps = shared(numpy.cast['float32'](eps), name='eps')  # for adadelta
        self._rho = rho
        self._eps = eps
        self._accugrads = []  # for adadelta
        self._accudeltas = []  # for adadelta

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

        # allocate symbolic variables for the data
        self.x = T.fmatrix('x')  # the data is presented as rasterized images
        self.y_phn = T.ivector('y_phn')  # the labels are presented as 1D vector
                                 # of [int] labels
        self.y_spkr = T.ivector('y_spkr')  # the labels are presented as 1D vector
                                 # of [int] labels

        # The DBN is an MLP, for which all weights of intermediate
        # layers are shared with a different RBM.  We will first
        # construct the DBN as a deep multilayer perceptron, and when
        # constructing each sigmoidal layer we also construct an RBM
        # that shares weights with that layer. During pretraining we
        # will train these RBMs (which will lead to changing the
        # weights of the MLP as well). During finetuning we will finish
        # training the DBN by doing stochastic gradient descent on the
        # MLP.

        for i in xrange(self.n_layers):
            # construct the sigmoidal layer

            # the size of the input is either the number of hidden
            # units of the layer below or the input size if we are on
            # the first layer
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layers_sizes[i - 1]

            # the input to this layer is either the activation of the
            # hidden layer below or the input of the DBN if you are on
            # the first layer
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[-1].output

            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=T.nnet.sigmoid)

            # add the layer to our list of layers
            self.sigmoid_layers.append(sigmoid_layer)

            # it's arguably a philosophical question...  but we are
            # going to only declare that the parameters of the
            # sigmoid_layers are parameters of the DBN. The visible
            # biases in the RBM are parameters of those RBMs, but not
            # of the DBN.
            self.params.extend(sigmoid_layer.params)
            self._accugrads.extend([
                shared(value=numpy.zeros((input_size, hidden_layers_sizes[i]), dtype='float32'), name='accugrad_W', borrow=True),
                shared(value=numpy.zeros((hidden_layers_sizes[i],), dtype='float32'), name='accugrad_b', borrow=True)])  # TODO
            self._accudeltas.extend([
                shared(value=numpy.zeros((input_size, hidden_layers_sizes[i]), dtype='float32'), name='accudelta_W', borrow=True),
                shared(value=numpy.zeros((hidden_layers_sizes[i],), dtype='float32'), name='accudelta_b', borrow=True)])  # TODO

            # Construct an RBM that shared weights with this layer
            if i == 0:
                rbm_layer = GRBM(numpy_rng=numpy_rng,
                                theano_rng=theano_rng,
                                input=layer_input,
                                n_visible=input_size,
                                n_hidden=hidden_layers_sizes[i],
                                W=sigmoid_layer.W,
                                hbias=sigmoid_layer.b)
            else:
                rbm_layer = RBM(numpy_rng=numpy_rng,
                                theano_rng=theano_rng,
                                input=layer_input,
                                n_visible=input_size,
                                n_hidden=hidden_layers_sizes[i],
                                W=sigmoid_layer.W,
                                hbias=sigmoid_layer.b)
            self.rbm_layers.append(rbm_layer)

        # We now need to add a logistic layer on top of the MLP
        self.logLayerPhn = LogisticRegression(
            input=self.sigmoid_layers[-1].output,
            n_in=hidden_layers_sizes[-1],
            n_out=n_phn)
        self.params.extend(self.logLayerPhn.params)
        self._accugrads.extend([
            shared(value=numpy.zeros((hidden_layers_sizes[-1], n_phn), dtype='float32'), name='accugrad_W', borrow=True),
            shared(value=numpy.zeros((n_phn,), dtype='float32'), name='accugrad_b', borrow=True)])  # TODO
        self._accudeltas.extend([
            shared(value=numpy.zeros((hidden_layers_sizes[-1], n_phn), dtype='float32'), name='accudelta_W', borrow=True),
            shared(value=numpy.zeros((n_phn,), dtype='float32'), name='accudelta_b', borrow=True)])  # TODO
        self.logLayerSpkr = LogisticRegression(
            input=self.sigmoid_layers[-1].output,
            n_in=hidden_layers_sizes[-1],
            n_out=n_spkr)
        self.params.extend(self.logLayerSpkr.params)
        self._accugrads.extend([
            shared(value=numpy.zeros((hidden_layers_sizes[-1], n_spkr), dtype='float32'), name='accugrad_W', borrow=True),
            shared(value=numpy.zeros((n_spkr,), dtype='float32'), name='accugrad_b', borrow=True)])  # TODO
        self._accudeltas.extend([
            shared(value=numpy.zeros((hidden_layers_sizes[-1], n_spkr), dtype='float32'), name='accudelta_W', borrow=True),
            shared(value=numpy.zeros((n_spkr,), dtype='float32'), name='accudelta_b', borrow=True)])  # TODO

        self.finetune_cost_sum_phn = self.logLayerPhn.negative_log_likelihood_sum(self.y_phn)
        self.finetune_cost_sum_spkr = self.logLayerSpkr.negative_log_likelihood_sum(self.y_spkr)
        self.finetune_cost_phn = self.logLayerPhn.negative_log_likelihood(self.y_phn)
        self.finetune_cost_spkr = self.logLayerSpkr.negative_log_likelihood(self.y_spkr)
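        # Two softmax heads share the same top hidden layer: logLayerPhn
        # predicts phonemes and logLayerSpkr predicts speakers; both the mean
        # and summed NLL variants are kept so either can drive finetuning.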

        self.errors_phn = self.logLayerPhn.errors(self.y_phn)
        self.errors_spkr = self.logLayerSpkr.errors(self.y_spkr)
Beispiel #28
0
# Scatter plot of the two selected features (x_axis, y_axis), one series per iris type
for iris_type in iris_types:
    plt.scatter(data[x_axis][data['class'] == iris_type],
                data[y_axis][data['class'] == iris_type],
                label=iris_type)
plt.show()

num_examples = data.shape[0]
x_train = data[[x_axis, y_axis]].values.reshape((num_examples, 2))
y_train = data['class'].values.reshape((num_examples, 1))

max_iterations = 1000
polynomial_degree = 0
sinusoid_degree = 0

logistic_regression = LogisticRegression(x_train, y_train, polynomial_degree,
                                         sinusoid_degree)
thetas, loss_histories = logistic_regression.train(max_iterations)
labels = logistic_regression.unique_labels
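# There is one theta vector and one loss history per class label.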

# Plot the loss history of each classifier
plt.plot(range(len(loss_histories[0])), loss_histories[0], label=labels[0])
plt.plot(range(len(loss_histories[1])), loss_histories[1], label=labels[1])
plt.plot(range(len(loss_histories[2])), loss_histories[2], label=labels[2])
plt.show()

y_train_predictions = logistic_regression.predict(x_train)
precision = np.sum(y_train_predictions == y_train) / y_train.shape[0] * 100
print('precision: ' + str(precision) + "%")

# Generate sample points used to draw the decision boundary
x_min = np.min(x_train[:, 0])
Beispiel #29
0
def fit(n_windows, win_width, rand_state, data_set, data_labels, filename="LR_weights.pkl"):
    # Permuting data
    rng = np.random.RandomState(8000)
    indices = rng.permutation(len(data_set))
    data_set = np.array(data_set)
    data_labels = np.array(data_labels)
    data_set, data_labels = data_set[indices], data_labels[indices]
    print str(len(data_set)) + " samples in total"

    train_len = int(len(data_set) * 9.0 / 10.0)
    valid_len = len(data_set) - train_len
    print "Train: " + str(train_len)
    print "Validate: " + str(valid_len)

    # Splitting fs
    train_dir = fs.File("LR_training.hdf5", "a")
    train_data = train_dir.create_dataset("LR_train_data", shape=((train_len + 1) * n_windows, 41, 41), dtype="i")
    train_labels = train_dir.create_dataset("LR_train_labels", shape=((train_len + 1) * n_windows,), dtype="i")

    valid_dir = fs.File("LR_validating.hdf5", "a")
    valid_data = valid_dir.create_dataset("LR_valid_data", shape=((valid_len + 1) * n_windows, 41, 41), dtype="i")
    valid_labels = valid_dir.create_dataset("LR_valid_labels", shape=((valid_len + 1) * n_windows,), dtype="i")
    counter = 0
    next_counter = 0
    for iter, data_sample in enumerate(data_set):
        if iter % 10000 == 0:
            print iter
        windows = WinExt.get_windows(data_sample, n_windows, win_width, rand_state)
        for window in windows:
            # First windows part for training
            # Second part for validation
            if iter < train_len:
                train_data[counter] = window
                train_labels[counter] = data_labels[iter]
                counter += 1
            else:
                valid_data[next_counter] = window
                valid_labels[next_counter] = data_labels[iter]
                next_counter += 1
    # Setting real length
    train_len = counter
    valid_len = next_counter
    print "Size of train is " + str(train_len)
    print "Size of valid is " + str(valid_len)
    print "Extracting has finished its work..."

    batch_size = 500

    if train_len % batch_size != 0:  # if the last batch is not full, just don't use the remainder
        whole = (train_len / batch_size) * batch_size
        train_len = whole
    if valid_len % batch_size != 0:
        whole = (valid_len / batch_size) * batch_size
        valid_len = whole

    n_train_batches = train_len / batch_size
    n_valid_batches = valid_len / batch_size

    data_tr = theano.shared(
        np.asarray(np.zeros((batch_size, 41, 41), dtype=np.int), dtype=theano.config.floatX), borrow=True
    )
    labels_tr = theano.shared(np.asarray(np.zeros(batch_size, dtype=np.int), dtype="int32"), borrow=True)
    data_val = theano.shared(
        np.asarray(np.zeros((batch_size, 41, 41), dtype=np.int), dtype=theano.config.floatX), borrow=True
    )
    labels_val = theano.shared(np.asarray(np.zeros(batch_size, dtype=np.int), dtype="int32"), borrow=True)

    print "Building logistic regression classifier..."
    x = T.dtensor3("x")  # dtensor3 for 3d array
    y = T.ivector("y")  # the labels are presented as 1D vector of [int] labels
    rng = np.random.RandomState(8000)

    classifier = LogisticRegression(input=x.flatten(2), n_in=41 * 41, n_out=2)

    cost = classifier.negative_log_likelihood(y)
    learning_rate = 0.03  # 0.3 / float(n_train_batches)

    g_W = T.grad(cost=cost, wrt=classifier.W)
    g_b = T.grad(cost=cost, wrt=classifier.b)

    # start-snippet-3
    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs.
    updates = [(classifier.W, classifier.W - learning_rate * g_W), (classifier.b, classifier.b - learning_rate * g_b)]

    validate_model = theano.function(inputs=[], outputs=classifier.errors(y), givens={x: data_val, y: labels_val})

    # indices - for random shuffle
    train_model = theano.function(
        inputs=[], outputs=classifier.errors(y), updates=updates, givens={x: data_tr, y: labels_tr}
    )

    print "Training..."
    # GDM with batches
    epoch = 0
    n_epochs = 30
    min_error = 100.0
    errors = []
    indices = rng.permutation(train_len)
    while epoch < n_epochs:
        print "================= " + str(epoch + 1) + " epoch =============== "
        for minibatch_index in range(n_train_batches):
            if minibatch_index % 50 == 0:
                print str(minibatch_index) + " batch"
            data_tr.set_value(
                np.array([train_data[indices[minibatch_index * batch_size + i]] for i in range(batch_size)]),
                borrow=True,
            )
            labels_tr.set_value(
                np.array([train_labels[indices[minibatch_index * batch_size + i]] for i in range(batch_size)]),
                borrow=True,
            )
            train_model()
        # compute zero-one loss on validation set
        validation_losses = []
        for i in range(n_valid_batches):
            data_val.set_value(np.array(valid_data[i * batch_size : (i + 1) * batch_size]), borrow=True)
            labels_val.set_value(np.array(valid_labels[i * batch_size : (i + 1) * batch_size]), borrow=True)
            validation_losses.append(validate_model())
        this_validation_loss = np.mean(validation_losses) * 100
        errors.append(this_validation_loss)
        if this_validation_loss < min_error:
            print str(this_validation_loss) + "% error"
            min_error = this_validation_loss
            save_parameters(classifier, filename)
        epoch += 1
        print "Shuffling..."
        indices = rng.permutation(train_len)

    show_errors(errors, "LogReg: 4 windows, h=41")

    # Cleaning data
    train_dir.clear()
    valid_dir.clear()
    train_dir.close()
    valid_dir.close()
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import datasets
import matplotlib.pyplot as plt

from logistic_regression import LogisticRegression
#from regression import LogisticRegression


def accuracy(y_true, y_pred):
    accuracy = np.sum(y_true == y_pred) / len(y_true)
    return accuracy


bc = datasets.load_breast_cancer()
X, y = bc.data, bc.target

X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=1234)

regressor = LogisticRegression(learning_rate=0.0001, n_iters=1000)
regressor.fit(X_train, y_train)
predictions = regressor.predict(X_test)

print("LR classification accuracy:", accuracy(y_test, predictions))
Beispiel #31
0
def main():
    rng = np.random.RandomState(23455)
    datasets = load_data()
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    batch_size = 500
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    nkerns = [20, 50]

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # start-snippet-1
    x = T.matrix('x')   # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of [int] labels

    layer0_input = x.reshape((batch_size, 1, 28, 28))

    layer0 = LeNetConvPoolLayer(rng, layer0_input,
            filter_shape=(nkerns[0], 1, 5, 5),
            image_shape=(batch_size, 1, 28, 28), poolsize=(2, 2))

    layer1 = LeNetConvPoolLayer(rng, input=layer0.output,
            filter_shape=(nkerns[1], nkerns[0], 5, 5),
            image_shape=(batch_size, nkerns[0], 12, 12), poolsize=(2, 2))

    layer2_input = layer1.output.flatten(2)
    layer2 = HiddenLayer(rng, layer2_input, n_in=nkerns[1] * 4 * 4,
            n_out=500)

    layer3 = LogisticRegression(layer2.output, n_in=500, n_out=10)
    cost = layer3.negative_log_likelihood(y)

    test_model = theano.function([index], layer3.errors(y),
            givens={
                x: test_set_x[index * batch_size: (index + 1) * batch_size],
                y: test_set_y[index * batch_size: (index + 1) * batch_size]
            })
    validate_model = theano.function([index], layer3.errors(y),
            givens={
                x: valid_set_x[index * batch_size: (index + 1) * batch_size],
                y: valid_set_y[index * batch_size: (index + 1) * batch_size]
            })
    params = layer3.params + layer2.params + layer1.params + layer0.params
    grads = T.grad(cost, params)

    learning_rate = 0.1

    updates = [(param_i, param_i - learning_rate * grad_i) for param_i, grad_i
            in zip(params, grads)]
    train_model = theano.function([index], cost, updates=updates,
            givens={
                x: train_set_x[index * batch_size: (index + 1) * batch_size],
                y: train_set_y[index * batch_size: (index + 1) * batch_size]
            })

    print "Start training..."
    patience = 10000
    patience_increase = 2
    improvement_threshold = 0.995
    n_epochs = 200
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = np.inf
    test_score = 0.

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):

            iter = (epoch - 1) * n_train_batches + minibatch_index

            if iter % 100 == 0:
                print('training @ iter = ', iter)
            cost_ij = train_model(minibatch_index)  # NOQA

            if (iter + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [validate_model(i) for i
                                     in range(n_valid_batches)]
                this_validation_loss = np.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    # improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss

                    # test it on the test set
                    test_losses = [
                        test_model(i)
                        for i in range(n_test_batches)
                    ]
                    test_score = np.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break
import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import make_classification
from logistic_regression import LogisticRegression


def sigmoid(X):
    '''
    Computes the Sigmoid function of the input argument X.
    '''
    return 1.0 / (1 + np.exp(-X))


lr = LogisticRegression()
X, y = make_classification(n_features=2,
                           n_redundant=0,
                           n_informative=2,
                           random_state=1,
                           n_clusters_per_class=1)
lr.fit(X, y)
H = lr.predict(X)
print("Training Accuracy : " + str(float(np.sum(H == y)) / y.shape[0]))
#Plot data
plt.scatter(X[y == 1, 0], X[y == 1, 1], marker='o', c='b')  #positive samples
plt.scatter(X[y == 0, 0], X[y == 0, 1], marker='x', c='r')  #negative samples

#Plot Decision Boundary
u = np.linspace(-2, 2, 50)
v = np.linspace(-2, 2, 50)
z = np.zeros(shape=(len(u), len(v)))
for i in range(len(u)):
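    # (Hedged sketch: the loop body was truncated in this snippet. A typical
    #  completion evaluates the classifier on the (u, v) grid and traces the
    #  0.5 contour; it assumes the custom lr.predict accepts a (1, 2) array.)
    for j in range(len(v)):
        z[i, j] = lr.predict(np.array([[u[i], v[j]]]))
plt.contour(u, v, z.T, levels=[0.5], colors='g')
plt.show()
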
def stochastic_gradient_descent_mnist(
        learning_rate=0.13,
        n_epochs=1000,
        path='/home/tao/Projects/machine-learning/data/mnist.pkl.gz',
        batch_size=600):

    datasets = load_data(path)

    train_set_data, train_set_label = datasets[0]
    validation_set_data, validation_set_label = datasets[1]
    test_set_data, test_set_label = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_data.get_value(
        borrow=True).shape[0] // batch_size
    n_valid_batches = validation_set_data.get_value(
        borrow=True).shape[0] // batch_size
    n_test_batches = test_set_data.get_value(
        borrow=True).shape[0] // batch_size

    print('... building the model')

    index = T.lscalar()  # index to a [mini]batch

    data = T.matrix('x')  # data, presented as rasterized images
    label = T.ivector('y')  # labels, presented as 1D vector of [int] labels

    classifier = LogisticRegression(input=data,
                                    input_dim=28 * 28,
                                    output_dim=10)

    objective_function = classifier.negative_log_likelihood(label)

    # testing model
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(label),
        givens={
            data: test_set_data[index * batch_size:(index + 1) * batch_size],
            label: test_set_label[index * batch_size:(index + 1) * batch_size]
        })
    # validation model
    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(label),
        givens={
            data:
            validation_set_data[index * batch_size:(index + 1) * batch_size],
            label:
            validation_set_label[index * batch_size:(index + 1) * batch_size]
        })

    # gradients
    g_W = T.grad(cost=objective_function, wrt=classifier.W)
    g_b = T.grad(cost=objective_function, wrt=classifier.b)

    # update rule
    updates = [(classifier.W, classifier.W - learning_rate * g_W),
               (classifier.b, classifier.b - learning_rate * g_b)]
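    # i.e. one step of vanilla gradient descent applied to both parameters:
    #   W <- W - learning_rate * dJ/dW,   b <- b - learning_rate * dJ/db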

    # training model
    train_model = theano.function(
        inputs=[index],
        outputs=objective_function,
        updates=updates,
        givens={
            data: train_set_data[index * batch_size:(index + 1) * batch_size],
            label: train_set_label[index * batch_size:(index + 1) * batch_size]
        })

    print('... training the model')
    # early-stopping parameters
    patience = 5000  # look as this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is considered significant
    # go through this many minibatche before checking the network on the validation set; in this case we check every epoch
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = numpy.inf
    test_score = 0.
    start_time = timeit.default_timer()

    done_looping = False
    epoch = 0
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):
            minibatch_avg_cost = train_model(minibatch_index)
            # iteration number
            iter = (epoch - 1) * n_train_batches + minibatch_index
            if (iter + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = [
                    validate_model(i) for i in range(n_valid_batches)
                ]  # list comprehension (syntactic sugar)
                this_validation_loss = numpy.mean(validation_losses)

                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    best_validation_loss = this_validation_loss
                    # test it on the test set
                    test_losses = [
                        test_model(i) for i in range(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)

                    print((
                        '     epoch %i, minibatch %i/%i, test error of best model %f %%'
                    ) % (epoch, minibatch_index + 1, n_train_batches,
                         test_score * 100.))

                    with open('best_model.pkl', 'wb') as f:
                        pickle.dump(classifier, f)

            if patience <= iter:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print(
        'Optimization complete with best validation score of %f %%, with test performance %f %%'
        % (best_validation_loss * 100., test_score * 100.))
    print('The code run for %d epochs, with %f epochs/sec' %
          (epoch, 1. * epoch / (end_time - start_time)))
    print(
        ('The code for file ' + os.path.split(__file__)[1] + ' ran for %.1fs' %
         ((end_time - start_time))),
        file=sys.stderr)
Beispiel #34
0
def train_CNN_mini_batch(learning_rate, n_epochs, num_kernels, batch_size,
                         filter_size, is_multi_scale, num_of_classes, height,
                         width, use_interpolation, use_hidden_layer):
    train_set_x_by_1, train_set_y, valid_set_x_by_1, valid_set_y, test_set_x_by_1, test_set_y, train_set_x_by_2, \
    train_set_x_by_4, valid_set_x_by_2, valid_set_x_by_4, test_set_x_by_2, test_set_x_by_4 \
        = load_processed_img_data()

    n_train_batches = train_set_x_by_1.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x_by_1.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x_by_1.get_value(borrow=True).shape[0]
    n_train_batches /= batch_size
    n_valid_batches /= batch_size
    n_test_batches /= batch_size

    index = theano.tensor.lscalar()
    x_by_1 = theano.tensor.ftensor4('x_by_1')
    x_by_2 = theano.tensor.ftensor4('x_by_2')
    x_by_4 = theano.tensor.ftensor4('x_by_4')

    y = theano.tensor.ivector('y')

    print '... initialize the model'

    cnn_dir = 'models/CNN_'
    if is_multi_scale is True:
        cnn_dir += 'M_'
    else:
        cnn_dir += 'S_'

    if use_hidden_layer is True:
        cnn_dir += 'H_'
    else:
        cnn_dir += 'L_'

    if use_interpolation is True:
        cnn_dir += 'I_'
    else:
        cnn_dir += 'N_'

    cnn_dir = cnn_dir + str(num_kernels[0]) + '_' + str(
        num_kernels[1]) + '_' + str(
            num_kernels[2]) + '_' + str(batch_size) + '_'
    curr_date = str(datetime.date.today())
    curr_date = curr_date.replace('-', '_')
    cnn_dir = cnn_dir + curr_date + str(time.strftime('_%H_%M_%S'))

    print 'CNN model is ', cnn_dir

    if not os.path.exists(cnn_dir):
        os.makedirs(cnn_dir)

    class Logger(object):
        def __init__(self):
            self.terminal = sys.stdout
            self.log = open(cnn_dir + '/log.txt', 'w')

        def write(self, message):
            self.terminal.write(message)
            self.log.write(message)

    sys.stdout = Logger()

    layer0 = CNN_Layer(
        name='Layer_0',
        W=None,
        b=None,
        filter_shape=(num_kernels[0], 3, filter_size, filter_size),
    )

    layer1 = CNN_Layer(
        name='Layer_1',
        W=None,
        b=None,
        filter_shape=(num_kernels[1], num_kernels[0], filter_size,
                      filter_size),
    )

    layer2 = CNN_Layer(
        name='Layer_2',
        W=None,
        b=None,
        filter_shape=(num_kernels[2], num_kernels[1], filter_size,
                      filter_size),
    )

    layer3 = HiddenLayer(name='Layer_3',
                         W=None,
                         b=None,
                         n_in=num_kernels[2] *
                         3 if is_multi_scale is True else num_kernels[2],
                         n_out=num_kernels[2] *
                         4 if is_multi_scale is True else num_kernels[2] * 2,
                         activation=theano.tensor.tanh)

    if is_multi_scale and use_hidden_layer:
        layer4_in = num_kernels[2] * 4
    elif is_multi_scale and not use_hidden_layer:
        layer4_in = num_kernels[2] * 3
    elif not is_multi_scale and use_hidden_layer:
        layer4_in = num_kernels[2] * 2
    else:
        layer4_in = num_kernels[2]
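    # layer4_in above: with the hidden layer the softmax consumes layer3's
    # output (4x num_kernels[2] when multi-scale, else 2x); without it the
    # softmax reads the conv features directly (3x when multi-scale, else 1x).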

    layer4 = LogisticRegression(
        name='Layer_4',
        W=None,
        b=None,
        n_in=layer4_in,
        n_out=num_of_classes,
    )

    forward_propagation(layer0=layer0,
                        layer1=layer1,
                        layer2=layer2,
                        layer3=layer3,
                        layer4=layer4,
                        x_by_1=x_by_1,
                        x_by_2=x_by_2,
                        x_by_4=x_by_4,
                        num_kernels=num_kernels,
                        batch_size=batch_size,
                        filter_size=filter_size,
                        is_multi_scale=is_multi_scale,
                        height=height,
                        width=width,
                        use_interpolation=use_interpolation,
                        use_hidden_layer=use_hidden_layer)

    if use_hidden_layer is True:
        L2_norm = (layer4.W**2).sum() + (layer3.W**2).sum() + (
            layer2.W**2).sum() + (layer1.W**2).sum() + (layer0.W**2).sum()
    else:
        L2_norm = (layer4.W**2).sum() + (layer2.W**2).sum() + (
            layer1.W**2).sum() + (layer0.W**2).sum()

    regularization = 0.00001
    cost = layer4.negative_log_likelihood(y) + (regularization * L2_norm)

    if is_multi_scale is True:
        test_model = theano.function(
            [index],
            layer4.errors(y),
            givens={
                x_by_1:
                test_set_x_by_1[index * batch_size:(index + 1) * batch_size],
                x_by_2:
                test_set_x_by_2[index * batch_size:(index + 1) * batch_size],
                x_by_4:
                test_set_x_by_4[index * batch_size:(index + 1) * batch_size],
                y:
                test_set_y[index * batch_size * height * width:(index + 1) *
                           batch_size * height * width]
            })
    else:
        test_model = theano.function(
            [index],
            layer4.errors(y),
            givens={
                x_by_1:
                test_set_x_by_1[index * batch_size:(index + 1) * batch_size],
                y:
                test_set_y[index * batch_size * height * width:(index + 1) *
                           batch_size * height * width]
            })

    if is_multi_scale is True:
        validate_model = theano.function(
            [index],
            layer4.errors(y),
            givens={
                x_by_1:
                valid_set_x_by_1[index * batch_size:(index + 1) * batch_size],
                x_by_2:
                valid_set_x_by_2[index * batch_size:(index + 1) * batch_size],
                x_by_4:
                valid_set_x_by_4[index * batch_size:(index + 1) * batch_size],
                y:
                valid_set_y[index * batch_size * height * width:(index + 1) *
                            batch_size * height * width]
            })
    else:
        validate_model = theano.function(
            [index],
            layer4.errors(y),
            givens={
                x_by_1:
                valid_set_x_by_1[index * batch_size:(index + 1) * batch_size],
                y:
                valid_set_y[index * batch_size * height * width:(index + 1) *
                            batch_size * height * width]
            })

    if use_hidden_layer is True:
        params = layer4.params + layer3.params + layer2.params + layer1.params + layer0.params
    else:
        params = layer4.params + layer2.params + layer1.params + layer0.params

    grads = theano.tensor.grad(cost, params)

    updates = [(param_i, param_i - learning_rate * grad_i)
               for param_i, grad_i in zip(params, grads)]

    if is_multi_scale is True:
        train_model = theano.function(
            [index],
            cost,
            updates=updates,
            givens={
                x_by_1:
                train_set_x_by_1[index * batch_size:(index + 1) * batch_size],
                x_by_2:
                train_set_x_by_2[index * batch_size:(index + 1) * batch_size],
                x_by_4:
                train_set_x_by_4[index * batch_size:(index + 1) * batch_size],
                y:
                train_set_y[index * batch_size * width * height:(index + 1) *
                            batch_size * width * height]
            })
    else:
        train_model = theano.function(
            [index],
            cost,
            updates=updates,
            givens={
                x_by_1:
                train_set_x_by_1[index * batch_size:(index + 1) * batch_size],
                y:
                train_set_y[index * batch_size * width * height:(index + 1) *
                            batch_size * width * height]
            })

    print '... training the model'
    patience = 10000  # look as this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is considered significant
    validation_frequency = min(n_train_batches, patience / 2)

    best_layer_0_W = numpy.zeros_like(layer0.W.get_value())
    best_layer_0_b = numpy.zeros_like(layer0.b.get_value())
    best_layer_1_W = numpy.zeros_like(layer1.W.get_value())
    best_layer_1_b = numpy.zeros_like(layer1.b.get_value())
    best_layer_2_W = numpy.zeros_like(layer2.W.get_value())
    best_layer_2_b = numpy.zeros_like(layer2.b.get_value())
    best_layer_3_W = numpy.zeros_like(layer3.W.get_value())
    best_layer_3_b = numpy.zeros_like(layer3.b.get_value())
    best_layer_4_W = numpy.zeros_like(layer4.W.get_value())
    best_layer_4_b = numpy.zeros_like(layer4.b.get_value())

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = time.clock()

    epoch = 0
    done_looping = False
    while (epoch < n_epochs) and (not done_looping):
        epoch += 1
        for mini_batch_index in xrange(n_train_batches):

            start = time.clock()
            iter = (epoch - 1) * n_train_batches + mini_batch_index
            cost_ij = train_model(mini_batch_index)

            if (iter + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [
                    validate_model(i) for i in xrange(n_valid_batches)
                ]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, mini-batch %i/%i, validation error %f %%' %
                      (epoch, mini_batch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    # improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * \
                            improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # save best filters
                    best_layer_0_W = layer0.W.get_value()
                    best_layer_0_b = layer0.b.get_value()
                    best_layer_1_W = layer1.W.get_value()
                    best_layer_1_b = layer1.b.get_value()
                    best_layer_2_W = layer2.W.get_value()
                    best_layer_2_b = layer2.b.get_value()
                    best_layer_3_W = layer3.W.get_value()
                    best_layer_3_b = layer3.b.get_value()
                    best_layer_4_W = layer4.W.get_value()
                    best_layer_4_b = layer4.b.get_value()

                    # test it on the test set
                    test_losses = [
                        test_model(i) for i in xrange(n_test_batches)
                    ]

                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, mini-batch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, mini_batch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

            print 'training @ iter = %d, time taken = %f' % (iter,
                                                             (time.clock() -
                                                              start))

    end_time = time.clock()
    print('Optimization complete.')
    print(
        'Best validation score of %f %% obtained at iteration %i, '
        'with test performance %f %%' %
        (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))

    if not os.path.exists(cnn_dir + '/params'):
        os.makedirs(cnn_dir + '/params')

    numpy.save(cnn_dir + '/params/layer_0_W.npy', best_layer_0_W)
    numpy.save(cnn_dir + '/params/layer_0_b.npy', best_layer_0_b)
    numpy.save(cnn_dir + '/params/layer_1_W.npy', best_layer_1_W)
    numpy.save(cnn_dir + '/params/layer_1_b.npy', best_layer_1_b)
    numpy.save(cnn_dir + '/params/layer_2_W.npy', best_layer_2_W)
    numpy.save(cnn_dir + '/params/layer_2_b.npy', best_layer_2_b)
    numpy.save(cnn_dir + '/params/layer_3_W.npy', best_layer_3_W)
    numpy.save(cnn_dir + '/params/layer_3_b.npy', best_layer_3_b)
    numpy.save(cnn_dir + '/params/layer_4_W.npy', best_layer_4_W)
    numpy.save(cnn_dir + '/params/layer_4_b.npy', best_layer_4_b)
    numpy.save(cnn_dir + '/params/filer_kernels.npy', num_kernels)
    numpy.save(cnn_dir + '/params/filter_size.npy', filter_size)

    return cnn_dir
plt.ylabel(y_axis)
plt.title('Microchips Tests')
plt.legend()
plt.show()

num_examples = data.shape[0]
x_train = data[[x_axis, y_axis]].values.reshape((num_examples, 2))
y_train = data['validity'].values.reshape((num_examples, 1))

# Training parameters
max_iterations = 100000
regularization_param = 0
polynomial_degree = 5
sinusoid_degree = 0
# Logistic regression
logistic_regression = LogisticRegression(x_train, y_train, polynomial_degree,
                                         sinusoid_degree)

# Train
(thetas, costs) = logistic_regression.train(max_iterations)

columns = []
for theta_index in range(0, thetas.shape[1]):
    columns.append('Theta ' + str(theta_index))

# Training results
labels = logistic_regression.unique_labels

plt.plot(range(len(costs[0])), costs[0], label=labels[0])
plt.plot(range(len(costs[1])), costs[1], label=labels[1])

plt.xlabel('Gradient Steps')
Beispiel #36
0
def generate_segmented_image_tensors(img_by_1, img_by_2, img_by_4, model_dir,
                                     batch_size, height, width,
                                     num_of_classes):
    layer_0_W = numpy.load(model_dir + '/params/layer_0_W.npy')
    layer_0_b = numpy.load(model_dir + '/params/layer_0_b.npy')
    layer_1_W = numpy.load(model_dir + '/params/layer_1_W.npy')
    layer_1_b = numpy.load(model_dir + '/params/layer_1_b.npy')
    layer_2_W = numpy.load(model_dir + '/params/layer_2_W.npy')
    layer_2_b = numpy.load(model_dir + '/params/layer_2_b.npy')
    layer_3_W = numpy.load(model_dir + '/params/layer_3_W.npy')
    layer_3_b = numpy.load(model_dir + '/params/layer_3_b.npy')
    layer_4_W = numpy.load(model_dir + '/params/layer_4_W.npy')
    layer_4_b = numpy.load(model_dir + '/params/layer_4_b.npy')
    num_kernels = numpy.load(model_dir + '/params/filer_kernels.npy')
    filter_size = numpy.load(model_dir + '/params/filter_size.npy')

    if model_dir[11] == 'M':
        is_multi_scale = True
    elif model_dir[11] == 'S':
        is_multi_scale = False
    else:
        return NotImplemented

    if model_dir[13] == 'H':
        use_hidden_layer = True
    elif model_dir[13] == 'L':
        use_hidden_layer = False
    else:
        return NotImplemented

    if model_dir[15] == 'I':
        use_interpolation = True
    elif model_dir[15] == 'N':
        use_interpolation = False
    else:
        return NotImplemented

    layer0 = CNN_Layer(
        name='Layer_0',
        W=layer_0_W,
        b=layer_0_b,
        filter_shape=(num_kernels[0], 3, filter_size, filter_size),
    )

    layer1 = CNN_Layer(
        name='Layer_1',
        W=layer_1_W,
        b=layer_1_b,
        filter_shape=(num_kernels[1], num_kernels[0], filter_size,
                      filter_size),
    )

    layer2 = CNN_Layer(
        name='Layer_2',
        W=layer_2_W,
        b=layer_2_b,
        filter_shape=(num_kernels[2], num_kernels[1], filter_size,
                      filter_size),
    )

    layer3 = HiddenLayer(name='Layer_3',
                         W=layer_3_W,
                         b=layer_3_b,
                         n_in=num_kernels[2] *
                         3 if is_multi_scale is True else num_kernels[2],
                         n_out=num_kernels[2] *
                         4 if is_multi_scale is True else num_kernels[2] * 2,
                         activation=theano.tensor.tanh)

    layer4 = LogisticRegression(
        name='Layer_4',
        W=layer_4_W,
        b=layer_4_b,
        n_in=num_kernels[2] * 4 if is_multi_scale is True else num_kernels[2] *
        2,
        n_out=num_of_classes,
    )

    x_by_1 = theano.tensor.ftensor4('x_by_1')
    x_by_2 = theano.tensor.ftensor4('x_by_2')
    x_by_4 = theano.tensor.ftensor4('x_by_4')

    forward_propagation(
        layer0=layer0,
        layer1=layer1,
        layer2=layer2,
        layer3=layer3,
        layer4=layer4,
        x_by_1=x_by_1,
        x_by_2=x_by_2,
        x_by_4=x_by_4,
        num_kernels=num_kernels,
        batch_size=batch_size,
        filter_size=filter_size,
        is_multi_scale=is_multi_scale,
        height=height,
        width=width,
        use_interpolation=use_interpolation,
        use_hidden_layer=use_hidden_layer,
    )

    # create a function to compute the mistakes that are made by the model
    if is_multi_scale is True:
        test_model = theano.function([x_by_1, x_by_2, x_by_4],
                                     layer4.y_prediction)
    else:
        test_model = theano.function([x_by_1], layer4.y_prediction)

    if is_multi_scale is True:
        op = test_model(img_by_1, img_by_2, img_by_4)
    else:
        op = test_model(img_by_1)

    y = theano.tensor.reshape(op, (batch_size, height, width))
    return y.eval()
Beispiel #37
0
class StackedDenoisingAutoencoder:
    def __init__(self,
                 numpyRng,
                 theanoRng=None,
                 nIn=28*28,
                 hiddenLayerSizes=[500,500],
                 nOut=10):
        self.nLayers = len(hiddenLayerSizes)
        if not theanoRng:
            theanoRng = theano.tensor.shared_randomstreams.RandomStreams(numpyRng.randint(2 ** 30))
        self.x = T.matrix('x')
        self.y = T.ivector('y')
        def makeSigmoidLayer(lastLayer,lastLayerSize,size):
            return Layer(rng=numpyRng,input=lastLayer,nIn=lastLayerSize,nOut=size,activation=T.nnet.sigmoid)
        def makeDALayer(lastLayer,lastLayerSize,size,sigmoidLayer):
            return DenoisingAutoEncoder(
                numpyRng=numpyRng,theanoRng=theanoRng,input=lastLayer,
                nVisible=lastLayerSize,
                nHidden=size,
                W=sigmoidLayer.W,
                bHidden=sigmoidLayer.b)
        def makeLayers(lastLayer,lastInputSize,nextLayerSizes):
            if nextLayerSizes:
                newList = list(nextLayerSizes)
                size = newList.pop()
                sigmoidLayer = makeSigmoidLayer(lastLayer,lastInputSize,size)
                daLayer = makeDALayer(lastLayer,lastInputSize,size,sigmoidLayer)
                yield (sigmoidLayer,daLayer)
                for layer in makeLayers(sigmoidLayer.output,size,newList):
                    yield layer
        self.sigmoidLayers,self.dALayers = zip(*makeLayers(self.x,nIn,reversed(hiddenLayerSizes)))
        print "created sda with layer shapes below."
        for da in self.dALayers:
            print "layersize:", da.W.get_value().shape
        self.logLayer = LogisticRegression(self.sigmoidLayers[-1].output,hiddenLayerSizes[-1],nOut)
        # flatten the per-layer parameter lists and include the softmax layer's parameters
        self.params = sum([l.params for l in self.sigmoidLayers], []) + self.logLayer.params
        self.fineTuneCost = self.logLayer.negativeLogLikelihood(self.y)
        self.errors = self.logLayer.errors(self.y)

    def pretrainingFunctions(self,trainSetX,batchSize):
        index = T.lscalar("index")
        corruptionLevel = T.scalar('corruption')
        learningRate = T.scalar("learning")
        batchBegin = batchSize * index
        batchEnd = batchBegin + batchSize
        for dA in self.dALayers:
            cost,updates = dA.costFunctionAndUpdates(corruptionLevel,learningRate)
            f = theano.function(
                inputs=[
                    index,
                    theano.Param(corruptionLevel,default=0.2),
                    theano.Param(learningRate,default=0.1)
                ],
                outputs=cost,
                updates=updates,
                givens={self.x:trainSetX[batchBegin:batchEnd]},
            )
            yield f
            
    def pretrainingFunctionsWithOptimizer(self,trainSetX,batchSize,optimizer):
        """
        with optimizer.
        optimizer(params,grads)
        """
        index = T.lscalar("index")
        corruptionLevel = T.scalar('corruption')
        learningRate = T.scalar("learning")
        batchBegin = batchSize * index
        batchEnd = batchBegin + batchSize
        for dA in self.dALayers:
            #cost,updates = dA.costFunctionAndUpdates(corruptionLevel,learningRate)
            cost, param, grads = dA.costParamGrads(corruptionLevel)
            updates = optimizer(param,grads)
            f = theano.function(
                inputs=[
                    index,
                    theano.Param(corruptionLevel,default=0.2),
                ],
                outputs=cost,
                updates=updates,
                givens={self.x:trainSetX[batchBegin:batchEnd]},
            )
            yield f
            
    def fineTuneFunctions(self,datasets,batchSize,learningRate):
        index = T.lscalar('i')
        trainSetX,trainSetY = datasets[0]
        validSetX,validSetY = datasets[1]
        testSetX,testSetY = datasets[2]
        gparams = T.grad(self.fineTuneCost,self.params)
        updates = [
            (param,param-gparam*learningRate)
            for param,gparam in zip(self.params,gparams)
        ]
        def makeGivens(x,y):
            return {self.x:x[index*batchSize:(index+1)*batchSize],
                    self.y:y[index*batchSize:(index+1)*batchSize]}
        trainer = theano.function(
            inputs=[index],
            outputs=self.fineTuneCost,
            updates=updates,
            givens=makeGivens(trainSetX,trainSetY),
            name='train'
        )
        testScoreI=theano.function(
            inputs=[index],
            outputs=self.errors,
            givens=makeGivens(testSetX,testSetY),
            name='test'
        )
        validScoreI=theano.function(
            inputs=[index],
            outputs=self.errors,
            givens=makeGivens(validSetX,validSetY),
            name='valid'
        )

        def validationScore():
            return [validScoreI(i) for i in xrange(validSetX.get_value(borrow=True).shape[0]/batchSize)]

        def testScore():
            return [testScoreI(i) for i in xrange(testSetX.get_value(borrow=True).shape[0]/batchSize)]

        return trainer,validationScore,testScore

    def preTrain(self,
                 data,
                 batchSize=20,
                 preLearningRate=0.1,
                 corruptionLevels=(.1,.2,.3)):
        import numpy,util
        preTrainer = list(self.pretrainingFunctions(data,batchSize=batchSize))
        assert len(corruptionLevels) == len(preTrainer), "number of corruption levels must match the number of dA layers"
        for i,(trainer,corruptionLevel) in enumerate(zip(preTrainer,corruptionLevels)):
            for epoch in xrange(15):
                print 'Pre-training layer %i, epoch %d start' % (i,epoch)
                trainScores = [trainer(batchIndex,corruptionLevel,preLearningRate) for batchIndex in xrange(data.get_value(borrow=True).shape[0]/batchSize)]
                print 'Pre-training layer %i, epoch %d, cost ' % (i, epoch),numpy.mean(trainScores)
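# Hedged usage sketch (editor's addition, not part of the original listing):
# `pretrainingFunctionsWithOptimizer` above expects a callable
# `optimizer(params, grads)` that returns a Theano updates list. A minimal
# plain-SGD optimizer satisfying that contract could look like this; the 0.1
# learning rate is an illustrative default.
def sgdOptimizer(params, grads, learningRate=0.1):
    # one gradient-descent step per parameter
    return [(p, p - learningRate * g) for p, g in zip(params, grads)]
# Assuming `sda` is an instance of the class above and `trainSetX` a shared
# variable, the per-layer trainers could then be consumed as:
#   for trainer in sda.pretrainingFunctionsWithOptimizer(trainSetX, 20, sgdOptimizer):
#       costs = [trainer(i) for i in xrange(nTrainBatches)]
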
class DBN(object):
    """Deep Belief Network

    A deep belief network is obtained by stacking several RBMs on top of each
    other. The hidden layer of the RBM at layer `i` becomes the input of the
    RBM at layer `i+1`. The first layer RBM gets as input the input of the
    network, and the hidden layer of the last RBM represents the output. When
    used for classification, the DBN is treated as a MLP, by adding a logistic
    regression layer on top.
    """

    def __init__(self, numpy_rng, theano_rng=None, n_ins=N_FEATURES * N_FRAMES,
                 hidden_layers_sizes=[1024, 1024], n_outs=62 * 3,
                 rho=0.90, eps=1.E-6):
        """This class is made to support a variable number of layers.

        :type numpy_rng: numpy.random.RandomState
        :param numpy_rng: numpy random number generator used to draw initial
                    weights

        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
        :param theano_rng: Theano random generator; if None is given one is
                           generated based on a seed drawn from `rng`

        :type n_ins: int
        :param n_ins: dimension of the input to the DBN

        :type hidden_layers_sizes: list of ints
        :param hidden_layers_sizes: intermediate layers size, must contain
                               at least one value

        :type n_outs: int
        :param n_outs: dimension of the output of the network
        """

        self.sigmoid_layers = []
        self.rbm_layers = []
        self.params = []
        self.n_layers = len(hidden_layers_sizes)
        #self._rho = shared(numpy.cast['float32'](rho), name='rho')  # for adadelta
        #self._eps = shared(numpy.cast['float32'](eps), name='eps')  # for adadelta
        self._rho = rho
        self._eps = eps
        self._accugrads = []  # for adadelta
        self._accudeltas = []  # for adadelta

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

        # allocate symbolic variables for the data
        self.x = T.fmatrix('x')  # the data is presented as rasterized images
        self.y = T.ivector('y')  # the labels are presented as 1D vector
                                 # of [int] labels

        # The DBN is an MLP, for which all weights of intermediate
        # layers are shared with a different RBM.  We will first
        # construct the DBN as a deep multilayer perceptron, and when
        # constructing each sigmoidal layer we also construct an RBM
        # that shares weights with that layer. During pretraining we
        # will train these RBMs (which will lead to changing the
        # weights of the MLP as well). During finetuning we will finish
        # training the DBN by doing stochastic gradient descent on the
        # MLP.

        for i in xrange(self.n_layers):
            # construct the sigmoidal layer

            # the size of the input is either the number of hidden
            # units of the layer below or the input size if we are on
            # the first layer
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layers_sizes[i - 1]

            # the input to this layer is either the activation of the
            # hidden layer below or the input of the DBN if you are on
            # the first layer
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[-1].output

            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=T.nnet.sigmoid)

            # add the layer to our list of layers
            self.sigmoid_layers.append(sigmoid_layer)

            # it's arguably a philosophical question...  but we are
            # going to only declare that the parameters of the
            # sigmoid_layers are parameters of the DBN. The visible
            # biases in the RBM are parameters of those RBMs, but not
            # of the DBN.
            self.params.extend(sigmoid_layer.params)
            self._accugrads.extend([
                shared(value=numpy.zeros((input_size, hidden_layers_sizes[i]), dtype='float32'),
                       name='accugrad_W', borrow=True),
                shared(value=numpy.zeros((hidden_layers_sizes[i], ), dtype='float32'),
                       name='accugrad_b', borrow=True)])  # TODO
            self._accudeltas.extend([
                shared(value=numpy.zeros((input_size, hidden_layers_sizes[i]), dtype='float32'),
                       name='accudelta_W', borrow=True),
                shared(value=numpy.zeros((hidden_layers_sizes[i], ), dtype='float32'),
                       name='accudelta_b', borrow=True)])  # TODO

            # Construct an RBM that shares weights with this layer
            if i == 0:
                rbm_layer = GRBM(numpy_rng=numpy_rng,
                                theano_rng=theano_rng,
                                input=layer_input,
                                n_visible=input_size,
                                n_hidden=hidden_layers_sizes[i],
                                W=sigmoid_layer.W,
                                hbias=sigmoid_layer.b)
            else:
                rbm_layer = RBM(numpy_rng=numpy_rng,
                                theano_rng=theano_rng,
                                input=layer_input,
                                n_visible=input_size,
                                n_hidden=hidden_layers_sizes[i],
                                W=sigmoid_layer.W,
                                hbias=sigmoid_layer.b)
            self.rbm_layers.append(rbm_layer)

        # We now need to add a logistic layer on top of the MLP
        self.logLayer = LogisticRegression(
            input=self.sigmoid_layers[-1].output,
            n_in=hidden_layers_sizes[-1],
            n_out=n_outs)
        self.params.extend(self.logLayer.params)
        self._accugrads.extend([
            shared(value=numpy.zeros((hidden_layers_sizes[-1], n_outs), dtype='float32'),
                   name='accugrad_W', borrow=True),
            shared(value=numpy.zeros((n_outs, ), dtype='float32'),
                   name='accugrad_b', borrow=True)])  # TODO
        self._accudeltas.extend([
            shared(value=numpy.zeros((hidden_layers_sizes[-1], n_outs), dtype='float32'),
                   name='accudelta_W', borrow=True),
            shared(value=numpy.zeros((n_outs, ), dtype='float32'),
                   name='accudelta_b', borrow=True)])  # TODO

        # compute the cost for second phase of training, defined as the
        # negative log likelihood of the logistic regression (output) layer
        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
        self.finetune_cost_sum = self.logLayer.negative_log_likelihood_sum(self.y)

        # compute the gradients with respect to the model parameters
        # symbolic variable that points to the number of errors made on the
        # minibatch given by self.x and self.y
        self.errors = self.logLayer.errors(self.y)

    def pretraining_functions(self, k):
        batch_x = T.fmatrix('batch_x')
        learning_rate = T.scalar('lr')  # learning rate to use

        pretrain_fns = []
        for rbm in self.rbm_layers:

            # get the cost and the updates list
            # using CD-k here (persistent=None) for training each RBM.
            # TODO: change cost function to reconstruction error
            #markov_chain = shared(numpy.empty((batch_size, rbm.n_hidden), dtype='float32'), borrow=True)
            markov_chain = None
            cost, updates = rbm.get_cost_updates(learning_rate,
                                                 persistent=markov_chain, k=k)

            # compile the theano function
            fn = theano.function(inputs=[batch_x,
                            theano.Param(learning_rate, default=0.1)],
                                 outputs=cost,
                                 updates=updates,
                                 givens={self.x: batch_x})
            # append `fn` to the list of functions
            pretrain_fns.append(fn)

        return pretrain_fns

    def get_SGD_trainer(self):
        """ Returns a plain SGD minibatch trainer with learning rate as param.
        """
        batch_x = T.fmatrix('batch_x')
        batch_y = T.ivector('batch_y')
        learning_rate = T.fscalar('lr')  # learning rate to use
        cost = self.finetune_cost_sum
        # compute the gradients with respect to the model parameters
        gparams = T.grad(cost, self.params)

        # compute list of fine-tuning updates
        updates = OrderedDict()
        for param, gparam in zip(self.params, gparams):
            updates[param] = param - gparam * learning_rate 

        train_fn = theano.function(inputs=[theano.Param(batch_x), 
            theano.Param(batch_y),
            theano.Param(learning_rate)],
            outputs=cost,
            updates=updates,
            givens={self.x: batch_x, self.y: batch_y})

        return train_fn

    def get_adadelta_trainer(self):
        """ Returns an Adadelta (Zeiler 2012) trainer using self._rho and self._eps params.
        """
        batch_x = T.fmatrix('batch_x')
        batch_y = T.ivector('batch_y')
        cost = self.finetune_cost_sum
        # compute the gradients with respect to the model parameters
        gparams = T.grad(cost, self.params)

        # compute list of fine-tuning updates
        updates = OrderedDict()
        for accugrad, accudelta, param, gparam in zip(self._accugrads,
                self._accudeltas, self.params, gparams):
            # c.f. Algorithm 1 in the Adadelta paper (Zeiler 2012)
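            # accugrad tracks the decayed running average of squared gradients
            # E[g^2]; accudelta tracks the decayed average of squared updates
            # E[dx^2]; their ratio gives the per-parameter step size below.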
            agrad = self._rho * accugrad + (1 - self._rho) * gparam * gparam
            dx = - T.sqrt((accudelta + self._eps) / (agrad + self._eps)) * gparam
            updates[accudelta] = self._rho * accudelta + (1 - self._rho) * dx * dx
            updates[param] = param + dx
            updates[accugrad] = agrad

        train_fn = theano.function(inputs=[theano.Param(batch_x), 
            theano.Param(batch_y)],
            outputs=cost,
            updates=updates,
            givens={self.x: batch_x, self.y: batch_y})

        return train_fn

    def get_adagrad_trainer(self):
        """ Returns an Adagrad (Duchi et al. 2010) trainer using a learning rate.
        """
        batch_x = T.fmatrix('batch_x')
        batch_y = T.ivector('batch_y')
        learning_rate = T.fscalar('lr')  # learning rate to use
        cost = self.finetune_cost_sum
        # compute the gradients with respect to the model parameters
        gparams = T.grad(cost, self.params)

        # compute list of fine-tuning updates
        updates = OrderedDict()
        for accugrad, param, gparam in zip(self._accugrads, self.params, gparams):
            # Adagrad accumulates the sum of squared gradients (Duchi et al. 2010)
            agrad = accugrad + gparam * gparam
            dx = - (learning_rate / T.sqrt(agrad + self._eps)) * gparam
            updates[param] = param + dx
            updates[accugrad] = agrad

        train_fn = theano.function(inputs=[theano.Param(batch_x), 
            theano.Param(batch_y),
            theano.Param(learning_rate)],
            outputs=cost,
            updates=updates,
            givens={self.x: batch_x, self.y: batch_y})

        return train_fn

    def get_SAG_trainer(self):
        """ Returns a Stochastic Averaged Gradient (Bach & Moulines 2011) trainer.

        This is based on Bach 2013 slides: 
        PRavg(theta_n) = Polyak-Ruppert averaging = (1+n)^{-1} * \sum_{k=0}^n theta_k
        theta_n = theta_{n-1} - gamma [ f'_n(PR_avg(theta_{n-1})) + f''_n(PR_avg(
                  theta_{n-1})) * (theta_{n-1} - PR_avg(theta_{n-1}))]

        This returns two trainers: one for the first epoch, one for subsequent epochs.
        We use self._accudeltas to store the Polyak-Ruppert average,
        and self._accugrads for the number of iterations (updates).
        """
        print "UNFINISHED, see TODO in get_SAG_trainer()"
        sys.exit(-1)

        batch_x = T.fmatrix('batch_x')
        batch_y = T.ivector('batch_y')
        learning_rate = T.fscalar('lr')  # learning rate to use
        cost = self.finetune_cost_sum

        # First trainer:
        gparams = T.grad(cost, self.params)
        updates = OrderedDict()
        for accudelta, accugrad, param, gparam in zip(self._accudeltas, self._accugrads, self.params, gparams):
            theta = param - gparam * learning_rate 
            updates[accudelta] = (theta + accudelta * accugrad) / (accugrad + 1.)
            updates[param] = theta
            updates[accugrad] = accugrad + 1.

        train_fn_init = theano.function(inputs=[theano.Param(batch_x), 
            theano.Param(batch_y),
            theano.Param(learning_rate)],
            outputs=cost,
            updates=updates,
            givens={self.x: batch_x, self.y: batch_y})

        # Second trainer:
        gparams = T.grad(cost, self._accudeltas)  # TODO recreate the network with 
        # (TODO) self._accudeltas instead of self.params so that we can compute the cost
        hparams = T.grad(cost, gparams)

        # compute list of fine-tuning updates
        updates = OrderedDict()
        for accudelta, accugrad, param, gparam, hparam in zip(self._accudeltas, self._accugrads, self.params, gparams, hparams):
            theta = param - learning_rate * (gparam + hparam * (param - accudelta))
            updates[accudelta] = (theta + accudelta * accugrad) / (accugrad + 1.)
            updates[param] = theta
            updates[accugrad] = accugrad + 1.

        train_fn = theano.function(inputs=[theano.Param(batch_x), 
            theano.Param(batch_y),
            theano.Param(learning_rate)],
            outputs=cost,
            updates=updates,
            givens={self.x: batch_x, self.y: batch_y})

        return train_fn_init, train_fn

    def get_SGD_ld_trainer(self):
        """ Returns an SGD-ld trainer (Schaul et al. 2012).
        """
        print "UNFINISHED, see TODO in get_SGD_ld_trainer()"
        sys.exit(-1)

        batch_x = T.fmatrix('batch_x')
        batch_y = T.ivector('batch_y')
        cost = self.finetune_cost_sum
        # compute the gradients with respect to the model parameters
        gparams = T.grad(cost, self.params)
        # INIT TODO

        # compute list of fine-tuning updates
        updates = OrderedDict()
        for accugrad, accudelta, accuhess, param, gparam in zip(self._accugrads, self._accudeltas, self._accuhess, self.params, gparams):
            pass  # TODO
            # TODO 
            # TODO 

        train_fn = theano.function(inputs=[theano.Param(batch_x), 
            theano.Param(batch_y)],
            outputs=cost,
            updates=updates,
            givens={self.x: batch_x, self.y: batch_y})

        return train_fn

    def score_classif(self, given_set):
        """ Returns functions to get current classification scores. """
        batch_x = T.fmatrix('batch_x')
        batch_y = T.ivector('batch_y')
        score = theano.function(inputs=[theano.Param(batch_x), theano.Param(batch_y)],
                outputs=self.errors,
                givens={self.x: batch_x, self.y: batch_y})

        # Create a function that scans the entire set given as input
        def scoref():
            return [score(batch_x, batch_y) for batch_x, batch_y in given_set]

        return scoref
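# Hedged usage sketch (editor's addition): a minimal fine-tuning loop driving
# the DBN above with its Adadelta trainer and score_classif helper. `dbn` is
# assumed to be a constructed DBN instance, and `train_set` / `valid_set`
# iterables of (batch_x, batch_y) numpy array pairs; none of these names come
# from the original listing (numpy is already imported in this module).
def finetune_with_adadelta(dbn, train_set, valid_set, n_epochs=10):
    train_fn = dbn.get_adadelta_trainer()       # compiled Theano update step
    valid_score = dbn.score_classif(valid_set)  # returns per-batch error rates
    for epoch in xrange(n_epochs):
        costs = [train_fn(bx, by) for bx, by in train_set]
        print 'epoch %i, mean cost %f, valid error %f' % (
            epoch, numpy.mean(costs), numpy.mean(valid_score()))
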
class StackedDenoisingAutoEncoders(object):
    def __init__(self,
                 random_generator,
                 theano_random_generator=None,
                 x_dim=28 * 28,
                 y_dim=10,
                 hidden_layer_sizes=[500, 500],
                 corruption_levels=[0.1, 0.1]):
        """
        """
        # Declare empty sigmoid layer array for MLP
        self.sigmoid_layers = []

        # Declare an empty array of DenoisingAutoEncoder
        self.autoencoder_layers = []

        self.params = []
        self.n_layers = len(hidden_layer_sizes)

        if theano_random_generator is None:
            self.theano_random_generator = RandomStreams(
                random_generator.randint(2**30))
        else:
            self.theano_random_generator = theano_random_generator

        # Inputs using Theano
        self.x = T.matrix("x")
        self.y = T.ivector("y")

        # Initialize all parameters
        for i in range(self.n_layers):
            # Define x and y dimensions
            if i == 0:
                internal_x_dim = x_dim
            else:
                internal_x_dim = hidden_layer_sizes[i - 1]
            internal_y_dim = hidden_layer_sizes[i]

            # Find inputs
            if i == 0:
                internal_input = self.x
            else:
                internal_input = self.sigmoid_layers[i - 1].output

            # Define Sigmoid Layer
            self.sigmoid_layers.append(
                HiddenLayer(internal_input,
                            internal_x_dim,
                            internal_y_dim,
                            random_generator,
                            activation=T.nnet.sigmoid))

            # Define input
            self.autoencoder_layers.append(
                DenoisingAutoEncoder(random_generator,
                                     theano_random_generator,
                                     internal_x_dim,
                                     internal_y_dim,
                                     internal_input,
                                     W=self.sigmoid_layers[i].W,
                                     b=self.sigmoid_layers[i].b))

            # Update parameters
            self.params.extend(self.sigmoid_layers[i].params)

        # Finally add logistic layer
        self.logistic_layer = LogisticRegression(
            self.sigmoid_layers[-1].output, hidden_layer_sizes[-1], y_dim)

        self.params.extend(self.logistic_layer.params)

        # These are two important costs
        # Finetuning after pretraining individual AutoEncoders
        self.finetune_cost = self.logistic_layer.negative_log_likelihood(
            self.y)

        # Error from prediction
        self.error = self.logistic_layer.error(self.y)

    def pretrain(self, train_x, batch_size):
        """Generates a list of functions, each of them implementing one
        step in trainnig the dA corresponding to the layer with same index.
        The function will require as input the minibatch index, and to train
        a dA you just need to iterate, calling the corresponding function on
        all minibatch indexes.

        :type train_set_x: theano.tensor.TensorType
        :param train_set_x: Shared variable that contains all datapoints used
        for training the dA
        
        :type batch_size: int
        :param batch_size: size of a [mini]batch
        
        :type learning_rate: float
        :param learning_rate: learning rate used during training for any of
        the dA layer
        """
        index = T.iscalar("index")
        corruption_level = T.scalar("corruption_level")
        learning_rate = T.scalar("learning_rate")

        pretrain_functions = []
        for autoencoder in self.autoencoder_layers:

            # Find cost and updates for the layer
            cost, updates = autoencoder.cost_updates(corruption_level,
                                                     learning_rate)

            f = theano.function(inputs=[
                index,
                theano.Param(corruption_level, default=0.2),
                theano.Param(learning_rate, default=0.1)
            ],
                                outputs=cost,
                                updates=updates,
                                givens={
                                    self.x:
                                    train_x[index * batch_size:(index + 1) *
                                            batch_size]
                                })

            pretrain_functions.append(f)

        return pretrain_functions

    def finetune(self, train_x, train_y, valid_x, valid_y, test_x, test_y,
                 batch_size, learning_rate):
        """Generates a function `train` that implements one step of
        finetuning, a function `validate` that computes the error on
        a batch from the validation set, and a function `test` that
        computes the error on a batch from the testing set
        
        :type batch_size: int
        :param batch_size: size of a minibatch
        
        :type learning_rate: float
        :param learning_rate: learning rate used during finetune stage
        """
        # Define index
        index = T.iscalar("index")

        # Cost and updates in SGD
        grad = T.grad(self.finetune_cost, wrt=self.params)
        updates = list()
        for i in range(len(self.params)):
            updates.append(
                (self.params[i], self.params[i] - learning_rate * grad[i]))

        # Define train, valid and test models
        train_model = theano.function(
            inputs=[index],
            outputs=self.finetune_cost,
            updates=updates,
            givens={
                self.x: train_x[index * batch_size:(index + 1) * batch_size],
                self.y: train_y[index * batch_size:(index + 1) * batch_size]
            })

        valid_model = theano.function(
            inputs=[index],
            outputs=self.error,
            givens={
                self.x: valid_x[index * batch_size:(index + 1) * batch_size],
                self.y: valid_y[index * batch_size:(index + 1) * batch_size]
            })

        test_model = theano.function(
            inputs=[index],
            outputs=self.error,
            givens={
                self.x: test_x[index * batch_size:(index + 1) * batch_size],
                self.y: test_y[index * batch_size:(index + 1) * batch_size]
            })

        return (train_model, valid_model, test_model)
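# Hedged usage sketch (editor's addition): greedy layer-wise pretraining
# followed by supervised finetuning for the class above. `sda` is assumed to
# be a constructed StackedDenoisingAutoEncoders instance and the data
# arguments Theano shared variables; these names are not part of the original
# listing.
def pretrain_and_finetune(sda, train_x, train_y, valid_x, valid_y,
                          test_x, test_y, batch_size=20, n_epochs=15):
    n_train_batches = train_x.get_value(borrow=True).shape[0] / batch_size
    # one compiled function per denoising autoencoder layer; the defaults
    # corruption_level=0.2 and learning_rate=0.1 from `pretrain` are used
    for layer_fn in sda.pretrain(train_x, batch_size):
        for epoch in range(n_epochs):
            [layer_fn(i) for i in range(n_train_batches)]
    # supervised finetuning of the whole stack with plain SGD
    train_model, valid_model, test_model = sda.finetune(
        train_x, train_y, valid_x, valid_y, test_x, test_y,
        batch_size, learning_rate=0.1)
    for epoch in range(n_epochs):
        [train_model(i) for i in range(n_train_batches)]
    return train_model, valid_model, test_model
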
Beispiel #40
0
def train_CNN_mini_batch(learning_rate,
                         n_epochs,
                         num_kernels,
                         batch_size,
                         filter_size,
                         is_multi_scale,
                         num_of_classes,
                         height,
                         width,
                         use_interpolation,
                         use_hidden_layer):
    train_set_x_by_1, train_set_y, valid_set_x_by_1, valid_set_y, test_set_x_by_1, test_set_y, train_set_x_by_2, \
    train_set_x_by_4, valid_set_x_by_2, valid_set_x_by_4, test_set_x_by_2, test_set_x_by_4 \
        = load_processed_img_data()

    n_train_batches = train_set_x_by_1.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x_by_1.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x_by_1.get_value(borrow=True).shape[0]
    n_train_batches /= batch_size
    n_valid_batches /= batch_size
    n_test_batches /= batch_size

    index = theano.tensor.lscalar()
    x_by_1 = theano.tensor.ftensor4('x_by_1')
    x_by_2 = theano.tensor.ftensor4('x_by_2')
    x_by_4 = theano.tensor.ftensor4('x_by_4')

    y = theano.tensor.ivector('y')

    print '... initialize the model'

    cnn_dir = 'models/CNN_'
    if is_multi_scale is True:
        cnn_dir += 'M_'
    else:
        cnn_dir += 'S_'

    if use_hidden_layer is True:
        cnn_dir += 'H_'
    else:
        cnn_dir += 'L_'

    if use_interpolation is True:
        cnn_dir += 'I_'
    else:
        cnn_dir += 'N_'

    cnn_dir = cnn_dir + str(num_kernels[0]) + '_' + str(num_kernels[1]) + '_' + str(num_kernels[2]) + '_' + str(
        batch_size) + '_'
    curr_date = str(datetime.date.today())
    curr_date = curr_date.replace('-', '_')
    cnn_dir = cnn_dir + curr_date + str(time.strftime('_%H_%M_%S'))

    print 'CNN model is ', cnn_dir

    if not os.path.exists(cnn_dir):
        os.makedirs(cnn_dir)

    class Logger(object):
        def __init__(self):
            self.terminal = sys.stdout
            self.log = open(cnn_dir + '/log.txt', 'w')

        def write(self, message):
            self.terminal.write(message)
            self.log.write(message)

    sys.stdout = Logger()

    layer0 = CNN_Layer(
        name='Layer_0',
        W=None,
        b=None,
        filter_shape=(num_kernels[0], 3, filter_size, filter_size),
    )

    layer1 = CNN_Layer(
        name='Layer_1',
        W=None,
        b=None,
        filter_shape=(num_kernels[1], num_kernels[0], filter_size, filter_size),
    )

    layer2 = CNN_Layer(
        name='Layer_2',
        W=None,
        b=None,
        filter_shape=(num_kernels[2], num_kernels[1], filter_size, filter_size),
    )

    layer3 = HiddenLayer(
        name='Layer_3',
        W=None,
        b=None,
        n_in=num_kernels[2] * 3 if is_multi_scale is True else num_kernels[2],
        n_out=num_kernels[2] * 4 if is_multi_scale is True else num_kernels[2] * 2,
        activation=theano.tensor.tanh
    )

    if is_multi_scale and use_hidden_layer:
        layer4_in = num_kernels[2] * 4
    elif is_multi_scale and not use_hidden_layer:
        layer4_in = num_kernels[2] * 3
    elif not is_multi_scale and use_hidden_layer:
        layer4_in = num_kernels[2] * 2
    else:
        layer4_in = num_kernels[2]

    layer4 = LogisticRegression(
        name='Layer_4',
        W=None,
        b=None,
        n_in=layer4_in,
        n_out=num_of_classes,
    )

    forward_propagation(
        layer0=layer0,
        layer1=layer1,
        layer2=layer2,
        layer3=layer3,
        layer4=layer4,
        x_by_1=x_by_1,
        x_by_2=x_by_2,
        x_by_4=x_by_4,
        num_kernels=num_kernels,
        batch_size=batch_size,
        filter_size=filter_size,
        is_multi_scale=is_multi_scale,
        height=height,
        width=width,
        use_interpolation=use_interpolation,
        use_hidden_layer=use_hidden_layer
    )

    if use_hidden_layer is True:
        L2_norm = ((layer4.W ** 2).sum() + (layer3.W ** 2).sum() +
                   (layer2.W ** 2).sum() + (layer1.W ** 2).sum() +
                   (layer0.W ** 2).sum())
    else:
        L2_norm = (layer4.W ** 2).sum() + (layer2.W ** 2).sum() + (layer1.W ** 2).sum() + (layer0.W ** 2).sum()

    regularization = 0.00001
    cost = layer4.negative_log_likelihood(y) + (regularization * L2_norm)

    if is_multi_scale is True:
        test_model = theano.function(
            [index],
            layer4.errors(y),
            givens={
                x_by_1: test_set_x_by_1[index * batch_size: (index + 1) * batch_size],
                x_by_2: test_set_x_by_2[index * batch_size: (index + 1) * batch_size],
                x_by_4: test_set_x_by_4[index * batch_size: (index + 1) * batch_size],
                y: test_set_y[index * batch_size * height * width: (index + 1) * batch_size * height * width]
            }
        )
    else:
        test_model = theano.function(
            [index],
            layer4.errors(y),
            givens={
                x_by_1: test_set_x_by_1[index * batch_size: (index + 1) * batch_size],
                y: test_set_y[index * batch_size * height * width: (index + 1) * batch_size * height * width]
            }
        )

    if is_multi_scale is True:
        validate_model = theano.function(
            [index],
            layer4.errors(y),
            givens={
                x_by_1: valid_set_x_by_1[index * batch_size: (index + 1) * batch_size],
                x_by_2: valid_set_x_by_2[index * batch_size: (index + 1) * batch_size],
                x_by_4: valid_set_x_by_4[index * batch_size: (index + 1) * batch_size],
                y: valid_set_y[index * batch_size * height * width: (index + 1) * batch_size * height * width]
            }
        )
    else:
        validate_model = theano.function(
            [index],
            layer4.errors(y),
            givens={
                x_by_1: valid_set_x_by_1[index * batch_size: (index + 1) * batch_size],
                y: valid_set_y[index * batch_size * height * width: (index + 1) * batch_size * height * width]
            }
        )

    if use_hidden_layer is True:
        params = layer4.params + layer3.params + layer2.params + layer1.params + layer0.params
    else:
        params = layer4.params + layer2.params + layer1.params + layer0.params

    grads = theano.tensor.grad(cost, params)

    updates = [
        (param_i, param_i - learning_rate * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]

    if is_multi_scale is True:
        train_model = theano.function(
            [index],
            cost,
            updates=updates,
            givens={
                x_by_1: train_set_x_by_1[index * batch_size: (index + 1) * batch_size],
                x_by_2: train_set_x_by_2[index * batch_size: (index + 1) * batch_size],
                x_by_4: train_set_x_by_4[index * batch_size: (index + 1) * batch_size],
                y: train_set_y[index * batch_size * width * height: (index + 1) * batch_size * width * height]
            }
        )
    else:
        train_model = theano.function(
            [index],
            cost,
            updates=updates,
            givens={
                x_by_1: train_set_x_by_1[index * batch_size: (index + 1) * batch_size],
                y: train_set_y[index * batch_size * width * height: (index + 1) * batch_size * width * height]
            }
        )

    print '... training the model'
    patience = 10000  # look as this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is considered significant
    validation_frequency = min(n_train_batches, patience / 2)

    best_layer_0_W = numpy.zeros_like(layer0.W.get_value())
    best_layer_0_b = numpy.zeros_like(layer0.b.get_value())
    best_layer_1_W = numpy.zeros_like(layer1.W.get_value())
    best_layer_1_b = numpy.zeros_like(layer1.b.get_value())
    best_layer_2_W = numpy.zeros_like(layer2.W.get_value())
    best_layer_2_b = numpy.zeros_like(layer2.b.get_value())
    best_layer_3_W = numpy.zeros_like(layer3.W.get_value())
    best_layer_3_b = numpy.zeros_like(layer3.b.get_value())
    best_layer_4_W = numpy.zeros_like(layer4.W.get_value())
    best_layer_4_b = numpy.zeros_like(layer4.b.get_value())

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = time.clock()

    epoch = 0
    done_looping = False
    while (epoch < n_epochs) and (not done_looping):
        epoch += 1
        for mini_batch_index in xrange(n_train_batches):

            start = time.clock()
            iter = (epoch - 1) * n_train_batches + mini_batch_index
            cost_ij = train_model(mini_batch_index)

            if (iter + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [validate_model(i) for i
                                     in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, mini-batch %i/%i, validation error %f %%' %
                      (epoch, mini_batch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    # improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * \
                            improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # save best filters
                    best_layer_0_W = layer0.W.get_value()
                    best_layer_0_b = layer0.b.get_value()
                    best_layer_1_W = layer1.W.get_value()
                    best_layer_1_b = layer1.b.get_value()
                    best_layer_2_W = layer2.W.get_value()
                    best_layer_2_b = layer2.b.get_value()
                    best_layer_3_W = layer3.W.get_value()
                    best_layer_3_b = layer3.b.get_value()
                    best_layer_4_W = layer4.W.get_value()
                    best_layer_4_b = layer4.b.get_value()

                    # test it on the test set
                    test_losses = [
                        test_model(i)
                        for i in xrange(n_test_batches)
                    ]

                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, mini-batch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, mini_batch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

            print 'training @ iter = %d, time taken = %f' % (iter, (time.clock() - start))

    end_time = time.clock()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))

    if not os.path.exists(cnn_dir + '/params'):
        os.makedirs(cnn_dir + '/params')

    numpy.save(cnn_dir + '/params/layer_0_W.npy', best_layer_0_W)
    numpy.save(cnn_dir + '/params/layer_0_b.npy', best_layer_0_b)
    numpy.save(cnn_dir + '/params/layer_1_W.npy', best_layer_1_W)
    numpy.save(cnn_dir + '/params/layer_1_b.npy', best_layer_1_b)
    numpy.save(cnn_dir + '/params/layer_2_W.npy', best_layer_2_W)
    numpy.save(cnn_dir + '/params/layer_2_b.npy', best_layer_2_b)
    numpy.save(cnn_dir + '/params/layer_3_W.npy', best_layer_3_W)
    numpy.save(cnn_dir + '/params/layer_3_b.npy', best_layer_3_b)
    numpy.save(cnn_dir + '/params/layer_4_W.npy', best_layer_4_W)
    numpy.save(cnn_dir + '/params/layer_4_b.npy', best_layer_4_b)
    numpy.save(cnn_dir + '/params/filer_kernels.npy', num_kernels)
    numpy.save(cnn_dir + '/params/filter_size.npy', filter_size)

    return cnn_dir
def sgd_optimize(learning_rate=0.1,
                 n_epochs=200,
                 batch_size=500,
                 nkerns=[20, 50]):
    # Load input
    train, valid, test = util.load()
    print "loading 0 - ", train[0].shape[0], " train inputs in gpu memory"
    train_x, train_y = util.create_theano_shared(train)

    print "loading 0 - ", valid[0].shape[0], " validation inputs in gpu memory"
    valid_x, valid_y = util.create_theano_shared(valid)

    print "loading 0 - ", test[0].shape[0], " test inputs in gpu memory"
    test_x, test_y = util.create_theano_shared(test)

    # Define symbolic input matrices
    print "Building Model..."
    index = T.iscalar()
    x = T.matrix("x")
    y = T.ivector("y")
    random_generator = numpy.random.RandomState(1)

    # Create Layer0 of Lenet Model
    layer0_input = x.reshape( (batch_size, 1, 28, 28) )
    filter_shape0 = (nkerns[0], 1, 5, 5)
    image_shape0 = (batch_size, 1, 28, 28) 
    layer0 = LeNetConvPoolLayer(layer0_input, filter_shape0, image_shape0, random_generator)
    
    # Create Layer1 of Lenet model
    filter_shape1 = (nkerns[1], nkerns[0], 5, 5)
    image_shape1 = (batch_size, nkerns[0], 12, 12)
    layer1 = LeNetConvPoolLayer(layer0.output, filter_shape1, image_shape1, random_generator)

    # Create Layer2 which is a simple MLP hidden layer
    layer2_input = layer1.output.flatten(2)
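    # Shape arithmetic behind nkerns[1] * 4 * 4 below (editor's note):
    # 28x28 input -> conv 5x5 -> 24x24 -> 2x2 maxpool -> 12x12
    #            -> conv 5x5 ->  8x8  -> 2x2 maxpool ->  4x4 per feature map.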
    layer2 = HiddenLayer(layer2_input, nkerns[1] * 4 * 4, 500, random_generator)

    # Finally, Layer3 is LogisticRegression layer
    layer3 = LogisticRegression(layer2.output, 500, 10)

    # Define error
    error = layer3.error(y)

    # Create cost function
    cost = layer3.negative_log_likelihood(y)

    # Gradient and update functions
    params = layer3.params + layer2.params + layer1.params + layer0.params
    grads = T.grad(cost, wrt=params)
    updates = list()
    for i in range(len(params)):
        updates.append( (params[i], params[i] - learning_rate * grads[i]) )

    # Train model
    train_model = theano.function(
                    inputs=[index],
                    outputs=cost,
                    updates=updates,
                    givens = {
                       x: train_x[index*batch_size : (index+1)*batch_size],
                       y: train_y[index*batch_size : (index+1)*batch_size]
                    })

    # Valid model
    valid_model = theano.function(
                    inputs=[index],
                    outputs=error,
                    givens = {
                       x: valid_x[index*batch_size : (index+1)*batch_size],
                       y: valid_y[index*batch_size : (index+1)*batch_size]
                    })
    
    # Test Model 
    test_model  = theano.function(
                    inputs=[index],
                    outputs=error,
                    givens={
                       x: test_x[index*batch_size : (index+1)*batch_size],
                       y: test_y[index*batch_size : (index+1)*batch_size]
                    })

    # Create number of minibatches
    n_train_batches = train[0].shape[0] / batch_size
    n_valid_batches = valid[0].shape[0] / batch_size
    n_test_batches = test[0].shape[0] / batch_size

    # Finally, main loop for training
    util.train_test_model(n_epochs, train_model, valid_model, test_model,
                          n_train_batches, n_valid_batches, n_test_batches)
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import datasets
import matplotlib.pyplot as plt

from logistic_regression import LogisticRegression

bc = datasets.load_breast_cancer()
X, y = bc.data, bc.target

X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.1,
                                                    random_state=1234)


def accuracy(y_true, y_pred):
    accuracy = np.sum(y_true == y_pred) / len(y_true)
    return accuracy


re = LogisticRegression(lr=0.0001, n_iters=1000)
re.fit(X_train, y_train)
prediction = re.predict(X_test)

print('accuracy: ', accuracy(y_test, prediction))
for i in range(len(prediction)):
    print(y_test[i], prediction[i])
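
# Hedged sketch (editor's addition): the imported `logistic_regression` module
# is not shown in this listing. A minimal from-scratch implementation that is
# consistent with the constructor arguments (lr, n_iters) and the fit/predict
# calls above might look as follows; it is an illustration, not the actual
# module.
class MinimalLogisticRegression:
    def __init__(self, lr=0.001, n_iters=1000):
        self.lr = lr
        self.n_iters = n_iters
        self.weights = None
        self.bias = 0.0

    def fit(self, X, y):
        n_samples, n_features = X.shape
        self.weights = np.zeros(n_features)
        for _ in range(self.n_iters):
            linear = X.dot(self.weights) + self.bias
            y_pred = 1.0 / (1.0 + np.exp(-linear))   # sigmoid
            # gradients of the cross-entropy loss
            dw = X.T.dot(y_pred - y) / n_samples
            db = np.sum(y_pred - y) / n_samples
            self.weights -= self.lr * dw
            self.bias -= self.lr * db

    def predict(self, X):
        probs = 1.0 / (1.0 + np.exp(-(X.dot(self.weights) + self.bias)))
        return np.where(probs > 0.5, 1, 0)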
Beispiel #43
0
class DBN(object):
    """Deep Belief Network

    A deep belief network is obtained by stacking several RBMs on top of each
    other. The hidden layer of the RBM at layer `i` becomes the input of the
    RBM at layer `i+1`. The first layer RBM gets as input the input of the
    network, and the hidden layer of the last RBM represents the output. When
    used for classification, the DBN is treated as a MLP, by adding a logistic
    regression layer on top.
    """
    def __init__(self,
                 numpy_rng,
                 theano_rng=None,
                 n_ins=784,
                 hidden_layers_sizes=[500, 500],
                 n_outs=10):
        """This class is made to support a variable number of layers.

        :type numpy_rng: numpy.random.RandomState
        :param numpy_rng: numpy random number generator used to draw initial
                    weights

        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
        :param theano_rng: Theano random generator; if None is given one is
                           generated based on a seed drawn from `rng`

        :type n_ins: int
        :param n_ins: dimension of the input to the DBN

        :type hidden_layers_sizes: list of ints
        :param hidden_layers_sizes: intermediate layers size, must contain
                               at least one value

        :type n_outs: int
        :param n_outs: dimension of the output of the network
        """

        self.sigmoid_layers = []
        self.rbm_layers = []
        self.params = []
        self.n_layers = len(hidden_layers_sizes)

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2**30))

        # allocate symbolic variables for the data
        self.x = T.matrix('x')  # the data is presented as rasterized images
        self.y = T.ivector('y')  # the labels are presented as 1D vector
        # of [int] labels
        # end-snippet-1
        # The DBN is an MLP, for which all weights of intermediate
        # layers are shared with a different RBM.  We will first
        # construct the DBN as a deep multilayer perceptron, and when
        # constructing each sigmoidal layer we also construct an RBM
        # that shares weights with that layer. During pretraining we
        # will train these RBMs (which will lead to changing the
        # weights of the MLP as well). During finetuning we will finish
        # training the DBN by doing stochastic gradient descent on the
        # MLP.

        for i in xrange(self.n_layers):
            # construct the sigmoidal layer

            # the size of the input is either the number of hidden
            # units of the layer below or the input size if we are on
            # the first layer
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layers_sizes[i - 1]

            # the input to this layer is either the activation of the
            # hidden layer below or the input of the DBN if you are on
            # the first layer
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[-1].output

            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=T.nnet.sigmoid)

            # add the layer to our list of layers
            self.sigmoid_layers.append(sigmoid_layer)

            # it's arguably a philosophical question...  but we are
            # going to only declare that the parameters of the
            # sigmoid_layers are parameters of the DBN. The visible
            # biases in the RBM are parameters of those RBMs, but not
            # of the DBN.
            self.params.extend(sigmoid_layer.params)

            # Construct an RBM that shares weights with this layer
            rbm_layer = RBM(numpy_rng=numpy_rng,
                            theano_rng=theano_rng,
                            input=layer_input,
                            n_visible=input_size,
                            n_hidden=hidden_layers_sizes[i],
                            W=sigmoid_layer.W,
                            hbias=sigmoid_layer.b)
            self.rbm_layers.append(rbm_layer)

        # We now need to add a logistic layer on top of the MLP
        self.logLayer = LogisticRegression(
            input=self.sigmoid_layers[-1].output,
            n_in=hidden_layers_sizes[-1],
            n_out=n_outs)
        self.params.extend(self.logLayer.params)

        # compute the cost for second phase of training, defined as the
        # negative log likelihood of the logistic regression (output) layer
        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)

        # compute the gradients with respect to the model parameters
        # symbolic variable that points to the number of errors made on the
        # minibatch given by self.x and self.y
        self.errors = self.logLayer.errors(self.y)

    def pretraining_functions(self, train_set_x, batch_size, k):
        '''Generates a list of functions, for performing one step of
        gradient descent at a given layer. The function will require
        as input the minibatch index, and to train an RBM you just
        need to iterate, calling the corresponding function on all
        minibatch indexes.

        :type train_set_x: theano.tensor.TensorType
        :param train_set_x: Shared var. that contains all datapoints used
                            for training the RBM
        :type batch_size: int
        :param batch_size: size of a [mini]batch
        :param k: number of Gibbs steps to do in CD-k / PCD-k

        '''

        # index to a [mini]batch
        index = T.lscalar('index')  # index to a minibatch
        learning_rate = T.scalar('lr')  # learning rate to use

        # number of batches
        n_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
        # begining of a batch, given `index`
        batch_begin = index * batch_size
        # ending of a batch given `index`
        batch_end = batch_begin + batch_size

        pretrain_fns = []
        for rbm in self.rbm_layers:

            # get the cost and the updates list
            # using CD-k here (persistent=None) for training each RBM.
            # TODO: change cost function to reconstruction error
            cost, updates = rbm.get_cost_updates(learning_rate,
                                                 persistent=None,
                                                 k=k)

            # compile the theano function
            fn = theano.function(
                inputs=[index, theano.Param(learning_rate, default=0.1)],
                outputs=cost,
                updates=updates,
                givens={self.x: train_set_x[batch_begin:batch_end]})
            # append `fn` to the list of functions
            pretrain_fns.append(fn)

        return pretrain_fns

    def build_finetune_functions(self, datasets, batch_size):
        '''Generates a function `train` that implements one step of
        finetuning, a function `validate` that computes the error on a
        batch from the validation set, and a function `test` that
        computes the error on a batch from the testing set

        :type datasets: list of pairs of theano.tensor.TensorType
        :param datasets: It is a list that contains all the datasets;
                        it has to contain three pairs, `train`,
                        `valid`, `test` in this order, where each pair
                        is formed of two Theano variables, one for the
                        datapoints, the other for the labels
        :type batch_size: int
        :param batch_size: size of a minibatch
        '''

        (train_set_x, train_set_y) = datasets[0]
        (valid_set_x, valid_set_y) = datasets[1]
        (test_set_x, test_set_y) = datasets[2]

        # compute number of minibatches for training, validation and testing
        n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
        n_valid_batches /= batch_size
        n_test_batches = test_set_x.get_value(borrow=True).shape[0]
        n_test_batches /= batch_size

        index = T.lscalar('index')  # index to a [mini]batch
        learning_rate = T.scalar('lr')  # learning rate to use

        # compute the gradients with respect to the model parameters
        gparams = T.grad(self.finetune_cost, self.params)

        # compute list of fine-tuning updates
        updates = []
        for param, gparam in zip(self.params, gparams):
            updates.append(
                (param, param -
                 gparam * T.cast(learning_rate, dtype=theano.config.floatX)))

        train_fn = theano.function(
            inputs=[index, theano.Param(learning_rate, default=0.1)],
            outputs=self.finetune_cost,
            updates=updates,
            givens={
                self.x:
                train_set_x[index * batch_size:(index + 1) * batch_size],
                self.y:
                train_set_y[index * batch_size:(index + 1) * batch_size]
            })

        test_score_i = theano.function(
            [index],
            self.errors,
            givens={
                self.x:
                test_set_x[index * batch_size:(index + 1) * batch_size],
                self.y: test_set_y[index * batch_size:(index + 1) * batch_size]
            })

        valid_score_i = theano.function(
            [index],
            self.errors,
            givens={
                self.x:
                valid_set_x[index * batch_size:(index + 1) * batch_size],
                self.y:
                valid_set_y[index * batch_size:(index + 1) * batch_size]
            })

        # Create a function that scans the entire validation set
        def valid_score():
            return [valid_score_i(i) for i in xrange(n_valid_batches)]

        # Create a function that scans the entire test set
        def test_score():
            return [test_score_i(i) for i in xrange(n_test_batches)]

        return train_fn, valid_score, test_score
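# Hedged usage sketch (editor's addition): the usual way to drive the two
# function factories above. `dbn` is assumed to be a constructed DBN and
# `datasets` the three (x, y) pairs of shared variables expected by
# build_finetune_functions; the batch size, k and epoch counts are
# illustrative values, not taken from the original listing.
import numpy

def train_dbn(dbn, datasets, batch_size=10, k=1,
              pretraining_epochs=10, training_epochs=10):
    train_set_x = datasets[0][0]
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    # greedy layer-wise CD-k pretraining, one compiled function per RBM
    # (the compiled functions default to a 0.1 learning rate)
    for i, fn in enumerate(dbn.pretraining_functions(train_set_x, batch_size, k)):
        for epoch in xrange(pretraining_epochs):
            c = [fn(j) for j in xrange(n_train_batches)]
            print 'Pre-training layer %i, epoch %d, cost %f' % (i, epoch, numpy.mean(c))
    # supervised finetuning with minibatch SGD (default learning rate 0.1)
    train_fn, valid_score, test_score = dbn.build_finetune_functions(datasets, batch_size)
    for epoch in xrange(training_epochs):
        [train_fn(j) for j in xrange(n_train_batches)]
        print 'epoch %d, validation error %f' % (epoch, numpy.mean(valid_score()))
    return numpy.mean(test_score())
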
Beispiel #44
0
def transform(X, tfidf):
    # (reconstructed header, inferred from the call sites below) build a
    # bag-of-words document-term count matrix, then apply the TF-IDF transform
    t0 = time.time()
    count = np.zeros((len(X), VOCAB_SIZE))
    for i, indices in enumerate(X):
        for idx in indices:
            count[i, idx] += 1
    print("%.2f secs ==> Document-Term Matrix" % (time.time() - t0))

    t0 = time.time()
    X = tfidf.fit_transform(count)
    print("%.2f secs ==> TF-IDF transform" % (time.time() - t0))
    return X


if __name__ == '__main__':
    (X_train, y_train), (X_test, y_test) = tf.keras.datasets.imdb.load_data(
        num_words=VOCAB_SIZE)

    tfidf = TfidfTransformer()
    X_train = transform(X_train, tfidf)
    X_test = transform(X_test, tfidf)

    model = LogisticRegression(VOCAB_SIZE, 2)
    model.fit(X_train,
              y_train,
              n_epoch=2,
              batch_size=32,
              val_data=(X_test, y_test))
    y_pred = model.predict(X_test)

    final_acc = (y_pred == y_test).mean()
    print("final testing accuracy: %.4f" % final_acc)
Beispiel #45
0
def evaluate_model(learning_rate=0.001,
                   n_epochs=100,
                   nkerns=[16, 40, 50, 60],
                   batch_size=20):
    """ 
    Network for classification of MNIST database

    :type learning_rate: float
    :param learning_rate: this is the initial learning rate used
                            (factor for the stochastic gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer

    :type batch_size: int
    :param batch_size: the batch size for training
    """

    print("Evaluating model")

    rng = numpy.random.RandomState(23455)

    # loading the data
    datasets = load_test_data(3)

    valid_set_x, valid_set_y = datasets[0]
    test_set_x, test_set_y = datasets[1]

    # compute number of minibatches for training, validation and testing
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # start-snippet-1
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
    # [int] labels

    loaded_params = numpy.load('../saved_models/model3.npy')
    layer4_W, layer4_b, layer3_W, layer3_b, layer2_W, layer2_b, layer1_W, layer1_b, layer0_W, layer0_b = loaded_params

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('Building the model...')

    # Reshape matrix of rasterized images of shape (batch_size, 64 * 88)
    # to a 4D tensor, compatible with our MyConvPoolLayer
    # (64, 88) is the size of the input images.
    layer0_input = x.reshape((batch_size, 1, 64, 88))

    # Construct the first convolutional pooling layer:
    # filtering does not reduce the layer size because we use padding
    # maxpooling reduces the size to (64/2, 88/2) = (32, 44)
    # 4D output tensor is thus of shape (batch_size, nkerns[0], 32, 44)
    layer0 = MyConvPoolLayer(rng,
                             input=layer0_input,
                             image_shape=(batch_size, 1, 64, 88),
                             p1=2,
                             p2=2,
                             filter_shape=(nkerns[0], 1, 5, 5),
                             poolsize=(2, 2),
                             W=layer0_W,
                             b=layer0_b)

    # Construct the second convolutional pooling layer:
    # filtering does not reduce the layer size because we use padding
    # maxpooling reduces the size to (32/2, 44/2) = (16, 22)
    # 4D output tensor is thus of shape (batch_size, nkerns[1], 16, 22)
    layer1 = MyConvPoolLayer(rng,
                             input=layer0.output,
                             image_shape=(batch_size, nkerns[0], 32, 44),
                             p1=2,
                             p2=2,
                             filter_shape=(nkerns[1], nkerns[0], 5, 5),
                             poolsize=(2, 2),
                             W=layer1_W,
                             b=layer1_b)

    # Construct the third convolutional pooling layer
    # filtering does not reduce the layer size because we use padding
    # maxpooling reduces the size to (16/2, 22/2) = (8, 11)
    # 4D output tensor is thus of shape (batch_size, nkerns[2], 8, 11)
    layer2 = MyConvPoolLayer(rng,
                             input=layer1.output,
                             image_shape=(batch_size, nkerns[1], 16, 22),
                             p1=2,
                             p2=2,
                             filter_shape=(nkerns[2], nkerns[1], 5, 5),
                             poolsize=(2, 2),
                             W=layer2_W,
                             b=layer2_b)

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e. a matrix of rasterized images).
    # This will generate a matrix of shape (batch_size, nkerns[2] * 8 * 11),
    # or (20, 50 * 8 * 11) = (20, 4400) with the default values.
    layer3_input = layer2.output.flatten(2)

    # construct a fully-connected sigmoidal layer
    layer3 = HiddenLayer(rng,
                         input=layer3_input,
                         n_in=nkerns[2] * 8 * 11,
                         n_out=800,
                         activation=T.tanh,
                         W=layer3_W,
                         b=layer3_b)

    # classify the values of the fully-connected sigmoidal layer
    layer4 = LogisticRegression(input=layer3.output,
                                n_in=800,
                                n_out=6,
                                W=layer4_W,
                                b=layer4_b)

    cost = layer4.negative_log_likelihood(y)

    predicted_output = layer4.y_pred

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer4.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    val_model_preds = theano.function(
        [index],
        layer4.prediction(),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
        })

    validate_model = theano.function(
        [index],
        layer4.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # create a list of all model parameters to be fit by gradient descent
    params = layer4.params + layer3.params + layer2.params + layer1.params + layer0.params

    val_preds = [val_model_preds(i) for i in range(n_valid_batches)]

    #print(val_preds)
    #preds = numpy(val_preds)

    preds = []
    for pred in val_preds:
        for p in pred:
            preds.append(p)

    #preds = val_preds.reshape(valid_set_x.get_value(borrow=True).shape[0])

    actual_labels = load_test_data(2, 2)
    n = len(actual_labels)

    confusion_matrix = numpy.zeros((6, 6))

    for i in range(n):
        confusion_matrix[int(actual_labels[i])][preds[i]] += 1

    print(confusion_matrix)

    correct = 0.0
    for i in range(n):
        if (preds[i] == int(actual_labels[i])):
            correct += 1.0

    accuracy = correct / n
    print("Number of correctly classified : ", correct)
    print("Test accuracy is", accuracy * 100)
    file_num = 80
    motif_num = 10
    data_size = feature_list.shape[0]

    input_size = feature_list.shape[1]
    output_size = motif_num

    W = load_features(data_path, 'W_1.txt')

    makeFolder()

    # label = numpy.zeros((data_size, output_size))

    # for i in xrange(data_size):
    #     index = i / file_num
    #     label[i][index] = 1

    # LR = LogisticRegression(feature_list, label, input_size, output_size, data_size, fine_tune_lr)
    LR = LogisticRegression(feature_list, None, input_size, output_size, data_size, fine_tune_lr)
    LR.W = W
    for i in xrange(fine_tune_epoch):
        print 'epoch: ' + str(i)
        LR.fine_tune()
    # output_list = LR.predict(feature_list)
    # output_list = LR.predict_direct(feature_list)
    output_list = LR.predict_sigmoid(feature_list)

    saveW(LR.getW(), 'LR_after_train')
    saveFeatures(output_list, 'LR_judge.txt')
    # saveFeatures(label, 'label.txt')
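
# Editor's note: a minimal numpy sketch of the confusion-matrix and accuracy
# bookkeeping done with explicit loops earlier in this example, vectorised
# instead of looped. It assumes `actual_labels` and `preds` are 1-D integer
# label arrays over the same classes; the helper name is illustrative and not
# part of the original code.
import numpy as np

def confusion_and_accuracy(actual_labels, preds, n_classes=6):
    actual = np.asarray(actual_labels, dtype=int)
    predicted = np.asarray(preds, dtype=int)
    # rows index the true class, columns the predicted class
    cm = np.zeros((n_classes, n_classes), dtype=int)
    np.add.at(cm, (actual, predicted), 1)
    accuracy = float(np.trace(cm)) / len(actual)
    return cm, accuracy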
Beispiel #47
0
# ..........................
#  TRAIN / TEST SPLIT
# ..........................
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)
# Rescaled labels {-1, 1}
rescaled_y_train = 2 * y_train - np.ones(np.shape(y_train))
rescaled_y_test = 2 * y_test - np.ones(np.shape(y_test))

# .......
#  SETUP
# .......
adaboost = Adaboost(n_clf=8)
naive_bayes = NaiveBayes()
knn = KNN(k=4)
logistic_regression = LogisticRegression()
mlp = MultilayerPerceptron(n_hidden=20)
perceptron = Perceptron()
decision_tree = DecisionTree()
random_forest = RandomForest(n_estimators=150)
support_vector_machine = SupportVectorMachine(C=1, kernel=rbf_kernel)
lda = LDA()

# ........
#  TRAIN
# ........
print "Training:"
print "\tAdaboost"
adaboost.fit(X_train, rescaled_y_train)
print "\tNaive Bayes"
naive_bayes.fit(X_train, y_train)
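
# Editor's note: the example is truncated after the first two fits. A minimal,
# hypothetical continuation for one more of the models set up above, assuming
# it follows the same fit/predict interface as the two calls shown; np.mean on
# an equality mask then gives the test accuracy.
print "\tLogistic Regression"
logistic_regression.fit(X_train, y_train)
y_pred = logistic_regression.predict(X_test)
print "\tAccuracy: %.4f" % np.mean(y_pred == y_test)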
Beispiel #48
0
def evaluate_cnn(image_shape=[32],
                 channels=3,
                 nkerns=[64, 128],
                 filter_shapes=[5, 5],
                 hidden_layer=[1024],
                 outputs=10,
                 pools=[2, 2],
                 dropouts=[0.1, 0.25, 0.5],
                 learning_rate=0.1,
                 momentum=0.5,
                 n_epochs=2000,
                 minibatch_size=1024):

    rng = np.random.RandomState(12345)

    # calculate shapes at each CNN layer
    for i in range(len(filter_shapes)):
        if (image_shape[-1] - filter_shapes[i] + 1) % pools[i] != 0:
            return -1
        image_shape = image_shape + [
            (image_shape[-1] - filter_shapes[i] + 1) // pools[i]
        ]

    # specify shape of filters
    shapes = [(nkerns[0], channels, filter_shapes[0], filter_shapes[0]),
              (nkerns[1], nkerns[0], filter_shapes[1], filter_shapes[1]),
              (nkerns[1] * image_shape[-1]**2, hidden_layer[0]),
              (hidden_layer[0], outputs)]

    # load parameters
    paramDataManager = ParamDataManager(image_shape, channels, nkerns,
                                        filter_shapes, hidden_layer, outputs,
                                        pools, dropouts, momentum,
                                        learning_rate, n_epochs,
                                        minibatch_size)
    toLoadParameters = False  # Not loading parameters now
    toSaveParameters = True
    paramData = [None] * 8
    if toLoadParameters:
        paramData, shapeData = paramDataManager.loadData()
        shapeMatched = True
        for i in range(len(shapes)):
            if (shapes[-i - 1] != shapeData[2 * i]):
                paramData[2 * i] = None
                paramData[2 * i + 1] = None
                print(".. Shape problem for %d .." % (2 * i), shapes[-i],
                      shapeData[2 * i])
                shapeMatched = False
            else:
                print('... Data loaded for layer %d ...' % i)
        if (shapeMatched == False):
            print('... Shape did not match ...')

    #######################
    # Variables for model #
    #######################

    x = T.matrix('x')
    y = T.ivector('y')

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    layer0_input = x.reshape(
        (minibatch_size, channels, image_shape[0], image_shape[0]))

    ######################
    #     TRAIN AREA     #
    ######################
    # Construct the first convolutional pooling layer:
    layer0 = ConvPoolLayer(rng,
                           input=layer0_input,
                           image_shape=(minibatch_size, channels,
                                        image_shape[0], image_shape[0]),
                           filter_shape=shapes[0],
                           poolsize=(pools[0], pools[0]),
                           activation=T.nnet.relu,
                           dropout=dropouts[0],
                           W=paramData[6],
                           b=paramData[7])

    # Construct the second convolutional pooling layer
    layer1 = ConvPoolLayer(rng,
                           input=layer0.output,
                           image_shape=(minibatch_size, nkerns[0],
                                        image_shape[1], image_shape[1]),
                           filter_shape=shapes[1],
                           poolsize=(pools[1], pools[1]),
                           activation=T.nnet.relu,
                           dropout=dropouts[1],
                           W=paramData[4],
                           b=paramData[5])

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels), so we flatten the conv output.

    layer2_input = layer1.output.flatten(2)  # shape = (minibatch_size, nkerns[1] * image_shape[-1]**2)

    # construct a fully-connected ReLU layer
    layer2 = HiddenLayer(rng,
                         input=layer2_input,
                         n_in=shapes[2][0],
                         n_out=shapes[2][1],
                         activation=T.nnet.relu,
                         dropout=dropouts[2],
                         W=paramData[2],
                         b=paramData[3])

    # classify the values of the fully-connected ReLU layer
    layer3 = LogisticRegression(input=layer2.output,
                                n_in=shapes[3][0],
                                n_out=shapes[3][1],
                                W=paramData[0],
                                b=paramData[1])

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)

    # create a list of all model parameters to be fit by gradient descent
    params = layer3.params + layer2.params + layer1.params + layer0.params
    velocity = []
    for i in range(len(params)):
        velocity = velocity + [
            theano.shared(T.zeros_like(params[i]).eval(), borrow=True)
        ]

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by
    # SGD Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i], grads[i]) pairs.
    updates = [(velocity_i, momentum * velocity_i + learning_rate * grad_i)
               for velocity_i, grad_i in zip(velocity, grads)]
    updates = updates + [(param_i, param_i - velocity_i)
                         for param_i, velocity_i in zip(params, velocity)]

    train_model = theano.function(
        [x, y],
        cost,
        updates=updates,
    )

    ######################
    #     TEST AREA      #
    ######################
    # Test-time input, reshaped the same way as the training input
    layer0_test_input = x.reshape(
        (minibatch_size, channels, image_shape[0], image_shape[0]))

    # Test layer 0
    layer0_test_output = convPoolLayerTest(
        input=layer0_test_input,
        image_shape=(minibatch_size, channels, image_shape[0], image_shape[0]),
        filter_shape=shapes[0],
        poolsize=(pools[0], pools[0]),
        activation=T.nnet.relu,
        W=layer0.params[0],
        b=layer0.params[1])

    # Test layer 1
    layer1_test_output = convPoolLayerTest(
        input=layer0_test_output,
        image_shape=(minibatch_size, nkerns[0], image_shape[1],
                     image_shape[1]),
        filter_shape=shapes[1],
        poolsize=(pools[1], pools[1]),
        activation=T.nnet.relu,
        W=layer1.params[0],
        b=layer1.params[1])

    # the test HiddenLayer
    layer2_test_input = layer1_test_output.flatten(2)

    # test fully-connected ReLU layer
    layer2_test_output = hiddenLayerTest(input=layer2_test_input,
                                         activation=T.nnet.relu,
                                         W=layer2.params[0],
                                         b=layer2.params[1])

    # softmax classification on top of the test hidden layer
    y_pred = logisticRegressionTest(input=layer2_test_output,
                                    W=layer3.params[0],
                                    b=layer3.params[1])

    # create a function to compute validation scores
    validate_model = theano.function([x, y], classificationErrors(y_pred, y))

    # create a function to compute test scores
    test_model = theano.function([x, y], classificationErrors(y_pred, y))

    #########################
    # TRAIN CONFIGURATION   #
    #########################

    patience = 10000
    patience_increase = 2

    improvement_threshold = 0.995
    momentum_limit = 0.9

    # Initialize training variables
    epoch = 0
    done_looping = False
    minibatch_iteration = 0

    best_validation_loss = np.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    # Initialize sample loader for loading train, val, test samples
    sampleLoader = SampleLoader()

    validation_frequency = min(sampleLoader.n_train_batches, patience // 2)
    #validation_frequency = 10
    ############
    # TRAINING #
    ############
    print "Training ..."

    while (epoch < n_epochs) and (not done_looping):
        #sys.stdout.flush()
        epoch = epoch + 1
        learning_rate = learning_rate * 0.99
        momentum = momentum + (momentum_limit - momentum) / 32
        print('Learning rate = %f, Momentum = %f' % (learning_rate, momentum))

        train_batch_data = sampleLoader.loadNextTrainBatch()
        print train_batch_data[0].shape.eval()

        while train_batch_data is not None:
            train_x, train_y = train_batch_data
            train_x = train_x.get_value()
            train_y = train_y.eval()
            n_minibatches = train_x.shape[0] // minibatch_size
            print type(n_minibatches)
            for minibatch_index in range(n_minibatches):
                minibatch_iteration += 1
                x = train_x[minibatch_index *
                            minibatch_size:(minibatch_index + 1) *
                            minibatch_size].reshape((-1, train_x.shape[-1]))
                y = train_y[minibatch_index *
                            minibatch_size:(minibatch_index + 1) *
                            minibatch_size]
                print "minibatch_iteration ", minibatch_iteration
                cost_minibatch = train_model(x, y)
                print cost_minibatch
                # Validate with a frequency of validation_frequency
                if minibatch_iteration % validation_frequency == 0:
                    validation_loss = get_validation_loss(
                        sampleLoader, validate_model, minibatch_size)
                    # if we got the best validation score until now
                    print "validation_loss: ", validation_loss, " validation_loss: ", best_validation_loss
                    if validation_loss < best_validation_loss:

                        #improve patience if loss improvement is good enough
                        if validation_loss < best_validation_loss * improvement_threshold:
                            patience = max(
                                patience,
                                minibatch_iteration * patience_increase)

                        # save best validation score and iteration number
                        best_validation_loss = validation_loss
                        best_iter = minibatch_iteration
                        """
                            Check for overfitting logic here
                        """
                        # compute test loss and keep it as the reported test score
                        test_loss = get_test_loss(sampleLoader, test_model,
                                                  minibatch_size)
                        test_score = test_loss
                        print
                        print "validation loss improved!"
                        print
                        print "validation_loss: ", validation_loss, " test_loss: ", test_loss
                        if toSaveParameters:
                            paramDataManager.saveData(params)

                if patience <= minibatch_iteration:
                    done_looping = True
                    break

            train_batch_data = sampleLoader.loadNextTrainBatch()

    end_time = timeit.default_timer()
    print "Training complete."
    print "Best Validation Score: ", best_validation_loss, " obtained at ", best_iter, " With test score ", test_score
    print "Program ran for ", ((end_time - start_time) / 60), "m"
    return (best_validation_loss, test_score,
            paramDataManager.getParamDataAddress())
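
# Editor's note: a hypothetical invocation of evaluate_cnn, included only to
# make the parameter layout above concrete; the hyperparameter values are
# illustrative and not taken from the original example.
if __name__ == '__main__':
    result = evaluate_cnn(image_shape=[32],
                          channels=3,
                          nkerns=[64, 128],
                          filter_shapes=[5, 5],
                          hidden_layer=[1024],
                          outputs=10,
                          pools=[2, 2],
                          dropouts=[0.1, 0.25, 0.5],
                          learning_rate=0.1,
                          momentum=0.5,
                          n_epochs=200,
                          minibatch_size=1024)
    # evaluate_cnn returns -1 when a filter/pool combination does not tile the
    # image evenly, otherwise (best_validation_loss, test_score, param data path)
    print result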
def run_models_with_cross_validation(num_classes=2, learning_rate=0.5):

    #GET DATA
    #- expect data_0 ... data_4
    data_groups = list()
    data_type = 'int'
    data_groups.append(FileManager.get_csv_file_data_array(
        'data_0', data_type))
    data_groups.append(FileManager.get_csv_file_data_array(
        'data_1', data_type))
    data_groups.append(FileManager.get_csv_file_data_array(
        'data_2', data_type))
    data_groups.append(FileManager.get_csv_file_data_array(
        'data_3', data_type))
    data_groups.append(FileManager.get_csv_file_data_array(
        'data_4', data_type))

    NUM_GROUPS = len(data_groups)

    #For each data group, train on all the others and test on the held-out group
    model1_culminating_result = 0
    model2_culminating_result = 0
    model1_final_average_result = 0
    model2_final_average_result = 0

    for test_group_id in range(NUM_GROUPS):
        print()
        #Form training data as 4/5 data
        train_data = list()
        for train_group_id in range(len(data_groups)):
            if (train_group_id != test_group_id):
                #Initialize train_data if necessary
                if (len(train_data) == 0):
                    train_data = data_groups[train_group_id]
                else:
                    train_data = train_data + data_groups[train_group_id]

        print('train_data group', str(test_group_id), 'length: ',
              len(train_data))
        #print(train_data)

        test_data = data_groups[test_group_id]

        model1_result = 0
        model2_result = 0
        model1 = NaiveBayes(num_classes)
        model2 = LogisticRegression(pd.DataFrame(train_data))
        model1.train(train_data)
        model2.train(pd.DataFrame(train_data), learning_rate)
        print_classifications = False
        if (test_group_id == 0):  # Required to print classifications for one fold
            print_classifications = True
        model1_result = model1.test(
            test_data,
            print_classifications)  # returns (attempts, fails, success)
        #print('result:', result)
        model1_accuracy = (model1_result[2] / model1_result[0]) * 100
        print('Naive Bayes Accuracy (%):', model1_accuracy)
        model2_result = model2.test(
            pd.DataFrame(test_data),
            print_classifications)  # returns (% accuracy)
        print('Logistic Regression Accuracy (%):', model2_result)
        model1_culminating_result = model1_culminating_result + model1_accuracy
        model2_culminating_result = model2_culminating_result + model2_result

    model1_final_average_result = model1_culminating_result / NUM_GROUPS
    model2_final_average_result = model2_culminating_result / NUM_GROUPS
    #print()
    #print('final average result:')
    #print(final_average_result)
    #print()

    return (model1_final_average_result, model2_final_average_result)
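
# Editor's note: a hypothetical call showing how the cross-validation routine
# above returns its two averaged accuracies; the argument values simply mirror
# the function's defaults and are not taken from the original example.
nb_avg, lr_avg = run_models_with_cross_validation(num_classes=2,
                                                  learning_rate=0.5)
print('Average Naive Bayes accuracy (%):', nb_avg)
print('Average Logistic Regression accuracy (%):', lr_avg)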
Beispiel #50
0
def test_weight_dimension():
    from logistic_regression import LogisticRegression
    model = LogisticRegression(input_dimensions=2)
    assert model.weights.ndim == 2 and model.weights.shape[0] == 3 \
        and model.weights.shape[1] == 1
from logistic_regression import LogisticRegression
import numpy as np
from sklearn import svm, datasets

# import some data to play with
iris = datasets.load_iris()

# Take the first two features. We could avoid this by using a two-dim dataset
X = iris.data[:, :2]
y = iris.target
lr = LogisticRegression(method='OneVsAll')
lr.fit(X, y)
H = lr.predict(X)

print("Training Accuracy : " + str(float(np.sum(H == y)) / y.shape[0]))
    neg_label = 'Not Admitted'
    xlabel = 'Exam 1 Score'
    ylabel = 'Exam 2 Score'
    title = 'Admission Based on Exam Scores'

    data = load_data('ex2data1.txt')
    X = data.iloc[:, :-1]
    y = data.iloc[:, -1]

    plot_data(data, xlabel, ylabel, title, pos_label, neg_label)

    X.insert(0, 'ones', 1)
    X = X.to_numpy()
    y = y.to_numpy().reshape((100, 1))
    theta = np.zeros((X.shape[1], 1))
    iterations = 2000
    alpha = 0.00001

    classifier = LogisticRegression()
    gradient, cost_history = classifier.gradient_descent(
        X, y, theta, iterations, alpha)

    plot_computeCost(cost_history, iterations)

    predictions = classifier.predict(X, gradient)
    correct = [
        1 if ((a == 1 and b == 1) or (a == 0 and b == 0)) else 0
        for (a, b) in zip(predictions, y)
    ]
    accuracy = 100.0 * sum(correct) / len(correct)
    print('Accuracy: {0}%'.format(accuracy))
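
# Editor's note: a minimal standalone sketch of what predict(X, theta)
# presumably computes in this exercise: the logistic hypothesis sigmoid(X @ theta)
# thresholded at 0.5. This is an assumption for illustration, not the original
# classifier's implementation.
import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def predict_sketch(X, theta):
    # X already carries the leading column of ones added above
    return (sigmoid(X.dot(theta)) >= 0.5).astype(int)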
Beispiel #53
0
class DBN(object):
    """Deep Belief Network

    A deep belief network is obtained by stacking several RBMs on top of each
    other. The hidden layer of the RBM at layer `i` becomes the input of the
    RBM at layer `i+1`. The first layer RBM gets as input the input of the
    network, and the hidden layer of the last RBM represents the output. When
    used for classification, the DBN is treated as an MLP, by adding a logistic
    regression layer on top.
    """

    def __init__(self, numpy_rng, theano_rng=None, n_ins=N_FEATURES * N_FRAMES,
                 hidden_layers_sizes=[1024, 1024], n_phn=62 * 3, n_spkr=1,
                 rho=0.90, eps=1.E-6):
        """This class is made to support a variable number of layers.

        :type numpy_rng: numpy.random.RandomState
        :param numpy_rng: numpy random number generator used to draw initial
                    weights

        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
        :param theano_rng: Theano random generator; if None is given one is
                           generated based on a seed drawn from `rng`

        :type n_ins: int
        :param n_ins: dimension of the input to the DBN

        :type hidden_layers_sizes: list of ints
        :param hidden_layers_sizes: intermediate layer sizes, must contain
                                    at least one value

        :type n_phn: int
        :param n_phn: number of phone classes output by the network

        :type n_spkr: int
        :param n_spkr: number of speaker classes output by the network
        """

        self.sigmoid_layers = []
        self.rbm_layers = []
        self.params = []
        self.n_layers = len(hidden_layers_sizes)
        #self._rho = shared(numpy.cast['float32'](rho), name='rho')  # for adadelta
        #self._eps = shared(numpy.cast['float32'](eps), name='eps')  # for adadelta
        self._rho = rho
        self._eps = eps
        self._accugrads = []  # for adadelta
        self._accudeltas = []  # for adadelta

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

        # allocate symbolic variables for the data
        self.x = T.fmatrix('x')  # the data is presented as rasterized images
        self.y_phn = T.ivector('y_phn')  # the labels are presented as 1D vector
                                 # of [int] labels
        self.y_spkr = T.ivector('y_spkr')  # the labels are presented as 1D vector
                                 # of [int] labels

        # The DBN is an MLP, for which all weights of intermediate
        # layers are shared with a different RBM.  We will first
        # construct the DBN as a deep multilayer perceptron, and when
        # constructing each sigmoidal layer we also construct an RBM
        # that shares weights with that layer. During pretraining we
        # will train these RBMs (which will lead to changing the
        # weights of the MLP as well). During finetuning we will finish
        # training the DBN by doing stochastic gradient descent on the
        # MLP.

        for i in xrange(self.n_layers):
            # construct the sigmoidal layer

            # the size of the input is either the number of hidden
            # units of the layer below or the input size if we are on
            # the first layer
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layers_sizes[i - 1]

            # the input to this layer is either the activation of the
            # hidden layer below or the input of the DBN if you are on
            # the first layer
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[-1].output

            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=T.nnet.sigmoid)

            # add the layer to our list of layers
            self.sigmoid_layers.append(sigmoid_layer)

            # it's arguably a philosophical question... but we are
            # going to only declare that the parameters of the
            # sigmoid_layers are parameters of the DBN. The visible
            # biases in the RBM are parameters of those RBMs, but not
            # of the DBN.
            self.params.extend(sigmoid_layer.params)
            self._accugrads.extend([shared(value=numpy.zeros((input_size, hidden_layers_sizes[i]), dtype='float32'), name='accugrad_W', borrow=True), shared(value=numpy.zeros((hidden_layers_sizes[i], ), dtype='float32'), name='accugrad_b', borrow=True)]) # TODO
            self._accudeltas.extend([shared(value=numpy.zeros((input_size, hidden_layers_sizes[i]), dtype='float32'), name='accudelta_W', borrow=True), shared(value=numpy.zeros((hidden_layers_sizes[i], ), dtype='float32'), name='accudelta_b', borrow=True)]) # TODO

            # Construct an RBM that shared weights with this layer
            if i == 0:
                rbm_layer = GRBM(numpy_rng=numpy_rng,
                                theano_rng=theano_rng,
                                input=layer_input,
                                n_visible=input_size,
                                n_hidden=hidden_layers_sizes[i],
                                W=sigmoid_layer.W,
                                hbias=sigmoid_layer.b)
            else:
                rbm_layer = RBM(numpy_rng=numpy_rng,
                                theano_rng=theano_rng,
                                input=layer_input,
                                n_visible=input_size,
                                n_hidden=hidden_layers_sizes[i],
                                W=sigmoid_layer.W,
                                hbias=sigmoid_layer.b)
            self.rbm_layers.append(rbm_layer)

        # We now need to add a logistic layer on top of the MLP
        self.logLayerPhn = LogisticRegression(
            input=self.sigmoid_layers[-1].output,
            n_in=hidden_layers_sizes[-1],
            n_out=n_phn)
        self.params.extend(self.logLayerPhn.params)
        self._accugrads.extend([shared(value=numpy.zeros((hidden_layers_sizes[-1], n_phn), dtype='float32'), name='accugrad_W', borrow=True), shared(value=numpy.zeros((n_phn, ), dtype='float32'), name='accugrad_b', borrow=True)]) # TODO
        self._accudeltas.extend([shared(value=numpy.zeros((hidden_layers_sizes[-1], n_phn), dtype='float32'), name='accudelta_W', borrow=True), shared(value=numpy.zeros((n_phn, ), dtype='float32'), name='accudelta_b', borrow=True)]) # TODO
        self.logLayerSpkr = LogisticRegression(
            input=self.sigmoid_layers[-1].output,
            n_in=hidden_layers_sizes[-1],
            n_out=n_spkr)
        self.params.extend(self.logLayerSpkr.params)
        self._accugrads.extend([shared(value=numpy.zeros((hidden_layers_sizes[-1], n_spkr), dtype='float32'), name='accugrad_W', borrow=True), shared(value=numpy.zeros((n_spkr, ), dtype='float32'), name='accugrad_b', borrow=True)]) # TODO
        self._accudeltas.extend([shared(value=numpy.zeros((hidden_layers_sizes[-1], n_spkr), dtype='float32'), name='accudelta_W', borrow=True), shared(value=numpy.zeros((n_spkr, ), dtype='float32'), name='accudelta_b', borrow=True)]) # TODO

        self.finetune_cost_sum_phn = self.logLayerPhn.negative_log_likelihood_sum(self.y_phn)
        self.finetune_cost_sum_spkr = self.logLayerSpkr.negative_log_likelihood_sum(self.y_spkr)
        self.finetune_cost_phn = self.logLayerPhn.negative_log_likelihood(self.y_phn)
        self.finetune_cost_spkr = self.logLayerSpkr.negative_log_likelihood(self.y_spkr)

        self.errors_phn = self.logLayerPhn.errors(self.y_phn)
        self.errors_spkr = self.logLayerSpkr.errors(self.y_spkr)

    def get_SGD_trainer(self):
        """ Returns a plain SGD minibatch trainer with learning rate as param.
        """
        batch_x = T.fmatrix('batch_x')
        batch_y = T.ivector('batch_y')
        learning_rate = T.fscalar('lr')  # learning rate to use
        # this DBN has two output layers; the plain SGD trainer fine-tunes the
        # shared layers plus the phone classifier (the speaker layer is left out)
        cost = self.finetune_cost_sum_phn
        # compute the gradients with respect to the model parameters
        gparams = T.grad(cost, self.params[:-2])

        # compute list of fine-tuning updates
        updates = OrderedDict()
        for param, gparam in zip(self.params[:-2], gparams):
            updates[param] = param - gparam * learning_rate

        train_fn = theano.function(inputs=[theano.Param(batch_x), 
            theano.Param(batch_y),
            theano.Param(learning_rate)],
            outputs=cost,
            updates=updates,
            givens={self.x: batch_x, self.y_phn: batch_y})

        return train_fn

    def get_adadelta_trainer(self):
        """ Returns an Adadelta (Zeiler 2012) trainer using self._rho and self._eps params.
        """
        batch_x = T.fmatrix('batch_x')
        batch_y_phn = T.ivector('batch_y_phn')
        batch_y_spkr = T.ivector('batch_y_spkr')
        cost_phn = self.finetune_cost_sum_phn
        cost_spkr = self.finetune_cost_sum_spkr
        # compute the gradients with respect to the model parameters
        gparams_phn = T.grad(cost_phn, self.params[:-2])
        gparams_spkr = T.grad(cost_spkr, self.params[:-4] + self.params[-2:])

        # compute list of fine-tuning updates
        updates = OrderedDict()
        for accugrad, accudelta, param, gparam in zip(self._accugrads[:-2],
                self._accudeltas[:-2], self.params[:-2], gparams_phn):
            # c.f. Algorithm 1 in the Adadelta paper (Zeiler 2012)
            agrad = self._rho * accugrad + (1 - self._rho) * gparam * gparam
            dx = - T.sqrt((accudelta + self._eps) / (agrad + self._eps)) * gparam
            updates[accudelta] = self._rho * accudelta + (1 - self._rho) * dx * dx
            updates[param] = param + dx
            updates[accugrad] = agrad
        for accugrad, accudelta, param, gparam in zip(self._accugrads[:-4] + self._accugrads[-2:], self._accudeltas[:-4] + self._accudeltas[-2:], self.params[:-4] + self.params[-2:], gparams_spkr):
            # c.f. Algorithm 1 in the Adadelta paper (Zeiler 2012)
            agrad = self._rho * accugrad + (1 - self._rho) * gparam * gparam
            dx = - T.sqrt((accudelta + self._eps) / (agrad + self._eps)) * gparam
            updates[accudelta] = self._rho * accudelta + (1 - self._rho) * dx * dx
            updates[param] = param + dx
            updates[accugrad] = agrad

        train_fn = theano.function(inputs=[theano.Param(batch_x), 
            theano.Param(batch_y_phn),
            theano.Param(batch_y_spkr)],
            outputs=(cost_phn, cost_spkr),
            updates=updates,
            givens={self.x: batch_x, self.y_phn: batch_y_phn, self.y_spkr: batch_y_spkr})

        return train_fn

    def get_adadelta_trainers(self):
        """ Returns an Adadelta (Zeiler 2012) trainer using self._rho and self._eps params.
        """
        batch_x = T.fmatrix('batch_x')
        batch_y_phn = T.ivector('batch_y_phn')
        batch_y_spkr = T.ivector('batch_y_spkr')
        #cost_phn = self.finetune_cost_sum_phn
        cost_phn = self.finetune_cost_phn
        #cost_spkr = self.finetune_cost_sum_spkr
        cost_spkr = self.finetune_cost_spkr
        # compute the gradients with respect to the model parameters
        gparams_phn = T.grad(cost_phn, self.params[:-2])
        gparams_spkr = T.grad(cost_spkr, self.params[:-4] + self.params[-2:])

        # compute list of fine-tuning updates
        updates = OrderedDict()
        for accugrad, accudelta, param, gparam in zip(self._accugrads[:-2],
                self._accudeltas[:-2], self.params[:-2], gparams_phn):
            # c.f. Algorithm 1 in the Adadelta paper (Zeiler 2012)
            agrad = self._rho * accugrad + (1 - self._rho) * gparam * gparam
            dx = - T.sqrt((accudelta + self._eps) / (agrad + self._eps)) * gparam
            updates[accudelta] = self._rho * accudelta + (1 - self._rho) * dx * dx
            updates[param] = param + dx
            updates[accugrad] = agrad
        train_fn_phn = theano.function(inputs=[theano.Param(batch_x), 
            theano.Param(batch_y_phn)],
            outputs=cost_phn,
            updates=updates,
            givens={self.x: batch_x, self.y_phn: batch_y_phn})

        updates = OrderedDict()
        for accugrad, accudelta, param, gparam in zip(self._accugrads[:-4] + self._accugrads[-2:], self._accudeltas[:-4] + self._accudeltas[-2:], self.params[:-4] + self.params[-2:], gparams_spkr):
            # c.f. Algorithm 1 in the Adadelta paper (Zeiler 2012)
            agrad = self._rho * accugrad + (1 - self._rho) * gparam * gparam
            dx = - T.sqrt((accudelta + self._eps) / (agrad + self._eps)) * gparam
            updates[accudelta] = self._rho * accudelta + (1 - self._rho) * dx * dx
            updates[param] = param + dx
            updates[accugrad] = agrad
        train_fn_spkr = theano.function(inputs=[theano.Param(batch_x), 
            theano.Param(batch_y_spkr)],
            outputs=cost_spkr,
            updates=updates,
            #givens={self.x: batch_x[20:24,:], self.y_spkr: batch_y_spkr[20:24]})
            givens={self.x: batch_x, self.y_spkr: batch_y_spkr})

        return train_fn_phn, train_fn_spkr

    def train_only_classif(self):
        batch_x = T.fmatrix('batch_x')
        batch_y_phn = T.ivector('batch_y_phn')
        batch_y_spkr = T.ivector('batch_y_spkr')
        #cost_phn = self.finetune_cost_sum_phn
        cost_phn = self.finetune_cost_phn
        #cost_spkr = self.finetune_cost_sum_spkr
        cost_spkr = self.finetune_cost_spkr
        # compute the gradients with respect to the model parameters
        gparams_phn = T.grad(cost_phn, self.params[-4:-2])
        gparams_spkr = T.grad(cost_spkr, self.params[-2:])

        # compute list of fine-tuning updates
        updates = OrderedDict()
        for accugrad, accudelta, param, gparam in zip(self._accugrads[-4:-2],
                self._accudeltas[-4:-2], self.params[-4:-2], gparams_phn):
            # c.f. Algorithm 1 in the Adadelta paper (Zeiler 2012)
            agrad = self._rho * accugrad + (1 - self._rho) * gparam * gparam
            dx = - T.sqrt((accudelta + self._eps) / (agrad + self._eps)) * gparam
            updates[accudelta] = self._rho * accudelta + (1 - self._rho) * dx * dx
            updates[param] = param + dx
            updates[accugrad] = agrad
        train_fn_phn = theano.function(inputs=[theano.Param(batch_x), 
            theano.Param(batch_y_phn)],
            outputs=cost_phn,
            updates=updates,
            givens={self.x: batch_x, self.y_phn: batch_y_phn})

        updates = OrderedDict()
        for accugrad, accudelta, param, gparam in zip(self._accugrads[-2:], self._accudeltas[-2:], self.params[-2:], gparams_spkr):
            # c.f. Algorithm 1 in the Adadelta paper (Zeiler 2012)
            agrad = self._rho * accugrad + (1 - self._rho) * gparam * gparam
            dx = - T.sqrt((accudelta + self._eps) / (agrad + self._eps)) * gparam
            updates[accudelta] = self._rho * accudelta + (1 - self._rho) * dx * dx
            updates[param] = param + dx
            updates[accugrad] = agrad
        train_fn_spkr = theano.function(inputs=[theano.Param(batch_x), 
            theano.Param(batch_y_spkr)],
            outputs=cost_spkr,
            updates=updates,
            #givens={self.x: batch_x[20:24,:], self.y_spkr: batch_y_spkr[20:24]})
            givens={self.x: batch_x, self.y_spkr: batch_y_spkr})

        return train_fn_phn, train_fn_spkr

    def score_classif(self, given_set):
        """ Returns functions to get current classification scores. """
        batch_x = T.fmatrix('batch_x')
        batch_y_phn = T.ivector('batch_y_phn')
        batch_y_spkr = T.ivector('batch_y_spkr')
        score = theano.function(inputs=[theano.Param(batch_x), theano.Param(batch_y_phn), theano.Param(batch_y_spkr)],
                outputs=(self.errors_phn, self.errors_spkr),
                givens={self.x: batch_x, self.y_phn: batch_y_phn, self.y_spkr: batch_y_spkr})

        # Create a function that scans the entire set given as input
        def scoref():
            return [score(batch_x, batch_y_phn, batch_y_spkr) for batch_x, batch_y_phn, batch_y_spkr in given_set]

        return scoref
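
# Editor's note: the Adadelta update applied throughout the trainers above,
# restated as a small standalone numpy sketch (Algorithm 1 in Zeiler 2012).
# It mirrors the symbolic expressions in get_adadelta_trainer and is purely
# illustrative; the function name is not part of the original code.
import numpy as np

def adadelta_step(param, grad, accugrad, accudelta, rho=0.90, eps=1.E-6):
    accugrad = rho * accugrad + (1 - rho) * grad * grad
    dx = -np.sqrt((accudelta + eps) / (accugrad + eps)) * grad
    accudelta = rho * accudelta + (1 - rho) * dx * dx
    return param + dx, accugrad, accudelta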
                                          batch_size=batch_size,
                                          shuffle=False)

# * Display the data from the pixels
# plt.imshow(features[100].reshape(28,28))
# plt.axis("off")
# plt.savefig('graph.png')
# plt.show()

# ! Build the neural network
# * The input consists of 28 * 28 pixels as features
input_dim = 28 * 28
# * The output is the probability for each class (10 classes)
output_dim = 10
# * Initialize the model
model = LogisticRegression(input_dim, output_dim)
# * Cross entropy loss function
error = nn.CrossEntropyLoss()
# * SGD
learning_rate = 0.001
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# * Run the algorithm over the training set to optimize the model
i = 0
loss_list = []
for epoch in range(epochs):
    for it, (feature, label) in enumerate(train_loader):
        train = Variable(feature.view(-1, 28 * 28))
        label = Variable(label)
        optimizer.zero_grad()
        predict = model(train)
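        # Editor's note: the example is cut off here. A hedged sketch of the
        # usual continuation of such a loop, reusing the `error`, `optimizer`
        # and `loss_list` objects defined above; this is an assumption, not
        # the original code.
        loss = error(predict, label)
        loss.backward()
        optimizer.step()
        loss_list.append(loss.item())
        i += 1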
Beispiel #55
0
@author: sandicalhoun
"""

import numpy as np
from util import read_file
import ffs
import tags
from logistic_regression import LogisticRegression


"""Import a small sample dataset and run calcgis. Export the output to a csv."""

data_sample, labels_sample = read_file('sample')

lr = LogisticRegression(method="collins", max_iters=1)

labels_proc = lr.preproclabels(labels_sample)

i = int(np.random.rand() * len(data_sample))
n = len(data_sample[i])
ws = np.random.rand(ffs.numJ)
x = data_sample[i]
y = labels_proc[i]

#lr.calcgis(ws, x, n)
print data_sample[i]
print labels_sample[i],y
print ws

lr.calcAs(x, n)
Beispiel #56
0
class RRNN(object):
    """Recurrent ReLU Neural Network
    """

    def __init__(self, numpy_rng, theano_rng=None, 
            n_ins=N_FEATURES * N_FRAMES,
            relu_layers_sizes=[1024, 1024, 1024],
            recurrent_connections=[2],  # layer(s), can only be i^t -> i^{t+1}
            n_outs=62 * 3,
            rho=0.9, eps=1.E-6):
        """ TODO 
        """

        self.relu_layers = []
        self.params = []
        self.n_layers = len(relu_layers_sizes)
        self._rho = rho  # ``momentum'' for adadelta
        self._eps = eps  # epsilon for adadelta
        self._accugrads = []  # for adadelta
        self._accudeltas = []  # for adadelta
        self.n_outs = n_outs

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

        self.x = T.fmatrix('x')
        self.y = T.ivector('y')

        for i in xrange(self.n_layers):
            if i == 0:
                input_size = n_ins
            else:
                input_size = relu_layers_sizes[i-1]

            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.relu_layers[-1].output

            if i in recurrent_connections:
                inputr_size = relu_layers_sizes[i]
                previous_output = T.fmatrix('previous_output')
                relu_layer = RecurrentReLU(rng=numpy_rng,
                        input=layer_input, in_stack=previous_output,
                        n_in=input_size, n_in_stack=inputr_size,
                        n_out=inputr_size)
                #relu_layer.in_stack = relu_layer.output # TODO TODO TODO

                self.params.extend(relu_layer.params)
                self._accugrads.extend([shared(value=numpy.zeros((n_ins, relu_layers_sizes[0]), dtype='float32'), name='accugrad_W', borrow=True), shared(value=numpy.zeros((relu_layers_sizes[0], ), dtype='float32'), name='accugrad_b', borrow=True), shared(value=numpy.zeros((n_outs, relu_layers_sizes[0]), dtype='float32'), name='accugrad_Ws', borrow=True)])
                self._accudeltas.extend([shared(value=numpy.zeros((n_ins, relu_layers_sizes[0]), dtype='float32'), name='accudelta_W', borrow=True), shared(value=numpy.zeros((relu_layers_sizes[0], ), dtype='float32'), name='accudelta_b', borrow=True), shared(value=numpy.zeros((n_outs, relu_layers_sizes[0]), dtype='float32'), name='accudelta_Ws', borrow=True)])
            else:
                relu_layer = ReLU(rng=numpy_rng,
                        input=layer_input,
                        n_in=input_size,
                        n_out=relu_layers_sizes[i])

                self.params.extend(relu_layer.params)
                self._accugrads.extend([shared(value=numpy.zeros((input_size, relu_layers_sizes[i]), dtype='float32'), name='accugrad_W', borrow=True), shared(value=numpy.zeros((relu_layers_sizes[i], ), dtype='float32'), name='accugrad_b', borrow=True)])
                self._accudeltas.extend([shared(value=numpy.zeros((input_size, relu_layers_sizes[i]), dtype='float32'), name='accudelta_W', borrow=True), shared(value=numpy.zeros((relu_layers_sizes[i], ), dtype='float32'), name='accudelta_b', borrow=True)])

            self.relu_layers.append(relu_layer)


        # We now need to add a logistic layer on top of the MLP
        self.logLayer = LogisticRegression(
            input=self.relu_layers[-1].output,
            n_in=relu_layers_sizes[-1],
            n_out=n_outs)
        self.params.extend(self.logLayer.params)
        self._accugrads.extend([shared(value=numpy.zeros((relu_layers_sizes[-1], n_outs), dtype='float32'), name='accugrad_W', borrow=True), shared(value=numpy.zeros((n_outs, ), dtype='float32'), name='accugrad_b', borrow=True)])
        self._accudeltas.extend([shared(value=numpy.zeros((relu_layers_sizes[-1], n_outs), dtype='float32'), name='accudelta_W', borrow=True), shared(value=numpy.zeros((n_outs, ), dtype='float32'), name='accudelta_b', borrow=True)])

        # compute the cost for second phase of training, defined as the
        # negative log likelihood of the logistic regression (output) layer
        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
        self.finetune_cost_sum = self.logLayer.negative_log_likelihood_sum(self.y)

        # compute the gradients with respect to the model parameters
        # symbolic variable that points to the number of errors made on the
        # minibatch given by self.x and self.y
        self.errors = self.logLayer.errors(self.y)

    def get_SGD_trainer(self):
        """ Returns a plain SGD minibatch trainer with learning rate as param.
        """
        batch_x = T.fmatrix('batch_x')
        batch_y = T.ivector('batch_y')
        learning_rate = T.fscalar('lr')  # learning rate to use
        cost = self.finetune_cost_sum
        # compute the gradients with respect to the model parameters
        gparams = T.grad(cost, self.params)

        # compute list of fine-tuning updates
        updates = OrderedDict()
        for param, gparam in zip(self.params, gparams):
            updates[param] = param - gparam * learning_rate 

        train_fn = theano.function(inputs=[theano.Param(batch_x), 
            theano.Param(batch_y),
            theano.Param(learning_rate)],
            outputs=cost,
            updates=updates,
            givens={self.x: batch_x, self.y: batch_y})

        return train_fn

    def get_adadelta_trainer(self):
        """ Returns an Adadelta (Zeiler 2012) trainer using self._rho and self._eps params.
        """
        batch_x = T.fmatrix('batch_x')
        batch_y = T.ivector('batch_y')
        cost = self.finetune_cost_sum
        # compute the gradients with respect to the model parameters
        gparams = T.grad(cost, self.params)

        # compute list of fine-tuning updates
        updates = OrderedDict()
        for accugrad, accudelta, param, gparam in zip(self._accugrads,
                self._accudeltas, self.params, gparams):
            # c.f. Algorithm 1 in the Adadelta paper (Zeiler 2012)
            agrad = self._rho * accugrad + (1 - self._rho) * gparam * gparam
            dx = - T.sqrt((accudelta + self._eps) / (agrad + self._eps)) * gparam
            updates[accudelta] = self._rho * accudelta + (1 - self._rho) * dx * dx
            updates[param] = param + dx
            updates[accugrad] = agrad

        train_fn = theano.function(inputs=[theano.Param(batch_x), 
            theano.Param(batch_y)],
            outputs=cost,
            updates=updates,
            givens={self.x: batch_x, self.y: batch_y})

        return train_fn

    def get_adagrad_trainer(self):
        """ Returns an Adagrad (Duchi et al. 2010) trainer using a learning rate.
        """
        batch_x = T.fmatrix('batch_x')
        batch_y = T.ivector('batch_y')
        learning_rate = T.fscalar('lr')  # learning rate to use
        cost = self.finetune_cost_sum
        # compute the gradients with respect to the model parameters
        gparams = T.grad(cost, self.params)

        # compute list of fine-tuning updates
        updates = OrderedDict()
        for accugrad, param, gparam in zip(self._accugrads, self.params, gparams):
            # Adagrad accumulation of squared gradients (Duchi et al. 2010)
            agrad = accugrad + gparam * gparam
            dx = - (learning_rate / T.sqrt(agrad + self._eps)) * gparam
            updates[param] = param + dx
            updates[accugrad] = agrad

        train_fn = theano.function(inputs=[theano.Param(batch_x), 
            theano.Param(batch_y),
            theano.Param(learning_rate)],
            outputs=cost,
            updates=updates,
            givens={self.x: batch_x, self.y: batch_y})

        return train_fn

    def score_classif(self, given_set):
        """ Returns functions to get current classification scores. """
        batch_x = T.fmatrix('batch_x')
        batch_y = T.ivector('batch_y')
        score = theano.function(inputs=[theano.Param(batch_x), theano.Param(batch_y)],
                outputs=self.errors,
                givens={self.x: batch_x, self.y: batch_y})

        # Create a function that scans the entire set given as input
        def scoref():
            return [score(batch_x, batch_y) for batch_x, batch_y in given_set]

        return scoref
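
# Editor's note: the Adagrad step from get_adagrad_trainer above, restated as
# a standalone numpy sketch for comparison with the Adadelta version; it
# mirrors the symbolic updates in the class and the names are illustrative.
import numpy as np

def adagrad_step(param, grad, accugrad, learning_rate, eps=1.E-6):
    accugrad = accugrad + grad * grad
    dx = -(learning_rate / np.sqrt(accugrad + eps)) * grad
    return param + dx, accugrad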
from matplotlib import pyplot as pp

from util import read_file
from logistic_regression import LogisticRegression


data, labels = read_file('../1571/train.txt')
data_train, data_valid, labels_train, labels_valid = \
    train_test_split(data, labels, test_size=0.3, random_state=0)

mus = list(10 ** x for x in range(-8, 2))

sgd_scores = []
for mu in mus:
    sgd_model = LogisticRegression(method="sgd", mu=mu, rate=0.1,
                                   decay=0.6, random_state=0)
    sgd_model.fit(data_train, labels_train)
    predicted = sgd_model.predict(data_valid)
    sgd_scores.append(accuracy_score(labels_valid, predicted))

pp.figure()
pp.xscale('log')
pp.scatter(mus, sgd_scores)
pp.xlabel('regularization strength')
pp.ylabel('accuracy')
pp.savefig('./sgd_regularization.png')


lbfgs_scores = []
for mu in mus:
    sgd_model = LogisticRegression(method="lbfgs", mu=mu, rate=0.1,
Beispiel #58
0
    def __init__(self, numpy_rng, theano_rng=None, 
            n_ins=N_FEATURES * N_FRAMES,
            relu_layers_sizes=[1024, 1024, 1024],
            recurrent_connections=[2],  # layer(s), can only be i^t -> i^{t+1}
            n_outs=62 * 3,
            rho=0.9, eps=1.E-6):
        """ TODO 
        """

        self.relu_layers = []
        self.params = []
        self.n_layers = len(relu_layers_sizes)
        self._rho = rho  # ``momentum'' for adadelta
        self._eps = eps  # epsilon for adadelta
        self._accugrads = []  # for adadelta
        self._accudeltas = []  # for adadelta
        self.n_outs = n_outs

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

        self.x = T.fmatrix('x')
        self.y = T.ivector('y')

        for i in xrange(self.n_layers):
            if i == 0:
                input_size = n_ins
            else:
                input_size = relu_layers_sizes[i-1]

            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.relu_layers[-1].output

            if i in recurrent_connections:
                inputr_size = relu_layers_sizes[i]
                previous_output = T.fmatrix('previous_output')
                relu_layer = RecurrentReLU(rng=numpy_rng,
                        input=layer_input, in_stack=previous_output,
                        n_in=input_size, n_in_stack=inputr_size,
                        n_out=inputr_size)
                #relu_layer.in_stack = relu_layer.output # TODO TODO TODO

                self.params.extend(relu_layer.params)
                self._accugrads.extend([shared(value=numpy.zeros((n_ins, relu_layers_sizes[0]), dtype='float32'), name='accugrad_W', borrow=True), shared(value=numpy.zeros((relu_layers_sizes[0], ), dtype='float32'), name='accugrad_b', borrow=True), shared(value=numpy.zeros((n_outs, relu_layers_sizes[0]), dtype='float32'), name='accugrad_Ws', borrow=True)])
                self._accudeltas.extend([shared(value=numpy.zeros((n_ins, relu_layers_sizes[0]), dtype='float32'), name='accudelta_W', borrow=True), shared(value=numpy.zeros((relu_layers_sizes[0], ), dtype='float32'), name='accudelta_b', borrow=True), shared(value=numpy.zeros((n_outs, relu_layers_sizes[0]), dtype='float32'), name='accudelta_Ws', borrow=True)])
            else:
                relu_layer = ReLU(rng=numpy_rng,
                        input=layer_input,
                        n_in=input_size,
                        n_out=relu_layers_sizes[i])

                self.params.extend(relu_layer.params)
                self._accugrads.extend([shared(value=numpy.zeros((input_size, relu_layers_sizes[i]), dtype='float32'), name='accugrad_W', borrow=True), shared(value=numpy.zeros((relu_layers_sizes[i], ), dtype='float32'), name='accugrad_b', borrow=True)])
                self._accudeltas.extend([shared(value=numpy.zeros((input_size, relu_layers_sizes[i]), dtype='float32'), name='accudelta_W', borrow=True), shared(value=numpy.zeros((relu_layers_sizes[i], ), dtype='float32'), name='accudelta_b', borrow=True)])

            self.relu_layers.append(relu_layer)


        # We now need to add a logistic layer on top of the MLP
        self.logLayer = LogisticRegression(
            input=self.relu_layers[-1].output,
            n_in=relu_layers_sizes[-1],
            n_out=n_outs)
        self.params.extend(self.logLayer.params)
        self._accugrads.extend([shared(value=numpy.zeros((relu_layers_sizes[-1], n_outs), dtype='float32'), name='accugrad_W', borrow=True), shared(value=numpy.zeros((n_outs, ), dtype='float32'), name='accugrad_b', borrow=True)])
        self._accudeltas.extend([shared(value=numpy.zeros((relu_layers_sizes[-1], n_outs), dtype='float32'), name='accudelta_W', borrow=True), shared(value=numpy.zeros((n_outs, ), dtype='float32'), name='accudelta_b', borrow=True)])

        # compute the cost for second phase of training, defined as the
        # negative log likelihood of the logistic regression (output) layer
        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
        self.finetune_cost_sum = self.logLayer.negative_log_likelihood_sum(self.y)

        # compute the gradients with respect to the model parameters
        # symbolic variable that points to the number of errors made on the
        # minibatch given by self.x and self.y
        self.errors = self.logLayer.errors(self.y)
Beispiel #59
0
    num = int(x.shape[0] * .7)
    x_cv = x[num : :, :]
    y_cv = y[num : :]
    x = x[0 : num, :]
    y = y[0 : num]

    # Feature scaling.
    x, mu, sigma = scale_data(x)
    x_cv = (x_cv - mu) / sigma

    # Use cross validation set to find the best lambda for regularization.
    C_candidates = [0, 0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30]
    lambda_ = 0
    best_accuracy = 0
    for C in C_candidates:
        clf = LogisticRegression(x, y, C)
        clf.learn()
        p_cv = clf.predict(x_cv)
        accuracy = (p_cv == y_cv).mean()
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            lambda_ = C
    print 'Best regularization parameter lambda: %f' % lambda_

    clf = LogisticRegression(x, y, lambda_)
    clf.learn()
    p = clf.predict(x)
    p_cv = clf.predict(x_cv)
    print 'Accuracy in training set: %f'% (p == y).mean()
    print 'Accuracy in cv: %f' %  (p_cv == y_cv).mean()
Beispiel #60
0
from sklearn.cross_validation import train_test_split
from sklearn.metrics import accuracy_score

if __name__ == '__main__':

    raw_data = pd.read_csv('../data/train_binary.csv', header=0)
    data = raw_data.values

    imgs = data[0::, 1::]
    labels = data[::, 0]

    test_time = 10

    p = Perceptron()
    lr = LogisticRegression()

    writer = csv.writer(file('result.csv', 'wb'))

    for time in xrange(test_time):
        print 'iteration %d' % time

        train_features, test_features, train_labels, test_labels = train_test_split(
            imgs, labels, test_size=0.33, random_state=23323)

        p.train(train_features, train_labels)
        lr.train(train_features, train_labels)

        p_predict = p.predict(test_features)
        lr_predict = lr.predict(test_features)
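        # Editor's note: the example ends abruptly here. A hedged sketch of how
        # the loop presumably finishes, reusing the accuracy_score import and
        # the csv writer defined above; this is an assumption, not the original
        # code.
        p_score = accuracy_score(test_labels, p_predict)
        lr_score = accuracy_score(test_labels, lr_predict)
        print 'perceptron accuracy %f, logistic regression accuracy %f' % (p_score, lr_score)
        writer.writerow([time, p_score, lr_score])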