Ejemplo n.º 1
0
def test_mlp(learning_rate=0.01, L1_reg=0.0, L2_reg=0.0001, n_epochs=1000,
             dataset='mnist.pkl.gz', batch_size=20, n_hidden=500):
    """Fit an MLP classifier by minibatch SGD with patience-based early stopping.

    :param learning_rate: step size of the gradient-descent updates
    :param L1_reg: weight of ``classifier.L1`` in the cost
    :param L2_reg: weight of ``classifier.L2_sqr`` in the cost
    :param n_epochs: upper bound on the number of passes over the training set
    :param dataset: argument forwarded to ``load_data``
    :param batch_size: number of examples per minibatch
    :param n_hidden: hidden-layer size handed to ``MLP``

    Validation/test progress is printed to stdout and the elapsed time to
    stderr; the function returns nothing.
    """
    # The loader yields (inputs, labels) pairs for train / validation / test.
    splits = load_data(dataset)
    train_set_x, train_set_y = splits[0]
    valid_set_x, valid_set_y = splits[1]
    test_set_x, test_set_y = splits[2]

    def count_batches(shared_inputs):
        # Whole minibatches only; a trailing partial batch is ignored.
        return shared_inputs.get_value(borrow=True).shape[0] // batch_size

    n_train_batches = count_batches(train_set_x)
    n_valid_batches = count_batches(valid_set_x)
    n_test_batches = count_batches(test_set_x)

    # ---------------------------------------------------------------
    # Build the symbolic model
    # ---------------------------------------------------------------
    print('... building the model')

    index = T.lscalar()  # minibatch number
    x = T.matrix(name='x')
    y = T.ivector(name='y')
    rng = numpy.random.RandomState(1234)

    classifier = MLP(rng, x, 28*28, n_hidden, 10)

    # Negative log-likelihood plus the two weighted regularisation terms.
    cost = (classifier.negative_log_likelihood(y)
            + L1_reg * classifier.L1
            + L2_reg * classifier.L2_sqr)

    def minibatch_givens(inputs, labels):
        # Substitute x / y by the index-th slice of the given split.
        return {
            x: inputs[index*batch_size : (index+1)*batch_size],
            y: labels[index*batch_size : (index+1)*batch_size],
        }

    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens=minibatch_givens(test_set_x, test_set_y))

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens=minibatch_givens(valid_set_x, valid_set_y))

    # Plain SGD: every parameter moves against its own gradient.
    gparams = [T.grad(cost, param) for param in classifier.params]
    updates = []
    for param, gparam in zip(classifier.params, gparams):
        updates.append((param, param - learning_rate * gparam))

    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens=minibatch_givens(train_set_x, train_set_y))

    # ---------------------------------------------------------------
    # Train
    # ---------------------------------------------------------------
    print('... training')

    patience = 10000               # minimum number of minibatch steps to run
    patience_increase = 2          # patience becomes at least step * this on a
                                   # significant improvement
    improvement_threshold = 0.995  # relative drop that counts as significant
    # Validate at most once per epoch, and at least every patience // 2 steps.
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    keep_training = True

    while keep_training and epoch < n_epochs:
        epoch += 1
        for minibatch_index in range(n_train_batches):
            train_model(minibatch_index)

            # Global count of minibatch updates performed before this one.
            step = (epoch - 1) * n_train_batches + minibatch_index

            if (step + 1) % validation_frequency == 0:
                # Mean zero-one loss over the validation minibatches.
                this_validation_loss = numpy.mean(
                    [validate_model(i) for i in range(n_valid_batches)])

                print(
                    'epoch %i, minibatch %i/%i, validation error %f %%' %
                    (epoch, minibatch_index + 1, n_train_batches,
                     this_validation_loss * 100.)
                )

                if this_validation_loss < best_validation_loss:
                    significant = (this_validation_loss <
                                   best_validation_loss * improvement_threshold)
                    if significant:
                        patience = max(patience, step * patience_increase)

                    best_validation_loss = this_validation_loss
                    best_iter = step

                    # Score the new best model on the test split.
                    test_score = numpy.mean(
                        [test_model(i) for i in range(n_test_batches)])

                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= step:
                keep_training = False
                break

    end_time = timeit.default_timer()
    print(('Optimization complete. Best validation score of %f %% '
           'obtained at iteration %i, with test performance %f %%') %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    elapsed_minutes = (end_time - start_time) / 60.
    print(('The code for file ' +
           os.path.basename(__file__) +
           ' ran for %.2fm' % elapsed_minutes), file=sys.stderr)
Ejemplo n.º 2
0
                print('error: %f' % sum(error[(len(error) - 100):]))
                if seePred:
                    acc = float(correct) / (showProgress / batch_size)
                    print('accuracy: ' + str(acc))
                    correct = 0
            i += 1
    # end for loop: loop through all file
        if (k + 1) % 1 == 0:
            if doDrop:
                model.setDropoutOff()
                # redefine trainer since the argument has changed
            X = np.array(cv_feat_new, dtype=theano.config.floatX)
            Y = np.array(cv_y, dtype=theano.config.floatX)
            pred_max = theano.function([x], model.predict_max(x))
            #print(cv_y)
            cv_error = model.errors(
                X, np.array(np.where(Y == 1)[1], dtype=theano.config.floatX))
            cve.append(cv_error)
            if doDrop:
                model.setDropoutOn()
            print('.........................')
            print('CV error rate:' + str(cv_error))
            print('.........................')
    # manually interrupt the model learning
    except KeyboardInterrupt:
        temp_epoch = k
        print("Ctrl+C detect")
        break
    # cross validation

# end for loop: epoch
t2 = time()
Ejemplo n.º 3
0
        # print(model.layers[2].mask)
        print('error: %f' % sum(error[ (len(error)-100) :]))
        if seePred:
          acc = float(correct) / (showProgress/batch_size)          
          print('accuracy: '+str(acc))
          correct = 0
      i += 1
  	# end for loop: loop through all file
    if (k+1) % 1 == 0:  
      if doDrop:
        model.setDropoutOff()
        # redefine trainer since the argument has changed
      X = np.array(cv_feat_new,dtype = theano.config.floatX)
      Y = np.array(cv_y,dtype = theano.config.floatX)
      #print(cv_y)
      cv_error = model.errors( X, np.array(np.where(Y==1)[1],dtype = theano.config.floatX) )
      if doDrop:
        model.setDropoutOn()
      print('.........................')
      print('CV error rate:' + str(cv_error))
      print('.........................')
  # manually interrupt the model learning
  except KeyboardInterrupt:
    temp_epoch = k
    print("Ctrl+C detect")
    break
  # cross validation 
  

# end for loop: epoch  
t2 = time()
Ejemplo n.º 4
0
def trainLeNet(train_x, train_y, validation_x, validation_y, test_x, test_y,
               convolution_layer_size=None, rate=0.1, batch_size=500, n_epochs=200):
    """Train a two-stage LeNet-style network with SGD and early stopping.

    :param train_x, train_y: training inputs / labels; sliced symbolically and
        queried with ``get_value()``, so the inputs must be Theano shared
        variables
    :param validation_x, validation_y: validation inputs / labels (same form)
    :param test_x, test_y: test inputs / labels (same form)
    :param convolution_layer_size: sequence whose first two items are the
        number of kernels of the first and second convolution layer; required
    :param rate: SGD learning rate
    :param batch_size: number of examples per minibatch
    :param n_epochs: maximum number of passes over the training set
    :raises TypeError: if ``convolution_layer_size`` is not supplied

    Every time the validation error improves, the parameter list is pickled
    to ``LeNet_params.pkl`` and the test error is printed. Nothing is returned.
    """
    # Fail early with a readable message instead of "'NoneType' object is not
    # subscriptable" from deep inside the layer construction.
    if convolution_layer_size is None:
        raise TypeError(
            'convolution_layer_size must be a sequence of two kernel counts')

    rng = np.random.RandomState(274563533)
    n_kernels_0 = convolution_layer_size[0]
    n_kernels_1 = convolution_layer_size[1]

    x = T.matrix('x')
    y = T.ivector('y')
    # Rows of x are reshaped into single-channel 28x28 images.
    layer_0_input = x.reshape((batch_size, 1, 28, 28))

    # 28x28 --5x5 conv--> 24x24 --2x2 pool--> 12x12
    layer_0 = LeNetConvPoolLayer(rng, input=layer_0_input,
                                 layer_shape=(n_kernels_0, 1, 5, 5),
                                 input_shape=(batch_size, 1, 28, 28),
                                 pool_size=(2, 2))
    # 12x12 --5x5 conv--> 8x8 --2x2 pool--> 4x4
    layer_1 = LeNetConvPoolLayer(rng, input=layer_0.output,
                                 layer_shape=(n_kernels_1, n_kernels_0, 5, 5),
                                 input_shape=(batch_size, n_kernels_0, 12, 12),
                                 pool_size=(2, 2))

    # Flatten the feature maps to (batch_size, n_kernels_1 * 4 * 4).
    MLP_input = layer_1.output.flatten(2)
    layer_final = MLP(MLP_input, n_kernels_1 * 4 * 4, 500, 10)

    cost = layer_final.negativeLogLikelihood(y)
    error = layer_final.errors(y)

    index = T.lscalar('index')

    def minibatch_givens(inputs, labels):
        # Substitute x / y by the index-th minibatch of the given split.
        start = index * batch_size
        stop = (index + 1) * batch_size
        return {x: inputs[start:stop], y: labels[start:stop]}

    validation_model = function([index], error,
                                givens=minibatch_givens(validation_x, validation_y))
    test_model = function([index], error,
                          givens=minibatch_givens(test_x, test_y))

    params = layer_final.params + layer_1.params + layer_0.params
    param_grad = T.grad(cost, params)
    updates = [(p, p - rate * pg) for p, pg in zip(params, param_grad)]

    train_model = function([index], cost,
                           givens=minibatch_givens(train_x, train_y),
                           updates=updates)

    n_train_batches = train_x.get_value().shape[0] // batch_size
    n_test_batches = test_x.get_value().shape[0] // batch_size
    n_validation_batches = validation_x.get_value().shape[0] // batch_size

    epoch = 0
    best_validation_cost = np.inf  # np.Inf alias no longer exists in NumPy 2
    patience = 1000000
    improvement_threshold = 0.995
    patience_increase = 2
    validation_frequency = min(n_train_batches, patience // 2)
    loop_done = False

    # Strict '<' so that exactly n_epochs epochs are run at most; the previous
    # '<=' allowed one extra epoch.
    while epoch < n_epochs and not loop_done:
        epoch += 1
        for minibatch_index in range(n_train_batches):
            train_model(minibatch_index)
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if iteration % 100 == 0:
                print('training @ iter = ', iteration)

            if (iteration + 1) % validation_frequency == 0:
                validation_losses = [validation_model(i)
                                     for i in range(n_validation_batches)]
                this_validation_loss = np.mean(validation_losses)
                print('epoch %i, minibatch %i / %i, validation error %f %%'
                      % (epoch, minibatch_index + 1, n_train_batches,
                         this_validation_loss * 100))

                if this_validation_loss < best_validation_cost:
                    # pickle writes bytes, so the file must be opened in
                    # binary mode.
                    with open('LeNet_params.pkl', 'wb') as serial:
                        pickle.dump(params, serial)

                    if this_validation_loss < best_validation_cost * improvement_threshold:
                        patience = max(patience, iteration * patience_increase)
                    best_validation_cost = this_validation_loss

                    test_losses = [test_model(i) for i in range(n_test_batches)]
                    test_loss = np.mean(test_losses)
                    print('test error: %f %%' % (test_loss * 100))

            if patience <= iteration:
                loop_done = True
                break
def evaluate_lenet5(learning_rate=0.33, n_epochs=200, dataset="mnist.pkl.gz", nkerns=[32, 32, 32], batch_size=500):
    """Train a three-stage convolutional network on CIFAR-10 and plot the curves.

    The five training batches and the test batch are read through ``unpickle``
    from ``cifar-10-batches-py/``. The first 7500 training rows become the
    validation split, rows 7500..49999 the training split. All splits are
    standardised with the mean and standard deviation of the training split.

    Side effects: progress is printed to stdout, the elapsed time to stderr;
    from epoch 4 on, the per-minibatch cost log and the validation log are
    written to ``epoc_cost.csv`` / ``epoc_val_error.csv`` at the start of every
    epoch; two matplotlib figures are shown at the end. Nothing is returned.

    :type learning_rate: float
    :param learning_rate: initial learning rate (factor for the stochastic
                          gradient). It is reset to 0.1 at epoch 20 and then
                          multiplied by 0.9 each epoch while it is still at
                          least ``0.1 * 0.9 ** 6``.

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: str
    :param dataset: not used by this function; kept so existing callers work

    :type nkerns: list of ints
    :param nkerns: number of kernels on each of the three convolution layers
                   (only read, never modified)

    :type batch_size: int
    :param batch_size: number of examples per minibatch
    """

    rng = numpy.random.RandomState(23455)

    def shared_dataset(data_xy, borrow=True):
        """Load an (inputs, labels) pair into Theano shared variables.

        Both arrays are stored as ``floatX`` shared variables; the labels are
        returned cast to int32 because they are used as indices.
        """
        data_x, data_y = data_xy
        shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX), borrow=borrow)
        shared_y = theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX), borrow=borrow)
        return shared_x, T.cast(shared_y, "int32")

    def log_as_array(entries):
        """Return a list of 3-item log rows as an (n, 3) float array."""
        # Positional shape: the ``newshape=`` keyword of numpy.reshape is
        # deprecated in recent NumPy releases.
        return numpy.asarray(entries, dtype=float).reshape(len(entries), 3)

    # ------------------------------------------------------------------
    # Load and split CIFAR-10
    # ------------------------------------------------------------------
    train_batches = [unpickle("cifar-10-batches-py/data_batch_%d" % i) for i in range(1, 6)]
    test = unpickle("cifar-10-batches-py/test_batch")

    X_train = numpy.concatenate([batch["data"] for batch in train_batches], axis=0)
    y_train = numpy.concatenate([batch["labels"] for batch in train_batches])

    # Let NumPy infer the row count from the test data itself instead of
    # borrowing it from a training batch.
    Xte_rows = test["data"].reshape(-1, 32 * 32 * 3)
    Yte = numpy.asarray(test["labels"])

    Xval_rows = X_train[:7500, :]  # first 7,500 rows for validation
    Yval = y_train[:7500]
    Xtr_rows = X_train[7500:50000, :]  # remaining rows (up to 50,000) for training
    Ytr = y_train[7500:50000]

    # Standardise every split with *training* statistics only. The standard
    # deviation used to be taken from the test rows, which both leaked test
    # information and mismatched the mean it was paired with.
    mean_train = Xtr_rows.mean(axis=0)
    stdv_train = Xtr_rows.std(axis=0)
    Xtr_rows = (Xtr_rows - mean_train) / stdv_train
    Xval_rows = (Xval_rows - mean_train) / stdv_train
    Xte_rows = (Xte_rows - mean_train) / stdv_train

    # The learning rate lives in a shared variable so the schedule below can
    # change it without recompiling train_model.
    # NOTE(review): theano.shared() of a Python float may not match
    # theano.config.floatX when floatX is float32 -- confirm on the target
    # configuration.
    shared_lr = theano.shared(learning_rate)

    test_set_x, test_set_y = shared_dataset((Xte_rows, Yte))
    valid_set_x, valid_set_y = shared_dataset((Xval_rows, Yval))
    train_set_x, train_set_y = shared_dataset((Xtr_rows, Ytr))

    # compute number of whole minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    x = T.matrix("x")  # the data is presented as rasterized images
    y = T.ivector("y")  # the labels are presented as 1D vector of [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print("... building the model")

    # Reshape the matrix of rasterized images, (batch_size, 3 * 32 * 32),
    # to the 4D tensor (batch_size, 3, 32, 32) given to LeNetConvPoolLayer.
    layer0_input = x.reshape((batch_size, 3, 32, 32))

    # First convolutional pooling layer:
    # filtering reduces the image size to (32-5+1, 32-5+1) = (28, 28)
    # maxpooling reduces this further to (28/2, 28/2) = (14, 14)
    layer0 = LeNetConvPoolLayer(
        rng, input=layer0_input, image_shape=(batch_size, 3, 32, 32), filter_shape=(nkerns[0], 3, 5, 5), poolsize=(2, 2)
    )

    # Second convolutional pooling layer:
    # filtering reduces the image size to (14-5+1, 14-5+1) = (10, 10)
    # maxpooling reduces this further to (10/2, 10/2) = (5, 5)
    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, nkerns[0], 14, 14),
        filter_shape=(nkerns[1], nkerns[0], 5, 5),
        poolsize=(2, 2),
    )

    # Third convolutional pooling layer:
    # filtering reduces the image size to (5-2+1, 5-2+1) = (4, 4)
    # maxpooling reduces this further to (4/2, 4/2) = (2, 2)
    layer2conv = LeNetConvPoolLayer(
        rng,
        input=layer1.output,
        image_shape=(batch_size, nkerns[1], 5, 5),
        filter_shape=(nkerns[2], nkerns[1], 2, 2),
        poolsize=(2, 2),
    )

    # The fully-connected layers operate on 2D matrices, so the feature maps
    # are flattened to (batch_size, nkerns[2] * 2 * 2).
    layer3_input = layer2conv.output.flatten(2)
    # Prints the symbolic shape expression, not concrete numbers.
    print(layer3_input.shape)

    layer3 = HiddenLayer(rng, input=layer3_input, n_in=nkerns[2] * 2 * 2, n_out=64, activation=relu)

    layer3_1 = MLP(rng, input=layer3.output, n_in=64, n_hidden=200, n_out=10)

    # The cost we minimize during training is the NLL of the model plus an
    # L2 penalty in which every weight matrix appears exactly once.
    # Previously layer0.W was added twice and layer3_1.L2_sqr was squared a
    # second time.
    # NOTE(review): assumes layer3_1.L2_sqr is already the scalar sum of
    # squared MLP weights -- confirm against the MLP class.
    L2_reg = 0.005
    L2_sqr_model = (
        (layer0.W ** 2).sum()
        + (layer1.W ** 2).sum()
        + (layer2conv.W ** 2).sum()
        + (layer3.W ** 2).sum()
        + layer3_1.L2_sqr
    )

    cost = layer3_1.negative_log_likelihood(y) + L2_reg * L2_sqr_model

    def minibatch_givens(inputs, labels):
        # Substitute x / y by the index-th minibatch of the given split.
        return {
            x: inputs[index * batch_size : (index + 1) * batch_size],
            y: labels[index * batch_size : (index + 1) * batch_size],
        }

    # functions computing the mistakes made by the model on one minibatch
    test_model = theano.function([index], layer3_1.errors(y), givens=minibatch_givens(test_set_x, test_set_y))
    validate_model = theano.function([index], layer3_1.errors(y), givens=minibatch_givens(valid_set_x, valid_set_y))

    # all model parameters to be fit by gradient descent, and their gradients
    params = layer3_1.params + layer3.params + layer2conv.params + layer1.params + layer0.params
    grads = T.grad(cost, params)

    # plain SGD update for every (parameter, gradient) pair
    updates = [(param_i, param_i - shared_lr * grad_i) for param_i, grad_i in zip(params, grads)]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens=minibatch_givens(train_set_x, train_set_y),
    )

    ###############
    # TRAIN MODEL #
    ###############
    print("... training")
    # early-stopping parameters
    patience = 10000  # run at least this many minibatch steps
    patience_increase = 2  # patience becomes at least step * this on a significant improvement
    improvement_threshold = 0.995  # relative improvement considered significant
    # validate at most once per epoch, and at least every patience // 2 steps
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.0
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    epoch_loss_list = []  # rows of [step, epoch, minibatch cost]
    epoch_val_list = []  # rows of [step, epoch, validation error]

    while (epoch < n_epochs) and (not done_looping):
        epoch += 1

        # learning-rate schedule
        if epoch == 20:
            shared_lr.set_value(0.1)
        if epoch >= 21 and shared_lr.get_value() >= 0.1 * (0.9 ** 6):
            shared_lr.set_value(shared_lr.get_value() * 0.9)

        # checkpoint the logs collected so far
        if epoch > 3:
            numpy.savetxt(fname="epoc_cost.csv", X=log_as_array(epoch_loss_list), fmt="%1.3f")
            numpy.savetxt(fname="epoc_val_error.csv", X=log_as_array(epoch_val_list), fmt="%1.3f")

        for minibatch_index in range(n_train_batches):

            step = (epoch - 1) * n_train_batches + minibatch_index

            if step % 100 == 0:
                print("training @ iter = ", step)
            cost_ij = train_model(minibatch_index)

            epoch_loss_list.append([step, epoch, float(cost_ij)])

            if (step + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [validate_model(i) for i in range(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                print(
                    "epoch %i, minibatch %i/%i, validation error %f %%"
                    % (epoch, minibatch_index + 1, n_train_batches, this_validation_loss * 100.0)
                )
                epoch_val_list.append([step, epoch, this_validation_loss])

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * improvement_threshold:
                        patience = max(patience, step * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = step

                    # test it on the test set
                    test_losses = [test_model(i) for i in range(n_test_batches)]
                    test_score = numpy.mean(test_losses)
                    print(
                        ("     epoch %i, minibatch %i/%i, test error of " "best model %f %%")
                        % (epoch, minibatch_index + 1, n_train_batches, test_score * 100.0)
                    )

            if patience <= step:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print("Optimization complete.")
    print(
        "Best validation score of %f %% obtained at iteration %i, "
        "with test performance %f %%" % (best_validation_loss * 100.0, best_iter + 1, test_score * 100.0)
    )
    print(
        ("The code for file " + os.path.split(__file__)[1] + " ran for %.2fm" % ((end_time - start_time) / 60.0)),
        file=sys.stderr,
    )

    # ------------------------------------------------------------------
    # Plot per-epoch averages of the training cost and validation error
    # ------------------------------------------------------------------
    epoch_loss_np = log_as_array(epoch_loss_list)
    epoch_val_np = log_as_array(epoch_val_list)

    epoch_loss = pandas.DataFrame(
        {"iter": epoch_loss_np[:, 0], "epoch": epoch_loss_np[:, 1], "cost": epoch_loss_np[:, 2]}
    )
    epoch_vall = pandas.DataFrame(
        {"iter": epoch_val_np[:, 0], "epoch": epoch_val_np[:, 1], "val_error": epoch_val_np[:, 2]}
    )
    epoc_avg_loss = epoch_loss.groupby("epoch")["cost"].mean().reset_index()
    epoc_avg_loss_val = epoch_vall.groupby("epoch")["val_error"].mean().reset_index()
    epoc_avg_loss.plot(kind="line", x="epoch", y="cost")
    plt.show()
    epoc_avg_loss_val.plot(kind="line", x="epoch", y="val_error")
    plt.show()
Ejemplo n.º 6
0
def MLP_demo(learning_rate=0.01,
             L1_reg=0.00,
             L2_reg=0.0001,
             n_epochs=1000,
             dataset='mnist.pkl.gz',
             batch_size=1,
             n_hidden=309):
    """Train an MLP on the data returned by ``load_multi`` with early stopping.

    :param learning_rate: learning rate forwarded to
        ``classifier.get_cost_updates``
    :param L1_reg: L1 weight forwarded to ``classifier.get_cost_updates``
    :param L2_reg: L2 weight forwarded to ``classifier.get_cost_updates``
    :param n_epochs: maximum number of passes over the training set
    :param dataset: not used by this function; kept so existing callers work
    :param batch_size: number of examples per minibatch
    :param n_hidden: hidden-layer size handed to ``MLP``

    Validation/test progress is printed to stdout and the elapsed time to
    stderr; the function returns nothing.
    """
    datasets = load_multi()

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # Floor division: the counts feed range(), which needs integers; plain
    # '/' yields floats on Python 3.
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    print('... building the model')

    index = T.lscalar()
    x = T.matrix('x')
    y = T.ivector('y')

    rng = np.random.RandomState(1234)

    classifier = MLP(rng, x, y, n_in=103, n_hidden=n_hidden, n_out=9)

    def minibatch_givens(inputs, labels):
        # Substitute x / y by the index-th minibatch of the given split.
        return {
            x: inputs[index * batch_size:(index + 1) * batch_size],
            y: labels[index * batch_size:(index + 1) * batch_size]
        }

    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(),
        givens=minibatch_givens(test_set_x, test_set_y))

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(),
        givens=minibatch_givens(valid_set_x, valid_set_y))

    cost, updates = classifier.get_cost_updates(learning_rate=learning_rate,
                                                L1_reg=L1_reg,
                                                L2_reg=L2_reg)
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens=minibatch_givens(train_set_x, train_set_y))

    print('... training')

    patience = 10000               # run at least this many minibatch steps
    patience_increase = 2          # patience becomes at least step * this on
                                   # a significant improvement
    improvement_threshold = 0.995  # relative improvement considered significant
    # Validate at most once per epoch, and at least every patience // 2 steps.
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = np.inf
    best_iter = 0
    test_score = 0.
    # time.clock() was removed in Python 3.8; perf_counter() is the
    # replacement for measuring elapsed time.
    start_time = time.perf_counter()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):
            train_model(minibatch_index)

            # Global count of minibatch updates performed before this one.
            step = (epoch - 1) * n_train_batches + minibatch_index

            if (step + 1) % validation_frequency == 0:

                validation_losses = [
                    validate_model(i) for i in range(n_valid_batches)
                ]
                this_validation_loss = np.mean(validation_losses)

                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                if this_validation_loss < best_validation_loss:

                    if this_validation_loss < best_validation_loss * improvement_threshold:
                        patience = max(patience, step * patience_increase)

                    best_validation_loss = this_validation_loss
                    best_iter = step

                    # Score the new best model on the test split.
                    test_losses = [
                        test_model(i) for i in range(n_test_batches)
                    ]
                    test_score = np.mean(test_losses)

                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= step:
                done_looping = True
                break

    end_time = time.perf_counter()
    print(('Optimization complete. Best validation score of %f %% '
           'obtained at iteration %i, with test performance %f %%') %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print(('The code for file ' + os.path.split(__file__)[1] +
           ' ran for %.2fm' % ((end_time - start_time) / 60.)),
          file=sys.stderr)
Ejemplo n.º 7
0
def MLP_demo(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000, dataset='mnist.pkl.gz', batch_size=1, n_hidden=309):
    """Train an MLP with minibatch SGD and patience-based early stopping.

    The data is obtained from ``load_multi()`` as three ``(x, y)`` pairs
    (train, validation, test) whose members expose ``get_value`` and can be
    sliced symbolically (presumably Theano shared variables). The validation
    error is measured every ``validation_frequency`` minibatches; each time
    it reaches a new best the test error is measured as well. Progress is
    printed to stdout and the total run time to stderr. Nothing is returned.

    The block is written so that it runs unchanged on Python 2 and Python 3:
    floor division for the batch counts, ``print(...)`` with a single
    pre-formatted string, ``range`` and ``timeit.default_timer``.

    :param learning_rate: forwarded to ``classifier.get_cost_updates``
    :param L1_reg: forwarded to ``classifier.get_cost_updates``
    :param L2_reg: forwarded to ``classifier.get_cost_updates``
    :param n_epochs: maximal number of passes over the training set
    :param dataset: unused here (``load_multi()`` takes no argument); kept so
                    that existing callers keep working
    :param batch_size: number of samples per minibatch
    :param n_hidden: number of hidden units handed to ``MLP``
    """
    # Function-scope import: ``time.clock`` no longer exists on Python >= 3.8
    # and this block must not rely on a module-level ``timeit`` import.
    import timeit

    datasets = load_multi()

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # Floor division: the counts feed ``range`` and must stay integers even
    # when ``/`` means true division.
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    print('... building the model')

    index = T.lscalar()  # minibatch index
    x = T.matrix('x')
    y = T.ivector('y')

    rng = np.random.RandomState(1234)

    classifier = MLP(rng, x, y, n_in=103, n_hidden=n_hidden, n_out=9)

    def minibatch_givens(set_x, set_y):
        # Substitute the symbolic inputs with the ``index``-th minibatch.
        first = index * batch_size
        last = (index + 1) * batch_size
        return {x: set_x[first:last], y: set_y[first:last]}

    test_model = theano.function(inputs=[index],
                                 outputs=classifier.errors(),
                                 givens=minibatch_givens(test_set_x, test_set_y))

    validate_model = theano.function(inputs=[index],
                                     outputs=classifier.errors(),
                                     givens=minibatch_givens(valid_set_x, valid_set_y))

    cost, updates = classifier.get_cost_updates(learning_rate=learning_rate, L1_reg=L1_reg, L2_reg=L2_reg)
    train_model = theano.function(inputs=[index],
                                  outputs=cost,
                                  updates=updates,
                                  givens=minibatch_givens(train_set_x, train_set_y))

    print('... training')

    # Early-stopping state
    patience = 10000  # minimum number of minibatch iterations to run
    patience_increase = 2  # patience grows to iteration * this on a clear improvement
    improvement_threshold = 0.995  # relative improvement counted as "clear"
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = np.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):
            train_model(minibatch_index)

            # Global count of minibatches processed so far (0-based)
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if (iteration + 1) % validation_frequency == 0:

                validation_losses = [validate_model(i) for i in range(n_valid_batches)]
                this_validation_loss = np.mean(validation_losses)

                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                if this_validation_loss < best_validation_loss:

                    # Only a clear improvement extends the patience
                    if this_validation_loss < best_validation_loss * improvement_threshold:
                        patience = max(patience, iteration * patience_increase)

                    best_validation_loss = this_validation_loss
                    best_iter = iteration

                    test_losses = [test_model(i) for i in range(n_test_batches)]
                    test_score = np.mean(test_losses)

                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iteration:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print(('Optimization complete. Best validation score of %f %% '
           'obtained at iteration %i, with test performance %f %%') %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    # Same text as the former ``print >> sys.stderr`` statement
    sys.stderr.write('The code for file ' +
                     os.path.split(__file__)[1] +
                     ' ran for %.2fm' % ((end_time - start_time) / 60.) +
                     '\n')
Ejemplo n.º 8
0
class NeuralNet():
    """
    Multilayer-perceptron classifier driven by Theano.

    ``train`` builds an ``MLP`` on the symbolic inputs and fits it with
    minibatch gradient descent; ``predict`` / ``test`` /
    ``transfer_learned_weights`` then use the fitted network.

        Attributes:
            x: symbolic Theano matrix standing for a minibatch of features
            y: symbolic Theano int vector standing for the matching labels
            metrics: dict with the keys "F1", "Accuracy", "AUC", "Precision"
                and "Recall" (filled in by ``test``), or whatever was passed
                as ``metric_list``
            mlp: the ``MLP`` instance; only exists after ``train`` was called
    """
    def __init__(self,
                 n_hidden_units,
                 batch_size,
                 output_size,
                 metric_list="none",
                 learning_rate=1,
                 l1_term=0,
                 l2_term=0,
                 n_epochs=100,
                 activation_function='tanh',
                 train_p=.6,
                 dropout=False,
                 dropout_rate=.5,
                 momentum=False,
                 momentum_term=.9,
                 adaptive_learning_rate=False):
        """
        Store the hyper-parameters; no model is built until ``train``.

        :param n_hidden_units: handed to ``MLP`` as ``n_hidden_sizes``
        :param batch_size: number of samples per minibatch
        :param output_size: handed to ``MLP`` as ``n_out``
        :param metric_list: metrics container, or "none" for a fresh dict
        :param learning_rate: gradient step size
        :param l1_term: weight of the L1 term in the cost
        :param l2_term: weight of the squared-L2 term in the cost
        :param n_epochs: maximal number of passes over the training rows
        :param activation_function: 'relu', 'tanh' or 'sigmoid'
        :param train_p: fraction of the rows used for training
        :param dropout: forwarded to ``MLP``; also enables weight scaling in
                        ``predict`` and ``transfer_learned_weights``
        :param dropout_rate: forwarded to ``MLP`` and used as that scale
        :param momentum: whether to use momentum updates
        :param momentum_term: weight of the previous step when momentum is on
        :param adaptive_learning_rate: accepted but not used by this class

        :raises ValueError: if ``activation_function`` is not a known name
        """
        # Validate first: an unknown name used to leave
        # ``self.activation_function`` unset and only surfaced later as an
        # AttributeError inside ``train``.
        if activation_function not in ('relu', 'tanh', 'sigmoid'):
            raise ValueError(
                "activation_function must be 'relu', 'tanh' or 'sigmoid', "
                "got %r" % (activation_function,))

        # allocate symbolic variables for the data
        self.x = T.matrix('x')
        self.y = T.ivector('y')  # labels as a 1D vector of [int] labels
        self.dropout = dropout
        self.dropout_rate = dropout_rate

        if metric_list == "none":
            self.metrics = {
                "F1": 0,
                "Accuracy": 0,
                "AUC": 0,
                "Precision": 0,
                "Recall": 0
            }
        else:
            self.metrics = metric_list

        self.learning_rate = learning_rate
        self.L1_reg = l1_term
        self.L2_reg = l2_term
        self.n_epochs = n_epochs
        self.batch_size = batch_size
        self.train_percent = train_p

        # Rectified linear unit expressed with Theano primitives
        def relu(x):
            return T.switch(x < 0, 0, x)

        if activation_function == 'relu':
            self.activation_function = relu
        elif activation_function == 'tanh':
            self.activation_function = T.tanh
        else:  # 'sigmoid'
            self.activation_function = T.nnet.sigmoid

        self.output_size = output_size
        self.hidden_layer_sizes = n_hidden_units
        self.momentum = momentum
        self.momentum_term = momentum_term

    def train(self, x_input, y_input):
        """
        Fit the MLP with minibatch gradient descent and early stopping.

        The first ``train_percent`` fraction of the rows is used for
        training; the rows after it, up to the largest multiple of 10 that
        does not exceed the row count, are used for validation. The learning
        rate, regularisation weights, epoch limit and batch size come from
        the constructor. The fitted network is stored in ``self.mlp``;
        progress is printed to stdout and the run time to stderr.

        :type x_input: numpy array
        :param x_input: feature matrix with one row per sample (it is sliced
                        as ``x_input[a:b, :]``); presumably its dtype has to
                        match the symbolic matrix ``self.x`` -- TODO confirm

        :type y_input: numpy array
        :param y_input: labels, sliced along the first axis in step with
                        ``x_input``; presumably int32 so that it can stand in
                        for ``T.ivector`` -- TODO confirm
        """

        index = T.lscalar('index')  # index to a [mini]batch

        n_samples = x_input.shape[0]
        # Slice bounds must be integers (the product with the fraction is a
        # float).
        train_size = int(n_samples * self.train_percent)
        max_size = n_samples - (n_samples % 10)
        train_set_x = x_input[:train_size, :]
        train_set_y = y_input[:train_size]
        # Validation starts right where training ends, so that no sample is
        # silently dropped between the two sets.
        valid_set_x = x_input[train_size:max_size, :]
        valid_set_y = y_input[train_size:max_size]

        # number of complete minibatches in each set
        n_train_batches = train_set_x.shape[0] // self.batch_size
        n_valid_batches = valid_set_x.shape[0] // self.batch_size

        number_in = train_set_x.shape[1]

        valid_set_x = theano.shared(valid_set_x, 'valid_set_x')
        valid_set_y = theano.shared(valid_set_y, 'valid_set_y')
        train_set_x = theano.shared(train_set_x, 'train_set_x')
        train_set_y = theano.shared(train_set_y, 'train_set_y')

        self.mlp = MLP(rng=numpy.random.RandomState(),
                       input=self.x,
                       n_in=number_in,
                       n_out=self.output_size,
                       a_function=self.activation_function,
                       n_hidden_sizes=self.hidden_layer_sizes,
                       dropout=self.dropout,
                       dropout_rate=self.dropout_rate)

        # the cost we minimize during training is the negative log likelihood
        # of the model plus the regularization terms (L1 and L2); cost is
        # expressed here symbolically
        cost = (self.mlp.negative_log_likelihood(self.y) +
                self.L1_reg * self.mlp.L1 + self.L2_reg * self.mlp.L2_sqr)

        def batch_givens(set_x, set_y):
            # Substitute the symbolic inputs with the ``index``-th minibatch.
            start = index * self.batch_size
            stop = (index + 1) * self.batch_size
            return {self.x: set_x[start:stop], self.y: set_y[start:stop]}

        # Theano function that computes the mistakes made by the model on a
        # validation minibatch
        validate_model = theano.function(
            inputs=[index],
            outputs=self.mlp.errors(self.y),
            givens=batch_givens(valid_set_x, valid_set_y))

        # gradient of the cost with respect to every parameter
        parameter_gradients = [
            T.grad(cost, param) for param in self.mlp.params
        ]

        # (variable, update expression) pairs applied on every training call
        updates = []

        if self.momentum:
            for param, gradient in zip(self.mlp.params, parameter_gradients):
                # Previous step, stored with the parameter's own shape and
                # dtype so that the update expressions keep that dtype.
                previous_step = theano.shared(
                    value=numpy.zeros_like(param.get_value()))
                step = (-self.learning_rate * gradient +
                        self.momentum_term * previous_step)

                updates.append((param, param + step))
                updates.append((previous_step, step))
        else:
            for param, gradient in zip(self.mlp.params, parameter_gradients):
                updates.append(
                    (param, param - self.learning_rate * gradient))

        # Theano function `train_model` that returns the cost and at the same
        # time updates the parameters according to `updates`
        train_model = theano.function(
            inputs=[index],
            outputs=cost,
            updates=updates,
            givens=batch_givens(train_set_x, train_set_y))

        ###############
        # TRAIN MODEL #
        ###############
        print('... training')

        # early-stopping parameters
        patience = 1000000  # minimum number of minibatch iterations to run
        patience_increase = 2  # patience grows to iteration * this on a
        # clear improvement
        improvement_threshold = 0.995  # a relative improvement of this much
        # is considered significant
        # number of minibatches between two validation passes
        validation_frequency = min(n_train_batches, patience // 2)

        best_validation_loss = numpy.inf
        best_iter = 0
        # No test set is evaluated in this method, so this stays at 0 and is
        # only reported as such in the messages below.
        test_score = 0.
        start_time = timeit.default_timer()

        epoch = 0
        done_looping = False

        while (epoch < self.n_epochs) and (not done_looping):
            epoch = epoch + 1
            for minibatch_index in range(n_train_batches):

                minibatch_avg_cost = train_model(minibatch_index)
                # global count of minibatches processed so far (0-based)
                iteration = (epoch - 1) * n_train_batches + minibatch_index

                if (iteration + 1) % validation_frequency == 0:
                    # compute zero-one loss on validation set
                    validation_losses = [
                        validate_model(i) for i in range(n_valid_batches)
                    ]
                    this_validation_loss = numpy.mean(validation_losses)

                    # Report the numeric cost returned by ``train_model``;
                    # the symbolic ``cost`` cannot be formatted with %f.
                    print(
                        'epoch %i, minibatch %i/%i, validation error %f %%, cost %f'
                        % (epoch, minibatch_index + 1, n_train_batches,
                           this_validation_loss * 100.,
                           float(minibatch_avg_cost)))

                    # if we got the best validation score until now
                    if this_validation_loss < best_validation_loss:
                        # improve patience if loss improvement is good enough
                        if (this_validation_loss <
                                best_validation_loss * improvement_threshold):
                            patience = max(patience,
                                           iteration * patience_increase)

                        best_validation_loss = this_validation_loss
                        best_iter = iteration

                        print(('     epoch %i, minibatch %i/%i, test error of '
                               'best model %f %%') %
                              (epoch, minibatch_index + 1, n_train_batches,
                               test_score * 100.))

                if patience <= iteration:
                    done_looping = True
                    break

        end_time = timeit.default_timer()
        print(('Optimization complete. Best validation score of %f %% '
               'obtained at iteration %i, with test performance %f %%') %
              (best_validation_loss * 100., best_iter + 1, test_score * 100.))
        # Write the timing line to stderr (it used to be printed to stdout
        # preceded by the repr of the stream object).
        sys.stderr.write('The code for file ' + os.path.split(__file__)[1] +
                         ' ran for %.2fm' % ((end_time - start_time) / 60.) +
                         '\n')

    def setup_labels(self, y):
        """
        Merge several 0/1 label arrays into one array of distinct labels.

        For the i-th array (0-based) every 0 becomes ``2 * (i + 1)`` and
        every 1 becomes ``2 * (i + 1) + 1``; other values are kept. The
        relabelled arrays are joined with ``numpy.hstack`` and converted to
        C ints. The input arrays are left untouched.

        :param y: iterable of label arrays, one per task
        :return: numpy array of dtype ``numpy.intc``; empty if ``y`` is empty
        """
        # NOTE(review): the original code started with an ``assert`` whose
        # message and condition were swapped, so it could never fail. Which
        # condition was meant cannot be told from here, so none is enforced.
        relabelled = []
        for i, yi in enumerate(y):
            negative_example_label = 2 * (i + 1)
            positive_example_label = negative_example_label + 1

            # Work on a copy so the caller's array is not modified
            labels = numpy.array(yi)
            is_negative = labels == 0
            is_positive = labels == 1
            labels[is_negative] = negative_example_label
            labels[is_positive] = positive_example_label
            relabelled.append(labels)

        if not relabelled:
            return numpy.zeros(0, dtype=numpy.intc)
        if len(relabelled) == 1:
            return numpy.intc(relabelled[0])
        return numpy.intc(numpy.hstack(relabelled))

    def test(self, x, y):
        """
        Predict labels for ``x`` and store the scores against ``y``.

        Fills the "F1", "Precision", "Recall", "AUC" and "Accuracy" entries
        of ``self.metrics``; returns nothing.

        :param x: data handed to ``predict``
        :param y: reference labels handed to the scoring functions
        """
        prediction = self.predict(x)

        self.metrics["F1"] = f1_score(y, prediction)
        self.metrics["Precision"] = precision_score(y, prediction)
        self.metrics["Recall"] = recall_score(y, prediction)
        self.metrics["AUC"] = roc_auc_score(y, prediction)
        self.metrics["Accuracy"] = accuracy_score(y, prediction)

    def predict(self, x):
        """
        Return the predicted class index for every row of ``x``.

        The data is pushed through every hidden layer of ``self.mlp`` and
        then through the softmax output layer; the arg-max over the class
        axis is returned. Requires ``train`` to have been called.

        :param x: data to classify; wrapped in a Theano shared variable
        :return: result of evaluating the compiled arg-max expression
        """
        # Shared variable holding the data to be predicted
        test_set_x = theano.shared(x, 'test_set_x')
        activation = test_set_x

        # Feed the data through all the hidden layers of the MLP
        for hidden_layer in self.mlp.hidden_layers:
            weights = hidden_layer.W
            if self.dropout:
                # At prediction time the weights are scaled by dropout_rate
                # (a new expression; the shared weights are not modified)
                weights = weights * self.dropout_rate
            activation = self.activation_function(
                T.dot(activation, weights) + hidden_layer.b)

        # Weights and bias of the softmax output layer
        W = self.mlp.logRegressionLayer.W
        b = self.mlp.logRegressionLayer.b

        # Using the running ``activation`` keeps this valid even when the
        # MLP has no hidden layer at all.
        get_y_prediction = theano.function(
            inputs=[],
            outputs=T.argmax(T.nnet.softmax(T.dot(activation, W) + b),
                             axis=1),
            on_unused_input='ignore',
        )
        return get_y_prediction()

    def transfer_learned_weights(self, x):
        """
        Apply the last hidden layer of the trained MLP to ``x``.

        Computes ``activation(dot(x, W) + b)`` with the weights and bias of
        ``self.mlp.hidden_layers[-1]``; ``W`` is scaled by ``dropout_rate``
        when dropout is enabled.

        :param x: data to transform
        :return: result of evaluating the compiled expression
        """
        final_hidden_layer = self.mlp.hidden_layers[-1]
        w = final_hidden_layer.W
        b = final_hidden_layer.b

        if self.dropout:
            w = w * self.dropout_rate

        transformation_function = theano.function(
            inputs=[],
            outputs=self.activation_function(T.dot(x, w) + b),
            on_unused_input='ignore',
        )
        return transformation_function()

    def __str__(self):
        """Return a multi-line summary of the stored metrics."""
        return "MLP:\nF1 Score: {}\nPrecision: {}\n" \
               "Recall: {}\nAccuracy: {}\nROC: {}\n".format(
                   self.metrics['F1'],
                   self.metrics['Precision'],
                   self.metrics['Recall'],
                   self.metrics['Accuracy'],
                   self.metrics['AUC'])
def evaluate_lenet5(learning_rate=0.33,
                    n_epochs=200,
                    dataset='mnist.pkl.gz',
                    nkerns=(32, 32, 32),
                    batch_size=500):
    """ Demonstrates lenet on CIFAR-10 dataset

    Loads the five CIFAR-10 training batches and the test batch from
    ``cifar-10-batches-py/``, standardises them with the statistics of the
    training split, trains three convolution/pooling layers followed by a
    hidden layer and an MLP with minibatch SGD and early stopping, and
    finally plots the per-epoch average cost and validation error.
    From the fourth epoch on, the cost and validation histories are also
    written to ``epoc_cost.csv`` and ``epoc_val_error.csv``.

    :type learning_rate: float
    :param learning_rate: initial learning rate (factor for the stochastic
                          gradient); it is reset to 0.1 at epoch 20 and then
                          decayed by 0.9 per epoch down to a floor

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: unused; the CIFAR-10 paths are fixed in the body

    :type nkerns: sequence of ints
    :param nkerns: number of kernels on each of the three conv layers

    :type batch_size: int
    :param batch_size: number of images per minibatch
    """

    rng = numpy.random.RandomState(23455)

    def shared_dataset(data_xy, borrow=True):
        """ Function that loads the dataset into shared variables

        The reason we store our dataset in shared variables is to allow
        Theano to copy it into the GPU memory (when code is run on GPU).
        Since copying data into the GPU is slow, copying a minibatch everytime
        is needed (the default behaviour if the data is not in a shared
        variable) would lead to a large decrease in performance.
        """
        data_x, data_y = data_xy
        shared_x = theano.shared(numpy.asarray(data_x,
                                               dtype=theano.config.floatX),
                                 borrow=borrow)
        shared_y = theano.shared(numpy.asarray(data_y,
                                               dtype=theano.config.floatX),
                                 borrow=borrow)
        # The labels are stored as ``floatX`` like the data, but they are
        # used as indices during the computation, so an int32 view of them
        # is returned instead of ``shared_y`` itself.
        return shared_x, T.cast(shared_y, 'int32')

    def history_array(entries):
        # List of [iter, epoch, value] rows -> (len, 3) array; also valid
        # for an empty list. The shape is passed positionally because the
        # ``newshape`` keyword is deprecated in recent NumPy releases.
        return numpy.reshape(entries, (len(entries), 3))

    train_batches = [
        unpickle('cifar-10-batches-py/data_batch_%d' % batch_number)
        for batch_number in range(1, 6)
    ]
    test = unpickle('cifar-10-batches-py/test_batch')

    X_train = numpy.concatenate(
        [batch["data"] for batch in train_batches], axis=0)
    y_train = numpy.concatenate(
        [batch["labels"] for batch in train_batches])

    # Use the test set's own row count (it used to borrow the row count of
    # the first training batch).
    test_data = test["data"]
    Xte_rows = test_data.reshape(test_data.shape[0], 32 * 32 * 3)
    Yte = numpy.asarray(test["labels"])

    Xval_rows = X_train[:7500, :]  # first 7,500 rows for validation
    Yval = y_train[:7500]
    Xtr_rows = X_train[7500:50000, :]  # rows 7,500..49,999 for training
    Ytr = y_train[7500:50000]

    # Standardise every split with the statistics of the TRAINING split only;
    # the standard deviation used to be taken from the test set.
    mean_train = Xtr_rows.mean(axis=0)
    stdv_train = Xtr_rows.std(axis=0)
    Xtr_rows = (Xtr_rows - mean_train) / stdv_train
    Xval_rows = (Xval_rows - mean_train) / stdv_train
    Xte_rows = (Xte_rows - mean_train) / stdv_train

    # The learning rate lives in a shared variable so that it can be changed
    # between epochs. It is created as floatX so that the update expressions
    # keep the dtype of floatX parameters (assumes the layers store their
    # parameters as floatX -- TODO confirm).
    shared_learning_rate = theano.shared(
        numpy.asarray(learning_rate, dtype=theano.config.floatX))

    test_set_x, test_set_y = shared_dataset((Xte_rows, Yte))
    valid_set_x, valid_set_y = shared_dataset((Xval_rows, Yval))
    train_set_x, train_set_y = shared_dataset((Xtr_rows, Ytr))

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # Reshape matrix of rasterized images of shape (batch_size, 3 * 32 * 32)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    layer0_input = x.reshape((batch_size, 3, 32, 32))

    # Construct the first convolutional pooling layer:
    # filtering reduces the image size to (32-5+1 , 32-5+1) = (28, 28)
    # maxpooling reduces this further to (28/2, 28/2) = (14, 14)
    # 4D output tensor is thus of shape (batch_size, nkerns[0], 14, 14)
    layer0 = LeNetConvPoolLayer(rng,
                                input=layer0_input,
                                image_shape=(batch_size, 3, 32, 32),
                                filter_shape=(nkerns[0], 3, 5, 5),
                                poolsize=(2, 2))

    # Construct the second convolutional pooling layer
    # filtering reduces the image size to (14-5+1, 14-5+1) = (10, 10)
    # maxpooling reduces this further to (10/2, 10/2) = (5, 5)
    # 4D output tensor is thus of shape (batch_size, nkerns[1], 5, 5)
    layer1 = LeNetConvPoolLayer(rng,
                                input=layer0.output,
                                image_shape=(batch_size, nkerns[0], 14, 14),
                                filter_shape=(nkerns[1], nkerns[0], 5, 5),
                                poolsize=(2, 2))

    # Construct the third convolutional pooling layer
    # filtering reduces the image size to (5-2+1, 5-2+1) = (4, 4)
    # maxpooling reduces this further to (4/2, 4/2) = (2, 2)
    # 4D output tensor is thus of shape (batch_size, nkerns[2], 2, 2)
    layer2conv = LeNetConvPoolLayer(rng,
                                    input=layer1.output,
                                    image_shape=(batch_size, nkerns[1], 5, 5),
                                    filter_shape=(nkerns[2], nkerns[1], 2, 2),
                                    poolsize=(2, 2))

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels); flattening the conv output gives
    # (batch_size, nkerns[2] * 2 * 2).
    layer3_input = layer2conv.output.flatten(2)
    # Prints the symbolic shape expression, not concrete numbers
    print(layer3_input.shape)

    layer3 = HiddenLayer(rng,
                         input=layer3_input,
                         n_in=nkerns[2] * 2 * 2,
                         n_out=64,
                         activation=relu)

    layer3_1 = MLP(rng, input=layer3.output, n_in=64, n_hidden=200, n_out=10)

    # the cost we minimize during training is the NLL of the model plus an
    # L2 penalty. Every weight matrix enters exactly once; ``layer0.W`` used
    # to be counted twice and ``layer3_1.L2_sqr`` (already a sum of squares)
    # used to be squared again.
    L2_reg = 0.005
    L2_sqr_model = ((layer0.W**2).sum() + (layer1.W**2).sum() +
                    (layer2conv.W**2).sum() + (layer3.W**2).sum() +
                    layer3_1.L2_sqr)

    cost = layer3_1.negative_log_likelihood(y) + L2_reg * L2_sqr_model

    def minibatch_givens(set_x, set_y):
        # Substitute the symbolic inputs with the ``index``-th minibatch.
        first = index * batch_size
        last = (index + 1) * batch_size
        return {x: set_x[first:last], y: set_y[first:last]}

    # functions computing the mistakes made by the model on a minibatch
    test_model = theano.function(
        [index],
        layer3_1.errors(y),
        givens=minibatch_givens(test_set_x, test_set_y))

    validate_model = theano.function(
        [index],
        layer3_1.errors(y),
        givens=minibatch_givens(valid_set_x, valid_set_y))

    # all model parameters to be fit by gradient descent
    params = (layer3_1.params + layer3.params + layer2conv.params +
              layer1.params + layer0.params)

    # gradients for all model parameters
    grads = T.grad(cost, params)

    # plain SGD: one (parameter, new value) pair per parameter
    updates = [(param_i, param_i - shared_learning_rate * grad_i)
               for param_i, grad_i in zip(params, grads)]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens=minibatch_givens(train_set_x, train_set_y))

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')
    # early-stopping parameters
    patience = 10000  # minimum number of minibatch iterations to run
    patience_increase = 2  # patience grows to iteration * this on a clear
    # improvement
    improvement_threshold = 0.995  # a relative improvement of this much is
    # considered significant
    # number of minibatches between two validation passes
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    epoch_loss_list = []  # rows of [iteration, epoch, minibatch cost]
    epoch_val_list = []  # rows of [iteration, epoch, validation error]

    while (epoch < n_epochs) and (not done_looping):
        epoch += 1
        # Learning-rate schedule: jump to 0.1 at epoch 20, then decay by a
        # factor 0.9 per epoch while the rate is still >= 0.1 * 0.9**6.
        if epoch == 20:
            shared_learning_rate.set_value(0.1)
        if (epoch >= 21 and
                shared_learning_rate.get_value() >= 0.1 * (0.9**6)):
            shared_learning_rate.set_value(
                shared_learning_rate.get_value() * 0.9)
        if epoch > 3:
            # Dump the histories collected so far
            numpy.savetxt(fname='epoc_cost.csv',
                          X=history_array(epoch_loss_list),
                          fmt='%1.3f')
            numpy.savetxt(fname='epoc_val_error.csv',
                          X=history_array(epoch_val_list),
                          fmt='%1.3f')

        for minibatch_index in range(n_train_batches):

            # global count of minibatches processed so far (0-based)
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if iteration % 100 == 0:
                print('training @ iter = ', iteration)
            cost_ij = train_model(minibatch_index)

            epoch_loss_list.append([iteration, epoch, float(cost_ij)])

            if (iteration + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [
                    validate_model(i) for i in range(n_valid_batches)
                ]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))
                epoch_val_list.append(
                    [iteration, epoch, this_validation_loss])

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # improve patience if loss improvement is good enough
                    if (this_validation_loss <
                            best_validation_loss * improvement_threshold):
                        patience = max(patience,
                                       iteration * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iteration

                    # test it on the test set
                    test_losses = [
                        test_model(i) for i in range(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iteration:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print(
        ('The code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' %
         ((end_time - start_time) / 60.)),
        file=sys.stderr)

    epoch_loss_np = history_array(epoch_loss_list)
    epoch_val_np = history_array(epoch_val_list)

    epoch_loss = pandas.DataFrame({
        "iter": epoch_loss_np[:, 0],
        "epoch": epoch_loss_np[:, 1],
        "cost": epoch_loss_np[:, 2]
    })
    epoch_vall = pandas.DataFrame({
        "iter": epoch_val_np[:, 0],
        "epoch": epoch_val_np[:, 1],
        "val_error": epoch_val_np[:, 2]
    })
    # Per-epoch averages, with the epoch moved from the index to a column
    epoc_avg_loss = pandas.DataFrame(
        epoch_loss.groupby(['epoch']).mean()["cost"])
    epoc_avg_val = pandas.DataFrame(
        epoch_vall.groupby(['epoch']).mean()["val_error"])
    epoc_avg_loss = pandas.DataFrame({
        "epoch": epoc_avg_loss.index.values,
        "cost": epoc_avg_loss["cost"]
    })
    epoc_avg_loss_val = pandas.DataFrame({
        "epoch": epoc_avg_val.index.values,
        "val_error": epoc_avg_val["val_error"]
    })
    epoc_avg_loss.plot(kind="line", x="epoch", y="cost")
    plt.show()
    epoc_avg_loss_val.plot(kind='line', x="epoch", y="val_error")
    plt.show()
Ejemplo n.º 10
0
def _pickle_mlp_compile(classifier, index, x, y, datasets, batch_size,
                        learning_rate, L1_reg, L2_reg):
    """Compile the train / validate / test Theano functions for `classifier`.

    Returns a ``(train_model, validate_model, test_model)`` tuple.  Every
    function takes a minibatch index.  ``train_model`` returns the
    regularised cost and applies one gradient-descent step to
    ``classifier.params``; the other two return ``classifier.errors(y)`` on
    a minibatch of the validation and test sets respectively.
    """
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # cost to be minimized: negative log likelihood plus L1 / L2 penalties
    cost = (classifier.negative_log_likelihood(y)
            + L1_reg * classifier.L1
            + L2_reg * classifier.L2_sqr)

    # symbolic slice selecting the rows of minibatch `index`
    batch = slice(index * batch_size, (index + 1) * batch_size)

    def error_function(set_x, set_y):
        # mistakes made by the model on one minibatch of (set_x, set_y)
        return theano.function(inputs=[index],
                               outputs=classifier.errors(y),
                               givens={x: set_x[batch], y: set_y[batch]})

    # plain SGD: every parameter moves against its gradient
    gparams = [T.grad(cost, param) for param in classifier.params]
    updates = [(param, param - learning_rate * gparam)
               for param, gparam in zip(classifier.params, gparams)]

    train_model = theano.function(inputs=[index], outputs=cost,
                                  updates=updates,
                                  givens={x: train_set_x[batch],
                                          y: train_set_y[batch]})

    return (train_model,
            error_function(valid_set_x, valid_set_y),
            error_function(test_set_x, test_set_y))


def _pickle_mlp_train(state, last_epoch, train_model, validate_model,
                      test_model, n_batches, validation_frequency,
                      patience_increase, improvement_threshold):
    """Run early-stopped SGD until `last_epoch` is reached or patience runs out.

    `state` is a dict with the keys ``epoch``, ``patience``,
    ``best_validation_loss``, ``best_iter``, ``test_score`` and
    ``done_looping``; it is updated in place so that a later call resumes
    exactly where this one stopped.  `n_batches` is the
    ``(train, valid, test)`` triple of minibatch counts.
    """
    n_train_batches, n_valid_batches, n_test_batches = n_batches

    while state['epoch'] < last_epoch and not state['done_looping']:
        state['epoch'] += 1
        epoch = state['epoch']
        for minibatch_index in xrange(n_train_batches):
            train_model(minibatch_index)
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            # do we validate?
            if (iteration + 1) % validation_frequency == 0:
                this_validation_loss = numpy.mean(
                    [validate_model(i) for i in xrange(n_valid_batches)])

                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                if this_validation_loss < state['best_validation_loss']:
                    # increase patience if loss improvement is good enough
                    if (this_validation_loss <
                            state['best_validation_loss'] *
                            improvement_threshold):
                        state['patience'] = max(
                            state['patience'],
                            iteration * patience_increase)

                    state['best_validation_loss'] = this_validation_loss
                    state['best_iter'] = iteration

                    # Score the new best model on the test set.  The result
                    # must land in the same variable that is reported below
                    # and at the end of training.
                    state['test_score'] = numpy.mean(
                        [test_model(i) for i in xrange(n_test_batches)])

                    print(('\tepoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           state['test_score'] * 100.))

            if state['patience'] <= iteration:
                state['done_looping'] = True
                break


def test_pickle_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001,
                    n_epochs=10, dataset='../data/mnist.pkl.gz', batch_size=20,
                    pickle_file='/scratch/z/zhaolei/lzamparo/gpu_tests/mlp_results/MLP_pickle.pkl',
                    n_hidden=500):
    """Interrupt the training of an MLP, pickle the MLP object, unpickle, and continue.

    The model is trained for the first half of `n_epochs`, written to
    `pickle_file`, read back, and then the *unpickled* model is trained for
    the remaining epochs.  Early-stopping state (patience, best validation
    loss, ...) is carried over between the two phases.

    :param learning_rate: step size of the gradient-descent updates
    :param L1_reg: weight of the L1 penalty in the cost
    :param L2_reg: weight of the squared-L2 penalty in the cost
    :param n_epochs: total number of epochs across both phases
    :param dataset: path handed to `load_data`
    :param batch_size: number of examples per minibatch
    :param pickle_file: where the half-trained model is written and re-read
    :param n_hidden: number of hidden units of the MLP
    """
    datasets = load_data(dataset)

    # number of minibatches in the train / validation / test sets
    n_batches = tuple(
        datasets[k][0].get_value(borrow=True).shape[0] // batch_size
        for k in range(3))
    n_train_batches = n_batches[0]

    ### Build the model ###
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()
    x = T.matrix('x')
    y = T.ivector('y')

    rng = numpy.random.RandomState(1234)

    classifier = MLP(rng=rng, input=x, n_in=28 * 28, n_hidden=n_hidden,
                     n_out=10)

    train_model, validate_model, test_model = _pickle_mlp_compile(
        classifier, index, x, y, datasets, batch_size,
        learning_rate, L1_reg, L2_reg)

    ### train the model ###
    print('... training')

    # early-stopping parameters
    patience_increase = 2           # wait this much longer when a new best comes up
    improvement_threshold = 0.995   # a relative improvement threshold for significance
    state = {
        'epoch': 0,
        'patience': 10000,          # look at this number of examples regardless
        'best_validation_loss': numpy.inf,
        'best_iter': 0,
        'test_score': 0.,
        'done_looping': False,
    }

    # train for this many minibatches before checking the model on the
    # validation set
    validation_frequency = min(n_train_batches, state['patience'] // 2)

    halfway_point = n_epochs // 2
    start_time = time.clock()

    _pickle_mlp_train(state, halfway_point, train_model, validate_model,
                      test_model, n_batches, validation_frequency,
                      patience_increase, improvement_threshold)

    end_time = time.clock()
    print(('Halfway point reached.  Best validation score of %f %% '
           'obtained at iteration %i, with test performance %f %%') %
          (state['best_validation_loss'] * 100., state['best_iter'] + 1,
           state['test_score'] * 100.))
    sys.stderr.write('The code for file ' + os.path.split(__file__)[1] +
                     ' ran for %.2fm\n' % ((end_time - start_time) / 60.))

    print("Pickling model...")
    with open(pickle_file, 'wb') as f:
        cPickle.dump(classifier, f, protocol=cPickle.HIGHEST_PROTOCOL)

    print("Unpickling the model...")
    with open(pickle_file, 'rb') as f:
        unpickled_classifier = cPickle.load(f)
    unpickled_classifier.reconstruct_state(x, T.tanh)

    ### Re-establish the cost, grad, parameter updates ###
    # All three functions are recompiled against the unpickled model.  In
    # particular the training function has to be rebuilt, otherwise the
    # second phase would keep updating the parameters of the original
    # classifier while validating the unpickled one.
    train_model, validate_model, test_model = _pickle_mlp_compile(
        unpickled_classifier, index, x, y, datasets, batch_size,
        learning_rate, L1_reg, L2_reg)

    print(("Continue training for %i epochs ") % (n_epochs - state['epoch']))
    start_time = time.clock()

    _pickle_mlp_train(state, n_epochs, train_model, validate_model,
                      test_model, n_batches, validation_frequency,
                      patience_increase, improvement_threshold)

    end_time = time.clock()
    print(('End point reached.  Best validation score of %f %% '
           'obtained at iteration %i, with test performance %f %%') %
          (state['best_validation_loss'] * 100., state['best_iter'] + 1,
           state['test_score'] * 100.))
    sys.stderr.write('The code for file ' + os.path.split(__file__)[1] +
                     ' ran for %.2fm\n' % ((end_time - start_time) / 60.))
Ejemplo n.º 11
0
def train_model(filename):
    """Train an MLP on batches read from `filename` and dump its parameters.

    Runs a fixed number of single-batch gradient steps, printing the
    progress percentage and the batch error before and after every step.
    Afterwards the first six entries of ``classifier.params`` are written as
    text to ``W1.txt``, ``b1.txt``, ``W2.txt``, ``b2.txt``, ``W3.txt`` and
    ``b3.txt`` in the current directory (``%.6f`` per value; weight rows are
    tab-separated, biases one value per line).

    :param filename: passed to `DataLoader` together with the batch size
    """
    learning_rate = 0.05
    patience = 10000    # number of training steps
    size = 1000         # hidden layer width
    batch = 100         # batch size handed to the loader

    loader = DataLoader(filename, batch)
    rng = numpy.random.RandomState()

    print('... building the model')

    x = T.matrix('x')
    y = T.ivector('y')

    # construct the MLP class
    classifier = MLP(
        rng=rng,
        input=x,
        n_in=12*12*5,
        n_hidden=size,
        n_out=12
    )

    cost = classifier.negative_log_likelihood(y)

    gparams = [T.grad(cost, param) for param in classifier.params]

    updates = [
        (param, param - learning_rate * gparam)
        for param, gparam in zip(classifier.params, gparams)
    ]

    print('... training')

    for i in xrange(patience):
        ip, op = loader.get_data()

        # NOTE(review): both functions are recompiled on every iteration
        # because the batch is bound through `givens`.  If the loader
        # returns plain arrays, compiling once with `inputs=[x, y]` would
        # avoid that cost -- confirm what `DataLoader.get_data` returns
        # before changing it.
        batch_error = theano.function(
            inputs=[],
            outputs=classifier.errors(y),
            givens={
                x: ip,
                y: op
            }
        )

        sgd_step = theano.function(
            inputs=[],
            outputs=cost,
            updates=updates,
            givens={
                x: ip,
                y: op
            }
        )
        before = batch_error()
        sgd_step()
        after = batch_error()

        print('%s %% %s %s' % (100.0 * i / patience, before, after))

    # Read all six parameter arrays before writing anything, so that a model
    # with fewer parameters fails without leaving partial output behind.
    values = [classifier.params[k].get_value() for k in range(6)]

    for layer in (1, 2, 3):
        weights = values[2 * layer - 2]
        bias = values[2 * layer - 1]

        with open('W%d.txt' % layer, 'w') as out:
            out.write('\n'.join('\t'.join('%.6f' % item for item in row)
                                for row in weights) + '\n')

        with open('b%d.txt' % layer, 'w') as out:
            out.write('\n'.join('%.6f' % item for item in bias) + '\n')
Ejemplo n.º 12
0
class NeuralNet():
    """Trainable MLP classifier with optional dropout and momentum.

    Attributes:
        x: symbolic Theano matrix used as the network input.
        y: symbolic Theano int vector holding the labels.
        metrics: dict of scores ("F1", "Accuracy", "AUC", "Precision",
            "Recall") that is filled in by `test`.
        mlp: the underlying MLP; only set once `train` has been called.
    """
    def __init__(self, n_hidden_units, batch_size, output_size, metric_list="none", learning_rate=1, l1_term=0,
                 l2_term=0, n_epochs=100, activation_function='tanh', train_p=.6, dropout=False, dropout_rate=.5,
                 momentum=False, momentum_term=.9, adaptive_learning_rate=False):
        """Store the hyper-parameters; the network itself is built in `train`.

        :param n_hidden_units: hidden layer sizes, handed to MLP as
            ``n_hidden_sizes``
        :param batch_size: number of rows per minibatch
        :param output_size: number of output units of the MLP
        :param metric_list: metrics dict to use instead of the default one
            (the string "none" selects the default)
        :param learning_rate: step size of the gradient updates
        :param l1_term: weight of the L1 penalty in the cost
        :param l2_term: weight of the squared-L2 penalty in the cost
        :param n_epochs: maximal number of training epochs
        :param activation_function: 'relu', 'tanh', 'sigmoid', or a callable
            that is applied to a symbolic pre-activation
        :param train_p: fraction of the rows used for training in `train`
        :param dropout: whether dropout is used
        :param dropout_rate: factor the hidden weights are scaled by at
            prediction time when dropout is on
        :param momentum: whether the updates use a momentum term
        :param momentum_term: weight of the previous update when momentum
            is on
        :param adaptive_learning_rate: accepted but currently not used
        :raises ValueError: if `activation_function` is neither a known name
            nor a callable
        """
        # allocate symbolic variables for the data
        self.x = T.matrix('x')
        self.y = T.ivector('y')  # the labels are presented as 1D vector of
                                 # [int] labels
        self.dropout = dropout
        self.dropout_rate = dropout_rate

        if metric_list == "none":
            self.metrics = {"F1": 0, "Accuracy": 0, "AUC": 0, "Precision": 0, "Recall": 0}
        else:
            self.metrics = metric_list

        self.learning_rate = learning_rate
        self.L1_reg = l1_term
        self.L2_reg = l2_term
        self.n_epochs = n_epochs
        self.batch_size = batch_size
        self.train_percent = train_p

        # ReLU expressed with Theano primitives
        def relu(x):
            return T.switch(x < 0, 0, x)

        known_activations = {
            'relu': relu,
            'tanh': T.tanh,
            'sigmoid': T.nnet.sigmoid,
        }
        if callable(activation_function):
            self.activation_function = activation_function
        elif activation_function in known_activations:
            self.activation_function = known_activations[activation_function]
        else:
            # Fail here rather than with an AttributeError deep inside
            # train()/predict().
            raise ValueError(
                "unknown activation_function %r; expected one of %s or a "
                "callable" % (activation_function,
                              sorted(known_activations)))

        self.output_size = output_size
        self.hidden_layer_sizes = n_hidden_units
        self.momentum = momentum
        self.momentum_term = momentum_term

    def train(self, x_input, y_input):
        """
        Fit the MLP with minibatch stochastic gradient descent and early
        stopping.

        The leading ``train_percent`` fraction of the rows is used for
        training.  The rows after it, up to the largest multiple of 10 not
        exceeding the number of rows, form the validation set.  The trained
        network is stored in ``self.mlp``.

        :type x_input: array indexed as ``x_input[rows, columns]``
        :param x_input: features, one sample per row

        :type y_input: array indexed as ``y_input[rows]``
        :param y_input: labels aligned with the rows of `x_input`
        """
        index = T.lscalar('index')  # index to a [mini]batch

        n_samples = x_input.shape[0]
        # slice bounds have to be integers
        train_size = int(n_samples * self.train_percent)
        max_size = n_samples - (n_samples % 10)
        train_set_x = x_input[:train_size, :]
        train_set_y = y_input[:train_size]
        # validation starts right where training ends, so no sample is lost
        valid_set_x = x_input[train_size:max_size, :]
        valid_set_y = y_input[train_size:max_size]

        # compute number of minibatches for training and validation
        n_train_batches = train_set_x.shape[0] // self.batch_size
        n_valid_batches = valid_set_x.shape[0] // self.batch_size

        number_in = train_set_x.shape[1]

        valid_set_x = theano.shared(valid_set_x, 'valid_set_x')
        valid_set_y = theano.shared(valid_set_y, 'valid_set_y')
        train_set_x = theano.shared(train_set_x, 'train_set_x')
        train_set_y = theano.shared(train_set_y, 'train_set_y')

        self.mlp = MLP(
            rng=numpy.random.RandomState(),
            input=self.x,
            n_in=number_in,
            n_out=self.output_size,
            a_function=self.activation_function,
            n_hidden_sizes=self.hidden_layer_sizes,
            dropout=self.dropout,
            dropout_rate=self.dropout_rate
        )

        # the cost we minimize during training is the negative log likelihood
        # of the model plus the regularization terms (L1 and L2); cost is
        # expressed here symbolically
        cost = (
            self.mlp.negative_log_likelihood(self.y)
            + self.L1_reg * self.mlp.L1
            + self.L2_reg * self.mlp.L2_sqr
        )

        batch_begin = index * self.batch_size
        batch_end = (index + 1) * self.batch_size

        # mistakes made by the model on a validation minibatch
        validate_model = theano.function(
            inputs=[index],
            outputs=self.mlp.errors(self.y),
            givens={
                self.x: valid_set_x[batch_begin:batch_end],
                self.y: valid_set_y[batch_begin:batch_end]
            }
        )

        # gradient of the cost with respect to every parameter
        gradients = [T.grad(cost, param) for param in self.mlp.params]

        # (variable, update expression) pairs applied by `train_model`
        updates = []
        if self.momentum:
            for param, gradient in zip(self.mlp.params, gradients):
                current = param.get_value()
                # The buffer holding the previous step uses the parameter's
                # own dtype; a float64 buffer would upcast the update of a
                # float32 parameter.
                previous_step = theano.shared(
                    value=numpy.zeros(current.shape, dtype=current.dtype))
                step = (-self.learning_rate * gradient
                        + self.momentum_term * previous_step)

                updates.append((param, param + step))
                updates.append((previous_step, step))
        else:
            for param, gradient in zip(self.mlp.params, gradients):
                updates.append((param, param - self.learning_rate * gradient))

        # `train_model` returns the cost and at the same time updates the
        # parameters of the model according to `updates`
        train_model = theano.function(
            inputs=[index],
            outputs=cost,
            updates=updates,
            givens={
                self.x: train_set_x[batch_begin:batch_end],
                self.y: train_set_y[batch_begin:batch_end]
            }
        )

        ###############
        # TRAIN MODEL #
        ###############
        print('... training')

        # early-stopping parameters
        patience = 1000000  # look as this many examples regardless
        patience_increase = 2  # wait this much longer when a new best is
                               # found
        improvement_threshold = 0.995  # a relative improvement of this much is
                                       # considered significant
        # go through this many minibatches before checking the network on
        # the validation set; in this case we check every epoch
        validation_frequency = min(n_train_batches, patience // 2)

        best_validation_loss = numpy.inf
        best_iter = 0
        # No test set is evaluated in this method, so the value reported as
        # test performance below stays at 0.
        test_score = 0.
        start_time = timeit.default_timer()

        epoch = 0
        done_looping = False

        while (epoch < self.n_epochs) and (not done_looping):
            epoch = epoch + 1
            for minibatch_index in range(n_train_batches):

                minibatch_avg_cost = train_model(minibatch_index)
                # iteration number
                iteration = (epoch - 1) * n_train_batches + minibatch_index

                if (iteration + 1) % validation_frequency == 0:
                    # compute zero-one loss on validation set
                    validation_losses = [validate_model(i) for i
                                         in range(n_valid_batches)]
                    this_validation_loss = numpy.mean(validation_losses)

                    # report the numeric minibatch cost, not the symbolic
                    # `cost` expression
                    print(
                        'epoch %i, minibatch %i/%i, validation error %f %%, cost %f' %
                        (
                            epoch,
                            minibatch_index + 1,
                            n_train_batches,
                            this_validation_loss * 100.,
                            minibatch_avg_cost
                        )
                    )

                    # if we got the best validation score until now
                    if this_validation_loss < best_validation_loss:
                        # improve patience if loss improvement is good enough
                        if (
                            this_validation_loss < best_validation_loss *
                            improvement_threshold
                        ):
                            patience = max(patience,
                                           iteration * patience_increase)

                        best_validation_loss = this_validation_loss
                        best_iter = iteration

                        print(('     epoch %i, minibatch %i/%i, test error of '
                               'best model %f %%') %
                              (epoch, minibatch_index + 1, n_train_batches,
                               test_score * 100.))

                if patience <= iteration:
                    done_looping = True
                    break

        end_time = timeit.default_timer()
        print(('Optimization complete. Best validation score of %f %% '
               'obtained at iteration %i, with test performance %f %%') %
              (best_validation_loss * 100., best_iter + 1, test_score * 100.))
        # the timing line goes to stderr
        sys.stderr.write('The code for file ' +
                         os.path.split(__file__)[1] +
                         ' ran for %.2fm\n' % ((end_time - start_time) / 60.))

    def setup_labels(self, y):
        """Relabel several binary label arrays into one multi-class vector.

        For the i-th array in `y` (counting from 0) the label 0 becomes
        ``2 * (i + 1)`` and the label 1 becomes ``2 * (i + 1) + 1``.  The
        relabeled arrays are stacked with ``numpy.hstack`` and returned with
        dtype ``numpy.intc``.  The input arrays are left unmodified.

        :param y: sequence of label arrays containing 0 / 1 entries
        """
        # NOTE(review): the original opened with
        # `assert "There is no need to relabel if n_classes < 2 ", y < 2`,
        # whose operands are swapped, so it could never fail.  It has been
        # dropped; confirm whether a real `len(y) >= 2` check is wanted.
        relabeled = []
        for i, yi in enumerate(y):
            negative_example_label = 2 * (i + 1)
            positive_example_label = negative_example_label + 1

            # work on a copy so the caller's labels keep their 0 / 1 values
            labels = numpy.array(yi)
            negatives = labels == 0
            positives = labels == 1
            labels[negatives] = negative_example_label
            labels[positives] = positive_example_label
            relabeled.append(labels)

        return numpy.hstack(relabeled).astype(numpy.intc)

    def test(self, x, y):
        """Predict labels for `x` and store the scores against `y` in ``self.metrics``.

        :param x: features handed to `predict`
        :param y: true labels
        """
        prediction = self.predict(x)
        # NOTE(review): AUC is computed from hard class predictions here,
        # not from scores or probabilities.
        self.metrics["F1"] = f1_score(y, prediction)
        self.metrics["Precision"] = precision_score(y, prediction)
        self.metrics["Recall"] = recall_score(y, prediction)
        self.metrics["AUC"] = roc_auc_score(y, prediction)
        self.metrics["Accuracy"] = accuracy_score(y, prediction)

    def predict(self, x):
        """Return the predicted class index for every row of `x`.

        The input is propagated through the hidden layers of ``self.mlp``
        and through its softmax output layer; the argmax over the outputs is
        returned.  With dropout enabled, every hidden weight matrix is
        scaled by ``dropout_rate``.

        :param x: data to be predicted; wrapped in a Theano shared variable
        """
        # Create a theano shared variable for the input x
        test_set_x = theano.shared(x, 'test_set_x')
        layer_output = test_set_x

        # Iterate over all the hidden layers in the MLP
        for hidden_layer in self.mlp.hidden_layers:
            weights = hidden_layer.W
            if self.dropout:
                # Scale the weights by the dropout rate at prediction time.
                # This builds a new expression; the stored weights are not
                # modified.
                weights = weights * self.dropout_rate
            layer_output = self.activation_function(
                T.dot(layer_output, weights) + hidden_layer.b)

        # Get the weights and bias from the softmax output layer
        W = self.mlp.logRegressionLayer.W
        b = self.mlp.logRegressionLayer.b

        # compile the theano function computing the softmax layer's outputs
        get_y_prediction = theano.function(
            inputs=[],
            outputs=T.argmax(T.nnet.softmax(T.dot(layer_output, W) + b), axis=1),
            on_unused_input='ignore',
        )
        return get_y_prediction()

    def transfer_learned_weights(self, x):
        """Return `x` transformed by the last hidden layer of ``self.mlp``.

        Computes ``activation(dot(x, W) + b)`` with that layer's weights and
        bias; with dropout enabled ``W`` is scaled by ``dropout_rate``.

        :param x: input of the last hidden layer
        """
        final_hidden_layer = self.mlp.hidden_layers[-1]
        w = final_hidden_layer.W
        b = final_hidden_layer.b

        if self.dropout:
            w = w * self.dropout_rate

        transformation_function = theano.function(
            inputs=[],
            outputs=self.activation_function(T.dot(x, w) + b),
            on_unused_input='ignore',
        )
        return transformation_function()

    def __str__(self):
        """Return a multi-line summary of the stored metrics."""
        return "MLP:\nF1 Score: {}\nPrecision: {}\n" \
               "Recall: {}\nAccuracy: {}\nROC: {}\n".format(self.metrics['F1'],
                                                            self.metrics['Precision'],
                                                            self.metrics['Recall'],
                                                            self.metrics['Accuracy'],
                                                            self.metrics['AUC'])
Ejemplo n.º 13
0
class DBN(object):
    """Deep Belief Network

    A deep belief network is obtained by stacking several RBMs on top of each
    other. The hidden layer of the RBM at layer `i` becomes the input of the
    RBM at layer `i+1`. The first layer RBM gets as input the input of the
    network, and the hidden layer of the last RBM represents the output. When
    used for classification, the DBN is treated as a MLP, by adding a logistic
    regression layer on top.
    """

    def __init__(self, numpy_rng, theano_rng=None, n_ins=784,
                 hidden_layers_sizes=None, n_outs=(None, None),
                 continuous=False):
        """Build the stack of sigmoid layers, their RBMs and the top MLP.

        :type numpy_rng: numpy.random.RandomState
        :param numpy_rng: numpy random number generator handed to every
                    layer; also used to seed `theano_rng` when none is given

        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
        :param theano_rng: Theano random generator; if it is falsy, a
                           MRG_RandomStreams is created from a seed drawn
                           from `numpy_rng`

        :type n_ins: int
        :param n_ins: dimension of the input to the DBN

        :type hidden_layers_sizes: list of ints
        :param hidden_layers_sizes: intermediate layers size, must contain
                               at least one value; defaults to [500, 500]

        :type n_outs: tuple of ints
        :param n_outs: ``(n_hidden, n_out)`` of the MLP placed on top of the
                       last sigmoid layer; ``(None, None)`` selects (10, 10)

        :type continuous: bool
        :param continuous: if true, the first layer is paired with a CRBM
                           instead of an RBM
        """
        if n_outs == (None, None):
            n_outs = (10, 10)
        if hidden_layers_sizes is None:
            hidden_layers_sizes = [500, 500]

        self.sigmoid_layers = []
        self.rbm_layers = []
        self.params = []
        self.n_layers = len(hidden_layers_sizes)

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = MRG_RandomStreams(numpy_rng.randint(2 ** 30))

        self.x = T.matrix('x')  # the data is presented as rasterized images
        self.y = T.ivector('y')  # the labels are presented as 1D vector

        for depth, width in enumerate(hidden_layers_sizes):
            # The first layer reads the network input; every later layer
            # reads the output of the sigmoid layer below it.
            if depth == 0:
                fan_in = n_ins
                below = self.x
            else:
                fan_in = hidden_layers_sizes[depth - 1]
                below = self.sigmoid_layers[-1].output

            hidden = HiddenLayer(rng=numpy_rng,
                                 input=below,
                                 n_in=fan_in,
                                 n_out=width,
                                 activation=T.nnet.sigmoid)
            self.sigmoid_layers.append(hidden)
            self.params.extend(hidden.params)

            # The RBM of this level is handed the hidden layer's own W and b
            # as its weights and hidden bias.
            rbm_class = CRBM if (continuous and depth == 0) else RBM
            self.rbm_layers.append(rbm_class(numpy_rng=numpy_rng,
                                             theano_rng=theano_rng,
                                             input=below,
                                             n_visible=fan_in,
                                             n_hidden=width,
                                             W=hidden.W,
                                             hbias=hidden.b))

        # An MLP (rather than a single logistic regression layer) sits on
        # top of the last sigmoid layer.
        self.topLayer = MLP(
            rng=numpy_rng,
            input=self.sigmoid_layers[-1].output,
            n_in=hidden_layers_sizes[-1],
            n_hidden=n_outs[0],
            n_out=n_outs[1])
        self.params.extend(self.topLayer.params)

        # cost of the second (fine-tuning) phase: negative log likelihood of
        # the top layer
        self.finetune_cost = self.topLayer.negative_log_likelihood(self.y)

        # symbolic number of errors made on the minibatch given by self.x
        # and self.y
        self.errors = self.topLayer.errors(self.y)

    def pretraining_functions(self, train_set_x, batch_size, k):
        """Compile one pre-training step function per RBM layer.

        Each returned function takes a minibatch index and, optionally, a
        learning rate (0.1 when omitted).  It returns the cost given by that
        layer's ``contrastive_divergence`` and applies the matching updates.
        To train an RBM, call its function on every minibatch index.

        :type train_set_x: theano.tensor.TensorType
        :param train_set_x: Shared var. that contains all datapoints used
                            for training the RBM
        :type batch_size: int
        :param batch_size: size of a [mini]batch
        :param k: number of Gibbs steps to do in CD-k / PCD-k

        :return: list of compiled functions, in the order of
                 ``self.rbm_layers``
        """
        index = T.lscalar('index')  # index to a minibatch
        learning_rate = T.scalar('lr')  # learning rate to use

        # first and one-past-last row of the minibatch selected by `index`
        batch_begin = index * batch_size
        batch_end = batch_begin + batch_size

        def compile_step(rbm):
            # CD-k (persistent=None) cost and updates for this RBM
            cost, updates = rbm.contrastive_divergence(learning_rate,
                                                       persistent=None, k=k)
            return theano.function(
                inputs=[index, theano.In(learning_rate, value=0.1)],
                outputs=cost,
                updates=updates,
                givens={
                    self.x: train_set_x[batch_begin:batch_end]
                }
            )

        return [compile_step(rbm) for rbm in self.rbm_layers]

    def build_finetune_function(self, train_x, train_y, batch_size,
                                learning_rate):
        """Compile the function that performs one fine-tuning step.

        The returned function takes a minibatch index, returns
        ``self.finetune_cost`` on that minibatch and moves every entry of
        ``self.params`` against its gradient, scaled by `learning_rate`.
        Only this training function is built; no validation or test
        function is produced here.

        :param train_x: train dataset, theano.tensor.TensorType
        :param train_y: labels
        :type batch_size: int
        :param batch_size: size of a minibatch
        :type learning_rate: float
        :param learning_rate: learning rate used during finetune stage
        """
        index = T.lscalar('index')  # index to a [mini]batch

        # gradients of the fine-tuning cost w.r.t. all model parameters
        gradients = T.grad(self.finetune_cost, self.params)

        sgd_updates = [
            (param, param - gradient * learning_rate)
            for param, gradient in zip(self.params, gradients)
        ]

        # row range of the minibatch selected by `index`
        start = index * batch_size
        stop = (index + 1) * batch_size

        return theano.function(
            inputs=[index],
            outputs=self.finetune_cost,
            updates=sgd_updates,
            givens={
                self.x: train_x[start:stop],
                self.y: train_y[start:stop]
            }
        )

    def build_finetune_functions(self, datasets, batch_size, learning_rate):
        """Generates a function `train` that implements one step of
        finetuning, a function `validate` that computes the error on
        every batch of the validation set, and a function `test` that
        computes the error on every batch of the testing set

        :type datasets: list of pairs of theano.tensor.TensorType
        :param datasets: It is a list that contains all the datasets;
                        it has to contain three pairs, `train`,
                        `valid`, `test` in this order, where each pair
                        is formed of two Theano variables, one for the
                        datapoints, the other for the labels. The
                        `valid` and `test` datapoints must provide
                        `get_value` so their size can be read.
        :type batch_size: int
        :param batch_size: size of a minibatch
        :type learning_rate: float
        :param learning_rate: learning rate used during finetune stage

        :return: tuple `(train_fn, valid_score, test_score)`.
                 `train_fn(index)` performs one update on minibatch
                 `index` and returns `self.finetune_cost`;
                 `valid_score()` / `test_score()` take no arguments and
                 return the list of `self.errors` values, one per
                 minibatch of the validation / test set.

        """

        (train_set_x, train_set_y) = datasets[0]
        (valid_set_x, valid_set_y) = datasets[1]
        (test_set_x, test_set_y) = datasets[2]

        # compute number of minibatches for validation and testing.
        # Floor division keeps the counts integral (plain `/` yields a float
        # under true division, which range() below would reject).
        n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
        n_valid_batches //= batch_size
        n_test_batches = test_set_x.get_value(borrow=True).shape[0]
        n_test_batches //= batch_size

        index = T.lscalar('index')  # index to a [mini]batch

        # compute the gradients with respect to the model parameters
        gparams = T.grad(self.finetune_cost, self.params)

        # compute list of fine-tuning updates (one gradient-descent step)
        updates = [
            (param, param - gparam * learning_rate)
            for param, gparam in zip(self.params, gparams)
        ]

        train_fn = theano.function(
            inputs=[index],
            outputs=self.finetune_cost,
            updates=updates,
            givens={
                self.x: train_set_x[
                        index * batch_size: (index + 1) * batch_size
                        ],
                self.y: train_set_y[
                        index * batch_size: (index + 1) * batch_size
                        ]
            }
        )

        test_score_i = theano.function(
            [index],
            self.errors,
            givens={
                self.x: test_set_x[
                        index * batch_size: (index + 1) * batch_size
                        ],
                self.y: test_set_y[
                        index * batch_size: (index + 1) * batch_size
                        ]
            }
        )

        valid_score_i = theano.function(
            [index],
            self.errors,
            givens={
                self.x: valid_set_x[
                        index * batch_size: (index + 1) * batch_size
                        ],
                self.y: valid_set_y[
                        index * batch_size: (index + 1) * batch_size
                        ]
            }
        )

        # Create a function that scans the entire validation set
        def valid_score():
            return [valid_score_i(i) for i in range(n_valid_batches)]

        # Create a function that scans the entire test set
        def test_score():
            return [test_score_i(i) for i in range(n_test_batches)]

        return train_fn, valid_score, test_score

    def train(self, X, y, finetune_lr=1e-11, pretraining_epochs=0,
              pretrain_lr=0.01, k=1, training_epochs=5, batch_size=1000,
              verbose=False):
        """
        Train method: layer-wise pre-training followed by finetuning.

        `X` and `y` are wrapped with `shared_dataset`, every layer is
        pre-trained for `pretraining_epochs` epochs, and the whole model
        is then finetuned for `training_epochs` epochs. Only complete
        minibatches are used; trailing samples that do not fill a
        minibatch are ignored.

        :param verbose: if truthy, the cost of the last minibatch is
        printed every `verbose` epochs of finetuning (`True` prints
        every epoch)
        :param X: data
        :param y: labels
        :type finetune_lr: float
        :param finetune_lr: learning rate used in the finetune stage
        :type pretraining_epochs: int
        :param pretraining_epochs: number of epoch to do pretraining
        :type pretrain_lr: float
        :param pretrain_lr: learning rate to be used during pre-training
        :type k: int
        :param k: number of Gibbs steps in CD/PCD
        :type training_epochs: int
        :param training_epochs: maximal number of iterations to run
        the optimizer
        :type batch_size: int
        :param batch_size: the size of a minibatch
        :return: `self`, so calls can be chained
        """
        # NOTE: every print below is written as a single parenthesised
        # string so it produces the same output whether `print` is the
        # Python 2 statement or the Python 3 function.
        train_x, train_y = shared_dataset((X, y))
        train_shape = train_x.get_value(borrow=True).shape
        print("Train set shape: %s" % (train_shape,))
        # floor division: the batch count feeds range() and must be an int
        n_train_batches = train_shape[0] // batch_size

        print('... building the model')

        #########################
        # PRETRAINING THE MODEL #
        #########################
        print('... getting the pretraining functions')
        pretraining_fns = self.pretraining_functions(train_set_x=train_x,
                                                     batch_size=batch_size,
                                                     k=k)

        print('... pre-training the model')
        start_time = timeit.default_timer()
        # Pre-train layer-wise
        for i in range(self.n_layers):
            # go through pretraining epochs
            for epoch in range(pretraining_epochs):
                # go through the training set, collecting per-batch costs
                c = [pretraining_fns[i](index=batch_index, lr=pretrain_lr)
                     for batch_index in range(n_train_batches)]
                print('Pre-training layer %i, epoch %d, cost  %s'
                      % (i, epoch, numpy.mean(c)))

        end_time = timeit.default_timer()
        # end-snippet-2
        sys.stderr.write('The pretraining code for file ' +
                         os.path.split(__file__)[1] +
                         ' ran for %.2fm' % (
                             (end_time - start_time) / 60.) +
                         '\n')
        ########################
        # FINETUNING THE MODEL #
        ########################

        # get the training function for the model
        print('... getting the finetuning function')
        train_fn = self.build_finetune_function(
            train_x=train_x,
            train_y=train_y,
            batch_size=batch_size,
            learning_rate=finetune_lr
        )

        print('... finetuning the model')
        # stays None when there is no complete minibatch to train on, so
        # the verbose report below cannot hit an unbound name
        minibatch_avg_cost = None
        epoch = 0
        while epoch < training_epochs:
            epoch += 1
            for minibatch_index in range(n_train_batches):
                minibatch_avg_cost = train_fn(minibatch_index)
            if verbose and epoch % verbose == 0:
                print("Epoch {0}, cost: {1}".format(epoch,
                                                    minibatch_avg_cost))

        return self

    def predict(self, X):
        """Evaluate the top layer's `y_pred` on input `X`.

        A Theano function mapping the input of the first sigmoid layer
        to `self.topLayer.y_pred` is compiled on every call.

        :param X: data fed to the first sigmoid layer
        :return: the value of `self.topLayer.y_pred` for `X`
        """
        net_input = self.sigmoid_layers[0].input
        compiled = theano.function(inputs=[net_input],
                                   outputs=self.topLayer.y_pred)
        return compiled(X)

    def predict_proba(self, X):
        """Evaluate the top layer's `p_y_given_x` on input `X`.

        A Theano function mapping the input of the first sigmoid layer
        to `self.topLayer.p_y_given_x` is compiled on every call.

        :param X: data fed to the first sigmoid layer
        :return: the value of `self.topLayer.p_y_given_x` for `X`
        """
        net_input = self.sigmoid_layers[0].input
        compiled = theano.function(inputs=[net_input],
                                   outputs=self.topLayer.p_y_given_x)
        return compiled(X)