Code Example #1
File: moe.py    Project: zermelozf/esn-lm
class MixtureOfExperts:
    """ The Mixture of Experts model"""
    
    def __init__(self, input_dim, nb_experts, output_dim):
        self.nb_experts = nb_experts
        self.output_dim = output_dim
        self.gates = LogisticRegression(input_dim, nb_experts)
        self.experts = [LogisticRegression(input_dim, output_dim) for k in range(nb_experts)]
    
    def pz_given_x(self, x):
        return self.gates.py_given_x(x)
        
    def py_given_x(self, x):
        pz = self.gates.py_given_x(x)
        py = np.zeros((x.shape[0], self.output_dim))
        for z in range(self.nb_experts):
            pzb = np.tile(np.expand_dims(pz[:, z], axis=1), (1, self.output_dim))
            py += pzb*self.experts[z].py_given_x(x)
        return py
    
    def py_given_xz(self, x, z):
        return self.experts[z].py_given_x(x)
    
    def lik_y_for_every_z(self, x, y):
        py = np.zeros((x.shape[0], self.nb_experts))
        for z in range(self.nb_experts):
            py[:, z] = np.sum(y*self.py_given_xz(x, z), axis=1)
        return py
    
    def pz_given_xy(self, x, y):
        pz_given_x = self.pz_given_x(x)
        lik_y_forallz = self.lik_y_for_every_z(x, y)
        pz_given_xy = lik_y_forallz*pz_given_x
        renorm = np.tile(np.expand_dims(np.sum(pz_given_xy, axis=1), axis=1), (1, pz_given_xy.shape[1]))
        pz_given_xy = pz_given_xy/renorm
        return pz_given_xy
        
    
    def sample_y_given_x(self, x):
        py = self.py_given_x(x)
        y = np.array([np.random.multinomial(1, py[i, :]) for i in range(x.shape[0])])
        return y
    
    def log_likelihood(self, x, y):
        lik_y = self.lik_y_for_every_z(x, y)
        pz_given_x = self.pz_given_x(x)
        return np.sum(np.log(np.sum(pz_given_x*lik_y, 1)))
    
    def fit(self, x, y, method='CG', max_iter=15):
        """ The model is trained using Generalized Expectation-Maximization.
            In the Maximization step the Conjugate-Gradient algorithm provided by scipy.optimize is used
            by default.
        """
        if isinstance(y, list):
            y = np.eye(self.output_dim)[y]
        ll, Q1, Q2 = expectation_maximization2(self, x, y, max_iter=max_iter)
        return ll, Q1, Q2
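
A note on the prediction step: py_given_x above forms p(y|x) as a convex combination of the experts' class distributions, weighted per sample by the gate probabilities p(z|x). The following self-contained NumPy sketch reproduces just that mixing arithmetic with made-up gate and expert outputs; the shapes and names are illustrative and not part of the esn-lm project:

import numpy as np

rng = np.random.default_rng(0)
n_samples, nb_experts, output_dim = 4, 3, 5

# Made-up gate probabilities p(z|x): one row per sample, one column per expert.
pz = rng.random((n_samples, nb_experts))
pz /= pz.sum(axis=1, keepdims=True)

# Made-up expert predictions p(y|x, z): one (n_samples, output_dim) block per expert.
expert_py = rng.random((nb_experts, n_samples, output_dim))
expert_py /= expert_py.sum(axis=2, keepdims=True)

# Mixture prediction p(y|x) = sum_z p(z|x) * p(y|x, z), as in py_given_x above.
py = np.zeros((n_samples, output_dim))
for z in range(nb_experts):
    py += pz[:, [z]] * expert_py[z]

assert np.allclose(py.sum(axis=1), 1.0)  # each row is still a valid distribution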
        
        
        
Code Example #2
File: moe.py    Project: neuronalX/esn-lm
 def __init__(self, input_dim, nb_experts, output_dim):
     self.nb_experts = nb_experts
     self.output_dim = output_dim
     self.gates = LogisticRegression(input_dim, nb_experts)
     self.experts = [
         LogisticRegression(input_dim, output_dim)
         for k in range(nb_experts)
     ]
Code Example #3
    def __init__(self,
                 numpy_rng,
                 theano_rng=None,
                 n_ins=784,
                 hidden_layers_sizes=[500, 500],
                 n_outs=10):
        self.sigmoid_layers = []
        self.rbm_layers = []
        self.params = []
        self.n_layers = len(hidden_layers_sizes)
        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

        self.x = T.matrix('x')
        self.y = T.ivector('y')
        for i in range(self.n_layers):
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layers_sizes[i - 1]
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[-1].output

            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=T.nnet.sigmoid)
            self.sigmoid_layers.append(sigmoid_layer)

            self.params.extend(sigmoid_layer.params)

            rbm_layer = RBM(numpy_rng=numpy_rng,
                            theano_rng=theano_rng,
                            input=layer_input,
                            n_visible=input_size,
                            n_hidden=hidden_layers_sizes[i],
                            W=sigmoid_layer.W,
                            hbias=sigmoid_layer.b)
            self.rbm_layers.append(rbm_layer)

        self.logLayer = LogisticRegression(
            input=self.sigmoid_layers[-1].output,
            n_in=hidden_layers_sizes[-1],
            n_out=n_outs)

        self.params.extend(self.logLayer.params)

        self.finetune_cost = self.logLayer.negative_log_likehood(self.y)

        self.errors = self.logLayer.errors(self.y)
Code Example #4
def main():
    X_train, y_train, X_test, y_test = load_income('./income.csv')

    lr = LogisticRegression(C=1000, lr_decay='step').fit(X_train, y_train)

    # lr.score(X_train, y_train)
    # lr.score(X_test, y_test)
    y_pred = lr.predict(X_train)
    print('\n==> train:\n', classification_report(y_train, y_pred))
    y_pred = lr.predict(X_test)
    print('\n==> test:\n', classification_report(y_test, y_pred))
Code Example #5
def test_lr_newton_method():
    X, y = read_data()

    lr_clf = LogisticRegression(solver="newton_method")
    lr_clf.fit(X, y)

    # test intercept
    intercept = lr_clf.intercept_
    assert (abs(intercept - -2.618) < 0.01)

    # test coefficient
    coef = lr_clf.coef_
    assert (abs(coef[0] - 0.76) < 0.01)
    assert (abs(coef[1] - 1.17) < 0.01)
Code Example #6
    def __init__(self, rng, input, n_in, n_hidden, n_out):
        self.hiddenLayer = HiddenLayer(
            rng = rng,
            input = input,
            n_in = n_in,
            n_out = n_hidden,
            activation = T.tanh
        )
        self.logRegressionLayer = LogisticRegression(
            input = self.hiddenLayer.output,
            n_in = n_hidden,
            n_out = n_out
        )

        self.L1 = (
            abs(self.hiddenLayer.W).sum()+abs(self.logRegressionLayer.W).sum()
        )

        self.L2_sqr = (
            (self.hiddenLayer.W ** 2).sum()+(self.logRegressionLayer.W ** 2).sum()
        )

        self.negative_log_likelihood = (
            self.logRegressionLayer.negative_log_likehood
        )

        self.errors = self.logRegressionLayer.errors

        self.params = self.hiddenLayer.params + self.logRegressionLayer.params

        self.input = input
Code Example #7
File: CNN.py    Project: zhangyipin/MyDeepByTheano
    def __init__(self, rng, input, n_hidden_out, n_out, nkerns, batch_size):

        self.layer0 = LeNetConvPoolLayer(rng,
                                         input=input.reshape(
                                             (batch_size, 1, 28, 28)),
                                         image_shape=(batch_size, 1, 28, 28),
                                         filter_shape=(nkerns[0], 1, 5, 5),
                                         poolsize=(2, 2))

        self.layer1 = LeNetConvPoolLayer(rng,
                                         input=self.layer0.output,
                                         image_shape=(batch_size, nkerns[0],
                                                      12, 12),
                                         filter_shape=(nkerns[1], nkerns[0], 5,
                                                       5),
                                         poolsize=(2, 2))
        self.layer2 = HiddenLayer(rng,
                                  input=self.layer1.output.flatten(2),
                                  n_in=nkerns[1] * 4 * 4,
                                  n_out=n_hidden_out,
                                  activation=T.tanh)
        self.logRegressionLayer = LogisticRegression(input=self.layer2.output,
                                                     n_in=n_hidden_out,
                                                     n_out=n_out)
        self.negative_log_likelihood = (
            self.logRegressionLayer.negative_log_likehood)

        self.errors = self.logRegressionLayer.errors

        self.params = self.layer0.params + self.layer1.params + self.layer2.params + self.logRegressionLayer.params

        self.input = input
Code Example #8
def test_lr_stochastic_gradient_descent():
    X, y = read_data()

    lr_clf = LogisticRegression(learning_rate=0.001,
                                max_iter=10000,
                                solver="stochastic_gradient_descent")
    lr_clf.fit(X, y)

    # test intercept
    intercept = lr_clf.intercept_
    assert (abs(intercept - -2.618) < 0.01)

    # test coefficient
    coef = lr_clf.coef_
    assert (abs(coef[0] - 0.76) < 0.01)
    assert (abs(coef[1] - 1.17) < 0.01)
Code Example #9
File: model.py    Project: JunjieHu/dl
  def __init__(self,input=None, y=None, Cparams=None, Mparams=None):

    c_w0, c_b0, c_w1, c_b1, c_w2, c_b2, c_w3, c_b3 = Cparams
    m_w1, m_b1, o_w1, o_b1 = Mparams

    c_layer0 = LeNetConvPoolLayer(input=input, filter_shape=filter_shape0, image_shape=image_shape0,W=c_w0, b=c_b0, poolsize=poolsize0)
    
    c_layer1 = LeNetConvPoolLayer(input=c_layer0.output, filter_shape=filter_shape1, image_shape=image_shape1,W=c_w1, b=c_b1, poolsize=poolsize1)
    
    c_layer2 = LeNetConvPoolLayer(input=c_layer1.output, filter_shape=filter_shape2, image_shape=image_shape2,W=c_w2, b=c_b2, poolsize=poolsize2)
    c_layer3 = LeNetConvPoolLayer(input=c_layer2.output, filter_shape=filter_shape3, image_shape=image_shape3,W=c_w3, b=c_b3, poolsize=poolsize3)
   
    m_input = c_layer3.output
    m_input = m_input.flatten(2)
    m_layer1 = HiddenLayer(m_input, W=m_w1, b=m_b1)
    s_layer = LogisticRegression(m_layer1.output, W=o_w1, b=o_b1)
    self.cost= s_layer.negative_log_likelihood(y)
Code Example #10
 def get_by_name(name: str, dataset: AbstractDataset) -> nn.Module:
     name = name.lower()
     if name == ModelType.LOGISTIC.name.lower():
         return LogisticRegression(dataset)
     elif name == ModelType.MLP.name.lower():
         return MLP(dataset)
     elif name == ModelType.VGG.name.lower():
         return Vgg(dataset)
Code Example #11
    def __init__(self, input, n_in, n_hidden, n_out, n_layers, n_total, batch,
                 mask):

        # adjust the input
        input = input.dimshuffle(1, 0, 2)

        # hidden layers
        self.params = []
        self.hiddenLayers = []
        self.velo = []
        input_list = []
        input_list.append(input)
        input_list.append(input[::-1])
        self.hiddenLayers.append(
            HiddenLayer(input_list=input_list,
                        n_in=n_in,
                        n_out=n_hidden,
                        BATCH=batch))
        self.params.extend(self.hiddenLayers[0].params)
        self.velo.extend(self.hiddenLayers[0].velo)
        for i in range(1, n_layers):
            self.hiddenLayers.append(
                HiddenLayer(input_list=self.hiddenLayers[i - 1].output_list,
                            n_in=n_hidden,
                            n_out=n_hidden,
                            BATCH=batch))
            self.params.extend(self.hiddenLayers[i].params)
            self.velo.extend(self.hiddenLayers[i].velo)
        # output layer
        self.logRegressionLayer = LogisticRegression(
            input_list=self.hiddenLayers[n_layers - 1].output_list,
            n_in=n_hidden,
            n_out=n_out,
            n_total=n_total,
            mask=mask,
            batch=batch)
        self.params.extend(self.logRegressionLayer.params)
        self.velo.extend(self.logRegressionLayer.velo)
        # L1 regularization
        l1_sum = 0
        for layer in self.hiddenLayers:
            l1_sum += abs(layer.W2).sum() + abs(layer.W1).sum() + abs(
                layer.U1).sum() + abs(layer.U2).sum()
        self.L1 = l1_sum + abs(self.logRegressionLayer.W).sum()
        # L2 squared regularization
        l2_sum = 0
        for layer in self.hiddenLayers:
            l2_sum += abs(layer.W2**2).sum() + abs(layer.W1**2).sum() + abs(
                layer.U1**2).sum() + abs(layer.U2**2).sum()

        self.L2_sqr = l2_sum + (self.logRegressionLayer.W**2).sum() + (
            self.logRegressionLayer.M**2).sum()
        # negative log likelihood
        self.negative_log_likelihood = self.logRegressionLayer.negative_log_likelihood
        # errors
        self.errors = self.logRegressionLayer.errors
        # predict
        self.y_pred = self.logRegressionLayer.y_pred
Code Example #12
File: learn.py    Project: rfaulkner/easyML
    def __init__(self, rng, _input, n_in, n_hidden, n_out):
        """Initialize the parameters for the multilayer perceptron

        :type rng: numpy.random.RandomState
        :param rng: a random number generator used to initialize weights

        :type input: theano.tensor.TensorType
        :param input: symbolic variable that describes the input of the
        architecture (one minibatch)

        :type n_in: int
        :param n_in: number of input units, the dimension of the space in
        which the datapoints lie

        :type n_hidden: int
        :param n_hidden: number of hidden units

        :type n_out: int
        :param n_out: number of output units, the dimension of the space in
        which the labels lie

        """

        # Since we are dealing with a one hidden layer MLP, this will
        # translate into a TanhLayer connected to the LogisticRegression
        # layer; this can be replaced by a SigmoidalLayer, or a layer
        # implementing any other nonlinearity
        self.hiddenLayer = HiddenLayer(rng=rng,
                                       input=_input,
                                       n_in=n_in,
                                       n_out=n_hidden,
                                       activation=T.tanh)

        # The logistic regression layer gets as input the hidden units
        # of the hidden layer
        self.logRegressionLayer = LogisticRegression(
            _input=self.hiddenLayer.output, n_in=n_hidden, n_out=n_out)

        # L1 norm ; one regularization option is to enforce L1 norm to
        # be small
        self.L1 = abs(self.hiddenLayer.W).sum() \
                + abs(self.logRegressionLayer.W).sum()

        # square of L2 norm ; one regularization option is to enforce
        # square of L2 norm to be small
        self.L2_sqr = (self.hiddenLayer.W ** 2).sum() \
                    + (self.logRegressionLayer.W ** 2).sum()

        # negative log likelihood of the MLP is given by the negative
        # log likelihood of the output of the model, computed in the
        # logistic regression layer
        self.negative_log_likelihood = self.logRegressionLayer.negative_log_likelihood
        # same holds for the function computing the number of errors
        self.errors = self.logRegressionLayer.errors

        # the parameters of the model are the parameters of the two layer it is
        # made out of
        self.params = self.hiddenLayer.params + self.logRegressionLayer.params
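
For reference, the forward pass that this graph defines, a tanh hidden layer feeding a softmax output layer, can be written directly in NumPy. This is a minimal sketch with random weights, not the easyML implementation; all names here are illustrative:

import numpy as np

def mlp_forward(x, W_h, b_h, W_o, b_o):
    # tanh hidden layer followed by a softmax output layer
    h = np.tanh(x @ W_h + b_h)
    logits = h @ W_o + b_o
    logits -= logits.max(axis=1, keepdims=True)  # subtract max for numerical stability
    e = np.exp(logits)
    return e / e.sum(axis=1, keepdims=True)      # rows are p(y | x)

rng = np.random.default_rng(0)
n_in, n_hidden, n_out = 8, 16, 3
x = rng.standard_normal((5, n_in))
p = mlp_forward(x,
                rng.standard_normal((n_in, n_hidden)), np.zeros(n_hidden),
                rng.standard_normal((n_hidden, n_out)), np.zeros(n_out))
assert np.allclose(p.sum(axis=1), 1.0)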
Code Example #13
    def __init__(self, input, rng, n_in, n_out, n_hidden):

        self.hidden = HiddenLayer(
            input=input,
            rng=rng,
            n_in=n_in,
            n_out=n_hidden,
        )

        self.logistic_reg = LogisticRegression(input=self.hidden.output,
                                               n_in=n_hidden,
                                               n_out=n_out)
Code Example #14
 def __init__(self,rng,input,n_in,n_h,n_out):
     self.hidden_layer = HiddenLayer(rng,input=input,n_in=n_in,n_out=n_h)
     self.output_layer = LogisticRegression(input=self.hidden_layer.output, n_in=n_h,n_out=n_out)
     #regularization
     self.L1 = abs(self.hidden_layer.w).sum() + abs(self.output_layer.w).sum()
     self.L2 = (self.hidden_layer.w**2).sum() + (self.output_layer.w**2).sum()
     # Negative Log Likelihood
     self.neg_log_likelihood = (self.output_layer.neg_log_likelihood)
     # errors function
     self.errors = (self.output_layer.errors)
     # params
     self.params = self.hidden_layer.params + self.output_layer.params
     
     self.input = input
Code Example #15
    def __init__(self, input=None, y=None, Cparams=None, Mparams=None):

        c_w0, c_b0, c_w1, c_b1, c_w2, c_b2, c_w3, c_b3 = Cparams
        m_w1, m_b1, o_w1, o_b1 = Mparams

        c_layer0 = LeNetConvPoolLayer(input=input,
                                      filter_shape=filter_shape0,
                                      image_shape=image_shape0,
                                      W=c_w0,
                                      b=c_b0,
                                      poolsize=poolsize0)

        c_layer1 = LeNetConvPoolLayer(input=c_layer0.output,
                                      filter_shape=filter_shape1,
                                      image_shape=image_shape1,
                                      W=c_w1,
                                      b=c_b1,
                                      poolsize=poolsize1)

        c_layer2 = LeNetConvPoolLayer(input=c_layer1.output,
                                      filter_shape=filter_shape2,
                                      image_shape=image_shape2,
                                      W=c_w2,
                                      b=c_b2,
                                      poolsize=poolsize2)
        c_layer3 = LeNetConvPoolLayer(input=c_layer2.output,
                                      filter_shape=filter_shape3,
                                      image_shape=image_shape3,
                                      W=c_w3,
                                      b=c_b3,
                                      poolsize=poolsize3)

        m_input = c_layer3.output
        m_input = m_input.flatten(2)
        m_layer1 = HiddenLayer(m_input, W=m_w1, b=m_b1)
        s_layer = LogisticRegression(m_layer1.output, W=o_w1, b=o_b1)
        self.cost = s_layer.negative_log_likelihood(y)
Code Example #16
 def __init__(self, input, n_in, n_hidden, n_out, n_layers):
     # hidden layers
     self.params = []
     self.hiddenLayers = []
     self.velo = []
     self.hiddenLayers.append(
         HiddenLayer(input=input,
                     n_in=n_in,
                     n_out=n_hidden,
                     activation=a.relu))
     self.params.extend(self.hiddenLayers[0].params)
     self.velo.extend(self.hiddenLayers[0].velo)
     for i in range(1, n_layers):
         self.hiddenLayers.append(
             HiddenLayer(input=self.hiddenLayers[i - 1].output,
                         n_in=n_hidden,
                         n_out=n_hidden,
                         activation=a.relu))
         self.params.extend(self.hiddenLayers[i].params)
         self.velo.extend(self.hiddenLayers[i].velo)
     # output layer
     self.logRegressionLayer = LogisticRegression(
         input=self.hiddenLayers[n_layers - 1].output,
         n_in=n_hidden,
         n_out=n_out)
     self.params.extend(self.logRegressionLayer.params)
     self.velo.extend(self.logRegressionLayer.velo)
     # L1 regularization
     l1_sum = 0
     for layer in self.hiddenLayers:
         l1_sum += abs(layer.W).sum()
     self.L1 = l1_sum + abs(self.logRegressionLayer.W).sum()
     # L2 squared regularization
     l2_sum = 0
     for layer in self.hiddenLayers:
         l2_sum += (layer.W**2).sum()
     self.L2_sqr = l2_sum + (self.logRegressionLayer.W**2).sum()
     # negative log likelihood
     self.negative_log_likelihood = (
         self.logRegressionLayer.negative_log_likelihood)
     # errors
     self.errors = self.logRegressionLayer.errors
     # predict
     self.y_pred = self.logRegressionLayer.y_pred
     self.output = self.logRegressionLayer.y.T
Code Example #17
    def __init__(self, input=None, Cparams=None, Mparams=None):

        c_w0, c_b0, c_w1, c_b1, c_w2, c_b2, c_w3, c_b3 = Cparams
        m_w1, m_b1, o_w1, o_b1 = Mparams

        c1_layer0 = LeNetConvPoolLayer(input=input,
                                       filter_shape=filter_shape0,
                                       image_shape=image_shape0,
                                       W=c_w0,
                                       b=c_b0,
                                       poolsize=poolsize0)

        c1_layer1 = LeNetConvPoolLayer(input=c1_layer0.output,
                                       filter_shape=filter_shape1,
                                       image_shape=image_shape1,
                                       W=c_w1,
                                       b=c_b1,
                                       poolsize=poolsize1)

        c1_layer2 = LeNetConvPoolLayer(input=c1_layer1.output,
                                       filter_shape=filter_shape2,
                                       image_shape=image_shape2,
                                       W=c_w2,
                                       b=c_b2,
                                       poolsize=poolsize2)

        c1_layer3 = LeNetConvPoolLayer(input=c1_layer2.output,
                                       filter_shape=filter_shape3,
                                       image_shape=image_shape3,
                                       W=c_w3,
                                       b=c_b3,
                                       poolsize=poolsize3)

        m_input = c1_layer3.output
        m_input = m_input.flatten(2)
        m_layer1 = HiddenLayer(m_input, W=m_w1, b=m_b1)
        s_layer = LogisticRegression(m_layer1.output, W=o_w1, b=o_b1)
        #self.y_pred= s_layer.getlabel()
        self.y_pred = c1_layer3.output.flatten(1)
Code Example #18
def evaluate_lenet5(learning_rate=0.1,
                    n_epochs=200,
                    dataset='../data/mnist.pkl.gz',
                    nkerns=[20, 50],
                    batch_size=500):
    """ Demonstrates lenet on MNIST dataset

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: path to the dataset used for training /testing (MNIST here)

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer
    """

    rng = numpy.random.RandomState(23455)

    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches /= batch_size
    n_valid_batches /= batch_size
    n_test_batches /= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
    # [int] labels

    ishape = (28, 28)  # this is the size of MNIST images

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # Reshape matrix of rasterized images of shape (batch_size,28*28)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    layer0_input = x.reshape((batch_size, 1, 28, 28))

    # Construct the first convolutional pooling layer:
    # filtering reduces the image size to (28-5+1,28-5+1)=(24,24)
    # maxpooling reduces this further to (24/2,24/2) = (12,12)
    # 4D output tensor is thus of shape (batch_size,nkerns[0],12,12)
    layer0 = LeNetConvPoolLayer(rng,
                                input=layer0_input,
                                image_shape=(batch_size, 1, 28, 28),
                                filter_shape=(nkerns[0], 1, 5, 5),
                                poolsize=(2, 2))

    # Construct the second convolutional pooling layer
    # filtering reduces the image size to (12-5+1,12-5+1)=(8,8)
    # maxpooling reduces this further to (8/2,8/2) = (4,4)
    # 4D output tensor is thus of shape (nkerns[0],nkerns[1],4,4)
    layer1 = LeNetConvPoolLayer(rng,
                                input=layer0.output,
                                image_shape=(batch_size, nkerns[0], 12, 12),
                                filter_shape=(nkerns[1], nkerns[0], 5, 5),
                                poolsize=(2, 2))

    # the TanhLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size,num_pixels) (i.e matrix of rasterized images).
    # This will generate a matrix of shape (20,32*4*4) = (20,512)
    layer2_input = layer1.output.flatten(2)

    # construct a fully-connected sigmoidal layer
    layer2 = HiddenLayer(rng,
                         input=layer2_input,
                         n_in=nkerns[1] * 4 * 4,
                         n_out=500,
                         activation=T.tanh)

    # classify the values of the fully-connected sigmoidal layer
    layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # create a list of all model parameters to be fit by gradient descent
    params = layer3.params + layer2.params + layer1.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by
    # SGD Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i],grads[i]) pairs.
    updates = []
    for param_i, grad_i in zip(params, grads):
        updates.append((param_i, param_i - learning_rate * grad_i))

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'
    # early-stopping parameters
    patience = 10000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
    # found
    improvement_threshold = 0.995  # a relative improvement of this much is
    # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
    # go through this many
    # minibatches before checking the network
    # on the validation set; in this case we
    # check every epoch

    best_params = None
    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = time.clock()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            iter = (epoch - 1) * n_train_batches + minibatch_index

            if iter % 100 == 0:
                print 'training @ iter = ', iter
            cost_ij = train_model(minibatch_index)

            if (iter + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [
                    validate_model(i) for i in xrange(n_valid_batches)
                ]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' % \
                      (epoch, minibatch_index + 1, n_train_batches, \
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = [
                        test_model(i) for i in xrange(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)
                    print(
                        ('     epoch %i, minibatch %i/%i, test error of best '
                         'model %f %%') % (epoch, minibatch_index + 1,
                                           n_train_batches, test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = time.clock()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i,'\
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
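
The early-stopping bookkeeping above (patience, patience_increase, improvement_threshold) is independent of Theano and can be isolated in a few lines of plain Python. The sketch below replays the same heuristic on a made-up sequence of validation losses, checking patience after every validation step rather than every minibatch; names mirror the variables in the training loop above:

def run_early_stopping(validation_losses, patience=4, patience_increase=2,
                       improvement_threshold=0.995):
    # Return the step at which training would stop under the patience heuristic.
    best = float('inf')
    for step, loss in enumerate(validation_losses):
        if loss < best:
            # a sufficiently large improvement extends the patience window
            if loss < best * improvement_threshold:
                patience = max(patience, step * patience_increase)
            best = loss
        if patience <= step:
            return step
    return len(validation_losses) - 1

# Losses improve, then plateau: training stops once patience runs out.
print(run_early_stopping([0.9, 0.7, 0.5, 0.49, 0.49, 0.49, 0.49, 0.49, 0.49]))  # -> 6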
Code Example #19
File: dbn.py    Project: songjmcn/deep_learning_tools
    def __init__(self, numpy_rng, theano_rng=None, n_ins=784,
                 hidden_layers_sizes=[500, 500], n_outs=10, nkerns=[20, 50]):
        """This class is made to support a variable number of layers.

        :type numpy_rng: numpy.random.RandomState
        :param numpy_rng: numpy random number generator used to draw initial
                    weights

        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
        :param theano_rng: Theano random generator; if None is given one is
                           generated based on a seed drawn from `rng`

        :type n_ins: int
        :param n_ins: dimension of the input to the DBN

        :type hidden_layers_sizes: list of ints
        :param hidden_layers_sizes: intermediate layer sizes, must contain
                               at least one value

        :type n_outs: int
        :param n_outs: dimension of the output of the network
        """

        self.sigmoid_layers = []
        self.rbm_layers = []
        self.params = []
        self.n_layers = len(hidden_layers_sizes)

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

        # allocate symbolic variables for the data
        self.x = T.matrix('x')  # the data is presented as rasterized images
        self.y = T.ivector('y')  # the labels are presented as 1D vector
                                 # of [int] labels
        batch_size=500
        # The DBN is an MLP, for which all weights of intermediate
        # layers are shared with a different RBM.  We will first
        # construct the DBN as a deep multilayer perceptron, and when
        # constructing each sigmoidal layer we also construct an RBM
        # that shares weights with that layer. During pretraining we
        # will train these RBMs (which will lead to changing the
        # weights of the MLP as well) During finetuning we will finish
        # training the DBN by doing stochastic gradient descent on the
        # MLP.
        rng = numpy.random.RandomState(23455)
        self.layer0_input = self.x.reshape((batch_size, 1, 28, 28))
         # Construct the first convolutional pooling layer:
         # filtering reduces the image size to (28-5+1,28-5+1)=(24,24)
         # maxpooling reduces this further to (24/2,24/2) = (12,12)
         # 4D output tensor is thus of shape (batch_size,nkerns[0],12,12)
        self.layer0 = LeNetConvPoolLayer(rng, input=self.layer0_input,
            image_shape=(batch_size, 1, 28, 28),
            filter_shape=(nkerns[0], 1, 5, 5), poolsize=(2, 2))

        # Construct the second convolutional pooling layer
        # filtering reduces the image size to (12-5+1,12-5+1)=(8,8)
        # maxpooling reduces this further to (8/2,8/2) = (4,4)
         # 4D output tensor is thus of shape (nkerns[0],nkerns[1],4,4)
        self.layer1 = LeNetConvPoolLayer(rng, input=self.layer0.output,
            image_shape=(batch_size, nkerns[0], 12, 12),
            filter_shape=(nkerns[1], nkerns[0], 5, 5), poolsize=(2, 2))

        # the TanhLayer being fully-connected, it operates on 2D matrices of
        # shape (batch_size, num_pixels) (i.e. matrix of rasterized images).
        # This will generate a matrix of shape (20, 32*4*4) = (20, 512)
        self.layer2_input = self.layer1.output.flatten(2)
        self.layer2 = HiddenLayer(rng,
                                  input=self.layer2_input,
                                  n_in=nkerns[1] * 4 * 4,
                                  n_out=500,
                                  activation=T.tanh)
        for i in xrange(self.n_layers):
            # construct the sigmoidal layer

            # the size of the input is either the number of hidden
            # units of the layer below or the input size if we are on
            # the first layer
            if i == 0:
                input_size = 500
            else:
                input_size = hidden_layers_sizes[i - 1]

            # the input to this layer is either the activation of the
            # hidden layer below or the input of the DBN if you are on
            # the first layer
            if i == 0:
                layer_input = self.layer2.output
            else:
                layer_input = self.sigmoid_layers[-1].output
            
            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=T.nnet.sigmoid)

            # add the layer to our list of layers
            self.sigmoid_layers.append(sigmoid_layer)

            # it's arguably a philosophical question...  but we are
            # going to only declare that the parameters of the
            # sigmoid_layers are parameters of the DBN. The visible
            # biases in the RBM are parameters of those RBMs, but not
            # of the DBN.
            self.params.extend(sigmoid_layer.params)

            # Construct an RBM that shared weights with this layer
            rbm_layer = RBM(numpy_rng=numpy_rng,
                            theano_rng=theano_rng,
                            input=layer_input,
                            n_visible=input_size,
                            n_hidden=hidden_layers_sizes[i],
                            W=sigmoid_layer.W,
                            hbias=sigmoid_layer.b)
            self.rbm_layers.append(rbm_layer)

        # We now need to add a logistic layer on top of the MLP
        self.logLayer = LogisticRegression(
            input=self.sigmoid_layers[-1].output,
            n_in=hidden_layers_sizes[-1],
            n_out=n_outs)
        self.params.extend(self.logLayer.params)

        # compute the cost for second phase of training, defined as the
        # negative log likelihood of the logistic regression (output) layer
        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)

        # compute the gradients with respect to the model parameters
        # symbolic variable that points to the number of errors made on the
        # minibatch given by self.x and self.y
        self.errors = self.logLayer.errors(self.y)
Code Example #20
File: dbn.py    Project: songjmcn/deep_learning_tools
class DBN(object):
    """Deep Belief Network

    A deep belief network is obtained by stacking several RBMs on top of each
    other. The hidden layer of the RBM at layer `i` becomes the input of the
    RBM at layer `i+1`. The first layer RBM gets as input the input of the
    network, and the hidden layer of the last RBM represents the output. When
    used for classification, the DBN is treated as a MLP, by adding a logistic
    regression layer on top.
    """

    def __init__(self, numpy_rng, theano_rng=None, n_ins=784,
                 hidden_layers_sizes=[500, 500], n_outs=10, nkerns=[20, 50]):
        """This class is made to support a variable number of layers.

        :type numpy_rng: numpy.random.RandomState
        :param numpy_rng: numpy random number generator used to draw initial
                    weights

        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
        :param theano_rng: Theano random generator; if None is given one is
                           generated based on a seed drawn from `rng`

        :type n_ins: int
        :param n_ins: dimension of the input to the DBN

        :type hidden_layers_sizes: list of ints
        :param hidden_layers_sizes: intermediate layer sizes, must contain
                               at least one value

        :type n_outs: int
        :param n_outs: dimension of the output of the network
        """

        self.sigmoid_layers = []
        self.rbm_layers = []
        self.params = []
        self.n_layers = len(hidden_layers_sizes)

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

        # allocate symbolic variables for the data
        self.x = T.matrix('x')  # the data is presented as rasterized images
        self.y = T.ivector('y')  # the labels are presented as 1D vector
                                 # of [int] labels
        batch_size=500
        # The DBN is an MLP, for which all weights of intermediate
        # layers are shared with a different RBM.  We will first
        # construct the DBN as a deep multilayer perceptron, and when
        # constructing each sigmoidal layer we also construct an RBM
        # that shares weights with that layer. During pretraining we
        # will train these RBMs (which will lead to changing the
        # weights of the MLP as well) During finetuning we will finish
        # training the DBN by doing stochastic gradient descent on the
        # MLP.
        rng = numpy.random.RandomState(23455)
        self.layer0_input = self.x.reshape((batch_size, 1, 28, 28))
         # Construct the first convolutional pooling layer:
         # filtering reduces the image size to (28-5+1,28-5+1)=(24,24)
         # maxpooling reduces this further to (24/2,24/2) = (12,12)
         # 4D output tensor is thus of shape (batch_size,nkerns[0],12,12)
        self.layer0 = LeNetConvPoolLayer(rng, input=self.layer0_input,
            image_shape=(batch_size, 1, 28, 28),
            filter_shape=(nkerns[0], 1, 5, 5), poolsize=(2, 2))

        # Construct the second convolutional pooling layer
        # filtering reduces the image size to (12-5+1,12-5+1)=(8,8)
        # maxpooling reduces this further to (8/2,8/2) = (4,4)
         # 4D output tensor is thus of shape (nkerns[0],nkerns[1],4,4)
        self.layer1 = LeNetConvPoolLayer(rng, input=self.layer0.output,
            image_shape=(batch_size, nkerns[0], 12, 12),
            filter_shape=(nkerns[1], nkerns[0], 5, 5), poolsize=(2, 2))

        # the TanhLayer being fully-connected, it operates on 2D matrices of
        # shape (batch_size, num_pixels) (i.e. matrix of rasterized images).
        # This will generate a matrix of shape (20, 32*4*4) = (20, 512)
        self.layer2_input = self.layer1.output.flatten(2)
        self.layer2 = HiddenLayer(rng,
                                  input=self.layer2_input,
                                  n_in=nkerns[1] * 4 * 4,
                                  n_out=500,
                                  activation=T.tanh)
        for i in xrange(self.n_layers):
            # construct the sigmoidal layer

            # the size of the input is either the number of hidden
            # units of the layer below or the input size if we are on
            # the first layer
            if i == 0:
                input_size = 500
            else:
                input_size = hidden_layers_sizes[i - 1]

            # the input to this layer is either the activation of the
            # hidden layer below or the input of the DBN if you are on
            # the first layer
            if i == 0:
                layer_input = self.layer2.output
            else:
                layer_input = self.sigmoid_layers[-1].output
            
            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=T.nnet.sigmoid)

            # add the layer to our list of layers
            self.sigmoid_layers.append(sigmoid_layer)

            # it's arguably a philosophical question...  but we are
            # going to only declare that the parameters of the
            # sigmoid_layers are parameters of the DBN. The visible
            # biases in the RBM are parameters of those RBMs, but not
            # of the DBN.
            self.params.extend(sigmoid_layer.params)

            # Construct an RBM that shared weights with this layer
            rbm_layer = RBM(numpy_rng=numpy_rng,
                            theano_rng=theano_rng,
                            input=layer_input,
                            n_visible=input_size,
                            n_hidden=hidden_layers_sizes[i],
                            W=sigmoid_layer.W,
                            hbias=sigmoid_layer.b)
            self.rbm_layers.append(rbm_layer)

        # We now need to add a logistic layer on top of the MLP
        self.logLayer = LogisticRegression(
            input=self.sigmoid_layers[-1].output,
            n_in=hidden_layers_sizes[-1],
            n_out=n_outs)
        self.params.extend(self.logLayer.params)

        # compute the cost for second phase of training, defined as the
        # negative log likelihood of the logistic regression (output) layer
        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)

        # compute the gradients with respect to the model parameters
        # symbolic variable that points to the number of errors made on the
        # minibatch given by self.x and self.y
        self.errors = self.logLayer.errors(self.y)

    def pretraining_functions(self, train_set_x, batch_size, k):
        '''Generates a list of functions, for performing one step of
        gradient descent at a given layer. The function will require
        as input the minibatch index, and to train an RBM you just
        need to iterate, calling the corresponding function on all
        minibatch indexes.

        :type train_set_x: theano.tensor.TensorType
        :param train_set_x: Shared var. that contains all datapoints used
                            for training the RBM
        :type batch_size: int
        :param batch_size: size of a [mini]batch
        :param k: number of Gibbs steps to do in CD-k / PCD-k

        '''

        # index to a [mini]batch
        index = T.lscalar('index')  # index to a minibatch
        learning_rate = T.scalar('lr')  # learning rate to use

        # number of batches
        n_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
        # beginning of a batch, given `index`
        batch_begin = index * batch_size
        # ending of a batch given `index`
        batch_end = batch_begin + batch_size

        pretrain_fns = []
        for rbm in self.rbm_layers:

            # get the cost and the updates list
            # using CD-k here (persistent=None) for training each RBM.
            # TODO: change cost function to reconstruction error
            cost, updates = rbm.get_cost_updates(learning_rate,
                                                 persistent=None, k=k)

            # compile the theano function
            fn = theano.function(inputs=[index,
                            theano.Param(learning_rate, default=0.1)],
                                 outputs=cost,
                                 updates=updates,
                                 givens={self.x:
                                    train_set_x[batch_begin:batch_end]})
            # append `fn` to the list of functions
            pretrain_fns.append(fn)

        return pretrain_fns

    def build_finetune_functions(self, datasets, batch_size, learning_rate):
        '''Generates a function `train` that implements one step of
        finetuning, a function `validate` that computes the error on a
        batch from the validation set, and a function `test` that
        computes the error on a batch from the testing set

        :type datasets: list of pairs of theano.tensor.TensorType
        :param datasets: It is a list that contains all the datasets;
                        it has to contain three pairs, `train`,
                        `valid`, `test` in this order, where each pair
                        is formed of two Theano variables, one for the
                        datapoints, the other for the labels
        :type batch_size: int
        :param batch_size: size of a minibatch
        :type learning_rate: float
        :param learning_rate: learning rate used during finetune stage

        '''

        (train_set_x, train_set_y) = datasets[0]
        (valid_set_x, valid_set_y) = datasets[1]
        (test_set_x, test_set_y) = datasets[2]

        # compute number of minibatches for training, validation and testing
        n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
        n_valid_batches /= batch_size
        n_test_batches = test_set_x.get_value(borrow=True).shape[0]
        n_test_batches /= batch_size

        index = T.lscalar('index')  # index to a [mini]batch

        # compute the gradients with respect to the model parameters
        gparams = T.grad(self.finetune_cost, self.params)

        # compute list of fine-tuning updates
        updates = []
        for param, gparam in zip(self.params, gparams):
            updates.append((param, param - gparam * learning_rate))

        train_fn = theano.function(inputs=[index],
              outputs=self.finetune_cost,
              updates=updates,
              givens={self.x: train_set_x[index * batch_size:
                                          (index + 1) * batch_size],
                      self.y: train_set_y[index * batch_size:
                                          (index + 1) * batch_size]})

        test_score_i = theano.function([index], self.errors,
                 givens={self.x: test_set_x[index * batch_size:
                                            (index + 1) * batch_size],
                         self.y: test_set_y[index * batch_size:
                                            (index + 1) * batch_size]})

        valid_score_i = theano.function([index], self.errors,
              givens={self.x: valid_set_x[index * batch_size:
                                          (index + 1) * batch_size],
                      self.y: valid_set_y[index * batch_size:
                                          (index + 1) * batch_size]})

        # Create a function that scans the entire validation set
        def valid_score():
            return [valid_score_i(i) for i in xrange(n_valid_batches)]

        # Create a function that scans the entire test set
        def test_score():
            return [test_score_i(i) for i in xrange(n_test_batches)]

        return train_fn, valid_score, test_score
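
For context, the two factories above are normally driven by a greedy layer-wise pretraining loop followed by a supervised finetuning loop. The sketch below shows only that driver shape, using the pretraining_functions and build_finetune_functions interfaces defined above; the dbn and datasets arguments and all hyper-parameter values are assumptions for illustration, not part of the songjmcn/deep_learning_tools code:

import numpy

def pretrain_and_finetune(dbn, datasets, batch_size=500, k=1,
                          pretraining_epochs=10, pretrain_lr=0.01,
                          finetune_epochs=10, finetune_lr=0.1):
    train_set_x, _ = datasets[0]
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size

    # Greedy layer-wise pretraining: one compiled function per RBM,
    # called over every minibatch index for a few epochs.
    pretrain_fns = dbn.pretraining_functions(train_set_x=train_set_x,
                                             batch_size=batch_size, k=k)
    for layer_idx, pretrain in enumerate(pretrain_fns):
        for epoch in range(pretraining_epochs):
            costs = [pretrain(index=i, lr=pretrain_lr)
                     for i in range(n_train_batches)]
            print('pretraining layer %d, epoch %d, cost %f'
                  % (layer_idx, epoch, numpy.mean(costs)))

    # Supervised finetuning of the whole stack with the compiled SGD step.
    train_fn, valid_score, _ = dbn.build_finetune_functions(
        datasets=datasets, batch_size=batch_size, learning_rate=finetune_lr)
    for epoch in range(finetune_epochs):
        for i in range(n_train_batches):
            train_fn(i)
        print('finetuning epoch %d, validation error %f'
              % (epoch, numpy.mean(valid_score())))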
Code Example #21
        # Store predictions at the right positions in the result vector.
        predictions[cat_indices_te] = predictions_cat.reshape(
            predictions[cat_indices_te].shape)

    return predictions


if __name__ == "__main__":
    # Import data
    y_train, x_train, ids_train = helper.load_csv_data('train.csv')
    y_test, x_test, ids_test = helper.load_csv_data('test.csv')
    y_train[y_train < 0] = 0

    # Define 1 model per category
    models = [
        LogisticRegression(degree=3, gamma=0.1),
        LogisticRegression(degree=6, gamma=0.1),
        LogisticRegression(degree=6, gamma=0.1),
        LogisticRegression(degree=6, gamma=0.1)
    ]

    # Train and predict
    predictions = train_predict_categories(y_train, x_train, x_test, *models)

    # Prepare for export
    predictions[predictions == 0] = -1

    # Export results
    helper.create_csv_submission(ids_test, predictions, 'predictions.csv')
Code Example #22
File: lenet.py    Project: suriyadeepan/theano

# In[ ]:

## Layer 2 : Hidden Layer setup ##
# layer1 output shape : batch_sizex50x4x4
# layer2_h input shape req : batch_size x (50*4*4)
layer2_h_input = layer1.output.flatten(2)
# n_in = 50x4x4 pixels; n_out = 500 hidden nodes
layer2_h = HiddenLayer(rng=rng,input=layer2_h_input,n_in=50*4*4,n_out=500)


# In[ ]:

# Layer 3 : Output layer : LogisticRegression
layer3_o = LogisticRegression(input=layer2_h.output,n_in=500,n_out=10)


# In[ ]:

# cost 
cost = layer3_o.neg_log_likelihood(y)
# >> setup gradient expression <<
### Need :parameters
params = layer3_o.params + layer2_h.params + layer1.params + layer0.params
gparams = T.grad(cost,params)


# In[ ]:

## Updates ##
Code Example #23
File: cnn.py    Project: Fuchai/tangkk-mirex-ace
def test_cnn(trainpath,
             trainlist,
             validset,
             dumppath,
             learning_rate=0.01,
             n_epochs=200,
             batch_size=100,
             earlystop=True):
    """

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: path to the dataset used for training /testing (MNIST here)

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer
    """

    rng = numpy.random.RandomState(123)

    # datasets = load_data(dataset)
    datasets = loadmat(trainpath=trainpath,
                       trainlist=trainlist,
                       validset=validset,
                       shuffle=shuffle,
                       datasel=datasel,
                       scaling=scaling,
                       robust=robust)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]

    n_train_batches /= batch_size
    n_valid_batches /= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # start-snippet-1
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
    # [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # H - height; W - width
    # when the input is note salience matrix
    # idim0_H = 42
    # idim0_W = 36
    # fdim0_H = 6
    # fdim0_W = 6

    # when the input is chromagram
    idim0_H = 12
    idim0_W = 12

    fdim0_H = 2
    fdim0_W = 2
    pdim0_H = 2
    pdim0_W = 2

    idim1_H = (idim0_H - fdim0_H + 1) / pdim0_H
    idim1_W = (idim0_W - fdim0_W + 1) / pdim0_W

    fdim1_H = 2
    fdim1_W = 2
    pdim1_H = 2
    pdim1_W = 2

    idim2_H = (idim1_H - fdim1_H + 1) / pdim1_H
    idim2_W = (idim1_W - fdim1_W + 1) / pdim1_W

    fdim2 = 800

    nkerns = [20, 20]

    # the below comments are examples of using this cnn to deal with chromagram with input feature size 144 = 12*12
    # Reshape matrix of rasterized images of shape (batch_size, 12 * 12)
    # to a 4D tensor, compatible with our ConvPoolLayer
    # (12, 12) is the size of MNIST images.
    layer0_input = x.reshape((batch_size, 1, idim0_H, idim0_W))

    # Construct the first convolutional pooling layer:
    # filtering reduces the image size to (12-2+1 , 12-2+1) = (11, 11)
    # maxpooling reduces this further to (11/2, 11/2) = (5, 5)
    # 4D output tensor is thus of shape (batch_size, nkerns[0], 5, 5)
    layer0 = ConvPoolLayer(rng,
                           input=layer0_input,
                           input_shape=(batch_size, 1, idim0_H, idim0_W),
                           filter_shape=(nkerns[0], 1, fdim0_H, fdim0_W),
                           poolsize=(pdim0_H, pdim0_W))

    # Construct the second convolutional pooling layer
    # filtering reduces the image size to (5-2+1, 5-2+1) = (4, 4)
    # maxpooling reduces this further to (4/2, 4/2) = (2, 2)
    # 4D output tensor is thus of shape (batch_size, nkerns[1], 2, 2)

    layer1 = ConvPoolLayer(rng,
                           input=layer0.output,
                           input_shape=(batch_size, nkerns[0], idim1_H,
                                        idim1_W),
                           filter_shape=(nkerns[1], nkerns[0], fdim1_H,
                                         fdim1_W),
                           poolsize=(pdim1_H, pdim1_W))

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e matrix of rasterized images).
    # This will generate a matrix of shape (batch_size, nkerns[1] * 2 * 2),
    # or (500, 50 * 4 * 4) = (500, 800) with the default values.
    layer2_input = layer1.output.flatten(2)

    # construct a fully-connected sigmoidal layer
    layer2 = HiddenLayer(rng,
                         input=layer2_input,
                         n_in=nkerns[1] * idim2_H * idim2_W,
                         n_out=fdim2,
                         activation=T.nnet.relu)

    # classify the values of the fully-connected sigmoidal layer
    nclass = max(train_set_y.eval()) + 1
    layer3 = LogisticRegression(input=layer2.output, n_in=fdim2, n_out=nclass)

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model

    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    train_score = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # create a list of all model parameters to be fit by gradient descent
    params = layer3.params + layer2.params + layer1.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by
    # SGD Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i], grads[i]) pairs.
    updates = [(param_i, param_i - learning_rate * grad_i)
               for param_i, grad_i in zip(params, grads)]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })
    # end-snippet-1

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'
    # early-stopping parameters
    patience = 10 * n_train_batches  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
    # found
    improvement_threshold = 0.996  # a relative improvement of this much is
    # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
    # go through this many
    # minibatches before checking the network
    # on the validation set; in this case we
    # check every epoch

    best_validation_loss = numpy.inf
    best_iter = 0
    training_history = []
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs):
        if earlystop and done_looping:
            print 'early-stopping'
            break
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            iter = (epoch - 1) * n_train_batches + minibatch_index

            cost_ij = train_model(minibatch_index)

            if (iter + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [
                    validate_model(i) for i in xrange(n_valid_batches)
                ]
                #training_losses = [train_score(i) for i in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                #this_training_loss = numpy.mean(training_losses)

                #training_history.append([iter,this_training_loss,this_validation_loss])
                training_history.append([iter, this_validation_loss])

                #                print('epoch %i, minibatch %i/%i, training error %f %%' %
                #                      (epoch, minibatch_index + 1, n_train_batches,
                #                       this_training_loss * 100.))
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))
                print('iter = %d' % iter)
                print('patience = %d' % patience)

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    # improve patience if the loss improvement is good enough
                    if this_validation_loss < best_validation_loss * improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter
                    print('best_validation_loss %f' % best_validation_loss)

                    # checkpoint the model together with the updated best score
                    numpy.savez(dumppath,
                                model=params,
                                training_history=training_history,
                                best_validation_loss=best_validation_loss)

            if patience <= iter:
                done_looping = True
                if earlystop:
                    break

    end_time = timeit.default_timer()
    # final save
    numpy.savez(dumppath,
                model=params,
                training_history=training_history,
                best_validation_loss=best_validation_loss)

    print(('Optimization complete with best validation score of %f %%, '
           'obtained at iteration %i, ') %
          (best_validation_loss * 100., best_iter + 1))
    print >> sys.stderr, ('The fine tuning code for file ' +
                          os.path.split(__file__)[1] + ' ran for %.2fm' %
                          ((end_time - start_time) / 60.))
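The training loop above implements patience-based early stopping: a budget of iterations (patience) that is extended whenever the validation error improves by more than improvement_threshold, with training stopping once the budget runs out. Below is a minimal, framework-agnostic sketch of the same bookkeeping; train_one_minibatch and validation_error are hypothetical stand-ins for the compiled train_model and validate_model functions.

def early_stopping_loop(n_epochs, n_train_batches, train_one_minibatch, validation_error,
                        patience, patience_increase=2, improvement_threshold=0.995):
    # check the validation set roughly once per epoch, but at least twice per patience window
    validation_frequency = min(n_train_batches, patience // 2)
    best_validation_loss = float('inf')
    best_iter = 0
    for epoch in range(1, n_epochs + 1):
        for minibatch_index in range(n_train_batches):
            it = (epoch - 1) * n_train_batches + minibatch_index
            train_one_minibatch(minibatch_index)
            if (it + 1) % validation_frequency == 0:
                this_loss = validation_error()
                if this_loss < best_validation_loss:
                    # a sufficiently large improvement buys more patience
                    if this_loss < best_validation_loss * improvement_threshold:
                        patience = max(patience, it * patience_increase)
                    best_validation_loss, best_iter = this_loss, it
            if patience <= it:
                # ran out of patience: stop early
                return best_validation_loss, best_iter
    return best_validation_loss, best_iter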
Code Example #24
File: cnn.py Project: tangkk/tangkk-mirex-ace
def evaluate_lenet5(learning_rate=0.01, n_epochs=200,
                    dataset='../testnn.mat',
                    nkerns=[20, 20], batch_size=100):
    """ Demonstrates lenet on MNIST dataset

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: path to the dataset used for training / testing

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer
    """

    rng = numpy.random.RandomState(123)

    # datasets = load_data(dataset)
    datasets = loadmat(dataset=dataset, shuffle=shuffle, datasel=datasel, scaling=scaling, robust=robust)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches /= batch_size
    n_valid_batches /= batch_size
    n_test_batches /= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # start-snippet-1
    x = T.matrix('x')   # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'
    
    # The comments below illustrate using this CNN on MNIST, where the input feature size is 784 = 28 * 28.
    # Reshape matrix of rasterized images of shape (batch_size, 28 * 28)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    # (28, 28) is the size of MNIST images.
    layer0_input = x.reshape((batch_size, 1, idim0_H, idim0_W))

    # Construct the first convolutional pooling layer:
    # filtering reduces the image size to (28-5+1 , 28-5+1) = (24, 24)
    # maxpooling reduces this further to (24/2, 24/2) = (12, 12)
    # 4D output tensor is thus of shape (batch_size, nkerns[0], 12, 12)
    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        image_shape=(batch_size, 1, idim0_H, idim0_W),
        filter_shape=(nkerns[0], 1, fdim0_H, fdim0_W),
        poolsize=(pdim0_H, pdim0_W)
    )

    # Construct the second convolutional pooling layer
    # filtering reduces the image size to (12-5+1, 12-5+1) = (8, 8)
    # maxpooling reduces this further to (8/2, 8/2) = (4, 4)
    # 4D output tensor is thus of shape (batch_size, nkerns[1], 4, 4)
    
    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, nkerns[0], idim1_H, idim1_W),
        filter_shape=(nkerns[1], nkerns[0], fdim1_H, fdim1_W),
        poolsize=(pdim1_H, pdim1_W)
    )

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e matrix of rasterized images).
    # This will generate a matrix of shape (batch_size, nkerns[1] * 4 * 4),
    # or (500, 50 * 4 * 4) = (500, 800) with the default values.
    layer2_input = layer1.output.flatten(2)
    
    
    # construct a fully-connected sigmoidal layer
    layer2 = HiddenLayer(
        rng,
        input=layer2_input,
        n_in=nkerns[1] * idim2_H * idim2_W,
        n_out=fdim2,
        activation=T.tanh
    )

    # classify the values of the fully-connected sigmoidal layer
    nclass = max(train_set_y.eval()) + 1
    layer3 = LogisticRegression(input=layer2.output, n_in=fdim2, n_out=nclass)

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )
    
    train_score = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # create a list of all model parameters to be fit by gradient descent
    params = layer3.params + layer2.params + layer1.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by SGD.
    # Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i], grads[i]) pairs.
    updates = [
        (param_i, param_i - learning_rate * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )
    # end-snippet-1

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'
    # early-stopping parameters
    patience = 10000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
                           # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
                                  # go through this many minibatches before
                                  # checking the network on the validation
                                  # set; in this case we check every epoch

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs):
        if earlystop and done_looping:
            print 'early-stopping'
            break
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            iter = (epoch - 1) * n_train_batches + minibatch_index

            if iter % 100 == 0:
                print 'training @ iter = ', iter
            cost_ij = train_model(minibatch_index)

            if (iter + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [validate_model(i) for i
                                     in xrange(n_valid_batches)]
                # training error is estimated on the first n_valid_batches
                # training minibatches only
                training_losses = [train_score(i) for i
                                   in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                this_training_loss = numpy.mean(training_losses)
                print('epoch %i, minibatch %i/%i, training error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_training_loss * 100.))
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = [
                        test_model(i)
                        for i in xrange(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                if earlystop:
                    break

    end_time = timeit.default_timer()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
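The image-size comments above (28 -> 24 -> 12 after the first layer, then 12 -> 8 -> 4 for the MNIST defaults) follow from a 'valid' convolution followed by non-overlapping max-pooling. A small sketch of that arithmetic, using hypothetical helper and variable names:

def conv_pool_output_size(input_size, filter_size, pool_size):
    # 'valid' convolution shrinks the feature map, pooling then downsamples it
    conv_size = input_size - filter_size + 1
    return conv_size // pool_size

# MNIST defaults from the comments: 5x5 filters and 2x2 pooling at both layers
after_layer0 = conv_pool_output_size(28, 5, 2)            # (28 - 5 + 1) / 2 = 12
after_layer1 = conv_pool_output_size(after_layer0, 5, 2)  # (12 - 5 + 1) / 2 = 4
print('layer0 -> %dx%d, layer1 -> %dx%d' % (after_layer0, after_layer0, after_layer1, after_layer1))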
Code Example #25
File: dbn.py Project: wyx1227/tangkk-mirex-ace
    def __init__(self, numpy_rng, theano_rng=None, n_ins=784,
                 hidden_layers_sizes=[500, 500], n_outs=10, L1_reg=0,
                 L2_reg=0, first_layer='grbm', model=None):
        """This class is made to support a variable number of layers.

        :type numpy_rng: numpy.random.RandomState
        :param numpy_rng: numpy random number generator used to draw initial
                    weights

        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
        :param theano_rng: Theano random generator; if None is given one is
                           generated based on a seed drawn from `rng`

        :type n_ins: int
        :param n_ins: dimension of the input to the DBN

        :type hidden_layers_sizes: list of ints
        :param hidden_layers_sizes: intermediate layers size, must contain
                               at least one value

        :type n_outs: int
        :param n_outs: dimension of the output of the network

        :type L1_reg: float
        :param L1_reg: weight of the L1-norm penalty in the finetuning cost

        :type L2_reg: float
        :param L2_reg: weight of the L2-norm penalty in the finetuning cost

        :type first_layer: string
        :param first_layer: 'grbm' for a Gaussian-Bernoulli first layer,
                            'rbm' for a binary one

        :type model: list or None
        :param model: optional flat list of parameters [W_0, b_0, ..., W_log,
                      b_log] used to warm-start the layers
        """

        self.sigmoid_layers = []
        self.rbm_layers = []
        self.params = []
        self.n_layers = len(hidden_layers_sizes)
        self.L1 = 0
        self.L2_sqr = 0

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = MRG_RandomStreams(numpy_rng.randint(2 ** 30))

        # allocate symbolic variables for the data
        self.x = T.matrix('x')  # the data is presented as rasterized images
        self.y = T.ivector('y')  # the labels are presented as 1D vector
                                 # of [int] labels
        # end-snippet-1
        # The DBN is an MLP, for which all weights of intermediate
        # layers are shared with a different RBM.  We will first
        # construct the DBN as a deep multilayer perceptron, and when
        # constructing each sigmoidal layer we also construct an RBM
        # that shares weights with that layer. During pretraining we
        # will train these RBMs (which will lead to changing the
        # weights of the MLP as well). During finetuning we will finish
        # training the DBN by doing stochastic gradient descent on the
        # MLP.

        for i in xrange(self.n_layers):
            # construct the sigmoidal layer

            # the size of the input is either the number of hidden
            # units of the layer below or the input size if we are on
            # the first layer
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layers_sizes[i - 1]

            # the input to this layer is either the activation of the
            # hidden layer below or the input of the DBN if you are on
            # the first layer
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[i - 1].output
            
            if model is None:
                W = None
                b = None
            else:
                W = model[i*2]
                b = model[i*2 + 1]
                
            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        W = W,
                                        b = b,
                                        activation=T.nnet.sigmoid)

            # add the layer to our list of layers
            self.sigmoid_layers.append(sigmoid_layer)
            self.L1 += (abs(sigmoid_layer.W).sum())
            self.L2_sqr += ((sigmoid_layer.W ** 2).sum())

            # it's arguably a philosophical question...  but we are
            # going to only declare that the parameters of the
            # sigmoid_layers are parameters of the DBN. The visible
            # biases in the RBM are parameters of those RBMs, but not
            # of the DBN.
            self.params.extend(sigmoid_layer.params)

            # Construct an RBM that shared weights with this layer
            if i == 0:  # first layer: GRBM or RBM, dealing with continuous-valued input
                if first_layer == 'grbm':
                    rbm_layer = GRBM(numpy_rng=numpy_rng,
                                    theano_rng=theano_rng,
                                    input=layer_input,
                                    n_visible=input_size,
                                    n_hidden=hidden_layers_sizes[i],
                                    W=sigmoid_layer.W,
                                    hbias=sigmoid_layer.b)
                if first_layer == 'rbm':
                    rbm_layer = RBM(numpy_rng=numpy_rng,
                                    theano_rng=theano_rng,
                                    input=layer_input,
                                    n_visible=input_size,
                                    n_hidden=hidden_layers_sizes[i],
                                    W=sigmoid_layer.W,
                                    hbias=sigmoid_layer.b)
            # elif i == self.n_layers-1: # last layer GGRBM
                # rbm_layer = GRBM(numpy_rng=numpy_rng,
                                # theano_rng=theano_rng,
                                # input=layer_input,
                                # n_visible=input_size,
                                # n_hidden=hidden_layers_sizes[i],
                                # W=sigmoid_layer.W,
                                # hbias=sigmoid_layer.b)
            else:  # subsequent layers: binary-binary RBM
                rbm_layer = RBM(numpy_rng=numpy_rng,
                            theano_rng=theano_rng,
                            input=layer_input,
                            n_visible=input_size,
                            n_hidden=hidden_layers_sizes[i],
                            W=sigmoid_layer.W,
                            hbias=sigmoid_layer.b)
            self.rbm_layers.append(rbm_layer)

        # We now need to add a logistic layer on top of the MLP
        if model is None:
            W = None
            b = None
        else:
            W = model[-2]
            b = model[-1]
        self.logLayer = LogisticRegression(
            input=self.sigmoid_layers[-1].output,
            n_in=hidden_layers_sizes[-1],
            W = W,
            b = b,
            n_out=n_outs)
        self.params.extend(self.logLayer.params)
        
        self.L1 += (abs(self.logLayer.W).sum())

        self.L2_sqr += ((self.logLayer.W ** 2).sum())

        # compute the cost for second phase of training, defined as the
        # negative log likelihood of the logistic regression (output) layer
        self.finetune_cost = (self.logLayer.negative_log_likelihood(self.y)
                              + L1_reg * self.L1
                              + L2_reg * self.L2_sqr)

        # symbolic variable that points to the number of errors made on the
        # minibatch given by self.x and self.y
        self.errors = self.logLayer.errors(self.y)
        self.predprobs = self.logLayer.p_y_given_x
        self.preds = self.logLayer.y_pred
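The optional model argument is read as a flat list of per-layer parameters, [W_0, b_0, W_1, b_1, ..., W_log, b_log]: hidden layer i takes model[i*2] and model[i*2 + 1], and the logistic layer takes model[-2] and model[-1]. A small sketch of that packing convention with plain NumPy arrays (the layer sizes here are only illustrative):

import numpy

layer_sizes = [784, 500, 500, 10]  # n_ins, hidden layer sizes, n_outs
model = []
for n_in, n_out in zip(layer_sizes[:-1], layer_sizes[1:]):
    model.append(numpy.zeros((n_in, n_out)))  # W for this layer
    model.append(numpy.zeros(n_out))          # b for this layer

W0, b0 = model[0], model[1]          # first hidden layer (model[i*2], model[i*2 + 1] with i = 0)
W_log, b_log = model[-2], model[-1]  # logistic (output) layer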
Code Example #26
File: sdA.py Project: raoqiyu/Learning-DL
class SdA(object):
    """Stacked denoising autoencoder class (sdA)

    A stacked denoising autoencoder model is obtained by stacking several dAs.
    The hidden layer of the dA at layer `i` becomes the input of the dA at layer
    `i+1`. The first layer dA gets as input the input of the sdA, and the hidden
    layer of the last dA represents the output. Note that after pretraining, the
    sdA is dealt with as a normal MLP, the dAs are only used to initialize
    the weights.
    """

    def __init__(self, numpy_rng, theano_rng=None, n_in=784,
                 hidden_layers_sizes=[500, 500], n_out=10, corruption_levels=[0., 0.1]):
        """This class is made to support a variable number of layers.

        :type  theano_rng: theano.tensor.shared_randomstreams.RandomStreams
        :param theano_rng: Theano random generator used to draw initial weights

        :type  n_in: int
        :param n_in: dimension of the input to the sdA

        :type  hidden_layers_sizes: list of ints
        :param hidden_layers_sizes: intermediate layers size, must contain
                                    at least one value

        :type  n_out: int
        :param n_out: dimension of the output of the network

        :type  corruption_levels: list of float
        :param corruption_levels: amount of corruption to use for each layer
        """
        # self.dA_layers will store the denoising autoencoder associated
        # with the layers of the MLP
        self.dA_layers = []
        # self.sigmoid_layers will store the sigmoid layers of the MLP facade
        self.sigmoid_layers = []
        self.params = []
        self.n_layers = len(hidden_layers_sizes)

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

        # allocate symbolic variables for the data
        self.x = T.matrix('x')  # the data is presented as rasterized images
        self.y = T.ivector('y') # the labels are presented as 1D vector of
                                # [int] labels

        #
        # Construct self.n_layers sigmoid layers and self.n_layers denoising
        # autoencoder layers, where self.n_layers is the depth of our model
        #
        for i in range(self.n_layers):
            # construct a sigmoid layer
            #

            # the size of the input is either the number of hidden units of
            # the layer below or the input size if we are on the first layer;
            # the input of the layer follows the same rule
            if i == 0:
                input_size  = n_in
                layer_input = self.x
            else:
                input_size  = hidden_layers_sizes[i-1]
                layer_input = self.sigmoid_layers[-1].output

            sigmoid_layer = HiddenLayer(rng=numpy_rng, input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=T.nnet.sigmoid)
            # add the layer to our list of layers
            self.sigmoid_layers.append(sigmoid_layer)
            # the parameters of the sigmoid layers are parameters of the
            # sdA; the visible bias of each dA is a parameter of that dA
            # only, not of the sdA, so we do not add the dA_layer's (below)
            # bvis to self.params.
            self.params.extend(sigmoid_layer.params)

            # construct a denoising autoencoder that shared weights with this
            # sigmoid_layer
            dA_layer = dA(numpy_rng=numpy_rng, theano_rng=theano_rng,
                          input=layer_input, n_visible=input_size,
                          n_hidden=hidden_layers_sizes[i],
                          W = sigmoid_layer.W, bhid=sigmoid_layer.b)
            self.dA_layers.append(dA_layer)

        #
        # Construct a logistic layer on top of the MLP
        #
        self.logLayer = LogisticRegression(
            input=self.sigmoid_layers[-1].output,
            n_in=hidden_layers_sizes[-1],
            n_out=n_out
        )
        self.params.extend(self.logLayer.params)

        #
        # Construct a function that implements one step of finetuning
        #

        # compute the cost for second phase of training,
        # defined as the negative log likelihood
        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
        # compute the gradients with respect to the model parameters
        # symbolic variable that points to the number of errors made on the
        # minibatch given by self.x and self.y
        self.errors = self.logLayer.errors(self.y)

    def preTraining_functions(self, trainSetX, batch_size):
        ''' Generates a list of functions, each of them implementing one
        step in training the dA corresponding to the layer with same index.
        The function will require as input the minibatch index, and to train
        a dA you just need to iterate, calling the corresponding function on
        all minibatch indexes.

        :type  trainSetX: theano.tensor.TensorType
        :param trainSetX: Shared variable that contains all datapoints used
                            for training the dA

        :type  batch_size: int
        :param batch_size: size of a minibatch
        '''

        index = T.lscalar('index')
        corruption_level = T.scalar('corruption')
        learning_rate = T.scalar('lr')
        batch_begin = index * batch_size
        batch_end = batch_begin + batch_size

        pretrain_fns = []
        for dA in self.dA_layers:
            cost, updates = dA.get_cost_updates(corruption_level,learning_rate)

            # compile the theano function
            fn = theano.function(
                inputs=[index, theano.Param(corruption_level,default=0.2),
                        theano.Param(learning_rate,default=0.1)],
                outputs=cost,
                updates=updates,
                givens={
                    self.x: trainSetX[batch_begin:batch_end]
                }
            )
            pretrain_fns.append(fn)

        return pretrain_fns

    def build_finetune_functions(self, datasets, batch_size, learning_rate):
        '''Generates a function `train` that implements one step of finetuning,
        a function `validate` that computes the error on a batch from the validation
        set, and a function `test` that computes the error on a batch from the
        testing set

        :type  datasets: list of theano.tensor.TensorType
        :param datasets: It is a list that contains all the datasets;
                            it has to contain three pairs, `train`, `valid`,
                            `test` in this order, where each pair is formed of
                            two Theano variables, one for the datapoints, the
                            other for the labels

        :type  batch_size: int
        :param batch_size: size of a minibatch

        :type  learning_rate: float
        :param learning_rate: learning_rate used during finetune stage
        '''

        trainSetX, trainSetY = datasets[0]
        validSetX, validSetY = datasets[1]
        testSetX,  testSetY  = datasets[2]

        n_valid_batches = validSetX.get_value(borrow=True).shape[0] // batch_size
        n_test_batches  = testSetX.get_value(borrow=True).shape[0] // batch_size

        index = T.lscalar('index')
        batch_begin = index * batch_size
        batch_end = batch_begin + batch_size

        gparams = T.grad(self.finetune_cost, self.params)
        updates = [(param, param - learning_rate*gparam)
                    for param, gparam in zip(self.params, gparams)]

        train_fn = theano.function(
            inputs=[index],
            outputs=self.finetune_cost,
            updates=updates,
            givens={
                self.x: trainSetX[batch_begin:batch_end],
                self.y: trainSetY[batch_begin:batch_end]
            },
            name='train'
        )

        valid_score_i = theano.function(
            inputs=[index],
            outputs=self.errors,
            givens={
                self.x: validSetX[batch_begin:batch_end],
                self.y: validSetY[batch_begin:batch_end]
            },
            name='valid'
        )

        test_score_i = theano.function(
            inputs=[index],
            outputs=self.errors,
            givens={
                self.x: testSetX[batch_begin:batch_end],
                self.y: testSetY[batch_begin:batch_end]
            },
            name='test'
        )

        def valid_score():
            return [valid_score_i(i) for i in range(n_valid_batches)]

        def test_score():
            return [test_score_i(i) for i in range(n_test_batches)]

        return train_fn, valid_score, test_score
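preTraining_functions and build_finetune_functions are meant to be driven by an outer training script: the pretraining functions are iterated layer by layer, then the returned train/validate/test closures drive the supervised stage. A rough usage sketch, assuming sda, trainSetX, datasets and the hyper-parameters (batch_size, pretraining_epochs, corruption_levels, pretrain_lr, finetune_lr, n_train_batches, finetune_epochs) are defined elsewhere; the keyword names index, corruption and lr match the names given to the symbolic inputs above.

pretrain_fns = sda.preTraining_functions(trainSetX=trainSetX, batch_size=batch_size)
for i in range(sda.n_layers):                      # greedy layer-wise pretraining
    for epoch in range(pretraining_epochs):
        costs = [pretrain_fns[i](index=mb, corruption=corruption_levels[i], lr=pretrain_lr)
                 for mb in range(n_train_batches)]
        print('pretraining layer %i, epoch %i, cost %f' % (i, epoch, sum(costs) / float(len(costs))))

train_fn, valid_score, test_score = sda.build_finetune_functions(
    datasets=datasets, batch_size=batch_size, learning_rate=finetune_lr)
for epoch in range(finetune_epochs):               # supervised fine-tuning
    for mb in range(n_train_batches):
        train_fn(mb)
    errs = valid_score()
    print('finetuning epoch %i, validation error %f' % (epoch, sum(errs) / float(len(errs))))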
Code Example #27
def solve_CNN(datapath,
              batch=500,
              n_hidden=5,
              n_out=10,
              n_epoch=3,
              learning_rate=0.54):

    x = T.dmatrix('x')
    y = T.ivector('y')
    index = T.iscalar('index')

    kernal = (50, 30)
    cifar_data = upload()
    train, test = cifar_data

    print 'data being converted to theano-shared............ '
    train_x, train_y = to_shared(train)
    test_x, test_y = to_shared(test)

    n_train_batch = train[0].shape[0] // batch
    n_valid_batch = test[0].shape[0] // batch

    rng = np.random.RandomState(123)

    layer0_input = x.reshape((batch, 3, 32, 32))

    layer0 = ConvPoolLayer(
        input=layer0_input,
        rng=rng,
        filter_shape=(kernal[0], 3, 5, 5),
    )
    layer1 = ConvPoolLayer(input=layer0.output,
                           rng=rng,
                           filter_shape=(kernal[1], kernal[0], 5, 5))

    layer2_input = layer1.output.flatten(2)

    layer2 = HiddenLayer(
        input=layer2_input,
        rng=rng,
        n_out=n_hidden,
        n_in=kernal[1] * 5 * 5,
    )

    layer3 = LogisticRegression(input=layer2.output,
                                n_in=n_hidden,
                                n_out=n_out)

    fun_valid = theano.function(
        inputs=[index],
        outputs=layer3.error(y),
        givens=[(x, test_x[index * batch:(index + 1) * batch, :]),
                (y, test_y[index * batch:(index + 1) * batch])])

    cost = layer3.negative_log_likelihood(y)
    params = layer0.params + layer1.params + layer2.params + layer3.params
    grad_all = T.grad(cost=cost, wrt=params)

    updates = [(param_i, param_i - learning_rate * grad_i)
               for param_i, grad_i in zip(params, grad_all)]

    fun_train = theano.function(
        inputs=[index],
        outputs=[],
        updates=updates,
        givens=[(x, train_x[index * batch:(index + 1) * batch, :]),
                (y, train_y[index * batch:(index + 1) * batch])])

    ##################
    # TRAINING MODEL #
    ##################
    print 'training starts now -->'
    patience = 5000
    patience_increase = 2

    improvement = 0.995
    validation_frequency = min(n_train_batch, patience // 2)

    least_error = np.Inf
    epoch = 0
    done_looping = False
    this_error = 0
    start_time = timeit.default_timer()
    print 'EPOCH counting .....'
    while epoch < n_epoch and (not done_looping):
        for current_batch in range(n_train_batch):
            total_batches = epoch * n_train_batch + current_batch
            fun_train(current_batch)

            if (total_batches + 1) % validation_frequency == 0:
                this_error = [fun_valid(n) for n in range(n_valid_batch)]
                this_error = np.mean(this_error)
                print this_error

                if this_error < least_error * improvement:
                    least_error = this_error
                    patience = max(patience, total_batches * patience_increase)
                    #with open('/home/sameer/best_model_neural_filters.pkl', 'wb') as f:
                    #   pickle.dump(layer0.params, f)
                    #  f.close()

        if total_batches > patience:
            done_looping = True
        epoch += 1
        if total_batches != 0:
            #print 'the convergence ratio is %f' %(patience/float(total_batches))
            print this_error
            print epoch
            save[epoch] = this_error

    print 'the error is %f' % least_error
    print 'the total number of  epoch %d' % epoch
    end_time = timeit.default_timer()
    t = end_time - start_time
    print 'total time = %f sec' % t
    print 'time per epoch = %f sec/epoch' % (t / epoch)
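upload() and to_shared() are project-specific helpers that are not shown here. As an assumption, to_shared most likely follows the usual Theano pattern of wrapping the NumPy arrays in shared variables and casting the labels back to int32; a minimal sketch of such a helper (the actual implementation in this project may differ):

import numpy as np
import theano
import theano.tensor as T

def to_shared(data_xy, borrow=True):
    data_x, data_y = data_xy
    # store the data as floatX on the device; labels are cast to int32 for indexing
    shared_x = theano.shared(np.asarray(data_x, dtype=theano.config.floatX), borrow=borrow)
    shared_y = theano.shared(np.asarray(data_y, dtype=theano.config.floatX), borrow=borrow)
    return shared_x, T.cast(shared_y, 'int32')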
Code Example #28
File: SdA.py Project: wystephen/theano_test
class SdA(object):
    """Stacked denoising auto-encoder class (SdA)

    A stacked denoising autoencoder model is obtained by stacking several
    dAs. The hidden layer of the dA at layer `i` becomes the input of
    the dA at layer `i+1`. The first layer dA gets as input the input of
    the SdA, and the hidden layer of the last dA represents the output.
    Note that after pretraining, the SdA is dealt with as a normal MLP,
    the dAs are only used to initialize the weights.
    """

    def __init__(
        self,
        numpy_rng,
        theano_rng=None,
        n_ins=784,
        hidden_layers_sizes=[500, 500],
        n_outs=10,
        corruption_levels=[0.1, 0.1]
    ):
        """ This class is made to support a variable number of layers.

        :type numpy_rng: numpy.random.RandomState
        :param numpy_rng: numpy random number generator used to draw initial
                    weights

        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
        :param theano_rng: Theano random generator; if None is given one is
                           generated based on a seed drawn from `rng`

        :type n_ins: int
        :param n_ins: dimension of the input to the sdA

        :type hidden_layers_sizes: list of ints
        :param hidden_layers_sizes: intermediate layers size, must contain
                               at least one value

        :type n_outs: int
        :param n_outs: dimension of the output of the network

        :type corruption_levels: list of float
        :param corruption_levels: amount of corruption to use for each
                                  layer
        """

        self.sigmoid_layers = []
        self.dA_layers = []
        self.params = []
        self.n_layers = len(hidden_layers_sizes)

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
        # allocate symbolic variables for the data
        self.x = T.matrix('x')  # the data is presented as rasterized images
        self.y = T.ivector('y')  # the labels are presented as 1D vector of
                                 # [int] labels
        # end-snippet-1

        # The SdA is an MLP, for which all weights of intermediate layers
        # are shared with a different denoising autoencoder.
        # We will first construct the SdA as a deep multilayer perceptron,
        # and when constructing each sigmoidal layer we also construct a
        # denoising autoencoder that shares weights with that layer.
        # During pretraining we will train these autoencoders (which will
        # lead to changing the weights of the MLP as well).
        # During finetuning we will finish training the SdA by doing
        # stochastic gradient descent on the MLP.

        # start-snippet-2
        for i in xrange(self.n_layers):
            # construct the sigmoidal layer

            # the size of the input is either the number of hidden units of
            # the layer below or the input size if we are on the first layer
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layers_sizes[i - 1]

            # the input to this layer is either the activation of the hidden
            # layer below or the input of the SdA if you are on the first
            # layer
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[-1].output

            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=T.nnet.sigmoid)
            # add the layer to our list of layers
            self.sigmoid_layers.append(sigmoid_layer)
            # it's arguably a philosophical question...
            # but we are going to only declare that the parameters of the
            # sigmoid_layers are parameters of the StackedDAA
            # the visible biases in the dA are parameters of those
            # dA, but not the SdA
            self.params.extend(sigmoid_layer.params)

            # Construct a denoising autoencoder that shared weights with this
            # layer
            dA_layer = dA(numpy_rng=numpy_rng,
                          theano_rng=theano_rng,
                          input=layer_input,
                          n_visible=input_size,
                          n_hidden=hidden_layers_sizes[i],
                          W=sigmoid_layer.W,
                          bhid=sigmoid_layer.b)
            self.dA_layers.append(dA_layer)
        # end-snippet-2
        # We now need to add a logistic layer on top of the MLP
        self.logLayer = LogisticRegression(
            input=self.sigmoid_layers[-1].output,
            n_in=hidden_layers_sizes[-1],
            n_out=n_outs
        )

        self.params.extend(self.logLayer.params)
        # construct a function that implements one step of finetuning

        # compute the cost for second phase of training,
        # defined as the negative log likelihood
        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
        # compute the gradients with respect to the model parameters
        # symbolic variable that points to the number of errors made on the
        # minibatch given by self.x and self.y
        self.errors = self.logLayer.errors(self.y)

    def pretraining_functions(self, train_set_x, batch_size):
        ''' Generates a list of functions, each of them implementing one
        step in training the dA corresponding to the layer with same index.
        The function will require as input the minibatch index, and to train
        a dA you just need to iterate, calling the corresponding function on
        all minibatch indexes.

        :type train_set_x: theano.tensor.TensorType
        :param train_set_x: Shared variable that contains all datapoints used
                            for training the dA

        :type batch_size: int
        :param batch_size: size of a [mini]batch

        :type learning_rate: float
        :param learning_rate: learning rate used during training for any of
                              the dA layers
        '''

        # index to a [mini]batch
        index = T.lscalar('index')  # index to a minibatch
        corruption_level = T.scalar('corruption')  # % of corruption to use
        learning_rate = T.scalar('lr')  # learning rate to use
        # beginning of a batch, given `index`
        batch_begin = index * batch_size
        # ending of a batch given `index`
        batch_end = batch_begin + batch_size

        pretrain_fns = []
        for dA in self.dA_layers:
            # get the cost and the updates list
            cost, updates = dA.get_cost_updates(corruption_level,
                                                learning_rate)
            # compile the theano function
            fn = theano.function(
                inputs=[
                    index,
                    theano.Param(corruption_level, default=0.2),
                    theano.Param(learning_rate, default=0.1)
                ],
                outputs=cost,
                updates=updates,
                givens={
                    self.x: train_set_x[batch_begin: batch_end]
                }
            )
            # append `fn` to the list of functions
            pretrain_fns.append(fn)

        return pretrain_fns

    def build_finetune_functions(self, datasets, batch_size, learning_rate):
        '''Generates a function `train` that implements one step of
        finetuning, a function `validate` that computes the error on
        a batch from the validation set, and a function `test` that
        computes the error on a batch from the testing set

        :type datasets: list of pairs of theano.tensor.TensorType
        :param datasets: It is a list that contains all the datasets;
                         it has to contain three pairs, `train`,
                         `valid`, `test` in this order, where each pair
                         is formed of two Theano variables, one for the
                         datapoints, the other for the labels

        :type batch_size: int
        :param batch_size: size of a minibatch

        :type learning_rate: float
        :param learning_rate: learning rate used during finetune stage
        '''

        (train_set_x, train_set_y) = datasets[0]
        (valid_set_x, valid_set_y) = datasets[1]
        (test_set_x, test_set_y) = datasets[2]

        # compute number of minibatches for training, validation and testing
        n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
        n_valid_batches /= batch_size
        n_test_batches = test_set_x.get_value(borrow=True).shape[0]
        n_test_batches /= batch_size

        index = T.lscalar('index')  # index to a [mini]batch

        # compute the gradients with respect to the model parameters
        gparams = T.grad(self.finetune_cost, self.params)

        # compute list of fine-tuning updates
        updates = [
            (param, param - gparam * learning_rate)
            for param, gparam in zip(self.params, gparams)
        ]

        train_fn = theano.function(
            inputs=[index],
            outputs=self.finetune_cost,
            updates=updates,
            givens={
                self.x: train_set_x[
                    index * batch_size: (index + 1) * batch_size
                ],
                self.y: train_set_y[
                    index * batch_size: (index + 1) * batch_size
                ]
            },
            name='train'
        )

        test_score_i = theano.function(
            [index],
            self.errors,
            givens={
                self.x: test_set_x[
                    index * batch_size: (index + 1) * batch_size
                ],
                self.y: test_set_y[
                    index * batch_size: (index + 1) * batch_size
                ]
            },
            name='test'
        )

        valid_score_i = theano.function(
            [index],
            self.errors,
            givens={
                self.x: valid_set_x[
                    index * batch_size: (index + 1) * batch_size
                ],
                self.y: valid_set_y[
                    index * batch_size: (index + 1) * batch_size
                ]
            },
            name='valid'
        )

        # Create a function that scans the entire validation set
        def valid_score():
            return [valid_score_i(i) for i in xrange(n_valid_batches)]

        # Create a function that scans the entire test set
        def test_score():
            return [test_score_i(i) for i in xrange(n_test_batches)]

        return train_fn, valid_score, test_score
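valid_score and test_score return one zero-one error per minibatch, so the caller is expected to average them; train_fn is called once per training minibatch. A brief sketch of how the returned closures are typically consumed (sda, datasets, batch_size and n_train_batches are assumed to be set up as above, and numpy to be imported):

train_fn, valid_score, test_score = sda.build_finetune_functions(
    datasets=datasets, batch_size=batch_size, learning_rate=0.1)

for minibatch_index in range(n_train_batches):
    train_fn(minibatch_index)

this_validation_loss = numpy.mean(valid_score())  # mean zero-one loss on the validation set
this_test_loss = numpy.mean(test_score())         # mean zero-one loss on the test set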
Code Example #29
File: main.py Project: wesleywatkins/MNIST-Learning
X_test = None
Y_test = None
print("Non 0-1 labels removed from testing dataset!")

print("\nTraining SVM on MNIST dataset...")
svm = SupportVectorMachine()
svm.train(X, Y, 1)
print("SVM trained!")

print("\nTraining Linear Regression on MNIST dataset...")
linear = LinearRegression()
linear.train(X, Y)
print("Linear regression trained!")

print("\nTraining Logistic Regression on MNIST dataset...")
logistic = LogisticRegression()
logistic.train(X, Y)
print("Logistic regression trained!")

# Test SVM
print("\nRunning SVM on test data...")
misclassified = svm.test(X2, Y2)
print("Generalization Error:", round(misclassified/Y2.size, 3))
print("Misclassified:", misclassified, "/", Y2.size)
print("Accuracy (on test data):", round((1 - (misclassified/Y2.size)) * 100, 3), '%')

# Test Linear Regression
print("\nRunning Linear Regression on test data...")
misclassified = linear.test(X2, Y2)
print("Generalization Error:", round(misclassified/Y2.size, 3))
print("Misclassified:", misclassified, "/", Y2.size)
Code Example #30
File: Cifar.py Project: sameerpurwar/sam
def solve_CNN(datapath, batch = 500,n_hidden = 5,n_out = 10,n_epoch = 3,learning_rate = 0.54):
   
    x = T.dmatrix('x')
    y = T.ivector('y')
    index = T.iscalar('index')
    
    kernal = (50,30)
    cifar_data = upload()
    train, test = cifar_data    
    
    
    print 'data being converted to theano-shared............ '
    train_x, train_y = to_shared(train)
    test_x, test_y = to_shared(test)
    
    n_train_batch =  train[0].shape[0] // batch 
    n_valid_batch =  test[0].shape[0] // batch
   
    rng = np.random.RandomState(123)    
    
    layer0_input = x.reshape((batch,3,32,32))
    
    layer0 = ConvPoolLayer(input = layer0_input,
                           rng = rng,
                           filter_shape = (kernal[0],3,5,5),
                          )
    layer1 = ConvPoolLayer(input = layer0.output,
                           rng = rng,
                           filter_shape = (kernal[1],kernal[0],5,5))
                           
    layer2_input = layer1.output.flatten(2)
   
    layer2 = HiddenLayer(input = layer2_input,       
                         rng = rng,
                         n_out = n_hidden,                            
                         n_in = kernal[1]*5*5,
                         )
                         
    layer3 = LogisticRegression(input = layer2.output,
                                n_in = n_hidden,
                                n_out = n_out)
    

    fun_valid = theano.function(inputs = [index],
                                outputs = layer3.error(y),
                                givens  = [(x,test_x[index*batch:(index+1)*batch,:]),
                                           (y,test_y[index*batch:(index+1)*batch])]
                               )
    
    cost = layer3.negative_log_likelihood(y)
    params = layer0.params + layer1.params + layer2.params + layer3.params                           
    grad_all = T.grad(cost = cost,
                      wrt = params)
    
                               
    updates = [(param_i, param_i - learning_rate * grad_i)
                for param_i, grad_i in zip(params, grad_all)]                       
     
    fun_train = theano.function(inputs = [index],
                                outputs = [],
                                updates = updates,
                                givens  = [(x,train_x[index*batch:(index+1)*batch,:]),
                                           (y,train_y[index*batch:(index+1)*batch])]
                                )

                     
    ##################
    # TRAINING MODEL #
    ##################
    print 'training starts now -->'
    patience = 5000
    patience_increase = 2
    
    improvement = 0.995
    validation_frequency = min(n_train_batch, patience//2)    
  
    least_error = np.Inf
    epoch = 0
    done_looping = False
    this_error = 0
    start_time = timeit.default_timer()
    print 'EPOCH counting .....'
    while epoch < n_epoch and (not done_looping):
        for current_batch in range(n_train_batch):            
            total_batches = epoch*n_train_batch + current_batch
            fun_train(current_batch) 
            
            if (total_batches+1) % validation_frequency == 0:                
                this_error = [fun_valid(n) for n in range(n_valid_batch)]
                this_error = np.mean(this_error)
                print this_error                
                
                if this_error < least_error*improvement:
                    least_error = this_error
                    patience =  max(patience,total_batches * patience_increase)
                    #with open('/home/sameer/best_model_neural_filters.pkl', 'wb') as f:
                     #   pickle.dump(layer0.params, f)
                      #  f.close()
                    
        if total_batches > patience:
            done_looping = True
        epoch += 1
        if total_batches != 0:
            #print 'the convergence ratio is %f' %(patience/float(total_batches))
            print this_error
            print epoch
            save[epoch] = this_error
    
    print 'the error is %f' %least_error
    print 'the total number of  epoch %d' %epoch    
    end_time = timeit.default_timer()
    t = end_time - start_time
    print 'total time = %f sec' %t
    print 'time per epoch = %f sec/epoch' %(t/epoch) 
Code Example #31
def sgd_optimization_mnist(learning_rate=0.13,
                           n_epochs=1000,
                           dataset='data/mnist.pkl.gz',
                           batch_size=600):

    training_set, validation_set, testing_set, = data_loader.load(dataset)
    training_set_x, training_set_y = training_set
    validation_set_x, validation_set_y = validation_set
    testing_set_x, testing_set_y = testing_set

    # compute number of minibatches for training, validation and testing
    n_train_batches = training_set_x.get_value(
        borrow=True).shape[0] / batch_size
    n_valid_batches = validation_set_x.get_value(
        borrow=True).shape[0] / batch_size
    n_test_batches = testing_set_x.get_value(borrow=True).shape[0] / batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # allocate symbolic variables for the data
    index = tensor.lscalar()

    # generate symbolic variables for input (x and y represent a
    # minibatch)
    x = tensor.matrix('x')
    y = tensor.ivector('y')

    classifier = LogisticRegression(input=x, n_in=28 * 28, n_out=10)
    cost = classifier.negative_log_likelihood(y)

    # compiling a Theano function that computes the mistakes that are made by
    # the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: testing_set_x[index * batch_size:(index + 1) * batch_size],
            y: testing_set_y[index * batch_size:(index + 1) * batch_size]
        })

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: validation_set_x[index * batch_size:(index + 1) * batch_size],
            y: validation_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # compute the gradient of cost with respect to theta = (W,b)
    g_W = tensor.grad(cost=cost, wrt=classifier.W)
    g_b = tensor.grad(cost=cost, wrt=classifier.b)

    # update the parameters of the model
    updates = [(classifier.W, classifier.W - learning_rate * g_W),
               (classifier.b, classifier.b - learning_rate * g_b)]

    # compiling a Theano function `train_model` that returns the cost, but in
    # the same time updates the parameter of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: training_set_x[index * batch_size:(index + 1) * batch_size],
            y: training_set_y[index * batch_size:(index + 1) * batch_size]
        })
    ###############
    # TRAIN MODEL #
    ###############
    print '... training the model'
    # early-stopping parameters
    patience = 5000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = 5 * n_train_batches  # check the validation set
                                                # every 5 epochs

    best_validation_loss = numpy.inf
    test_score = 0.
    start_time = timeit.default_timer()

    done_looping = False
    epoch = 0
    while (epoch < n_epochs) and (not done_looping):
        for minibatch_index in xrange(n_train_batches):
            minibatch_avg_cost = train_model(minibatch_index)
            # iter: number of minibatches used)
            iter = epoch * n_train_batches + minibatch_index
            if (iter + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = [
                    validate_model(i) for i in xrange(n_valid_batches)
                ]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * improvement_threshold:
                        patience = max(patience, iter * patience_increase)
                    # update best_validation_loss
                    best_validation_loss = this_validation_loss
                    # test it on the test set
                    test_losses = [
                        test_model(i) for i in xrange(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)

                    print(('     epoch %i, minibatch %i/%i, test error of'
                           ' best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

                    # save the best model
                    with open('best_model.pkl', 'w') as f:
                        cPickle.dump(classifier, f)

            if patience <= iter:
                done_looping = True
                break
        epoch = epoch + 1

    end_time = timeit.default_timer()
    print(('Optimization complete with best validation score of %f %%, '
           'with test performance %f %%') %
          (best_validation_loss * 100., test_score * 100.))
    print 'The code run for %d epochs, with %f epochs/sec' % (
        epoch, 1. * epoch / (end_time - start_time))
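After training, best_model.pkl holds the pickled classifier and can be reloaded for prediction. The sketch below follows the standard tutorial pattern and assumes the LogisticRegression class stores its symbolic input as classifier.input and its predicted labels as classifier.y_pred; if this particular implementation does not keep those attributes, the prediction graph would have to be rebuilt instead.

import cPickle
import theano

def predict(data_x, n_examples=10):
    # data_x: a NumPy array of rasterized images, one row per example
    classifier = cPickle.load(open('best_model.pkl'))
    predict_model = theano.function(inputs=[classifier.input],
                                    outputs=classifier.y_pred)
    predicted_values = predict_model(data_x[:n_examples])
    print('Predicted values for the first %i examples:' % n_examples)
    print(predicted_values)
    return predicted_values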
Code Example #32
            features.append((float(col_1), float(col_2), float(col_3),
                             float(col_4), float(col_5), float(col_6)))
            categories.append(int(col_7))
    return features, categories


if __name__ == "__main__":
    train_x, train_y = getInput('./dataForTrainingLogistic.txt')
    test_x, test_y = getInput('./dataForTestingLogistic.txt')

    train_x = np.hstack((np.array(train_x), np.ones((len(train_x), 1))))
    test_x = np.hstack((np.array(test_x), np.ones((len(test_x), 1))))
    train_y = np.array(train_y).reshape(len(train_y))
    test_y = np.array(test_y).reshape(len(test_y))

    lr = LogisticRegression(learning_rate=0.00015,
                            initial_w=np.zeros(train_x.shape[1]))
    # batch gradient descent
    # history_loss, history_test_loss, history_score,_ = lr.train_gradient_descent(
    #    epoch=150000, epoch_per_round=10000, train_x=train_x, train_y=train_y, test_x=test_x, test_y=test_y)

    # stochastic gradient descent
    history_loss, history_test_loss, history_score, _ = lr.train_stochastic_gradient_descent(
        iteration_num=500000,
        iter_per_round=100,
        batch_size=1,
        train_x=train_x,
        train_y=train_y,
        test_x=test_x,
        test_y=test_y)
    print('Coefficient:', lr.w)
    variable_x = range(100, 500001, 100)
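variable_x enumerates the iteration counts at which the losses were recorded (every iter_per_round = 100 iterations, up to 500000), so it lines up with history_loss and history_test_loss. The plotting code itself is not shown; a plausible continuation with matplotlib, purely illustrative, might look like this:

import matplotlib.pyplot as plt

plt.plot(variable_x, history_loss, label='training loss')
plt.plot(variable_x, history_test_loss, label='test loss')
plt.xlabel('iteration')
plt.ylabel('loss')
plt.legend()
plt.show()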
Code Example #33
File: mlp.py Project: Fuchai/tangkk-mirex-ace
    def __init__(self,
                 rng,
                 input,
                 n_in,
                 hidden_layers_sizes,
                 n_out,
                 model=None):
        """Initialize the parameters for the multilayer perceptron

        :type rng: numpy.random.RandomState
        :param rng: a random number generator used to initialize weights

        :type input: theano.tensor.TensorType
        :param input: symbolic variable that describes the input of the
        architecture (one minibatch)

        :type n_in: int
        :param n_in: number of input units, the dimension of the space in
        which the datapoints lie

        :type hidden_layers_sizes: list of ints
        :param hidden_layers_sizes: number of hidden units in each hidden layer

        :type n_out: int
        :param n_out: number of output units, the dimension of the space in
        which the labels lie

        """
        self.n_layers = len(hidden_layers_sizes)
        self.hiddenlayers = []
        self.params = []
        self.L1 = 0
        self.L2_sqr = 0
        # Build the stack of hidden layers: each one is a HiddenLayer with a
        # sigmoid activation connected to the layer below (or to the input for
        # the first layer); the activation function can be replaced by tanh or
        # any other nonlinear function
        for i in xrange(self.n_layers):
            if i == 0:
                input_size = n_in
            else:
                input_size = hidden_layers_sizes[i - 1]

            # the input to this layer is either the activation of the
            # hidden layer below or the input of the MLP if we are on
            # the first layer
            if i == 0:
                layer_input = input
            else:
                layer_input = self.hiddenlayers[i - 1].output

            if model is None:
                W = None
                b = None
            else:
                W = model[i * 2]
                b = model[i * 2 + 1]

            hiddenLayer = HiddenLayer(rng=rng,
                                      input=layer_input,
                                      n_in=input_size,
                                      n_out=hidden_layers_sizes[i],
                                      W=W,
                                      b=b,
                                      activation=T.nnet.sigmoid)
            self.hiddenlayers.append(hiddenLayer)
            self.params.extend(hiddenLayer.params)
            self.L1 += (abs(hiddenLayer.W).sum())
            self.L2_sqr += ((hiddenLayer.W**2).sum())

        # The logistic regression layer gets as input the hidden units
        # of the hidden layer
        if model is None:
            W = None
            b = None
        else:
            W = model[-2]
            b = model[-1]
        self.logRegressionLayer = LogisticRegression(
            input=self.hiddenlayers[-1].output,
            n_in=hidden_layers_sizes[-1],
            W=W,
            b=b,
            n_out=n_out)
        # end-snippet-2 start-snippet-3
        # L1 norm ; one regularization option is to enforce L1 norm to
        # be small

        self.L1 += (abs(self.logRegressionLayer.W).sum())

        self.L2_sqr += ((self.logRegressionLayer.W**2).sum())

        # negative log likelihood of the MLP is given by the negative
        # log likelihood of the output of the model, computed in the
        # logistic regression layer
        self.negative_log_likelihood = (
            self.logRegressionLayer.negative_log_likelihood)
        # same holds for the function computing the number of errors
        self.errors = self.logRegressionLayer.errors
        self.predprobs = self.logRegressionLayer.p_y_given_x
        self.preds = self.logRegressionLayer.y_pred

        # the parameters of the model are the parameters of all the layers it
        # is made out of
        self.params.extend(self.logRegressionLayer.params)
        # end-snippet-3

        # keep track of model input
        self.input = input
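A hedged sketch of how this constructor might be used to build a training graph. It assumes the enclosing class is named `MLP` and that the tutorial-style `HiddenLayer` and `LogisticRegression` classes it relies on are importable; the hyperparameters are placeholders.

# Hypothetical usage of the MLP constructor above.
import numpy
import theano.tensor as T

rng = numpy.random.RandomState(1234)
x = T.matrix('x')   # minibatch of input vectors
y = T.ivector('y')  # corresponding integer labels

mlp = MLP(rng=rng, input=x, n_in=28 * 28,
          hidden_layers_sizes=[500, 500], n_out=10)

# regularized cost built from the attributes defined in __init__
L1_reg, L2_reg = 0.00, 0.0001
cost = (mlp.negative_log_likelihood(y)
        + L1_reg * mlp.L1
        + L2_reg * mlp.L2_sqr)

# gradients for an SGD update on all parameters
gparams = [T.grad(cost, param) for param in mlp.params]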
Code Example #34
    def __init__(self,
                 numpy_rng,
                 theano_rng=None,
                 n_ins=784,
                 hidden_layers_sizes=[500, 500],
                 n_outs=10,
                 L1_reg=0,
                 L2_reg=0,
                 first_layer='grbm',
                 model=None):
        """This class is made to support a variable number of layers.

        :type numpy_rng: numpy.random.RandomState
        :param numpy_rng: numpy random number generator used to draw initial
                    weights

        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
        :param theano_rng: Theano random generator; if None is given one is
                           generated based on a seed drawn from `rng`

        :type n_ins: int
        :param n_ins: dimension of the input to the DBN

        :type hidden_layers_sizes: list of ints
        :param hidden_layers_sizes: intermediate layers size, must contain
                               at least one value

        :type n_outs: int
        :param n_outs: dimension of the output of the network
        """

        self.sigmoid_layers = []
        self.rbm_layers = []
        self.params = []
        self.n_layers = len(hidden_layers_sizes)
        self.L1 = 0
        self.L2_sqr = 0

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = MRG_RandomStreams(numpy_rng.randint(2**30))

        # allocate symbolic variables for the data
        self.x = T.matrix('x')  # the data is presented as rasterized images
        self.y = T.ivector('y')  # the labels are presented as 1D vector
        # of [int] labels
        # end-snippet-1
        # The DBN is an MLP, for which all weights of intermediate
        # layers are shared with a different RBM.  We will first
        # construct the DBN as a deep multilayer perceptron, and when
        # constructing each sigmoidal layer we also construct an RBM
        # that shares weights with that layer. During pretraining we
        # will train these RBMs (which will lead to changing the
        # weights of the MLP as well). During finetuning we will finish
        # training the DBN by doing stochastic gradient descent on the
        # MLP.

        for i in xrange(self.n_layers):
            # construct the sigmoidal layer

            # the size of the input is either the number of hidden
            # units of the layer below or the input size if we are on
            # the first layer
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layers_sizes[i - 1]

            # the input to this layer is either the activation of the
            # hidden layer below or the input of the DBN if you are on
            # the first layer
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[i - 1].output

            if model is None:
                W = None
                b = None
            else:
                W = model[i * 2]
                b = model[i * 2 + 1]

            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        W=W,
                                        b=b,
                                        activation=T.nnet.sigmoid)

            # add the layer to our list of layers
            self.sigmoid_layers.append(sigmoid_layer)
            self.L1 += (abs(sigmoid_layer.W).sum())
            self.L2_sqr += ((sigmoid_layer.W**2).sum())

            # it's arguably a philosophical question...  but we are
            # going to only declare that the parameters of the
            # sigmoid_layers are parameters of the DBN. The visible
            # biases in the RBM are parameters of those RBMs, but not
            # of the DBN.
            self.params.extend(sigmoid_layer.params)

            # Construct an RBM that shared weights with this layer
            if i == 0:  # first layer GBRBM - dealing with continuous values
                if first_layer == 'grbm':
                    rbm_layer = GRBM(numpy_rng=numpy_rng,
                                     theano_rng=theano_rng,
                                     input=layer_input,
                                     n_visible=input_size,
                                     n_hidden=hidden_layers_sizes[i],
                                     W=sigmoid_layer.W,
                                     hbias=sigmoid_layer.b)
                if first_layer == 'rbm':
                    rbm_layer = RBM(numpy_rng=numpy_rng,
                                    theano_rng=theano_rng,
                                    input=layer_input,
                                    n_visible=input_size,
                                    n_hidden=hidden_layers_sizes[i],
                                    W=sigmoid_layer.W,
                                    hbias=sigmoid_layer.b)
            # elif i == self.n_layers-1: # last layer GGRBM
            # rbm_layer = GRBM(numpy_rng=numpy_rng,
            # theano_rng=theano_rng,
            # input=layer_input,
            # n_visible=input_size,
            # n_hidden=hidden_layers_sizes[i],
            # W=sigmoid_layer.W,
            # hbias=sigmoid_layer.b)
            else:  # subsequent layers BBRBM - binary RBM to cope with regularization
                rbm_layer = RBM(numpy_rng=numpy_rng,
                                theano_rng=theano_rng,
                                input=layer_input,
                                n_visible=input_size,
                                n_hidden=hidden_layers_sizes[i],
                                W=sigmoid_layer.W,
                                hbias=sigmoid_layer.b)
            self.rbm_layers.append(rbm_layer)

        # We now need to add a logistic layer on top of the MLP
        if model is None:
            W = None
            b = None
        else:
            W = model[-2]
            b = model[-1]
        self.logLayer = LogisticRegression(
            input=self.sigmoid_layers[-1].output,
            n_in=hidden_layers_sizes[-1],
            W=W,
            b=b,
            n_out=n_outs)
        self.params.extend(self.logLayer.params)

        self.L1 += (abs(self.logLayer.W).sum())

        self.L2_sqr += ((self.logLayer.W**2).sum())

        # compute the cost for second phase of training, defined as the
        # negative log likelihood of the logistic regression (output) layer
        self.finetune_cost = (self.logLayer.negative_log_likelihood(self.y) +
                              L1_reg * self.L1 + L2_reg * self.L2_sqr)

        # compute the gradients with respect to the model parameters
        # symbolic variable that points to the number of errors made on the
        # minibatch given by self.x and self.y
        self.errors = self.logLayer.errors(self.y)
        self.predprobs = self.logLayer.p_y_given_x
        self.preds = self.logLayer.y_pred
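Because the constructor stores the symbolic input `self.x` together with `self.predprobs` and `self.preds`, a predictor can be compiled directly from a trained instance. A hedged sketch, assuming the enclosing class is named `DBN` as in the later listings, that `dbn` is an already pretrained and finetuned instance, and that `test_features` is a placeholder design matrix:

# Hypothetical: compile class-probability and hard-label predictors from the
# symbolic attributes stored in __init__.
import theano

predict_proba = theano.function(inputs=[dbn.x], outputs=dbn.predprobs)
predict_label = theano.function(inputs=[dbn.x], outputs=dbn.preds)

probs = predict_proba(test_features)   # shape (n_examples, n_outs)
labels = predict_label(test_features)  # argmax of probs, one label per row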
Code Example #35
File: moe.py Project: zermelozf/esn-lm
 def __init__(self, input_dim, nb_experts, output_dim):
     self.nb_experts = nb_experts
     self.output_dim = output_dim
     self.gates = LogisticRegression(input_dim, nb_experts)
     self.experts = [LogisticRegression(input_dim, output_dim) for k in range(nb_experts)]
Code Example #36
File: logistic_test.py Project: hemu1919/PythonML
# -*- coding: utf-8 -*-
"""
Created on Mon Sep  4 17:29:50 2017

@author: heman
"""
from request_data_link import get
import numpy as np
from logistic import LogisticRegression

link = 'http://data.princeton.edu/wws509/datasets/copen.raw'
m, n, parsed_data = get(link, 6)
index = list(range(0, parsed_data.size, 6))
parsed_data = np.delete(parsed_data, index)
index = list(range(4, parsed_data.size, 5))
targets = parsed_data[index]
data = np.delete(parsed_data, index).reshape(m, n - 1)
del link, m, n, parsed_data, index

regr = LogisticRegression()
regr.train(data, targets, iter=1000000, step=0.001, lamda=0)
labels, predictions = regr.test(data, targets)
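A possible follow-up check, assuming `regr.test` returns equally sized arrays of true labels and predictions:

# Hypothetical accuracy computation on the training data itself.
labels = np.asarray(labels)
predictions = np.asarray(predictions)
accuracy = np.mean(labels == predictions)
print('training accuracy: %.3f' % accuracy)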
Code Example #37
# This snippet relies on numpy, LoadUSPS, MLP and LogisticRegression being
# imported elsewhere in the project; numpy is added here since `np` is used below.
import numpy as np
from mnist import MNIST
mndata = MNIST('./MNIST')
trImg, trLab = mndata.load_training()
teImg, teLab = mndata.load_testing()

trImg = np.asanyarray(trImg)
trLab = np.asanyarray(trLab)
teImg = np.asanyarray(teImg)
teLab = np.asanyarray(teLab)

usps = LoadUSPS.LoadUSPS('proj3_images.zip')
uspsImg, uspsLab = usps.load()

#1> logistic Regression
logistic = LogisticRegression(28 * 28, 10)
logistic.train(trImg, trLab, lr = 0.3)
accuracy = logistic.test(teImg, teLab)
uspsacc = logistic.test(uspsImg, uspsLab)
print('logisticregression accuracy :', accuracy, uspsacc)

#grid search for best learning rate performance
#for lr in [0.5, 0.3, 0.1, 0.05, 0.01]:
#    logistic.train(trImg, trLab, lr = lr)
#    accuracy = logistic.test(teImg, teLab)
#    print(lr, accuracy)



#2> Multilayer perceptron implementation using tensorflow
mlp = MLP.MLP()
Code Example #38
File: SdA.py Project: wystephen/theano_test
    def __init__(
        self,
        numpy_rng,
        theano_rng=None,
        n_ins=784,
        hidden_layers_sizes=[500, 500],
        n_outs=10,
        corruption_levels=[0.1, 0.1]
    ):
        """ This class is made to support a variable number of layers.

        :type numpy_rng: numpy.random.RandomState
        :param numpy_rng: numpy random number generator used to draw initial
                    weights

        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
        :param theano_rng: Theano random generator; if None is given one is
                           generated based on a seed drawn from `rng`

        :type n_ins: int
        :param n_ins: dimension of the input to the sdA

        :type hidden_layers_sizes: list of ints
        :param hidden_layers_sizes: intermediate layers size, must contain
                               at least one value

        :type n_outs: int
        :param n_outs: dimension of the output of the network

        :type corruption_levels: list of float
        :param corruption_levels: amount of corruption to use for each
                                  layer
        """

        self.sigmoid_layers = []
        self.dA_layers = []
        self.params = []
        self.n_layers = len(hidden_layers_sizes)

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
        # allocate symbolic variables for the data
        self.x = T.matrix('x')  # the data is presented as rasterized images
        self.y = T.ivector('y')  # the labels are presented as 1D vector of
                                 # [int] labels
        # end-snippet-1

        # The SdA is an MLP, for which all weights of intermediate layers
        # are shared with a different denoising autoencoder.
        # We will first construct the SdA as a deep multilayer perceptron,
        # and when constructing each sigmoidal layer we also construct a
        # denoising autoencoder that shares weights with that layer.
        # During pretraining we will train these autoencoders (which will
        # lead to changing the weights of the MLP as well).
        # During finetuning we will finish training the SdA by doing
        # stochastic gradient descent on the MLP

        # start-snippet-2
        for i in xrange(self.n_layers):
            # construct the sigmoidal layer

            # the size of the input is either the number of hidden units of
            # the layer below or the input size if we are on the first layer
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layers_sizes[i - 1]

            # the input to this layer is either the activation of the hidden
            # layer below or the input of the SdA if you are on the first
            # layer
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[-1].output

            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=T.nnet.sigmoid)
            # add the layer to our list of layers
            self.sigmoid_layers.append(sigmoid_layer)
            # it's arguably a philosophical question...
            # but we are going to only declare that the parameters of the
            # sigmoid_layers are parameters of the StackedDAA
            # the visible biases in the dA are parameters of those
            # dA, but not the SdA
            self.params.extend(sigmoid_layer.params)

            # Construct a denoising autoencoder that shared weights with this
            # layer
            dA_layer = dA(numpy_rng=numpy_rng,
                          theano_rng=theano_rng,
                          input=layer_input,
                          n_visible=input_size,
                          n_hidden=hidden_layers_sizes[i],
                          W=sigmoid_layer.W,
                          bhid=sigmoid_layer.b)
            self.dA_layers.append(dA_layer)
        # end-snippet-2
        # We now need to add a logistic layer on top of the MLP
        self.logLayer = LogisticRegression(
            input=self.sigmoid_layers[-1].output,
            n_in=hidden_layers_sizes[-1],
            n_out=n_outs
        )

        self.params.extend(self.logLayer.params)
        # construct a function that implements one step of finetuning

        # compute the cost for second phase of training,
        # defined as the negative log likelihood
        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
        # compute the gradients with respect to the model parameters
        # symbolic variable that points to the number of errors made on the
        # minibatch given by self.x and self.y
        self.errors = self.logLayer.errors(self.y)
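The class ends with `finetune_cost` and `errors`, so a single finetuning step can be compiled in the same way as in the DBN's `build_finetune_functions` further down. A hedged sketch, assuming `sda` is an instance of this class and `train_set_x`, `train_set_y` are Theano shared variables holding the training data and labels; learning rate and batch size are placeholders.

# Hypothetical one-step finetuning function for the SdA above.
import theano
import theano.tensor as T

index = T.lscalar('index')  # minibatch index
learning_rate = 0.1         # placeholder hyperparameters
batch_size = 20

gparams = T.grad(sda.finetune_cost, sda.params)
updates = [(param, param - learning_rate * gparam)
           for param, gparam in zip(sda.params, gparams)]

train_fn = theano.function(
    inputs=[index],
    outputs=sda.finetune_cost,
    updates=updates,
    givens={
        sda.x: train_set_x[index * batch_size: (index + 1) * batch_size],
        sda.y: train_set_y[index * batch_size: (index + 1) * batch_size]
    }
)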
Code Example #39
File: plot.py Project: wp-lai/xmachinelearning
import numpy as np
import matplotlib.pyplot as plt
from logistic import LogisticRegression

# read data
X = np.loadtxt('logistic_x.txt')
y = np.loadtxt('logistic_y.txt')

# build model
lr = LogisticRegression()
lr.fit(X, y)
y_ = lr.predict(X)

# create a mesh to plot in
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
h = 0.1  # step_size
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
data = np.vstack((xx.ravel(), yy.ravel())).T
labels = lr.predict(data)

# plot
fig, ax = plt.subplots()
ax.scatter(data[:, 0],
           data[:, 1],
           c=np.where(labels == 1, 'green', 'red'),
           alpha=0.01)
plt.title('Decision Boundary of Logistic Regression')
ax.scatter(X[y == 1, 0],
           X[y == 1, 1],
           c='green',
Code Example #40
def sgd_optimization_mnist(learning_rate=0.13, n_epochs=1000,
                           dataset='data/mnist.pkl.gz',
                           batch_size=600):

    training_set, validation_set, testing_set = data_loader.load(dataset)
    training_set_x  , training_set_y   = training_set
    validation_set_x, validation_set_y = validation_set
    testing_set_x   , testing_set_y    = testing_set

    # compute number of minibatches for training, validation and testing
    n_train_batches = training_set_x.get_value(borrow=True).shape[0] / batch_size
    n_valid_batches = validation_set_x.get_value(borrow=True).shape[0] / batch_size
    n_test_batches  = testing_set_x.get_value(borrow=True).shape[0] / batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # allocate symbolic variables for the data
    index = tensor.lscalar()

    # generate symbolic variables for input (x and y represent a
    # minibatch)
    x = tensor.matrix('x')
    y = tensor.ivector('y')

    classifier = LogisticRegression(input=x, n_in=28 * 28, n_out=10)
    cost = classifier.negative_log_likelihood(y)

    # compiling a Theano function that computes the mistakes that are made by
    # the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: testing_set_x[index * batch_size: (index + 1) * batch_size],
            y: testing_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: validation_set_x[index * batch_size: (index + 1) * batch_size],
            y: validation_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # compute the gradient of cost with respect to theta = (W,b)
    g_W = tensor.grad(cost=cost, wrt=classifier.W)
    g_b = tensor.grad(cost=cost, wrt=classifier.b)

    # update the parameters of the model
    updates = [(classifier.W, classifier.W - learning_rate * g_W),
               (classifier.b, classifier.b - learning_rate * g_b)]

    # compiling a Theano function `train_model` that returns the cost, but in
    # the same time updates the parameter of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: training_set_x[index * batch_size: (index + 1) * batch_size],
            y: training_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )
    ###############
    # TRAIN MODEL #
    ###############
    print '... training the model'
    # early-stopping parameters
    patience = 5000  # look as this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is considered significant
    validation_frequency = 5 * n_train_batches # frequency of checking the validation set

    best_validation_loss = numpy.inf
    test_score = 0.
    start_time = timeit.default_timer()

    done_looping = False
    epoch = 0
    while (epoch < n_epochs) and (not done_looping):
        for minibatch_index in xrange(n_train_batches):
            minibatch_avg_cost = train_model(minibatch_index)
            # iter: number of minibatches used so far
            iter = epoch * n_train_batches + minibatch_index
            if (iter + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = [validate_model(i) for i in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                print(
                    'epoch %i, minibatch %i/%i, validation error %f %%' %
                    (
                        epoch,
                        minibatch_index + 1,
                        n_train_batches,
                        this_validation_loss * 100.
                    )
                )

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * improvement_threshold:
                        patience = max(patience, iter * patience_increase)
                    # update best_validation_loss
                    best_validation_loss = this_validation_loss
                    # test it on the test set
                    test_losses = [test_model(i) for i in xrange(n_test_batches)]
                    test_score = numpy.mean(test_losses)

                    print(
                        (
                            '     epoch %i, minibatch %i/%i, test error of'
                            ' best model %f %%'
                        ) %
                        (
                            epoch,
                            minibatch_index + 1,
                            n_train_batches,
                            test_score * 100.
                        )
                    )

                    # save the best model
                    with open('best_model.pkl', 'w') as f:
                        cPickle.dump(classifier, f)

            if patience <= iter:
                done_looping = True
                break
        epoch = epoch + 1

    end_time = timeit.default_timer()
    print(
        (
            'Optimization complete with best validation score of %f %%, '
            'with test performance %f %%'
        )
        % (best_validation_loss * 100., test_score * 100.)
    )
    print('The code ran for %d epochs, with %f epochs/sec' % (
        epoch, 1. * epoch / (end_time - start_time)))
Code Example #41
File: dbn.py Project: wyx1227/tangkk-mirex-ace
class DBN(object):
    """Deep Belief Network

    A deep belief network is obtained by stacking several RBMs on top of each
    other. The hidden layer of the RBM at layer `i` becomes the input of the
    RBM at layer `i+1`. The first layer RBM gets as input the input of the
    network, and the hidden layer of the last RBM represents the output. When
    used for classification, the DBN is treated as a MLP, by adding a logistic
    regression layer on top.
    """

    def __init__(self, numpy_rng, theano_rng=None, n_ins=784,
                 hidden_layers_sizes=[500, 500], n_outs=10, L1_reg=0, L2_reg=0, first_layer='grbm',model=None):
        """This class is made to support a variable number of layers.

        :type numpy_rng: numpy.random.RandomState
        :param numpy_rng: numpy random number generator used to draw initial
                    weights

        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
        :param theano_rng: Theano random generator; if None is given one is
                           generated based on a seed drawn from `rng`

        :type n_ins: int
        :param n_ins: dimension of the input to the DBN

        :type hidden_layers_sizes: list of ints
        :param hidden_layers_sizes: intermediate layers size, must contain
                               at least one value

        :type n_outs: int
        :param n_outs: dimension of the output of the network
        """

        self.sigmoid_layers = []
        self.rbm_layers = []
        self.params = []
        self.n_layers = len(hidden_layers_sizes)
        self.L1 = 0
        self.L2_sqr = 0

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = MRG_RandomStreams(numpy_rng.randint(2 ** 30))

        # allocate symbolic variables for the data
        self.x = T.matrix('x')  # the data is presented as rasterized images
        self.y = T.ivector('y')  # the labels are presented as 1D vector
                                 # of [int] labels
        # end-snippet-1
        # The DBN is an MLP, for which all weights of intermediate
        # layers are shared with a different RBM.  We will first
        # construct the DBN as a deep multilayer perceptron, and when
        # constructing each sigmoidal layer we also construct an RBM
        # that shares weights with that layer. During pretraining we
        # will train these RBMs (which will lead to changing the
        # weights of the MLP as well). During finetuning we will finish
        # training the DBN by doing stochastic gradient descent on the
        # MLP.

        for i in xrange(self.n_layers):
            # construct the sigmoidal layer

            # the size of the input is either the number of hidden
            # units of the layer below or the input size if we are on
            # the first layer
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layers_sizes[i - 1]

            # the input to this layer is either the activation of the
            # hidden layer below or the input of the DBN if you are on
            # the first layer
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[i - 1].output
            
            if model is None:
                W = None
                b = None
            else:
                W = model[i*2]
                b = model[i*2 + 1]
                
            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        W = W,
                                        b = b,
                                        activation=T.nnet.sigmoid)

            # add the layer to our list of layers
            self.sigmoid_layers.append(sigmoid_layer)
            self.L1 += (abs(sigmoid_layer.W).sum())
            self.L2_sqr += ((sigmoid_layer.W ** 2).sum())

            # it's arguably a philosophical question...  but we are
            # going to only declare that the parameters of the
            # sigmoid_layers are parameters of the DBN. The visible
            # biases in the RBM are parameters of those RBMs, but not
            # of the DBN.
            self.params.extend(sigmoid_layer.params)

            # Construct an RBM that shared weights with this layer
            if i == 0: # first layer GBRBM - dealing with continuous values
                if first_layer == 'grbm':
                    rbm_layer = GRBM(numpy_rng=numpy_rng,
                                    theano_rng=theano_rng,
                                    input=layer_input,
                                    n_visible=input_size,
                                    n_hidden=hidden_layers_sizes[i],
                                    W=sigmoid_layer.W,
                                    hbias=sigmoid_layer.b)
                if first_layer == 'rbm':
                    rbm_layer = RBM(numpy_rng=numpy_rng,
                                    theano_rng=theano_rng,
                                    input=layer_input,
                                    n_visible=input_size,
                                    n_hidden=hidden_layers_sizes[i],
                                    W=sigmoid_layer.W,
                                    hbias=sigmoid_layer.b)
            # elif i == self.n_layers-1: # last layer GGRBM
                # rbm_layer = GRBM(numpy_rng=numpy_rng,
                                # theano_rng=theano_rng,
                                # input=layer_input,
                                # n_visible=input_size,
                                # n_hidden=hidden_layers_sizes[i],
                                # W=sigmoid_layer.W,
                                # hbias=sigmoid_layer.b)
            else: # subsequent layers BBRBM - binary RBM to cope with regularization
                rbm_layer = RBM(numpy_rng=numpy_rng,
                            theano_rng=theano_rng,
                            input=layer_input,
                            n_visible=input_size,
                            n_hidden=hidden_layers_sizes[i],
                            W=sigmoid_layer.W,
                            hbias=sigmoid_layer.b)
            self.rbm_layers.append(rbm_layer)

        # We now need to add a logistic layer on top of the MLP
        if model is None:
            W = None
            b = None
        else:
            W = model[-2]
            b = model[-1]
        self.logLayer = LogisticRegression(
            input=self.sigmoid_layers[-1].output,
            n_in=hidden_layers_sizes[-1],
            W = W,
            b = b,
            n_out=n_outs)
        self.params.extend(self.logLayer.params)
        
        self.L1 += (abs(self.logLayer.W).sum())

        self.L2_sqr += ((self.logLayer.W ** 2).sum())

        # compute the cost for second phase of training, defined as the
        # negative log likelihood of the logistic regression (output) layer
        self.finetune_cost = (self.logLayer.negative_log_likelihood(self.y)
            + L1_reg * self.L1
            + L2_reg * self.L2_sqr
        )
            

        # compute the gradients with respect to the model parameters
        # symbolic variable that points to the number of errors made on the
        # minibatch given by self.x and self.y
        self.errors = self.logLayer.errors(self.y)
        self.predprobs = self.logLayer.p_y_given_x
        self.preds = self.logLayer.y_pred

    def pretraining_functions(self, train_set_x, batch_size, cdk, usepersistent):
        '''Generates a list of functions, for performing one step of
        gradient descent at a given layer. The function will require
        as input the minibatch index, and to train an RBM you just
        need to iterate, calling the corresponding function on all
        minibatch indexes.

        :type train_set_x: theano.tensor.TensorType
        :param train_set_x: Shared var. that contains all datapoints used
                            for training the RBM
        :type batch_size: int
        :param batch_size: size of a [mini]batch
        :param cdk: number of Gibbs steps to do in CD-k / PCD-k

        '''

        # index to a [mini]batch
        index = T.lscalar('index')  # index to a minibatch
        learning_rate = T.scalar('lr')  # learning rate to use

        # number of batches
        n_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
        # beginning of a batch, given `index`
        batch_begin = index * batch_size
        # ending of a batch given `index`
        batch_end = batch_begin + batch_size

        pretrain_fns = []
        for rbm in self.rbm_layers:
            # get the cost and the updates list
            # using CD-k here (persistent=None) for training each RBM.
            # TODO: change cost function to reconstruction error
            if usepersistent:
                # init persistent chain
                persistent_chain = theano.shared(numpy.zeros((batch_size, rbm.n_hidden),
                                                     dtype=theano.config.floatX),
                                         borrow=True)
                cost, updates = rbm.get_cost_updates(learning_rate,
                             persistent=persistent_chain, k=cdk)
            else:
                cost, updates = rbm.get_cost_updates(learning_rate,
                                                     persistent=None, k=cdk)


            # compile the theano function
            fn = theano.function(
                inputs=[index, theano.Param(learning_rate, default=0.1)],
                outputs=cost,
                updates=updates,
                givens={
                    self.x: train_set_x[batch_begin:batch_end]
                }
            )
            # append `fn` to the list of functions
            pretrain_fns.append(fn)

        return pretrain_fns

    def build_finetune_functions(self, datasets, batch_size, learning_rate):
        '''Generates a function `train` that implements one step of
        finetuning, a function `validate` that computes the error on a
        batch from the validation set, and a function `test` that
        computes the error on a batch from the testing set

        :type datasets: list of pairs of theano.tensor.TensorType
        :param datasets: It is a list that contains all the datasets;
                        it has to contain three pairs, `train`,
                        `valid`, `test` in this order, where each pair
                        is formed of two Theano variables, one for the
                        datapoints, the other for the labels
        :type batch_size: int
        :param batch_size: size of a minibatch
        :type learning_rate: float
        :param learning_rate: learning rate used during finetune stage

        '''

        (train_set_x, train_set_y) = datasets[0]
        (valid_set_x, valid_set_y) = datasets[1]
        (test_set_x, test_set_y) = datasets[2]

        # compute number of minibatches for training, validation and testing
        n_train_batches = train_set_x.get_value(borrow=True).shape[0]
        n_train_batches /= batch_size
        n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
        n_valid_batches /= batch_size
        n_test_batches = test_set_x.get_value(borrow=True).shape[0]
        n_test_batches /= batch_size

        index = T.lscalar('index')  # index to a [mini]batch

        # compute the gradients with respect to the model parameters
        #gparams = T.grad(self.finetune_cost, self.params)
        gparams = [T.grad(self.finetune_cost, param) for param in self.params]

        # compute list of fine-tuning updates
        #updates = []
        #for param, gparam in zip(self.params, gparams):
        #    updates.append((param, param - gparam * learning_rate))
        updates = [
            (param, param - learning_rate * gparam)
            for param, gparam in zip(self.params, gparams)
        ]

        train_fn = theano.function(
            inputs=[index],
            outputs=self.finetune_cost,
            updates=updates,
            givens={
                self.x: train_set_x[
                    index * batch_size: (index + 1) * batch_size
                ],
                self.y: train_set_y[
                    index * batch_size: (index + 1) * batch_size
                ]
            }
        )
        
        train_score_i = theano.function(
            [index],
            self.errors,
            givens={
                self.x: train_set_x[
                    index * batch_size: (index + 1) * batch_size
                ],
                self.y: train_set_y[
                    index * batch_size: (index + 1) * batch_size
                ]
            }
        )

        test_score_i = theano.function(
            [index],
            self.errors,
            givens={
                self.x: test_set_x[
                    index * batch_size: (index + 1) * batch_size
                ],
                self.y: test_set_y[
                    index * batch_size: (index + 1) * batch_size
                ]
            }
        )

        valid_score_i = theano.function(
            [index],
            self.errors,
            givens={
                self.x: valid_set_x[
                    index * batch_size: (index + 1) * batch_size
                ],
                self.y: valid_set_y[
                    index * batch_size: (index + 1) * batch_size
                ]
            }
        )
        
        # Create a function that scans the entire training set
        def train_score():
            return [train_score_i(i) for i in xrange(n_train_batches)]

        # Create a function that scans the entire validation set
        def valid_score():
            return [valid_score_i(i) for i in xrange(n_valid_batches)]

        # Create a function that scans the entire test set
        def test_score():
            return [test_score_i(i) for i in xrange(n_test_batches)]

        return train_fn, train_score, valid_score, test_score
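A hedged sketch of driving this DBN through pretraining and into finetuning. The construction arguments, epoch counts and learning rates are illustrative placeholders, and `datasets` is assumed to be the usual list of three `(x, y)` shared-variable pairs expected by `build_finetune_functions`.

# Hypothetical pretraining/finetuning driver for the DBN class above.
import numpy

numpy_rng = numpy.random.RandomState(123)
dbn = DBN(numpy_rng=numpy_rng, n_ins=28 * 28,
          hidden_layers_sizes=[500, 500], n_outs=10)

train_set_x, train_set_y = datasets[0]
batch_size = 10
n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size

# one compiled CD-1 update per RBM layer
pretrain_fns = dbn.pretraining_functions(train_set_x=train_set_x,
                                         batch_size=batch_size,
                                         cdk=1, usepersistent=False)
for i in xrange(dbn.n_layers):
    for epoch in xrange(10):
        costs = [pretrain_fns[i](index=batch_index, lr=0.01)
                 for batch_index in xrange(n_train_batches)]
        print('Pre-training layer %i, epoch %d, cost %f'
              % (i, epoch, numpy.mean(costs)))

# supervised finetuning functions, as defined above
train_fn, train_score, valid_score, test_score = dbn.build_finetune_functions(
    datasets=datasets, batch_size=batch_size, learning_rate=0.1)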
Code Example #42
File: moe.py Project: neuronalX/esn-lm
class MixtureOfExperts:
    """ The Mixture of Experts model"""
    def __init__(self, input_dim, nb_experts, output_dim):
        self.nb_experts = nb_experts
        self.output_dim = output_dim
        self.gates = LogisticRegression(input_dim, nb_experts)
        self.experts = [
            LogisticRegression(input_dim, output_dim)
            for k in range(nb_experts)
        ]

    def pz_given_x(self, x):
        return self.gates.py_given_x(x)

    def py_given_x(self, x):
        pz = self.gates.py_given_x(x)
        py = np.zeros((x.shape[0], self.output_dim))
        for z in range(self.nb_experts):
            pzb = np.tile(np.expand_dims(pz[:, z], axis=1),
                          (1, self.output_dim))
            py += pzb * self.experts[z].py_given_x(x)
        return py

    def py_given_xz(self, x, z):
        return self.experts[z].py_given_x(x)

    def lik_y_for_every_z(self, x, y):
        py = np.zeros((x.shape[0], self.nb_experts))
        for z in range(self.nb_experts):
            py[:, z] = np.sum(y * self.py_given_xz(x, z), axis=1)
        return py

    def pz_given_xy(self, x, y):
        pz_given_x = self.pz_given_x(x)
        lik_y_forallz = self.lik_y_for_every_z(x, y)
        pz_given_xy = lik_y_forallz * pz_given_x
        renorm = np.tile(np.expand_dims(np.sum(pz_given_xy, axis=1), axis=1),
                         (1, pz_given_xy.shape[1]))
        pz_given_xy = pz_given_xy / renorm
        return pz_given_xy

    def sample_y_given_x(self, x):
        py = self.py_given_x(x)
        y = np.array(
            [np.random.multinomial(1, py[i, :]) for i in range(x.shape[0])])
        return y

    def log_likelihood(self, x, y):
        lik_y = self.lik_y_for_every_z(x, y)
        pz_given_x = self.pz_given_x(x)
        return np.sum(np.log(np.sum(pz_given_x * lik_y, 1)))

    def fit(self, x, y, method='CG', max_iter=15):
        """ The model is trained using Generalized Expectation-Maximization.
            In the Maximization step the Conjugate-Gradient algorithm provided by scipy.optimize is used
            by default.
        """
        if type(y) == type([]):
            y = np.eye(self.output_dim)[y]
        ll, Q1, Q2 = expectation_maximization2(self, x, y, max_iter=max_iter)
        return ll, Q1, Q2
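A hedged usage sketch for the mixture-of-experts class: the toy data below is made up, and it assumes the `LogisticRegression` gate/expert class and the `expectation_maximization2` trainer from the same esn-lm package are importable (so that `py_given_x` returns rows that sum to one).

# Hypothetical toy run of the MixtureOfExperts class above; shapes follow its
# conventions (x: (n_samples, input_dim), y: one-hot (n_samples, output_dim)).
import numpy as np

input_dim, nb_experts, output_dim = 5, 3, 4
moe = MixtureOfExperts(input_dim, nb_experts, output_dim)

x = np.random.randn(100, input_dim)
y_labels = list(np.random.randint(0, output_dim, size=100))

# fit() converts a plain list of int labels to a one-hot matrix internally
ll, Q1, Q2 = moe.fit(x, y_labels, max_iter=5)

y_onehot = np.eye(output_dim)[y_labels]
print('log-likelihood after EM: %f' % moe.log_likelihood(x, y_onehot))
samples = moe.sample_y_given_x(x)  # one-hot samples, shape (100, output_dim)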
Code Example #43
class DBN(object):
    """Deep Belief Network

    A deep belief network is obtained by stacking several RBMs on top of each
    other. The hidden layer of the RBM at layer `i` becomes the input of the
    RBM at layer `i+1`. The first layer RBM gets as input the input of the
    network, and the hidden layer of the last RBM represents the output. When
    used for classification, the DBN is treated as a MLP, by adding a logistic
    regression layer on top.
    """
    def __init__(self,
                 numpy_rng,
                 theano_rng=None,
                 n_ins=784,
                 hidden_layers_sizes=[500, 500],
                 n_outs=10,
                 L1_reg=0,
                 L2_reg=0,
                 first_layer='grbm',
                 model=None):
        """This class is made to support a variable number of layers.

        :type numpy_rng: numpy.random.RandomState
        :param numpy_rng: numpy random number generator used to draw initial
                    weights

        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
        :param theano_rng: Theano random generator; if None is given one is
                           generated based on a seed drawn from `rng`

        :type n_ins: int
        :param n_ins: dimension of the input to the DBN

        :type hidden_layers_sizes: list of ints
        :param hidden_layers_sizes: intermediate layers size, must contain
                               at least one value

        :type n_outs: int
        :param n_outs: dimension of the output of the network
        """

        self.sigmoid_layers = []
        self.rbm_layers = []
        self.params = []
        self.n_layers = len(hidden_layers_sizes)
        self.L1 = 0
        self.L2_sqr = 0

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = MRG_RandomStreams(numpy_rng.randint(2**30))

        # allocate symbolic variables for the data
        self.x = T.matrix('x')  # the data is presented as rasterized images
        self.y = T.ivector('y')  # the labels are presented as 1D vector
        # of [int] labels
        # end-snippet-1
        # The DBN is an MLP, for which all weights of intermediate
        # layers are shared with a different RBM.  We will first
        # construct the DBN as a deep multilayer perceptron, and when
        # constructing each sigmoidal layer we also construct an RBM
        # that shares weights with that layer. During pretraining we
        # will train these RBMs (which will lead to changing the
        # weights of the MLP as well). During finetuning we will finish
        # training the DBN by doing stochastic gradient descent on the
        # MLP.

        for i in xrange(self.n_layers):
            # construct the sigmoidal layer

            # the size of the input is either the number of hidden
            # units of the layer below or the input size if we are on
            # the first layer
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layers_sizes[i - 1]

            # the input to this layer is either the activation of the
            # hidden layer below or the input of the DBN if you are on
            # the first layer
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[i - 1].output

            if model is None:
                W = None
                b = None
            else:
                W = model[i * 2]
                b = model[i * 2 + 1]

            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        W=W,
                                        b=b,
                                        activation=T.nnet.sigmoid)

            # add the layer to our list of layers
            self.sigmoid_layers.append(sigmoid_layer)
            self.L1 += (abs(sigmoid_layer.W).sum())
            self.L2_sqr += ((sigmoid_layer.W**2).sum())

            # it's arguably a philosophical question...  but we are
            # going to only declare that the parameters of the
            # sigmoid_layers are parameters of the DBN. The visible
            # biases in the RBM are parameters of those RBMs, but not
            # of the DBN.
            self.params.extend(sigmoid_layer.params)

            # Construct an RBM that shared weights with this layer
            if i == 0:  # first layer GBRBM - dealing with continuous values
                if first_layer == 'grbm':
                    rbm_layer = GRBM(numpy_rng=numpy_rng,
                                     theano_rng=theano_rng,
                                     input=layer_input,
                                     n_visible=input_size,
                                     n_hidden=hidden_layers_sizes[i],
                                     W=sigmoid_layer.W,
                                     hbias=sigmoid_layer.b)
                if first_layer == 'rbm':
                    rbm_layer = RBM(numpy_rng=numpy_rng,
                                    theano_rng=theano_rng,
                                    input=layer_input,
                                    n_visible=input_size,
                                    n_hidden=hidden_layers_sizes[i],
                                    W=sigmoid_layer.W,
                                    hbias=sigmoid_layer.b)
            # elif i == self.n_layers-1: # last layer GGRBM
            # rbm_layer = GRBM(numpy_rng=numpy_rng,
            # theano_rng=theano_rng,
            # input=layer_input,
            # n_visible=input_size,
            # n_hidden=hidden_layers_sizes[i],
            # W=sigmoid_layer.W,
            # hbias=sigmoid_layer.b)
            else:  # subsequent layers BBRBM - binary RBM to cope with regularization
                rbm_layer = RBM(numpy_rng=numpy_rng,
                                theano_rng=theano_rng,
                                input=layer_input,
                                n_visible=input_size,
                                n_hidden=hidden_layers_sizes[i],
                                W=sigmoid_layer.W,
                                hbias=sigmoid_layer.b)
            self.rbm_layers.append(rbm_layer)

        # We now need to add a logistic layer on top of the MLP
        if model is None:
            W = None
            b = None
        else:
            W = model[-2]
            b = model[-1]
        self.logLayer = LogisticRegression(
            input=self.sigmoid_layers[-1].output,
            n_in=hidden_layers_sizes[-1],
            W=W,
            b=b,
            n_out=n_outs)
        self.params.extend(self.logLayer.params)

        self.L1 += (abs(self.logLayer.W).sum())

        self.L2_sqr += ((self.logLayer.W**2).sum())

        # compute the cost for second phase of training, defined as the
        # negative log likelihood of the logistic regression (output) layer
        self.finetune_cost = (self.logLayer.negative_log_likelihood(self.y) +
                              L1_reg * self.L1 + L2_reg * self.L2_sqr)

        # compute the gradients with respect to the model parameters
        # symbolic variable that points to the number of errors made on the
        # minibatch given by self.x and self.y
        self.errors = self.logLayer.errors(self.y)
        self.predprobs = self.logLayer.p_y_given_x
        self.preds = self.logLayer.y_pred

    def pretraining_functions(self, train_set_x, batch_size, cdk,
                              usepersistent):
        '''Generates a list of functions, for performing one step of
        gradient descent at a given layer. The function will require
        as input the minibatch index, and to train an RBM you just
        need to iterate, calling the corresponding function on all
        minibatch indexes.

        :type train_set_x: theano.tensor.TensorType
        :param train_set_x: Shared var. that contains all datapoints used
                            for training the RBM
        :type batch_size: int
        :param batch_size: size of a [mini]batch
        :param cdk: number of Gibbs steps to do in CD-k / PCD-k

        '''

        # index to a [mini]batch
        bc_idx = T.ivector('bc_idx')  # index to a minibatch
        learning_rate = T.scalar('lr')  # learning rate to use

        # number of batches
        n_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size

        pretrain_fns = []
        for rbm in self.rbm_layers:
            # get the cost and the updates list
            # using CD-k here (persistent=None) for training each RBM.
            # TODO: change cost function to reconstruction error
            if usepersistent:
                # init persistent chain
                persistent_chain = theano.shared(numpy.zeros(
                    (batch_size, rbm.n_hidden), dtype=theano.config.floatX),
                                                 borrow=True)
                cost, updates = rbm.get_cost_updates(
                    learning_rate, persistent=persistent_chain, k=cdk)
            else:
                cost, updates = rbm.get_cost_updates(learning_rate,
                                                     persistent=None,
                                                     k=cdk)

            # compile the theano function
            fn = theano.function(
                inputs=[bc_idx,
                        theano.Param(learning_rate, default=0.1)],
                outputs=cost,
                updates=updates,
                givens={self.x: train_set_x[bc_idx]})
            # append `fn` to the list of functions
            pretrain_fns.append(fn)

        return pretrain_fns

    def build_finetune_functions(self, datasets, batch_size, learning_rate):
        '''Generates a function `train` that implements one step of
        finetuning, a function `validate` that computes the error on a
        batch from the validation set, and a function `test` that
        computes the error on a batch from the testing set

        :type datasets: list of pairs of theano.tensor.TensorType
        :param datasets: It is a list that contains all the datasets;
                        it has to contain three pairs, `train`,
                        `valid`, `test` in this order, where each pair
                        is formed of two Theano variables, one for the
                        datapoints, the other for the labels
        :type batch_size: int
        :param batch_size: size of a minibatch
        :type learning_rate: float
        :param learning_rate: learning rate used during finetune stage

        '''

        (train_set_x, train_set_y) = datasets[0]
        (valid_set_x, valid_set_y) = datasets[1]
        (test_set_x, test_set_y) = datasets[2]

        # compute number of minibatches for training, validation and testing
        n_train_batches = train_set_x.get_value(borrow=True).shape[0]
        n_train_batches /= batch_size
        n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
        n_valid_batches /= batch_size
        n_test_batches = test_set_x.get_value(borrow=True).shape[0]
        n_test_batches /= batch_size

        index = T.lscalar('index')  # index to a [mini]batch
        bc_idx = T.ivector('bc_idx')

        # compute the gradients with respect to the model parameters
        #gparams = T.grad(self.finetune_cost, self.params)
        gparams = [T.grad(self.finetune_cost, param) for param in self.params]

        # compute list of fine-tuning updates
        #updates = []
        #for param, gparam in zip(self.params, gparams):
        #    updates.append((param, param - gparam * learning_rate))
        updates = [(param, param - learning_rate * gparam)
                   for param, gparam in zip(self.params, gparams)]

        train_fn = theano.function(inputs=[bc_idx],
                                   outputs=self.finetune_cost,
                                   updates=updates,
                                   givens={
                                       self.x: train_set_x[bc_idx],
                                       self.y: train_set_y[bc_idx]
                                   })

        train_score_i = theano.function(
            [index],
            self.errors,
            givens={
                self.x:
                train_set_x[index * batch_size:(index + 1) * batch_size],
                self.y:
                train_set_y[index * batch_size:(index + 1) * batch_size]
            })

        test_score_i = theano.function(
            [index],
            self.errors,
            givens={
                self.x:
                test_set_x[index * batch_size:(index + 1) * batch_size],
                self.y: test_set_y[index * batch_size:(index + 1) * batch_size]
            })

        valid_score_i = theano.function(
            [index],
            self.errors,
            givens={
                self.x:
                valid_set_x[index * batch_size:(index + 1) * batch_size],
                self.y:
                valid_set_y[index * batch_size:(index + 1) * batch_size]
            })

        # Create a function that scans the entire training set
        def train_score():
            return [train_score_i(i) for i in xrange(n_train_batches)]

        # Create a function that scans the entire validation set
        def valid_score():
            return [valid_score_i(i) for i in xrange(n_valid_batches)]

        # Create a function that scans the entire test set
        def test_score():
            return [test_score_i(i) for i in xrange(n_test_batches)]

        return train_fn, train_score, valid_score, test_score
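Unlike the earlier variant, `train_fn` here is indexed by an explicit vector of example indices (`bc_idx`), so minibatches can be drawn in any order. A hedged sketch of one finetuning epoch with randomly permuted minibatches; `dbn`, `datasets` and `batch_size` are placeholders set up as in the previous sketch.

# Hypothetical finetuning epoch using randomly permuted minibatch indices.
import numpy

train_fn, train_score, valid_score, test_score = dbn.build_finetune_functions(
    datasets=datasets, batch_size=batch_size, learning_rate=0.1)

n_examples = datasets[0][0].get_value(borrow=True).shape[0]
perm = numpy.random.permutation(n_examples).astype('int32')

for start in xrange(0, n_examples - batch_size + 1, batch_size):
    bc_idx = perm[start:start + batch_size]  # indices fed through the ivector
    minibatch_cost = train_fn(bc_idx)

print('validation error %f %%' % (numpy.mean(valid_score()) * 100.))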
Code Example #44
File: sdA.py Project: raoqiyu/Learning-DL
    def __init__(self, numpy_rng, theano_rng=None, n_in=784,
            hidden_layers_sizes=[500, 500], n_out=10,corruption_levels=[0.,0.1]):
        """This class is made to support a variable number of layers.

        :type  theano_rng: theano.tensor.shared_randomstreams.RandomStreams
        :param theano_rng: Theano random generator used to draw initial weights

        :type  n_in: int
        :param n_in: dimension of the input to the sdA

        :type  hidden_layers_sizes: list of ints
        :param hidden_layers_sizes: intermediate layers size, must contain
                                    at least one value

        :type  n_out: int
        :param n_out: dimension of the output of the network

        :type  corruption_levels: list of float
        :param corruption_levels: amount of corruption to use for each layer
        """
        # self.dA_layers will store the denoising autoencoder associated
        # with the layers of the MLP
        self.dA_layers = []
        # self.sigmoid_layers will store the sigmoid layers of the MLP facade
        self.sigmoid_layers = []
        self.params = []
        self.n_layers = len(hidden_layers_sizes)

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

        # allocate symbolic variables for the data
        self.x = T.matrix('x')  # the data is presented as rasterized images
        self.y = T.ivector('y') # the labels are presented as 1D vector of
                                # [int] labels

        #
        # Construct self.n_layers sigmoid layers and self.n_layers denoising
        # layers, where self.n_layers is the depth of our model
        #
        for i in range(self.n_layers):
            # construct a sigmoid layer
            #

            # the size of the input is either the number of hidden units of
            # the layer below or the input size if we are on the first layer;
            # the input of the layer follows the same rule
            if i == 0:
                input_size  = n_in
                layer_input = self.x
            else:
                input_size  = hidden_layers_sizes[i-1]
                layer_input = self.sigmoid_layers[-1].output

            sigmoid_layer = HiddenLayer(rng=numpy_rng, input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=T.nnet.sigmoid)
            # add the layer to our list of layers
            self.sigmoid_layers.append(sigmoid_layer)
            # the parameters of the sigmoid layers are parameters of the
            # sdA; the visible biases in the dA are parameters of those
            # dAs, but not of the sdA, so we do not add the dA_layer's
            # bvis (below) to self.params
            self.params.extend(sigmoid_layer.params)

            # construct a denoising autoencoder that shares weights with this
            # sigmoid_layer
            dA_layer = dA(numpy_rng=numpy_rng, theano_rng=theano_rng,
                          input=layer_input, n_visible=input_size,
                          n_hidden=hidden_layers_sizes[i],
                          W=sigmoid_layer.W, bhid=sigmoid_layer.b)
            self.dA_layers.append(dA_layer)

        #
        # Construct a logistic layer on top of the MLP
        #
        self.logLayer = LogisticRegression(
            input=self.sigmoid_layers[-1].output,
            n_in=hidden_layers_sizes[-1],
            n_out=n_out
        )
        self.params.extend(self.logLayer.params)

        #
        # Construct a function that implements one step of finetuning
        #

        # compute the cost for second phase of training,
        # defined as the negative log likelihood
        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
        # symbolic variable that points to the number of errors made on the
        # minibatch given by self.x and self.y
        self.errors = self.logLayer.errors(self.y)
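
For reference, a hedged sketch of how this constructor might be invoked. The class name `SdA` and the availability of `dA`, `HiddenLayer`, and `LogisticRegression` in the same module are assumptions, since only the `__init__` body is shown above.

import numpy

# Assumed: the __init__ above belongs to a class named SdA defined alongside
# dA, HiddenLayer and LogisticRegression.
numpy_rng = numpy.random.RandomState(89677)

sda = SdA(
    numpy_rng=numpy_rng,
    n_in=28 * 28,                         # rasterized MNIST images
    hidden_layers_sizes=[500, 500, 500],  # one dA per hidden layer
    n_out=10,
    corruption_levels=[0.1, 0.2, 0.3],    # one corruption level per layer
)
print(len(sda.dA_layers), 'denoising autoencoders share weights with the MLP')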
Code example #45
0
def evaluate_lenet5(learning_rate=0.1, n_epochs=200,
                    dataset='mnist.pkl.gz',
                    nkerns=[20,50],batch_size=500):
    """Demonstates lenet on MNIST dataset
    """
    
    rng = numpy.random.RandomState(1234)
    print('Loading Data'+'.'*20)
    datasets = load_data(dataset)
    
    trainSetX, trainSetY = datasets[0]
    validSetX, validSetY = datasets[1]
    testSetX,  testSetY  = datasets[2]
    
    n_train_batches = trainSetX.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = validSetX.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = testSetX.get_value(borrow=True).shape[0] // batch_size
    
    print('Building the Model'+'.'*20)
    
    index = T.lscalar('index')
    x = T.matrix('x')
    y = T.ivector('y')
    
    # Reshape matrix of rasterized images of shape (batch_size, 28*28)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    layer0_input = x.reshape((batch_size,1,28,28))
    
    # construct the first convolutional pooling layer
    # filtering reduces the image size to (28-5+1,28-5+1) = (24, 24)
    # maxpooling reduces this further to (24/2, 24/2) = (12, 12)
    # 4D output tensor is thus of shape (batch_size,nkerns[0],12,12)
    layer0 = LeNetConvPoolLayer(
        rng=rng,
        input=layer0_input,
        image_shape=(batch_size,1,28,28),
        filter_shape=(nkerns[0],1,5,5),
        poolsize=(2,2)
    )
    
    # construct the second convolutional pooling layer
    # filtering reduces the image size to (12-5+1,12-5+1) = (8, 8)
    # maxpooling reduces this further to (8/2, 8/2) = (4, 4)
    # 4D output tensor is thus of shape (batch_size,nkerns[1],4,4)
    layer1 = LeNetConvPoolLayer(
        rng=rng,
        input=layer0.output,
        image_shape=(batch_size,nkerns[0],12,12),
        filter_shape=(nkerns[1],nkerns[0],5,5),
        poolsize=(2,2)
    )
    
    layer2_input = layer1.output.flatten(2)
    layer2 = HiddenLayer(
        rng=rng,
        input=layer2_input,
        n_in=nkerns[1]*4*4,
        n_out=500,
        activation=T.tanh
    )
    
    layer3 = LogisticRegression(input=layer2.output,n_in=500,n_out=10)
    
    testModel = theano.function(
        inputs=[index],
        outputs=layer3.errors(y),
        givens={
            x:testSetX[index*batch_size:(index+1)*batch_size],        
            y:testSetY[index*batch_size:(index+1)*batch_size]        
        }    
    )
    validModel = theano.function(
        inputs=[index],
        outputs=layer3.errors(y),
        givens={
            x:validSetX[index*batch_size:(index+1)*batch_size],        
            y:validSetY[index*batch_size:(index+1)*batch_size]        
        }
    )
    
    params = layer3.params+layer2.params+layer1.params+layer0.params
    cost = layer3.negative_log_likelihood(y)
    grads = T.grad(cost,params)
    
    updates= [(param, param - learning_rate*grad)
                for param,grad in zip(params,grads)
    ]
    
    trainModel = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x:trainSetX[index*batch_size:(index+1)*batch_size],        
            y:trainSetY[index*batch_size:(index+1)*batch_size]        
        }    
    )
    
    print('Training'+'.'*20)
    
    # early-stopping parameters
    patience = 10000
    patience_increase = 2
    # a relative improvement of at least 0.5% counts as significant
    improvement_threshold = 0.995
    validation_frequency = min(n_train_batches, patience // 2)
    
    best_validation_loss = numpy.inf
    best_iter = 0
    test_score =0.
    start_time = timeit.default_timer()
    
    epoch = 0
    done_looping = False
    
    while (epoch < n_epochs) and (not done_looping):
        epoch += 1
        
        for mini_batch_index in range(n_train_batches):
            iter = (epoch - 1) * n_train_batches + mini_batch_index
            if iter % 100 == 0:
                print('training @ iter = ', iter)

            cost_ij = trainModel(mini_batch_index)
            if (iter + 1) % validation_frequency == 0:
                validation_losses = [validModel(i)
                                     for i in range(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, mini_batch_index + 1, n_train_batches,
                       this_validation_loss * 100)
                )
                if this_validation_loss < best_validation_loss:
                    # extend patience only while the improvement is significant,
                    # and do so before updating the best score
                    if this_validation_loss < best_validation_loss * \
                                              improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    test_losses = [testModel(i) for i in range(n_test_batches)]
                    test_score = numpy.mean(test_losses)

                    print('   epoch %i, minibatch %i/%i, test error of '
                          'best model %f %%' %
                          (epoch, mini_batch_index + 1, n_train_batches,
                           test_score * 100)
                    )
            if patience <= iter:
                done_looping = True
                break
    endtime = timeit.default_timer()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100, best_iter + 1, test_score * 100.)
    )

    print('The code for file ' + os.path.split(__file__)[1] +
          ' ran for %.2fm' % ((endtime - start_time) / 60.)
    )
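
The image-size comments in the two LeNetConvPoolLayer blocks above (28 → 24 → 12, then 12 → 8 → 4) follow a simple rule: a valid convolution shrinks a side to `size - filter + 1`, and 2×2 max-pooling halves it. A small helper, included only as a sanity check of that arithmetic; the function name is illustrative.

def conv_pool_output_size(image_size, filter_size, pool_size):
    """Side length after a valid convolution followed by max-pooling."""
    return (image_size - filter_size + 1) // pool_size

# Reproduces the shapes quoted in the comments above.
s0 = conv_pool_output_size(28, 5, 2)   # layer0: (28-5+1)//2 = 12
s1 = conv_pool_output_size(s0, 5, 2)   # layer1: (12-5+1)//2 = 4
assert (s0, s1) == (12, 4)             # matches the nkerns[1]*4*4 input to layer2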
Code example #46
0
class DBN(object):
    def __init__(self,
                 numpy_rng,
                 theano_rng=None,
                 n_ins=784,
                 hidden_layers_sizes=[500, 500],
                 n_outs=10):
        self.sigmoid_layers = []
        self.rbm_layers = []
        self.params = []
        self.n_layers = len(hidden_layers_sizes)
        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2**30))

        self.x = T.matrix('x')
        self.y = T.ivector('y')
        for i in range(self.n_layers):
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layers_sizes[i - 1]
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[-1].output

            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=T.nnet.sigmoid)
            self.sigmoid_layers.append(sigmoid_layer)

            self.params.extend(sigmoid_layer.params)

            rbm_layer = RBM(numpy_rng=numpy_rng,
                            theano_rng=theano_rng,
                            input=layer_input,
                            n_visible=input_size,
                            n_hidden=hidden_layers_sizes[i],
                            W=sigmoid_layer.W,
                            hbias=sigmoid_layer.b)
            self.rbm_layers.append(rbm_layer)

        self.logLayer = LogisticRegression(
            input=self.sigmoid_layers[-1].output,
            n_in=hidden_layers_sizes[-1],
            n_out=n_outs)

        self.params.extend(self.logLayer.params)

        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)

        self.errors = self.logLayer.errors(self.y)

    def pretraining_functions(self, train_set_x, batch_size, k):
        index = T.lscalar('index')
        learning_rate = T.scalar('lr')
        batch_begin = index * batch_size
        batch_end = batch_begin + batch_size

        pretrain_fns = []
        for rbm in self.rbm_layers:
            cost, updates = rbm.get_cost_updates(learning_rate,
                                                 persistent=None,
                                                 k=k)
            fn = theano.function(
                inputs=[index, theano.In(learning_rate, value=0.1)],
                outputs=cost,
                updates=updates,
                givens={self.x: train_set_x[batch_begin:batch_end]})
            pretrain_fns.append(fn)
        return pretrain_fns

    def build_finetune_functions(self, datasets, batch_size, learning_rate):
        (train_set_x, train_set_y) = datasets[0]
        (test_set_x, test_set_y) = datasets[1]
        n_test_batches = test_set_x.get_value(borrow=True).shape[0]
        n_test_batches //= batch_size
        index = T.lscalar('index')  # index to a [mini]batch
        gparams = T.grad(self.finetune_cost, self.params)
        updates = []
        for param, gparam in zip(self.params, gparams):
            updates.append((param, param - gparam * learning_rate))
        train_fn = theano.function(
            inputs=[index],
            outputs=self.finetune_cost,
            updates=updates,
            givens={
                self.x:
                train_set_x[index * batch_size:(index + 1) * batch_size],
                self.y:
                train_set_y[index * batch_size:(index + 1) * batch_size]
            })
        test_score_i = theano.function(
            [index],
            self.errors,
            givens={
                self.x:
                test_set_x[index * batch_size:(index + 1) * batch_size],
                self.y: test_set_y[index * batch_size:(index + 1) * batch_size]
            })

        def test_score():
            return [test_score_i(i) for i in range(n_test_batches)]

        valid_score = None
        return train_fn, valid_score, test_score

    def predict(self):
        test_set = logistic.load_my_test_data()
        test_set_x = test_set.get_value()
        predict_model = theano.function(inputs=[self.x],
                                        outputs=self.logLayer.y_pred)
        predicted_values = predict_model(test_set_x)
        ids = numpy.arange(predicted_values.shape[0] + 1)
        print(ids.dtype)
        print(predicted_values)
        df = pd.DataFrame({"ImageId": ids[1:], "Label": predicted_values})
        print(df)
        df.to_csv('submission.csv', index=False)
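
A minimal sketch of how the pretraining and finetuning functions defined above might be driven end to end. The data handling and the literal values (`batch_size`, `pretraining_epochs`, `finetune_epochs`) are assumptions carried over from the other examples in this file, not part of the class itself.

import numpy

# Assumed setup (illustrative values): train_set_x / datasets come from a
# load_data-style helper, exactly as build_finetune_functions above expects:
# datasets = [(train_set_x, train_set_y), (test_set_x, test_set_y)]
batch_size = 10
pretraining_epochs, finetune_epochs = 10, 30
n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size

numpy_rng = numpy.random.RandomState(123)
dbn = DBN(numpy_rng=numpy_rng, n_ins=28 * 28,
          hidden_layers_sizes=[500, 500], n_outs=10)

# Greedy layer-wise pretraining: one CD-k update function per RBM.
pretrain_fns = dbn.pretraining_functions(train_set_x=train_set_x,
                                         batch_size=batch_size, k=1)
for layer_fn in pretrain_fns:
    for epoch in range(pretraining_epochs):
        costs = [layer_fn(i, lr=0.01) for i in range(n_train_batches)]
        print('pretraining cost %f' % numpy.mean(costs))

# Supervised finetuning of the whole stack (valid_score is None in this variant).
train_fn, valid_score, test_score = dbn.build_finetune_functions(
    datasets=datasets, batch_size=batch_size, learning_rate=0.1)
for epoch in range(finetune_epochs):
    for i in range(n_train_batches):
        train_fn(i)
    print('finetune test error %f %%' % (numpy.mean(test_score()) * 100.))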