Example #1
    def train_offline(self, data, mean=None, std=None):

        print 'training....'
        train_samples = 300000
        val_samples = 1000
        test_samples = 1000
        # n_epochs and patchSize are used below but were not set in this snippet;
        # the values follow the companion train_offline example later on this page
        n_epochs = 5000
        patchSize = self.patchSize
        batchSize = self.batchSize
        learning_rate = self.learning_rate
        momentum = self.momentum

        def gradient_updates_momentum(cost, params, learning_rate, momentum):
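            # Classical momentum: each parameter keeps a velocity (param_update).
            # Theano evaluates all update expressions with the old shared values,
            # so the parameter step uses the previous velocity and the velocity
            # is then refreshed from the current gradient.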
            updates = []
            for param in params:
                param_update = theano.shared(param.get_value()*0., broadcastable=param.broadcastable)
                updates.append((param, param - learning_rate*param_update))
                updates.append((param_update, momentum*param_update + (1. - momentum)*T.grad(cost, param)))
            return updates

        rng = numpy.random.RandomState(23455)

        # training data
        d = data.gen_samples_offline(
            nsamples=train_samples,
            purpose='train',
            patchSize=patchSize,
            mean=mean,
            std=std)

        data_mean = d[2]
        data_std = d[3]

        train_set_x, train_set_y = shared_dataset((d[0],d[1]), doCastLabels=True)

        d = data.gen_samples_offline(
            nsamples=val_samples,
            purpose='validate',
            patchSize=patchSize,
            mean=data_mean,
            std=data_std)
        valid_set_x, valid_set_y = shared_dataset((d[0],d[1]), doCastLabels=True)


        d = data.gen_samples_offline(
            nsamples=test_samples,
            purpose='test',
            patchSize=patchSize,
            mean=data_mean,
            std=data_std)
        test_set_x, test_set_y = shared_dataset((d[0],d[1]), doCastLabels=True)

        # compute number of minibatches for training, validation and testing
        n_train_batches = train_samples / batchSize
        n_valid_batches = val_samples / batchSize
        n_test_batches = test_samples / batchSize

        # allocate symbolic variables for the data
        index = T.lscalar()  # index to a [mini]batch

        # start-snippet-1
        x = self.x #T.matrix('x')   # the data is presented as rasterized images
        y = T.ivector('y')  # the labels are presented as 1D vector of
                            # [int] labels
        cost = self.cost(y)

        lr = T.scalar('learning_rate')
        m = T.scalar('momentum')

        learning_rate_shared = theano.shared(np.float32(learning_rate))
        momentum_shared = theano.shared(np.float32(momentum))

        print 'training data....'
        print 'min: ', np.min( train_set_x.eval() )
        print 'max: ', np.max( train_set_x.eval() )
        print 'n_train_batches:',n_train_batches
        print 'n_valid_batches:',n_valid_batches
        print 'n_test_batches:',n_test_batches

        # create a function to compute the mistakes that are made by the model
        test_model = theano.function(
            [index],
            self.errors(y),
            givens={
                x: test_set_x[index * batchSize: (index + 1) * batchSize],
                y: test_set_y[index * batchSize: (index + 1) * batchSize]
            }
        )

        validate_model = theano.function(
            [index],
            self.errors(y),
            givens={
                x: valid_set_x[index * batchSize: (index + 1) * batchSize],
                y: valid_set_y[index * batchSize: (index + 1) * batchSize]
            }
        )


        gparams = []
        for param in self.params:
            gparam = T.grad(cost, param)
            gparams.append(gparam)

        updates = gradient_updates_momentum(cost, self.params, lr, m)


        train_model = theano.function(inputs=[index], outputs=cost,
                updates=updates,
                givens={
                    x: train_set_x[index * batchSize:(index + 1) * batchSize],
                    y: train_set_y[index * batchSize:(index + 1) * batchSize],
                    lr: learning_rate_shared,
                    m: momentum_shared})

        ###############
        # TRAIN MODEL #
        ###############
        print '... training'
        best_validation_loss = numpy.inf
        best_iter = 0
        decrease_epoch = 1
        decrease_patience = 1
        test_score = 0.
        doResample = True

        validation_frequency = 1

        start_time = time.clock()

        epoch = 0
        done_looping = False

        last_avg_validation_loss = 0
        avg_validation_losses = []

        while (epoch < n_epochs) and (not self.done):
            minibatch_avg_costs = []
            epoch = epoch + 1


            if doResample and epoch>1: # and len(avg_validation_losses) > 0:
                epoch=0
                avg = np.mean(avg_validation_losses)
                diff = abs(avg-last_avg_validation_loss)
                last_avg_validation_loss = avg
                avg_validation_losses = []


                d = data.gen_samples_offline(
                    nsamples=train_samples,
                    purpose='train',
                    patchSize=patchSize,
                    mean=mean,
                    std=std)
                dx = d[0]
                dy = d[1]
                train_set_x.set_value(np.float32(dx))
                train_set_y.set_value(np.int32(dy))


            for minibatch_index in xrange(n_train_batches):
                if self.done:
                    break

                train_cost = train_model(minibatch_index)
                minibatch_avg_costs.append( train_cost )
                # iteration number
                iter = (epoch - 1) * n_train_batches + minibatch_index

                if (iter + 1) % validation_frequency == 0:
                    #self.save()
                    # compute zero-one loss on validation set
                    validation_losses = np.array([validate_model(i) for i
                                         in xrange(n_valid_batches)])
                    this_validation_loss = numpy.sum(validation_losses) * 100.0 / val_samples

                    msg = 'epoch %i, minibatch %i/%i, training error %.3f, validation error %.2f %%' % (epoch, minibatch_index + 1, n_train_batches, minibatch_avg_costs[-1], this_validation_loss)

                    print(msg)

                    # if we got the best validation score until now
                    if this_validation_loss < best_validation_loss:
                        best_validation_loss = this_validation_loss
                        best_iter = iter

                        self.save()
                        print "New best score!"


        end_time = time.clock()
        # best_validation_loss is already a percentage (see the validation loop)
        print(('Optimization complete. Best validation score of %f %% '
               'obtained at iteration %i, with test performance %f %%') %
              (best_validation_loss, best_iter + 1, test_score * 100.))
        print >> sys.stderr, ('The code for file ' +
                              os.path.split(__file__)[1] +
                              ' ran for %.2fm' % ((end_time - start_time) / 60.))
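
All of these examples rely on a shared_dataset helper that is not shown on this page. Below is a minimal sketch of what such a helper might look like, assuming it follows the usual Theano tutorial pattern; doCastLabels is assumed to store the labels as an int32 shared variable so that the later train_set_y.set_value(np.int32(...)) calls keep working:

import numpy as np
import theano

def shared_dataset(data_xy, doCastLabels=False, borrow=True):
    # Load a (features, labels) pair into Theano shared variables so that
    # minibatches can be sliced on the GPU through the `givens` mechanism.
    data_x, data_y = data_xy
    shared_x = theano.shared(np.asarray(data_x, dtype=theano.config.floatX),
                             borrow=borrow)
    if doCastLabels:
        # int32 labels match the T.ivector('y') inputs used by the models
        shared_y = theano.shared(np.asarray(data_y, dtype=np.int32),
                                 borrow=borrow)
    else:
        shared_y = theano.shared(np.asarray(data_y, dtype=theano.config.floatX),
                                 borrow=borrow)
    return shared_x, shared_y
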
Example #2
def train_mlp(learning_rate=0.01, n_epochs=10, batch_size=500, n_hidden=[500],
              patchSize=19, train_samples=1000, val_samples=10000,
              test_samples=1000, doResample=False, validation_frequency=1,
              activation=rectified_linear, doEmailUpdate=False, momentum=0.0):

    def adadelta_updates(parameters,gradients,rho,eps):
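        # ADADELTA-style updates: keep decaying averages (rate rho) of squared
        # gradients and squared steps, and scale each gradient by
        # RMS(previous steps) / RMS(gradients); eps keeps the square roots finite.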
        # create variables to store intermediate updates
        gradients_sq = [ theano.shared(np.zeros(p.get_value().shape, dtype=theano.config.floatX)) for p in parameters ]
        deltas_sq = [ theano.shared(np.zeros(p.get_value().shape, dtype=theano.config.floatX)) for p in parameters ]
        # calculates the new "average" delta for the next iteration
        gradients_sq_new = [ rho*g_sq + (1-rho)*(g**2) for g_sq,g in zip(gradients_sq,gradients) ]
        
        # calculates the step in direction. The square root is an approximation to getting the RMS for the average value
        deltas = [ (T.sqrt(d_sq+eps)/T.sqrt(g_sq+eps))*grad for d_sq,g_sq,grad in zip(deltas_sq,gradients_sq_new,gradients) ]
        
        # calculates the new "average" deltas for the next step.
        deltas_sq_new = [ rho*d_sq + (1-rho)*(d**2) for d_sq,d in zip(deltas_sq,deltas) ]
        
        # Prepare it as a list f
        gradient_sq_updates = zip(gradients_sq,gradients_sq_new)
        deltas_sq_updates = zip(deltas_sq,deltas_sq_new)
        parameters_updates = [ (p,p - d) for p,d in zip(parameters,deltas) ]
        return gradient_sq_updates + deltas_sq_updates + parameters_updates

    def gradient_updates_momentum(cost, params, learning_rate, momentum):
        updates = []
        for param in params:
            param_update = theano.shared(param.get_value()*0., broadcastable=param.broadcastable)
            updates.append((param, param - learning_rate*param_update))
            updates.append((param_update, momentum*param_update + (1. - momentum)*T.grad(cost, param)))
        return updates

    if doEmailUpdate:
        gmail_pwd = getpass.getpass()

    rng = numpy.random.RandomState(1234)

    data, norm_mean, norm_std, grayImages, labelImages, maskImages = generate_experiment_data_supervised(purpose='train', nsamples=train_samples, patchSize=patchSize, balanceRate=0.5, data_mean=0.5, data_std=1.0)
    train_set_x, train_set_y = shared_dataset(data, doCastLabels=True)

    data = generate_experiment_data_supervised(purpose='validate', nsamples=val_samples, patchSize=patchSize, balanceRate=0.5, data_mean=norm_mean, data_std=norm_std)[0]
    valid_set_x, valid_set_y = shared_dataset(data, doCastLabels=True)

    data = generate_experiment_data_supervised(purpose='test', nsamples=test_samples, patchSize=patchSize, balanceRate=0.5, data_mean=norm_mean, data_std=norm_std)[0]
    test_set_x, test_set_y = shared_dataset(data, doCastLabels=True)

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_samples / batch_size
    n_valid_batches = val_samples / 1000
    n_test_batches = test_samples / 1000

    learning_rate_shared = theano.shared(np.float32(learning_rate))
    momentum_shared = theano.shared(np.float32(momentum))

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels
    lr = T.scalar('learning_rate')
    m = T.scalar('momentum')

    # construct the MLP class
    classifier = MLP(rng=rng, input=x, n_in=patchSize**2,
                             n_hidden=n_hidden, n_out=2, activation=activation)


    cost = classifier.negative_log_likelihood(y) 

    test_model = theano.function(inputs=[index],
                                 outputs=classifier.errors(y),
                                 givens={
                                     x: test_set_x[index * batch_size:(index + 1) * batch_size],
                                     y: test_set_y[index * batch_size:(index + 1) * batch_size]})
    validate_model = theano.function(inputs=[index],
                                     outputs=classifier.errors(y),
                                     givens={
                                         x: valid_set_x[index * batch_size:(index + 1) * batch_size],
                                         y: valid_set_y[index * batch_size:(index + 1) * batch_size]}) 
    gparams = []
    for param in classifier.params:
        gparam = T.grad(cost, param)
        gparams.append(gparam)

    #SGD
#    updates = []
#    for param, gparam in zip(classifier.params, gparams):
#        updates.append((param, param - lr * gparam))

    #updates = adadelta_updates(classifier.params, gparams, lr, 0.000001)
    updates = gradient_updates_momentum(cost, classifier.params, lr, m)
    

    train_model = theano.function(inputs=[index], outputs=cost,
            updates=updates,
            givens={
                x: train_set_x[index * batch_size:(index + 1) * batch_size],
                y: train_set_y[index * batch_size:(index + 1) * batch_size],
                lr: learning_rate_shared,
                m: momentum_shared})

    print '... training'

    best_validation_loss = numpy.inf
    best_iter = 0
    decrease_epoch = 1
    decrease_patience = 1
    test_score = 0.

    start_time = time.clock()

    epoch = 0
    done_looping = False

    # start pool for data
    print "Starting worker."
    pool = multiprocessing.Pool(processes=1)
    futureData = pool.apply_async(stupid_map_wrapper, [[generate_experiment_data_supervised,True, 'train', train_samples, patchSize, 0.5, 0.5, 1.0]])
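    # the worker regenerates a fresh training set in the background while the
    # current epoch trains; it is collected with futureData.get() when resampling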

    while (epoch < n_epochs) and (not done_looping):
        minibatch_avg_costs = []
        epoch = epoch + 1

        if doResample and epoch>1:
            print "Waiting for data."
            data = futureData.get()
            print "GOT NEW DATA"
            train_set_x.set_value(np.float32(data[0]))
            train_set_y.set_value(np.int32(data[1]))
            futureData = pool.apply_async(stupid_map_wrapper, [[generate_experiment_data_supervised,True, 'train', train_samples, patchSize, 0.5, 0.5, 1.0]])
#            try:
#                data = futureData.get(timeout=1)
#                print "GOT NEW DATA"
#                train_set_x.set_value(np.float32(data[0]))
#                train_set_y.set_value(np.int32(data[1]))
#                futureData = pool.apply_async(stupid_map_wrapper, [[generate_experiment_data_supervised,True, 'train', train_samples, patchSize, 0.5, norm_mean, 1.0]])
#            except multiprocessing.TimeoutError:
#                print "TIMEOUT, TRAINING ANOTHER ROUND WITH CURRENT DATA"
#                pass
#


        for minibatch_index in xrange(n_train_batches):
            minibatch_avg_costs.append(train_model(minibatch_index))
            # iteration number
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                classifier.save_MLP('current.pkl')
                # compute zero-one loss on validation set
                validation_losses = np.array([validate_model(i) for i
                                     in xrange(n_valid_batches)])
                this_validation_loss = numpy.mean(validation_losses*100.0)
                
                msg = 'epoch %i, minibatch %i/%i, training error %.3f, validation error %.2f %%' % (epoch, minibatch_index + 1, n_train_batches, minibatch_avg_costs[-1], this_validation_loss)

                print(msg)

                classifier.trainingCost.append(minibatch_avg_costs[-1])
                classifier.validationError.append(this_validation_loss*100)

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    classifier.save_MLP('best_so_far.pkl')
                    print "New best score!"
                    if doEmailUpdate:
                        send_email(gmail_pwd, msg)
                    # test it on the test set
                    #test_losses = [test_model(i) for i
                    #               in xrange(n_test_batches)]
                    #test_score = numpy.mean(test_losses)
                    #
                    #print(('epoch %i, minibatch %i/%i, test error of '
                    #       'best model %f %%') %
                    #      (epoch, minibatch_index + 1, n_train_batches,
                    #       test_score * 100.))

    pool.close()
    pool.join() 
    print "Pool closed."

    end_time = time.clock()
    # best_validation_loss is already a percentage (see the validation loop)
    print(('Optimization complete. Best validation score of %f %% '
           'obtained at iteration %i, with test performance %f %%') %
          (best_validation_loss, best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))

    return classifier
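
For reference, a call that exercises the signature above might look like the following; the argument values are illustrative, not taken from the original project:

# illustrative only: one hidden layer of 500 units on 19x19 patches,
# trained with plain SGD plus momentum
classifier = train_mlp(learning_rate=0.01,
                       n_epochs=10,
                       batch_size=500,
                       n_hidden=[500],
                       patchSize=19,
                       train_samples=1000,
                       val_samples=10000,
                       test_samples=1000,
                       doResample=False,
                       momentum=0.9)
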
Example #3
    def train(self, 
        offline=False, 
        data=None, 
        mean=None,
        std=None
        ):
        print 'mlp.train'

        def gradient_updates_momentum(cost, params, learning_rate, momentum):
            updates = []
            for param in params:
                param_update = theano.shared(param.get_value()*0., broadcastable=param.broadcastable)
                updates.append((param, param - learning_rate*param_update))
                updates.append((param_update, momentum*param_update + (1. - momentum)*T.grad(cost, param)))
            return updates

        patchSize = self.patchSize
        batchSize = self.batchSize
        learning_rate  = self.learning_rate
        momentum = self.momentum

        rng = numpy.random.RandomState(1234)

        tx, ty, vx, vy, reset = data.sample()
        train_samples  = len(ty)
        val_samples    = len(vy)
        train_set_x, train_set_y = shared_dataset((tx, ty), doCastLabels=True)

        if val_samples > 0:
            valid_set_x, valid_set_y = shared_dataset((vx, vy), doCastLabels=True)

        if reset:
            self.best_validation_loss = numpy.inf

        # compute number of minibatches for training, validation and testing
        n_train_batches = train_samples / batchSize
        n_valid_batches = val_samples / 1000 #batchSize

        # allocate symbolic variables for the data
        index = T.lscalar()  # index to a [mini]batch

        # start-snippet-1
        x = self.x #T.matrix('x')   # the data is presented as rasterized images
        y = T.ivector('y')  # the labels are presented as 1D vector of
                            # [int] labels
        cost = self.cost(y)

        lr = T.scalar('learning_rate')
        m = T.scalar('momentum')

        learning_rate_shared = theano.shared(np.float32(learning_rate))
        momentum_shared = theano.shared(np.float32(momentum))

        print 'training data....'
        print 'n_train_batches:',n_train_batches
        print 'n_valid_batches:',n_valid_batches
        print 'train_samples:', train_samples
        print 'val_samples:', val_samples
        print 'best_validation:', self.best_validation_loss

        if val_samples > 0:
            validate_model = theano.function(
                [index],
                self.errors(y),
                givens={
                    x: valid_set_x[index * batchSize: (index + 1) * batchSize],
                    y: valid_set_y[index * batchSize: (index + 1) * batchSize]
                }
            )

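        # predict_samples evaluates the whole training set in one call and returns
        # a 0/1 vector (1 = misclassified); it is written back into data.p for the
        # sampled training indices after the training loop.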
        predict_samples = theano.function(
                [],
                outputs=T.neq(self.y_pred, y),
                givens={
                        x: train_set_x,
                        y: train_set_y,
                }
        )

        gparams = []
        for param in self.params:
            gparam = T.grad(cost, param)
            gparams.append(gparam)

        updates = gradient_updates_momentum(cost, self.params, lr, m)

        train_model = theano.function(inputs=[index], outputs=cost,
                updates=updates,
                givens={
                    x: train_set_x[index * batchSize:(index + 1) * batchSize],
                    y: train_set_y[index * batchSize:(index + 1) * batchSize],
                    lr: learning_rate_shared,
                    m: momentum_shared})

  
        ###############
        # TRAIN MODEL #
        ###############
        print '... training'
        validation_frequency = 1
        start_time = time.clock()

        minibatch_avg_costs = []
        iter = 0
        epoch = 0
        self.best_train_error = np.inf
        last_train_error = numpy.inf
        for minibatch_index in xrange(n_train_batches):
            if self.done:
                break

            train_cost = train_model(minibatch_index)
            minibatch_avg_costs.append( train_cost )

            # iteration number
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if n_valid_batches == 0:
                train_error = minibatch_avg_costs[-1].item(0)

                print minibatch_index, '-', train_error
                if train_error < self.best_train_error:
                    self.best_train_error = train_error
                    self.save()
                      

            if n_valid_batches > 0 and (iter + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = np.array([validate_model(i) for i
                                     in xrange(n_valid_batches)])
                #this_validation_loss = numpy.sum(validation_losses) * 100.0 / val_samples
                this_validation_loss = numpy.mean(validation_losses*100.0)

                elapsed_time = time.clock() - start_time
 
                data.report_stats(
                    self.id,
                    elapsed_time, 
                    minibatch_index, 
                    this_validation_loss, 
                    minibatch_avg_costs[-1].item(0))

                # if we got the best validation score until now
                if this_validation_loss < self.best_validation_loss:
                    self.best_validation_loss = this_validation_loss
                    self.save()
                    print "New best score!"

        #if n_valid_batches == 0:
        #    self.save()

        if not self.offline:
            probs = predict_samples()
            data.p[ data.i_train ] = probs
            data.save_stats()
Example #4
    def train_online(self, data):
        print 'train online...'
        def gradient_updates_momentum(cost, params, learning_rate, momentum):
            updates = []
            for param in params:
                param_update = theano.shared(param.get_value()*0., broadcastable=param.broadcastable)
                updates.append((param, param - learning_rate*param_update))
                updates.append((param_update, momentum*param_update + (1. - momentum)*T.grad(cost, param)))
            return updates

        # DATA INITIALIZATION
        d       = data.sample()
        train_x = d[0]
        train_y = d[1]
        valid_x = d[2]
        valid_y = d[3]
        reset   = d[4]


        if reset:
            self.best_validation_loss = numpy.inf 

        print 'best_validation:', self.best_validation_loss
        train_samples = len(train_y)
        valid_samples = len(valid_y)

        if self.resample:
            self.lr_shared.set_value( np.float32(self.learning_rate) )
            self.m_shared.set_value( np.float32(self.momentum) )
        else:
            self.resample  = True
            self.y         = T.ivector('y')  # the labels are presented as 1D vector of [int] labels
            self.lr        = T.scalar('learning_rate')
            self.m         = T.scalar('momentum')

            self.lr_shared = theano.shared(np.float32(self.learning_rate))
            self.m_shared  = theano.shared(np.float32(self.momentum))
        
        index          =  T.lscalar()  # index to a [mini]batch
        x              = self.x
        y              = self.y
        lr             = self.lr
        m              = self.m
        lr_shared      = self.lr_shared
        m_shared       = self.m_shared
        patchSize      = self.patchSize
        batchSize      = self.batchSize
        train_set_x, train_set_y = shared_dataset((train_x, train_y), doCastLabels=True)

        if valid_samples > 0:
            valid_set_x, valid_set_y = shared_dataset((valid_x, valid_y), doCastLabels=True)

        # compute number of minibatches for training, validation 
        n_train_batches = train_samples / batchSize
        n_valid_batches = valid_samples / batchSize


        #BUILD THE MODEL
        cost = self.cost(y)

        if valid_samples > 0:
            validate_model = theano.function(
                [index],
                self.errors(y),
                givens={
                    x: valid_set_x[index * batchSize: (index + 1) * batchSize],
                    y: valid_set_y[index * batchSize: (index + 1) * batchSize]
                }
            )

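        # predict_samples returns a 0/1 misclassification mask for one minibatch;
        # inside the training loop the mask is written back into data.p for the
        # corresponding training indices.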
        predict_samples = theano.function(
                inputs=[index],
                outputs=T.neq(self.mlp.y_pred, self.y),
                givens={
                        x: train_set_x[index * batchSize: (index + 1) * batchSize],
                        y: train_set_y[index * batchSize: (index + 1) * batchSize]   
                } 
        )

        gparams = []
        for param in self.params:
            gparam = T.grad(cost, param)
            gparams.append(gparam)

        updates = gradient_updates_momentum(cost, self.params, lr, m)

        train_model = theano.function(inputs=[index], outputs=cost,
                updates=updates,
                givens={
                    x: train_set_x[index * batchSize:(index + 1) * batchSize],
                    y: train_set_y[index * batchSize:(index + 1) * batchSize],
                    lr: lr_shared,
                    m: m_shared})


        # TRAIN THE MODEL
        print '... training'
        best_iter = 0
        validation_frequency = 1

        start_time = time.clock()

        elapsed_time = 0
        iter = 0

        minibatch_avg_costs = []
        minibatch_index = 0

        count1 = 0
        count2 = 0


        while (elapsed_time < self.trainTime)\
            and (minibatch_index<n_train_batches)\
            and (not self.done):

            train_cost = train_model(minibatch_index)
            #print '----->traincost:', type(train_cost), train_cost

            minibatch_avg_costs.append(train_cost)
       
            #print 'minibatch_index:', minibatch_index, 'n_train_batches:',n_train_batches, self.batchSize,
 
            probs = predict_samples(minibatch_index)

            indices = data.i_train[minibatch_index * batchSize:(minibatch_index + 1) * batchSize]
            data.p[ indices ] = probs
            #print 'probs:', probs
        
            iter += 1
            if (iter + 1) % validation_frequency == 0 and n_valid_batches > 0:

                validation_losses = np.array([validate_model(i) for i in xrange(n_valid_batches)])
                this_validation_loss = numpy.sum(validation_losses) * 100.0 / valid_samples

                elapsed_time = time.clock() - start_time

                data.report_stats(
                    self.id,
                    elapsed_time,
                    minibatch_index,
                    this_validation_loss,
                    minibatch_avg_costs[-1].item(0))

                # if we got the best validation score until now
                count1 += len(np.where(probs==0)[0])
                count2 += len(np.where(probs==1)[0])
            
                data.add_validation_loss( this_validation_loss )
    
                if this_validation_loss < self.best_validation_loss:
                    self.best_validation_loss = this_validation_loss
                    best_iter = iter

                    print '===>saving....'
                    self.save()
                    print "New best score!"

            # advance to next mini batch
            minibatch_index += 1

            # update elapsed time
            elapsed_time = time.clock() - start_time

        data.save_stats()
 
        p = data.p[ data.i_train ]
        n_bad = len( np.where( p == 1 )[0] )
        error = float(n_bad)/len(p)
        print '----------'
        print 'accuracy:', data.accuracy
        print 'error:', error
        print 'lerror:', self.error
        print 'probi:', np.bincount( np.int64( p ) )

        if n_valid_batches == 0:
            self.save()

        elapsed_time = time.clock() - start_time
        msg = 'The code ran for'
        status = '%f seconds' % (elapsed_time)
        Utility.report_status( msg, status )
Example #5
    def train_offline(self, data, mean=None, std=None):
        print 'training....'
        train_samples = 700000
        val_samples = 5000
        test_samples = 1000
        n_epochs = 5000
        patchSize = self.patchSize
        batchSize = 50  #self.batchSize
        learning_rate = self.learning_rate
        momentum = 0.9  #self.momentum

        def gradient_updates_momentum(cost, params, learning_rate, momentum):
            updates = []
            for param in params:
                param_update = theano.shared(param.get_value() * 0.,
                                             broadcastable=param.broadcastable)
                updates.append((param, param - learning_rate * param_update))
                updates.append((param_update, momentum * param_update +
                                (1. - momentum) * T.grad(cost, param)))
            return updates

        rng = numpy.random.RandomState(1234)

        # training data
        d = data.gen_samples_offline(nsamples=train_samples,
                                     purpose='train',
                                     patchSize=patchSize,
                                     mean=mean,
                                     std=std)

        data_mean = d[2]
        data_std = d[3]

        train_set_x, train_set_y = shared_dataset((d[0], d[1]),
                                                  doCastLabels=True)

        d = data.gen_samples_offline(nsamples=val_samples,
                                     purpose='validate',
                                     patchSize=patchSize,
                                     mean=data_mean,
                                     std=data_std)
        valid_set_x, valid_set_y = shared_dataset((d[0], d[1]),
                                                  doCastLabels=True)

        d = data.gen_samples_offline(nsamples=test_samples,
                                     purpose='test',
                                     patchSize=patchSize,
                                     mean=data_mean,
                                     std=data_std)
        test_set_x, test_set_y = shared_dataset((d[0], d[1]),
                                                doCastLabels=True)
        '''

        d = gen_data_supervised(
            purpose='train',
            nsamples=train_samples,
            patchSize=patchSize,
            balanceRate=0.5,
            data_mean=mean,
            data_std=std)
        data = d[0]
        train_set_x, train_set_y = shared_dataset(data, doCastLabels=True)

        #print 'data:', np.shape(data)
        #print 'train:', np.shape(train_set_x), np.shape(train_set_y)
        #print 'valid:', np.shape(valid_set_x), np.shape(valid_set_y)
        #print 'test :', np.shape(test_set_x), np.shape(test_set_y)

        norm_mean = d[1]
        norm_std  = d[2]
        grayImages = d[3]
        labelImages = d[4]
        maskImages = d[5]

        print 'norm_std:', norm_std
        print 'norm_mean:',norm_mean
    
        # validation data
        d = gen_data_supervised(
            purpose='validate',
            nsamples=val_samples,
            patchSize=patchSize,
            balanceRate=0.5,
            data_mean=norm_mean,
            data_std=norm_std)[0]
        valid_set_x, valid_set_y = shared_dataset(d, doCastLabels=True)

        # test data
        d = gen_data_supervised(
            purpose='test',
            nsamples=test_samples,
            patchSize=patchSize,
            balanceRate=0.5,
            data_mean=norm_mean,
            data_std=norm_std)[0]
        test_set_x, test_set_y = shared_dataset(d, doCastLabels=True)

        '''

        # compute number of minibatches for training, validation and testing
        n_train_batches = train_samples / batchSize
        n_valid_batches = val_samples / 1000  #batchSize
        n_test_batches = test_samples / 1000  #batchSize

        # allocate symbolic variables for the data
        index = T.lscalar()  # index to a [mini]batch

        # start-snippet-1
        x = self.x  #T.matrix('x')   # the data is presented as rasterized images
        y = T.ivector('y')  # the labels are presented as 1D vector of
        # [int] labels
        cost = self.cost(y)

        lr = T.scalar('learning_rate')
        m = T.scalar('momentum')

        learning_rate_shared = theano.shared(np.float32(learning_rate))
        momentum_shared = theano.shared(np.float32(momentum))

        print 'training data....'
        print 'min: ', np.min(train_set_x.eval())
        print 'max: ', np.max(train_set_x.eval())
        print 'n_train_batches:', n_train_batches
        print 'n_valid_batches:', n_valid_batches
        print 'n_test_batches:', n_test_batches

        # create a function to compute the mistakes that are made by the model
        test_model = theano.function(
            [index],
            self.errors(y),
            givens={
                x: test_set_x[index * batchSize:(index + 1) * batchSize],
                y: test_set_y[index * batchSize:(index + 1) * batchSize]
            })

        validate_model = theano.function(
            [index],
            self.errors(y),
            givens={
                x: valid_set_x[index * batchSize:(index + 1) * batchSize],
                y: valid_set_y[index * batchSize:(index + 1) * batchSize]
            })

        gparams = []
        for param in self.params:
            gparam = T.grad(cost, param)
            gparams.append(gparam)

        updates = gradient_updates_momentum(cost, self.params, lr, m)

        train_model = theano.function(
            inputs=[index],
            outputs=cost,
            updates=updates,
            givens={
                x: train_set_x[index * batchSize:(index + 1) * batchSize],
                y: train_set_y[index * batchSize:(index + 1) * batchSize],
                lr: learning_rate_shared,
                m: momentum_shared
            })

        ###############
        # TRAIN MODEL #
        ###############
        print '... training'
        best_validation_loss = numpy.inf
        best_iter = 0
        decrease_epoch = 1
        decrease_patience = 1
        test_score = 0.
        doResample = True

        validation_frequency = 1

        start_time = time.clock()

        epoch = 0
        done_looping = False

        print 'lr:', learning_rate
        print 'patchSize:', patchSize
        print 'm:', momentum

        print 'n_train_batches:', n_train_batches
        print 'n_valid_batches:', n_valid_batches
        print 'n_test_batches:', n_test_batches

        # start pool for data
        print "Starting worker."
        '''
        pool = multiprocessing.Pool(processes=1)
        futureData = pool.apply_async(
                        stupid_map_wrapper,
                        [[gen_data_supervised,True, 'train', train_samples, patchSize, 0.5, 0.5, 1.0]])
        '''

        last_avg_validation_loss = 0
        avg_validation_losses = []
        while (epoch < n_epochs) and (not self.done):
            minibatch_avg_costs = []
            epoch = epoch + 1
            if doResample and epoch > 1:  # and len(avg_validation_losses) > 0:
                epoch = 0
                avg = np.mean(avg_validation_losses)
                diff = abs(avg - last_avg_validation_loss)
                last_avg_validation_loss = avg
                avg_validation_losses = []

                #if diff < 0.025:
                print 'resampling...'
                print 'diff:', diff
                print 'last_avg_validation_loss:', last_avg_validation_loss
                '''
                d = gen_data_supervised(
                    purpose='train',
                    nsamples=train_samples,
                    patchSize=patchSize,
                    balanceRate=0.5,
                    data_mean=mean,
                    data_std=std)
                data = d[0]
                train_set_x.set_value(np.float32(data[0]))
                train_set_y.set_value(np.int32(data[1]))

                '''
                d = data.gen_samples_offline(nsamples=train_samples,
                                             purpose='train',
                                             patchSize=patchSize,
                                             mean=mean,
                                             std=std)
                dx = d[0]
                dy = d[1]
                train_set_x.set_value(np.float32(dx))
                train_set_y.set_value(np.int32(dy))

            for minibatch_index in xrange(n_train_batches):
                if self.done:
                    break

                train_cost = train_model(minibatch_index)
                minibatch_avg_costs.append(train_cost)
                # iteration number
                iter = (epoch - 1) * n_train_batches + minibatch_index

                if (iter + 1) % validation_frequency == 0:
                    #self.save()
                    # compute zero-one loss on validation set
                    validation_losses = np.array(
                        [validate_model(i) for i in xrange(n_valid_batches)])
                    #this_validation_loss = numpy.sum(validation_losses) * 100.0 / val_samples
                    this_validation_loss = numpy.mean(validation_losses *
                                                      100.0)
                    avg_validation_losses.append(this_validation_loss * 100)

                    msg = 'epoch %i, minibatch %i/%i, training error %.3f, validation error %.2f %%' % (
                        epoch, minibatch_index + 1, n_train_batches,
                        minibatch_avg_costs[-1], this_validation_loss)

                    print(msg)

                    # if we got the best validation score until now
                    if this_validation_loss < best_validation_loss:
                        best_validation_loss = this_validation_loss
                        best_iter = iter

                        self.save()
                        print "New best score!"

        #pool.close()
        #pool.join()
        print "Pool closed."

        end_time = time.clock()
        # best_validation_loss is already a percentage (see the validation loop)
        print(('Optimization complete. Best validation score of %f %% '
               'obtained at iteration %i, with test performance %f %%') %
              (best_validation_loss, best_iter + 1, test_score * 100.))
        print >> sys.stderr, ('The code for file ' +
                              os.path.split(__file__)[1] + ' ran for %.2fm' %
                              ((end_time - start_time) / 60.))
Example #6
    def train_online(self, data):

        print 'train online...'
        def gradient_updates_momentum(cost, params, learning_rate, momentum):
            updates = []
            for param in params:
                param_update = theano.shared(param.get_value()*0., broadcastable=param.broadcastable)
                updates.append((param, param - learning_rate*param_update))
                updates.append((param_update, momentum*param_update + (1. - momentum)*T.grad(cost, param)))
            return updates

        # DATA INITIALIZATION
        d       = data.sample()
        train_x = d[0]
        train_y = d[1]
        valid_x = d[2]
        valid_y = d[3]
        reset   = d[4]

        if reset:
            self.best_validation_loss = numpy.inf

        train_samples = len(train_y)
        valid_samples = len(valid_y)

        print 'valid_samples:',valid_samples
        print 'train_samples:', train_samples

        if self.resample:
            self.lr_shared.set_value( np.float32(self.learning_rate) )
            self.m_shared.set_value( np.float32(self.momentum) )

        else:
            self.resample  = True
            self.y         = T.ivector('y')  # the labels are presented as 1D vector of [int] labels
            self.lr        = T.scalar('learning_rate')
            self.m         = T.scalar('momentum')

            self.lr_shared = theano.shared(np.float32(self.learning_rate))
            self.m_shared  = theano.shared(np.float32(self.momentum))



        index          =  T.lscalar()  # index to a [mini]batch
        x              = self.x
        y              = self.y
        lr             = self.lr
        m              = self.m
        lr_shared      = self.lr_shared
        m_shared       = self.m_shared
        patchSize      = self.patchSize
        batchSize      = self.batchSize
        train_set_x, train_set_y = shared_dataset((train_x, train_y), doCastLabels=True)

        if valid_samples > 0:
            valid_set_x, valid_set_y = shared_dataset((valid_x, valid_y), doCastLabels=True)

        # compute number of minibatches for training, validation 
        n_train_batches = train_samples / batchSize
        n_valid_batches = valid_samples / batchSize


        #BUILD THE MODEL
        cost = self.cost(y)

        if valid_samples > 0:
            validate_model = theano.function(
                [index],
                self.errors(y),
                givens={
                    x: valid_set_x[index * batchSize: (index + 1) * batchSize],
                    y: valid_set_y[index * batchSize: (index + 1) * batchSize]
                }
            )

        '''
        predict_samples = theano.function(
                inputs=[index],
                outputs=T.neq(self.y_pred, self.y),
                givens={
                        x: train_set_x[index * batchSize: (index + 1) * batchSize],
                        y: train_set_y[index * batchSize: (index + 1) * batchSize]
                }
        )
        '''
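        # this variant evaluates the full training set in a single call; the
        # per-minibatch version above is kept commented out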
        predict_samples = theano.function(
                [],
                outputs=T.neq(self.y_pred, self.y),
                givens={
                        x: train_set_x,
                        y: train_set_y,
                }
        )


        gparams = []
        for param in self.params:
            gparam = T.grad(cost, param)
            gparams.append(gparam)

        updates = gradient_updates_momentum(cost, self.params, lr, m)

        train_model = theano.function(inputs=[index], outputs=cost,
                updates=updates,
                givens={
                    x: train_set_x[index * batchSize:(index + 1) * batchSize],
                    y: train_set_y[index * batchSize:(index + 1) * batchSize],
                    lr: lr_shared,
                    m: m_shared})


        # TRAIN THE MODEL
        print '... training'
        print 'self.best_validation_loss:', self.best_validation_loss
        best_iter = 0
        validation_frequency = 1

        start_time = time.clock()

        elapsed_time = 0
        iter = 0

        minibatch_avg_costs = []
        minibatch_index = 0


        #while (elapsed_time < self.trainTime)\
        #    and (minibatch_index<n_train_batches)\
        #    and (not self.done):
        while (minibatch_index<n_train_batches) and (not self.done):
            if (elapsed_time >= self.trainTime):
                break

            train_cost = train_model(minibatch_index)

            # test the trained samples against the target
            # values to measure the training performance
            i = minibatch_index

            '''
            probs = predict_samples(minibatch_index)
            #print 'probs:', probs.shape
            i_batch = data.i_train[ i * batchSize:(i+1)*batchSize ]
            data.p[ i_batch ] = probs
            '''

            '''
            good = np.where( probs == 0)[0]
            bad  = np.where( probs == 1)[0]
            print 'bad:', len(bad)
            print 'good:', len(good)
            #print probs
            '''
            #print '----->traincost:', type(train_cost), train_cost

            minibatch_avg_costs.append(train_cost)

            iter += 1
            #iter = (epoch - 1) * n_train_batches + minibatch_index
            if (iter + 1) % validation_frequency == 0 and valid_samples > 0:

                validation_losses = np.array([validate_model(i) for i in xrange(n_valid_batches)])
                this_validation_loss = numpy.sum(validation_losses) * 100.0 / valid_samples
                elapsed_time = time.clock() - start_time

                '''
                self.reportTrainingStats(elapsed_time,
                        minibatch_index,
                        this_validation_loss,
                        minibatch_avg_costs[-1].item(0))
                '''
                print this_validation_loss, '/', self.best_validation_loss
                data.add_validation_loss( this_validation_loss )

                # if we got the best validation score until now
                if this_validation_loss < self.best_validation_loss:
                    self.best_validation_loss = this_validation_loss
                    best_iter = iter

                    self.save()
                    print "New best score!"

            # advance to next mini batch
            minibatch_index += 1

            # update elapsed time
            elapsed_time = time.clock() - start_time

        if valid_samples == 0:
            self.save()

        probs = predict_samples()
        data.p[ data.i_train ] = probs

        elapsed_time = time.clock() - start_time
        msg = 'The code ran for'
        status = '%f seconds' % (elapsed_time)
        Utility.report_status( msg, status )
        print 'done...'
Example #7
    def train(self, offline=False, data=None, mean=None, std=None):
        print 'mlp.train'

        def gradient_updates_momentum(cost, params, learning_rate, momentum):
            updates = []
            for param in params:
                param_update = theano.shared(param.get_value() * 0.,
                                             broadcastable=param.broadcastable)
                updates.append((param, param - learning_rate * param_update))
                updates.append((param_update, momentum * param_update +
                                (1. - momentum) * T.grad(cost, param)))
            return updates

        patchSize = self.patchSize
        batchSize = self.batchSize
        learning_rate = self.learning_rate
        momentum = self.momentum

        rng = numpy.random.RandomState(1234)

        tx, ty, vx, vy, reset = data.sample()
        train_samples = len(ty)
        val_samples = len(vy)
        train_set_x, train_set_y = shared_dataset((tx, ty), doCastLabels=True)

        if val_samples > 0:
            valid_set_x, valid_set_y = shared_dataset((vx, vy),
                                                      doCastLabels=True)

        if reset:
            self.best_validation_loss = numpy.inf

        # compute number of minibatches for training, validation and testing
        n_train_batches = train_samples / batchSize
        n_valid_batches = val_samples / 1000  #batchSize

        # allocate symbolic variables for the data
        index = T.lscalar()  # index to a [mini]batch

        # start-snippet-1
        x = self.x  #T.matrix('x')   # the data is presented as rasterized images
        y = T.ivector('y')  # the labels are presented as 1D vector of
        # [int] labels
        cost = self.cost(y)

        lr = T.scalar('learning_rate')
        m = T.scalar('momentum')

        learning_rate_shared = theano.shared(np.float32(learning_rate))
        momentum_shared = theano.shared(np.float32(momentum))

        print 'training data....'
        print 'n_train_batches:', n_train_batches
        print 'n_valid_batches:', n_valid_batches
        print 'train_samples:', train_samples
        print 'val_samples:', val_samples
        print 'best_validation:', self.best_validation_loss

        if val_samples > 0:
            validate_model = theano.function(
                [index],
                self.errors(y),
                givens={
                    x: valid_set_x[index * batchSize:(index + 1) * batchSize],
                    y: valid_set_y[index * batchSize:(index + 1) * batchSize]
                })

        predict_samples = theano.function([],
                                          outputs=T.neq(self.y_pred, y),
                                          givens={
                                              x: train_set_x,
                                              y: train_set_y,
                                          })

        gparams = []
        for param in self.params:
            gparam = T.grad(cost, param)
            gparams.append(gparam)

        updates = gradient_updates_momentum(cost, self.params, lr, m)

        train_model = theano.function(
            inputs=[index],
            outputs=cost,
            updates=updates,
            givens={
                x: train_set_x[index * batchSize:(index + 1) * batchSize],
                y: train_set_y[index * batchSize:(index + 1) * batchSize],
                lr: learning_rate_shared,
                m: momentum_shared
            })

        ###############
        # TRAIN MODEL #
        ###############
        print '... training'
        validation_frequency = 1
        start_time = time.clock()

        minibatch_avg_costs = []
        iter = 0
        epoch = 0
        self.best_train_error = np.inf
        last_train_error = numpy.inf
        for minibatch_index in xrange(n_train_batches):
            if self.done:
                break

            train_cost = train_model(minibatch_index)
            minibatch_avg_costs.append(train_cost)

            # iteration number
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if n_valid_batches == 0:
                train_error = minibatch_avg_costs[-1].item(0)

                print minibatch_index, '-', train_error
                if train_error < self.best_train_error:
                    self.best_train_error = train_error
                    self.save()

            if n_valid_batches > 0 and (iter + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = np.array(
                    [validate_model(i) for i in xrange(n_valid_batches)])
                #this_validation_loss = numpy.sum(validation_losses) * 100.0 / val_samples
                this_validation_loss = numpy.mean(validation_losses * 100.0)

                elapsed_time = time.clock() - start_time

                data.report_stats(self.id, elapsed_time, minibatch_index,
                                  this_validation_loss,
                                  minibatch_avg_costs[-1].item(0))

                # if we got the best validation score until now
                if this_validation_loss < self.best_validation_loss:
                    self.best_validation_loss = this_validation_loss
                    self.save()
                    print "New best score!"

        #if n_valid_batches == 0:
        #    self.save()

        if not self.offline:
            probs = predict_samples()
            data.p[data.i_train] = probs
            data.save_stats()
Example #8
    def train_online(self, data):

        print 'train online...'

        def gradient_updates_momentum(cost, params, learning_rate, momentum):
            updates = []
            for param in params:
                param_update = theano.shared(param.get_value() * 0.,
                                             broadcastable=param.broadcastable)
                updates.append((param, param - learning_rate * param_update))
                updates.append((param_update, momentum * param_update +
                                (1. - momentum) * T.grad(cost, param)))
            return updates

        # DATA INITIALIZATION
        d = data.sample()
        train_x = d[0]
        train_y = d[1]
        valid_x = d[2]
        valid_y = d[3]
        reset = d[4]

        if reset:
            self.best_validation_loss = numpy.inf

        train_samples = len(train_y)
        valid_samples = len(valid_y)

        print 'valid_samples:', valid_samples
        print 'train_samples:', train_samples

        if self.resample:
            self.lr_shared.set_value(np.float32(self.learning_rate))
            self.m_shared.set_value(np.float32(self.momentum))

        else:
            self.resample = True
            self.y = T.ivector(
                'y')  # the labels are presented as 1D vector of [int] labels
            self.lr = T.scalar('learning_rate')
            self.m = T.scalar('momentum')

            self.lr_shared = theano.shared(np.float32(self.learning_rate))
            self.m_shared = theano.shared(np.float32(self.momentum))

        index = T.lscalar()  # index to a [mini]batch
        x = self.x
        y = self.y
        lr = self.lr
        m = self.m
        lr_shared = self.lr_shared
        m_shared = self.m_shared
        patchSize = self.patchSize
        batchSize = self.batchSize
        train_set_x, train_set_y = shared_dataset((train_x, train_y),
                                                  doCastLabels=True)

        if valid_samples > 0:
            valid_set_x, valid_set_y = shared_dataset((valid_x, valid_y),
                                                      doCastLabels=True)

        # compute number of minibatches for training, validation
        n_train_batches = train_samples / batchSize
        n_valid_batches = valid_samples / batchSize

        #BUILD THE MODEL
        cost = self.cost(y)

        if valid_samples > 0:
            validate_model = theano.function(
                [index],
                self.errors(y),
                givens={
                    x: valid_set_x[index * batchSize:(index + 1) * batchSize],
                    y: valid_set_y[index * batchSize:(index + 1) * batchSize]
                })
        '''
        predict_samples = theano.function(
                inputs=[index],
                outputs=T.neq(self.y_pred, self.y),
                givens={
                        x: train_set_x[index * batchSize: (index + 1) * batchSize],
                        y: train_set_y[index * batchSize: (index + 1) * batchSize]
                }
        )
        '''
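        # evaluates the full training set in a single call; the per-minibatch
        # variant above is left commented out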
        predict_samples = theano.function([],
                                          outputs=T.neq(self.y_pred, self.y),
                                          givens={
                                              x: train_set_x,
                                              y: train_set_y,
                                          })

        gparams = []
        for param in self.params:
            gparam = T.grad(cost, param)
            gparams.append(gparam)

        updates = gradient_updates_momentum(cost, self.params, lr, m)

        train_model = theano.function(
            inputs=[index],
            outputs=cost,
            updates=updates,
            givens={
                x: train_set_x[index * batchSize:(index + 1) * batchSize],
                y: train_set_y[index * batchSize:(index + 1) * batchSize],
                lr: lr_shared,
                m: m_shared
            })

        # TRAIN THE MODEL
        print '... training'
        print 'self.best_validation_loss:', self.best_validation_loss
        best_iter = 0
        validation_frequency = 1

        start_time = time.clock()

        elapsed_time = 0
        iter = 0

        minibatch_avg_costs = []
        minibatch_index = 0

        #while (elapsed_time < self.trainTime)\
        #    and (minibatch_index<n_train_batches)\
        #    and (not self.done):
        while (minibatch_index < n_train_batches) and (not self.done):
            if (elapsed_time >= self.trainTime):
                break

            train_cost = train_model(minibatch_index)

            # test the trained samples against the target
            # values to measure the training performance
            i = minibatch_index
            '''
            probs = predict_samples(minibatch_index)
            #print 'probs:', probs.shape
            i_batch = data.i_train[ i * batchSize:(i+1)*batchSize ]
            data.p[ i_batch ] = probs
            '''
            '''
            good = np.where( probs == 0)[0]
            bad  = np.where( probs == 1)[0]
            print 'bad:', len(bad)
            print 'good:', len(good)
            #print probs
            '''
            #print '----->traincost:', type(train_cost), train_cost

            minibatch_avg_costs.append(train_cost)

            iter += 1
            #iter = (epoch - 1) * n_train_batches + minibatch_index
            if (iter + 1) % validation_frequency == 0 and valid_samples > 0:

                validation_losses = np.array(
                    [validate_model(i) for i in xrange(n_valid_batches)])
                this_validation_loss = numpy.sum(
                    validation_losses) * 100.0 / valid_samples
                elapsed_time = time.clock() - start_time
                '''
                self.reportTrainingStats(elapsed_time,
                        minibatch_index,
                        this_validation_loss,
                        minibatch_avg_costs[-1].item(0))
                '''
                print this_validation_loss, '/', self.best_validation_loss
                data.add_validation_loss(this_validation_loss)

                # if we got the best validation score until now
                if this_validation_loss < self.best_validation_loss:
                    self.best_validation_loss = this_validation_loss
                    best_iter = iter

                    self.save()
                    print "New best score!"

            # advance to next mini batch
            minibatch_index += 1

            # update elapsed time
            elapsed_time = time.clock() - start_time

        if valid_samples == 0:
            self.save()

        probs = predict_samples()
        data.p[data.i_train] = probs

        elapsed_time = time.clock() - start_time
        msg = 'The code ran for'
        status = '%f seconds' % (elapsed_time)
        Utility.report_status(msg, status)
        print 'done...'
Example #9
    def train_online(self, data):
        print 'train online...'

        def gradient_updates_momentum(cost, params, learning_rate, momentum):
            updates = []
            for param in params:
                param_update = theano.shared(param.get_value() * 0.,
                                             broadcastable=param.broadcastable)
                updates.append((param, param - learning_rate * param_update))
                updates.append((param_update, momentum * param_update +
                                (1. - momentum) * T.grad(cost, param)))
            return updates
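        # Classic velocity-based momentum: each parameter keeps a velocity
        # (param_update). Theano applies all updates from the pre-call values,
        # so the parameter step uses the velocity of the previous iteration,
        # which is then refreshed with the new gradient.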

        # DATA INITIALIZATION
        d = data.sample()
        train_x = d[0]
        train_y = d[1]
        valid_x = d[2]
        valid_y = d[3]
        reset = d[4]

        if reset:
            self.best_validation_loss = numpy.inf

        print 'best_validation:', self.best_validation_loss
        train_samples = len(train_y)
        valid_samples = len(valid_y)
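        # self.resample is False on the very first call: the else-branch below
        # then builds the symbolic label / learning-rate / momentum variables
        # and their shared values; later calls only reset the shared learning
        # rate and momentum.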

        if self.resample:
            self.lr_shared.set_value(np.float32(self.learning_rate))
            self.m_shared.set_value(np.float32(self.momentum))
        else:
            self.resample = True
            self.y = T.ivector('y')  # the labels are presented as a 1D vector of [int] labels
            self.lr = T.scalar('learning_rate')
            self.m = T.scalar('momentum')

            self.lr_shared = theano.shared(np.float32(self.learning_rate))
            self.m_shared = theano.shared(np.float32(self.momentum))

        index = T.lscalar()  # index to a [mini]batch
        x = self.x
        y = self.y
        lr = self.lr
        m = self.m
        lr_shared = self.lr_shared
        m_shared = self.m_shared
        patchSize = self.patchSize
        batchSize = self.batchSize
        train_set_x, train_set_y = shared_dataset((train_x, train_y),
                                                  doCastLabels=True)

        if valid_samples > 0:
            valid_set_x, valid_set_y = shared_dataset((valid_x, valid_y),
                                                      doCastLabels=True)

        # compute number of minibatches for training, validation
        n_train_batches = train_samples / batchSize
        n_valid_batches = valid_samples / batchSize

        #BUILD THE MODEL
        cost = self.cost(y)

        if valid_samples > 0:
            validate_model = theano.function(
                [index],
                self.errors(y),
                givens={
                    x: valid_set_x[index * batchSize:(index + 1) * batchSize],
                    y: valid_set_y[index * batchSize:(index + 1) * batchSize]
                })

        predict_samples = theano.function(
            inputs=[index],
            outputs=T.neq(self.mlp.y_pred, self.y),
            givens={
                x: train_set_x[index * batchSize:(index + 1) * batchSize],
                y: train_set_y[index * batchSize:(index + 1) * batchSize]
            })
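        # Per-minibatch 0/1 mis-classification mask; inside the training loop
        # it is written into data.p at the corresponding global sample indices
        # (data.i_train).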

        gparams = []
        for param in self.params:
            gparam = T.grad(cost, param)
            gparams.append(gparam)

        updates = gradient_updates_momentum(cost, self.params, lr, m)

        train_model = theano.function(
            inputs=[index],
            outputs=cost,
            updates=updates,
            givens={
                x: train_set_x[index * batchSize:(index + 1) * batchSize],
                y: train_set_y[index * batchSize:(index + 1) * batchSize],
                lr: lr_shared,
                m: m_shared
            })

        # TRAIN THE MODEL
        print '... training'
        best_iter = 0
        validation_frequency = 1

        start_time = time.clock()

        elapsed_time = 0
        iter = 0

        minibatch_avg_costs = []
        minibatch_index = 0

        count1 = 0
        count2 = 0


        while (elapsed_time < self.trainTime)\
            and (minibatch_index<n_train_batches)\
            and (not self.done):

            train_cost = train_model(minibatch_index)
            #print '----->traincost:', type(train_cost), train_cost

            minibatch_avg_costs.append(train_cost)

            #print 'minibatch_index:', minibatch_index, 'n_train_batches:',n_train_batches, self.batchSize,

            probs = predict_samples(minibatch_index)

            indices = data.i_train[minibatch_index *
                                   batchSize:(minibatch_index + 1) * batchSize]
            data.p[indices] = probs
            #print 'probs:', probs

            iter += 1
            if (iter + 1) % validation_frequency == 0 and n_valid_batches > 0:

                validation_losses = np.array(
                    [validate_model(i) for i in xrange(n_valid_batches)])
                this_validation_loss = numpy.sum(
                    validation_losses) * 100.0 / valid_samples

                elapsed_time = time.clock() - start_time

                data.report_stats(self.id, elapsed_time, minibatch_index,
                                  this_validation_loss,
                                  minibatch_avg_costs[-1].item(0))

                # if we got the best validation score until now
                count1 += len(np.where(probs == 0)[0])
                count2 += len(np.where(probs == 1)[0])

                data.add_validation_loss(this_validation_loss)

                if this_validation_loss < self.best_validation_loss:
                    self.best_validation_loss = this_validation_loss
                    best_iter = iter

                    print '===>saving....'
                    self.save()
                    print "New best score!"

            # advance to next mini batch
            minibatch_index += 1

            # update elapsed time
            elapsed_time = time.clock() - start_time

        data.save_stats()

        p = data.p[data.i_train]
        n_bad = len(np.where(p == 1)[0])
        error = float(n_bad) / len(p)
        print '----------'
        print 'accuracy:', data.accuracy
        print 'error:', error
        print 'lerror:', self.error
        print 'probi:', np.bincount(np.int64(p))

        if n_valid_batches == 0:
            self.save()

        elapsed_time = time.clock() - start_time
        msg = 'The code ran for'
        status = '%f seconds' % (elapsed_time)
        Utility.report_status(msg, status)
Example #10
def evaluate_lenet5(learning_rate=0.0001, n_epochs=20000, nkerns=[48, 48, 48],
                    kernelSizes=[5, 5, 5], hiddenSizes=[200], doResample=True,
                    batch_size=1, patchSize=65, train_samples=50000,
                    val_samples=10000, test_samples=1000,
                    validation_frequency=100, doEmailUpdate=False,
                    momentum=0.98, filename='tmp_cnn.pkl'):

    def gradient_updates_momentum(cost, params, learning_rate, momentum):
        updates = []
        for param in params:
            param_update = theano.shared(param.get_value()*0., broadcastable=param.broadcastable)
            updates.append((param, param - learning_rate*param_update))
            updates.append((param_update, momentum*param_update + (1. - momentum)*T.grad(cost, param)))
        return updates


    rng = numpy.random.RandomState(23455)
    
    data, norm_mean, norm_std, grayImages, labelImages, maskImages = \
        generate_experiment_data_supervised(purpose='train', nsamples=train_samples,
                                            patchSize=patchSize, balanceRate=0.5,
                                            data_mean=0.5, data_std=1.0)
    train_set_x, train_set_y = shared_dataset(data, doCastLabels=True)

    data = generate_experiment_data_supervised(purpose='validate', nsamples=val_samples,
                                               patchSize=patchSize, balanceRate=0.5,
                                               data_mean=norm_mean, data_std=norm_std)[0]
    valid_set_x, valid_set_y = shared_dataset(data, doCastLabels=True)

    data = generate_experiment_data_supervised(purpose='test', nsamples=test_samples,
                                               patchSize=patchSize, balanceRate=0.5,
                                               data_mean=norm_mean, data_std=norm_std)[0]
    test_set_x, test_set_y = shared_dataset(data, doCastLabels=True)

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_samples / batch_size
    n_valid_batches = val_samples / batch_size
    n_test_batches = test_samples / batch_size

    learning_rate_shared = theano.shared(np.float32(learning_rate))
    momentum_shared = theano.shared(np.float32(momentum))


    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # start-snippet-1
    x = T.matrix('x')   # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    lr = T.scalar('learning_rate')
    m = T.scalar('momentum')



    if doEmailUpdate:
        gmail_pwd = getpass.getpass()

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'
    
    classifier = CNN(input=x, batch_size=batch_size, patchSize=patchSize, rng=rng, 
                     nkerns=nkerns, kernelSizes=kernelSizes, hiddenSizes=hiddenSizes, 
                     fileName=filename)

    cost = classifier.cost(y)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        [index],
        classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )


    gparams = []
    for param in classifier.params:
        gparam = T.grad(cost, param)
        gparams.append(gparam)

    #SGD
#    updates = []
#    for param, gparam in zip(classifier.params, gparams):
#        updates.append((param, param - lr * gparam))

    #updates = adadelta_updates(classifier.params, gparams, lr, 0.000001)
    updates = gradient_updates_momentum(cost, classifier.params, lr, m)
    

    train_model = theano.function(inputs=[index], outputs=cost,
            updates=updates,
            givens={
                x: train_set_x[index * batch_size:(index + 1) * batch_size],
                y: train_set_y[index * batch_size:(index + 1) * batch_size],
                lr: learning_rate_shared,
                m: momentum_shared})


    ###############
    # TRAIN MODEL #
    ###############
    print '... training'

    best_validation_loss = numpy.inf
    best_iter = 0
    decrease_epoch = 1
    decrease_patience = 1
    test_score = 0.

    start_time = time.clock()

    epoch = 0
    done_looping = False

    # start pool for data
    print "Starting worker."
    pool = multiprocessing.Pool(processes=1)
    futureData = pool.apply_async(stupid_map_wrapper, [[generate_experiment_data_supervised,True, 'train', train_samples, patchSize, 0.5, 0.5, 1.0]])
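    # The single-process pool pre-generates the next round of training samples
    # in the background; futureData.get() below blocks until they are ready
    # (the result is only consumed when doResample is True and epoch > 1).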

    while (epoch < n_epochs) and (not done_looping):
        minibatch_avg_costs = []
        epoch = epoch + 1

        if doResample and epoch>1:
            print "Waiting for data."
            data = futureData.get()
            print "GOT NEW DATA"
            train_set_x.set_value(np.float32(data[0]))
            train_set_y.set_value(np.int32(data[1]))
            futureData = pool.apply_async(stupid_map_wrapper, [[generate_experiment_data_supervised,True, 'train', train_samples, patchSize, 0.5, 0.5, 1.0]])
#            try:
#                data = futureData.get(timeout=1)
#                print "GOT NEW DATA"
#                train_set_x.set_value(np.float32(data[0]))
#                train_set_y.set_value(np.int32(data[1]))
#                futureData = pool.apply_async(stupid_map_wrapper, [[generate_experiment_data_supervised,True, 'train', train_samples, patchSize, 0.5, norm_mean, 1.0]])
#            except multiprocessing.TimeoutError:
#                print "TIMEOUT, TRAINING ANOTHER ROUND WITH CURRENT DATA"
#                pass
#


        for minibatch_index in xrange(n_train_batches):
            minibatch_avg_costs.append(train_model(minibatch_index))
            # iteration number
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                classifier.save_CNN('current_cnn.pkl')
                # compute zero-one loss on validation set
                validation_losses = np.array([validate_model(i) for i
                                     in xrange(n_valid_batches)])
                this_validation_loss = numpy.sum(validation_losses) * 100.0 / val_samples 
                
                msg = 'epoch %i, minibatch %i/%i, training error %.3f, validation error %.2f %%' % (epoch, minibatch_index + 1, n_train_batches, minibatch_avg_costs[-1], this_validation_loss)

                print(msg)

                classifier.trainingCost.append(minibatch_avg_costs[-1])
                classifier.validationError.append(this_validation_loss)  # already a percentage (see above)

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    classifier.save_CNN('best_cnn_so_far.pkl')
                    print "New best score!"
                    if doEmailUpdate:
                        send_email(gmail_pwd, msg)
                    # test it on the test set
                    #test_losses = [test_model(i) for i
                    #               in xrange(n_test_batches)]
                    #test_score = numpy.mean(test_losses)
                    #
                    #print(('epoch %i, minibatch %i/%i, test error of '
                    #       'best model %f %%') %
                    #      (epoch, minibatch_index + 1, n_train_batches,
                    #       test_score * 100.))

    pool.close()
    pool.join() 
    print "Pool closed."

    end_time = time.clock()
    print(('Optimization complete. Best validation score of %f %% '
           'obtained at iteration %i, with test performance %f %%') %
          (best_validation_loss, best_iter + 1, test_score * 100.))  # best_validation_loss is already a percentage
    print >> sys.stderr, ('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))

    return classifier
Example #11
def train_mlp(learning_rate=0.01, n_epochs=10, batch_size=500, n_hidden=[500],
              patchSize=39, train_samples=10000, val_samples=10000,
              test_samples=10000, doResample=False, validation_frequency=50,
              dropout_rate=0.0, activation=rectified_linear,
              doEmailUpdate=False, momentum=0.9):

    def adadelta_updates(parameters,gradients,rho,eps):
        # create variables to store intermediate updates
        gradients_sq = [ theano.shared(np.zeros(p.get_value().shape, dtype=theano.config.floatX)) for p in parameters ]
        deltas_sq = [ theano.shared(np.zeros(p.get_value().shape, dtype=theano.config.floatX)) for p in parameters ]
        # calculates the new "average" delta for the next iteration
        gradients_sq_new = [ rho*g_sq + (1-rho)*(g**2) for g_sq,g in zip(gradients_sq,gradients) ]

        # calculates the step in direction. The square root is an approximation to getting the RMS for the average value
        deltas = [ (T.sqrt(d_sq+eps)/T.sqrt(g_sq+eps))*grad for d_sq,g_sq,grad in zip(deltas_sq,gradients_sq_new,gradients) ]

        # calculates the new "average" deltas for the next step.
        deltas_sq_new = [ rho*d_sq + (1-rho)*(d**2) for d_sq,d in zip(deltas_sq,deltas) ]

        # Prepare it as a list f
        gradient_sq_updates = zip(gradients_sq,gradients_sq_new)
        deltas_sq_updates = zip(deltas_sq,deltas_sq_new)
        parameters_updates = [ (p,p - d) for p,d in zip(parameters,deltas) ]
        return gradient_sq_updates + deltas_sq_updates + parameters_updates
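    # Note: the third and fourth arguments are the AdaDelta decay rate (rho)
    # and the stability constant (eps), not a learning rate; the commented-out
    # call further down passes the learning-rate symbol in the rho slot.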

    def gradient_updates_momentum(cost, params, learning_rate, momentum):
        updates = []
        for param in params:
            param_update = theano.shared(param.get_value()*0., broadcastable=param.broadcastable)
            updates.append((param, param - learning_rate*param_update))
            updates.append((param_update, momentum*param_update + (1. - momentum)*T.grad(cost, param)))
        return updates

    if doEmailUpdate:
        gmail_pwd = getpass.getpass()

    rng = numpy.random.RandomState(1234)

    data, norm_mean, norm_std, grayImages, labelImages, maskImages = \
        generate_experiment_data_supervised(purpose='train', nsamples=train_samples,
                                            patchSize=patchSize, balanceRate=0.5,
                                            data_mean=0.5, data_std=1.0)
    train_set_x, train_set_y = shared_dataset(data, doCastLabels=True)

    data = generate_experiment_data_supervised(purpose='validate', nsamples=val_samples,
                                               patchSize=patchSize, balanceRate=0.5,
                                               data_mean=norm_mean, data_std=norm_std)[0]
    valid_set_x, valid_set_y = shared_dataset(data, doCastLabels=True)

    data = generate_experiment_data_supervised(purpose='test', nsamples=test_samples,
                                               patchSize=patchSize, balanceRate=0.5,
                                               data_mean=norm_mean, data_std=norm_std)[0]
    test_set_x, test_set_y = shared_dataset(data, doCastLabels=True)

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_samples / batch_size
    n_valid_batches = val_samples / 1000
    n_test_batches = test_samples / 1000
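    # Note: the validation/test batch counts assume fixed 1,000-sample chunks,
    # while validate_model below slices by batch_size; with the default
    # batch_size=500 the validation loop only covers the first half of the
    # validation set.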

    learning_rate_shared = theano.shared(np.float32(learning_rate))
    momentum_shared = theano.shared(np.float32(momentum))

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels
    lr = T.scalar('learning_rate')
    m = T.scalar('momentum')

    # construct the MLP class
    classifier = MLP_dropout(rng=rng, input=x, n_in=patchSize**2,
                             n_hidden=n_hidden, n_out=2, dropout_rate=dropout_rate, activation=activation)


    cost = classifier.dropout_negative_log_likelihood(y)
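    # Training minimises the dropout negative log-likelihood, while validation
    # and testing use classifier.errors(y) (presumably the deterministic,
    # non-dropout error of MLP_dropout).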

    test_model = theano.function(inputs=[index],
                                 outputs=classifier.errors(y),
                                 givens={
                                     x: test_set_x[index * batch_size:(index + 1) * batch_size],
                                     y: test_set_y[index * batch_size:(index + 1) * batch_size]})
    validate_model = theano.function(inputs=[index],
                                     outputs=classifier.errors(y),
                                     givens={
                                         x: valid_set_x[index * batch_size:(index + 1) * batch_size],
                                         y: valid_set_y[index * batch_size:(index + 1) * batch_size]})
    gparams = []
    for param in classifier.params:
        gparam = T.grad(cost, param)
        gparams.append(gparam)

    #SGD
    updates = []
    for param, gparam in zip(classifier.params, gparams):
        updates.append((param, param - lr * gparam))
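    # Plain SGD is the active update rule in this example; the AdaDelta and
    # momentum variants are left commented out, and the momentum input to
    # train_model below is commented out to match.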

    #updates = adadelta_updates(classifier.params, gparams, lr, 0.000001)
#    updates = gradient_updates_momentum(cost, classifier.params, lr, m)


    train_model = theano.function(inputs=[index], outputs=cost,
            updates=updates,
            givens={
                x: train_set_x[index * batch_size:(index + 1) * batch_size],
                y: train_set_y[index * batch_size:(index + 1) * batch_size],
                lr: learning_rate_shared})#,
                #m: momentum_shared})

    print '... training'

    best_validation_loss = numpy.inf
    best_iter = 0
    decrease_epoch = 1
    decrease_patience = 1
    test_score = 0.

    start_time = time.clock()

    epoch = 0
    done_looping = False

    # start pool for data
    print "Starting worker."
    pool = multiprocessing.Pool(processes=1)
    futureData = pool.apply_async(stupid_map_wrapper, [[generate_experiment_data_supervised,True, 'train', train_samples, patchSize, 0.5, 0.5, 1.0]])

    while (epoch < n_epochs) and (not done_looping):
        minibatch_avg_costs = []
        epoch = epoch + 1

        if epoch % 10 == 0:
            classifier.save_MLP('train_progress.pkl')

        if doResample and epoch>1:
            print "Waiting for data."
            data = futureData.get()
            print "GOT NEW DATA"
            train_set_x.set_value(np.float32(data[0]))
            train_set_y.set_value(np.int32(data[1]))
            futureData = pool.apply_async(stupid_map_wrapper, [[generate_experiment_data_supervised,True, 'train', train_samples, patchSize, 0.5, 0.5, 1.0]])
#            try:
#                data = futureData.get(timeout=1)
#                print "GOT NEW DATA"
#                train_set_x.set_value(np.float32(data[0]))
#                train_set_y.set_value(np.int32(data[1]))
#                futureData = pool.apply_async(stupid_map_wrapper, [[generate_experiment_data_supervised,True, 'train', train_samples, patchSize, 0.5, norm_mean, 1.0]])
#            except multiprocessing.TimeoutError:
#                print "TIMEOUT, TRAINING ANOTHER ROUND WITH CURRENT DATA"
#                pass
#


        for minibatch_index in xrange(n_train_batches):
            minibatch_avg_costs.append(train_model(minibatch_index))
            # iteration number
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = [validate_model(i) for i
                                     in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)

                msg = 'epoch %i, minibatch %i/%i, training error %f, validation error %f %%' % (epoch, minibatch_index + 1, n_train_batches, minibatch_avg_costs[-1], this_validation_loss * 100.)

                print(msg)

                classifier.trainingCost.append(minibatch_avg_costs[-1])
                classifier.validationError.append(this_validation_loss*100)

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    classifier.save_MLP('best_so_far.pkl')
                    print "New best score!"
                    if doEmailUpdate:
                        send_email(gmail_pwd, msg)
                    # test it on the test set
                    #test_losses = [test_model(i) for i
                    #               in xrange(n_test_batches)]
                    #test_score = numpy.mean(test_losses)
                    #
                    #print(('epoch %i, minibatch %i/%i, test error of '
                    #       'best model %f %%') %
                    #      (epoch, minibatch_index + 1, n_train_batches,
                    #       test_score * 100.))

    pool.close()
    pool.join()
    print "Pool closed."

    end_time = time.clock()
    print(('Optimization complete. Best validation score of %f %% '
           'obtained at iteration %i, with test performance %f %%') %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))

    return classifier