Example #1
File: main.py Project: rjgsousa/TEM
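A Python 2 / Theano-era helper that evaluates a trained stacked denoising autoencoder (SdA) on a held-out set: it builds a test function (reusing pretrained layers when options['retrain'] is nonzero), turns the predicted class probabilities into hard labels, and reports the error rate and confusion matrix on stderr.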
def evaluate_model(sda, testdata, options):

    # testdata is a (features, labels) pair; name it accordingly
    test_set_x, test_set_y = testdata

    if options['retrain'] == 0:
        test_model = sda.build_test_function(
            dataset=testdata,
            batch_size=1,  # options['batchsize']
        )
    else:
        test_model = sda.build_test_function_reuse(
            dataset=testdata,
            batch_size=1,  # options['batchsize']
        )

    (ytest, ypred, ypred_prob) = test_model()

    print >> sys.stderr, "Test GT Differences: "
    print >> sys.stderr, sum(ytest != train_set_y.eval())

    if options['oneclass'] == True:
        options['nclasses'] = 2

    # ypred      = numpy.array( ypred_prob[:,0] < options['threshold'], dtype=numpy.uint8)
    if options['threshold'] != None:
        ypred = numpy.array(ypred_prob[:, 0] < options['threshold'],
                            dtype=numpy.uint8)
    else:
        ypred = numpy.argmax(ypred_prob, axis=1)

    test_score = evaluate_error(ytest, ypred, options)

    cm = confusion_matrix(ytest, ypred, options['nclasses'])

    print >> sys.stderr, "Test CM"
    print >> sys.stderr, cm

    if options['verbose'] > 0:
        print >> sys.stderr, ('     test error of best model %.3f %%' %
                              (test_score * 100.))

    return (test_score, ytest, ypred)
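The decision rule in the middle of evaluate_model deserves a note: when options['threshold'] is set, an example is labeled class 1 exactly when the predicted probability of class 0 falls below the threshold; otherwise the most probable class wins. A minimal standalone NumPy sketch with made-up probabilities (Python 2, matching the code above):

import numpy

ypred_prob = numpy.array([[0.9, 0.1],
                          [0.4, 0.6],
                          [0.7, 0.3]])  # hypothetical [P(class 0), P(class 1)]
threshold = 0.8

# thresholded rule: label 1 iff P(class 0) < threshold
ypred_thr = numpy.array(ypred_prob[:, 0] < threshold, dtype=numpy.uint8)

# default rule: most probable class
ypred_argmax = numpy.argmax(ypred_prob, axis=1)

print ypred_thr     # [0 1 1] -- the borderline third example flips to 1
print ypred_argmax  # [0 1 0]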
Example #2
File: main.py Project: rjgsousa/TEM
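The training counterpart from the same file: unsupervised layer-wise pretraining of each denoising autoencoder (skipped when options['retrain'] is nonzero, in which case the finetuning functions reuse pretrained layers and update only the layers selected by options['retrain_ft_layers']), followed by supervised fine-tuning with early stopping on the validation error. It returns the best validation loss together with a copy of the best model.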
def pretrain_finetune_model(sda, pretraining_fns, train_set, test_set,
                            options):
    train_set_x, train_set_y = train_set

    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= options['batchsize']  # integer number of minibatches

    # keep a fallback copy so bestmodelsda is always defined, even when
    # options['retrain'] != 0 or no validation step improves on the start
    bestmodelsda = copy.copy(sda)

    if options['retrain'] == 0:
        # -----------------------------------------------
        # PRETRAINING
        # -----------------------------------------------
        if options['verbose'] > 5:
            print >> sys.stderr, ('... pre-training the model')
        start_time = time.clock()
        ## Pre-train layer-wise
        corruption_levels = options['corruptlevels']
        for i in xrange(sda.n_layers):
            # go through pretraining epochs
            for epoch in xrange(options['pretraining_epochs']):
                # go through the training set
                c = []
                for batch_index in xrange(n_train_batches):
                    c.append(pretraining_fns[i](
                        index=batch_index,
                        corruption=corruption_levels[i],
                        lr=options['pretrain_lr']))

                if epoch % 100 == 0 and options['verbose'] > 5:
                    print >> sys.stderr, (
                        'Pre-training layer %02i, epoch %04d, cost ' %
                        (i, epoch)),
                    print >> sys.stderr, (numpy.mean(c))
        end_time = time.clock()
        if options['savetimes']:
            filename = '{0:s}/times_pr_{1:03d}_{2:03d}.pkl.gz'.format(
                options['outputfolderres'], options['nrun'],
                int(options['resolution']))  # string.atoi is deprecated
            save_gzdata(filename, end_time - start_time)

        if options['verbose'] > 4:
            print >> sys.stderr, ('The pretraining code for file ' +
                                  os.path.split(__file__)[1] +
                                  ' ran for %.2fm' %
                                  ((end_time - start_time) / 60.))

        # get the training and validation functions for the model
        dataset = [train_set, test_set]

        if options['verbose'] > 5:
            print >> sys.stderr, ('... getting the finetuning functions')
        train_fn, validate_model = sda.build_finetune_functions(
            datasets=dataset,
            batch_size=options['batchsize'],
            learning_rate=options['finetune_lr'])

    else:
        dataset = [train_set, test_set]

        train_fn, validate_model = sda.build_finetune_functions_reuse(
            datasets=dataset,
            batch_size=options['batchsize'],
            learning_rate=options['finetune_lr'],
            update_layerwise=options['retrain_ft_layers'])

    # ------------------------------------------------------------------------------------------------

    # -----------------------------------------------
    # FINETUNE
    # -----------------------------------------------
    if options['verbose'] > 5:
        print >> sys.stderr, ('... fine-tuning the model')
    # early-stopping parameters
    patience = 10 * n_train_batches  # look at this many examples regardless
    patience_increase = 2.  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much
                                   # is considered significant
    # go through this many minibatches before checking the network on the
    # validation set; in this case we check every epoch
    validation_frequency = min(n_train_batches, patience / 2)

    best_validation_loss = numpy.inf

    start_time = time.clock()

    done_looping = False
    epoch = 0

    while (epoch < options['training_epochs']) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):
            minibatch_avg_cost = train_fn(minibatch_index)

            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                (y_valid, y_pred, y_pred_prob) = validate_model()

                # turn probabilities into hard labels: threshold P(class 0)
                # if a threshold is given, otherwise take the most probable class
                if options['threshold'] is not None:
                    y_pred = numpy.array(
                        y_pred_prob[:, 0] < options['threshold'],
                        dtype=numpy.uint8)
                else:
                    y_pred = numpy.argmax(y_pred_prob, axis=1)

                this_validation_loss = evaluate_error(y_valid, y_pred, options)

                if epoch % 30 == 0 and options['verbose'] > 5:
                    print >> sys.stderr, (
                        'epoch %04i, minibatch %04i/%04i, validation error %.3f %%'
                        % (epoch, minibatch_index + 1, n_train_batches,
                           this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    bestmodelsda = copy.copy(sda)

                    # a one-class problem is scored as a two-class task
                    if options['oneclass']:
                        options['nclasses'] = 2

                    # improve patience if the loss improvement is good enough
                    if (this_validation_loss <
                            best_validation_loss * improvement_threshold):
                        patience = max(patience, iter * patience_increase)

                    # save the best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

            # the patience check sits outside the improvement branch: nested
            # inside it (as originally written), early stopping could almost
            # never fire, because patience grows whenever a new best is found
            if patience <= iter:
                done_looping = True
                break

    end_time = time.clock()

    if options['savetimes']:
        filename = '{0:s}/times_fn_{1:03d}_{2:03d}.pkl.gz'.format(
            options['outputfolderres'], options['nrun'],
            int(options['resolution']))  # string.atoi is deprecated
        save_gzdata(filename, end_time - start_time)

    print >> sys.stderr, ("Stopped at epoch %04i" % epoch)
    return (best_validation_loss, bestmodelsda)
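Putting the two examples together, a driver would train and then evaluate. A minimal sketch, assuming an SdA object and pretraining functions built elsewhere in the project; sda, pretraining_fns, train_set, test_set, and the option values below are placeholders, not verified against rjgsousa/TEM:

import sys

options = {
    'retrain': 0, 'batchsize': 20, 'verbose': 6,
    'pretraining_epochs': 100, 'pretrain_lr': 0.001,
    'training_epochs': 1000, 'finetune_lr': 0.1,
    'corruptlevels': [0.1, 0.2],   # one corruption level per layer
    'threshold': None, 'oneclass': False, 'nclasses': 2,
    'savetimes': False,
}

# sda, pretraining_fns, train_set and test_set come from the
# project's model- and data-building code (not shown here)
best_loss, best_sda = pretrain_finetune_model(
    sda, pretraining_fns, train_set, test_set, options)
test_score, ytest, ypred = evaluate_model(best_sda, test_set, options)
print >> sys.stderr, 'final test error: %.3f %%' % (test_score * 100.)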