Ejemplo n.º 1
0
def crossvalidation(ds,
                    trainer,
                    model,
                    nfolds=10,
                    verbose=1,
                    shuffle=False,
                    error=eval.errorrate,
                    seed=None):
    """Run k-fold cross-validation and return the error of every fold.

    The dataset is shuffled via ``ds.shuffle(seed=seed)`` and split into
    ``nfolds`` folds. Each fold serves once as the test set while a fresh
    deep copy of ``model`` is trained on the merge of all other folds.

    Args:
        ds: Dataset exposing ``shuffle(seed=...)`` and ``split(nfolds)``.
            NOTE(review): the return value of ``shuffle`` is ignored, so it
            presumably reorders ``ds`` in place -- confirm.
        trainer: Object whose ``train(model, dataset, verbose=..., shuffle=...)``
            method fits the model.
        model: Template model. It is only deep-copied, never trained directly.
        nfolds: Number of folds.
        verbose: 0 prints nothing, 1 prints one result line per fold, larger
            values also print a fold banner and the fold's wall-clock time.
            ``verbose - 1`` is forwarded to the trainer.
        shuffle: Forwarded unchanged to ``trainer.train``.
        error: Callable ``error(model, dataset)`` returning the fold error.
        seed: Forwarded to ``ds.shuffle``.

    Returns:
        ``np.array`` with one error value per fold, in fold order.
    """
    ds.shuffle(seed=seed)
    folds = ds.split(nfolds)
    err = []
    for foldidx in range(nfolds):
        foldno = foldidx + 1  # 1-based fold number used in all messages
        if verbose > 1:
            print("--------------------")
            print("Fold-%d" % foldno)
            print("--------------------")
        lm = copy.deepcopy(model)
        t1 = time()
        dtest = folds[foldidx]
        # Every fold except the held-out one. Built as a real list so it does
        # not rely on ``range`` returning a mutable list.
        trainidxs = [i for i in range(nfolds) if i != foldidx]
        # NOTE(review): ``folds`` is indexed with a list of indices, so
        # ``ds.split`` presumably returns an array-like that supports this.
        dtrain = MemoryDataset.merge(folds[trainidxs])
        trainer.train(lm, dtrain, verbose=(verbose - 1), shuffle=shuffle)
        e = error(lm, dtest)
        if verbose > 0:
            fid = ("%d" % foldno).ljust(5)
            print("%s %s" % (fid, ("%.2f" % e).rjust(5)))
        err.append(e)
        if verbose > 1:
            # Single-argument call form prints identically under the print
            # statement and the print function.
            print("Total time for fold-%d: %f" % (foldno, time() - t1))
    return np.array(err)
Ejemplo n.º 2
0
def crossvalidation(ds, trainer, model, nfolds=10, verbose=1, shuffle=False,
                    error=eval.errorrate, seed=None):
    """Estimate model error with k-fold cross-validation.

    Calls ``ds.shuffle(seed=seed)``, splits ``ds`` into ``nfolds`` folds and,
    for each fold, trains a deep copy of ``model`` on the remaining folds
    before scoring it on the held-out fold with ``error``.

    Args:
        ds: Dataset exposing ``shuffle(seed=...)`` and ``split(nfolds)``.
        trainer: Object providing
            ``train(model, dataset, verbose=..., shuffle=...)``.
        model: Template model; a deep copy is trained for each fold.
        nfolds: Number of folds.
        verbose: Output level. ``> 0`` prints a result line per fold,
            ``> 1`` additionally prints a banner and timing per fold. The
            trainer receives ``verbose - 1``.
        shuffle: Passed through to ``trainer.train``.
        error: Callable ``error(model, dataset)`` producing the fold score.
        seed: Passed through to ``ds.shuffle``.

    Returns:
        ``np.array`` of the per-fold error values, in fold order.
    """
    ds.shuffle(seed=seed)
    folds = ds.split(nfolds)
    err = []
    for foldidx in range(nfolds):
        label = foldidx + 1  # folds are reported 1-based
        if verbose > 1:
            print("--------------------")
            print("Fold-%d" % label)
            print("--------------------")
        lm = copy.deepcopy(model)
        t1 = time()
        dtest = folds[foldidx]
        # Indices of the training folds. A list comprehension avoids deleting
        # from ``range(...)``, which is not a list on every interpreter.
        trainidxs = [idx for idx in range(nfolds) if idx != foldidx]
        # NOTE(review): list-indexing ``folds`` assumes ``ds.split`` returns
        # an array-like that accepts an index list -- confirm.
        dtrain = MemoryDataset.merge(folds[trainidxs])
        trainer.train(lm, dtrain, verbose=(verbose - 1), shuffle=shuffle)
        e = error(lm, dtest)
        if verbose > 0:
            fid = ("%d" % label).ljust(5)
            print("%s %s" % (fid, ("%.2f" % e).rjust(5)))
        err.append(e)
        if verbose > 1:
            # Parenthesised single argument: same output as the old print
            # statement, and also valid with the print function.
            print("Total time for fold-%d: %f" % (label, time() - t1))
    return np.array(err)
Ejemplo n.º 3
0
def main():
    """Command-line entry point: cross-validate a linear model on one dataset.

    Parses the command line with the parser from ``parse.parseCV``, loads the
    dataset named by the single positional argument and builds a
    ``GeneralizedLinearModel`` when the dataset has more than two classes, a
    ``LinearModel`` otherwise. It then runs ``crossvalidation`` and prints a
    per-fold table followed by the mean and standard deviation of the errors.

    When ``options.epochs`` is ``-1`` the epoch count is derived from the
    training-set size as ``ceil(10**6 / training_examples)``.

    Any ``Exception`` raised along the way is reported as an ``[ERROR]`` line
    on standard output instead of propagating.
    """
    try:
        parser = parse.parseCV(cli.__version__)
        options, args = parser.parse_args()
        if len(args) != 1:
            parser.error("Incorrect number of arguments. ")

        verbose = options.verbose
        fname = args[0]
        ds = MemoryDataset.load(fname, verbose=verbose)

        if len(ds.classes) > 2:
            model = GeneralizedLinearModel(ds.dim,
                                           len(ds.classes),
                                           biasterm=options.biasterm)
        else:
            model = LinearModel(ds.dim, biasterm=options.biasterm)
        if options.epochs == -1:
            # Training set = all folds but one. The fold size is floored on
            # purpose; only the final ratio is rounded up. The original
            # applied ``math.ceil`` to an integer quotient, which made the
            # rounding a no-op.
            fold_size = ds.n // options.nfolds
            train_size = (options.nfolds - 1) * fold_size
            options.epochs = int(math.ceil(10**6 / float(train_size)))
            print("epochs:  %d" % options.epochs)

        trainer = cli.create_trainer(options)
        print("%s %s" % ("Fold".ljust(5), "Error"))
        err = crossvalidation(ds,
                              trainer,
                              model,
                              nfolds=options.nfolds,
                              shuffle=options.shuffle,
                              error=eval.errorrate,
                              verbose=verbose,
                              seed=options.seed)
        print("%s %s (%.2f)" % ("avg".ljust(5),
                                ("%.2f" % np.mean(err)).rjust(5), np.std(err)))

    except Exception as exc:
        # Top-level boundary: report the failure rather than dump a traceback.
        # The two spaces reproduce the output of ``print "[ERROR] ", exc``.
        print("[ERROR]  %s" % (exc,))
Ejemplo n.º 4
0
def main():
    """Run cross-validation from the command line and print the results.

    Steps, in order:

    1. Parse the command line with the parser from ``parse.parseCV``; exactly
       one positional argument (the dataset file) is accepted.
    2. Load the dataset with ``MemoryDataset.load``.
    3. Build a ``GeneralizedLinearModel`` for more than two classes, else a
       ``LinearModel``.
    4. If ``options.epochs`` is ``-1``, set it to
       ``ceil(10**6 / training_examples)`` and print the chosen value.
    5. Create the trainer, run ``crossvalidation`` and print the fold table
       header plus an ``avg`` line with mean and standard deviation.

    Every ``Exception`` is caught and printed as an ``[ERROR]`` line on
    standard output.
    """
    try:
        parser = parse.parseCV(cli.__version__)
        options, args = parser.parse_args()
        if len(args) != 1:
            parser.error("Incorrect number of arguments. ")

        verbose = options.verbose
        fname = args[0]
        ds = MemoryDataset.load(fname, verbose=verbose)

        if len(ds.classes) > 2:
            model = GeneralizedLinearModel(ds.dim, len(ds.classes),
                                           biasterm=options.biasterm)
        else:
            model = LinearModel(ds.dim,
                                biasterm=options.biasterm)
        if options.epochs == -1:
            # Number of examples seen per fold run: (nfolds - 1) folds of
            # floor(n / nfolds) examples each.
            examples_per_fold = ds.n // options.nfolds
            training_examples = (options.nfolds - 1) * examples_per_fold
            # True division so that ``math.ceil`` actually rounds up; with
            # integer division the quotient was already floored.
            options.epochs = int(
                math.ceil(10**6 / float(training_examples)))
            print("epochs:  %d" % options.epochs)

        trainer = cli.create_trainer(options)
        print("%s %s" % ("Fold".ljust(5), "Error"))
        err = crossvalidation(ds, trainer, model,
                              nfolds=options.nfolds,
                              shuffle=options.shuffle,
                              error=eval.errorrate,
                              verbose=verbose,
                              seed=options.seed)
        print("%s %s (%.2f)" % ("avg".ljust(5),
                                ("%.2f" % np.mean(err)).rjust(5),
                                np.std(err)))

    except Exception as exc:
        # Catch-all at the program boundary. The double space matches what
        # the former ``print "[ERROR] ", exc`` statement emitted.
        print("[ERROR]  %s" % (exc,))
Ejemplo n.º 5
0
def main():
    """Command-line entry point: train a model and/or evaluate a stored one.

    Exactly one positional argument, the data file, is required.

    Training mode (``options.test_only`` is false):
        Builds a ``GeneralizedLinearModel`` when the data has more than two
        classes and a ``LinearModel`` otherwise, checks that the trainer
        matches that choice, and trains. Optionally it then reports the
        training error, pickles the model to ``options.model_file``, and
        evaluates on ``options.test_file``. Evaluation writes predictions to
        ``options.prediction_file`` when given, else calls ``test_model``.

    Test-only mode:
        Unpickles the model from ``options.model_file`` and evaluates it on
        the data file, again either writing predictions or calling
        ``test_model``.

    Any ``Exception`` is caught and printed as an ``[ERROR]`` line on
    standard output.
    """
    try:
        parser = parse.parseSB(__version__)
        options, args = parser.parse_args()
        if len(args) != 1:
            parser.error("incorrect number of arguments (`--help` for help).")

        if options.test_only and not options.model_file:
            parser.error("option -m is required for --test-only.")

        if options.test_only and options.test_file:
            parser.error("options --test-only and -t are mutually exclusive.")

        verbose = options.verbose
        data_file = args[0]
        # In test-only mode this dataset is the evaluation data, despite the
        # variable name.
        dtrain = MemoryDataset.load(data_file, verbose=verbose)

        if not options.test_only:
            if verbose > 0:
                print("---------")
                print("Training:")
                print("---------")

            if len(dtrain.classes) > 2:
                model = GeneralizedLinearModel(dtrain.dim, len(dtrain.classes),
                                               biasterm=options.biasterm)
            else:
                model = LinearModel(dtrain.dim,
                                    biasterm=options.biasterm)

            trainer = create_trainer(options)

            # Multi-class trainers need the generalized model, and every
            # other trainer must not get one.
            if isinstance(trainer, (OVA, MaxentSGD, AveragedPerceptron)):
                if not isinstance(model, GeneralizedLinearModel):
                    raise ValueError("Multi-class classifiers "
                                     "require > 2 classes. ")
            else:
                if isinstance(model, GeneralizedLinearModel):
                    raise ValueError("%s cannot be used "
                                     "for multi-class problems." % str(trainer))
            trainer.train(model, dtrain, verbose=verbose,
                          shuffle=options.shuffle)

            if options.computetrainerror:
                test_model(model, dtrain, text="Training error")
            if options.model_file:
                # Pickle streams are bytes: binary mode avoids newline
                # translation and is required by the Python 3 pickle module.
                with open(options.model_file, 'wb') as f:
                    pickle.dump(model, f)

            if options.test_file:
                dtest = MemoryDataset.load(options.test_file,
                                           verbose=verbose)
                if options.prediction_file:
                    write_predictions(model, dtest, options.prediction_file)
                else:
                    test_model(model, dtest)
        else:
            # SECURITY NOTE(review): unpickling executes code embedded in the
            # file; only load model files from trusted sources.
            with open(options.model_file, 'rb') as f:
                model = pickle.load(f)
            if not model:
                raise Exception("cannot deserialize "
                                "model in '%s'. " % options.model_file)
            if options.prediction_file:
                write_predictions(model, dtrain, options.prediction_file)
            else:
                test_model(model, dtrain)

    except Exception as exc:
        # Top-level boundary: print the failure instead of a traceback. The
        # two spaces reproduce the output of ``print "[ERROR] ", exc``.
        print("[ERROR]  %s" % (exc,))