import copy
import math
import pickle
from time import time

import numpy as np

# MemoryDataset, LinearModel, GeneralizedLinearModel, OVA, MaxentSGD,
# AveragedPerceptron, eval, parse, cli, create_trainer, test_model and
# write_predictions are assumed to be provided by the surrounding package.


def crossvalidation(ds, trainer, model, nfolds=10, verbose=1, shuffle=False,
                    error=eval.errorrate, seed=None):
    """Run `nfolds`-fold cross-validation of `trainer`/`model` on `ds`
    and return the per-fold error rates as a numpy array."""
    ds.shuffle(seed=seed)
    folds = ds.split(nfolds)
    err = []
    for foldidx in range(nfolds):
        if verbose > 1:
            print("--------------------")
            print("Fold-%d" % (foldidx + 1))
            print("--------------------")
        # Train a fresh copy of the model on all folds except the held-out one.
        lm = copy.deepcopy(model)
        t1 = time()
        dtest = folds[foldidx]
        dtrain = MemoryDataset.merge([folds[i] for i in range(nfolds)
                                      if i != foldidx])
        trainer.train(lm, dtrain, verbose=(verbose - 1), shuffle=shuffle)
        e = error(lm, dtest)
        if verbose > 0:
            fid = ("%d" % (foldidx + 1)).ljust(5)
            print("%s %s" % (fid, ("%.2f" % e).rjust(5)))
        err.append(e)
        if verbose > 1:
            print("Total time for fold-%d: %f" % (foldidx + 1, time() - t1))
    return np.array(err)
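# --- Illustrative sketch (not part of the library) -------------------------
# crossvalidation() only relies on duck typing: any trainer exposing
# train(model, dataset, verbose=..., shuffle=...) and any error callable
# taking (model, dataset) can be plugged in.  The class and function below
# are hypothetical stand-ins; a real run would use a trainer returned by
# cli.create_trainer() together with eval.errorrate.

class _NoOpTrainer(object):
    """Hypothetical trainer satisfying the interface crossvalidation() expects."""
    def train(self, model, dataset, verbose=0, shuffle=False):
        pass  # a real trainer would fit the model weights on `dataset` here


def _constant_error(model, dataset):
    """Hypothetical error callable standing in for eval.errorrate."""
    return 0.0

# Example (assuming `ds` is a MemoryDataset, e.g. MemoryDataset.load(fname)):
#     err = crossvalidation(ds, _NoOpTrainer(), LinearModel(ds.dim),
#                           nfolds=5, error=_constant_error)
#     print("mean error: %.2f" % np.mean(err))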
def main():
    try:
        parser = parse.parseCV(cli.__version__)
        options, args = parser.parse_args()
        if len(args) != 1:
            parser.error("Incorrect number of arguments.")

        verbose = options.verbose
        fname = args[0]
        ds = MemoryDataset.load(fname, verbose=verbose)
        if len(ds.classes) > 2:
            model = GeneralizedLinearModel(ds.dim, len(ds.classes),
                                           biasterm=options.biasterm)
        else:
            model = LinearModel(ds.dim, biasterm=options.biasterm)

        if options.epochs == -1:
            # Default heuristic: pick the number of epochs so that roughly
            # 10**6 training examples are processed per fold.
            options.epochs = math.ceil(10**6 / ((options.nfolds - 1) *
                                                (ds.n / options.nfolds)))
            print("epochs: %d" % options.epochs)

        trainer = cli.create_trainer(options)
        print("%s %s" % ("Fold".ljust(5), "Error"))
        err = crossvalidation(ds, trainer, model, nfolds=options.nfolds,
                              shuffle=options.shuffle, error=eval.errorrate,
                              verbose=options.verbose, seed=options.seed)
        print("%s %s (%.2f)" % ("avg".ljust(5),
                                ("%.2f" % np.mean(err)).rjust(5),
                                np.std(err)))
    except Exception as exc:
        print("[ERROR] %s" % exc)
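# --- Illustrative sketch (not part of the library) -------------------------
# Worked example of the default-epochs heuristic in main() above: each
# training split holds (nfolds - 1) * (n / nfolds) examples, and the epoch
# count is chosen so that roughly 10**6 examples are seen per fold.  The
# helper name and the concrete numbers below are purely illustrative.

def _default_epochs(n, nfolds):
    """Hypothetical helper mirroring the heuristic in main()."""
    train_size = (nfolds - 1) * (n // nfolds)
    return int(math.ceil(10**6 / float(train_size)))

# _default_epochs(50000, 10) -> ceil(10**6 / 45000) = 23 passes per fold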
def main():
    try:
        parser = parse.parseSB(__version__)
        options, args = parser.parse_args()
        if len(args) != 1:
            parser.error("incorrect number of arguments (`--help` for help).")
        if options.test_only and not options.model_file:
            parser.error("option -m is required for --test-only.")
        if options.test_only and options.test_file:
            parser.error("options --test-only and -t are mutually exclusive.")

        verbose = options.verbose
        data_file = args[0]
        dtrain = MemoryDataset.load(data_file, verbose=verbose)

        if not options.test_only:
            if verbose > 0:
                print("---------")
                print("Training:")
                print("---------")
            if len(dtrain.classes) > 2:
                model = GeneralizedLinearModel(dtrain.dim, len(dtrain.classes),
                                               biasterm=options.biasterm)
            else:
                model = LinearModel(dtrain.dim, biasterm=options.biasterm)

            trainer = create_trainer(options)
            # Multi-class trainers need a GeneralizedLinearModel; binary
            # trainers need a plain LinearModel.
            if isinstance(trainer, (OVA, MaxentSGD, AveragedPerceptron)):
                if not isinstance(model, GeneralizedLinearModel):
                    raise ValueError("Multi-class classifiers "
                                     "require > 2 classes.")
            else:
                if isinstance(model, GeneralizedLinearModel):
                    raise ValueError("%s cannot be used "
                                     "for multi-class problems." % str(trainer))

            trainer.train(model, dtrain, verbose=verbose,
                          shuffle=options.shuffle)
            if options.computetrainerror:
                test_model(model, dtrain, text="Training error")

            if options.model_file:
                f = open(options.model_file, 'wb')
                try:
                    pickle.dump(model, f)
                finally:
                    f.close()

            if options.test_file:
                dtest = MemoryDataset.load(options.test_file, verbose=verbose)
                if options.prediction_file:
                    write_predictions(model, dtest, options.prediction_file)
                else:
                    test_model(model, dtest)
        else:
            model = None
            f = open(options.model_file, 'rb')
            try:
                model = pickle.load(f)
            finally:
                f.close()
            if model is None:
                raise Exception("cannot deserialize "
                                "model in '%s'." % options.model_file)
            if options.prediction_file:
                write_predictions(model, dtrain, options.prediction_file)
            else:
                test_model(model, dtrain)
    except Exception as exc:
        print("[ERROR] %s" % exc)
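# --- Illustrative sketch (not part of the library) -------------------------
# Minimal model-persistence helpers mirroring what main() above does with
# options.model_file; the helper names and the example path are hypothetical.

def _save_model(model, path):
    """Serialize a trained model to `path` with pickle."""
    with open(path, 'wb') as f:
        pickle.dump(model, f)


def _load_model(path):
    """Deserialize a model previously written by _save_model()."""
    with open(path, 'rb') as f:
        return pickle.load(f)

# Example: _save_model(model, "model.pkl"); model = _load_model("model.pkl")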