def load_data(dtype=np.float32, order='F'):
    """Fetch the 'MNIST original' dataset and return a fixed train/test split.

    The feature matrix is passed through ``check_array`` with the requested
    ``dtype`` and ``order``, divided by 255, and then cut at row 60000: the
    leading rows form the training set and the remaining rows the test set.
    Nothing is cached or memory-mapped here.

    Parameters
    ----------
    dtype : data type, default=np.float32
        Forwarded to ``check_array`` as the ``dtype`` of the feature matrix.
    order : str, default='F'
        Forwarded to ``check_array`` as the memory ``order`` of the
        feature matrix.

    Returns
    -------
    X_train, X_test, y_train, y_test
        Training features, test features, training targets and test targets,
        in that order.
    """
    # Retrieve the raw dataset and validate/convert the feature matrix.
    safe_print("Loading dataset...")
    mnist = fetch_mldata('MNIST original')
    features = check_array(mnist['data'], dtype=dtype, order=order)
    targets = mnist["target"]

    # Rescale the features by the constant 255.
    features = features / 255

    # Fixed-position split: leading rows train, trailing rows test.
    safe_print("Creating train-test split...")
    split_at = 60000
    head = slice(None, split_at)
    tail = slice(split_at, None)
    return features[head], features[tail], targets[head], targets[tail]
'No Preprocessing': [ RandomForestRegressor(random_state=SEED), GradientBoostingRegressor() ] } ens.add(est, prep) ens.add(GradientBoostingRegressor(), meta=True) return ens if __name__ == '__main__': safe_print("\nML-ENSEMBLE\n") safe_print("Benchmark of ML-ENSEMBLE against Scikit-learn estimators " "on the friedman1 dataset.\n") safe_print("Scoring metric: Root Mean Squared Error.\n") safe_print("Available CPUs: %i\n" % os.cpu_count()) SEED = 2017 np.random.seed(SEED) step = 4000 mi = step mx = 40000 + step ens_multi = build_ensemble(folds=2, shuffle=False, n_jobs=-1)
parser.add_argument('--classifiers', nargs="+", choices=ESTIMATORS, type=str, default=['Subsemble', 'BlendEnsemble'], help="list of classifiers to benchmark.") parser.add_argument('--order', nargs="?", default="C", type=str, choices=["F", "C"], help="Allow to choose between fortran and C ordered " "data") args = vars(parser.parse_args()) safe_print(__doc__) X_train, X_test, y_train, y_test = load_data(order=args["order"]) safe_print("") safe_print("Dataset statistics:") safe_print("===================") safe_print("%s %d" % ("number of features:".ljust(25), X_train.shape[1])) safe_print("%s %d" % ("number of classes:".ljust(25), np.unique(y_train).size)) safe_print("%s %s" % ("data type:".ljust(25), X_train.dtype)) safe_print("%s %d (size=%dMB)" % ("number of train samples:".ljust(25), X_train.shape[0], int(X_train.nbytes / 1e6))) safe_print("%s %d (size=%dMB)" % ("number of test samples:".ljust(25), X_test.shape[0],