Example #1
import numpy as np

from sklearn.datasets import fetch_mldata  # deprecated in scikit-learn 0.20, removed in 0.22
from sklearn.utils import check_array

from mlens.utils import safe_print


def load_data(dtype=np.float32, order='F'):
    """Load the data, then cache and memmap the train/test split"""
    ######################################################################
    # Load dataset
    safe_print("Loading dataset...")
    data = fetch_mldata('MNIST original')
    X = check_array(data['data'], dtype=dtype, order=order)
    y = data["target"]

    # Normalize features
    X = X / 255

    # Create train-test split (as [Joachims, 2006])
    safe_print("Creating train-test split...")
    n_train = 60000
    X_train = X[:n_train]
    y_train = y[:n_train]
    X_test = X[n_train:]
    y_test = y[n_train:]

    return X_train, X_test, y_train, y_test
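Note that fetch_mldata relied on the now-defunct mldata.org service and no longer exists in current scikit-learn. Below is a minimal sketch of an equivalent loader built on fetch_openml, which serves the same 70,000-sample MNIST data under the name 'mnist_784'; the helper name load_data_openml and the uint8 label cast are assumptions for illustration, not part of the original script.

import numpy as np

from sklearn.datasets import fetch_openml
from sklearn.utils import check_array


def load_data_openml(dtype=np.float32, order='F'):
    """Load MNIST via OpenML and return the same 60,000/10,000 split."""
    # 'mnist_784' is the OpenML copy of the 70,000-sample MNIST dataset.
    data = fetch_openml('mnist_784', version=1, as_frame=False)
    X = check_array(data.data, dtype=dtype, order=order)
    y = data.target.astype(np.uint8)  # OpenML serves labels as strings

    X = X / 255  # normalize features, as in the original helper

    n_train = 60000
    return X[:n_train], X[n_train:], y[:n_train], y[n_train:]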
Example #2
        'No Preprocessing': [
            RandomForestRegressor(random_state=SEED),
            GradientBoostingRegressor()
        ]
    }

    ens.add(est, prep)

    ens.add(GradientBoostingRegressor(), meta=True)

    return ens


if __name__ == '__main__':

    safe_print("\nML-ENSEMBLE\n")
    safe_print("Benchmark of ML-ENSEMBLE against Scikit-learn estimators "
               "on the friedman1 dataset.\n")
    safe_print("Scoring metric: Root Mean Squared Error.\n")

    safe_print("Available CPUs: %i\n" % os.cpu_count())

    SEED = 2017
    np.random.seed(SEED)

    step = 4000
    mi = step
    mx = 40000 + step

    ens_multi = build_ensemble(folds=2, shuffle=False, n_jobs=-1)
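Example #2 opens midway through the estimator map inside build_ensemble, so the imports and the start of the function are missing. The sketch below reconstructs a plausible preamble under stated assumptions: ML-Ensemble's SuperLearner as the ensemble class, and a StandardScaler preprocessing case alongside the visible 'No Preprocessing' case. The original benchmark's exact layer contents may differ.

import os

import numpy as np

from mlens.ensemble import SuperLearner
from mlens.utils import safe_print

from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
from sklearn.preprocessing import StandardScaler

SEED = 2017


def build_ensemble(folds=2, shuffle=False, n_jobs=-1):
    """Return a two-layer SuperLearner (sketch; layer contents assumed)."""
    ens = SuperLearner(folds=folds, shuffle=shuffle,
                       random_state=SEED, n_jobs=n_jobs)

    # Preprocessing cases mapped to the estimators run on each case;
    # the 'No Preprocessing' branch is the one visible in the snippet above.
    prep = {'Standard Scaling': [StandardScaler()],
            'No Preprocessing': []}
    est = {'Standard Scaling': [RandomForestRegressor(random_state=SEED)],
           'No Preprocessing': [
               RandomForestRegressor(random_state=SEED),
               GradientBoostingRegressor()
           ]}

    ens.add(est, prep)
    ens.add(GradientBoostingRegressor(), meta=True)
    return ens

With a preamble along these lines, the call build_ensemble(folds=2, shuffle=False, n_jobs=-1) in the __main__ block returns the two-layer ensemble being benchmarked, and os.cpu_count() and np.random.seed(SEED) resolve as well.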
Example #3
    parser.add_argument('--classifiers',
                        nargs="+",
                        choices=ESTIMATORS,
                        type=str,
                        default=['Subsemble', 'BlendEnsemble'],
                        help="list of classifiers to benchmark.")
    parser.add_argument('--order',
                        nargs="?",
                        default="C",
                        type=str,
                        choices=["F", "C"],
                        help="Allow to choose between fortran and C ordered "
                        "data")
    args = vars(parser.parse_args())

    safe_print(__doc__)

    X_train, X_test, y_train, y_test = load_data(order=args["order"])

    safe_print("")
    safe_print("Dataset statistics:")
    safe_print("===================")
    safe_print("%s %d" % ("number of features:".ljust(25), X_train.shape[1]))
    safe_print("%s %d" %
               ("number of classes:".ljust(25), np.unique(y_train).size))
    safe_print("%s %s" % ("data type:".ljust(25), X_train.dtype))
    safe_print("%s %d (size=%dMB)" %
               ("number of train samples:".ljust(25), X_train.shape[0],
                int(X_train.nbytes / 1e6)))
    safe_print("%s %d (size=%dMB)" %
               ("number of test samples:".ljust(25), X_test.shape[0],