# Code example #1
def experiment_with_movie_lens(init_kwargs,
                               run_kwargs,
                               row_count=None,
                               with_dask=False):
    """Run a CompeteExperiment end-to-end on the MovieLens dataset.

    Trains a plain hyper-model (f1 reward metric), predicts on a held-out
    test split and checks that a score can be computed.

    Args:
        init_kwargs: extra/overriding keyword arguments for CompeteExperiment.
        run_kwargs: extra/overriding keyword arguments for experiment.run().
        row_count: optional cap on the number of dataset rows used.
        with_dask: when True, run the pipeline on a Dask dataframe.
    """
    hyper_model = create_plain_model(reward_metric='f1',
                                     with_encoder=True,
                                     with_dask=with_dask)

    df = dsutils.load_movielens()
    # Raw unix epoch seconds -> datetime objects.
    df['timestamp'] = df['timestamp'].apply(datetime.fromtimestamp)
    if row_count is not None:
        df = df.head(row_count)

    if with_dask:
        setup_dask(None)
        df = dd.from_pandas(df, npartitions=1)

    target = df.pop('rating')

    tb = get_tool_box(df, target)

    # Split off a test set, then carve an eval set out of the training data.
    X_train, X_test, y_train, y_test = tb.train_test_split(
        df, target, test_size=0.3, random_state=9527)
    X_train, X_eval, y_train, y_eval = tb.train_test_split(
        X_train, y_train, test_size=0.3, random_state=9527)

    # Defaults below; caller-supplied kwargs take precedence.
    merged_init = dict(X_eval=X_eval,
                       y_eval=y_eval,
                       X_test=X_test,
                       ensemble_size=0,
                       drift_detection=False)
    merged_init.update(init_kwargs)
    merged_run = dict(max_trials=3)
    merged_run.update(run_kwargs)

    experiment = CompeteExperiment(hyper_model, X_train, y_train, **merged_init)
    estimator = experiment.run(**merged_run)

    assert estimator

    preds = estimator.predict(X_test)
    proba = estimator.predict_proba(X_test)

    if with_dask:
        # Collect distributed results back to local arrays before scoring.
        preds, proba = tb.to_local(preds, proba)

    score = tb.metrics.calc_score(
        y_test,
        preds,
        proba,
        metrics=['auc', 'accuracy', 'f1', 'recall', 'precision'],
        task=experiment.task)
    print('evaluate score:', score)
    assert score
# Code example #2
def experiment_with_bank_data(init_kwargs,
                              run_kwargs,
                              row_count=3000,
                              with_dask=False):
    """Run a CompeteExperiment end-to-end on the bank marketing dataset.

    Encodes the 'y' target column, trains a plain hyper-model, predicts on a
    held-out test split and checks that a score can be computed.

    Args:
        init_kwargs: extra/overriding keyword arguments for CompeteExperiment.
        run_kwargs: extra/overriding keyword arguments for experiment.run().
        row_count: optional cap on the number of dataset rows used.
        with_dask: when True, run the pipeline on a Dask dataframe.
    """
    hyper_model = create_plain_model(with_encoder=True, with_dask=with_dask)

    df = dsutils.load_bank()
    if row_count is not None:
        df = df.head(row_count)
    # The target column is categorical text; encode to integer labels.
    df['y'] = LabelEncoder().fit_transform(df['y'])

    if with_dask:
        setup_dask(None)
        df = dd.from_pandas(df, npartitions=1)

    target = df.pop('y')

    tb = get_tool_box(df, target)
    scorer = tb.metrics.metric_to_scoring(hyper_model.reward_metric)

    # Split off a test set, then carve an eval set out of the training data.
    X_train, X_test, y_train, y_test = tb.train_test_split(
        df, target, test_size=0.3, random_state=9527)
    X_train, X_eval, y_train, y_eval = tb.train_test_split(
        X_train, y_train, test_size=0.3, random_state=9527)

    # Defaults below; caller-supplied kwargs take precedence.
    merged_init = dict(X_eval=X_eval,
                       y_eval=y_eval,
                       X_test=X_test,
                       scorer=scorer,
                       ensemble_size=0,
                       drift_detection=False)
    merged_init.update(init_kwargs)
    merged_run = dict(max_trials=3)
    merged_run.update(run_kwargs)

    experiment = CompeteExperiment(hyper_model, X_train, y_train, **merged_init)
    estimator = experiment.run(**merged_run)

    assert estimator

    preds = estimator.predict(X_test)
    proba = estimator.predict_proba(X_test)

    if with_dask:
        # Collect distributed results back to local arrays before scoring.
        preds, proba = tb.to_local(preds, proba)

    score = tb.metrics.calc_score(
        y_test,
        preds,
        proba,
        metrics=['auc', 'accuracy', 'f1', 'recall', 'precision'])
    print('evaluate score:', score)
    assert score
# Code example #3
def run_compete_experiment_with_heart_disease(init_kwargs, run_kwargs, with_dask=False):
    """Run a CompeteExperiment end-to-end on the UCI heart disease dataset.

    Trains the project hyper-model, predicts on a held-out test split and
    checks that a score can be computed.

    Args:
        init_kwargs: extra/overriding keyword arguments for CompeteExperiment.
        run_kwargs: extra/overriding keyword arguments for experiment.run().
        with_dask: when True, run the pipeline on a Dask dataframe.
    """
    hyper_model = create_hyper_model()
    scorer = get_scorer(metric_to_scoring(hyper_model.reward_metric))

    df = dsutils.load_heart_disease_uci()
    if with_dask:
        setup_dask(None)
        df = dex.dd.from_pandas(df, npartitions=2)

    target = df.pop('target')

    # Split off a test set, then carve an eval set out of the training data.
    X_train, X_test, y_train, y_test = dex.train_test_split(
        df, target, test_size=0.3, random_state=7)
    X_train, X_eval, y_train, y_eval = dex.train_test_split(
        X_train, y_train, test_size=0.3, random_state=7)

    # Defaults below; caller-supplied kwargs take precedence.
    merged_init = dict(X_eval=X_eval,
                       y_eval=y_eval,
                       X_test=X_test,
                       scorer=scorer,
                       ensemble_size=0,
                       drift_detection=False)
    merged_init.update(init_kwargs)
    merged_run = dict(max_trials=3, batch_size=128, epochs=1)
    merged_run.update(run_kwargs)

    experiment = CompeteExperiment(hyper_model, X_train, y_train, **merged_init)
    estimator = experiment.run(**merged_run)

    assert estimator is not None

    preds = estimator.predict(X_test)
    proba = estimator.predict_proba(X_test)

    score = calc_score(y_test, preds, proba,
                       metrics=['AUC', 'accuracy', 'f1', 'recall', 'precision'])
    print('evaluate score:', score)
    assert score