def experiment_with_boston(self,
                           init_kwargs,
                           run_kwargs,
                           row_count=3000,
                           with_dask=False):
    """Build base and compete experiments on the Boston dataset and check
    the data-character summary each one reports.

    Parameters:
        init_kwargs (dict): extra kwargs merged into both experiment
            constructors; entries here override the defaults set below.
        run_kwargs (dict): unused in this check (experiments are not run).
        row_count (int or None): optional cap on rows taken from the
            freshly loaded dataset; ignored on the dask path.
        with_dask (bool): when True, reuse the preloaded ``self.boston``
            frame instead of loading and re-encoding from dsutils.
    """
    if with_dask:
        # Dask path: dataset is already prepared on the test fixture.
        X = self.boston
        y = X.pop('target')
    else:
        X = dsutils.load_boston()
        if row_count is not None:
            X = X.head(row_count)
        X['target'] = LabelEncoder().fit_transform(X['target'])
        y = X.pop('target')
        y = y.astype('float64')

    hyper_model = create_plain_model(with_encoder=True)

    tb = get_tool_box(X, y)
    X_train, X_test, y_train, y_test = \
        tb.train_test_split(X, y, test_size=0.3, random_state=9527)
    X_train, X_eval, y_train, y_eval = \
        tb.train_test_split(X_train, y_train, test_size=0.3, random_state=9527)

    # Caller-supplied init_kwargs win over these defaults (dict-unpacking order).
    init_kwargs = {
        'X_eval': X_eval,
        'y_eval': y_eval,
        'X_test': X_test,
        **init_kwargs
    }

    compete_experiment = CompeteExperiment(hyper_model, X_train, y_train,
                                           **init_kwargs)
    base_experiment = Experiment(hyper_model, X_train, y_train,
                                 **init_kwargs)

    mydict_compete = compete_experiment.get_data_character()
    mydict_base = base_experiment.get_data_character()

    assert mydict_base
    assert mydict_compete
    assert mydict_base['experimentType'] == 'base'
    assert mydict_compete['experimentType'] == 'compete'
    assert mydict_base['target']['taskType'] == 'regression'
    assert mydict_base['target']['freq'] is None
    assert mydict_base['target']['unique']
    assert mydict_base['target']['mean'] is not None
    assert mydict_base['target']['max'] is not None
    assert mydict_base['target']['min'] is not None
    assert mydict_base['target']['stdev'] is not None
    # BUGFIX: was `is 'float'` — identity comparison against a string
    # literal is implementation-dependent and raises SyntaxWarning on
    # CPython 3.8+; value equality is what is meant here.
    assert mydict_base['target']['dataType'] == 'float'
    assert len(mydict_base['targetDistribution']) <= 10
    assert mydict_base['datasetShape']['X_train']
    assert mydict_base['datasetShape']['y_train']
    assert mydict_base['datasetShape']['X_eval']
    assert mydict_base['datasetShape']['y_eval']
    assert mydict_base['datasetShape']['X_test']
    assert mydict_compete['featureDistribution']
# Beispiel #2
# 0
def experiment_with_movie_lens(init_kwargs,
                               run_kwargs,
                               row_count=None,
                               with_dask=False):
    """Run a CompeteExperiment on the MovieLens dataset end to end.

    Builds a plain model with an 'f1' reward metric, splits the data into
    train/eval/test, runs the experiment, then predicts on the held-out
    test split and scores the result.

    Parameters:
        init_kwargs (dict): extra kwargs merged into the experiment
            constructor; entries here override the defaults set below.
        run_kwargs (dict): extra kwargs merged into ``experiment.run``.
        row_count (int or None): optional cap on rows kept from the
            loaded dataset.
        with_dask (bool): when True, spin up a local dask setup and
            convert the frame to a single-partition dask DataFrame.
    """
    model = create_plain_model(reward_metric='f1',
                               with_encoder=True,
                               with_dask=with_dask)

    frame = dsutils.load_movielens()
    frame['timestamp'] = frame['timestamp'].apply(datetime.fromtimestamp)
    if row_count is not None:
        frame = frame.head(row_count)

    if with_dask:
        setup_dask(None)
        frame = dd.from_pandas(frame, npartitions=1)

    target = frame.pop('rating')

    toolbox = get_tool_box(frame, target)

    X_train, X_test, y_train, y_test = toolbox.train_test_split(
        frame, target, test_size=0.3, random_state=9527)
    X_train, X_eval, y_train, y_eval = toolbox.train_test_split(
        X_train, y_train, test_size=0.3, random_state=9527)

    # Defaults first, then caller overrides — same precedence as
    # {defaults, **init_kwargs} unpacking.
    merged_init = dict(X_eval=X_eval,
                       y_eval=y_eval,
                       X_test=X_test,
                       ensemble_size=0,
                       drift_detection=False)
    merged_init.update(init_kwargs)

    merged_run = {'max_trials': 3}
    merged_run.update(run_kwargs)

    experiment = CompeteExperiment(model, X_train, y_train, **merged_init)
    estimator = experiment.run(**merged_run)

    assert estimator

    preds = estimator.predict(X_test)
    proba = estimator.predict_proba(X_test)

    if with_dask:
        preds, proba = toolbox.to_local(preds, proba)

    score = toolbox.metrics.calc_score(
        y_test,
        preds,
        proba,
        metrics=['auc', 'accuracy', 'f1', 'recall', 'precision'],
        task=experiment.task)
    print('evaluate score:', score)
    assert score
# Beispiel #3
# 0
def experiment_with_bank_data(init_kwargs,
                              run_kwargs,
                              row_count=3000,
                              with_dask=False):
    """Run a CompeteExperiment on the bank-marketing dataset end to end.

    Encodes the 'y' column as the target, splits into train/eval/test,
    runs the experiment with a scorer derived from the model's reward
    metric, then predicts and scores on the held-out test split.

    Parameters:
        init_kwargs (dict): extra kwargs merged into the experiment
            constructor; entries here override the defaults set below.
        run_kwargs (dict): extra kwargs merged into ``experiment.run``.
        row_count (int or None): optional cap on rows kept from the
            loaded dataset.
        with_dask (bool): when True, spin up a local dask setup and
            convert the frame to a single-partition dask DataFrame.
    """
    model = create_plain_model(with_encoder=True, with_dask=with_dask)

    frame = dsutils.load_bank()
    if row_count is not None:
        frame = frame.head(row_count)
    frame['y'] = LabelEncoder().fit_transform(frame['y'])

    if with_dask:
        setup_dask(None)
        frame = dd.from_pandas(frame, npartitions=1)

    target = frame.pop('y')

    toolbox = get_tool_box(frame, target)
    scorer = toolbox.metrics.metric_to_scoring(model.reward_metric)

    X_train, X_test, y_train, y_test = toolbox.train_test_split(
        frame, target, test_size=0.3, random_state=9527)
    X_train, X_eval, y_train, y_eval = toolbox.train_test_split(
        X_train, y_train, test_size=0.3, random_state=9527)

    # Defaults first, then caller overrides — same precedence as
    # {defaults, **init_kwargs} unpacking.
    merged_init = dict(X_eval=X_eval,
                       y_eval=y_eval,
                       X_test=X_test,
                       scorer=scorer,
                       ensemble_size=0,
                       drift_detection=False)
    merged_init.update(init_kwargs)

    merged_run = {'max_trials': 3}
    merged_run.update(run_kwargs)

    experiment = CompeteExperiment(model, X_train, y_train, **merged_init)
    estimator = experiment.run(**merged_run)

    assert estimator

    preds = estimator.predict(X_test)
    proba = estimator.predict_proba(X_test)

    if with_dask:
        preds, proba = toolbox.to_local(preds, proba)

    score = toolbox.metrics.calc_score(
        y_test,
        preds,
        proba,
        metrics=['auc', 'accuracy', 'f1', 'recall', 'precision'])
    print('evaluate score:', score)
    assert score
# Beispiel #4
# 0
def run_compete_experiment_with_heart_disease(init_kwargs, run_kwargs, with_dask=False):
    """Run a CompeteExperiment on the UCI heart-disease dataset end to end.

    Splits the data into train/eval/test, runs the experiment with a
    scorer derived from the model's reward metric, then predicts and
    scores on the held-out test split.

    Parameters:
        init_kwargs (dict): extra kwargs merged into the experiment
            constructor; entries here override the defaults set below.
        run_kwargs (dict): extra kwargs merged into ``experiment.run``.
        with_dask (bool): when True, spin up a local dask setup and
            convert the frame to a two-partition dask DataFrame.
    """
    model = create_hyper_model()
    scorer = get_scorer(metric_to_scoring(model.reward_metric))

    frame = dsutils.load_heart_disease_uci()
    if with_dask:
        setup_dask(None)
        frame = dex.dd.from_pandas(frame, npartitions=2)

    target = frame.pop('target')

    X_train, X_test, y_train, y_test = dex.train_test_split(
        frame, target, test_size=0.3, random_state=7)
    X_train, X_eval, y_train, y_eval = dex.train_test_split(
        X_train, y_train, test_size=0.3, random_state=7)

    # Defaults first, then caller overrides — same precedence as
    # {defaults, **init_kwargs} unpacking.
    merged_init = dict(X_eval=X_eval, y_eval=y_eval, X_test=X_test,
                       scorer=scorer,
                       ensemble_size=0,
                       drift_detection=False)
    merged_init.update(init_kwargs)

    merged_run = dict(max_trials=3, batch_size=128, epochs=1)
    merged_run.update(run_kwargs)

    experiment = CompeteExperiment(model, X_train, y_train, **merged_init)
    estimator = experiment.run(**merged_run)

    assert estimator is not None

    preds = estimator.predict(X_test)
    proba = estimator.predict_proba(X_test)

    score = calc_score(y_test, preds, proba,
                       metrics=['AUC', 'accuracy', 'f1', 'recall', 'precision'])
    print('evaluate score:', score)
    assert score