# Module-level imports assumed by these helpers; the paths follow the
# hypernets test suites and may need adjusting for your installed version.
# `create_plain_model`, `create_hyper_model` and `setup_dask` are defined
# elsewhere in this test module.
from datetime import datetime

import dask.dataframe as dd
from sklearn.metrics import get_scorer
from sklearn.preprocessing import LabelEncoder

from hypernets.experiment import CompeteExperiment
from hypernets.tabular import get_tool_box
from hypernets.tabular import dask_ex as dex
from hypernets.tabular.datasets import dsutils
from hypernets.tabular.metrics import calc_score, metric_to_scoring


def experiment_with_movie_lens(init_kwargs, run_kwargs, row_count=None, with_dask=False):
    hyper_model = create_plain_model(reward_metric='f1', with_encoder=True, with_dask=with_dask)

    X = dsutils.load_movielens()
    # X['genres'] = X['genres'].apply(lambda s: s.replace('|', ' '))
    X['timestamp'] = X['timestamp'].apply(datetime.fromtimestamp)
    if row_count is not None:
        X = X.head(row_count)
    if with_dask:
        setup_dask(None)
        X = dd.from_pandas(X, npartitions=1)
    y = X.pop('rating')

    # hold out test and eval sets with a fixed seed so runs are reproducible
    tb = get_tool_box(X, y)
    X_train, X_test, y_train, y_test = \
        tb.train_test_split(X, y, test_size=0.3, random_state=9527)
    X_train, X_eval, y_train, y_eval = \
        tb.train_test_split(X_train, y_train, test_size=0.3, random_state=9527)

    # caller-supplied kwargs take precedence over these defaults
    init_kwargs = {
        'X_eval': X_eval,
        'y_eval': y_eval,
        'X_test': X_test,
        'ensemble_size': 0,
        'drift_detection': False,
        **init_kwargs,
    }
    run_kwargs = {
        'max_trials': 3,
        **run_kwargs,
    }

    experiment = CompeteExperiment(hyper_model, X_train, y_train, **init_kwargs)
    estimator = experiment.run(**run_kwargs)
    assert estimator

    preds = estimator.predict(X_test)
    proba = estimator.predict_proba(X_test)
    if with_dask:
        preds, proba = tb.to_local(preds, proba)
    score = tb.metrics.calc_score(y_test, preds, proba,
                                  metrics=['auc', 'accuracy', 'f1', 'recall', 'precision'],
                                  task=experiment.task)
    print('evaluate score:', score)
    assert score
def experiment_with_bank_data(init_kwargs, run_kwargs, row_count=3000, with_dask=False):
    hyper_model = create_plain_model(with_encoder=True, with_dask=with_dask)

    X = dsutils.load_bank()
    if row_count is not None:
        X = X.head(row_count)
    # encode the string target into integer class labels
    X['y'] = LabelEncoder().fit_transform(X['y'])
    if with_dask:
        setup_dask(None)
        X = dd.from_pandas(X, npartitions=1)
    y = X.pop('y')

    tb = get_tool_box(X, y)
    scorer = tb.metrics.metric_to_scoring(hyper_model.reward_metric)
    X_train, X_test, y_train, y_test = \
        tb.train_test_split(X, y, test_size=0.3, random_state=9527)
    X_train, X_eval, y_train, y_eval = \
        tb.train_test_split(X_train, y_train, test_size=0.3, random_state=9527)

    # caller-supplied kwargs take precedence over these defaults
    init_kwargs = {
        'X_eval': X_eval,
        'y_eval': y_eval,
        'X_test': X_test,
        'scorer': scorer,
        'ensemble_size': 0,
        'drift_detection': False,
        **init_kwargs,
    }
    run_kwargs = {
        'max_trials': 3,
        **run_kwargs,
    }

    experiment = CompeteExperiment(hyper_model, X_train, y_train, **init_kwargs)
    estimator = experiment.run(**run_kwargs)
    assert estimator

    preds = estimator.predict(X_test)
    proba = estimator.predict_proba(X_test)
    if with_dask:
        preds, proba = tb.to_local(preds, proba)
    score = tb.metrics.calc_score(y_test, preds, proba,
                                  metrics=['auc', 'accuracy', 'f1', 'recall', 'precision'])
    print('evaluate score:', score)
    assert score
def run_compete_experiment_with_heart_disease(init_kwargs, run_kwargs, with_dask=False):
    # note: this helper uses the older `dex`/`calc_score` utilities rather
    # than the toolbox (`get_tool_box`) API used above
    hyper_model = create_hyper_model()
    scorer = get_scorer(metric_to_scoring(hyper_model.reward_metric))

    X = dsutils.load_heart_disease_uci()
    if with_dask:
        setup_dask(None)
        X = dex.dd.from_pandas(X, npartitions=2)
    y = X.pop('target')

    X_train, X_test, y_train, y_test = \
        dex.train_test_split(X, y, test_size=0.3, random_state=7)
    X_train, X_eval, y_train, y_eval = \
        dex.train_test_split(X_train, y_train, test_size=0.3, random_state=7)

    init_kwargs = {
        'X_eval': X_eval,
        'y_eval': y_eval,
        'X_test': X_test,
        'scorer': scorer,
        'ensemble_size': 0,
        'drift_detection': False,
        **init_kwargs,
    }
    # batch_size and epochs are forwarded to the neural hyper model's fit
    run_kwargs = {
        'max_trials': 3,
        'batch_size': 128,
        'epochs': 1,
        **run_kwargs,
    }

    experiment = CompeteExperiment(hyper_model, X_train, y_train, **init_kwargs)
    estimator = experiment.run(**run_kwargs)
    assert estimator is not None

    preds = estimator.predict(X_test)
    proba = estimator.predict_proba(X_test)
    score = calc_score(y_test, preds, proba,
                       metrics=['auc', 'accuracy', 'f1', 'recall', 'precision'])
    print('evaluate score:', score)
    assert score
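

# Example usage (a minimal sketch, not part of the original suite): exercise
# each helper with its defaults. The empty dicts keep the init/run defaults
# defined above; pass overrides (e.g. {'max_trials': 5}) to change them, and
# with_dask=True to run the same experiment on a local Dask cluster.
if __name__ == '__main__':
    experiment_with_movie_lens({}, {}, row_count=3000)
    experiment_with_bank_data({}, {})
    run_compete_experiment_with_heart_disease({}, {})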