def setup_class(cls):
    """Load the shared datasets and create the class working directory.

    Stores on ``cls``:

    * ``bank_data``  - result of ``dsutils.load_bank()`` with the ``y``
      column replaced by its ``LabelEncoder`` encoding.
    * ``movie_lens`` - result of ``dsutils.load_movielens()``.

    ``cls.work_dir`` is defined on the class outside this block.
    """
    from sklearn.preprocessing import LabelEncoder

    df = dsutils.load_bank()
    df['y'] = LabelEncoder().fit_transform(df['y'])
    cls.bank_data = df
    cls.movie_lens = dsutils.load_movielens()

    # exist_ok=True: a directory left behind by an earlier run must not
    # abort setup for the whole class with FileExistsError.
    os.makedirs(cls.work_dir, exist_ok=True)
def experiment_with_movie_lens(init_kwargs, run_kwargs, row_count=None, with_dask=False):
    """Run a small CompeteExperiment on the movielens data and score it.

    The ``rating`` column is used as the target. The data is split twice
    (70/30 each time) into train / eval / test parts, the experiment is run,
    and the returned estimator is scored on the test part.

    :param init_kwargs: extra keyword arguments for ``CompeteExperiment``;
        they override the defaults assembled here.
    :param run_kwargs: extra keyword arguments for ``experiment.run``;
        they override the default ``max_trials=3``.
    :param row_count: if not None, only the first ``row_count`` rows are used.
    :param with_dask: if True, dask is set up and the frame is converted to a
        single-partition dask DataFrame before splitting.
    """
    model = create_plain_model(reward_metric='f1', with_encoder=True, with_dask=with_dask)

    # NOTE: 'genres' is used as loaded; only 'timestamp' is converted.
    features = dsutils.load_movielens()
    features['timestamp'] = features['timestamp'].apply(datetime.fromtimestamp)
    if row_count is not None:
        features = features.head(row_count)

    if with_dask:
        setup_dask(None)
        features = dd.from_pandas(features, npartitions=1)

    target = features.pop('rating')
    toolbox = get_tool_box(features, target)

    # Both splits share the same ratio and seed.
    split_options = dict(test_size=0.3, random_state=9527)
    X_fit, X_test, y_fit, y_test = toolbox.train_test_split(features, target, **split_options)
    X_fit, X_eval, y_fit, y_eval = toolbox.train_test_split(X_fit, y_fit, **split_options)

    # Defaults first, caller-supplied values win.
    experiment_options = {
        'X_eval': X_eval,
        'y_eval': y_eval,
        'X_test': X_test,
        'ensemble_size': 0,
        'drift_detection': False,
    }
    experiment_options.update(init_kwargs)

    run_options = {'max_trials': 3}
    run_options.update(run_kwargs)

    experiment = CompeteExperiment(model, X_fit, y_fit, **experiment_options)
    estimator = experiment.run(**run_options)
    assert estimator

    preds = estimator.predict(X_test)
    proba = estimator.predict_proba(X_test)
    if with_dask:
        preds, proba = toolbox.to_local(preds, proba)

    score = toolbox.metrics.calc_score(
        y_test,
        preds,
        proba,
        metrics=['auc', 'accuracy', 'f1', 'recall', 'precision'],
        task=experiment.task,
    )
    print('evaluate score:', score)
    assert score
def test_category_datetime_text(self):
    """Check that categorical, text and datetime features are all generated.

    The movielens frame is prepared by replacing ``'|'`` with a space in
    ``genres`` and converting ``timestamp`` with ``datetime.fromtimestamp``.
    It is then passed through ``FeatureGenerationTransformer``, and one
    expected output column per feature kind is asserted.
    """
    def pipes_to_spaces(text):
        return text.replace('|', ' ')

    movies = dsutils.load_movielens()
    movies['genres'] = movies['genres'].apply(pipes_to_spaces)
    movies['timestamp'] = movies['timestamp'].apply(datetime.fromtimestamp)

    transformer = FeatureGenerationTransformer(
        task='binary',
        text_cols=['title'],
        categories_cols=['gender', 'genres'],
    )
    generated_columns = transformer.fit_transform(movies).columns.to_list()

    expected_columns = (
        'CROSS_CATEGORICAL_gender__genres',
        'TFIDF__title____0__',
        'DAY__timestamp__',
    )
    for column in expected_columns:
        assert column in generated_columns
def setup_class(cls):
    """Load the shared datasets (pandas and cudf) and create the work dir.

    Stores on ``cls``:

    * ``bank_data`` / ``bank_data_cudf`` - the bank frame with ``y`` and
      ``education`` label-encoded, as pandas and as ``cudf.from_pandas``.
    * ``boston_data`` / ``boston_data_cudf`` - result of
      ``dsutils.load_blood()``, as pandas and as ``cudf.from_pandas``.
    * ``movie_lens`` - result of ``dsutils.load_movielens()``.

    ``cls.work_dir`` is defined on the class outside this block.
    """
    from sklearn.preprocessing import LabelEncoder

    df = dsutils.load_bank()
    df['y'] = LabelEncoder().fit_transform(df['y'])  # binary task target
    df['education'] = LabelEncoder().fit_transform(
        df['education'])  # multiclass task target
    cls.bank_data = df
    cls.bank_data_cudf = cudf.from_pandas(df)

    # NOTE(review): despite the attribute name, this is populated by
    # load_blood(); the name is kept because other code may reference it.
    cls.boston_data = dsutils.load_blood()
    cls.boston_data_cudf = cudf.from_pandas(cls.boston_data)

    cls.movie_lens = dsutils.load_movielens()

    # exist_ok=True: a directory left behind by an earlier run must not
    # abort setup for the whole class with FileExistsError.
    os.makedirs(cls.work_dir, exist_ok=True)
def setup_class(cls):
    """Attach the bank and movielens datasets to the class.

    ``cls.bank_data`` receives ``dsutils.load_bank()`` and ``cls.movie_lens``
    receives ``dsutils.load_movielens()``, loaded and assigned in that order.
    """
    dataset_loaders = (
        ('bank_data', dsutils.load_bank),
        ('movie_lens', dsutils.load_movielens),
    )
    for attribute, load in dataset_loaders:
        setattr(cls, attribute, load())
def setup_class(cls):
    """Set up dask, then attach the datasets as two-partition dask frames.

    ``setup_dask(cls)`` is called first. ``cls.bank_data`` and
    ``cls.movie_lens`` are then built with ``dd.from_pandas`` from the
    corresponding ``dsutils`` loaders, using ``npartitions=2``.
    """
    setup_dask(cls)

    def as_dask(pandas_frame):
        # Both datasets use the same partition count.
        return dd.from_pandas(pandas_frame, npartitions=2)

    cls.bank_data = as_dask(dsutils.load_bank())
    cls.movie_lens = as_dask(dsutils.load_movielens())