def setup_class(cls):
    if is_dask_installed:
        import dask.dataframe as dd
        setup_dask(cls)
        cls.boston = dd.from_pandas(dsutils.load_boston(), npartitions=1)
        cls.blood = dd.from_pandas(dsutils.load_blood(), npartitions=1)
        cls.bike_sharing = dd.from_pandas(dsutils.load_Bike_Sharing(), npartitions=1)
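# Hedged sketch, not the repo's actual helper: setup_dask(...) is called by the
# fixtures in this file but defined elsewhere. A minimal version could simply
# start (or reuse) an in-process dask.distributed client, assuming
# dask.distributed is installed; the real helper in this codebase may differ.
def setup_dask_sketch(cls):
    from dask.distributed import Client

    client = Client(processes=False)  # in-process scheduler is enough for tests
    if cls is not None:
        cls.dask_client = client  # hypothetical attribute, kept for teardown
    return client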
def experiment_with_movie_lens(init_kwargs, run_kwargs, row_count=None, with_dask=False):
    hyper_model = create_plain_model(reward_metric='f1', with_encoder=True, with_dask=with_dask)

    X = dsutils.load_movielens()
    # X['genres'] = X['genres'].apply(lambda s: s.replace('|', ' '))
    X['timestamp'] = X['timestamp'].apply(datetime.fromtimestamp)
    if row_count is not None:
        X = X.head(row_count)
    if with_dask:
        setup_dask(None)
        X = dd.from_pandas(X, npartitions=1)
    y = X.pop('rating')

    tb = get_tool_box(X, y)
    X_train, X_test, y_train, y_test = \
        tb.train_test_split(X, y, test_size=0.3, random_state=9527)
    X_train, X_eval, y_train, y_eval = \
        tb.train_test_split(X_train, y_train, test_size=0.3, random_state=9527)

    init_kwargs = {
        'X_eval': X_eval,
        'y_eval': y_eval,
        'X_test': X_test,
        'ensemble_size': 0,
        'drift_detection': False,
        **init_kwargs
    }
    run_kwargs = {
        'max_trials': 3,
        **run_kwargs
    }

    experiment = CompeteExperiment(hyper_model, X_train, y_train, **init_kwargs)
    estimator = experiment.run(**run_kwargs)
    assert estimator

    preds = estimator.predict(X_test)
    proba = estimator.predict_proba(X_test)
    if with_dask:
        preds, proba = tb.to_local(preds, proba)
    score = tb.metrics.calc_score(y_test, preds, proba,
                                  metrics=['auc', 'accuracy', 'f1', 'recall', 'precision'],
                                  task=experiment.task)
    print('evaluate score:', score)
    assert score
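# Hedged usage sketch: how a test might drive the helper above, locally and on
# dask. The helper's signature comes from this file; the test names and the
# row_count value are illustrative assumptions only.
def test_movie_lens():
    experiment_with_movie_lens({}, {}, row_count=6000)


def test_movie_lens_with_dask():
    experiment_with_movie_lens({}, {}, row_count=6000, with_dask=True)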
def experiment_with_bank_data(init_kwargs, run_kwargs, row_count=3000, with_dask=False):
    hyper_model = create_plain_model(with_encoder=True, with_dask=with_dask)

    X = dsutils.load_bank()
    if row_count is not None:
        X = X.head(row_count)
    X['y'] = LabelEncoder().fit_transform(X['y'])
    if with_dask:
        setup_dask(None)
        X = dd.from_pandas(X, npartitions=1)
    y = X.pop('y')

    tb = get_tool_box(X, y)
    scorer = tb.metrics.metric_to_scoring(hyper_model.reward_metric)
    X_train, X_test, y_train, y_test = \
        tb.train_test_split(X, y, test_size=0.3, random_state=9527)
    X_train, X_eval, y_train, y_eval = \
        tb.train_test_split(X_train, y_train, test_size=0.3, random_state=9527)

    init_kwargs = {
        'X_eval': X_eval,
        'y_eval': y_eval,
        'X_test': X_test,
        'scorer': scorer,
        'ensemble_size': 0,
        'drift_detection': False,
        **init_kwargs
    }
    run_kwargs = {
        'max_trials': 3,
        **run_kwargs
    }

    experiment = CompeteExperiment(hyper_model, X_train, y_train, **init_kwargs)
    estimator = experiment.run(**run_kwargs)
    assert estimator

    preds = estimator.predict(X_test)
    proba = estimator.predict_proba(X_test)
    if with_dask:
        preds, proba = tb.to_local(preds, proba)
    score = tb.metrics.calc_score(y_test, preds, proba,
                                  metrics=['auc', 'accuracy', 'f1', 'recall', 'precision'])
    print('evaluate score:', score)
    assert score
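# Hedged usage sketch: the bank-data helper takes per-test overrides through
# init_kwargs/run_kwargs, which are merged over the defaults above. The
# 'feature_selection' key shown here is an assumption for illustration; only
# keyword arguments actually accepted by CompeteExperiment will work.
def test_bank_data_feature_selection():
    experiment_with_bank_data(dict(feature_selection=True), {})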
def setup_class(cls):
    setup_dask(cls)

    print("Loading datasets...")
    row_count = 1000
    df = dsutils.load_adult().head(row_count)
    cls.df = dd.from_pandas(df, npartitions=2)
    cls.df_row_count = row_count
    cls.target = 14

    print(f'Class {cls.__name__} setup.')
def setup_class(self):
    setup_dask(self)

    print("Loading datasets...")
    data = dd.from_pandas(dsutils.load_glass_uci(), npartitions=2)
    self.y = data.pop(10).values
    self.X = data

    conf = deeptable.ModelConfig(metrics=['AUC'], apply_gbm_features=False)
    self.dt = deeptable.DeepTable(config=conf)

    self.X_train, self.X_test, self.y_train, self.y_test = \
        [t.persist() for t in get_tool_box(data).train_test_split(self.X, self.y,
                                                                  test_size=0.2, random_state=42)]
    self.model, self.history = self.dt.fit(self.X_train, self.y_train, batch_size=32, epochs=3)
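# Hedged follow-up sketch: with the fixture above in place, an individual test
# can score the fitted DeepTable on the held-out split. DeepTable exposes an
# evaluate() call in deeptables; the test name and the bare truthiness
# assertion are illustrative assumptions.
def test_evaluate(self):
    result = self.dt.evaluate(self.X_test, self.y_test, batch_size=32)
    assert result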
def run_compete_experiment_with_heart_disease(init_kwargs, run_kwargs, with_dask=False):
    df = dsutils.load_heart_disease_uci()
    target = 'target'
    if with_dask:
        setup_dask(None)
        df = dd.from_pandas(df, npartitions=1)

    tb = get_tool_box(df)
    train_data, test_data = tb.train_test_split(df, test_size=0.2, random_state=7)
    train_data, eval_data = tb.train_test_split(train_data, test_size=0.3, random_state=7)
    y_test = test_data.pop(target)

    init_kwargs = {
        'searcher': 'random',
        'search_space': tiny_dt_space,
        'reward_metric': 'AUC',
        'ensemble_size': 0,
        'drift_detection': False,
        **init_kwargs
    }
    run_kwargs = {
        'max_trials': 3,
        'batch_size': 16,
        'epochs': 1,
        **run_kwargs
    }

    experiment = make_experiment(train_data, target=target,
                                 eval_data=eval_data, test_data=test_data,
                                 **init_kwargs)
    estimator = experiment.run(**run_kwargs)
    assert estimator is not None

    preds = estimator.predict(test_data)
    proba = estimator.predict_proba(test_data)
    score = tb.metrics.calc_score(y_test, preds, proba,
                                  metrics=['AUC', 'accuracy', 'f1', 'recall', 'precision'])
    print('evaluate score:', score)
    assert score
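# Hedged usage sketch: running the heart-disease experiment locally versus on
# dask only flips the with_dask switch; splitting, search, and scoring are all
# handled inside the helper. Test names are illustrative assumptions.
def test_heart_disease():
    run_compete_experiment_with_heart_disease({}, {})


def test_heart_disease_with_dask():
    run_compete_experiment_with_heart_disease({}, {}, with_dask=True)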
def setup_class(self):
    TestVarLenCategoricalFeature.setup_class(self)

    setup_dask(self)
    self.df = dd.from_pandas(self.df, npartitions=2)
def setup_class(cls):
    setup_dask(cls)
    # scoring
    preds = dt2.predict(df_test, batch_size=512)
    proba = dt2.predict_proba(df_test, batch_size=512)
    print(get_tool_box(y_test).metrics.calc_score(y_test, preds, proba,
                                                  metrics=['accuracy', 'auc']))


if __name__ == '__main__':
    setup_dask(None)

    gpus = tf.config.list_physical_devices('GPU')
    bs = int(os.environ.get('BATCH_SIZE', '32'))
    es = int(os.environ.get('EPOCHS', '5'))
    if len(gpus) < 2:
        run(batch_size=bs, epochs=es)
    else:
        strategy = tf.distribute.MirroredStrategy()
        run(distribute_strategy=strategy, batch_size=len(gpus) * bs, epochs=es)

    print('done')
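# Hedged usage note: the entry point above is driven by environment variables
# read via os.environ.get, so a larger run could be launched as, e.g.:
#   BATCH_SIZE=64 EPOCHS=10 python this_script.py
# (the script name is an assumption). With two or more visible GPUs it switches
# to tf.distribute.MirroredStrategy and scales the batch size by the GPU count.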
def setup_class(self):
    setup_dask(self)
    super().setup_class(self)