    return h2o_auc


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Modeling h2o')
    parser.add_argument('--dataset', '-d', help="pass dataset name", required=True)
    parser.add_argument('--seed', '-s', help='random seed', default=2020)
    args = parser.parse_args()
    data_name = str(args.dataset)
    seed = int(args.seed)

    h2o.init()
    X_train, X_test, y_train, y_test = load_data(data_name)

    start_time = time.time()
    # use the seed parsed from the command line rather than a hard-coded value
    score = h2o_train(X_train, X_test, y_train, y_test, seed=seed)
    end_time = time.time()
    hour = (end_time - start_time) / 3600.0

    # append dataset name, test AUC and wall-clock hours to the result log
    with open('h2o_result.txt', 'a') as f:
        f.write(f"{data_name}\t{score}\t{hour}\n")
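# A minimal sketch of what the h2o_train helper used above might look like,
# assuming it wraps H2OAutoML and returns the leader model's test AUC. The
# 'target' column name and the one-hour time budget are illustrative
# assumptions, not taken from the original code.
import h2o
from h2o.automl import H2OAutoML


def h2o_train_sketch(X_train, X_test, y_train, y_test, seed=2020, max_secs=3600):
    # assemble train/test frames with the label attached and convert to H2OFrames
    train_df = X_train.copy()
    train_df['target'] = y_train
    test_df = X_test.copy()
    test_df['target'] = y_test
    train_hf = h2o.H2OFrame(train_df)
    test_hf = h2o.H2OFrame(test_df)

    # binary classification: the label column must be a factor
    train_hf['target'] = train_hf['target'].asfactor()
    test_hf['target'] = test_hf['target'].asfactor()

    # run AutoML under a fixed time budget and score the leader on the test set
    aml = H2OAutoML(max_runtime_secs=max_secs, seed=seed)
    aml.train(x=list(X_train.columns), y='target', training_frame=train_hf)
    h2o_auc = aml.leader.model_performance(test_hf).auc()
    return h2o_auc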
                                  batch_size=batch_size), dim=1)
    logits = check_numpy(logits)
    return logits


if __name__ == '__main__':
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    hours = 2  # 2 hours for training
    res = []
    for data_name, d in data_config.items():
        # split dataset into train/test = 0.7 : 0.3
        X_train, X_test, y_train, y_test = load_data(data_name, combine_y=False,
                                                     split_seed=2020, test_size=0.3)

        # general feature generator
        feature_generator = AutoMLFeatureGenerator()
        print("#" * 50, 'training set preprocessing')
        X_train = feature_generator.fit_transform(X_train, drop_duplicates=False)
        print("#" * 50, 'testing set preprocessing')
        X_test = feature_generator.transform(X_test)
        feature_types_metadata = feature_generator.feature_types_metadata

        problem_type = 'binary'
        path = f'LR-{data_name}'
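# Hedged sketch only: the script is truncated after `path = f'LR-{data_name}'`,
# which suggests a logistic-regression baseline trained on the generated
# features. The encoding and hyperparameters below are assumptions, not the
# original training code.
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score


def lr_baseline_sketch(X_train, X_test, y_train, y_test, seed=2020):
    # one-hot encode any remaining categoricals consistently across train and test
    combined = pd.get_dummies(pd.concat([X_train, X_test], axis=0)).fillna(0)
    X_tr = combined.iloc[:len(X_train)]
    X_te = combined.iloc[len(X_train):]

    clf = LogisticRegression(max_iter=1000, random_state=seed)
    clf.fit(X_tr, y_train)
    proba = clf.predict_proba(X_te)[:, 1]
    return roc_auc_score(y_test, proba)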
""" wide and deep test, follow code from autogluon autogluon's NN architecture is based on wide and deep network """ from autogluon import TabularPrediction as task from data_config.data_config import load_data, data_config if __name__ == '__main__': res = {} for data_name in data_config.keys(): ylabel = data_config[data_name]['ylabel'] X_train, X_valid = load_data(data_name, combine_y=True) train_data = task.Dataset(df=X_train) test_data = task.Dataset(df=X_valid) savedir = f'{data_name}/' # where to save trained models predictor = task.fit( train_data=train_data, label=ylabel, output_directory=savedir, eval_metric='roc_auc', verbosity=2, visualizer='tensorboard', random_seed=0, save_space=True, keep_only_best=True, ) auc = predictor.evaluate(X_valid) res[data_name] = auc print(res)
import pickle

import numpy as np
import torch
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import StratifiedKFold

from data_config.data_config import data_config, load_data

if __name__ == '__main__':
    res = {}
    for data_name in data_config.keys():
        epoch = 100
        train, test, y_train, y_test = load_data(data_name, combine_y=False)
        types = train.dtypes

        # categorical_columns = []
        categorical_dims = {}
        features = list(train.columns)
        print(train.shape)
        for col in train.columns:
            if types[col] == 'object':
                # treat missing categorical values as their own category
                train[col] = train[col].fillna("VV_likely")
                test[col] = test[col].fillna("VV_likely")
                d = train[col].unique()
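# The truncated loop above follows the standard pytorch-tabnet preprocessing
# recipe (the "VV_likely" fill value comes from its census example). Below is a
# hedged sketch of how that encoding is typically completed and fed to
# TabNetClassifier; the hyperparameters are illustrative assumptions, not the
# original settings, and a recent pytorch-tabnet (>= 2.0) fit API is assumed.
from sklearn.preprocessing import LabelEncoder
from pytorch_tabnet.tab_model import TabNetClassifier


def tabnet_sketch(train, test, y_train, y_test, max_epochs=100):
    categorical_columns, categorical_dims = [], {}
    for col in train.columns:
        if train[col].dtype == 'object':
            train[col] = train[col].fillna("VV_likely")
            test[col] = test[col].fillna("VV_likely")
            enc = LabelEncoder()
            # fit on the union of train/test categories to avoid unseen labels
            enc.fit(list(train[col].astype(str)) + list(test[col].astype(str)))
            train[col] = enc.transform(train[col].astype(str))
            test[col] = enc.transform(test[col].astype(str))
            categorical_columns.append(col)
            categorical_dims[col] = len(enc.classes_)
        else:
            col_mean = train[col].mean()
            train[col] = train[col].fillna(col_mean)
            test[col] = test[col].fillna(col_mean)

    features = list(train.columns)
    cat_idxs = [features.index(c) for c in categorical_columns]
    cat_dims = [categorical_dims[c] for c in categorical_columns]

    clf = TabNetClassifier(cat_idxs=cat_idxs, cat_dims=cat_dims, cat_emb_dim=1)
    clf.fit(train.values, np.asarray(y_train),
            eval_set=[(test.values, np.asarray(y_test))],
            max_epochs=max_epochs, patience=20)
    preds = clf.predict_proba(test.values)[:, 1]
    return roc_auc_score(y_test, preds)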