def run(param):
    """
    Train SidNet and persist the trained model and its hyperparameters.

    :param param: parameters (must expose random_seed, reduction_dimension,
        reduction_iterations, data_path, heldout_ratio, hyper_param, epochs,
        output_home, and paths.{model_output_path, param_output_path})
    """
    loader = DataLoader(random_seed=param.random_seed,
                        reduction_dimension=param.reduction_dimension,
                        reduction_iterations=param.reduction_iterations)

    # data = {train, test}, train = {X, y}, test = {X, y} according to heldout_ratio
    data = loader.load(data_path=param.data_path,
                       heldout_ratio=param.heldout_ratio)

    logger.info('Start training SidNet with the hyperparameters...')

    # training
    model = SidNetTrainer(param).train_with_hyper_param(
        data=data,
        hyper_param=param.hyper_param,
        epochs=param.epochs)

    # save model and the parameters used to produce it
    logger.info('Save the trained model at {}...'.format(param.output_home))
    logger.info('- Path for the trained model: {}'.format(
        param.paths.model_output_path))
    logger.info('- Path for the hyperparameters used in the model: {}'.format(
        param.paths.param_output_path))

    torch.save(model.state_dict(), param.paths.model_output_path)

    # reset device to a plain int so the dumped params are JSON-serializable
    # NOTE(review): assumes json.dump can serialize `param` itself — confirm
    # its runtime type (e.g. dict-like) supports this
    param.device = 0
    with open(param.paths.param_output_path, 'w') as out_file:
        json.dump(param, out_file)
def single_run(index):
    """
    Run one 5-fold LightGBM ensemble round, seeded by *index*.

    :param index: zero-based run index; used both as the fold seed and for logging
    :return: the fitted KFoldEnsemble
    """
    reader = Reader('../demo/credit_data', 'train.pkl',
                    'train_target.pkl', 'test.pkl')
    data = DataLoader(reader, Spliter())
    data.load()

    base_model = Model(LGB(config))

    print("[KFold Time] Num: %d" % (index + 1))
    ensemble = KFoldEnsemble(base_model=base_model,
                             evaler=Evaler(),
                             nfold=5,
                             seed=index,
                             nni_log=False)
    ensemble.fit(data)
    return ensemble
def main(input_home='../output', dataset='BITCOIN_ALPHA', gpu_id=0):
    """
    Evaluate SidNet

    :param input_home: directory where a trained model is stored
    :param dataset: dataset name
    :param gpu_id: gpu id (negative forces CPU)
    """
    use_cuda = torch.cuda.is_available() and gpu_id >= 0
    device = torch.device(f"cuda:{gpu_id}" if use_cuda else "cpu")

    param_output_path = f'{input_home}/{dataset}/param.json'
    model_output_path = f'{input_home}/{dataset}/model.pt'

    # restore the hyperparameters the model was trained with
    with open(param_output_path, 'r') as in_file:
        param = DotMap(json.load(in_file))
    param.device = device

    if param.use_torch_random_seed:
        torch.manual_seed(param.torch_seed)

    loader = DataLoader(random_seed=param.random_seed,
                        reduction_dimension=param.reduction_dimension,
                        reduction_iterations=param.reduction_iterations)
    # data = {train, test}, train = {X, y}, test = {X, y} according to heldout_ratio
    data = loader.load(data_path=param.data_path,
                       heldout_ratio=param.heldout_ratio)

    trainer = SidNetTrainer(param)
    hp = param.hyper_param
    converted = trainer.convert_data(data)

    model = SidNet(hid_dims=hp.hid_dims,
                   in_dim=hp.in_dim,
                   device=device,
                   num_nodes=converted.num_nodes,
                   num_layers=hp.num_layers,
                   num_diff_layers=hp.num_diff_layers,
                   c=hp.c).to(device)
    model.load_state_dict(torch.load(model_output_path, map_location=device))

    # forward pass over the training edges before evaluation; the returned
    # loss is unused here — presumably the call populates internal state that
    # evaluate() relies on (TODO confirm against SidNet's implementation)
    loss = model(nApT=converted.train.nApT,
                 nAmT=converted.train.nAmT,
                 X=converted.H,
                 edges=converted.train.edges,
                 y=converted.train.y)

    model.eval()
    auc, f1_scores, _ = model.evaluate(test_edges=converted.test.edges,
                                       test_y=converted.test.y)
    logger.info('test auc: {:.4f}'.format(auc))
    logger.info('test f1_macro: {:.4f}'.format(f1_scores.macro))
def single_run(index):
    """
    Run one 5-fold XGBoost ensemble round, seeded by *index*.

    NOTE(review): this redefines single_run (an LGB variant exists earlier in
    the file); the later definition wins at module level.

    :param index: zero-based run index; also used as the GPU id and fold seed
    :return: the fitted KFoldEnsemble
    """
    reader = Reader('../demo/pei_data', 'TRAIN.csv', 'TEST.csv')
    data = DataLoader(reader, Spliter())
    data.load()

    # pin this run to its own GPU — NOTE(review): mutates the shared `config`
    config['param']['gpu_id'] = index
    base_model = Model(XGB(config))

    print("[KFold Time] Num: %d" % (index + 1))
    ensemble = KFoldEnsemble(base_model=base_model,
                             evaler=Evaler(),
                             nfold=5,
                             seed=index,
                             nni_log=False)
    ensemble.fit(data)
    return ensemble
# --- driver: parallel k-fold training, summary, and submission --------------
# NOTE(review): this chunk originally began with a stray duplicated tail of
# single_run ("evaler=evaler, ... return kfoldEnsemble"); that fragment is not
# valid at module level and has been removed as paste residue.

sum_res = 0
kfold_time = 5
index_list = list(range(kfold_time))
model_list = []

# start training: run the k-fold rounds in parallel, one process per seed
with ProcessPoolExecutor(max_workers=kfold_time) as executor:
    for index, kfold_model in enumerate(executor.map(single_run, index_list)):
        model_list.append(kfold_model)
        sum_res += kfold_model.eval_res

print("[Overall Summary] Train Loss: %g" % (sum_res / kfold_time))

# reload the dataset in the parent process for prediction/submission
custom_reader = Reader('../demo/pei_data', 'TRAIN.csv', 'TEST.csv')
custom_spliter = Spliter()
data = DataLoader(custom_reader, custom_spliter)
data.load()

# initialize submitter
submitter = Submitter(submit_file_path='../demo/credit_data/submit.csv',
                      save_path='../demo',
                      file_name='pei_xgb_base.csv')
# submit your prediction
submitter.submit(model_list, data)