コード例 #1
0
def run(param):
    """
    Train SidNet end-to-end: load the data, fit the model, then persist
    both the trained weights and the hyperparameters that produced them.

    :param param: parameter container (expects random_seed, data_path,
        heldout_ratio, hyper_param, epochs, output_home, paths, ...)
    """
    loader = DataLoader(random_seed=param.random_seed,
                        reduction_dimension=param.reduction_dimension,
                        reduction_iterations=param.reduction_iterations)

    # data = {train, test}, train = {X, y}, test = {X, y} according to heldout_ratio
    data = loader.load(data_path=param.data_path,
                       heldout_ratio=param.heldout_ratio)

    logger.info('Start training SidNet with the hyperparameters...')

    # fit the model with the supplied hyperparameters
    trainer = SidNetTrainer(param)
    model = trainer.train_with_hyper_param(data=data,
                                           hyper_param=param.hyper_param,
                                           epochs=param.epochs)

    # persist the weights and the exact configuration used
    logger.info('Save the trained model at {}...'.format(param.output_home))
    logger.info('- Path for the trained model: {}'.format(
        param.paths.model_output_path))
    logger.info('- Path for the hyperparameters used in the model: {}'.format(
        param.paths.param_output_path))

    torch.save(model.state_dict(), param.paths.model_output_path)
    # reset device to a plain int so the parameters stay JSON-serializable
    param.device = 0
    with open(param.paths.param_output_path, 'w') as fp:
        json.dump(param, fp)
コード例 #2
0
def single_run(index):
    """Fit one 5-fold LightGBM ensemble seeded by *index* and return it."""
    reader = Reader('../demo/credit_data', 'train.pkl', 'train_target.pkl', 'test.pkl')
    spliter = Spliter()
    dataset = DataLoader(reader, spliter)
    dataset.load()

    # wrap the configured LightGBM estimator in the generic model adapter
    base_model = Model(LGB(config))

    evaler = Evaler()

    print("[KFold Time] Num: %d" % (index + 1))
    ensemble = KFoldEnsemble(base_model=base_model,
                             evaler=evaler,
                             nfold=5,
                             seed=index,
                             nni_log=False)
    ensemble.fit(dataset)

    return ensemble
コード例 #3
0
def main(input_home='../output', dataset='BITCOIN_ALPHA', gpu_id=0):
    """
    Evaluate SidNet

    Loads the hyperparameters and trained weights saved under
    ``{input_home}/{dataset}``, rebuilds the model and data pipeline with
    the same settings, runs a forward pass on the training edges, and
    logs test AUC and macro-F1.

    :param input_home: directory where a trained model is stored
    :param dataset: dataset name
    :param gpu_id: gpu id (a negative value forces CPU)
    """

    # fall back to CPU when CUDA is unavailable or gpu_id is negative
    device = torch.device(f"cuda:{gpu_id}" if (
        torch.cuda.is_available() and gpu_id >= 0) else "cpu")

    param_output_path = f'{input_home}/{dataset}/param.json'
    model_output_path = f'{input_home}/{dataset}/model.pt'

    with open(param_output_path, 'r') as in_file:
        # restore the hyperparameters used at training time
        param = DotMap(json.load(in_file))
        param.device = device

        if param.use_torch_random_seed:
            torch.manual_seed(param.torch_seed)

        # rebuild the data pipeline with the same preprocessing settings
        data_loader = DataLoader(
            random_seed=param.random_seed,
            reduction_dimension=param.reduction_dimension,
            reduction_iterations=param.reduction_iterations)

        # data = {train, test}, train = {X, y}, test = {X, y} according to heldout_ratio
        data = data_loader.load(data_path=param.data_path,
                                heldout_ratio=param.heldout_ratio)

        trainer = SidNetTrainer(param)
        hyper_param = param.hyper_param
        converted_data = trainer.convert_data(data)

        model = SidNet(hid_dims=hyper_param.hid_dims,
                       in_dim=hyper_param.in_dim,
                       device=device,
                       num_nodes=converted_data.num_nodes,
                       num_layers=hyper_param.num_layers,
                       num_diff_layers=hyper_param.num_diff_layers,
                       c=hyper_param.c).to(device)

        # load the trained weights onto the chosen device
        model.load_state_dict(
            torch.load(model_output_path, map_location=device))

        # Forward pass over the training edges before evaluation. `loss`
        # itself is unused — presumably this call populates internal node
        # embeddings that evaluate() relies on; TODO confirm against
        # SidNet.forward before removing it.
        loss = model(nApT=converted_data.train.nApT,
                     nAmT=converted_data.train.nAmT,
                     X=converted_data.H,
                     edges=converted_data.train.edges,
                     y=converted_data.train.y)

        model.eval()
        auc, f1_scores, _ = model.evaluate(
            test_edges=converted_data.test.edges, test_y=converted_data.test.y)

        logger.info('test auc: {:.4f}'.format(auc))
        logger.info('test f1_macro:  {:.4f}'.format(f1_scores.macro))
コード例 #4
0
def single_run(index):
    """Fit one 5-fold XGBoost ensemble on GPU *index* and return it."""
    reader = Reader('../demo/pei_data', 'TRAIN.csv', 'TEST.csv')
    spliter = Spliter()
    dataset = DataLoader(reader, spliter)
    dataset.load()

    # pin this run to its own GPU
    config['param']['gpu_id'] = index

    # wrap the configured XGBoost estimator in the generic model adapter
    base_model = Model(XGB(config))

    evaler = Evaler()

    print("[KFold Time] Num: %d" % (index + 1))
    ensemble = KFoldEnsemble(base_model=base_model, evaler=evaler,
                             nfold=5, seed=index, nni_log=False)
    ensemble.fit(dataset)

    return ensemble
コード例 #5
0
                                  evaler=evaler,
                                  nfold=5,
                                  seed=index,
                                  nni_log=False)
    kfoldEnsemble.fit(data)

    return kfoldEnsemble


# Launch the K runs in parallel (one worker process per fold seed) and
# collect the fitted ensembles together with their evaluation results.
sum_res = 0
kfold_time = 5
index_list = list(range(kfold_time))
model_list = []
with ProcessPoolExecutor(max_workers=kfold_time) as executor:
    for kfold_model in executor.map(single_run, index_list):
        model_list.append(kfold_model)
        sum_res += kfold_model.eval_res

# report the average loss over all K runs
print("[Overall Summary] Train Loss: %g" % (sum_res / kfold_time))

# reload the data in the parent process for prediction/submission
custom_reader = Reader('../demo/pei_data', 'TRAIN.csv', 'TEST.csv')
custom_spliter = Spliter()
data = DataLoader(custom_reader, custom_spliter)
data.load()

# initialize submitter
# NOTE(review): submit_file_path points at credit_data while the reader
# loads pei_data — confirm this path is intended.
submitter = Submitter(submit_file_path='../demo/credit_data/submit.csv',
                      save_path='../demo',
                      file_name='pei_xgb_base.csv')

# submit your prediction
submitter.submit(model_list, data)