Example #1
def single_run(index):
    custom_reader = Reader('../demo/credit_data', 'train.pkl', 'train_target.pkl', 'test.pkl')
    custom_spliter = Spliter()
    data = DataLoader(custom_reader, custom_spliter)
    data.load()

    lgb_custom = LGB(config)
    base_model = Model(lgb_custom)

    evaler = Evaler()

    print("[KFold Time] Num: %d" % (index+1))
    kfoldEnsemble = KFoldEnsemble(base_model=base_model, evaler=evaler, nfold=5, seed=index, nni_log=False)
    kfoldEnsemble.fit(data)

    return kfoldEnsemble
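On its own, single_run fits one 5-fold LightGBM ensemble whose fold split is controlled by seed=index. A minimal driver sketch for repeating it across seeds (the repeat count and the list collection are assumptions, not part of the library):

# Hypothetical driver: rerun the 5-fold fit under different split seeds
# and keep each fitted ensemble for later bagging or inspection.
ensembles = [single_run(i) for i in range(3)]  # repeat count is an assumption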
Example #2
def single_run(index):
    custom_reader = Reader('../demo/pei_data', 'TRAIN.csv', 'TEST.csv')
    custom_spliter = Spliter()
    data = DataLoader(custom_reader, custom_spliter)
    data.load()

    config['param']['gpu_id'] = index

    xgb_custom = XGB(config)
    base_model = Model(xgb_custom)

    evaler = Evaler()

    print("[KFold Time] Num: %d" % (index + 1))
    kfoldEnsemble = KFoldEnsemble(base_model=base_model,
                                  evaler=evaler,
                                  nfold=5,
                                  seed=index,
                                  nni_log=False)
    kfoldEnsemble.fit(data)

    return kfoldEnsemble
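Since each call pins itself to a GPU via config['param']['gpu_id'] = index, these runs can be launched in parallel, one process per GPU. A minimal sketch, assuming single_run and config are defined at module level and four GPUs are available:

import multiprocessing as mp

if __name__ == '__main__':
    mp.set_start_method('spawn')  # safer than fork once CUDA is involved
    n_gpus = 4                    # assumption: number of available GPUs
    with mp.Pool(processes=n_gpus) as pool:
        ensembles = pool.map(single_run, range(n_gpus))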
Example #3
def main(config):
    # load data
    custom_reader = Reader('demo', 'train.pkl', 'train_target.pkl')
    custom_spliter = Spliter()
    data = DataLoader(custom_reader, custom_spliter)
    data.load()

    # initialize model
    lgb_custom = LGB(config)
    base_model = Model(lgb_custom)

    # initialize metric
    evaler = Evaler()

    # initialize method
    kfoldEnsemble = KFoldEnsemble(base_model=base_model,
                                  evaler=evaler,
                                  nfold=5,
                                  seed=0,
                                  nni_log=False)

    # start training
    kfoldEnsemble.fit(data)
Example #4
    os.makedirs(os.path.join("models", model_type), exist_ok=True)

    latest_model_path = os.path.join("models", model_type, "latest_model.pt")
    best_model_path = os.path.join("models", model_type, "best_model.pt")
    optim_path = os.path.join("models", model_type, "optim.pt")
    stats_path = os.path.join("stats", model_type, "stats.pkl")

    def init_weights(m):
        # Kaiming-initialize any module that exposes weight/bias tensors;
        # skip modules without them (activations, dropout, etc.).
        try:
            torch.nn.init.kaiming_uniform_(m.weight.data)
            m.bias.data.zero_()
        except (AttributeError, ValueError):
            pass

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = Model(num_features=num_features).to(device)
    criterion = nn.CrossEntropyLoss(ignore_index=8)
    model.apply(init_weights)
    print(type(model))

    if os.path.exists(latest_model_path):
        print("Model exists. Loading from {0}".format(latest_model_path))
        model = torch.load(latest_model_path, map_location=device)

    optimizer = optim.Adam(model.parameters(), lr=lr)

    if os.path.exists(optim_path):
        print(
            "Optimizer state dict exists. Loading from {0}".format(optim_path))
        # Avoid shadowing the `optim` module alias used for optim.Adam above.
        optim_state = torch.load(optim_path)
        optimizer.load_state_dict(optim_state['optimizer'])
Example #5
    latest_model_path = os.path.join("models", model_type, "latest_model.pt")
    best_model_path = os.path.join("models", model_type, "best_model.pt")
    optim_path = os.path.join("models", model_type, "optim.pt")
    stats_path = os.path.join("stats", model_type, "stats.pkl")


    def init_weights(m):
        # Kaiming-initialize any module that exposes weight/bias tensors;
        # skip modules without them (activations, dropout, etc.).
        try:
            torch.nn.init.kaiming_uniform_(m.weight.data)
            m.bias.data.zero_()
        except (AttributeError, ValueError):
            pass


    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = Model().to(device)
    criterion = nn.CrossEntropyLoss(ignore_index=8)
    model.apply(init_weights)
    print(type(model))

    if os.path.exists(latest_model_path):
        print("Model exists. Loading from {0}".format(latest_model_path))
        model = torch.load(latest_model_path, map_location=device)

    optimizer = optim.Adam(model.parameters(), lr=lr)

    if os.path.exists(optim_path):
        print("Optimizer state dict exists. Loading from {0}".format(optim_path))
        # Avoid shadowing the `optim` module alias used for optim.Adam above.
        optim_state = torch.load(optim_path)
        optimizer.load_state_dict(optim_state['optimizer'])
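The save side of this checkpoint layout is not shown; a minimal sketch consistent with what the loading code expects (the whole model object pickled to latest_model_path, and the optimizer state dict wrapped under an 'optimizer' key):

# Hypothetical counterpart to the loading logic above.
torch.save(model, latest_model_path)
torch.save({'optimizer': optimizer.state_dict()}, optim_path)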
Example #6
def train(train_data, valid_data, args, result_file):
    node_cnt = train_data.shape[1]
    model = Model(node_cnt, 2, args.window_size, args.multi_layer, horizon=args.horizon)
    model.to(args.device)
    if len(train_data) == 0:
        raise ValueError('Not enough training data')
    if len(valid_data) == 0:
        raise ValueError('Not enough validation data')

    if args.norm_method == 'z_score':
        train_mean = np.mean(train_data, axis=0)
        train_std = np.std(train_data, axis=0)
        normalize_statistic = {"mean": train_mean.tolist(), "std": train_std.tolist()}
    elif args.norm_method == 'min_max':
        train_min = np.min(train_data, axis=0)
        train_max = np.max(train_data, axis=0)
        normalize_statistic = {"min": train_min.tolist(), "max": train_max.tolist()}
    else:
        normalize_statistic = None
    if normalize_statistic is not None:
        with open(os.path.join(result_file, 'norm_stat.json'), 'w') as f:
            json.dump(normalize_statistic, f)

    if args.optimizer == 'RMSProp':
        my_optim = torch.optim.RMSprop(params=model.parameters(), lr=args.lr, eps=1e-08)
    else:
        my_optim = torch.optim.Adam(params=model.parameters(), lr=args.lr, betas=(0.9, 0.999))
    my_lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer=my_optim, gamma=args.decay_rate)

    train_set = ForecastDataset(train_data, window_size=args.window_size, horizon=args.horizon,
                                normalize_method=args.norm_method, norm_statistic=normalize_statistic)
    valid_set = ForecastDataset(valid_data, window_size=args.window_size, horizon=args.horizon,
                                normalize_method=args.norm_method, norm_statistic=normalize_statistic)
    train_loader = torch_data.DataLoader(train_set, batch_size=args.batch_size, drop_last=False, shuffle=True,
                                         num_workers=0)
    valid_loader = torch_data.DataLoader(valid_set, batch_size=args.batch_size, shuffle=False, num_workers=0)

    forecast_loss = nn.MSELoss(reduction='mean').to(args.device)

    total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print(f"Total Trainable Params: {total_params}")

    best_validate_mae = np.inf
    validate_score_non_decrease_count = 0
    performance_metrics = {}
    for epoch in range(args.epoch):
        epoch_start_time = time.time()
        model.train()
        loss_total = 0
        cnt = 0
        for i, (inputs, target) in enumerate(train_loader):
            inputs = inputs.to(args.device)
            target = target.to(args.device)
            model.zero_grad()
            forecast, _ = model(inputs)
            loss = forecast_loss(forecast, target)
            cnt += 1
            loss.backward()
            my_optim.step()
            loss_total += float(loss)
        print('| end of epoch {:3d} | time: {:5.2f}s | train_total_loss {:5.4f}'.format(
            epoch, time.time() - epoch_start_time, loss_total / cnt))
        save_model(model, result_file, epoch)
        if (epoch+1) % args.exponential_decay_step == 0:
            my_lr_scheduler.step()
        if (epoch + 1) % args.validate_freq == 0:
            is_best_for_now = False
            print('------ validate on data: VALIDATE ------')
            performance_metrics = validate(model, valid_loader, args.device, args.norm_method,
                                           normalize_statistic, node_cnt, args.window_size,
                                           args.horizon, result_file=result_file)
            if best_validate_mae > performance_metrics['mae']:
                best_validate_mae = performance_metrics['mae']
                is_best_for_now = True
                validate_score_non_decrease_count = 0
            else:
                validate_score_non_decrease_count += 1
            # save model
            if is_best_for_now:
                save_model(model, result_file)
        # early stop
        if args.early_stop and validate_score_non_decrease_count >= args.early_stop_step:
            break
    return performance_metrics, normalize_statistic
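ForecastDataset's normalization internals are not shown; a minimal sketch of the z-score transform implied by the statistics that train serializes to norm_stat.json (both function names are hypothetical):

import numpy as np

def z_score_normalize(data, stat):
    # Apply the per-column statistics stored in norm_stat.json.
    mean, std = np.array(stat['mean']), np.array(stat['std'])
    std = np.where(std == 0, 1.0, std)  # guard against constant columns
    return (data - mean) / std

def z_score_denormalize(data, stat):
    # Invert the transform, e.g. to report forecasts on the original scale.
    mean, std = np.array(stat['mean']), np.array(stat['std'])
    std = np.where(std == 0, 1.0, std)
    return data * std + mean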
Example #7
        'random_state': 0,
        'tree_method': 'gpu_hist',
        'gpu_id': 0,
    }
}

# load data
custom_reader = Reader('../demo/credit_data', 'train.pkl', 'train_target.pkl',
                       'test.pkl')
custom_spliter = Spliter()
data = DataLoader(custom_reader, custom_spliter)
data.load()

# initialize model
xgb_custom = XGB(config)
base_model = Model(xgb_custom)

# initialize metric
evaler = Evaler()

# initialize method
kfoldEnsemble = KFoldEnsemble(base_model=base_model,
                              evaler=evaler,
                              nfold=5,
                              seed=ii,
                              nni_log=False)
kfoldEnsemble.fit(data)

# initialize submitter
submitter = Submitter(submit_file_path='../demo/credit_data/submit.csv',