def single_run(index):
    # load data
    custom_reader = Reader('../demo/credit_data', 'train.pkl', 'train_target.pkl', 'test.pkl')
    custom_spliter = Spliter()
    data = DataLoader(custom_reader, custom_spliter)
    data.load()

    # initialize model
    lgb_custom = LGB(config)
    base_model = Model(lgb_custom)

    # initialize metric
    evaler = Evaler()

    print("[KFold Time] Num: %d" % (index + 1))
    kfoldEnsemble = KFoldEnsemble(base_model=base_model, evaler=evaler,
                                  nfold=5, seed=index, nni_log=False)
    kfoldEnsemble.fit(data)
    return kfoldEnsemble
def single_run(index):
    # load data
    custom_reader = Reader('../demo/pei_data', 'TRAIN.csv', 'TEST.csv')
    custom_spliter = Spliter()
    data = DataLoader(custom_reader, custom_spliter)
    data.load()

    # initialize model; pin each run to its own GPU
    config['param']['gpu_id'] = index
    xgb_custom = XGB(config)
    base_model = Model(xgb_custom)

    # initialize metric
    evaler = Evaler()

    print("[KFold Time] Num: %d" % (index + 1))
    kfoldEnsemble = KFoldEnsemble(base_model=base_model, evaler=evaler,
                                  nfold=5, seed=index, nni_log=False)
    kfoldEnsemble.fit(data)
    return kfoldEnsemble
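# The two single_run variants above differ only in the data reader, the
# booster (LGB vs. XGB), and whether a GPU id is pinned per run. A minimal
# driver for them might look like the sketch below; repeating the K-fold
# ensemble over several seeds and collecting the fitted ensembles is an
# assumed usage pattern, not part of the original scripts.
def multi_seed_run(n_runs=5):
    # run the K-fold ensemble once per seed and keep every fitted ensemble
    # so that predictions can later be averaged across runs
    return [single_run(index) for index in range(n_runs)]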
def main(config):
    # load data
    custom_reader = Reader('demo', 'train.pkl', 'train_target.pkl')
    custom_spliter = Spliter()
    data = DataLoader(custom_reader, custom_spliter)

    # initialize model
    lgb_custom = LGB(config)
    base_model = Model(lgb_custom)

    # initialize metric
    evaler = Evaler()

    # initialize method
    kfoldEnsemble = KFoldEnsemble(base_model=base_model, evaler=evaler,
                                  nfold=5, seed=0, nni_log=False)

    # start training
    kfoldEnsemble.fit(data)
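# A call site for main() might look like the following sketch. The exact
# config schema is an assumption here: only a 'param' sub-dict of booster
# parameters is visible in the other snippets, so the keys below are
# illustrative LightGBM settings rather than the project's real defaults.
if __name__ == '__main__':
    config = {
        'param': {
            'objective': 'binary',
            'learning_rate': 0.05,
            'num_leaves': 31,
            'random_state': 0,
        }
    }
    main(config)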
os.makedirs(os.path.join("models", model_type), exist_ok=True)
latest_model_path = os.path.join("models", model_type, "latest_model.pt")
best_model_path = os.path.join("models", model_type, "best_model.pt")
optim_path = os.path.join("models", model_type, "optim.pt")
stats_path = os.path.join("stats", model_type, "stats.pkl")


def init_weights(m):
    # Kaiming-initialize modules that expose weight and bias tensors; skip the rest.
    try:
        torch.nn.init.kaiming_uniform_(m.weight.data)
        m.bias.data.zero_()
    except AttributeError:
        pass


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = Model(num_features=num_features).to(device)
criterion = nn.CrossEntropyLoss(ignore_index=8)
model.apply(init_weights)
print(type(model))

if os.path.exists(latest_model_path):
    print("Model exists. Loading from {0}".format(latest_model_path))
    model = torch.load(latest_model_path)

optimizer = optim.Adam(model.parameters(), lr=lr)
if os.path.exists(optim_path):
    print("Optimizer state dict exists. Loading from {0}".format(optim_path))
    optim_state = torch.load(optim_path)  # renamed to avoid shadowing the torch.optim module
    optimizer.load_state_dict(optim_state['optimizer'])
latest_model_path = os.path.join("models", model_type, "latest_model.pt")
best_model_path = os.path.join("models", model_type, "best_model.pt")
optim_path = os.path.join("models", model_type, "optim.pt")
stats_path = os.path.join("stats", model_type, "stats.pkl")


def init_weights(m):
    # Kaiming-initialize modules that expose weight and bias tensors; skip the rest.
    try:
        torch.nn.init.kaiming_uniform_(m.weight.data)
        m.bias.data.zero_()
    except AttributeError:
        pass


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = Model().to(device)
criterion = nn.CrossEntropyLoss(ignore_index=8)
model.apply(init_weights)
print(type(model))

if os.path.exists(latest_model_path):
    print("Model exists. Loading from {0}".format(latest_model_path))
    model = torch.load(latest_model_path)

optimizer = optim.Adam(model.parameters(), lr=lr)
if os.path.exists(optim_path):
    print("Optimizer state dict exists. Loading from {0}".format(optim_path))
    optim_state = torch.load(optim_path)  # renamed to avoid shadowing the torch.optim module
    optimizer.load_state_dict(optim_state['optimizer'])
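# The loading branches above imply a matching save format: the whole model
# object is serialized to latest_model_path, and the optimizer state dict is
# wrapped in a dict under the 'optimizer' key at optim_path. A checkpoint
# writer consistent with that format (not shown in the original) could be:
def save_checkpoint(model, optimizer):
    torch.save(model, latest_model_path)
    torch.save({'optimizer': optimizer.state_dict()}, optim_path)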
def train(train_data, valid_data, args, result_file):
    node_cnt = train_data.shape[1]
    model = Model(node_cnt, 2, args.window_size, args.multi_layer, horizon=args.horizon)
    model.to(args.device)

    if len(train_data) == 0:
        raise Exception('Cannot organize enough training data')
    if len(valid_data) == 0:
        raise Exception('Cannot organize enough validation data')

    # compute normalization statistics on the training split only
    if args.norm_method == 'z_score':
        train_mean = np.mean(train_data, axis=0)
        train_std = np.std(train_data, axis=0)
        normalize_statistic = {"mean": train_mean.tolist(), "std": train_std.tolist()}
    elif args.norm_method == 'min_max':
        train_min = np.min(train_data, axis=0)
        train_max = np.max(train_data, axis=0)
        normalize_statistic = {"min": train_min.tolist(), "max": train_max.tolist()}
    else:
        normalize_statistic = None
    if normalize_statistic is not None:
        with open(os.path.join(result_file, 'norm_stat.json'), 'w') as f:
            json.dump(normalize_statistic, f)

    if args.optimizer == 'RMSProp':
        my_optim = torch.optim.RMSprop(params=model.parameters(), lr=args.lr, eps=1e-08)
    else:
        my_optim = torch.optim.Adam(params=model.parameters(), lr=args.lr, betas=(0.9, 0.999))
    my_lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer=my_optim, gamma=args.decay_rate)

    train_set = ForecastDataset(train_data, window_size=args.window_size, horizon=args.horizon,
                                normalize_method=args.norm_method, norm_statistic=normalize_statistic)
    valid_set = ForecastDataset(valid_data, window_size=args.window_size, horizon=args.horizon,
                                normalize_method=args.norm_method, norm_statistic=normalize_statistic)
    train_loader = torch_data.DataLoader(train_set, batch_size=args.batch_size, drop_last=False,
                                         shuffle=True, num_workers=0)
    valid_loader = torch_data.DataLoader(valid_set, batch_size=args.batch_size, shuffle=False, num_workers=0)

    forecast_loss = nn.MSELoss(reduction='mean').to(args.device)

    total_params = 0
    for name, parameter in model.named_parameters():
        if not parameter.requires_grad:
            continue
        param = parameter.numel()
        total_params += param
    print(f"Total Trainable Params: {total_params}")

    best_validate_mae = np.inf
    validate_score_non_decrease_count = 0
    performance_metrics = {}
    for epoch in range(args.epoch):
        epoch_start_time = time.time()
        model.train()
        loss_total = 0
        cnt = 0
        for i, (inputs, target) in enumerate(train_loader):
            inputs = inputs.to(args.device)
            target = target.to(args.device)
            model.zero_grad()
            forecast, _ = model(inputs)
            loss = forecast_loss(forecast, target)
            cnt += 1
            loss.backward()
            my_optim.step()
            loss_total += float(loss)
        print('| end of epoch {:3d} | time: {:5.2f}s | train_total_loss {:5.4f}'.format(
            epoch, (time.time() - epoch_start_time), loss_total / cnt))
        save_model(model, result_file, epoch)

        if (epoch + 1) % args.exponential_decay_step == 0:
            my_lr_scheduler.step()
        if (epoch + 1) % args.validate_freq == 0:
            is_best_for_now = False
            print('------ validate on data: VALIDATE ------')
            performance_metrics = validate(model, valid_loader, args.device, args.norm_method,
                                           normalize_statistic, node_cnt, args.window_size,
                                           args.horizon, result_file=result_file)
            if best_validate_mae > performance_metrics['mae']:
                best_validate_mae = performance_metrics['mae']
                is_best_for_now = True
                validate_score_non_decrease_count = 0
            else:
                validate_score_non_decrease_count += 1
            # save model
            if is_best_for_now:
                save_model(model, result_file)
        # early stop
        if args.early_stop and validate_score_non_decrease_count >= args.early_stop_step:
            break
    return performance_metrics, normalize_statistic
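# train() relies on a save_model helper that is not shown here. A minimal
# version consistent with both call sites (with and without an epoch index)
# might look like the sketch below; the real project helper may differ, e.g.
# it may save state_dicts or use a different file naming scheme.
def save_model(model, model_dir, epoch=None):
    os.makedirs(model_dir, exist_ok=True)
    file_name = 'epoch_{}.pt'.format(epoch) if epoch is not None else 'best_model.pt'
    torch.save(model, os.path.join(model_dir, file_name))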
        'random_state': 0,
        'tree_method': 'gpu_hist',
        'gpu_id': 0,
    }
}

# load data
custom_reader = Reader('../demo/credit_data', 'train.pkl', 'train_target.pkl', 'test.pkl')
custom_spliter = Spliter()
data = DataLoader(custom_reader, custom_spliter)
data.load()

# initialize model
xgb_custom = XGB(config)
base_model = Model(xgb_custom)

# initialize metric
evaler = Evaler()

# initialize method
kfoldEnsemble = KFoldEnsemble(base_model=base_model, evaler=evaler,
                              nfold=5, seed=ii, nni_log=False)
kfoldEnsemble.fit(data)

# initialize submitter
submitter = Submitter(submit_file_path='../demo/credit_data/submit.csv',