def test_hook_from_json_config_for_losses(tmpdir, monkeypatch, params):
    out_dir = tmpdir.join("test_hook_from_json_config_for_losses")
    config_file = tmpdir.join("config.json")
    config_file.write(get_json_config_for_losses(str(out_dir)))
    monkeypatch.setenv(CONFIG_FILE_PATH_ENV_STR, str(config_file))
    hook = Hook.create_from_json_file()
    assert has_training_ended(str(out_dir)) is False
    run_xgboost_model(hook=hook, params=params)

    trial = create_trial(str(out_dir))
    eval_metric = params["eval_metric"]
    test_metric = f"test-{eval_metric}"
    train_metric = f"train-{eval_metric}"

    if eval_metric == "rmse":
        # RMSE is a loss, so it should appear in both collections.
        assert train_metric in trial.tensor_names(collection=CollectionKeys.METRICS)
        assert train_metric in trial.tensor_names(collection=CollectionKeys.LOSSES)
        assert test_metric in trial.tensor_names(collection=CollectionKeys.METRICS)
        assert test_metric in trial.tensor_names(collection=CollectionKeys.LOSSES)
    if eval_metric == "auc" or eval_metric == "map":
        # AUC and MAP are evaluation metrics but not losses, so they should
        # only appear in the METRICS collection.
        assert train_metric in trial.tensor_names(collection=CollectionKeys.METRICS)
        assert train_metric not in trial.tensor_names(collection=CollectionKeys.LOSSES)
        assert test_metric in trial.tensor_names(collection=CollectionKeys.METRICS)
        assert test_metric not in trial.tensor_names(collection=CollectionKeys.LOSSES)
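# The get_json_config_for_losses fixture used above is defined elsewhere.
# A minimal sketch follows, assuming the JSON hook-configuration schema
# documented for smdebug ("LocalPath", "HookParameters",
# "CollectionConfigurations"); the exact keys and parameter values here
# are assumptions, not the original fixture.
import json


def get_json_config_for_losses(out_dir):
    # Hypothetical sketch: save tensors locally every step and register the
    # built-in "losses" and "metrics" collections that the test asserts on.
    return json.dumps(
        {
            "LocalPath": out_dir,
            "HookParameters": {"save_interval": "1"},
            "CollectionConfigurations": [
                {"CollectionName": "losses"},
                {"CollectionName": "metrics"},
            ],
        }
    )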
def test_hook_from_json_config_full(tmpdir, monkeypatch):
    out_dir = tmpdir.join("test_hook_from_json_config_full")
    config_file = tmpdir.join("config.json")
    config_file.write(get_json_config_full(str(out_dir)))
    monkeypatch.setenv(CONFIG_FILE_PATH_ENV_STR, str(config_file))
    hook = Hook.create_from_json_file()
    assert has_training_ended(str(out_dir)) is False
    run_xgboost_model(hook=hook)
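# run_xgboost_model is also an external helper; a minimal sketch follows,
# assuming it trains a small model on synthetic data with the smdebug hook
# attached as an xgboost.train callback. The dataset, default params, and
# round count are illustrative assumptions.
import numpy as np
import xgboost


def run_xgboost_model(hook, params=None, num_boost_round=10):
    params = dict(params or {"objective": "binary:logistic", "eval_metric": "rmse"})
    rng = np.random.RandomState(0)
    # Binary labels so that rmse, auc, and map are all valid eval metrics.
    dtrain = xgboost.DMatrix(rng.rand(100, 5), label=rng.randint(0, 2, size=100))
    dtest = xgboost.DMatrix(rng.rand(50, 5), label=rng.randint(0, 2, size=50))
    # The "train"/"test" eval names yield the train-<metric>/test-<metric>
    # tensor names asserted in the tests above.
    xgboost.train(
        params,
        dtrain,
        evals=[(dtrain, "train"), (dtest, "test")],
        num_boost_round=num_boost_round,
        callbacks=[hook],
    )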
def main():
    args = parse_args()

    train_features_path = os.path.join(args.train, 'train_features.csv')
    train_labels_path = os.path.join(args.train, 'train_labels.csv')
    val_features_path = os.path.join(args.validation, 'val_features.csv')
    val_labels_path = os.path.join(args.validation, 'val_labels.csv')

    print('Loading training dataframes...')
    df_train_features = pd.read_csv(train_features_path, header=None)
    df_train_labels = pd.read_csv(train_labels_path, header=None)

    print('Loading validation dataframes...')
    df_val_features = pd.read_csv(val_features_path, header=None)
    df_val_labels = pd.read_csv(val_labels_path, header=None)

    X = df_train_features.values
    y = df_train_labels.values.reshape(-1)
    val_X = df_val_features.values
    val_y = df_val_labels.values.reshape(-1)

    print('Train features shape: {}'.format(X.shape))
    print('Train labels shape: {}'.format(y.shape))
    print('Validation features shape: {}'.format(val_X.shape))
    print('Validation labels shape: {}'.format(val_y.shape))

    dtrain = xgboost.DMatrix(X, label=y)
    dval = xgboost.DMatrix(val_X, label=val_y)

    # Attach the datasets to the hook so it can save labels, predictions,
    # and feature importances alongside the training metrics.
    hook = Hook.create_from_json_file()
    hook.train_data = dtrain
    hook.validation_data = dval

    watchlist = [(dtrain, "train"), (dval, "validation")]

    params = {
        "max_depth": args.max_depth,
        "eta": args.eta,
        "gamma": args.gamma,
        "min_child_weight": args.min_child_weight,
        "silent": args.silent,
        "objective": args.objective,
        "eval_metric": args.eval_metric,
    }

    # The hook is passed as an XGBoost callback so tensors are saved at
    # each boosting round.
    bst = xgboost.train(
        params=params,
        dtrain=dtrain,
        evals=watchlist,
        num_boost_round=args.num_round,
        callbacks=[hook],
    )

    model_dir = os.environ.get('SM_MODEL_DIR')
    with open(os.path.join(model_dir, 'model.bin'), 'wb') as f:
        pkl.dump(bst, f)
def main():
    args = parse_args()

    train_files_path, validation_files_path = args.train, args.validation

    train_files_list = glob.glob(train_files_path + '/*.*')
    print(train_files_list)
    val_files_list = glob.glob(validation_files_path + '/*.*')
    print(val_files_list)

    print('Loading training data...')
    df_train = pd.concat(map(pd.read_csv, train_files_list))
    print('Loading validation data...')
    df_val = pd.concat(map(pd.read_csv, val_files_list))
    print('Data loading completed.')

    y = df_train.Target.values
    X = df_train.drop(['Target'], axis=1).values
    val_y = df_val.Target.values
    val_X = df_val.drop(['Target'], axis=1).values

    dtrain = xgboost.DMatrix(X, label=y)
    dval = xgboost.DMatrix(val_X, label=val_y)

    params = {
        "max_depth": args.max_depth,
        "eta": args.eta,
        "gamma": args.gamma,
        "min_child_weight": args.min_child_weight,
        "silent": args.silent,
        "objective": args.objective,
        "num_class": args.num_class,
    }

    # As in the script above, attach the datasets to the hook and pass it
    # as a training callback.
    hook = Hook.create_from_json_file()
    hook.train_data = dtrain
    hook.validation_data = dval

    watchlist = [(dtrain, "train"), (dval, "validation")]

    bst = xgboost.train(
        params=params,
        dtrain=dtrain,
        evals=watchlist,
        num_boost_round=args.num_round,
        callbacks=[hook],
    )

    model_dir = os.environ.get('SM_MODEL_DIR')
    with open(os.path.join(model_dir, 'model.bin'), 'wb') as f:
        pkl.dump(bst, f)
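# parse_args is shared boilerplate not shown in this section; a minimal
# sketch follows, assuming the usual SageMaker script conventions
# (hyperparameters as CLI flags, data channels defaulting to the
# SM_CHANNEL_* environment variables). Flag names are inferred from how
# args is used above; the defaults are illustrative assumptions.
import argparse
import os


def parse_args():
    parser = argparse.ArgumentParser()
    # XGBoost hyperparameters forwarded into the params dict.
    parser.add_argument("--max_depth", type=int, default=5)
    parser.add_argument("--eta", type=float, default=0.2)
    parser.add_argument("--gamma", type=float, default=4)
    parser.add_argument("--min_child_weight", type=int, default=6)
    parser.add_argument("--silent", type=int, default=0)
    parser.add_argument("--objective", type=str, default="multi:softmax")
    parser.add_argument("--eval_metric", type=str, default="rmse")
    parser.add_argument("--num_class", type=int, default=10)
    parser.add_argument("--num_round", type=int, default=10)
    # Input data channels provided by SageMaker.
    parser.add_argument("--train", type=str,
                        default=os.environ.get("SM_CHANNEL_TRAIN"))
    parser.add_argument("--validation", type=str,
                        default=os.environ.get("SM_CHANNEL_VALIDATION"))
    return parser.parse_args()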