-
Notifications
You must be signed in to change notification settings - Fork 0
/
xgb_custom_training.py
45 lines (32 loc) · 1.56 KB
/
xgb_custom_training.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import numpy as np
from ml_metrics import rmse
from predict_one_by_one import predict_one_by_one
def custom_valid_scheme(model, train, valid, feats, target, agg_function, early_stopping=5, val_at_num_epoch=5):
    """Pick ``n_estimators`` by stepwise refitting with early stopping on validation RMSE.

    Round ``k`` (starting at 1) sets ``n_estimators = k * val_at_num_epoch`` on
    the estimator returned by ``model.get_model()``, refits ``model`` from
    scratch on ``train[feats]`` / ``train[target]``, predicts ``valid`` via
    ``predict_one_by_one`` and scores the result with ``rmse``. The search ends
    once ``early_stopping`` consecutive rounds fail to produce a strictly lower
    score, after which ``model`` is refitted with the best round's
    ``n_estimators``. Progress is reported with ``print``.

    Args:
        model: Object exposing ``fit(X, y)`` and ``get_model()``; the latter
            must return something accepting ``set_params(n_estimators=...)``.
        train: Training data, indexed with ``feats`` and ``target``
            (presumably a pandas DataFrame -- confirm against callers).
        valid: Validation data; ``valid[target].values`` is used as the
            ground truth for scoring.
        feats: Selector for the feature columns of ``train``.
        target: Selector for the target column of ``train``, ``valid`` and
            of the frame returned by ``predict_one_by_one``.
        agg_function: Forwarded unchanged to ``predict_one_by_one``.
        early_stopping: Number of consecutive non-improving rounds tolerated
            before the search stops. Must be at least 1.
        val_at_num_epoch: Number of estimators added per round. Must be at
            least 1.

    Returns:
        The lowest validation RMSE observed (``np.inf`` if no round ever
        scored below infinity).

    Raises:
        ValueError: If ``early_stopping`` or ``val_at_num_epoch`` is below 1.
            Previously such values silently led to fitting with a
            non-positive ``n_estimators``.
    """
    if early_stopping < 1:
        raise ValueError(f'early_stopping must be >= 1, got {early_stopping}')
    if val_at_num_epoch < 1:
        raise ValueError(f'val_at_num_epoch must be >= 1, got {val_at_num_epoch}')

    X_train = train[feats]
    y_train = train[target]

    def _fit_with_rounds(num_rounds):
        # TODO: Train based on model from previous iteration instead of from
        # scratch (although it's not a real bottleneck)
        model.get_model().set_params(n_estimators=(num_rounds * val_at_num_epoch))
        model.fit(X_train, y_train)

    def _select_valid_rows(df):
        # Handed to predict_one_by_one as ``extract_test_func``: keeps the rows
        # whose 'split' column equals 'valid'.
        return df[df['split'] == 'valid']

    rounds_without_improvement = 0
    best_score = np.inf
    best_round = 0
    current_round = 1  # 1-based so the first fit uses val_at_num_epoch estimators
    while rounds_without_improvement < early_stopping:
        _fit_with_rounds(current_round)
        new_valid = predict_one_by_one(
            train=train,
            test=valid,
            feats=feats,
            model=model,
            agg_function=agg_function,
            extract_test_func=_select_valid_rows,
        )
        score = rmse(valid[target].values, new_valid[target].values)
        print(f'RMSE on valid: {score}')
        if score < best_score:
            best_score = score
            best_round = current_round
            rounds_without_improvement = 0
        else:
            rounds_without_improvement += 1
        current_round += 1

    # The model currently holds the last (non-improving) round, so refit it
    # with the estimator count of the best round.
    # NOTE(review): if no score was ever below +inf (e.g. every RMSE was NaN),
    # best_round is still 0 here and this refit requests n_estimators=0.
    _fit_with_rounds(best_round)
    print(f"score didn't improve for {rounds_without_improvement} epochs - finished training with best score of {best_score}")
    return best_score