Esempio n. 1
0
def test_staged_predict():
    """Canonize the staged predictions of a small CatBoost classifier.

    Fits a ``CatBoostClassifier`` (10 iterations, seed 0) on the pool built
    from ``TRAIN_FILE``, collects every array yielded by ``staged_predict``
    for the pool built from ``TEST_FILE``, saves the stacked result to
    ``PREDS_PATH`` and returns ``local_canonical_file(PREDS_PATH)``.
    """
    learn_pool = Pool(TRAIN_FILE, column_description=CD_FILE)
    eval_pool = Pool(TEST_FILE, column_description=CD_FILE)

    classifier = CatBoostClassifier(iterations=10, random_seed=0)
    classifier.fit(learn_pool)

    # Materialize the staged iterable: one entry per item it yields.
    staged = list(classifier.staged_predict(eval_pool))
    np.save(PREDS_PATH, np.array(staged))
    return local_canonical_file(PREDS_PATH)
Esempio n. 2
0
def test_staged_predict():
    """Canonize the staged predictions of a small CatBoost classifier.

    Fits a ``CatBoostClassifier`` (10 iterations, seed 0) on the pool built
    from ``TRAIN_FILE``, collects every array yielded by ``staged_predict``
    for the pool built from ``TEST_FILE``, saves the stacked result to
    ``PREDS_PATH`` and returns ``local_canonical_file(PREDS_PATH)``.
    """
    learn_pool = Pool(TRAIN_FILE, column_description=CD_FILE)
    eval_pool = Pool(TEST_FILE, column_description=CD_FILE)

    classifier = CatBoostClassifier(iterations=10, random_seed=0)
    classifier.fit(learn_pool)

    # Materialize the staged iterable: one entry per item it yields.
    staged = list(classifier.staged_predict(eval_pool))
    np.save(PREDS_PATH, np.array(staged))
    return local_canonical_file(PREDS_PATH)
# Fit the model (constructed earlier in the script) on the training pool.
model.fit(train_pool)
# Only prediction_type='RawFormulaVal' is allowed with a custom `loss_function`,
# so that prediction type is requested explicitly for X_test.
preds_raw = model.predict(X_test, prediction_type='RawFormulaVal')

# ### 3.7 Staged Predict
# The CatBoost model has a `staged_predict` method. It lets you iteratively
# obtain predictions for a given range of trees.

# In[29]:

# Build a small silent classifier, then train it on the training pool.
model = CatBoostClassifier(iterations=10, random_seed=42, logging_level='Silent')
model.fit(train_pool)

# Tree range and step shared by `staged_predict` and the reporting loop below.
ntree_start, ntree_end, eval_period = 3, 9, 2
predictions_iterator = model.staged_predict(
    validate_pool,
    prediction_type='Probability',
    ntree_start=ntree_start,
    ntree_end=ntree_end,
    eval_period=eval_period,
)

# Pair every yielded prediction array with the matching step of the tree
# range and print column 1 of its first five rows.
message = 'First class probabilities using the first {} trees: {}'
for preds, tree_count in zip(predictions_iterator,
                             range(ntree_start, ntree_end, eval_period)):
    print(message.format(tree_count, preds[:5, 1]))

# ### 3.8 Feature Importances
# It is often important to understand which features contributed most to the
# final result. The CatBoost model exposes a `get_feature_importance` method
# for this purpose.

# In[30]:

# Train a silent 50-iteration classifier on the training pool.
model = CatBoostClassifier(iterations=50, random_seed=42, logging_level='Silent')
model.fit(train_pool)

# Compute the importances with respect to the same training pool.
feature_importances = model.get_feature_importance(train_pool)