def modelBuilding(X, y, cat_features, *, iterations=2000, learning_rate=0.01,
                  task_type="GPU", train_size=0.8, random_state=1234):
    """Split the data, fit a CatBoost classifier on it and return the model.

    The data is split with ``train_test_split``; the training part is used
    for fitting and the remaining part is handed to CatBoost as ``eval_set``.
    After fitting, the fit status and the model parameters are printed.

    Args:
        X: Feature data, forwarded to ``train_test_split``.
        y: Target values, forwarded to ``train_test_split``.
        cat_features: Passed unchanged to ``CatBoostClassifier.fit`` as
            ``cat_features``.
        iterations: ``iterations`` argument of ``CatBoostClassifier``.
        learning_rate: ``learning_rate`` argument of ``CatBoostClassifier``.
        task_type: ``task_type`` argument of ``CatBoostClassifier``. The
            default stays ``"GPU"`` for backward compatibility; pass
            ``"CPU"`` to train on a machine without a supported GPU.
        train_size: ``train_size`` argument of ``train_test_split``.
        random_state: ``random_state`` argument of ``train_test_split``,
            fixed by default so the split is reproducible.

    Returns:
        The fitted ``CatBoostClassifier`` instance.
    """
    X_train, X_validation, y_train, y_validation = train_test_split(
        X,
        y,
        train_size=train_size,
        random_state=random_state,
    )

    # loss_function is deliberately not passed; CatBoost uses its default.
    model = CatBoostClassifier(
        iterations=iterations,
        learning_rate=learning_rate,
        task_type=task_type,
    )
    model.fit(
        X_train,
        y_train,
        cat_features=cat_features,
        eval_set=(X_validation, y_validation),
        verbose=True,
    )

    print('Model is fitted: ' + str(model.is_fitted()))
    print('Model params:')
    print(model.get_params())
    return model
# Wrap the validation data in a Pool; it is used as eval_set for both fits below.
validation_pool = Pool(
    data=X_validation,
    label=y_validation,
    cat_features=cat_features,
)

# ---------------------------------------------------------------------------
# Better / best model
# ---------------------------------------------------------------------------
# The learning rate is a knob worth experimenting with.
model = CatBoostClassifier(iterations=5, learning_rate=0.1)
model.fit(train_pool, eval_set=validation_pool, verbose=False)

# Report the fit status and the parameters the model was created with.
print('Model is fitted: {}'.format(model.is_fitted()))
print('Model params:\n{}'.format(model.get_params()))

# Best-iteration selection: refit with more iterations against the same
# evaluation pool. CatBoost has a `use_best_model` parameter (True or False)
# related to this; it is not set explicitly here.
model = CatBoostClassifier(iterations=100)
model.fit(train_pool, eval_set=validation_pool, verbose=False)
print('Tree count: ' + str(model.tree_count_))
from catboost import CatBoostClassifier

# Build and train the classifier; (X_val, y_val) is passed as eval_set.
# loss_function is not passed, so CatBoost uses its default.
clf = CatBoostClassifier(iterations=2000, learning_rate=0.1)
clf.fit(
    X_train,
    y_train,
    cat_features=categorical_columns,
    eval_set=(X_val, y_val),
    verbose=False,
)

# Summary of the trained model.
print('CatBoost model is fitted: ' + str(clf.is_fitted()))
print('CatBoost model parameters:')
print(clf.get_params())

# Accuracy from the predicted labels on the validation data.
predictions = clf.predict(X_val)
print("accuracy_score", accuracy_score(y_val, predictions))

# ROC-AUC from the second probability column (index 1).
predictions_probas = clf.predict_proba(X_val)
print(
    "roc-auc score for the class 1, from target 'HasDetections' ",
    roc_auc_score(y_val, predictions_probas[:, 1]),
)

# Confusion matrix on the validation data, drawn as an annotated heatmap.
val_cnf_matrix = confusion_matrix(y_val, predictions)
sns.heatmap(
    val_cnf_matrix,
    annot=True,
    fmt='.2f',
    cmap="BrBG",
).set_title("Validation")
plt.show()
# In[12]: from catboost import CatBoostClassifier model = CatBoostClassifier( iterations=5, learning_rate=0.1, # loss_function='CrossEntropy' ) model.fit( X_train, y_train, cat_features=cat_features, eval_set=(X_validation, y_validation), verbose=False ) print('Model is fitted: ' + str(model.is_fitted())) print('Model params:') print(model.get_params()) # In[13]: from catboost import CatBoostClassifier model = CatBoostClassifier( iterations=50, random_seed=63, learning_rate=0.5, custom_loss=['AUC', 'Accuracy'] ) model.fit(