from sklearn.metrics import confusion_matrix

# Baseline quality of the first model: confusion matrix of its predictions.
conf = confusion_matrix(test["Label"], y_pred["PredictedLabel"])
print(conf)

# Training directly on the raw string column "education" fails:
# rx_fast_trees only accepts numeric features, so the call raises
# and we show the error message.
try:
    trees2 = rx_fast_trees(
        "Label ~ age + fnlwgt + educationnum + capitalgain + capitalloss + education",
        data=train)
except Exception as e:
    print(e)

# The fix: encode the string column with the categorical transform and
# train on the encoded column "education_cat" instead.
trees2 = rx_fast_trees(
    "Label ~ age + fnlwgt + educationnum + capitalgain + capitalloss + education_cat",
    data=train,
    ml_transforms=[categorical(cols=dict(education_cat="education"))])

y_pred2 = rx_predict(trees2, test)
conf = confusion_matrix(test["Label"], y_pred2["PredictedLabel"])
print(conf)

# Map every remaining string column to an encoded "<name>_cat" column
# (output column name -> source column name, as categorical() expects).
cats = {col + "_cat": col
        for col in ["workclass", "education", "maritalstatus", "occupation",
                    "relationship", "race", "sex", "nativecountry"]}

# Build the full formula with all numeric features plus the encoded ones.
# Fix: the original concatenated "... capitalloss +" directly with the
# joined names, producing "capitalloss +workclass_cat" (missing space).
formula = ("Label ~ age + fnlwgt + educationnum + capitalgain + capitalloss + "
           + " + ".join(sorted(cats.keys())))
print(cats)
#####################################
# To answer that question, we need to add the wine color as a new
# feature. Because it is a categorical feature, we need to convert it
# into a numerical one. We use the transform
# :epkg:`microsoftml:categorical` to convert column *color* into
# *color_num*.
from microsoftml import categorical

# Keep every column except the target and the raw string column...
cols = list(wines.columns.drop(["quality", "color"]))
# ...and add the numeric replacement produced by the transform.
cols.append("color_num")

model = rx_fast_trees(
    "quality ~" + "+".join(cols),
    data=wines_train,
    method="regression",
    ml_transforms=[categorical(cols=dict(color_num="color"))])

pred = rx_predict(model, wines_test, extra_vars_to_write=["quality"])
r2_color = r2_score(pred.quality, pred.Score)
print("R2 with colors=", r2_color)

#####################################
# Performance is not better. Let's confirm that with
# the feature importances.

# summary_["keyValuePairs"] maps feature name -> importance score;
# materialize it as a list of (name, score) pairs for plotting.
feature_importance = list(model.summary_["keyValuePairs"].items())

import numpy

fig, ax = plt.subplots(1, 1)
ind = numpy.arange(len(feature_importance))
ax.barh(ind, [pair[1] for pair in feature_importance], 0.35)