def determine_errors(df_val, tree, ml_task):
    """Measure how far the predictions of ``tree`` are from ``df_val.label``.

    Predictions are obtained from ``decision_tree_predictions(df_val, tree)``
    and compared against the ``label`` attribute of ``df_val``.

    Args:
        df_val: Validation data; must expose a ``label`` attribute.
        tree: Tree handed unchanged to ``decision_tree_predictions``.
        ml_task: ``"regression"`` selects the mean squared error; any other
            value selects the mismatch count.

    Returns:
        For regression, the mean of the squared differences between
        predictions and labels. Otherwise, the number of positions where the
        prediction differs from the label.
    """
    predicted = decision_tree_predictions(df_val, tree)
    expected = df_val.label

    if ml_task != "regression":
        # Classification: every position where prediction and label disagree
        # counts as one error.
        mismatches = predicted != expected
        return sum(mismatches)

    # Regression: mean squared error.
    squared_residuals = (predicted - expected) ** 2
    return squared_residuals.mean()
def random_forest_predictions(test_df, forest):
    """Combine the predictions of every tree in ``forest`` by majority vote.

    Each tree is evaluated with ``decision_tree_predictions(test_df, tree=...)``
    and its output stored under the column ``tree_<position>``. The row-wise
    mode of the resulting table is then taken.

    Args:
        test_df: Data forwarded unchanged to ``decision_tree_predictions``.
        forest: Sequence of trees; position ``i`` feeds column ``tree_i``.

    Returns:
        Column ``0`` of ``DataFrame.mode(axis=1)``, i.e. one voted value per
        row (when several values tie, the one pandas places in column 0).
    """
    per_tree = {
        "tree_{}".format(position): decision_tree_predictions(test_df, tree=member)
        for position, member in enumerate(forest)
    }

    # One column per tree; presumably one row per data point in test_df
    # (depends on what decision_tree_predictions returns -- TODO confirm).
    votes = pd.DataFrame(per_tree)

    row_modes = votes.mode(axis=1)
    return row_modes[0]
def random_forest_predictions(test_df, forest):
    """Return the majority-vote prediction of ``forest`` for ``test_df``.

    Args:
        test_df: Data forwarded unchanged to ``decision_tree_predictions``.
        forest: Sequence of trees to evaluate, in order.

    Returns:
        Column ``0`` of the row-wise mode over all per-tree predictions
        (a pandas Series).
    """
    votes_by_tree = {}
    for tree_no, tree in enumerate(forest):
        # Key: column name "tree_<n>"; value: that tree's predictions.
        votes_by_tree["tree_{}".format(tree_no)] = decision_tree_predictions(
            test_df, tree=tree
        )

    # Turn the dictionary into a DataFrame (one column per tree, rows indexed
    # by data point), then vote: the most frequent value in each row wins.
    return pd.DataFrame(votes_by_tree).mode(axis=1)[0]
def random_forest_predictions(test_df, forest, ml_task):
    """Aggregate the per-tree predictions of ``forest`` for ``test_df``.

    Every tree is evaluated with ``decision_tree_predictions(test_df, tree=...)``
    and stored as column ``tree_<position>`` of a DataFrame, which is then
    reduced row by row.

    Args:
        test_df: Data forwarded unchanged to ``decision_tree_predictions``.
        forest: Sequence of trees; position ``i`` feeds column ``tree_i``.
        ml_task: ``"regression"`` averages the trees' outputs; any other
            value takes a majority vote.

    Returns:
        For regression, the row-wise mean of all tree predictions. Otherwise,
        column ``0`` of the row-wise mode.
    """
    tree_outputs = {
        "tree_{}".format(n): decision_tree_predictions(test_df, tree=t)
        for n, t in enumerate(forest)
    }
    table = pd.DataFrame(tree_outputs)

    if ml_task == "regression":
        # Regression: average the numeric outputs of all trees.
        return table.mean(axis=1)

    # Classification: the most frequent label in each row wins.
    return table.mode(axis=1)[0]