def main():
    """Train a RandomForest on two iris feature columns and report the results.

    Loads the iris dataset, keeps feature columns 2 and 3, splits the samples
    with ``test_size=0.3`` and ``random_state=0``, fits a 50-estimator
    ``RandomForest``, then prints the test score and the per-feature
    importances and finally draws the result through ``PlotResult``.
    """
    selected_columns = [2, 3]
    separator = "-" * 50

    dataset = datasets.load_iris()
    features = dataset.data[:, selected_columns]
    labels = dataset.target
    selected_names = np.array(dataset.feature_names)[selected_columns]

    train_X, test_X, train_y, test_y = train_test_split(
        features, labels, test_size=0.3, random_state=0
    )

    forest = RandomForest(n_estimators=50, random_state=300)
    forest.fit(train_X, train_y)
    test_score = forest.score(test_X, test_y)

    # Print the score.
    print(separator)
    print("score:" + str(test_score))

    # Print the feature importances.
    print(separator)
    importances = forest.feature_importances
    print("feature importances:")
    for name, importance in zip(selected_names, importances):
        print(" ", name, ":", importance)

    # Plot the decided regions.
    plotter = PlotResult(
        forest,
        train_X,
        train_y,
        test_X,
        test_y,
        selected_names,
        "my_random_forest",
    )
    plotter.plot_result()
def analyze_RF(X, y):
    """Score RandomForest models built with an increasing number of trees.

    Splits ``X``/``y`` once with ``train_test_split`` (default arguments),
    then for each tree count in ``range(50, 150, 10)`` fits a
    ``RandomForest(num_trees=count, num_features=3)`` on the training part
    and scores it on the test part. The scores are plotted against the tree
    counts on a new set of axes.

    Returns:
        A tuple ``(scores, tree_counts)`` of two lists of equal length.
    """
    X_train, X_test, y_train, y_test = train_test_split(X, y)

    def _score_for(tree_count):
        # Build, fit and evaluate one forest of the requested size.
        forest = RandomForest(num_trees=tree_count, num_features=3)
        forest.fit(X_train, y_train)
        return forest.score(X_test, y_test)

    tree_counts = list(range(50, 150, 10))
    scores = [_score_for(count) for count in tree_counts]

    # NOTE(review): plt is assumed to be matplotlib.pyplot imported at module
    # level — confirm. The figure handle is not needed here.
    _, axis = plt.subplots()
    axis.plot(tree_counts, scores)

    return scores, tree_counts