def find_best_random_forest(data, target, cv, n_estimators=None):
    """Grid-search a RandomForestClassifier over the number of trees.

    Args:
        data: feature matrix passed straight to ``GridSearchCV.fit``.
        target: label vector aligned with ``data``.
        cv: cross-validation splitting strategy forwarded to ``GridSearchCV``.
        n_estimators: optional list of tree counts to try; defaults to the
            original hard-coded grid, so existing callers are unaffected.

    Returns:
        The fitted ``GridSearchCV`` object, with a ``cls_name`` attribute
        attached for display purposes.
    """
    if n_estimators is None:
        # Original default search grid, kept for backward compatibility.
        n_estimators = [10, 20, 35, 50, 80, 100, 120, 150, 200]
    # Fixed random_state so repeated searches are reproducible.
    clf = RandomForestClassifier(random_state=0)
    param_grid = [{'n_estimators': n_estimators}]
    grid_search = GridSearchCV(clf, param_grid=param_grid, cv=cv)
    grid_search.fit(data, target)
    grid_search.cls_name = '随机森林'
    return grid_search
def find_best_logistic(data, target, cv, C=None):
    """Grid-search an L2-penalized LogisticRegression over its C values.

    Args:
        data: feature matrix passed straight to ``GridSearchCV.fit``.
        target: label vector aligned with ``data``.
        cv: cross-validation splitting strategy forwarded to ``GridSearchCV``.
        C: optional list of inverse-regularization strengths to try;
            defaults to the original hard-coded grid, so existing callers
            are unaffected.

    Returns:
        The fitted ``GridSearchCV`` object, with a ``cls_name`` attribute
        attached for display purposes.
    """
    if C is None:
        # Original default search grid, kept for backward compatibility.
        C = [0.1, 0.5, 1, 5, 10]
    clf = LogisticRegression(penalty='l2')
    param_grid = [{'C': C}]
    grid_search = GridSearchCV(clf, param_grid=param_grid, cv=cv)
    grid_search.fit(data, target)
    grid_search.cls_name = '逻辑回归'
    return grid_search
def find_best_svm(data, target, cv):
    """Grid-search an SVC over C and kernel choices.

    Fits a ``GridSearchCV`` on ``data``/``target`` with the given ``cv``
    strategy and returns it, tagging the result with a ``cls_name``
    attribute for display purposes.
    """
    search_space = [{
        'C': [0.1, 0.5, 1, 5, 10],
        'kernel': ['linear', 'poly', 'rbf'],
    }]
    searcher = GridSearchCV(SVC(), param_grid=search_space, cv=cv)
    searcher.fit(data, target)
    searcher.cls_name = 'SVM'
    return searcher
def find_best_knn(data, target, cv):
    """Grid-search a KNeighborsClassifier over neighbor count and weighting.

    Fits a ``GridSearchCV`` on ``data``/``target`` with the given ``cv``
    strategy and returns it, tagging the result with a ``cls_name``
    attribute for display purposes.
    """
    search_space = [{
        'n_neighbors': [1, 2, 3, 5, 8, 10, 15, 20, 25, 30, 35, 40],
        'weights': ['uniform', 'distance'],
    }]
    searcher = GridSearchCV(KNeighborsClassifier(), param_grid=search_space, cv=cv)
    searcher.fit(data, target)
    searcher.cls_name = 'KNN'
    return searcher
def find_best_svm(data, target, cv):
    """Grid-search an SVC over C and kernel choices.

    NOTE(review): this redefines an identical ``find_best_svm`` earlier in
    the file; the later definition wins at import time. Consider removing
    one copy — behavior is unchanged either way.
    """
    params = [{
        'C': [0.1, 0.5, 1, 5, 10],
        'kernel': ['linear', 'poly', 'rbf'],
    }]
    gs = GridSearchCV(SVC(), param_grid=params, cv=cv)
    gs.fit(data, target)
    gs.cls_name = 'SVM'
    return gs
def find_best_knn(data, target, cv):
    """Grid-search a KNeighborsClassifier over neighbor count and weighting.

    NOTE(review): this redefines an identical ``find_best_knn`` earlier in
    the file; the later definition wins at import time. Consider removing
    one copy — behavior is unchanged either way.
    """
    params = [{
        'n_neighbors': [1, 2, 3, 5, 8, 10, 15, 20, 25, 30, 35, 40],
        'weights': ['uniform', 'distance'],
    }]
    gs = GridSearchCV(KNeighborsClassifier(), param_grid=params, cv=cv)
    gs.fit(data, target)
    gs.cls_name = 'KNN'
    return gs
def find_best_decisiontree(data, target, cv):
    """Grid-search a DecisionTreeClassifier over its main pruning knobs.

    Searches criterion, max depth, and the minimum split/leaf sample
    counts; returns the fitted ``GridSearchCV`` tagged with ``cls_name``
    for display purposes.
    """
    search_space = [{
        'criterion': ['gini', 'entropy'],
        'max_depth': [10, 15, 20, 30, None],
        'min_samples_split': [2, 3, 5, 8, 10],
        'min_samples_leaf': [1, 2, 3, 5, 8],
    }]
    searcher = GridSearchCV(DecisionTreeClassifier(), param_grid=search_space, cv=cv)
    searcher.fit(data, target)
    searcher.cls_name = '决策树'
    return searcher
def find_best_decisiontree(data, target, cv):
    """Grid-search a DecisionTreeClassifier over its main pruning knobs.

    NOTE(review): this redefines an identical ``find_best_decisiontree``
    earlier in the file; the later definition wins at import time.
    Consider removing one copy — behavior is unchanged either way.
    """
    params = [{
        'criterion': ['gini', 'entropy'],
        'max_depth': [10, 15, 20, 30, None],
        'min_samples_split': [2, 3, 5, 8, 10],
        'min_samples_leaf': [1, 2, 3, 5, 8],
    }]
    gs = GridSearchCV(DecisionTreeClassifier(), param_grid=params, cv=cv)
    gs.fit(data, target)
    gs.cls_name = '决策树'
    return gs