예제 #1
0
 def tune_rf(self, X, y):
     """Return random-forest hyper-parameters, tuning them when required.

     The ``params_rf`` table is read first, then one of three paths runs:

     * if any of the three columns comes back empty, a grid search with
       ``n_estimators`` limited to 10 is run and the winner is INSERTed;
     * otherwise, if ``self.check_tune[0]`` is truthy, a grid search over
       ``n_estimators`` of 10 and 100 is run and the table is UPDATEd;
     * otherwise the first stored row is returned without any fitting.

     Args:
         X: Feature matrix passed to ``GridSearchCV.fit``.
         y: Target values passed to ``GridSearchCV.fit``.

     Returns:
         Tuple ``(n_estimators, max_features, min_samples_leaf)``.
     """
     msg.print_line()
     msg.tune_rf_message()
     msg.loading_message()
     stored = self.mysql_cn.read('select * from params_rf;')
     stored_estimators = stored['n_estimators'].tolist()
     stored_features = stored['max_features'].tolist()
     stored_leaves = stored['min_samples_leaf'].tolist()

     table_is_empty = not (
         stored_estimators and stored_features and stored_leaves
     )
     # check_tune is only consulted when stored parameters exist.
     if not table_is_empty and not self.check_tune[0]:
         # Nothing to tune: reuse the rows fetched above rather than
         # issuing the identical SELECT a second time.
         best = (stored_estimators[0], stored_features[0], stored_leaves[0])
         msg.print_rf_params(*best)
         return best

     msg.tuning_message()
     param_grid = {
         # An empty table searches n_estimators=10 only; a re-tune also
         # tries 100.
         'n_estimators': [10] if table_is_empty else [10, 100],
         # NOTE(review): if RF is scikit-learn's random forest, 'auto' was
         # deprecated in 1.1 and removed in 1.3 - confirm against the
         # pinned version before upgrading.
         'max_features': ['auto', 'sqrt', 'log2'],
         'min_samples_leaf': [1, 5, 10],
     }
     search = GridSearchCV(estimator=RF(), param_grid=param_grid, cv=5)
     search.fit(X, y)
     winner = search.best_params_
     best = (
         winner['n_estimators'],
         winner['max_features'],
         winner['min_samples_leaf'],
     )
     msg.print_rf_params(*best)

     # NOTE(review): the SQL is assembled with %-formatting. The values come
     # from the fixed grid above, not from user input; switch to bound
     # parameters if insert_update supports them.
     if table_is_empty:
         msg.insert_message()
         statement = ("INSERT INTO params_rf(n_estimators, max_features, min_samples_leaf) "
                      "VALUES(%d, '%s', %d)")
     else:
         msg.update_message()
         # No WHERE clause: every row of params_rf receives the new values.
         statement = "UPDATE params_rf SET n_estimators = %d, max_features = '%s', min_samples_leaf = %d"
     self.mysql_cn.insert_update(statement % best)
     return best
예제 #2
0
 def tune_svm(self, X, y):
     """Return SVM hyper-parameters, tuning them when required.

     The ``params_svm`` table is read first, then one of three paths runs:

     * if the ``kernel`` column comes back empty, an RBF grid search is run
       and the winner is INSERTed;
     * otherwise, if ``self.check_tune[0]`` is truthy, the same grid search
       is run and the table is UPDATEd;
     * otherwise the first stored row is returned without any fitting.

     Args:
         X: Feature matrix passed to ``GridSearchCV.fit``.
         y: Target values passed to ``GridSearchCV.fit``.

     Returns:
         Tuple ``(kernel, c, gamma)``.
     """
     msg.print_line()
     msg.tune_svm_message()
     msg.loading_message()
     stored = self.mysql_cn.read('select * from params_svm;')
     stored_kernel = stored['kernel'].tolist()
     stored_c = stored['c'].tolist()
     stored_gamma = stored['gamma'].tolist()

     table_is_empty = not stored_kernel
     # check_tune is only consulted when stored parameters exist.
     if not table_is_empty and not self.check_tune[0]:
         # Nothing to tune: reuse the rows fetched above rather than
         # issuing the identical SELECT a second time.
         best = (stored_kernel[0], stored_c[0], stored_gamma[0])
         msg.print_svm_params(*best)
         return best

     msg.tuning_message()
     # The grid is only built when a search actually runs: nine
     # log-spaced values from 1e-2 to 1e2 for both C and gamma.
     C_range = np.logspace(-2, 2, 9)
     gamma_range = np.logspace(-2, 2, 9)
     param_grid = [{'kernel': ['rbf'], 'gamma': gamma_range, 'C': C_range}]
     search = GridSearchCV(SVC(), param_grid=param_grid, cv=5)
     search.fit(X, y)
     winner = search.best_params_
     best = (winner['kernel'], winner['C'], winner['gamma'])
     msg.print_svm_params(*best)

     # NOTE(review): the SQL is assembled with %-formatting. The values come
     # from the fixed grid above, not from user input; switch to bound
     # parameters if insert_update supports them.
     if table_is_empty:
         msg.insert_message()
         statement = ("INSERT INTO params_svm(kernel, c, gamma) "
                      "VALUES('%s', %s, %s)")
     else:
         msg.update_message()
         # No WHERE clause: every row of params_svm receives the new values.
         statement = "UPDATE params_svm SET kernel = '%s', c = %s, gamma = %s"
     self.mysql_cn.insert_update(statement % best)
     return best
예제 #3
0
def cv_predict(X, y, clf_class, **kwargs):
    """Collect out-of-fold class probabilities with shuffled 10-fold CV.

    A fresh ``clf_class(**kwargs)`` is fitted on each training split and its
    ``predict_proba`` output is written into the rows of the matching test
    split.

    Args:
        X: Feature array, indexable by the integer index arrays of a fold.
        y: Target array, indexable the same way.
        clf_class: Classifier class (or factory) exposing ``fit`` and
            ``predict_proba``.
        **kwargs: Keyword arguments forwarded to ``clf_class``.

    Returns:
        Tuple ``(y_prob, clf)``: ``y_prob`` is a ``(len(y), 2)`` array of
        probabilities, and ``clf`` is the classifier fitted on the last
        fold only (it has not seen that fold's test rows).
    """
    n_samples = len(y)
    folds = KFold(n_samples, n_folds=10, shuffle=True)
    # Two columns: one probability per class.
    probabilities = np.zeros((n_samples, 2))
    for fit_rows, held_out_rows in folds:
        fit_features = X[fit_rows]
        held_out_features = X[held_out_rows]
        fit_targets = y[fit_rows]
        # A new estimator per fold so nothing learned leaks between folds.
        model = clf_class(**kwargs)
        model.fit(fit_features, fit_targets)
        probabilities[held_out_rows] = model.predict_proba(held_out_features)
    return probabilities, model

msg.print_line()
msg.calculate_probs_message()
# Out-of-fold class probabilities for every row, plus the classifier that
# cv_predict fitted on its final fold.
pred_prob, clf = cv_predict(X, y, KNN, n_neighbors=k)
# Column 1 is presumably the positive (churn) class - verify class order.
pred_churn = pred_prob[:, 1]

# Raw string: the path value is unchanged, but "\S" and "\k" are no longer
# parsed as invalid escape sequences, which newer Python versions warn about.
joblib.dump(clf, r'D:\SLIIT\SoftwareIndustry\knn_model.pkl', compress=1)

# Number of times a predicted probability is assigned to an observation
# (probabilities are rounded to 3 decimals before counting).
counts = pandas.Series(pred_churn.round(3)).value_counts().reset_index()

counts.columns = ['pred_prob', 'count']
print(counts)

# Rows ordered by ascending probability, exported as [pred_prob, count] pairs.
df1 = counts.sort_values(by='pred_prob')
counts_list = df1.values.tolist()