def kmeans():
    """Project the training features to 2-D with PCA, cluster with KMeans
    (k=2), and show a scatter plot of the clusters.

    Side effects: opens a matplotlib window via plt.show().
    """
    train_x, test_x, train_y, test_y = data_scan.data_split()
    # BUG FIX: the original called pca.transform(train_x) but discarded the
    # result, so clustering and plotting ran on the raw features instead of
    # the 2-D PCA projection the code sets up. fit_transform both fits and
    # applies the projection.
    pca = PCA(n_components=2)
    train_x = pca.fit_transform(train_x)
    y_pred = KMeans(n_clusters=2, random_state=2).fit_predict(train_x)
    plt.scatter(train_x[:, 0], train_x[:, 1], c=y_pred)
    plt.show()
def train_and_test():
    """Train a BayesClassifier on the training split and print per-sample
    predictions plus overall accuracy on the test split.

    Prints accuracy as a percentage; returns None.
    """
    train_x, test_x, train_y, test_y = data_scan.data_split()
    classifier = BayesClassifier()
    classifier.train(train_x, train_y)
    # Evaluate on the held-out test set.
    print("验证测试集", test_x.shape)
    correct_size = 0
    for test_data_x, test_data_y in zip(test_x, test_y):
        # reshape(1, -1) generalizes the original hard-coded reshape(1, 21),
        # which silently broke for any feature width other than 21.
        result = classifier.classify(test_data_x.reshape(1, -1))
        print(test_data_x, test_data_y, result[0])
        if result[0] == test_data_y:
            correct_size += 1
    print("正确率:%f%%" % (correct_size * 100 / test_x.shape[0]))
def load_total_data():
    """Load the full train/test split, scale features into [0, 1], and
    one-hot encode the (binary) labels.

    Returns:
        (train_x, test_x, train_y, test_y) where features are divided by
        `threshold` and labels are (n, 2) one-hot arrays.
    """
    train_x, test_x, train_y, test_y = data_split(
        path='../../data/data_preceded.csv')
    # Feature scaling divisor; assumes raw feature values are <= 26 — TODO confirm.
    threshold = 26

    def one_hot(labels, num_classes=2):
        # Row i is the one-hot vector for labels[i]. Replaces the original
        # duplicated append loops (whose `temp.T` was a no-op on 1-D arrays).
        encoded = np.zeros((len(labels), num_classes))
        for row, label in enumerate(labels):
            encoded[row, label] = 1
        return encoded

    train_y = one_hot(train_y)
    test_y = one_hot(test_y)
    return (train_x / threshold), (test_x / threshold), train_y, test_y
def load_data(is_load_train_data):
    """Load either the training or the test half of the split, scale the
    features into [0, 1], and one-hot encode the (binary) labels.

    Args:
        is_load_train_data: truthy -> return training data, falsy -> test data.

    Returns:
        (data, labels) with features divided by `threshold` and labels as an
        (n, 2) one-hot array.
    """
    train_x, test_x, train_y, test_y = data_split(
        path='../../../data/data_preceded.csv')
    # Normalization is essential here (original note: 一定要标准化).
    # Feature scaling divisor; assumes raw feature values are <= 26 — TODO confirm.
    threshold = 26
    if is_load_train_data:
        data, labels = train_x, train_y
    else:
        data, labels = test_x, test_y
    # One-hot encode: row i is the indicator vector for labels[i].
    # (The original loop's `temp.T` was a no-op on 1-D arrays.)
    encoded = np.zeros((len(labels), 2))
    for row, label in enumerate(labels):
        encoded[row, label] = 1
    return (data / threshold), encoded
# --- Example #5 (snippet boundary marker left by the source collection) ---
# @Author  : Equator
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import KFold, cross_val_score, GridSearchCV
from code.preprocessing.data_scan import data_split

# Cross-validation configuration shared by the grid search in knn_improve().
num_folds = 10  # number of KFold splits
seed = 7  # random_state for reproducible fold shuffling
scoring = 'neg_mean_squared_error'  # sklearn scoring key (higher = better)


def knn_improve(train_x, train_y):
    """Grid-search the n_neighbors hyperparameter of a KNN regressor.

    Standardizes the features, runs a shuffled k-fold GridSearchCV over odd
    neighbor counts 1..21 with the module-level scoring metric, prints the
    mean/std score for every candidate, then prints the best score and params.
    """
    # Standardize features before the distance-based model.
    rescaled = StandardScaler().fit_transform(train_x)
    param_grid = {'n_neighbors': [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21]}
    fold = KFold(n_splits=num_folds, random_state=seed, shuffle=True)
    searcher = GridSearchCV(KNeighborsRegressor(), param_grid,
                            scoring=scoring, cv=fold)
    fitted = searcher.fit(X=rescaled, y=train_y)
    # Report every candidate's cross-validated mean score and spread.
    scores = fitted.cv_results_['mean_test_score']
    spreads = fitted.cv_results_['std_test_score']
    candidates = fitted.cv_results_['params']
    for mean, std, param in zip(scores, spreads, candidates):
        print("%f (%f) with %r" % (mean, std, param))
    print('最优:%s 使用 %s' % (fitted.best_score_, fitted.best_params_))


if __name__ == '__main__':
    # Tune KNN on the training portion only; the test split is unused here.
    train_x, _test_x, train_y, _test_y = data_split()
    knn_improve(train_x, train_y)