Esempio n. 1
0
def train_data():
    """Fit a k-nearest-neighbours classifier on the zone data and print a report.

    The data comes from ``get_x_y_data()``.  The classifier is fitted on every
    row, then evaluated on the last ``TEST_DATA_ROWS`` entries of a random
    permutation of the row indices.  Printed, in order: the row and zone
    counts, the class probabilities for the test rows, the predicted and the
    true labels (as integer codes and as zone names), the fraction of test
    rows predicted correctly, and the cross-validation scores of the
    classifier on the full data set.

    Returns:
        None.  All results are written to standard output.
    """

    def _emit(*parts):
        # One space-joined line; a single-argument print() call produces the
        # same text under both Python 2 and Python 3.
        print(' '.join(str(part) for part in parts))

    x_data, y_data, zone_cnt, zone_int_dict = get_x_y_data()

    knn = KNeighborsClassifier()

    indices = np.random.permutation(len(x_data))
    # NOTE(review): the model is trained on *all* rows, so the test rows drawn
    # below are also part of the training set and the accuracy printed further
    # down is optimistic.  The cross-validation scores at the end are the
    # unbiased figure.  Confirm whether a real hold-out split is wanted.
    x_train = x_data
    y_train = y_data
    test_rows = indices[-TEST_DATA_ROWS:]
    x_test = x_data[test_rows]
    y_test = y_data[test_rows]

    knn.fit(x_train, y_train)  # start training
    _emit('training data count:', len(indices), ' number of zones:', zone_cnt)
    test_result = knn.predict(x_test)  # test
    prob_test_result = knn.predict_proba(x_test)
    print(prob_test_result)

    # Invert zone -> int into int -> zone; this relies on the integer codes
    # being unique (the original author states there are no duplicates).
    int_zone_dict = {code: zone for zone, code in zone_int_dict.items()}

    _emit('predict result:', test_result,
          [int_zone_dict[code] for code in test_result])  # test result
    _emit('ground truth:', y_test,
          [int_zone_dict[code] for code in y_test])  # ground truth

    # Count over the rows actually drawn: when the data set has fewer than
    # TEST_DATA_ROWS rows the slice above is shorter, and indexing up to
    # TEST_DATA_ROWS would raise IndexError (and skew the denominator).
    hits = sum(1 for guess, truth in zip(test_result, y_test) if guess == truth)
    _emit('accurate rate', hits * 1.0 / len(y_test))

    # sklearn.cross_validation was removed in scikit-learn 0.20; its
    # replacement is sklearn.model_selection.  Keep the old module as a
    # fallback for legacy installations.
    try:
        from sklearn.model_selection import cross_val_score
    except ImportError:
        from sklearn.cross_validation import cross_val_score
    print(cross_val_score(knn, x_train, y_train))
Esempio n. 2
0
import numpy as np
from util import get_x_y_data
from sklearn.cluster import KMeans

TEST_DATA_ROWS = 20

# class sklearn.cluster.KMeans
# (n_clusters=8, init='k-means++', n_init=10, max_iter=300, tol=0.0001, precompute_distances='auto', verbose=0, random_state=None, copy_x=True, n_jobs=1)

# Load the samples, their labels, the number of zones and the zone -> int map.
x_data, y_data, zone_cnt, zone_int_dict = get_x_y_data()

# Build the inverse (int -> zone) lookup; valid because the integer codes are
# said to be unique.
int_zone_dict = {code: zone for zone, code in zone_int_dict.items()}

# Fit one cluster per zone (fit() returns the fitted estimator itself).
kmeans = KMeans(n_clusters=zone_cnt).fit(x_data)

# Estimator configuration.
print(kmeans.get_params())

# Coordinates of the cluster centres.
print(kmeans.cluster_centers_)

# Cluster index assigned to each fitted sample.
print(kmeans.labels_)

# Inertia: sum of squared distances of the samples to their closest centre;
# a lower value means tighter clusters.
print(kmeans.inertia_)

# Take the last TEST_DATA_ROWS entries of a random permutation as sample rows
# (these rows were also part of the data the model was fitted on).
indices = np.random.permutation(len(x_data))
x_test = x_data[indices[-TEST_DATA_ROWS:]]

# Distance of every sample row to every cluster centre, followed by the index
# of the closest cluster for each row.
x_distance = kmeans.transform(x_test)
test_result = kmeans.predict(x_test)

for type, dis in zip(test_result, x_distance):