def train_data():
    """Train a k-nearest-neighbors classifier on the zone data and report accuracy.

    Loads features and labels via ``get_x_y_data()``, holds out the last
    ``TEST_DATA_ROWS`` rows of a random permutation as a test set, fits a
    KNeighborsClassifier on the remaining rows, then prints the predictions,
    their probabilities, the ground truth, the hold-out accuracy, and
    cross-validation scores. Returns None; all output goes to stdout.
    """
    x_data, y_data, zone_cnt, zone_int_dict = get_x_y_data()
    knn = KNeighborsClassifier()

    # Shuffle once, then split: the last TEST_DATA_ROWS permuted indices are
    # the test set and the rest are the training set. (The original code
    # trained on ALL rows -- including the held-out test rows -- which is
    # data leakage and inflates the reported accuracy.)
    indices = np.random.permutation(len(x_data))
    train_idx = indices[:-TEST_DATA_ROWS]
    test_idx = indices[-TEST_DATA_ROWS:]
    x_train, y_train = x_data[train_idx], y_data[train_idx]
    x_test, y_test = x_data[test_idx], y_data[test_idx]

    knn.fit(x_train, y_train)  # start training
    print('training data count:', len(train_idx), ' number of zones:', zone_cnt)

    test_result = knn.predict(x_test)  # test
    prob_test_result = knn.predict_proba(x_test)
    print(prob_test_result)

    # zone_int_dict maps zone -> int with no duplicate values, so it is
    # safe to invert it here for human-readable output.
    int_zone_dict = dict(zip(zone_int_dict.values(), zone_int_dict.keys()))
    print('predict result:', test_result, [int_zone_dict[x] for x in test_result])  # test result
    print('ground truth:', y_test, [int_zone_dict[x] for x in y_test])  # ground truth

    # Hold-out accuracy: fraction of test rows predicted correctly.
    cnt = sum(1 for predicted, actual in zip(test_result, y_test) if predicted == actual)
    print('accurate rate', cnt * 1.0 / TEST_DATA_ROWS)

    # sklearn.cross_validation was removed in scikit-learn 0.20; its
    # replacement is sklearn.model_selection.
    from sklearn.model_selection import cross_val_score
    print(cross_val_score(knn, x_train, y_train))
import numpy as np from util import get_x_y_data from sklearn.cluster import KMeans TEST_DATA_ROWS = 20 # class sklearn.cluster.KMeans # (n_clusters=8, init='k-means++', n_init=10, max_iter=300, tol=0.0001, precompute_distances='auto', verbose=0, random_state=None, copy_x=True, n_jobs=1) x_data, y_data, zone_cnt, zone_int_dict = get_x_y_data() # no duplicate value, so reverse this dictionary int_zone_dict = dict(zip(zone_int_dict.values(), zone_int_dict.keys())) kmeans = KMeans(n_clusters=zone_cnt) # a,b,c,d,e 5 centor kmeans.fit(x_data) print kmeans.get_params() # centers print kmeans.cluster_centers_ # every lable for cluster print kmeans.labels_ # the smaller inertia is, the better the classifier works print kmeans.inertia_ indices = np.random.permutation(len(x_data)) x_test = x_data[indices[-TEST_DATA_ROWS:]] x_distance = kmeans.transform(x_test) test_result = kmeans.predict(x_test) # test for type, dis in zip(test_result, x_distance):