def test_auto_norm(self):
    """Check the minimum and range vectors returned by ``knn.auto_norm``.

    Loads ``datingTestSet2.txt`` through ``knn.file2matrix``, normalises the
    resulting matrix, and compares the reported per-column minimum and range
    against reference values.
    """
    date_mat, _ = knn.file2matrix('datingTestSet2.txt')
    _, ranges, min_val = knn.auto_norm(date_mat)
    min_exp = np.array([0., 0., 0.001156])
    ranges_exp = np.array([9.1273000e+04, 2.0919349e+01, 1.6943610e+00])
    # The reference literals carry only a few significant digits, so compare
    # with a tolerance rather than bit-exact float equality.
    self.assertTrue(np.allclose(min_val, min_exp))
    self.assertTrue(np.allclose(ranges, ranges_exp))
Esempio n. 2
0
def classify_gui(k):
    """
    Prompt for one person's three feature values and return the category name.

    :param k: value of k forwarded to ``classify``
    :return: the entry of the local category list selected by the predicted label
    """
    samples, sample_labels = file_to_matrix('../data/dating_test_set_2.txt')
    prompts = ("请输入飞行里程数:", "请输入消耗冰淇淋公升数:", "请输入玩游戏花费时间百分比:")
    # Each answer is converted to float immediately after it is read, in order.
    person = np.array([float(input(text)) for text in prompts])
    normed_samples, spans, minimums = auto_norm(samples)
    # Rescale the new sample with the minimums and ranges auto_norm returned.
    normed_person = (person - minimums) / spans
    predicted = classify(normed_person, normed_samples, sample_labels, k)
    categories = ['不喜欢', '一般', '极具魅力']
    # The predicted label is used as a 1-based index into the category list.
    return categories[predicted - 1]
Esempio n. 3
0
def classify_person():
    """
    Read three feature values from standard input and classify that person.

    :return: the value ``classify`` returns for the normalised input with k=3
    """
    prompts = ("每年飞行常客里程数:", '每周消耗的冰淇淋公升数:', '玩游戏所消耗的时间百分比:')
    # Answers are read and converted one at a time, in prompt order.
    answers = [float(input(text)) for text in prompts]
    samples, sample_labels = file_to_matrix('../data/dating_test_set_2.txt')
    normed_samples, spans, minimums = auto_norm(samples)
    candidate = np.array(answers)  # the sample to be classified
    # Rescale the candidate with the minimums and ranges auto_norm returned.
    normed_candidate = (candidate - minimums) / spans
    return classify(normed_candidate, normed_samples, sample_labels, 3)
Esempio n. 4
0
def dating_class_test():
    """
    Print the classifier's error rate on a held-out slice of the dating data.

    The leading 10% of the normalised rows are classified one by one against
    the remaining rows (k=5); every prediction and the final error percentage
    are printed.

    :return: None
    """
    features, answers = file_to_matrix('./data/dating_test_set_2.txt')
    normalised, _ranges, _minimums = auto_norm(features)
    total_rows = normalised.shape[0]
    held_out = int(total_rows * 0.10)  # number of leading rows used as test samples
    # Everything after the held-out rows serves as the reference set.
    reference_rows = normalised[held_out:total_rows]
    reference_answers = answers[held_out:total_rows]
    mistakes = 0
    for row in range(held_out):
        predicted = classify(normalised[row, :], reference_rows, reference_answers, 5)
        expected = answers[row]
        print('分类器返回:%d, 真实答案为:%d' % (predicted, expected))
        if predicted != expected:
            mistakes += 1
    print('分类器错误率为:%0.2f%%' % (mistakes / float(held_out) * 100))
Esempio n. 5
0
    def test_main_dating(self):
        """Classify a hold-out split of the dating data and print the error rate.

        The first half of the normalised rows are classified with
        ``knn.classify0`` (k=3) against the second half; each prediction and a
        final summary line are printed. No assertion is made on the error rate.
        """
        test_ratio = 0.50
        dataset_matrix, labels = knn.read_matrix('dating/dataset.txt')
        norm_matrix, _, _ = knn.auto_norm(dataset_matrix)
        size = norm_matrix.shape[0]
        test_num = int(size * test_ratio)
        err_count = 0.0

        for i in range(test_num):
            classifier_result = knn.classify0(norm_matrix[i, :],
                                              norm_matrix[test_num:size, :],
                                              labels[test_num:size],
                                              3)
            # Single-argument print(...) is valid on both Python 2 and Python 3,
            # unlike the bare print statement.
            print("predict: %d real: %d" % (classifier_result, labels[i]))
            if classifier_result != labels[i]:
                err_count += 1.0

        err_rate = err_count / float(test_num)
        print('total: %d error: %d rate: %f' % (test_num, err_count, err_rate))