Example #1
0
 def do_txt_training(self, txt, k):
     """Measure the classification error rate on a hold-out slice of ``txt``.

     ``txt`` is converted with ``self.txt2dataset`` and the resulting dataset
     is passed through ``normalize``. The first 90% of the samples serve as
     the reference set handed to ``classify`` (together with ``k``); the
     remaining 10% are classified one by one and compared with their
     recorded labels.

     Returns the fraction of hold-out samples whose result differs from the
     recorded label, as a float.

     Raises ZeroDivisionError when the hold-out slice is empty (i.e. when
     ``txt2dataset`` yields no labels).
     """
     dataset, labels = self.txt2dataset(txt)
     dataset = normalize(dataset)
     # 90% for training, 10% for verification
     split = int(0.9 * len(labels))
     training_set = dataset[:split]
     training_labels = labels[:split]
     ref_set = dataset[split:]
     ref_labels = labels[split:]
     # start testing
     error_count = 0
     # Builtin zip works on both Python 2 and 3; itertools.izip is gone in 3.
     for x, label in zip(ref_set, ref_labels):
         result = classify(x, training_set, training_labels, k)
         # Pass the values as arguments so formatting only happens when the
         # debug level is actually enabled.
         logging.debug('Data: %s, label: %s, result: %s', x, label, result)
         if result != label:
             error_count += 1
     return float(error_count) / len(ref_labels)
import kNN
import numpy

# Human-readable names for the class labels; label n maps to entry n - 1.
result_array = ['not at all', 'in small doses', 'in large doses']

# Load the samples and normalize them, keeping the min/range arrays so the
# same transformation can be applied to the user's input below.
dataset, labels = kNN.file_to_array('datingTestSet2.txt')
normalize_dataset, min_array, max_array, range_array = kNN.normalize(dataset)

# Prompt for the three features, in order, converting each answer to float.
fly_input, game_input, icecream_input = (
    float(input(prompt))
    for prompt in (
        '每年获得的飞行常客里程数 >>>',
        '玩视频游戏所耗时间百分比 >>>',
        '每周消费的冰淇淋公升数 >>>',
    )
)
input_array = numpy.array([fly_input, game_input, icecream_input])

# Rescale the query with the dataset's min/range before classifying it.
normalize_input_array = (input_array - min_array) / range_array
label = kNN.classify(normalize_dataset, labels, normalize_input_array, 5)
predicted_name = result_array[label - 1]
print('label %s' % predicted_name)
Example #3
0
            each_line = line.split('\t')
            to_matrix[index, :] = each_line[0:3]
            class_label_vector.append(int(
                each_line[-1]))  # int("label\n") => label: int
            index += 1

    return to_matrix, class_label_vector


if __name__ == "__main__":
    root_path = os.path.dirname(os.path.abspath(__file__))
    data_folder = os.path.join(root_path, "data")
    data_file = os.path.join(data_folder, "dating_DataSet.txt")

    training_set, class_label_vector = file_to_matrix(data_file)
    normalized_training_set, range_values, min_value = kNN.normalize(
        training_set)

    # fig = plt.figure()
    # ax = fig.add_subplot(111)
    # ax.scatter(normalized_training_set[:, 0], normalized_training_set[:, 1],
    #             np.array(class_label_vector), np.array(class_label_vector))
    # plt.show()

    # Classifier Test
    k = 5
    test_ratio = 0.15
    data_size = normalized_training_set.shape[0]
    test_set_size = int(data_size * test_ratio)

    error_count = 0
    for i in range(test_set_size):
Example #4
0
def classifyBykNN(dataSet, labels, k, predictX):
    """Normalize ``dataSet`` and classify ``predictX`` against it via kNN.

    ``dataSet`` is first passed through ``kNN.normalize``; the result is then
    forwarded, together with ``labels``, ``k`` and ``predictX``, to
    ``kNN.kNNclassify``.

    Returns whatever ``kNN.kNNclassify`` returns.
    """
    normalized = kNN.normalize(dataSet)
    prediction = kNN.kNNclassify(normalized, labels, k, predictX)
    return prediction
Example #5
0
    classes = Rocchio.train(dataSet, labels)
    return Rocchio.predict(classes, predictX)

def classifyByNBC(predictX):
    """Return the result of ``NBCpredict.predict`` for ``predictX``."""
    return NBCpredict.predict(predictX)

def classifyBySVM(predictX):
    """Return the result of ``SVMpredict.predict`` for ``predictX``."""
    return SVMpredict.predict(predictX)

def classifyByANN(predictX):
    """Return the result of ``ANNpredict.predict`` for ``predictX``."""
    return ANNpredict.predict(predictX)

if __name__ == "__main__":
    # Classify one document with kNN and Rocchio and print both predictions.
    ds, labels = loadDataSet(CLASSIFY_FILE, 5400, 967)

    # The context manager closes the file even if reading it raises.
    with open('D:/e.txt', 'r') as fh:
        article_text = fh.read()
    x = vectorArticleByTFIDF(article_text)

    ds = kNN.normalize(ds)
    c1 = int(classifyBykNN(ds, labels, 10, x))
    c2 = int(classifyByRocchio(ds, labels, x))

    # Report every CLASS_LABEL entry whose id matches a predicted class id.
    # Single-argument print(...) emits the same text on Python 2 and 3.
    for c in CLASS_LABEL:
        cid = CLASS_LABEL[c][0]
        cname = CLASS_LABEL[c][1]
        if cid == c1:
            print('Predicted by kNN: %s - %d(%s)' % (c, cid, cname))
        if cid == c2:
            print('Predicted by Rocchio: %s - %d(%s)' % (c, cid, cname))
Example #6
0
    # Change lines into array
    featureCount = len(lines[0].split()) - 1
    group = np.zeros((len(lines), featureCount))
    labels = []

    for i in range(len(lines)):
        lst = lines[i].split()
        group[i] = np.array(lst[:-1])
        labels.append(lst[-1])

    return (group, labels)


# Load the training set and normalize its feature matrix.
group, labels = createDataSetFromFile('data/dating/training.txt')
group = kNN.normalize(group)

# Load the test set and normalize it the same way.
# NOTE(review): the test set is normalized by a separate kNN.normalize call,
# independent of the training set — confirm both end up on the same scale.
testGroup, testLabels = createDataSetFromFile('data/dating/test.txt')
testGroup = kNN.normalize(testGroup)

# Running tally; presumably counts matching predictions in the loop below.
correct = 0
for i in range(testGroup.shape[0]):
    res = kNN.classify0(testGroup[i], group, labels, 3)
    if res == 'didntLike':
        res = '1'
    elif res == 'smallDoses':
        res = '2'
    else:
        res = '3'
    if res == testLabels[i]:
Example #7
0
 def test_dataset():
     """Classify one fixed query point against a four-sample toy dataset.

     The samples are passed through ``normalize`` before being handed to
     ``classify`` with 3 as the last argument; the result is logged at INFO
     level. Nothing is returned.
     """
     samples = numpy.array([[1.0, 1.1], [1.0, 1.0], [0, 0], [0, 0.1]])
     sample_labels = ['A', 'A', 'B', 'B']
     query = numpy.array([1, 0.5])
     scaled_samples = normalize(samples)
     outcome = classify(query, scaled_samples, sample_labels, 3)
     logging.info(outcome)