Exemple #1
0
def test_doc2words():
    """Print the first positive document and the words extracted from it.

    Smoke test only: the output is printed, nothing is asserted.
    """
    contents = load_positive_contents()
    cfy = Classify()
    # The built-in next() works with both the Python 2 (``.next``) and the
    # Python 3 (``__next__``) iterator protocols.
    doc = next(contents)
    print(doc)
    # Emit all words on one space-separated line. The previous
    # ``print(word),`` only did that under the Python 2 print statement.
    # NOTE(review): assumes doc2words yields strings -- confirm.
    print(" ".join(cfy.doc2words(doc)))
Exemple #2
0
def test_gen_dict():
    """Build the dictionary from every positive document and dump it.

    Prints one ``tokenid docfreq word`` line for each entry of
    ``cfy.dict.dfs``. Nothing is asserted.
    """
    contents = load_positive_contents()
    cfy = Classify()
    # Build a real list so gen_dict receives the same kind of object on
    # Python 2 and Python 3 (where ``map`` is a one-shot lazy iterator).
    word_lists = [cfy.doc2words(doc) for doc in contents]
    cfy.gen_dict(word_lists)
    for tokenid, docfreq in cfy.dict.dfs.items():
        word = cfy.dict.get(tokenid)
        # Explicit formatting replaces the Python-2-only print statement
        # while producing the same space-separated line.
        print("%s %s %s" % (tokenid, docfreq, word))
Exemple #3
0
def test_negative_contents():
    """Train on the positive documents and predict the negative ones.

    Prints how many predictions came back as -1 and how many as 1.
    Nothing is asserted.
    """
    contents = load_positive_contents()
    cfy = Classify(500, 0.99, 0.2, 0.05)
    cfy.train(contents)

    contents = load_negative_contents()
    # Materialise the labels as a list: ``.count`` is not available on the
    # lazy object that ``map`` returns under Python 3.
    labels = [int(label) for label in cfy.predict(contents)]
    print(labels.count(-1), labels.count(1))
Exemple #4
0
def test_predict():
    """Train on the positive documents and predict ten negative ones.

    Prints the raw result of ``predict``. Nothing is asserted.
    """
    contents = load_positive_contents()
    cfy = Classify(feature_num=10)
    cfy.train(contents)

    contents = load_negative_contents()
    # next() instead of the Python-2-only ``.next()`` method.
    cs = [next(contents) for _ in range(10)]
    print(cfy.predict(cs))
Exemple #5
0
def test_doc2vector():
    """Vectorise the first ten positive documents and print the vectors.

    The dictionary is first built from all positive documents. Nothing is
    asserted.
    """
    contents = load_positive_contents()
    cfy = Classify()
    # A concrete list behaves the same on Python 2 and Python 3, where
    # ``map`` would hand gen_dict a one-shot lazy iterator.
    word_lists = [cfy.doc2words(doc) for doc in contents]
    cfy.gen_dict(word_lists)

    # The pass above iterated ``contents`` to the end, so load it afresh.
    contents = load_positive_contents()
    # next() instead of the Python-2-only ``.next()`` method.
    matrix = [cfy.doc2vector(next(contents)) for _ in range(10)]
    print(matrix)
Exemple #6
0
 def load_data(self):
     """Tokenise both corpora and populate the train/test attributes.

     Sets:
         self.train_data: word lists of all positive documents.
         self.test_data: word lists of all negative documents followed by
             the last 20% of the positive ones.
         self.test_y: labels aligned with ``self.test_data`` (-1 for
             negative documents, 1 for positive ones).
     """
     temp_cfy = Classify()
     temp_cfy.add_jieba_dict(self.words)
     p_wls = [temp_cfy.doc2words(c) for c in load_positive_contents()]
     n_wls = [temp_cfy.doc2words(c) for c in load_negative_contents()]
     # Floor division keeps n an int on Python 3, where ``/`` would yield a
     # float that cannot be used as a slice index.
     n = len(p_wls) * 80 // 100
     # NOTE(review): training keeps ALL positives, including the tail
     # p_wls[n:] that is also added to the test set below -- confirm this
     # overlap is intended (p_wls[:n] would give a disjoint split).
     self.train_data = p_wls[:]
     self.test_data = n_wls
     self.test_y = [-1] * len(n_wls)
     held_out = p_wls[n:]
     self.test_data.extend(held_out)
     self.test_y.extend([1] * len(held_out))
Exemple #7
0
 def test(self, params):
     """Train a classifier with ``params`` and score it on the test data.

     Args:
         params: sequence of four values ``(fnum, na, nu, gs)`` that are
             passed positionally to ``Classify``.

     Returns:
         Tuple ``(precision, recall, f1)``. All three are 0 when there are
         no true positives.
     """
     fnum, na, nu, gs = params
     cfy = Classify(fnum, na, nu, gs)
     cfy.add_jieba_dict(self.words)
     cfy.use_dict(self.words)
     cfy.train_by_wls(self.train_data)
     y_pred = cfy.predict_by_wls(self.test_data)
     # Materialise the (predicted, expected) pairs: the lazy object that
     # ``zip`` returns on Python 3 has no ``.count``.
     pairs = list(zip(map(int, y_pred), self.test_y))
     tp = pairs.count((1, 1))
     fp = pairs.count((1, -1))
     fn = pairs.count((-1, 1))
     if tp == 0:
         precision = 0
         recall = 0
         f1 = 0
     else:
         precision = tp * 1.0 / (tp + fp)
         recall = tp * 1.0 / (tp + fn)
         # NOTE(review): despite the name, this is the F-beta formula with
         # beta = 2 (recall weighted higher), not the balanced F1 score.
         f1 = 5 * precision * recall / (4 * precision + recall)
     print(fnum, na, nu, gs, precision, recall, f1)
     return (precision, recall, f1)
Exemple #8
0
def test_gen_matrix():
    """Build a matrix from the first twenty positive documents and print it.

    The dictionary is first built from all positive documents. Nothing is
    asserted.
    """
    contents = load_positive_contents()
    cfy = Classify()
    # A concrete list behaves the same on Python 2 and Python 3, where
    # ``map`` would hand gen_dict a one-shot lazy iterator.
    word_lists = [cfy.doc2words(doc) for doc in contents]
    cfy.gen_dict(word_lists)

    # The pass above iterated ``contents`` to the end, so load it afresh.
    contents = load_positive_contents()
    # next() instead of the Python-2-only ``.next()`` method.
    wls = [cfy.doc2words(next(contents)) for _ in range(20)]
    matrix = cfy.gen_matrix(wls)
    # Function-call form: the bare print statement is a syntax error on
    # Python 3 and prints the same thing on Python 2.
    print(matrix.toarray())
Exemple #9
0
def test_save_load():
    """Check that a saved and reloaded classifier predicts the same labels.

    Trains on the positive documents, predicts ten negative ones, saves the
    classifier to a ``save`` path next to this file, reloads it and predicts
    the same documents again. A message is printed for every prediction
    that differs; nothing is asserted.
    """
    contents = load_positive_contents()
    cfy = Classify(500, 0.99, 0.2, 0.05)
    cfy.train(contents)

    contents = load_negative_contents()
    # next() instead of the Python-2-only ``.next()`` method.
    cs = [next(contents) for _ in range(10)]
    r = cfy.predict(cs)

    path = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'save')
    cfy.save(path)

    cfy1 = load_cfy(path)
    r1 = cfy1.predict(cs)
    # Both predictions were made on the same ``cs``, so compare pairwise.
    for before, after in zip(r, r1):
        if before != after:
            print("save load error, got different svm")