Exemple #1
0
# -*- coding: utf-8 -*-
import sys, re, nltk
from QClassifier import QClassifierImpl
import jieba.analyse
from xml.etree import ElementTree
import xml.dom.minidom

# Python 2 only: re-import sys to get setdefaultencoding() back (the name is
# removed from the module during interpreter start-up) and make UTF-8 the
# default codec for implicit str <-> unicode conversions.
# NOTE(review): this hack cannot run on Python 3 (no builtin reload) and it
# changes implicit conversions process-wide -- confirm it is still needed.
reload(sys)
sys.setdefaultencoding('utf8')

# Parse the test set; `root` is the root element of that document.
testtree = ElementTree.parse("../data/testset.xml")
root = testtree.getroot()

# Root element of the output XML tree that the loop below fills with
# one <question> child per input question.
buildroot = ElementTree.Element("QuestionSet")

# Construct the classifier with the training data path and train it up front.
# NOTE(review): presumably a question-category classifier -- verify against
# the QClassifier module.
clf = QClassifierImpl(train_data_path='../data/pair.xml')
# clf = QClassifierImpl(train_data_path = 'train.xml')
clf.train()

# Rebuild every test question under the output tree: copy its id and text
# and extract keyword tags from the text.
# The element is iterated/indexed directly because Element.getchildren() is
# deprecated (since Python 2.7 / 3.2) and was removed in Python 3.9.
for question in root:
    # The text of the first child element is taken as the question text.
    q_text = question[0].text
    # Keep at most 5 tags returned by jieba and join them with spaces.
    testtags = jieba.analyse.extract_tags(q_text, topK=5)
    testkeys = ' '.join(testtags)

    # Mirror the question in the output tree, carrying over its id attribute.
    testquestion = ElementTree.SubElement(buildroot, 'question')
    testquestion.set('id', question.get('id'))
    # question text
    q = ElementTree.SubElement(testquestion, 'q')
    q.text = q_text

    # question category