# Load the test data.
## testtree = ElementTree.parse(r"D:\Program Files\Python\Sample.xml")
testtree = ElementTree.parse(r"D:\QAhomework\testset\testset.xml")
# buildroot holds the generated document (the test data in stage2 format),
# kept as an XML tree.
buildroot = ElementTree.Element("buildroot")
i = 0
# Single-argument print(...) prints the same on Python 2 and also parses on
# Python 3.
print(i)
clf = QClassifierImpl(train_data_path='../data/pair.xml')
clf.train()
print(i)
# NOTE(review): fo is not closed anywhere in the visible part of this script;
# confirm that the remainder closes it.
fo = open("testwrite.txt", "a+")
for question in testtree.iter('question'):
    # Extract up to five keywords (topK=5) from the text of the question's
    # first child element. Indexing replaces the deprecated getchildren().
    testtags = jieba.analyse.extract_tags(question[0].text, topK=5)
    testkeys = " ".join(testtags)
    ## print testkeys
    ## testtype= classifier.classify(gender_features(testkeys))
    ## print testtype
    # Create the testquestion child node under buildroot.
    testquestion = ElementTree.SubElement(buildroot, "testquestion")
    # Set the attributes of testquestion.
# -*- coding: utf-8 -*-
# Builds a "QuestionSet" XML tree from ../data/testset.xml: every child of the
# input root becomes a <question> element carrying the same id and a <q> child
# with the question text.
import sys
import re
import nltk
from QClassifier import QClassifierImpl
import jieba.analyse
from xml.etree import ElementTree
import xml.dom.minidom

# Python 2 only: reload(sys) followed by setdefaultencoding switches the
# process-wide default string encoding to UTF-8.
reload(sys)
sys.setdefaultencoding('utf8')

testtree = ElementTree.parse("../data/testset.xml")
root = testtree.getroot()
buildroot = ElementTree.Element("QuestionSet")

clf = QClassifierImpl(train_data_path='../data/pair.xml')
# clf = QClassifierImpl(train_data_path = 'train.xml')
clf.train()

# list(root) is the documented replacement for the deprecated
# root.getchildren(); it iterates over a snapshot of the direct children.
for question in list(root):
    # The question text is the text of the first child element.
    q_text = question[0].text
    # Up to five keywords (topK=5), joined with single spaces.
    testtags = jieba.analyse.extract_tags(q_text, topK=5)
    testkeys = ' '.join(testtags)
    testquestion = ElementTree.SubElement(buildroot, 'question')
    testquestion.set('id', question.get('id'))
    # question text
    q = ElementTree.SubElement(testquestion, 'q')
    q.text = q_text
    # question category
# -*- coding: utf-8 -*-
# Reads ../data/testset.xml and assembles a "QuestionSet" XML tree in which
# each input child is mirrored as a <question> element (same id) holding a <q>
# element with the question text.
import sys
import re
import nltk
from QClassifier import QClassifierImpl
import jieba.analyse
from xml.etree import ElementTree
import xml.dom.minidom

# Python 2 only: make UTF-8 the default encoding for implicit str/unicode
# conversions.
reload(sys)
sys.setdefaultencoding('utf8')

testtree = ElementTree.parse("../data/testset.xml")
root = testtree.getroot()
buildroot = ElementTree.Element("QuestionSet")

clf = QClassifierImpl(train_data_path='../data/pair.xml')
# clf = QClassifierImpl(train_data_path = 'train.xml')
clf.train()

# Iterate over a snapshot of the root's direct children; list(root) replaces
# the deprecated getchildren() call.
for question in list(root):
    # First child element carries the question text.
    q_text = question[0].text
    # Keep at most five extracted keywords and join them with spaces.
    testtags = jieba.analyse.extract_tags(q_text, topK=5)
    testkeys = ' '.join(testtags)
    testquestion = ElementTree.SubElement(buildroot, 'question')
    testquestion.set('id', question.get('id'))
    # question text
    q = ElementTree.SubElement(testquestion, 'q')
    q.text = q_text
    # question category
# -*- coding: utf-8 -*-
# Trains QClassifierImpl on ../data/pair.xml and prints the type it returns
# for a handful of sample questions.
from QClassifier import QClassifierImpl

# clf = QClassifierImpl(train_data_path = 'train.xml')
clf = QClassifierImpl(train_data_path='../data/pair.xml')
clf.train()

# Sample questions, classified and printed in this order.
sample_questions = (
    u'西班牙的首都是哪座城市?',
    u'飞轮海中的吴尊现在多大了?',
    u'小美指的是哪位明星?',
    u'冯小刚的现任妻子是谁?',
    u'哪场战役代表美国正式卷入第二次世界大战?',
    u'《三国演义》中“官渡之战”中交战双方的指挥官分别是?',
    u'唱感动天感动地的人是那个歌手?',
)
for sample_question in sample_questions:
    # Single-argument print(...) prints the same on Python 2 and also parses
    # on Python 3.
    print(clf.get_type(sample_question))
# Python 2 only: switch the default encoding to UTF-8. The standard streams
# are saved before reload(sys) and put back afterwards, so the stream objects
# that were installed before the reload stay in effect.
stdi, stdo, stde = sys.stdin, sys.stdout, sys.stderr
reload(sys)
sys.setdefaultencoding('utf8')
sys.stdin, sys.stdout, sys.stderr = stdi, stdo, stde
# Single-argument print(...) prints the same on Python 2 and also parses on
# Python 3.
print(sys.getdefaultencoding())

# Load the test data.
## testtree = ElementTree.parse(r"D:\Program Files\Python\Sample.xml")
testtree = ElementTree.parse(r"D:\QAhomework\testset\testset.xml")
# buildroot holds the generated document (the test data in stage2 format),
# kept as an XML tree.
buildroot = ElementTree.Element("buildroot")
i = 0
print(i)
clf = QClassifierImpl(train_data_path='../data/pair.xml')
clf.train()
print(i)
# NOTE(review): fo is not closed anywhere in the visible part of this script;
# confirm that the remainder closes it.
fo = open("testwrite.txt", "a+")
for question in testtree.iter('question'):
    # Extract up to five keywords (topK=5) from the text of the question's
    # first child element. Indexing replaces the deprecated getchildren().
    testtags = jieba.analyse.extract_tags(question[0].text, topK=5)
    testkeys = " ".join(testtags)
    ## print testkeys
    ## testtype= classifier.classify(gender_features(testkeys))
    ## print testtype
    # Create the testquestion child node under buildroot.
    testquestion = ElementTree.SubElement(buildroot, "testquestion")
    # Set the attributes of testquestion.
# -*- coding: utf-8 -*-
# Smoke test: train QClassifierImpl on ../data/pair.xml, then print the type
# reported by get_type() for several sample questions.
from QClassifier import QClassifierImpl

# clf = QClassifierImpl(train_data_path = 'train.xml')
clf = QClassifierImpl(train_data_path='../data/pair.xml')
clf.train()

# The questions are classified and printed in the order listed here.
sample_questions = (
    u'西班牙的首都是哪座城市?',
    u'飞轮海中的吴尊现在多大了?',
    u'小美指的是哪位明星?',
    u'冯小刚的现任妻子是谁?',
    u'哪场战役代表美国正式卷入第二次世界大战?',
    u'《三国演义》中“官渡之战”中交战双方的指挥官分别是?',
    u'唱感动天感动地的人是那个歌手?',
)
for sample_question in sample_questions:
    # Parenthesised single-argument print keeps the Python 2 output unchanged
    # and also parses on Python 3.
    print(clf.get_type(sample_question))