Beispiel #1
0
# Fragment of what appears to be a per-question loop body (the loop header and
# the definitions of `question`, `testkeys`, `fo`, `clf` and `i` are not visible
# in this excerpt). It appends one <testquestion> node to `buildroot` and
# writes each field's text to `fo`, back-to-back with no separator.
#
# Create the child node <testquestion> under buildroot.
      testquestion = ElementTree.SubElement(buildroot, "testquestion")  
# Populate testquestion's fields (stored as child elements): id, q, category, query.
      testquestion_id = ElementTree.SubElement(testquestion, "id")  
      testquestion_id.text = str(i)
##      print testquestion_id.text
      fo.write( testquestion_id.text);
      
      # Question text comes from the first child element of `question`.
      # NOTE(review): assuming Python 2, str() of the decoded unicode value
      # re-encodes with the default codec; with an ASCII default, non-ASCII
      # text would raise UnicodeEncodeError here - confirm the default encoding.
      testquestion_q = ElementTree.SubElement(testquestion, "q")  
      testquestion_q.text = str(question.getchildren ()[0].text.decode('utf8'))
##      print testquestion_q.text
      fo.write( testquestion_q.text);
      
      # Category is whatever clf.get_type() returns for the raw question text.
      testquestion_category = ElementTree.SubElement(testquestion, "category")  
##      testquestion_category.text = str(testtype)
      testquestion_category.text = clf.get_type(question.getchildren ()[0].text)
##      print testquestion_category.text
      fo.write( testquestion_category.text);
      
      # Query string is taken from `testkeys` (same str(decode) caveat as above).
      testquestion_query = ElementTree.SubElement(testquestion, "query")  
      testquestion_query.text = str(testkeys.decode('utf8'))
##      print testquestion_query.text
      fo.write( testquestion_query.text);
      
##      testquestion_query.text = str(testkeys.encode('utf8'))
##      print testquestion_query.text      
      # Advance the running id used for the next <testquestion>.
      i=i+1


# Wrap the finished root in an ElementTree for output. The original note says
# the document is written to uu.xml; the write call is not visible in this excerpt.
buildtree = ElementTree.ElementTree(buildroot)  
Beispiel #2
0
# Build a <QuestionSet> document with one <question> per child of `root`
# (id attribute, <q> text, <category> from the classifier, <query> keywords)
# and pretty-print it to stdout.
buildroot = ElementTree.Element("QuestionSet")

# Train the classifier once, before the loop.
clf = QClassifierImpl(train_data_path='../data/pair.xml')
# Alternative training set: QClassifierImpl(train_data_path='train.xml')
clf.train()

# Iterate the element directly: Element.getchildren() is deprecated and was
# removed in Python 3.9; iteration and indexing yield the same children.
for question in root:
    q_text = question[0].text  # text of the question's first child element
    testtags = jieba.analyse.extract_tags(q_text, topK=5)
    testkeys = ' '.join(testtags)

    testquestion = ElementTree.SubElement(buildroot, 'question')
    testquestion.set('id', question.get('id'))

    # question text
    q = ElementTree.SubElement(testquestion, 'q')
    q.text = q_text

    # question category
    category = clf.get_type(q_text)
    cate_tree = ElementTree.SubElement(testquestion, 'category')
    cate_tree.text = category

    # query words: the extracted tags joined by single spaces
    query = ElementTree.SubElement(testquestion, 'query')
    query.text = testkeys

xml_string = ElementTree.tostring(buildroot, encoding='utf-8')
# Bind the parsed document to its own name. Assigning it to `xml` would
# shadow the `xml` package and break any later `xml.dom...` lookup.
pretty_doc = xml.dom.minidom.parseString(xml_string)
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(pretty_doc.toprettyxml())
Beispiel #3
0
# Build a <QuestionSet> document with one <question> per child of `root`
# (id attribute, <q> text, <category> from the classifier, <query> keywords)
# and pretty-print it to stdout.
buildroot = ElementTree.Element("QuestionSet")

# Train the classifier once, before the loop.
clf = QClassifierImpl(train_data_path='../data/pair.xml')
# Alternative training set: QClassifierImpl(train_data_path='train.xml')
clf.train()

# Iterate the element directly: Element.getchildren() is deprecated and was
# removed in Python 3.9; iteration and indexing yield the same children.
for question in root:
    q_text = question[0].text  # text of the question's first child element
    testtags = jieba.analyse.extract_tags(q_text, topK=5)
    testkeys = ' '.join(testtags)

    testquestion = ElementTree.SubElement(buildroot, 'question')
    testquestion.set('id', question.get('id'))

    # question text
    q = ElementTree.SubElement(testquestion, 'q')
    q.text = q_text

    # question category
    category = clf.get_type(q_text)
    cate_tree = ElementTree.SubElement(testquestion, 'category')
    cate_tree.text = category

    # query words: the extracted tags joined by single spaces
    query = ElementTree.SubElement(testquestion, 'query')
    query.text = testkeys

xml_string = ElementTree.tostring(buildroot, encoding='utf-8')
# Bind the parsed document to its own name. Assigning it to `xml` would
# shadow the `xml` package and break any later `xml.dom...` lookup.
pretty_doc = xml.dom.minidom.parseString(xml_string)
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(pretty_doc.toprettyxml())
Beispiel #4
0
# -*- coding: utf-8 -*-
from QClassifier import QClassifierImpl

# Smoke check: train the classifier from ../data/pair.xml, then print the
# result of clf.get_type() for each sample question, in order.
# Alternative training set: QClassifierImpl(train_data_path='train.xml')
clf = QClassifierImpl(train_data_path='../data/pair.xml')
clf.train()

for sample_question in (
    u'西班牙的首都是哪座城市?',
    u'飞轮海中的吴尊现在多大了?',
    u'小美指的是哪位明星?',
    u'冯小刚的现任妻子是谁?',
    u'哪场战役代表美国正式卷入第二次世界大战?',
    u'《三国演义》中“官渡之战”中交战双方的指挥官分别是?',
    u'唱感动天感动地的人是那个歌手?',
):
    # Single-argument print(...) prints exactly what `print x` does.
    print(clf.get_type(sample_question))
Beispiel #5
0
    # Fragment of what appears to be a per-question loop body (the loop header
    # and the definitions of `question`, `testkeys`, `fo`, `clf` and `i` are
    # not visible in this excerpt). It appends one <testquestion> node to
    # `buildroot` and writes each field's text to `fo`, back-to-back with no
    # separator.
    #
    # Create the child node <testquestion> under buildroot.
    testquestion = ElementTree.SubElement(buildroot, "testquestion")
    # Populate testquestion's fields (stored as child elements): id, q, category, query.
    testquestion_id = ElementTree.SubElement(testquestion, "id")
    testquestion_id.text = str(i)
    ##      print testquestion_id.text
    fo.write(testquestion_id.text)

    # Question text comes from the first child element of `question`.
    # NOTE(review): assuming Python 2, str() of the decoded unicode value
    # re-encodes with the default codec; with an ASCII default, non-ASCII
    # text would raise UnicodeEncodeError here - confirm the default encoding.
    testquestion_q = ElementTree.SubElement(testquestion, "q")
    testquestion_q.text = str(question.getchildren()[0].text.decode('utf8'))
    ##      print testquestion_q.text
    fo.write(testquestion_q.text)

    # Category is whatever clf.get_type() returns for the raw question text.
    testquestion_category = ElementTree.SubElement(testquestion, "category")
    ##      testquestion_category.text = str(testtype)
    testquestion_category.text = clf.get_type(question.getchildren()[0].text)
    ##      print testquestion_category.text
    fo.write(testquestion_category.text)

    # Query string is taken from `testkeys` (same str(decode) caveat as above).
    testquestion_query = ElementTree.SubElement(testquestion, "query")
    testquestion_query.text = str(testkeys.decode('utf8'))
    ##      print testquestion_query.text
    fo.write(testquestion_query.text)

    ##      testquestion_query.text = str(testkeys.encode('utf8'))
    ##      print testquestion_query.text
    # Advance the running id used for the next <testquestion>.
    i = i + 1

# Write the output document to testsetstep2.xml (the original comment named
# uu.xml, which does not match the path used below).
buildtree = ElementTree.ElementTree(buildroot)
buildtree.write("testsetstep2.xml")
Beispiel #6
0
# -*- coding: utf-8 -*-
from QClassifier import QClassifierImpl

# Smoke check: train the classifier from ../data/pair.xml, then print the
# result of clf.get_type() for each sample question, in order.
# Alternative training set: QClassifierImpl(train_data_path='train.xml')
clf = QClassifierImpl(train_data_path='../data/pair.xml')
clf.train()

for sample_question in (
    u'西班牙的首都是哪座城市?',
    u'飞轮海中的吴尊现在多大了?',
    u'小美指的是哪位明星?',
    u'冯小刚的现任妻子是谁?',
    u'哪场战役代表美国正式卷入第二次世界大战?',
    u'《三国演义》中“官渡之战”中交战双方的指挥官分别是?',
    u'唱感动天感动地的人是那个歌手?',
):
    # Single-argument print(...) prints exactly what `print x` does.
    print(clf.get_type(sample_question))