Example #1
0
    print("平均同义层过滤后实体个数: %s"%(tf/qnum))

if __name__ == '__main__':
    print(__doc__)

    # CLI: -d picks the dataset, -t/-r set the two layer depths.
    # NOTE(review): -t and -r share the same help text "同义层层数" even though
    # the summary line below labels -r as "关联层层数" — confirm intent.
    parser = OptionParser()
    for flags, dest, helptext in (
            (("-d", "--data"), "data", "选择数据集"),
            (("-t", "--level1"), "level1", "同义层层数"),
            (("-r", "--level2"), "level2", "同义层层数")):
        parser.add_option(*flags, dest=dest, help=helptext)
    opts, _args = parser.parse_args()

    print("载入数据")
    questions = ""
    if opts.data == "ya":
        questions = data.get_data()
    elif opts.data == "duc":
        questions = data.get_duc()
    else:
        # Unknown dataset name: log and abort.
        logging.error("载入数据出现错误")
        sys.exit(1)

    qnum = len(questions)
    syn_depth = int(opts.level1)
    rel_depth = int(opts.level2)

    exp(questions, qnum, syn_depth, rel_depth)
    print("数据集%s 数据长度 %s, 同义层层数 %s 关联层层数 %s" % (opts.data, qnum, syn_depth, rel_depth))
Example #2
0
def exp(questions, qnum):
    """Run the dummy2 extractor over the first *qnum* questions.

    For each question, build an extractor with a 250 budget, extract a
    summary, and score it under the label 'dumm1'.  Results are reported
    via side effects only; nothing is returned.

    Removed the dead locals ``tratio, tquantity, te, tf`` — they were
    assigned but never read anywhere in this function.
    """
    for i in range(qnum):
        print("问题 %s" % (i))
        q = questions[i]
        # 250 is presumably a summary length budget passed to the
        # extractor — confirm against dummy2's signature.
        ose = dummy2(q, 250)

        result = ose.extract()
        ose.evaluation(result, 'dumm1')


if __name__ == '__main__':
    print(__doc__)

    # Single CLI option: -d/--data selects which corpus to load.
    arg_parser = OptionParser()
    arg_parser.add_option("-d", "--data", dest="data", help="选择数据集")
    opts, _ = arg_parser.parse_args()

    print("载入数据")
    questions = ""
    if opts.data == "ya":
        questions = data.get_data()
    elif opts.data == "duc":
        questions = data.get_duc()
    else:
        # Unrecognized dataset: report and bail out.
        logging.error("载入数据出现错误")
        sys.exit(1)

    # Evaluate over every loaded question.
    exp(questions, len(questions))
Example #3
0
from insummer.common_type import Question
from insummer.util import rule_based_sentence_cleaner as RBSC
from insummer.util import NLP

import random

def rand():
    """Return a uniform pseudo-random float in [0.0, 1.0)."""
    value = random.random()
    return value

# Module-level singletons, built once at import time.
nlp = NLP()  # NLP helper used for sentence tokenizing / length counting below

rbsc = RBSC()  # rule-based sentence cleaner (imported as RBSC above)

print("读数据")
# NOTE(review): `get_duc` is not imported in this fragment — presumably the
# DUC dataset loader (cf. `data.get_duc()` in the other examples); confirm
# the missing import elsewhere in the file.
questions = get_duc()

def main():

    cand_sent = []

    origin_length = 0
    new_length = 0
    
    for question in questions:
        for ans in question.get_nbest():
            content = ans.get_content()

            origin_length += nlp.sentence_length(content)
            
            sents = nlp.sent_tokenize(content)