# NOTE(review): the statement below is the tail of a function whose header
# lies above the visible part of this file (it reads that function's `tf` and
# `qnum`).  It is reproduced unchanged; when the file's line structure is
# restored it presumably needs the indentation of that function's body.
print("平均同义层过滤后实体个数: %s"%(tf/qnum))


if __name__ == '__main__':
    # Command-line entry point: pick a dataset, read the two layer depths,
    # run the experiment over every loaded question and print a summary.
    # `data` and `exp` are defined outside this block.
    print(__doc__)

    parser = OptionParser()
    parser.add_option("-d", "--data", dest="data", help="选择数据集")
    parser.add_option("-t", "--level1", dest="level1", help="同义层层数")
    # level2 is reported in the summary line below as the association-layer
    # depth, so its help text must say so (it used to duplicate the -t text).
    parser.add_option("-r", "--level2", dest="level2", help="关联层层数")
    (options, args) = parser.parse_args()

    # Validate the depths before the data load, so that a missing or
    # non-numeric value ends with a usage message (exit status 2) instead of
    # an uncaught TypeError/ValueError after the dataset has been read.
    try:
        level1 = int(options.level1)
        level2 = int(options.level2)
    except (TypeError, ValueError):
        parser.error("-t/--level1 and -r/--level2 are required and must be integers")

    print("载入数据")
    if options.data == "ya":
        questions = data.get_data()
    elif options.data == "duc":
        questions = data.get_duc()
    else:
        # Unknown or missing dataset name: log and stop with a failure status.
        logging.error("载入数据出现错误")
        sys.exit(1)

    length = len(questions)
    exp(questions, length, level1, level2)
    print("数据集%s 数据长度 %s, 同义层层数 %s 关联层层数 %s"%(options.data,length,level1,level2))
def exp(questions, qnum):
    """Run the ``dummy2`` extractor over the first ``qnum`` questions.

    For every index ``i`` in ``range(qnum)`` this prints the index, builds
    ``dummy2(questions[i], 250)``, calls ``extract()`` on it and hands the
    result to ``evaluation`` together with the label ``'dumm1'``.

    Args:
        questions: Collection indexed with integers ``0 .. qnum - 1``.
        qnum: Number of leading questions to process.  Nothing is indexed
            or printed when it is 0.

    Returns:
        None.  The value returned by ``evaluation`` is not used.
    """
    # NOTE(review): 250 is forwarded as dummy2's second argument; its meaning
    # is defined by dummy2, which is not visible here.
    for i in range(qnum):
        print("问题 %s" % i)
        ose = dummy2(questions[i], 250)
        result = ose.extract()
        ose.evaluation(result, 'dumm1')


if __name__ == '__main__':
    # Command-line entry point: pick a dataset with -d/--data and run `exp`
    # over every question that was loaded.  `data` is defined outside this
    # block.
    print(__doc__)

    parser = OptionParser()
    parser.add_option("-d", "--data", dest="data", help="选择数据集")
    (options, args) = parser.parse_args()

    print("载入数据")
    if options.data == "ya":
        questions = data.get_data()
    elif options.data == "duc":
        questions = data.get_duc()
    else:
        # Unknown or missing dataset name: log and stop with a failure status.
        logging.error("载入数据出现错误")
        sys.exit(1)

    exp(questions, len(questions))
from insummer.common_type import Question
from insummer.util import rule_based_sentence_cleaner as RBSC
from insummer.util import NLP
import random


def rand() :
    """Return the value of ``random.random()``, a float in [0.0, 1.0)."""
    return random.random()


# Shared helper objects, created once when the module is loaded.
nlp = NLP()
rbsc = RBSC()

print("读数据")
# NOTE(review): get_duc is not imported in the visible part of this file --
# confirm where it is expected to come from.
questions = get_duc()


def main():
    # NOTE(review): only the beginning of this function is visible; the body
    # continues past the end of this chunk, so the comments below describe
    # just the statements shown.
    cand_sent = []
    origin_length = 0
    new_length = 0
    for question in questions:
        for ans in question.get_nbest():
            content = ans.get_content()
            # Running total of nlp.sentence_length() over every answer text.
            origin_length += nlp.sentence_length(content)
            sents = nlp.sent_tokenize(content)