예제 #1
0
        # Per-record accumulators created here; the other lists appended to
        # below (query, query_id, passage, answer, alternatives, passage_len)
        # are created outside this view.
        query_in_word_set = []
        ques_mark = []
        # Each entry of `lines` is parsed as one standalone JSON document.
        for i in range(0, len(lines)):
            l = lines[i]
            ge = json.loads(l)

            # Normalise the query text through SnowNLP's `.han` property
            # (traditional -> simplified Chinese, per the SnowNLP docs).
            que = SnowNLP(ge.get('query', '')).han
            query.append(que)

            query_id.append(ge.get('query_id', ''))
            pas = SnowNLP(ge.get('passage', '')).han
            # Keep only the text after the last ASCII '?' in the passage.
            # NOTE(review): a full-width '？' is not matched by this split.
            # NOTE(review): if `pas` is a str this cannot raise, so the bare
            # except looks redundant (and would hide real errors) -- confirm.
            try:
                pas = pas.split('?')[-1].strip()
            except:
                pass
            # Drop every verbatim occurrence of the query from the passage.
            pas = pas.replace(que, '')

            # Fallback: when fewer than 2 characters survive the trimming,
            # restore the full converted passage and dump the raw record to
            # stdout for inspection.
            if len(pas) < 2:
                pas = SnowNLP(ge.get('passage', '')).han
                print(ge.get('passage', ''))
                print(ge.get('query', ''))
                print(ge.get('answer', ''))
                print('---')
            passage.append(pas)

            answer.append(ge.get('answer', ''))
            # clear_alternatives is defined elsewhere; its output is stored
            # as-is.
            gege = clear_alternatives(ge.get('alternatives', ''))
            alternatives.append(gege)
            # Feature capped at 1: 1 when the measured length exceeds 150,
            # otherwise length / 150.
            # NOTE(review): `len(passage)` is the size of the accumulator list
            # (records appended so far), not the length of the current text
            # `pas`; this looks like it was meant to be len(pas) -- confirm.
            passage_len.append(1 if len(passage) > 150 else len(passage) / 150)
            # 1 if the final passage text contains an ASCII '?', else 0.
            # Because the split above keeps only text after the last '?', this
            # can be 1 only when the fallback branch restored the full passage.
            ques_mark.append(1 if '?' in pas else 0)
            # in_char is defined elsewhere; it is called with the query and
            # the passage text and its result is unpacked into two values.
            char_set, char = in_char(que, pas)