# Per-sample feature accumulators. The other lists appended to below
# (query, query_id, passage, answer, alternatives, passage_len) are expected
# to be initialised before this point, outside the visible fragment.
query_in_word_set = []
ques_mark = []

# Each element of `lines` is parsed as one JSON object; missing keys fall
# back to '' through dict.get.
for i, l in enumerate(lines):
    ge = json.loads(l)

    # NOTE(review): `.han` is presumably SnowNLP's simplified-Chinese
    # normalisation of the text -- confirm against the SnowNLP docs.
    que = SnowNLP(ge.get('query', '')).han
    query.append(que)
    query_id.append(ge.get('query_id', ''))

    # Normalise the passage once and keep the untrimmed form for the
    # fallback below (the original recomputed it a second time).
    full_pas = SnowNLP(ge.get('passage', '')).han
    pas = full_pas
    try:
        # Keep only the text after the last '?' in the passage.
        pas = pas.split('?')[-1].strip()
    except (AttributeError, TypeError):
        # Best effort: leave `pas` untouched if it does not behave like a
        # string. Narrowed from a bare `except:` so that KeyboardInterrupt
        # and SystemExit are no longer swallowed.
        pass
    # Drop a verbatim copy of the query from the passage.
    pas = pas.replace(que, '')

    if len(pas) < 2:
        # Trimming left (almost) nothing: fall back to the full normalised
        # passage and dump the sample for manual inspection.
        pas = full_pas
        print(ge.get('passage', ''))
        print(ge.get('query', ''))
        print(ge.get('answer', ''))
        print('---')

    passage.append(pas)
    answer.append(ge.get('answer', ''))
    gege = clear_alternatives(ge.get('alternatives', ''))
    alternatives.append(gege)

    # Passage length scaled by 150 and capped at 1. The original measured
    # len(passage) -- the accumulator list -- so the feature tracked how many
    # samples had been processed rather than the length of this passage.
    passage_len.append(1 if len(pas) > 150 else len(pas) / 150)
    # 1 when the cleaned passage still contains a '?', else 0.
    ques_mark.append(1 if '?' in pas else 0)
    char_set, char = in_char(que, pas)