# Test-set preparation fragment (flattened onto one line; original indentation is lost,
# so the exact nesting of the statements below cannot be confirmed from this view).
# Steps, in the order they appear:
#   1. test_file is taken from f_n and asserted to be non-None.
#   2. cat starts as 'other' and becomes 'zh' when path contains 'Chinese' or 'Japanese'.
#   3. The test file is read as UTF-8; the scan stops at the first line shorter than two
#      characters, and a line containing '# sentence' or '# text' sets cat = 'gold'.
#      (presumably both checks sit inside the for loop -- TODO confirm)
#   4. reader.get_raw(...) is called with the output name 'raw_test.txt', which is then
#      read back through reader.raw(...).
#   5. Gold labels come from reader.test_gold(...), here with ignore_mwt=args.ignore_mwt.
#   6. toolbox.get_new_chars(...) is run on raw_test.txt against char2idx; when emb_path
#      is set, toolbox.get_valid_chars(...) is called, otherwise valid_chars is None.
#   7. toolbox.update_char_dict(...) returns four values here:
#      char2idx, idx2char, unk_chars_idx, sub_dict.
#   8. The final toolbox.get_input_vec_raw( call is cut off; its arguments are not visible.
# NOTE(review): the object returned by codecs.open(...) is iterated directly and never
#   closed explicitly; the early `break` leaves it to the garbage collector.
# NOTE(review): `new_chars + char2idx.keys()` only works if keys() returns a list
#   (Python 2 dict behaviour); if char2idx is a plain dict on Python 3 this raises
#   TypeError -- confirm the target interpreter.
# NOTE(review): `assert` is stripped under -O, so the None check disappears there.
test_file = f_n assert test_file is not None cat = 'other' if 'Chinese' in path or 'Japanese' in path: cat = 'zh' for line in codecs.open(path + '/' + test_file, 'r', encoding='utf-8'): if len(line) < 2: break if '# sentence' in line or '# text' in line: cat = 'gold' reader.get_raw(path, test_file, 'raw_test.txt', cat, form=args.format) raws_test = reader.raw(path + '/raw_test.txt') test_y_gold = reader.test_gold(path + '/' + test_file, form=args.format, is_space=is_space, ignore_mwt=args.ignore_mwt) new_chars = toolbox.get_new_chars(path + '/raw_test.txt', char2idx, is_space) if emb_path is not None: valid_chars = toolbox.get_valid_chars(new_chars + char2idx.keys(), emb_path) else: valid_chars = None char2idx, idx2char, unk_chars_idx, sub_dict = toolbox.update_char_dict( char2idx, new_chars, unk_chars_idx, valid_chars) test_x, max_len_test = toolbox.get_input_vec_raw(
# Test-set preparation fragment (flattened onto one line; original indentation is lost,
# so the exact nesting of the statements below cannot be confirmed from this view).
# Steps, in the order they appear:
#   1. test_file is taken from f_n and asserted to be non-None.
#   2. cat starts as 'other' and becomes 'zh' when path contains 'Chinese' or 'Japanese'.
#   3. The test file is read as UTF-8; the scan stops at the first line shorter than two
#      characters, and a line containing '# sentence' or '# text' sets cat = 'gold'.
#      (presumably both checks sit inside the for loop -- TODO confirm)
#   4. reader.get_raw(...) is called with the output name 'raw_test.txt', which is then
#      read back through reader.raw(...).
#   5. Gold labels come from reader.test_gold(...) with only form and is_space passed
#      as keyword arguments.
#   6. toolbox.get_new_chars(...) is run on raw_test.txt against char2idx; when emb_path
#      is set, toolbox.get_valid_chars(...) is called, otherwise valid_chars is None.
#   7. toolbox.update_char_dict(...) returns three values here:
#      char2idx, idx2char, unk_chars.
#   8. The final toolbox.get_input_vec_raw( call is cut off; its arguments are not visible.
# NOTE(review): the object returned by codecs.open(...) is iterated directly and never
#   closed explicitly; the early `break` leaves it to the garbage collector.
# NOTE(review): `new_chars + char2idx.keys()` only works if keys() returns a list
#   (Python 2 dict behaviour); if char2idx is a plain dict on Python 3 this raises
#   TypeError -- confirm the target interpreter.
# NOTE(review): `assert` is stripped under -O, so the None check disappears there.
test_file = f_n assert test_file is not None cat = 'other' if 'Chinese' in path or 'Japanese' in path: cat = 'zh' for line in codecs.open(path + '/' + test_file, 'r', encoding='utf-8'): if len(line) < 2: break if '# sentence' in line or '# text' in line: cat = 'gold' reader.get_raw(path, test_file, 'raw_test.txt', cat, form=args.format) raws_test = reader.raw(path + '/raw_test.txt') test_y_gold = reader.test_gold(path + '/' + test_file, form=args.format, is_space=is_space) new_chars = toolbox.get_new_chars(path + '/raw_test.txt', char2idx, is_space) if emb_path is not None: valid_chars = toolbox.get_valid_chars(new_chars + char2idx.keys(), emb_path) else: valid_chars = None char2idx, idx2char, unk_chars = toolbox.update_char_dict( char2idx, new_chars, unk_chars, valid_chars) test_x, max_len_test = toolbox.get_input_vec_raw(
# Test-set preparation fragment (flattened onto one line; original indentation is lost,
# so the exact nesting of the statements below cannot be confirmed from this view).
# `cat` and `test_file` are used but not assigned within this fragment; they must be
# set by code that is not visible here.
# Steps, in the order they appear:
#   1. The test file under test_language_dir is read as UTF-8; the scan stops at the
#      first line shorter than two characters, and a line containing '# sentence' or
#      '# text' sets cat = 'gold'.
#      (presumably both checks sit inside the for loop -- TODO confirm)
#   2. reader.get_raw(...) is called with the output name 'raw_test.txt', which is then
#      read back through reader.raw(...).
#   3. Gold labels come from reader.test_gold(...), here with ignore_mwt=args.ignore_mwt.
#   4. toolbox.get_new_chars(...) is run on raw_test.txt against char2idx; when emb_path
#      is set, toolbox.get_valid_chars(...) is called, otherwise valid_chars is None.
#   5. toolbox.update_char_dict(...) returns four values here:
#      char2idx, idx2char, unk_chars_idx, sub_dict.
#   6. The final toolbox.get_input_vec_raw_test_new( call, unpacked into
#      test_x1, test_x2, max_len_test, is cut off; its arguments are not visible.
# NOTE(review): the object returned by codecs.open(...) is iterated directly and never
#   closed explicitly; the early `break` leaves it to the garbage collector.
# NOTE(review): `new_chars + char2idx.keys()` only works if keys() returns a list
#   (Python 2 dict behaviour); if char2idx is a plain dict on Python 3 this raises
#   TypeError -- confirm the target interpreter.
for line in codecs.open(test_language_dir + '/' + test_file, 'r', encoding='utf-8'): if len(line) < 2: break if '# sentence' in line or '# text' in line: cat = 'gold' reader.get_raw(test_language_dir, test_file, 'raw_test.txt', cat, form=args.format) raws_test = reader.raw(test_language_dir + '/raw_test.txt') test_y_gold = reader.test_gold(test_language_dir + '/' + test_file, form=args.format, is_space=is_space, ignore_mwt=args.ignore_mwt) new_chars = toolbox.get_new_chars(test_language_dir + '/raw_test.txt', char2idx, is_space) if emb_path is not None: valid_chars = toolbox.get_valid_chars(new_chars + char2idx.keys(), emb_path) else: valid_chars = None char2idx, idx2char, unk_chars_idx, sub_dict = toolbox.update_char_dict( char2idx, new_chars, unk_chars_idx, valid_chars) test_x1, test_x2, max_len_test = toolbox.get_input_vec_raw_test_new(