def verify_post_context_rule():
    """Run the zh post-processing context schema through RuleEngine in 'lib' mode.

    Two executions are performed on the same engine and context dict:

    1. A mixed English/Chinese sample whose rewritten ``context["line"]`` is
       asserted against a fixed expected string.
    2. A regression input for an infinite loop; this call only has to return,
       its result is not asserted.

    Raises:
        AssertionError: if the first execution does not produce the expected
            line.
    """
    context = {}
    context["line"] = "i start 中文 很好 i center 晚餐 end i [pet-dn] ' abc & def _ g : ood & bad' 翻译 i"
    context["raw_line"] = "I STarT Chinese good I CENTEr dinner END I [PET-DN] 'ABc&DEF_G:Ood&bAd' translate I"

    rule_engine_context = {"bifrost": bifrost_simulate()}
    ruleEngine = rule_engine.RuleEngine(
        "../schema_lib_tech_zh_post_gpu_context.json",
        None,
        mode='lib',
        rule_engine_context=rule_engine_context)

    ruleEngine.execute(context, True)
    print(context["line"])
    assert context["line"] == "I STarT中文很好I中间晚餐END I[PET-DN]'ABc&DEF_G:Ood&bAd'翻译I"

    # Regression test for infinite loop.
    print("starting infinite loop test. If the test is not finished in several seconds, then something wrong.")
    context["line"] = "在 pet / mr m3 项目 中 , pet 和 患者 表 子系统 保持 不变 , 而 mr 子系统 升级 到 具有 多 传输 ( tx ) 功能 的 xx@@ x <N> t “ tx ” 。"
    # Single-quoted on purpose: the text itself contains double quotes, which
    # would otherwise terminate the literal early.
    context["raw_line"] = 'in the pet / mr m3 project , the pet and patient table sub@@ systems remains unchanged while the mr sub@@ system upgrades to an xx@@ x <N> t " tx " with the multi <-> transmit ( tx ) functionality .'
    ruleEngine.execute(context, True)
    print("finished infinite loop test.")
def main():
    """Run the rule engine described by ``args.schema`` over ``args.infiles``.

    Reads ``infiles``, ``schema`` and ``workdir`` from the ``args`` object
    defined outside this function. When no working directory is given, a
    fresh ``work_<uuid hex>`` directory is created. Progress, the result
    location and the elapsed wall-clock time are printed to stdout.

    Raises:
        Exception: if the schema file does not exist.
    """
    # A falsy ``args.infiles`` is passed to the engine as None.
    infiles = args.infiles if args.infiles else None

    # The schema file must exist before anything is created on disk.
    schema_file = args.schema
    if not os.path.exists(schema_file):
        message = 'file %s does not exist' % schema_file
        print(message)
        raise Exception(message)

    if args.workdir:
        workdir = args.workdir
        if not os.path.exists(workdir):
            print('create working dir %s' % workdir)
            os.mkdir(workdir)
    else:
        workdir = "work_%s" % uuid.uuid4().hex
        print('not specified working dir, create one with random name: %s'
              % workdir)
        os.mkdir(workdir)

    st = time.time()
    with rule_engine.RuleEngine(schema_file, workdir) as engine:
        engine.run(infiles)

    print('[All done!]')
    print('[Results were saved to: %s]' % workdir)
    # Printed as a plain bracketed line, matching the two status lines above.
    print('[Total time: %.2f]' % (time.time() - st))
def test_Align_Target_Source(): post_schema_path = '../schema_law_zh2en_post_context.json' bifrost_instance = bifrost_simulate() post_rule_engine = rule_engine.RuleEngine( post_schema_path, None, mode='lib', rule_engine_context={'bifrost': bifrost_instance}) context = { "tgt_array": [ "The", "refrac@@", "tory", "clay", "is", "soil", "and", "its", "aluminum", "is", "less", "than", "2", "", "6", ",", "and", "the", "aluminum", "content", "is", "more", "than", "30", "%", "." ], "align_enabled": True, "raw_line": "耐@@ 火 粘@@ 土 呈 土@@ 状 , 其 铝 硅 比 小于 2 6 , 含@@ 铝@@ 量 一般 大于 30 % 。", "src_array": [ "耐@@", "火", "粘@@", "土", "呈", "土@@", "状", ",", "其", "铝", "硅", "比", "小于", "2", "", "6", ",", "含@@", "铝@@", "量", "一般", "大于", "30", "%", "。" ], "line": "The refrac@@ tory clay is soil and its aluminum is less than 2 6 , and the aluminum content is more than 30 % .", "tgt_src_mapping": [ 1, 1, 1, 2, 4, 5, 6, 9, 9, 10, 12, 13, 13, 14, 15, 16, 18, 18, 18, 17, 17, 20, 21, 22, 23, 20 ] } post_rule_engine.execute(context, True) res = [] for i, index in enumerate(context['mapping']): res.append(' '.join([context['target'][i], context['source'][index]])) assert '\n'.join(res) == '''The refractory 耐火 clay 粘土 is 呈 soil and 土状 its aluminum 铝 is 硅 less 小于 than 2 2 6 6 , , and the aluminum content is 含铝量 more 一般 than 大于 30 30 % % . 
一般''' context = {"tgt_array": ["When", "the", "local", "<->","people", "'s", "congresses", "at", "or", "above", "the", "county", "level", "hold", "a", "meeting", ",", "the", "deputies", "who", "have", "been", "proposed", "to", "be", "removed", "shall", "have", "the", "right", "to", "submit", "their", "arguments", "in", "the", "meeting", "of", "the", "presidium", "and", "the", "plenary", "meeting", "of", "the", "General", "Assembly", ",", "or", "make", "a", "written", "defense", "in", "writing", ",", "and", "the", "presidium", "shall", "issue", "the", "meeting", "."], "align_enabled": True, "raw_line": "县级 以上 的 地方 各级 人民代表大会 举行 会议 的 时候 , 被 提出 罢免 的 代表 有权 在 主席团 会议 和 大会 全体会议 上 提出 申@@ 辩 意见 , 或者 书面 提出 申@@ 辩 意见 , 由 主席团 印发 会议 。", "src_array": ["县级", "以上", "的", "地方", "各级", "人民代表大会", "举行", "会议", "的", "时候", ",", "被", "提出", "罢免", "的", "代表", "有权", "在", "主席团", "会议", "和", "大会", "全体会议", "上", "提出", "申@@", "辩", "意见", ",", "或者", "书面", "提出", "申@@", "辩", "意见", ",", "由", "主席团", "印发", "会议", "。"], "line": "When the local people 's congresses at or above the county level hold a meeting , the deputies who have been proposed to be removed shall have the right to submit their arguments in the meeting of the presidium and the plenary meeting of the General Assembly , or make a written defense in writing , and the presidium shall issue the meeting .", "tgt_src_mapping": \ [9, 6, 5, 5,5, 5, 5, 4, 1, 1, 0, 0, 0, 7, 7, 7, 10, 15, 15, 12, 12, 12, 13, 13, 13, 13, 16, 16, 16, 16, 17, 26, 26, 26, 28, 18, 18, 20, 18, 18, 20, 21, 22, 22, 29, 21, 21, 21, 29, 29, 31, 33, 33, 33, 34, 30, 36, 36, 38, 37, 38, 38, 39, 39, 40]} post_rule_engine.execute(context, True) res1 = [] for i, index in enumerate(context['mapping']): res1.append(' '.join([context['target'][i], context['source'][index]])) assert '\n'.join(res1) == '''When 时候
def verify_pre_context_rule():
    """Check the en pre-processing context schema against the simulated bifrost.

    Executes the engine (in 'lib' mode) on one mixed-case sentence and asserts
    the rewritten ``line`` value.

    Raises:
        AssertionError: if the processed line differs from the expected text.
    """
    sample = {"line": "STarT Chinese good CENTEr dinner END"}
    engine_context = {"bifrost": bifrost_simulate()}

    engine = rule_engine.RuleEngine(
        "../schema_lib_tech_en_pre_gpu_context.json",
        None,
        mode='lib',
        rule_engine_context=engine_context)
    engine.execute(sample, True)

    expected = "start chinese nice center dinner end"
    assert sample["line"] == expected
def verify_pre_context_rule_with_real_bifrost():
    """Check the en pre-processing context schema using a real Bifrost object.

    The Bifrost instance is constructed from '../data/rule.txt'. The engine is
    executed in 'lib' mode on one mixed-case sentence; the processed line is
    printed and then asserted.

    Raises:
        AssertionError: if the processed line differs from the expected text.
    """
    sample = {"line": "STarT Chinese good CENTEr dinner END"}
    engine_context = {"bifrost": Bifrost('../data/rule.txt')}

    engine = rule_engine.RuleEngine(
        "../schema_lib_tech_en_pre_gpu_context.json",
        None,
        mode='lib',
        rule_engine_context=engine_context)
    engine.execute(sample, True)

    processed = sample["line"]
    print(processed)
    assert processed == "start chinese good center dinner end"
def __init__(self, **kwargs):
    """Configure the step from keyword arguments and build its RuleEngine.

    Args:
        **kwargs: ``schema`` and ``generator`` are required; ``rerun`` is
            optional and defaults to 0.

    Raises:
        Exception: if ``schema`` or ``generator`` is missing. The message
            (also printed to stdout) names the missing field.
    """
    # NOTE(review): super(self.__class__, self) recurses without end if this
    # __init__ is ever reached from a subclass instance, because
    # self.__class__ is then the subclass. The enclosing class name is not
    # visible from here; replace with super(<ThisClass>, self) when editing
    # the class.
    super(self.__class__, self).__init__()
    self.set_desc('File: run schema per generator', self)

    # Required fields are validated and stored in declaration order.
    for field in ('schema', 'generator'):
        if field not in kwargs:
            message = '%s field is required, check your schema' % field
            print(message)
            raise Exception(message)
        setattr(self, field, kwargs[field])

    self.engine = rule_engine.RuleEngine(self.schema, ".")
    self.rerun = kwargs['rerun'] if 'rerun' in kwargs else 0
def verify_pre_context_rule_with_bpe():
    """Check the medical en pre-processing schema with Bifrost and BPE attached.

    The engine context carries a Bifrost built from '../data/rule.txt' and a
    BPE object built from the codebook and non-break-word files with '@@' as
    its second argument. The processed line is printed and then asserted.

    Raises:
        AssertionError: if the processed line differs from the expected text.
    """
    sample = {
        "line": "Proteinuria is defined as more than 3000 mg in a 24-hour collection. whichone?",
    }

    bifrost = Bifrost('../data/rule.txt')
    codebook = open('../data/codebook.en.txt')
    nonbreak_words = open('../data/nonbreakword.en.txt')
    engine_context = {
        "bifrost": bifrost,
        "bpe": BPE(codebook, '@@', nonbreak_words),
    }

    engine = rule_engine.RuleEngine(
        "../schema_lib_medical_en_pre_gpu_context.json",
        None,
        mode='lib',
        rule_engine_context=engine_context)
    engine.execute(sample, True)

    processed = sample["line"]
    print(processed)
    assert processed == "proteinuria is defined as more than <N:MzAwMA==> mg in a 24 <-> hour collec@@ tion. which@@ one ?"
def test_word_case():
    """Check word-case restoration through the zh post-processing schema.

    Every entry of ``test_data_restore_word_case`` (defined outside this
    function) is executed in place by the engine; its ``line`` is printed
    before and after. The processed lines, joined with newlines, must equal
    the expected text.

    Raises:
        AssertionError: if the joined result differs from the expected text.
    """
    engine_context = {"bifrost": bifrost_simulate()}
    # NOTE(review): the result is joined with '\n'; confirm the line breaks of
    # this literal are intact in the real file.
    expect_res = '''包括上述ADR,下表列出了在临床试验和上市后经验中使用Sibelium报告的ADR。 这些风险因素将包括:异常游离轻链(FLC)比率(<0.126或>8),血清M蛋白≥3 g/dL,尿M蛋白>500 mg/24小时,IgA亚型和免疫缺陷(至少1个未参与的免疫球蛋白[IgG,IgA,IgM]低于正常[LLN])。 所有AE的摘要将基于治疗紧急不良事件(TEAE)定义为在最后一次研究药物给药后30天首次施用研究药物后发生的任何AE;或任何被认为与药物相关的AE(很可能,可能或可能相关)不管事件的开始日期;或在基线时存在但在毒性等级中恶化或随后被研究者认为与药物相关的任何AE。 MedDRA SOC和优选术语最常见的(任何手臂中至少10%)TEAE的发生率 研究者认为与研究药物合理相关的TEAE的发生率,MedDRA SOC和首选术语TEAE.'''

    engine = rule_engine.RuleEngine(
        "../schema_lib_tech_zh_post_gpu_context.json",
        None,
        mode='lib',
        rule_engine_context=engine_context)

    restored = []
    for entry in test_data_restore_word_case:
        print("before restore word case:")
        print(entry['line'])
        engine.execute(entry, True)
        print("after restore word case:")
        print(entry['line'])
        restored.append(entry['line'])

    actual = '\n'.join(restored)
    assert actual == expect_res
def test_align_context_file_rule(self):
    """Run the medical zh post-align schema over ``file_in`` and verify it.

    ``file_in``, ``file_out``, ``test_out`` and ``Common`` are names defined
    outside this method. Each input line is split on '<.>' into four fields:
    field 0 is used as ``raw_line``, field 1 as the target text, field 2 as
    the source tokens and field 3 as the space-separated integer mapping.

    For every input line one block is appended to ``file_out``. After all
    lines are processed the method asserts that:

    * the number of lines in ``file_in`` equals the number of blocks in
      ``file_out``;
    * line ``2 * i`` of ``test_out`` equals the i-th processed line and line
      ``2 * i + 1`` equals the i-th "target:source" alignment string.
    """
    tgt_src = []
    lines = []
    count = 0
    whole_time = 0

    with open(file_in, 'r') as filein:
        for line in filein:
            data = line.split('<.>')
            print(data)
            context = {
                'line': data[1],
                'raw_line': data[0],
                'align_enabled': True,
                'tgt_array': data[1].split(' '),
                'src_array': data[2].split(' '),
                'tgt_src_mapping': [int(item) for item in data[3].split(' ')],
            }

            # A new engine is built for every line, outside the timed region,
            # so only execute() contributes to the reported average.
            rule_engine_context = {"bifrost": bifrost_simulate()}
            engine = rule_engine.RuleEngine(
                "../schema_lib_medical_zh_post_align_gpu_context.json",
                None,
                mode='lib',
                rule_engine_context=rule_engine_context)

            start_time = time.time()
            engine.execute(context)
            end_time = time.time()
            print(context)
            whole_time += end_time - start_time
            count += 1

            target = context['target']
            source = context['source']
            mapping_array = context['mapping']
            tgt_origin = [item.decode('utf8') for item in context['tgt_origin']]
            src_origin = [item.decode('utf8') for item in context['src_origin']]

            # Each record is written with its own write() call on purpose:
            # the records mix byte strings and decoded text, which must not be
            # concatenated into a single string.
            records = (
                data[0],
                context['line'],
                data[1],
                ' '.join(tgt_origin),
                ' '.join(src_origin),
                ' '.join([str(item) for item in mapping_array]),
                ' '.join(target),
                ' '.join(source),
            )
            with open(file_out, 'a+') as fileout:
                for record in records:
                    fileout.write(record)
                    fileout.write('\n')
                # Two extra newlines end the block for this input line.
                fileout.write('\n')
                fileout.write('\n')

            # Pair each target token with its mapped source token, skipping
            # mapping indices that fall beyond the source list.
            tgt_src.append(' '.join([
                token + ":" + source[mapping_array[i]]
                for i, token in enumerate(target)
                if mapping_array[i] < len(source)
            ]))
            lines.append(context['line'])

    print("average process time = %.2f" % (whole_time / count))

    self.assertEqual(Common.count_lines_of_file(file_in),
                     Common.count_blocks_of_file(file_out),
                     'Align function is not correct!')

    # Read the reference file inside a context manager so the handle is closed.
    with open(test_out, 'r') as test:
        test_data = test.readlines()

    for i, (translated, aligned) in enumerate(zip(lines, tgt_src)):
        self.assertEqual(test_data[2 * i].strip(), translated.strip(),
                         'Translate result is not correct!')
        self.assertEqual(test_data[2 * i + 1].strip(), aligned.strip(),
                         'Align result is not correct!')