def verify_post_context_rule():
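    # Post-processing context-rule test (zh tech schema): casing and the
    # protected spans from raw_line (brackets, the quoted segment) are expected
    # to be restored into line, with the spaces around CJK text removed.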
    context = {}
    context[
        "line"] = "i start 中文 很好 i center 晚餐 end i [pet-dn] ' abc & def _ g : ood & bad' 翻译 i"
    context[
        "raw_line"] = "I STarT Chinese good I CENTEr dinner END I [PET-DN] 'ABc&DEF_G:Ood&bAd' translate I"
    rule_engine_context = {}
    rule_engine_context["bifrost"] = bifrost_simulate()

    ruleEngine = rule_engine.RuleEngine(
        "../schema_lib_tech_zh_post_gpu_context.json",
        None,
        mode='lib',
        rule_engine_context=rule_engine_context)

    ruleEngine.execute(context, True)

    print context["line"]
    assert context[
        "line"] == "I STarT中文很好I中间晚餐END I[PET-DN]'ABc&DEF_G:Ood&bAd'翻译I"

    # regression test for infinite loop.
    print "starting infinite loop test. If the test is not finished in serveral seconds, then something wrong."
    context[
        "line"] = "在 pet / mr m3 项目 中 , pet 和 患者 表 子系统 保持 不变 , 而 mr 子系统 升级 到 具有 多 传输 ( tx ) 功能 的 xx@@ x <N> t “ tx ” 。"
    context[
        "raw_line"] = "in the pet / mr m3 project , the pet and patient table sub@@ systems remains unchanged while the mr sub@@ system upgrades to an xx@@ x <N> t &quot; tx &quot; with the multi <-> transmit ( tx ) functionality ."
    ruleEngine.execute(context, True)
    print "finished infinite loop test."
Example n. 2
def main():
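    """Load the schema from the command line, prepare a working directory,
    and run the rule engine over the input files."""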
    # collect input files, if any were given
    infiles = None
    if args.infiles: infiles = args.infiles

    # check schema file
    schema_file = args.schema
    if not os.path.exists(schema_file):
        print 'file %s does not exist' % schema_file
        raise Exception

    if args.workdir:
        workdir = args.workdir
        if not os.path.exists(args.workdir):
            print 'creating working dir %s' % workdir
            os.mkdir(workdir)
    else:
        workdir = "work_%s" % str(uuid.uuid4().hex)
        print 'no working dir specified, creating one with a random name: %s' % workdir
        os.mkdir(workdir)

    st = time.time()
    with rule_engine.RuleEngine(schema_file, workdir) as engine:
        engine.run(infiles)

    print '[All done!]'
    print '[Results were saved to: %s]' % workdir
    print["Total time: %.2f" % (time.time() - st)]
def test_Align_Target_Source():
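    # Alignment post-processing test: given BPE-split tgt/src token arrays and a
    # token-level tgt_src_mapping, the engine is expected to emit merged 'target'
    # and 'source' arrays together with a phrase-level 'mapping' between them.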
    post_schema_path = '../schema_law_zh2en_post_context.json'
    bifrost_instance = bifrost_simulate()
    post_rule_engine = rule_engine.RuleEngine(
        post_schema_path,
        None,
        mode='lib',
        rule_engine_context={'bifrost': bifrost_instance})
    context = {
        "tgt_array": [
            "The", "refrac@@", "tory", "clay", "is", "soil", "and", "its",
            "aluminum", "is", "less", "than", "2", "", "6", ",", "and", "the",
            "aluminum", "content", "is", "more", "than", "30", "%", "."
        ],
        "align_enabled":
        True,
        "raw_line":
        "耐@@ 火 粘@@ 土 呈 土@@ 状 , 其 铝 硅 比 小于 2  6 , 含@@ 铝@@ 量 一般 大于 30 % 。",
        "src_array": [
            "耐@@", "火", "粘@@", "土", "呈", "土@@", "状", ",", "其", "铝", "硅", "比",
            "小于", "2", "", "6", ",", "含@@", "铝@@", "量", "一般", "大于", "30", "%",
            "。"
        ],
        "line":
        "The refrac@@ tory clay is soil and its aluminum is less than 2  6 , and the aluminum content is more than 30 % .",
        "tgt_src_mapping": [
            1, 1, 1, 2, 4, 5, 6, 9, 9, 10, 12, 13, 13, 14, 15, 16, 18, 18, 18,
            17, 17, 20, 21, 22, 23, 20
        ]
    }
    post_rule_engine.execute(context, True)
    res = []
    for i, index in enumerate(context['mapping']):
        res.append(' '.join([context['target'][i], context['source'][index]]))
    assert '\n'.join(res) == '''The refractory 耐火
clay 粘土
is 呈
soil and 土状
its aluminum 铝
is 硅
less 小于
than 2 2
 
6 6
, ,
and the aluminum content is 含铝量
more 一般
than 大于
30 30
% %
. 一般'''
    context = {"tgt_array": ["When", "the", "local", "<->","people", "&apos;s", "congresses", "at", "or", "above", "the", "county", "level", "hold", "a", "meeting", ",", "the", "deputies", "who", "have", "been", "proposed", "to", "be", "removed", "shall", "have", "the", "right", "to", "submit", "their", "arguments", "in", "the", "meeting", "of", "the", "presidium", "and", "the", "plenary", "meeting", "of", "the", "General", "Assembly", ",", "or", "make", "a", "written", "defense", "in", "writing", ",", "and", "the", "presidium", "shall", "issue", "the", "meeting", "."], "align_enabled": True, "raw_line": "县级 以上 的 地方 各级 人民代表大会 举行 会议 的 时候 , 被 提出 罢免 的 代表 有权 在 主席团 会议 和 大会 全体会议 上 提出 申@@ 辩 意见 , 或者 书面 提出 申@@ 辩 意见 , 由 主席团 印发 会议 。", "src_array": ["县级", "以上", "的", "地方", "各级", "人民代表大会", "举行", "会议", "的", "时候", ",", "被", "提出", "罢免", "的", "代表", "有权", "在", "主席团", "会议", "和", "大会", "全体会议", "上", "提出", "申@@", "辩", "意见", ",", "或者", "书面", "提出", "申@@", "辩", "意见", ",", "由", "主席团", "印发", "会议", "。"], "line": "When the local people &apos;s congresses at or above the county level hold a meeting , the deputies who have been proposed to be removed shall have the right to submit their arguments in the meeting of the presidium and the plenary meeting of the General Assembly , or make a written defense in writing , and the presidium shall issue the meeting .", "tgt_src_mapping": \
        [9, 6, 5, 5,5, 5, 5, 4, 1, 1, 0, 0, 0, 7, 7, 7, 10, 15, 15, 12, 12, 12, 13, 13, 13, 13, 16, 16, 16, 16, 17, 26, 26, 26, 28, 18, 18, 20, 18, 18, 20, 21, 22, 22, 29, 21, 21, 21, 29, 29, 31, 33, 33, 33, 34, 30, 36, 36, 38, 37, 38, 38, 39, 39, 40]}
    post_rule_engine.execute(context, True)
    res1 = []
    for i, index in enumerate(context['mapping']):
        res1.append(' '.join([context['target'][i], context['source'][index]]))
    assert '\n'.join(res1) == '''When 时候
def verify_pre_context_rule():
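    # Pre-processing context-rule test with a simulated bifrost: the line is
    # lowercased and "good" ends up rewritten as "nice" (see the assertion).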
    context = {}
    context["line"] = "STarT Chinese good CENTEr dinner END"
    rule_engine_context = {}
    rule_engine_context["bifrost"] = bifrost_simulate()

    ruleEngine = rule_engine.RuleEngine(
        "../schema_lib_tech_en_pre_gpu_context.json",
        None,
        mode='lib',
        rule_engine_context=rule_engine_context)

    ruleEngine.execute(context, True)
    assert context["line"] == "start chinese nice center dinner end"
def verify_pre_context_rule_with_real_bifrost():
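    # Same pre-processing schema as above, but driven by a real Bifrost instance
    # loaded from ../data/rule.txt rather than the simulated one.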
    context = {}
    context["line"] = "STarT Chinese good CENTEr dinner END"
    rule_engine_context = {}
    rule_engine_context["bifrost"] = Bifrost('../data/rule.txt')

    ruleEngine = rule_engine.RuleEngine(
        "../schema_lib_tech_en_pre_gpu_context.json",
        None,
        mode='lib',
        rule_engine_context=rule_engine_context)

    ruleEngine.execute(context, True)
    print context["line"]
    assert context["line"] == "start chinese good center dinner end"
Example n. 6
    def __init__(self, **kwargs):
        super(self.__class__, self).__init__()
        self.set_desc('File: run schema per generator', self)
        if 'schema' in kwargs: self.schema = kwargs['schema']
        else:
            print 'schema field is required, check your schema'
            raise Exception
        if 'generator' in kwargs:
            self.generator = kwargs['generator']
        else:
            print 'generator field is required, check your schema'
            raise Exception
        self.engine = rule_engine.RuleEngine(self.schema, ".")
        self.rerun = 0
        if 'rerun' in kwargs: self.rerun = kwargs['rerun']
def verify_pre_context_rule_with_bpe():
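    # Pre-processing with Bifrost rules plus BPE segmentation: 3000 becomes the
    # protected placeholder <N:MzAwMA==>, "24-hour" is split around a <-> token,
    # and BPE joins are marked with '@@' (see the assertion below).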
    context = {}
    context[
        "line"] = "Proteinuria is defined as more than 3000 mg in a 24-hour collection. whichone?"
    rule_engine_context = {}
    rule_engine_context["bifrost"] = Bifrost('../data/rule.txt')
    rule_engine_context["bpe"] = BPE(file('../data/codebook.en.txt'), '@@',
                                     file('../data/nonbreakword.en.txt'))

    ruleEngine = rule_engine.RuleEngine(
        "../schema_lib_medical_en_pre_gpu_context.json",
        None,
        mode='lib',
        rule_engine_context=rule_engine_context)

    ruleEngine.execute(context, True)
    print context["line"]
    assert context[
        "line"] == "proteinuria is defined as more than <N:MzAwMA==> mg in a 24 <-> hour collec@@ tion. which@@ one ?"
def test_word_case():
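    # Word-case restoration test: each entry of test_data_restore_word_case is run
    # through the zh post-processing schema, and the restored lines must match
    # expect_res (abbreviations such as ADR, TEAE and MedDRA keep their casing).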
    rule_engine_context = {}
    rule_engine_context["bifrost"] = bifrost_simulate()
    expect_res = '''包括上述ADR,下表列出了在临床试验和上市后经验中使用Sibelium报告的ADR。
这些风险因素将包括:异常游离轻链(FLC)比率(<0.126或>8),血清M蛋白≥3 g/dL,尿M蛋白>500 mg/24小时,IgA亚型和免疫缺陷(至少1个未参与的免疫球蛋白[IgG,IgA,IgM]低于正常[LLN])。
所有AE的摘要将基于治疗紧急不良事件(TEAE)定义为在最后一次研究药物给药后30天首次施用研究药物后发生的任何AE;或任何被认为与药物相关的AE(很可能,可能或可能相关)不管事件的开始日期;或在基线时存在但在毒性等级中恶化或随后被研究者认为与药物相关的任何AE。
MedDRA SOC和优选术语最常见的(任何手臂中至少10%)TEAE的发生率
研究者认为与研究药物合理相关的TEAE的发生率,MedDRA SOC和首选术语TEAE.'''
    ruleEngine = rule_engine.RuleEngine(
        "../schema_lib_tech_zh_post_gpu_context.json",
        None,
        mode='lib',
        rule_engine_context=rule_engine_context)
    restored_sen = []
    for data in test_data_restore_word_case:
        print "before restore word case:"
        print data['line']
        ruleEngine.execute(data, True)
        print "after restore word case:"
        print data['line']
        restored_sen.append(data['line'])
    assert '\n'.join(restored_sen) == expect_res
Example n. 9
    def test_align_context_file_rule(self):
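        # File-driven alignment regression test: every line of file_in holds
        # raw_line, translated line, source tokens and tgt_src_mapping separated
        # by '<.>'; the aligned output is appended to file_out and then compared
        # against the reference in test_out.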
        tgt_src = []
        lines = []
        with open(file_in, 'r') as filein:
            count = 0
            whole_time = 0
            for line in filein:
                context = {}
                data = line.split('<.>')
                print data
                context['line'] = data[1]
                context['raw_line'] = data[0]

                context['align_enabled'] = True

                context['tgt_array'] = data[1].split(' ')
                context['src_array'] = data[2].split(' ')
                context['tgt_src_mapping'] = [
                    int(item) for item in data[3].split(' ')
                ]

                rule_engine_context = {}
                rule_engine_context["bifrost"] = bifrost_simulate()

                engine = rule_engine.RuleEngine(
                    "../schema_lib_medical_zh_post_align_gpu_context.json",
                    None,
                    mode='lib',
                    rule_engine_context=rule_engine_context)
                start_time = time.time()
                engine.execute(context)
                end_time = time.time()
                print context
                whole_time += end_time - start_time
                count += 1
                with open(file_out, 'a+') as fileout:
                    fileout.write(data[0])
                    fileout.write('\n')
                    fileout.write(context['line'])
                    fileout.write('\n')
                    fileout.write(data[1])
                    fileout.write('\n')

                    tgt_origin = []
                    for item in context['tgt_origin']:
                        tgt_origin.append(item.decode('utf8'))
                    fileout.write(' '.join(tgt_origin))
                    fileout.write('\n')

                    src_origin = []
                    for item in context['src_origin']:
                        src_origin.append(item.decode('utf8'))
                    fileout.write(' '.join(src_origin))
                    fileout.write('\n')

                    mapping = [str(item) for item in context['mapping']]
                    fileout.write(' '.join(mapping))
                    fileout.write('\n')

                    target = context['target']
                    fileout.write(' '.join(target))
                    fileout.write('\n')

                    source = context['source']
                    fileout.write(' '.join(source))
                    fileout.write('\n')

                    fileout.write('\n')
                    fileout.write('\n')
                    mapping_array = context['mapping']
                    tgt_src.append(' '.join([
                        target[i] + ":" + source[mapping_array[i]]
                        for i in range(len(target))
                        if mapping_array[i] < len(source)
                    ]))
                    lines.append(context['line'])
            print "average process time = %.2f" % (whole_time / count)
        self.assertEqual(Common.count_lines_of_file(file_in),
                         Common.count_blocks_of_file(file_out),
                         'Align function is not correct!')

        test = open(test_out, 'r')
        test_data = test.readlines()

        for i in range(len(tgt_src)):
            self.assertEqual(test_data[2 * i].strip(), lines[i].strip(),
                             'Translate result is not correct!')
            self.assertEqual(test_data[2 * i + 1].strip(), tgt_src[i].strip(),
                             'Align result is not correct!')