parser.add_argument('--rbpe', action='store_true')
 parser.add_argument('--tt1_beam', action='store_true')
 args = parser.parse_args(sys.argv[1:])
 
 if args.debug:
     DEBUG=True
     ruleLearningLib.DEBUG=True
 
 if args.rbpe:
     ruleLearningLib.RBPE=True
 
 ruleLearningLib.AT_LexicalTagsProcessor.initialize(args.tag_groups_file_name,args.tag_sequences_file_name)
 
 ruleList=RuleList()
 
 RuleApplicationHypothesis.set_target_language(args.target_language)
 RuleApplicationHypothesis.set_apertium_data_dir(args.apertium_data_dir)
 RuleApplicationHypothesis.set_minimum_covered_words(args.minimum_covered_words)
 
 #load alignment templates
 if args.alignment_templates.endswith(".gz"):
     gfile=gzip.open(args.alignment_templates)
 else:
     gfile=open(args.alignment_templates)
 for line in gfile:
     line=line.strip().decode('utf-8')
     at=AlignmentTemplate()
     at.parse(line)
     ruleList.add(at)
 gfile.close()
 ruleLists=[ruleList]
 parser.add_argument('--target_language',default='ca')
 parser.add_argument('--apertium_data_dir')
 parser.add_argument('--rbpe', action='store_true')
 parser.add_argument('--tt1_beam', action='store_true')
 args = parser.parse_args(sys.argv[1:])
 
 if args.debug:
     DEBUG=True
     ruleLearningLib.DEBUG=True
 
 if args.rbpe:
     ruleLearningLib.RBPE=True
     
 ruleLearningLib.AT_LexicalTagsProcessor.initialize(args.tag_groups_file_name,args.tag_sequences_file_name)
 
 RuleApplicationHypothesis.set_target_language(args.target_language)
 RuleApplicationHypothesis.set_apertium_data_dir(args.apertium_data_dir)
 
 #this is useless, I think
 RuleApplicationHypothesis.set_minimum_covered_words(True)
 
 ruleList=RuleList()
 #load alignment templates
 if args.alignment_templates:
     if args.alignment_templates.endswith(".gz"):
         gfile=gzip.open(args.alignment_templates)
     else:
         gfile=open(args.alignment_templates)
     for line in gfile:
         line=line.strip().decode('utf-8')
         at=AlignmentTemplate()
'''
from beamSearchLib import RuleApplicationHypothesis,RuleList,ParallelSentence
import argparse
import ruleLearningLib
import sys,gzip
from ruleLearningLib import debug,AlignmentTemplate

if __name__ == "__main__":
    # Script entry point (Python 2).
    #
    # Reads hypotheses from standard input, one per line, keeps the text
    # that precedes the first "|||" separator, parses it into a
    # RuleApplicationHypothesis, and finally prints one
    # "<boxid><TAB><alternative AT set>" line per tuple returned by
    # RuleApplicationHypothesis.select_boxes_from_alternative_at_sets.

    # --- command line -----------------------------------------------------
    parser = argparse.ArgumentParser(description='select alternative sets of Ats which maximise 1-BLEU score')
    # Both tag definition files are mandatory.
    for required_option in ('--tag_groups_file_name', '--tag_sequences_file_name'):
        parser.add_argument(required_option, required=True)
    parser.add_argument('--debug', action='store_true')
    args = parser.parse_args(sys.argv[1:])

    # --- global configuration ---------------------------------------------
    if args.debug:
        # Switch on the debug flag both in this script and in the library.
        DEBUG = ruleLearningLib.DEBUG = True

    # The lexical tag processor must be initialised before hypotheses are parsed.
    ruleLearningLib.AT_LexicalTagsProcessor.initialize(args.tag_groups_file_name, args.tag_sequences_file_name)

    # --- read the hypotheses ----------------------------------------------
    best_hypotheses = []
    for raw_line in sys.stdin:
        # Only the first "|||"-delimited field is used; the rest is ignored.
        first_field = raw_line.decode('utf-8').strip().split(u"|||")[0]
        best_hypotheses.append(RuleApplicationHypothesis.create_and_parse(first_field))

    # --- select and report ------------------------------------------------
    selection = RuleApplicationHypothesis.select_boxes_from_alternative_at_sets(best_hypotheses)
    for boxid, altatset in selection:
        print("\t".join((str(boxid), str(altatset))))
from ruleLearningLib import debug, AlignmentTemplate

if __name__ == "__main__":
    # Script entry point (Python 2): turn the hypotheses received on stdin
    # into a list of (boxid, alternative AT set) pairs and print them as
    # tab-separated lines.

    # Build the option parser; the two tag files are required, --debug is
    # an optional flag.
    cli = argparse.ArgumentParser(
        description='select alternative sets of Ats which maximise 1-BLEU score')
    cli.add_argument('--tag_groups_file_name', required=True)
    cli.add_argument('--tag_sequences_file_name', required=True)
    cli.add_argument('--debug', action='store_true')
    args = cli.parse_args(sys.argv[1:])

    if args.debug:
        # Enable debugging here and inside ruleLearningLib.
        DEBUG = True
        ruleLearningLib.DEBUG = True

    tag_groups = args.tag_groups_file_name
    tag_sequences = args.tag_sequences_file_name
    ruleLearningLib.AT_LexicalTagsProcessor.initialize(tag_groups, tag_sequences)

    # Each stdin line is decoded as UTF-8 and stripped; the text before the
    # first "|||" is what gets parsed into a hypothesis.
    hypotheses = [
        RuleApplicationHypothesis.create_and_parse(
            text.decode('utf-8').strip().split(u"|||")[0])
        for text in sys.stdin
    ]

    chosen = RuleApplicationHypothesis.select_boxes_from_alternative_at_sets(
        hypotheses)
    for boxid, altatset in chosen:
        # One result per line: "<boxid>\t<alternative AT set>".
        output_fields = [str(boxid), str(altatset)]
        print("\t".join(output_fields))
Example #5
0
    parser.add_argument('--rbpe', action='store_true')
    parser.add_argument('--tt1_beam', action='store_true')
    parser.add_argument('--ternary_search', default='0')
    args = parser.parse_args(sys.argv[1:])

    if args.debug:
        DEBUG = True
        ruleLearningLib.DEBUG = True

    if args.rbpe:
        ruleLearningLib.RBPE = True

    ruleLearningLib.AT_LexicalTagsProcessor.initialize(
        args.tag_groups_file_name, args.tag_sequences_file_name)

    RuleApplicationHypothesis.set_apertium_data_dir(args.apertium_data_dir)
    RuleApplicationHypothesis.set_target_language(args.target_language)

    print >> sys.stderr, "Loading ATs ..."
    ruleList = RuleList()
    #load alignment templates
    if args.alignment_templates:
        if args.alignment_templates.lower().endswith(".gz"):
            gfile = gzip.open(args.alignment_templates)
        else:
            gfile = open(args.alignment_templates)
        for line in gfile:
            line = line.strip().decode('utf-8')
            at = AlignmentTemplate()
            at.parse(line)
            ruleList.add(at)
Example #6
0
    parser.add_argument('--tt1_beam', action='store_true')
    args = parser.parse_args(sys.argv[1:])

    if args.debug:
        DEBUG = True
        ruleLearningLib.DEBUG = True

    if args.rbpe:
        ruleLearningLib.RBPE = True

    ruleLearningLib.AT_LexicalTagsProcessor.initialize(
        args.tag_groups_file_name, args.tag_sequences_file_name)

    ruleList = RuleList()

    RuleApplicationHypothesis.set_target_language(args.target_language)
    RuleApplicationHypothesis.set_apertium_data_dir(args.apertium_data_dir)
    RuleApplicationHypothesis.set_minimum_covered_words(
        args.minimum_covered_words)

    #load alignment templates
    if args.alignment_templates.endswith(".gz"):
        gfile = gzip.open(args.alignment_templates)
    else:
        gfile = open(args.alignment_templates)
    for line in gfile:
        line = line.strip().decode('utf-8')
        at = AlignmentTemplate()
        at.parse(line)
        ruleList.add(at)
    gfile.close()
Example #7
0
    parser.add_argument('--apertium_data_dir')
    parser.add_argument('--rbpe', action='store_true')
    parser.add_argument('--tt1_beam', action='store_true')
    args = parser.parse_args(sys.argv[1:])

    if args.debug:
        DEBUG = True
        ruleLearningLib.DEBUG = True

    if args.rbpe:
        ruleLearningLib.RBPE = True

    ruleLearningLib.AT_LexicalTagsProcessor.initialize(
        args.tag_groups_file_name, args.tag_sequences_file_name)

    RuleApplicationHypothesis.set_target_language(args.target_language)
    RuleApplicationHypothesis.set_apertium_data_dir(args.apertium_data_dir)

    #this is useless, I think
    RuleApplicationHypothesis.set_minimum_covered_words(True)

    ruleList = RuleList()
    #load alignment templates
    if args.alignment_templates:
        if args.alignment_templates.endswith(".gz"):
            gfile = gzip.open(args.alignment_templates)
        else:
            gfile = open(args.alignment_templates)
        for line in gfile:
            line = line.strip().decode('utf-8')
            at = AlignmentTemplate()