parser.add_argument('--rbpe', action='store_true')
parser.add_argument('--tt1_beam', action='store_true')
args = parser.parse_args(sys.argv[1:])

# Mirror the command-line switches into this script's DEBUG flag and into
# the ruleLearningLib module-level flags.
if args.debug:
    DEBUG = True
    ruleLearningLib.DEBUG = True
if args.rbpe:
    ruleLearningLib.RBPE = True

ruleLearningLib.AT_LexicalTagsProcessor.initialize(
    args.tag_groups_file_name, args.tag_sequences_file_name)

ruleList = RuleList()
RuleApplicationHypothesis.set_target_language(args.target_language)
RuleApplicationHypothesis.set_apertium_data_dir(args.apertium_data_dir)
RuleApplicationHypothesis.set_minimum_covered_words(
    args.minimum_covered_words)

# Load alignment templates: one template per input line. The file is read
# through gzip when its name ends in ".gz"; the suffix test ignores case so
# that "X.GZ" is not mistakenly read as plain text.
if args.alignment_templates.lower().endswith(".gz"):
    gfile = gzip.open(args.alignment_templates)
else:
    gfile = open(args.alignment_templates)
try:
    for line in gfile:
        # Lines are stripped and decoded as UTF-8 before being parsed.
        line = line.strip().decode('utf-8')
        at = AlignmentTemplate()
        at.parse(line)
        ruleList.add(at)
finally:
    # Release the handle even when decoding or parsing a line fails.
    gfile.close()
ruleLists = [ruleList]
parser.add_argument('--target_language', default='ca')
parser.add_argument('--apertium_data_dir')
parser.add_argument('--rbpe', action='store_true')
parser.add_argument('--tt1_beam', action='store_true')
args = parser.parse_args(sys.argv[1:])

# Mirror the command-line switches into this script's DEBUG flag and into
# the ruleLearningLib module-level flags.
if args.debug:
    DEBUG = True
    ruleLearningLib.DEBUG = True
if args.rbpe:
    ruleLearningLib.RBPE = True

ruleLearningLib.AT_LexicalTagsProcessor.initialize(
    args.tag_groups_file_name, args.tag_sequences_file_name)

RuleApplicationHypothesis.set_target_language(args.target_language)
RuleApplicationHypothesis.set_apertium_data_dir(args.apertium_data_dir)
#this is useless, I think
RuleApplicationHypothesis.set_minimum_covered_words(True)

ruleList = RuleList()

# Load alignment templates, only when a template file was given. The file is
# read through gzip when its name ends in ".gz"; the suffix test ignores case
# so that "X.GZ" is not mistakenly read as plain text.
if args.alignment_templates:
    if args.alignment_templates.lower().endswith(".gz"):
        gfile = gzip.open(args.alignment_templates)
    else:
        gfile = open(args.alignment_templates)
    for line in gfile:
        # Lines are stripped and decoded as UTF-8 before being parsed.
        line = line.strip().decode('utf-8')
        at = AlignmentTemplate()
''' from beamSearchLib import RuleApplicationHypothesis,RuleList,ParallelSentence import argparse import ruleLearningLib import sys,gzip from ruleLearningLib import debug,AlignmentTemplate if __name__ == "__main__": parser = argparse.ArgumentParser(description='select alternative sets of Ats which maximise 1-BLEU score') parser.add_argument('--tag_groups_file_name',required=True) parser.add_argument('--tag_sequences_file_name',required=True) parser.add_argument('--debug', action='store_true') args = parser.parse_args(sys.argv[1:]) if args.debug: DEBUG=True ruleLearningLib.DEBUG=True ruleLearningLib.AT_LexicalTagsProcessor.initialize(args.tag_groups_file_name,args.tag_sequences_file_name) l_best_hypothesis=list() for line in sys.stdin: line=line.decode('utf-8').strip() parts=line.split(u"|||") l_best_hypothesis.append(RuleApplicationHypothesis.create_and_parse(parts[0])) resultTuples=RuleApplicationHypothesis.select_boxes_from_alternative_at_sets(l_best_hypothesis) for boxid,altatset in resultTuples: print str(boxid)+"\t"+str(altatset)
from ruleLearningLib import debug, AlignmentTemplate

if __name__ == "__main__":
    # Command-line interface: both tag files are mandatory, --debug is optional.
    parser = argparse.ArgumentParser(
        description='select alternative sets of Ats which maximise 1-BLEU score')
    parser.add_argument('--tag_groups_file_name', required=True)
    parser.add_argument('--tag_sequences_file_name', required=True)
    parser.add_argument('--debug', action='store_true')
    args = parser.parse_args(sys.argv[1:])

    if args.debug:
        DEBUG = True
        ruleLearningLib.DEBUG = True

    ruleLearningLib.AT_LexicalTagsProcessor.initialize(
        args.tag_groups_file_name, args.tag_sequences_file_name)

    # Read hypotheses from stdin: every line is decoded as UTF-8, stripped,
    # and only the field before the first "|||" separator is parsed.
    l_best_hypothesis = []
    for raw_line in sys.stdin:
        hypothesis_field = raw_line.decode('utf-8').strip().split(u"|||")[0]
        l_best_hypothesis.append(
            RuleApplicationHypothesis.create_and_parse(hypothesis_field))

    resultTuples = RuleApplicationHypothesis.select_boxes_from_alternative_at_sets(
        l_best_hypothesis)

    # Emit one "<boxid>TAB<alternative AT set>" line per selected pair.
    for boxid, altatset in resultTuples:
        sys.stdout.write(str(boxid) + "\t" + str(altatset) + "\n")
parser.add_argument('--rbpe', action='store_true')
parser.add_argument('--tt1_beam', action='store_true')
parser.add_argument('--ternary_search', default='0')
args = parser.parse_args(sys.argv[1:])

# Mirror the command-line switches into this script's DEBUG flag and into
# the ruleLearningLib module-level flags.
if args.debug:
    DEBUG = True
    ruleLearningLib.DEBUG = True
if args.rbpe:
    ruleLearningLib.RBPE = True

ruleLearningLib.AT_LexicalTagsProcessor.initialize(
    args.tag_groups_file_name, args.tag_sequences_file_name)

RuleApplicationHypothesis.set_apertium_data_dir(args.apertium_data_dir)
RuleApplicationHypothesis.set_target_language(args.target_language)

# Progress message goes to stderr.
sys.stderr.write("Loading ATs ...\n")
ruleList = RuleList()

# Load alignment templates, only when a template file was given; names ending
# in ".gz" (any letter case) are opened through gzip, anything else as a
# regular file.
if args.alignment_templates:
    gfile = (gzip.open
             if args.alignment_templates.lower().endswith(".gz")
             else open)(args.alignment_templates)
    for line in gfile:
        # Lines are stripped and decoded as UTF-8 before being parsed.
        line = line.strip().decode('utf-8')
        at = AlignmentTemplate()
        at.parse(line)
        ruleList.add(at)
parser.add_argument('--tt1_beam', action='store_true')
args = parser.parse_args(sys.argv[1:])

# Mirror the command-line switches into this script's DEBUG flag and into
# the ruleLearningLib module-level flags.
if args.debug:
    DEBUG = True
    ruleLearningLib.DEBUG = True
if args.rbpe:
    ruleLearningLib.RBPE = True

ruleLearningLib.AT_LexicalTagsProcessor.initialize(
    args.tag_groups_file_name, args.tag_sequences_file_name)

ruleList = RuleList()
RuleApplicationHypothesis.set_target_language(args.target_language)
RuleApplicationHypothesis.set_apertium_data_dir(args.apertium_data_dir)
RuleApplicationHypothesis.set_minimum_covered_words(
    args.minimum_covered_words)

# Load alignment templates: one template per input line. The file is read
# through gzip when its name ends in ".gz"; the suffix test ignores case so
# that "X.GZ" is not mistakenly read as plain text.
if args.alignment_templates.lower().endswith(".gz"):
    gfile = gzip.open(args.alignment_templates)
else:
    gfile = open(args.alignment_templates)
try:
    for line in gfile:
        # Lines are stripped and decoded as UTF-8 before being parsed.
        line = line.strip().decode('utf-8')
        at = AlignmentTemplate()
        at.parse(line)
        ruleList.add(at)
finally:
    # Release the handle even when decoding or parsing a line fails.
    gfile.close()
parser.add_argument('--apertium_data_dir')
parser.add_argument('--rbpe', action='store_true')
parser.add_argument('--tt1_beam', action='store_true')
args = parser.parse_args(sys.argv[1:])

# Mirror the command-line switches into this script's DEBUG flag and into
# the ruleLearningLib module-level flags.
if args.debug:
    DEBUG = True
    ruleLearningLib.DEBUG = True
if args.rbpe:
    ruleLearningLib.RBPE = True

ruleLearningLib.AT_LexicalTagsProcessor.initialize(
    args.tag_groups_file_name, args.tag_sequences_file_name)

RuleApplicationHypothesis.set_target_language(args.target_language)
RuleApplicationHypothesis.set_apertium_data_dir(args.apertium_data_dir)
#this is useless, I think
RuleApplicationHypothesis.set_minimum_covered_words(True)

ruleList = RuleList()

# Load alignment templates, only when a template file was given. The file is
# read through gzip when its name ends in ".gz"; the suffix test ignores case
# so that "X.GZ" is not mistakenly read as plain text.
if args.alignment_templates:
    if args.alignment_templates.lower().endswith(".gz"):
        gfile = gzip.open(args.alignment_templates)
    else:
        gfile = open(args.alignment_templates)
    for line in gfile:
        # Lines are stripped and decoded as UTF-8 before being parsed.
        line = line.strip().decode('utf-8')
        at = AlignmentTemplate()