# Configure the hypothesis class, load the alignment templates into a
# RuleList, and start reading the parallel sentences.

# NOTE(review): the original author marked this call as probably useless.
RuleApplicationHypothesis.set_minimum_covered_words(True)

ruleList = RuleList()

# Load alignment templates: every line of the file is stripped, decoded as
# UTF-8, parsed into an AlignmentTemplate and added to ruleList. The file is
# read through gzip when its name ends in ".gz"; the check is
# case-insensitive, matching the check used for the sentences file below.
if args.alignment_templates:
    if args.alignment_templates.lower().endswith(".gz"):
        gfile = gzip.open(args.alignment_templates)
    else:
        gfile = open(args.alignment_templates)
    try:
        for line in gfile:
            line = line.strip().decode('utf-8')
            at = AlignmentTemplate()
            at.parse(line)
            ruleList.add(at)
    finally:
        # Release the handle even when a line fails to decode or parse.
        gfile.close()

# Load sentences: every line is stripped, decoded as UTF-8 and parsed into a
# ParallelSentence.
sentences = list()
if args.sentences:
    if args.sentences.lower().endswith('.gz'):
        gfile = gzip.open(args.sentences)
    else:
        gfile = open(args.sentences)
    # NOTE(review): the rest of this loop (and the matching close of gfile)
    # is not visible in this excerpt, so the handle management is left
    # untouched here -- confirm the file is closed further down.
    for line in gfile:
        line = line.strip().decode('utf-8')
        parallelSentence = ParallelSentence()
        parallelSentence.parse(line, parseTlLemmasFromDic=True,
                               forRBPE=args.rbpe)
        # Explicit empty tags are only added when the tt1_beam option is off.
        if not args.tt1_beam:
            parallelSentence.add_explicit_empty_tags()
RuleApplicationHypothesis.set_apertium_data_dir(args.apertium_data_dir) RuleApplicationHypothesis.set_target_language(args.target_language) print >> sys.stderr, "Loading ATs ..." ruleList = RuleList() #load alignment templates if args.alignment_templates: if args.alignment_templates.lower().endswith(".gz"): gfile = gzip.open(args.alignment_templates) else: gfile = open(args.alignment_templates) for line in gfile: line = line.strip().decode('utf-8') at = AlignmentTemplate() at.parse(line) ruleList.add(at) gfile.close() print >> sys.stderr, "... done" print >> sys.stderr, "Loading sentences ..." #load sentences sentences = list() if args.sentences: if args.sentences.lower().endswith(".gz"): gfile = gzip.open(args.sentences) else: gfile = open(args.sentences) for line in gfile: line = line.strip().decode('utf-8') parallelSentence = ParallelSentence() parallelSentence.parse(line,
# Configure the hypothesis class from the command-line options and load the
# primary (and, optionally, the alternative) alignment templates.
ruleList = RuleList()
RuleApplicationHypothesis.set_target_language(args.target_language)
RuleApplicationHypothesis.set_apertium_data_dir(args.apertium_data_dir)
RuleApplicationHypothesis.set_minimum_covered_words(
    args.minimum_covered_words)

# Load alignment templates: every line of the file is stripped, decoded as
# UTF-8, parsed into an AlignmentTemplate and added to ruleList. The file is
# read through gzip when its name ends in ".gz".
if args.alignment_templates.endswith(".gz"):
    gfile = gzip.open(args.alignment_templates)
else:
    gfile = open(args.alignment_templates)
try:
    for line in gfile:
        line = line.strip().decode('utf-8')
        at = AlignmentTemplate()
        at.parse(line)
        ruleList.add(at)
finally:
    # Release the handle even when a line fails to decode or parse.
    gfile.close()

ruleLists = [ruleList]

# Optional second set of templates, kept in its own RuleList.
if args.alternative_alignment_templates:
    altRuleList = RuleList()
    # NOTE(review): unlike the primary file, this one is always opened with
    # gzip regardless of its suffix. Kept as-is so existing invocations keep
    # working -- confirm whether plain-text input should also be accepted.
    gfile = gzip.open(args.alternative_alignment_templates)
    try:
        for line in gfile:
            line = line.strip().decode('utf-8')
            at = AlignmentTemplate()
            at.parse(line)
            altRuleList.add(at)
    finally:
        gfile.close()
    ruleLists = [ruleList, altRuleList]

boxesCoverage = False
# Configure the hypothesis class from the command-line options and load the
# primary (and, optionally, the alternative) alignment templates.
RuleApplicationHypothesis.set_target_language(args.target_language)
RuleApplicationHypothesis.set_apertium_data_dir(args.apertium_data_dir)
RuleApplicationHypothesis.set_minimum_covered_words(
    args.minimum_covered_words)

# Load alignment templates: every line of the file is stripped, decoded as
# UTF-8, parsed into an AlignmentTemplate and added to ruleList. The file is
# read through gzip when its name ends in ".gz".
# NOTE(review): ruleList is not created in this excerpt; presumably it is
# initialised earlier in the script -- verify.
if args.alignment_templates.endswith(".gz"):
    gfile = gzip.open(args.alignment_templates)
else:
    gfile = open(args.alignment_templates)
try:
    for line in gfile:
        line = line.strip().decode('utf-8')
        at = AlignmentTemplate()
        at.parse(line)
        ruleList.add(at)
finally:
    # Release the handle even when a line fails to decode or parse.
    gfile.close()

ruleLists = [ruleList]

# Optional second set of templates, kept in its own RuleList.
if args.alternative_alignment_templates:
    altRuleList = RuleList()
    # NOTE(review): unlike the primary file, this one is always opened with
    # gzip regardless of its suffix. Kept as-is so existing invocations keep
    # working -- confirm whether plain-text input should also be accepted.
    gfile = gzip.open(args.alternative_alignment_templates)
    try:
        for line in gfile:
            line = line.strip().decode('utf-8')
            at = AlignmentTemplate()
            at.parse(line)
            altRuleList.add(at)
    finally:
        gfile.close()
    ruleLists = [ruleList, altRuleList]

boxesCoverage = False