boxesDic=dict()
    if args.final_boxes_index:
        for line in open(args.final_boxes_index):
            parts=line.split("\t")
            boxesDic[parts[1].strip()]=int(parts[0])
    
    #read best rule application for each sentence
    bestHypothesisForEachSentence=list()
    emptyIndexes=set()
    numLine=0
    for line in sys.stdin:
        line=line.decode('utf-8').strip()
        if len(line) > 0:
            parts=line.split(u"|||")
            try:
                bestHypothesisForEachSentence.append(RuleApplicationHypothesis.create_and_parse(parts[0],parseTranslation=True))
            except ruleLearningLib.AT_ParsingError:
                bestHypothesisForEachSentence.append(None)
                emptyIndexes.add(numLine)
                print >> sys.stderr, "ERROR parsing line: "+line.encode('utf-8')
        else:
            bestHypothesisForEachSentence.append(None)
            emptyIndexes.add(numLine)
        numLine+=1

    if len(bestHypothesisForEachSentence) != len(sentences):
        print >> sys.stderr, "ERROR: different length of sentences and best hyportheses"
        exit()
    
    for numSentence,bestHyp in enumerate(bestHypothesisForEachSentence):
        sentence=sentences[numSentence]
'''
from beamSearchLib import RuleApplicationHypothesis,RuleList,ParallelSentence
import argparse
import ruleLearningLib
import sys,gzip
from ruleLearningLib import debug,AlignmentTemplate

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='select alternative sets of Ats which maximise 1-BLEU score')
    parser.add_argument('--tag_groups_file_name',required=True)
    parser.add_argument('--tag_sequences_file_name',required=True)
    parser.add_argument('--debug', action='store_true')
    
    args = parser.parse_args(sys.argv[1:])
    
    if args.debug:
        DEBUG=True
        ruleLearningLib.DEBUG=True
        
    ruleLearningLib.AT_LexicalTagsProcessor.initialize(args.tag_groups_file_name,args.tag_sequences_file_name)
    
    l_best_hypothesis=list()
    for line in sys.stdin:
        line=line.decode('utf-8').strip()
        parts=line.split(u"|||")
        l_best_hypothesis.append(RuleApplicationHypothesis.create_and_parse(parts[0]))
        
    resultTuples=RuleApplicationHypothesis.select_boxes_from_alternative_at_sets(l_best_hypothesis)
    for boxid,altatset in resultTuples:
        print str(boxid)+"\t"+str(altatset)
Beispiel #3
0
    # ll_hypothesis: one list of parsed RuleApplicationHypothesis objects per
    # input sentence (one sentence per input line).
    ll_hypothesis = list()

    # Input comes from a gzip file when --input was given, otherwise stdin.
    if args.input:
        inputfile = gzip.open(args.input)
    else:
        inputfile = sys.stdin

    print >> sys.stderr, "Loading scores ..."
    for line in inputfile:
        line = line.decode('utf-8').strip()
        # Each line holds multiple hypotheses separated by "|||".
        parts = line.split(u"|||")
        # nfirst is defined outside this fragment; when set, keep only the
        # first nfirst hypotheses of every sentence — TODO confirm at caller.
        if nfirst != None:
            parts = parts[:nfirst]
        # Skip empty fields (e.g. produced by a trailing separator).
        ll_hypothesis.append([
            RuleApplicationHypothesis.create_and_parse(part) for part in parts
            if len(part) > 0
        ])
    if args.input:
        # Only close what we opened; never close sys.stdin.
        inputfile.close()
    print >> sys.stderr, "... done"

    print >> sys.stderr, "Maximising score of " + str(
        len(ll_hypothesis)) + " sentences"

    if args.only_hyps_with_maximum_local or args.super_heuristic or args.select_boxes_minimum:

        #remove all non-maximum hypotheses
        for numSentence, l_hypothesis in enumerate(ll_hypothesis):
            # firstNotMaximumIndex starts past the end; presumably lowered to
            # the first hypothesis whose score is below the maximum in the
            # code that follows this fragment — cut off here, verify there.
            firstNotMaximumIndex = len(l_hypothesis)
            if firstNotMaximumIndex > 0:
from ruleLearningLib import debug, AlignmentTemplate

if __name__ == "__main__":
    # CLI entry point: choose the alternative AT sets that maximise 1-BLEU.
    parser = argparse.ArgumentParser(
        description='select alternative sets of Ats which maximise 1-BLEU score'
    )
    parser.add_argument('--tag_groups_file_name', required=True)
    parser.add_argument('--tag_sequences_file_name', required=True)
    parser.add_argument('--debug', action='store_true')

    args = parser.parse_args(sys.argv[1:])

    if args.debug:
        # Enable verbose tracing both here and inside the rule-learning library.
        DEBUG = True
        ruleLearningLib.DEBUG = True

    # The tag processor must load its resources before any hypothesis is parsed.
    ruleLearningLib.AT_LexicalTagsProcessor.initialize(
        args.tag_groups_file_name, args.tag_sequences_file_name)

    # One best hypothesis per stdin line; only the first "|||"-separated
    # field of each (UTF-8, Python 2 byte-string) line is parsed.
    l_best_hypothesis = list()
    for line in sys.stdin:
        line = line.decode('utf-8').strip()
        parts = line.split(u"|||")
        l_best_hypothesis.append(
            RuleApplicationHypothesis.create_and_parse(parts[0]))

    # Emit one "<boxid>\t<alternative AT set>" line per selected box.
    resultTuples = RuleApplicationHypothesis.select_boxes_from_alternative_at_sets(
        l_best_hypothesis)
    for boxid, altatset in resultTuples:
        print str(boxid) + "\t" + str(altatset)
Beispiel #5
0
    # Optional index file: each line is "<int id>\t<box key>"; boxesDic
    # (defined before this fragment) maps the stripped key back to its id.
    if args.final_boxes_index:
        for line in open(args.final_boxes_index):
            parts = line.split("\t")
            boxesDic[parts[1].strip()] = int(parts[0])

    #read best rule application for each sentence
    # Parallel to the input: one entry per stdin line, None for lines that are
    # empty or fail to parse; emptyIndexes records those line numbers.
    bestHypothesisForEachSentence = list()
    emptyIndexes = set()
    numLine = 0
    for line in sys.stdin:
        line = line.decode('utf-8').strip()
        if len(line) > 0:
            # Only the first "|||"-separated field is parsed (with its
            # translation) into a hypothesis.
            parts = line.split(u"|||")
            try:
                bestHypothesisForEachSentence.append(
                    RuleApplicationHypothesis.create_and_parse(
                        parts[0], parseTranslation=True))
            except ruleLearningLib.AT_ParsingError:
                # Keep list aligned with input lines even on parse failure.
                bestHypothesisForEachSentence.append(None)
                emptyIndexes.add(numLine)
                print >> sys.stderr, "ERROR parsing line: " + line.encode(
                    'utf-8')
        else:
            bestHypothesisForEachSentence.append(None)
            emptyIndexes.add(numLine)
        numLine += 1

    # sentences is defined before this fragment; both lists must stay in
    # lockstep, one hypothesis per sentence, or we abort.
    if len(bestHypothesisForEachSentence) != len(sentences):
        print >> sys.stderr, "ERROR: different length of sentences and best hyportheses"
        exit(1)

    # Loop body continues past this fragment.
    for numSentence, bestHyp in enumerate(bestHypothesisForEachSentence):