예제 #1
0
#!/usr/bin/env python

import sys
from lib.abstractLearningLib import split_partial_parse

if __name__ == "__main__":
    # Stream filter: every line read from stdin is handed to
    # split_partial_parse, and each item it yields is written to stdout
    # on its own line.
    for line in sys.stdin:
        # Drop the trailing newline and surrounding whitespace before splitting.
        line = line.strip()
        for partial in split_partial_parse(line):
            # Parenthesised single-argument form: prints exactly the same
            # under Python 2 and is also valid syntax under Python 3.
            print(partial)
 sourceLanguage=list(sourcePGF.languages.keys())[0]
 targetPGF=pgf.readPGF(args.target_pgf)
 targetLanguage=list(targetPGF.languages.keys())[0]
 
 for line in sys.stdin:
     parts=line.split("~")
     sourcePart=parts[0]
     targetPart=parts[1]
     
     bilingualPhrases=BilingualPhraseSet()
     if args.with_bilingual_phrases:
         bilingualPhraseList=parts[2]
         for bil in bilingualPhraseList.split("\t"):
             bilingualPhrases.add(bil.strip())
     
     sourceTreesRaw=split_partial_parse(sourcePart)
     targetTreesRaw=split_partial_parse(targetPart)
     #targetTreesRaw=[]
     
     if DEBUG:
         print >> sys.stderr, "source trees:"
         for t in sourceTreesRaw:
             print >> sys.stderr, t
         print >> sys.stderr, "target trees:"
         for t in targetTreesRaw:
             print >> sys.stderr, t
     
     #ignore trees with non-ascii characters, which make the GF bindings crash
     sourceExprs=[]
     for rawTree in sourceTreesRaw:
         try: