Example #1
0
    def genGHKMfiles(self, args, derivations, mem="2g"):
        """Write the GHKM input files, run the Java rule extractor, convert its output.

        For every ``(s, d)`` pair in ``derivations`` one line is written to
        each of three files: the parse and the alignment returned by
        ``DerivationTree.fromDerivation(d).getGHKMtriple_Java()``, and the
        text ``s["nl"]`` with surrounding spaces, tabs and newlines stripped.
        The Java ``RuleExtractor`` is then run on those files with its
        standard output redirected to ``args.ghkm_path``, and finally
        ``ghkm2tib(args.ghkm_path, args.tib_path)`` is called.

        Args:
            args: Object providing ``parse_path``, ``align_path``,
                ``text_path``, ``ghkmDir``, ``ghkm_path`` and ``tib_path``.
            derivations: Iterable of ``(s, d)`` pairs; ``s`` is indexed with
                ``"nl"`` and ``d`` is passed to
                ``DerivationTree.fromDerivation``.
            mem: Value used for both the ``-Xmx`` and ``-Xms`` JVM options.
        """
        # The context manager guarantees all three files are closed (and
        # flushed) even if building a derivation tree raises mid-loop, and
        # always before the Java process reads them.
        with open(args.parse_path, 'w') as parse_file, \
                open(args.align_path, 'w') as align_file, \
                open(args.text_path, 'w') as text_file:
            for s, d in derivations:
                tree = DerivationTree.fromDerivation(d)
                parse, align = tree.getGHKMtriple_Java()
                text = s["nl"].strip(' \t\n\r')
                parse_file.write("%s\n" % parse)
                align_file.write("%s\n" % align)
                text_file.write("%s\n" % text)

        # Parenthesised single-argument print gives the same output on
        # Python 2 and also parses on Python 3.
        print("Running GHKM Java rule extraction")
        ghkm_opts = (
            "-fCorpus %s -eParsedCorpus %s -align %s -joshuaFormat false "
            "-maxLHS 200 -maxRHS 15 -MaxUnalignedRHS 15"
            % (args.text_path, args.parse_path, args.align_path)
        )
        java_opts = (
            "-Xmx%s -Xms%s -cp %s/ghkm.jar:%s/lib/fastutil.jar "
            "-XX:+UseCompressedOops"
            % (mem, mem, args.ghkmDir, args.ghkmDir)
        )
        # NOTE(review): the paths are interpolated unquoted into a shell
        # command line (paths containing spaces or shell metacharacters will
        # break it) and the exit status of the Java process is not checked.
        os.system(
            "java %s edu.stanford.nlp.mt.syntax.ghkm.RuleExtractor %s > %s"
            % (java_opts, ghkm_opts, args.ghkm_path)
        )

        print("Converting GHKM rules to Tiburon format")
        ghkm2tib(args.ghkm_path, args.tib_path)
Example #2
0
    def genGHKMfiles(self, args, derivations, mem="2g"):
        """Write the GHKM input files, run the Java rule extractor, convert its output.

        For every ``(s, d)`` pair in ``derivations`` one line is written to
        each of three files: the parse and the alignment returned by
        ``DerivationTree.fromDerivation(d).getGHKMtriple_Java()``, and the
        text ``s["nl"]`` with surrounding spaces, tabs and newlines stripped.
        The Java ``RuleExtractor`` is then run on those files with its
        standard output redirected to ``args.ghkm_path``, and finally
        ``ghkm2tib(args.ghkm_path, args.tib_path)`` is called.

        Args:
            args: Object providing ``parse_path``, ``align_path``,
                ``text_path``, ``ghkmDir``, ``ghkm_path`` and ``tib_path``.
            derivations: Iterable of ``(s, d)`` pairs; ``s`` is indexed with
                ``"nl"`` and ``d`` is passed to
                ``DerivationTree.fromDerivation``.
            mem: Value used for both the ``-Xmx`` and ``-Xms`` JVM options.
        """
        # The context manager guarantees all three files are closed (and
        # flushed) even if building a derivation tree raises mid-loop, and
        # always before the Java process reads them.
        with open(args.parse_path, 'w') as parse_file, \
                open(args.align_path, 'w') as align_file, \
                open(args.text_path, 'w') as text_file:
            for s, d in derivations:
                tree = DerivationTree.fromDerivation(d)
                parse, align = tree.getGHKMtriple_Java()
                text = s["nl"].strip(' \t\n\r')
                parse_file.write("%s\n" % parse)
                align_file.write("%s\n" % align)
                text_file.write("%s\n" % text)

        # Parenthesised single-argument print gives the same output on
        # Python 2 and also parses on Python 3.
        print("Running GHKM Java rule extraction")
        ghkm_opts = (
            "-fCorpus %s -eParsedCorpus %s -align %s -joshuaFormat false "
            "-maxLHS 200 -maxRHS 15 -MaxUnalignedRHS 15"
            % (args.text_path, args.parse_path, args.align_path)
        )
        java_opts = (
            "-Xmx%s -Xms%s -cp %s/ghkm.jar:%s/lib/fastutil.jar "
            "-XX:+UseCompressedOops"
            % (mem, mem, args.ghkmDir, args.ghkmDir)
        )
        # NOTE(review): the paths are interpolated unquoted into a shell
        # command line (paths containing spaces or shell metacharacters will
        # break it) and the exit status of the Java process is not checked.
        os.system(
            "java %s edu.stanford.nlp.mt.syntax.ghkm.RuleExtractor %s > %s"
            % (java_opts, ghkm_opts, args.ghkm_path)
        )

        print("Converting GHKM rules to Tiburon format")
        ghkm2tib(args.ghkm_path, args.tib_path)
Example #3
0
    def parseMRfiles(self, args, derivations):
        """Write the generated string of each derivation to ``args.output_path``.

        For every ``(s, d)`` pair in ``derivations`` the Tiburon tree from
        ``DerivationTree.fromDerivation(d).getTiburonTree()`` is passed to
        ``tibTree2String(args, tree)``. Each truthy result is written as one
        line of the output file and also printed; falsy results are skipped.

        Args:
            args: Object providing ``output_path``; it is also forwarded
                unchanged to ``tibTree2String``.
            derivations: Iterable of ``(s, d)`` pairs; only ``d`` is used.
        """
        # The context manager closes the output file even when a derivation
        # raises part-way through the loop.
        with open(args.output_path, 'w') as output_file:
            for _, d in derivations:
                tib_tree = DerivationTree.fromDerivation(d).getTiburonTree()
                gen_string = tibTree2String(args, tib_tree)
                if not gen_string:
                    continue
                output_file.write("%s\n" % gen_string)
                # Parenthesised single-argument print gives the same output
                # on Python 2 and also parses on Python 3.
                print(gen_string)