예제 #1
0
파일: decoder.py 프로젝트: jungikim/sbmt
 def show(ded, antvalues):
     """Build the token tuple for one deduction, bracketed by its posterior.

     The result is ``("[p",) + value + ("]",)`` where ``p`` is
     ``cost.prob(ded.dcost['posterior'])`` formatted to three decimals.
     ``value`` is ``rule.subst(ded.rule.erhs, antvalues)`` when the deduction
     has a rule, otherwise the first antecedent value. Both must be tuples,
     since they are concatenated with tuples.
     """
     if not ded.rule:
         # No rule on this deduction: pass the first antecedent value through.
         inner = antvalues[0]
     else:
         inner = rule.subst(ded.rule.erhs, antvalues)
     opening = "[%.3f" % cost.prob(ded.dcost['posterior'])
     return (opening,) + inner + ("]",)
예제 #2
0
    def expected_product(self, insides, ef, eg):
        """Combine the ``ef`` and ``eg`` tables into one value per node.

        All tables (``insides``, ``ef``, ``eg`` and the result) are keyed by
        ``id()`` of an item or deduction.

        For each deduction the value starts at ``ef[ded] * eg[ded]`` and, for
        every antecedent, gains ``ep[ant] - ef[ant] * eg[ant]``. For each item
        the value is the sum over its deductions of
        ``cost.prob(insides[ded] - insides[item])`` times the deduction value.

        Antecedent entries are read from the result table, so
        ``self.bottomup()`` must yield antecedents before the items that use
        them (a missing entry raises ``KeyError``).

        Returns:
            dict mapping ``id(item)`` / ``id(ded)`` to the combined value.
        """
        ep = {}
        for item in self.bottomup():
            item_key = id(item)
            for ded in item.deds:
                ded_key = id(ded)
                ep[ded_key] = ef[ded_key] * eg[ded_key]
                for ant in ded.ants:
                    ant_key = id(ant)
                    ep[ded_key] += ep[ant_key] - ef[ant_key] * eg[ant_key]

                # Weight of this deduction relative to its item.
                weight = cost.prob(insides[ded_key] - insides[item_key])
                contribution = weight * ep[ded_key]
                if item_key not in ep:
                    ep[item_key] = contribution
                else:
                    ep[item_key] += contribution

        return ep
예제 #3
0
    def expected_product(self, insides, ef, eg):
        """Return a table of combined ``ef``/``eg`` values for items and deductions.

        Every table involved is indexed by ``id()`` of the node. Nodes are
        visited in ``self.bottomup()`` order:

        * deduction: ``ef[ded] * eg[ded]`` plus, per antecedent,
          ``ep[ant] - ef[ant] * eg[ant]``;
        * item: sum over its deductions of
          ``cost.prob(insides[ded] - insides[item]) * ep[ded]``.

        An antecedent that has not been visited yet raises ``KeyError``.
        """
        ep = {}
        for node in self.bottomup():
            nid = id(node)
            for ded in node.deds:
                did = id(ded)
                ep[did] = ef[did] * eg[did]
                for ant in ded.ants:
                    aid = id(ant)
                    correction = ep[aid] - ef[aid] * eg[aid]
                    ep[did] += correction

                share = cost.prob(insides[did] - insides[nid])
                d = share * ep[did]
                if nid in ep:
                    ep[nid] += d
                else:
                    # First deduction seen for this item.
                    ep[nid] = d

        return ep
예제 #4
0
    def expected_features(self, insides, f=None):
        """Accumulate a weighted feature value for every item and deduction.

        Args:
            insides: table keyed by ``id()`` of items and deductions; only
                differences ``insides[ded] - insides[item]`` are used.
            f: callable applied to ``ded.dcost`` to get a deduction's own
                value; defaults to ``svector.Vector``.

        For each deduction the value is ``f(ded.dcost)`` plus the values of
        its antecedents. For each item it is the sum over its deductions of
        ``cost.prob(insides[ded] - insides[item])`` times the deduction value.
        Antecedents must already have been visited by ``self.bottomup()``.

        Returns:
            dict mapping ``id(item)`` / ``id(ded)`` to the accumulated value.
        """
        feature_fn = svector.Vector if f is None else f
        v = {}
        for item in self.bottomup():
            item_key = id(item)
            for ded in item.deds:
                ded_key = id(ded)
                v[ded_key] = feature_fn(ded.dcost)
                for ant in ded.ants:
                    v[ded_key] += v[id(ant)]

                # Scale by the deduction's weight relative to its item.
                weight = cost.prob(insides[ded_key] - insides[item_key])
                contribution = weight * v[ded_key]
                if item_key not in v:
                    v[item_key] = contribution
                else:
                    v[item_key] += contribution

        return v
예제 #5
0
    def expected_features(self, insides, f=None):
        """Return per-node feature values weighted by ``cost.prob`` of inside differences.

        ``f`` maps a deduction's ``dcost`` to its own value and defaults to
        ``svector.Vector``. The result and ``insides`` are both keyed by
        ``id()`` of the node.

        A deduction's value is ``f(ded.dcost)`` plus the values already stored
        for its antecedents; an item's value is the sum, over its deductions,
        of ``cost.prob(insides[ded] - insides[item]) * value(ded)``.
        """
        if f is None:
            f = svector.Vector

        v = {}
        for node in self.bottomup():
            nid = id(node)
            for ded in node.deds:
                did = id(ded)
                v[did] = f(ded.dcost)
                for ant in ded.ants:
                    aid = id(ant)
                    v[did] += v[aid]

                share = cost.prob(insides[did] - insides[nid])
                d = share * v[did]
                if nid in v:
                    v[nid] += d
                else:
                    # First deduction seen for this item.
                    v[nid] = d

        return v
예제 #6
0
    def random_deriv(self, insides, deriv=None):
        """Sample a derivation rooted at this item.

        One deduction of ``self`` is drawn with weight
        ``cost.prob(insides[ded] - insides[self])``, recorded via
        ``deriv.select(self, ded)``, and the method then recurses into each of
        the chosen deduction's antecedents with the same ``deriv``.

        Args:
            insides: table keyed by ``id()`` of items and deductions.
            deriv: derivation being built; a new ``Derivation(self)`` is
                created when omitted.

        Returns:
            The ``deriv`` object with the selections added.
        """
        if deriv is None:
            deriv = Derivation(self)

        threshold = random.random()
        cumulative = 0.
        for ded in self.deds:
            cumulative += cost.prob(insides[id(ded)] - insides[id(self)])
            if cumulative > threshold:
                break
        else:
            # The weights never exceeded the draw (shouldn't happen);
            # fall back to the last deduction.
            ded = self.deds[-1]

        deriv.select(self, ded)

        for antecedent in ded.ants:
            antecedent.random_deriv(insides, deriv)

        return deriv
예제 #7
0
    def random_deriv(self, insides, deriv=None):
        """Draw one deduction per item at random and record it in a derivation.

        The draw walks ``self.deds`` accumulating
        ``cost.prob(insides[ded] - insides[self])`` until the running total
        exceeds a ``random.random()`` sample. The selected deduction is passed
        to ``deriv.select`` and each of its antecedents is sampled
        recursively. ``deriv`` defaults to a fresh ``Derivation(self)`` and is
        returned.
        """
        if deriv is None:
            deriv = Derivation(self)

        draw = random.random()
        running_total = 0.
        self_key = id(self)
        for ded in self.deds:
            step = cost.prob(insides[id(ded)] - insides[self_key])
            running_total += step
            if running_total > draw:
                break
        else:
            # Shouldn't happen: no prefix of the weights exceeded the draw,
            # so take the final deduction.
            ded = self.deds[-1]

        deriv.select(self, ded)

        for child in ded.ants:
            child.random_deriv(insides, deriv)

        return deriv
예제 #8
0
파일: decoder.py 프로젝트: isi-nlp/sbmt
 def show(ded, antvalues):
     """Return ``("[p",) + value + ("]",)`` for one deduction.

     ``p`` is ``cost.prob(ded.dcost["posterior"])`` printed with three
     decimals. ``value`` comes from ``rule.subst(ded.rule.erhs, antvalues)``
     when ``ded.rule`` is truthy and is ``antvalues[0]`` otherwise; it must be
     a tuple so the concatenation works.
     """
     value = rule.subst(ded.rule.erhs, antvalues) if ded.rule else antvalues[0]
     posterior = cost.prob(ded.dcost["posterior"])
     prefix = ("[%.3f" % posterior,)
     suffix = ("]",)
     return prefix + value + suffix
예제 #9
0
파일: decoder.py 프로젝트: isi-nlp/sbmt
    def process(sent):
        """Decode one sentence and write each output that is configured.

        Translates ``sent`` with ``thedecoder`` and then, depending on options
        and file handles from the enclosing scope, writes a gzipped JSON
        forest, per-rule posteriors plus the max-posterior derivation, the
        n-best list, and the French/English Viterbi trees.

        Output files opened here are managed with ``with`` so they are closed
        even if serialization or a write raises.

        Args:
            sent: sentence to decode; ``sent.id`` and ``sent.words`` are read
                and ``sent.ewords`` is assigned.

        Returns:
            ``sent`` with ``ewords`` set to the first n-best output, or
            ``None`` when ``thedecoder.translate`` returns ``None``.
        """
        goal = thedecoder.translate(sent)

        thedecoder.process_output(sent, goal)

        if goal is None:
            log.writeln("warning: parse failure")
            return None

        if opts.forest_dir:
            forest_path = os.path.join(opts.forest_dir, "forest.%s.gz" % sent.id)
            with gzip.open(forest_path, "w") as forest_file:
                forest_file.write(
                    forest.forest_to_json(
                        goal, fwords=sent.words, mode="english", models=thedecoder.models, weights=thedecoder.weights
                    )
                )

        if opts.rule_posterior_dir:
            rule_posterior_path = os.path.join(opts.rule_posterior_dir, "rule_posterior.%s" % sent.id)
            with open(rule_posterior_path, "w") as rule_posterior_file:
                beta = 1.0
                insides = goal.compute_inside(thedecoder.weights, beta=beta)
                outsides = goal.compute_outside(thedecoder.weights, insides, beta=beta)
                z = insides[id(goal)]
                for item in goal.bottomup():
                    for ded in item.deds:
                        # c = outside(item) + weights . dcost
                        #     + sum of antecedent insides - inside(goal)
                        c = outsides[id(item)]
                        c += thedecoder.weights.dot(ded.dcost)
                        c += sum(insides[id(ant)] for ant in ded.ants)
                        c -= z
                        rule_posterior_file.write(
                            "%s ||| span=%s posterior=%s\n" % (ded.rule, (item.i, item.j), cost.prob(c))
                        )
                        # Stored on the deduction so the reweighting below can use it.
                        ded.dcost["posterior"] = c

            max_posterior_path = os.path.join(opts.rule_posterior_dir, "max_posterior.%s" % sent.id)
            with open(max_posterior_path, "w") as max_posterior_file:
                # NOTE(review): reweight() acts on the shared goal, so the n-best
                # and Viterbi outputs below presumably use the posterior weights
                # whenever this branch runs -- confirm that is intended.
                goal.reweight(svector.Vector("posterior=1"))
                max_posterior = goal.viterbi_deriv()

                def show(ded, antvalues):
                    """Wrap a deduction's value in "[p" ... "]" posterior brackets."""
                    if ded.rule:
                        value = rule.subst(ded.rule.erhs, antvalues)
                    else:
                        value = antvalues[0]
                    return ("[%.3f" % cost.prob(ded.dcost["posterior"]),) + value + ("]",)

                value = max_posterior.value(show)
                s = " ".join(value)
                max_posterior_file.write("%s\n" % s)

        outputs = get_nbest(goal, n_best, ambiguity_limit)

        if n_best_file:
            for (v, e) in outputs:
                e = " ".join(e)
                # n_best_file.write("%s ||| %s ||| %s\n" % (sent.id, e, -thedecoder.weights.dot(v)))
                n_best_file.write("%s ||| %s ||| %s\n" % (sent.id, e, v))
            n_best_file.flush()

        (bestv, best) = outputs[0]

        if french_parse_file:
            french_parse_file.write("%s ||| %s\n" % (sent.id, goal.viterbi_deriv().french_tree()))
            french_parse_file.flush()
        if english_parse_file:
            english_parse_file.write("%s ||| %s\n" % (sent.id, goal.viterbi_deriv().english_tree()))
            english_parse_file.flush()

        if log.level >= 1:
            gc.collect()
            log.write("  done decoding, memory=%s\n" % monitor.memory())
            log.write("  features: %s; %s\n" % (bestv, thedecoder.weights.dot(bestv)))

        sent.ewords = best
        return sent
 def show(ded, antvalues):
     """Bracket one deduction's value with its posterior.

     Returns ``("[p",) + value + ("]",)``, where ``p`` is
     ``cost.prob(ded.dcost['posterior'])`` to three decimals and ``value`` is
     ``ded.rule.e.subst((), antvalues)`` for a deduction with a rule, or the
     first antecedent value otherwise.
     """
     if not ded.rule:
         body = antvalues[0]
     else:
         body = ded.rule.e.subst((), antvalues)
     head = "[%.3f" % cost.prob(ded.dcost['posterior'])
     return (head,) + body + ("]",)
예제 #11
0
    def process(sent):
        """Decode one sentence and write each output that is configured.

        Translates ``sent`` with ``thedecoder`` and then, depending on options
        and file handles from the enclosing scope, writes a gzipped JSON
        forest, per-rule posteriors plus the max-posterior derivation, the
        n-best list, and the French/English Viterbi trees. Output symbols are
        converted with ``sym.tostring`` before being written.

        Output files opened here are managed with ``with`` so they are closed
        even if serialization or a write raises.

        Args:
            sent: sentence to decode; ``sent.id`` and ``sent.fwords`` are read
                and ``sent.ewords`` is assigned.

        Returns:
            ``sent`` with ``ewords`` set to the stringified first n-best
            output, or ``None`` when ``thedecoder.translate`` returns ``None``.
        """
        goal = thedecoder.translate(sent)

        thedecoder.process_output(sent, goal)

        if goal is None:
            return None

        if opts.forest_dir:
            forest_path = os.path.join(opts.forest_dir, "forest.%s.gz" % sent.id)
            with gzip.open(forest_path, "w") as forest_file:
                forest_file.write(forest.forest_to_json(goal, fwords=sent.fwords, mode='english', models=thedecoder.models, weights=thedecoder.weights))

        if opts.rule_posterior_dir:
            rule_posterior_path = os.path.join(opts.rule_posterior_dir, "rule_posterior.%s" % sent.id)
            with open(rule_posterior_path, "w") as rule_posterior_file:
                beta = 1.
                insides = goal.compute_inside(thedecoder.weights, beta=beta)
                outsides = goal.compute_outside(thedecoder.weights, insides, beta=beta)
                z = insides[id(goal)]
                for item in goal.bottomup():
                    for ded in item.deds:
                        # c = outside(item) + weights . dcost
                        #     + sum of antecedent insides - inside(goal)
                        c = outsides[id(item)]
                        c += thedecoder.weights.dot(ded.dcost)
                        c += sum(insides[id(ant)] for ant in ded.ants)
                        c -= z
                        rule_posterior_file.write("%s ||| span=%s posterior=%s\n" % (ded.rule, (item.i, item.j), cost.prob(c)))
                        # Stored on the deduction so the reweighting below can use it.
                        ded.dcost['posterior'] = c

            max_posterior_path = os.path.join(opts.rule_posterior_dir, "max_posterior.%s" % sent.id)
            with open(max_posterior_path, "w") as max_posterior_file:
                # NOTE(review): reweight() acts on the shared goal, so the n-best
                # and Viterbi outputs below presumably use the posterior weights
                # whenever this branch runs -- confirm that is intended.
                goal.reweight(svector.Vector('posterior=1'))
                max_posterior = goal.viterbi_deriv()

                def show(ded, antvalues):
                    """Wrap a deduction's value in "[p" ... "]" posterior brackets."""
                    if ded.rule:
                        value = ded.rule.e.subst((), antvalues)
                    else:
                        value = antvalues[0]
                    return ("[%.3f" % cost.prob(ded.dcost['posterior']),) + value + ("]",)

                value = max_posterior.value(show)
                # Integer entries are symbol ids; the bracket tokens are already strings.
                s = " ".join((sym.tostring(e) if type(e) is int else e) for e in value)
                max_posterior_file.write("%s\n" % s)

        outputs = get_nbest(goal, n_best, ambiguity_limit)

        if n_best_file:
            for (v, e) in outputs:
                e = " ".join(sym.tostring(w) for w in e)
                #n_best_file.write("%s ||| %s ||| %s\n" % (sent.id, e, -thedecoder.weights.dot(v)))
                n_best_file.write("%s ||| %s ||| %s\n" % (sent.id, e, v))
            n_best_file.flush()

        (bestv, best) = outputs[0]

        if french_parse_file:
            french_parse_file.write("%s ||| %s\n" % (sent.id, goal.viterbi_deriv().french_tree()))
            french_parse_file.flush()
        if english_parse_file:
            english_parse_file.write("%s ||| %s\n" % (sent.id, goal.viterbi_deriv().english_tree()))
            english_parse_file.flush()

        if log.level >= 1:
            gc.collect()
            log.write("  done decoding, memory=%s\n" % monitor.memory())
            log.write("  features: %s; %s\n" % (bestv, thedecoder.weights.dot(bestv)))

        sent.ewords = [sym.tostring(e) for e in best]
        return sent