def show(ded, antvalues):
    """Render one deduction as a bracketed tuple of tokens.

    ``ded`` is a deduction exposing ``rule`` and ``dcost``; ``antvalues``
    holds the values already built for its antecedents.

    If the deduction has a rule, the rule's ``erhs`` is combined with
    ``antvalues`` via ``rule.subst``; otherwise the first antecedent value is
    used unchanged.  The result is wrapped between an opening token
    ``"[<p>"`` -- where ``<p>`` is ``cost.prob(ded.dcost['posterior'])``
    printed with three decimals -- and a closing ``"]"`` token.

    Both branches must yield a tuple, because the result is assembled by
    tuple concatenation.
    """
    if not ded.rule:
        # No rule on this deduction: forward the single antecedent value.
        inner = antvalues[0]
    else:
        inner = rule.subst(ded.rule.erhs, antvalues)

    opening = "[%.3f" % cost.prob(ded.dcost['posterior'])
    return (opening,) + inner + ("]",)
def expected_product(self, insides, ef, eg):
    """Accumulate a product-style expectation bottom-up over the forest.

    ``insides``, ``ef`` and ``eg`` are mappings keyed by ``id(node)``; ``ef``
    and ``eg`` must have entries for every deduction and every antecedent
    item that is visited.  (Presumably ``ef`` / ``eg`` are per-node
    expectations of two quantities f and g -- confirm against the caller.)

    For each deduction the stored value is
    ``ef[ded] * eg[ded] + sum(ep[ant] - ef[ant] * eg[ant] for ant in ded.ants)``.
    For each item it is the sum, over the item's deductions, of
    ``cost.prob(insides[ded] - insides[item]) * ep[ded]``.

    Returns the dict of those values, keyed by ``id()`` of each deduction
    and each item that has at least one deduction.
    """
    ep = {}
    for item in self.bottomup():
        item_key = id(item)
        for ded in item.deds:
            ded_key = id(ded)

            # Start from the deduction's own product, then fold in each
            # antecedent's surplus over its own ef * eg.
            total = ef[ded_key] * eg[ded_key]
            for ant in ded.ants:
                ant_key = id(ant)
                total += ep[ant_key] - ef[ant_key] * eg[ant_key]
            ep[ded_key] = total

            # Weight the deduction by its share of the item's inside score.
            share = cost.prob(insides[ded_key] - insides[item_key]) * total
            if item_key not in ep:
                ep[item_key] = share
            else:
                ep[item_key] += share
    return ep
def expected_product(self, insides, ef, eg):
    """Compute per-node product expectations in bottom-up order.

    All three arguments are mappings indexed by ``id(node)``.  ``ef`` and
    ``eg`` need an entry for each deduction and each antecedent item reached
    by ``self.bottomup()``.  (They look like expectations of two separate
    quantities -- TODO confirm with the call site.)

    Deduction value: ``ef * eg`` of the deduction, plus for every antecedent
    the difference ``value[ant] - ef[ant] * eg[ant]``.
    Item value: sum over its deductions of
    ``cost.prob(insides[ded] - insides[item]) * value[ded]``.

    Returns a dict from ``id(node)`` to that node's value.
    """
    products = {}
    for node in self.bottomup():
        node_id = id(node)
        for edge in node.deds:
            edge_id = id(edge)
            products[edge_id] = ef[edge_id] * eg[edge_id]
            for tail in edge.ants:
                tail_id = id(tail)
                products[edge_id] += products[tail_id] - ef[tail_id] * eg[tail_id]

            weight = cost.prob(insides[edge_id] - insides[node_id])
            contribution = weight * products[edge_id]

            # The first deduction of an item seeds the entry; later ones add.
            if node_id in products:
                products[node_id] += contribution
            else:
                products[node_id] = contribution
    return products
def expected_features(self, insides, f=None):
    """Accumulate feature values bottom-up over the forest.

    ``insides`` maps ``id(node)`` to a score for every item and deduction.
    ``f`` is applied to each deduction's ``dcost`` to obtain its starting
    value; it defaults to ``svector.Vector``.

    Deduction value: ``f(ded.dcost)`` plus the values of all its antecedents.
    Item value: sum over its deductions of
    ``cost.prob(insides[ded] - insides[item]) * value[ded]``.

    Returns a dict from ``id(node)`` to that node's value.
    """
    if f is None:
        f = svector.Vector

    v = {}
    for item in self.bottomup():
        item_key = id(item)
        for ded in item.deds:
            ded_key = id(ded)

            # A deduction's own features plus everything below it.
            total = f(ded.dcost)
            for ant in ded.ants:
                total += v[id(ant)]
            v[ded_key] = total

            share = cost.prob(insides[ded_key] - insides[item_key]) * total
            if item_key not in v:
                v[item_key] = share
            else:
                v[item_key] += share
    return v
def expected_features(self, insides, f=None):
    """Compute a per-node feature accumulation over the forest.

    ``insides`` is indexed by ``id(node)``.  ``f`` converts a deduction's
    ``dcost`` into the value to accumulate and falls back to
    ``svector.Vector`` when not supplied.

    Each deduction gets ``f(ded.dcost)`` plus the sum of its antecedents'
    values.  Each item gets the sum of its deductions' values, each scaled by
    ``cost.prob(insides[ded] - insides[item])``.

    Returns the dict of values keyed by ``id()`` of every deduction and of
    every item that has at least one deduction.
    """
    if f is None:
        f = svector.Vector

    features = {}
    for node in self.bottomup():
        node_id = id(node)
        for edge in node.deds:
            edge_id = id(edge)
            features[edge_id] = f(edge.dcost)
            for tail in edge.ants:
                features[edge_id] += features[id(tail)]

            weight = cost.prob(insides[edge_id] - insides[node_id])
            contribution = weight * features[edge_id]

            # The first deduction of an item seeds the entry; later ones add.
            if node_id in features:
                features[node_id] += contribution
            else:
                features[node_id] = contribution
    return features
def random_deriv(self, insides, deriv=None):
    """Sample a derivation rooted at this item.

    ``insides`` maps ``id(node)`` to a score for items and deductions.
    ``deriv`` is the derivation being filled in; a new ``Derivation(self)``
    is created when it is omitted.

    One deduction of this item is drawn by comparing a uniform random number
    against the running sum of
    ``cost.prob(insides[ded] - insides[self])``; the choice is recorded with
    ``deriv.select`` and the procedure recurses into every antecedent of the
    chosen deduction.

    Returns ``deriv``.
    """
    if deriv is None:
        deriv = Derivation(self)

    threshold = random.random()
    cumulative = 0.
    for choice in self.deds:
        cumulative += cost.prob(insides[id(choice)] - insides[id(self)])
        if cumulative > threshold:
            break
    else:
        # shouldn't happen: the running sum never exceeded the draw, so
        # fall back to the last deduction.
        choice = self.deds[-1]

    deriv.select(self, choice)
    for ant in choice.ants:
        ant.random_deriv(insides, deriv)
    return deriv
def random_deriv(self, insides, deriv=None):
    """Randomly pick one deduction per item, starting from this item.

    ``insides`` is indexed by ``id(node)``.  When ``deriv`` is ``None`` a
    fresh ``Derivation(self)`` is started; otherwise selections are added to
    the one passed in.

    The deduction is picked by walking ``self.deds`` and adding up
    ``cost.prob(insides[ded] - insides[self])`` until the total exceeds a
    uniform draw from ``random.random()``.  The pick is registered through
    ``deriv.select(self, ded)`` and every antecedent of the pick is sampled
    recursively into the same derivation.

    Returns the derivation.
    """
    if deriv is None:
        deriv = Derivation(self)

    draw = random.random()
    mass = 0.0
    for picked in self.deds:
        mass += cost.prob(insides[id(picked)] - insides[id(self)])
        if mass > draw:
            break
    else:
        # shouldn't happen -- no prefix of the sum passed the draw; use the
        # final deduction instead.
        picked = self.deds[-1]

    deriv.select(self, picked)
    for child in picked.ants:
        child.random_deriv(insides, deriv)
    return deriv
def show(ded, antvalues):
    """Build the bracketed token tuple for a single deduction.

    ``ded`` must provide ``rule`` and ``dcost``; ``antvalues`` is the
    sequence of values already produced for the deduction's antecedents.

    A deduction with a rule is expanded with
    ``rule.subst(ded.rule.erhs, antvalues)``; one without a rule reuses
    ``antvalues[0]``.  The returned tuple is that value framed by a first
    token ``"[<p>"`` (``cost.prob`` of the deduction's ``"posterior"`` entry,
    three decimals) and a last token ``"]"``.
    """
    if ded.rule:
        tokens = rule.subst(ded.rule.erhs, antvalues)
    else:
        tokens = antvalues[0]

    # Tuple concatenation: both branches above are expected to give tuples.
    left_bracket = ("[%.3f" % cost.prob(ded.dcost["posterior"]),)
    right_bracket = ("]",)
    return left_bracket + tokens + right_bracket
def process(sent):
    """Decode one sentence and write every configured side output.

    Steps, in order:

    1. Translate ``sent`` with ``thedecoder`` and hand the result to
       ``thedecoder.process_output``.
    2. On failure (``goal is None``) log a warning and return ``None``.
    3. If ``opts.forest_dir`` is set, dump the forest as gzipped JSON.
    4. If ``opts.rule_posterior_dir`` is set, write one line per deduction
       with its posterior, store that posterior in ``ded.dcost``, reweight
       the forest to use only the posterior, and write the resulting best
       derivation with bracketed posteriors.
    5. Write the n-best list and the French/English parse trees to the
       module-level files that are open.
    6. Log memory use and the best feature vector when ``log.level >= 1``.

    The forest and posterior files are managed with ``with`` blocks so that
    they are closed even if writing or any intermediate computation raises.

    Returns ``sent`` with ``sent.ewords`` set to the best output, or ``None``
    when decoding failed.
    """
    goal = thedecoder.translate(sent)

    # Called before the failure check, so it also runs when goal is None.
    thedecoder.process_output(sent, goal)

    if goal is None:
        log.writeln("warning: parse failure")
        return None

    if opts.forest_dir:
        forest_path = os.path.join(opts.forest_dir, "forest.%s.gz" % sent.id)
        with gzip.open(forest_path, "w") as forest_file:
            forest_file.write(
                forest.forest_to_json(
                    goal,
                    fwords=sent.words,
                    mode="english",
                    models=thedecoder.models,
                    weights=thedecoder.weights,
                )
            )

    if opts.rule_posterior_dir:
        rule_posterior_path = os.path.join(
            opts.rule_posterior_dir, "rule_posterior.%s" % sent.id
        )
        with open(rule_posterior_path, "w") as rule_posterior_file:
            beta = 1.0
            insides = goal.compute_inside(thedecoder.weights, beta=beta)
            outsides = goal.compute_outside(thedecoder.weights, insides, beta=beta)
            z = insides[id(goal)]
            for item in goal.bottomup():
                for ded in item.deds:
                    # outside(item) + weight of this deduction + inside of
                    # each antecedent, normalised by the goal's inside score.
                    c = outsides[id(item)]
                    c += thedecoder.weights.dot(ded.dcost)
                    c += sum(insides[id(ant)] for ant in ded.ants)
                    c -= z
                    rule_posterior_file.write(
                        "%s ||| span=%s posterior=%s\n"
                        % (ded.rule, (item.i, item.j), cost.prob(c))
                    )
                    # Kept on the deduction so the reweighting below and
                    # show() can read it back.
                    ded.dcost["posterior"] = c

        max_posterior_path = os.path.join(
            opts.rule_posterior_dir, "max_posterior.%s" % sent.id
        )
        with open(max_posterior_path, "w") as max_posterior_file:
            # Score derivations by the posterior feature alone.
            goal.reweight(svector.Vector("posterior=1"))
            max_posterior = goal.viterbi_deriv()

            def show(ded, antvalues):
                """Wrap a deduction's value in "[<posterior>" ... "]" tokens."""
                if ded.rule:
                    value = rule.subst(ded.rule.erhs, antvalues)
                else:
                    value = antvalues[0]
                return ("[%.3f" % cost.prob(ded.dcost["posterior"]),) + value + ("]",)

            value = max_posterior.value(show)
            s = " ".join(value)
            max_posterior_file.write("%s\n" % s)

    outputs = get_nbest(goal, n_best, ambiguity_limit)

    if n_best_file:
        for (v, e) in outputs:
            e = " ".join(e)
            # n_best_file.write("%s ||| %s ||| %s\n" % (sent.id, e, -thedecoder.weights.dot(v)))
            n_best_file.write("%s ||| %s ||| %s\n" % (sent.id, e, v))
        n_best_file.flush()

    # The first n-best entry is treated as the best hypothesis.
    (bestv, best) = outputs[0]

    if french_parse_file:
        french_parse_file.write("%s ||| %s\n" % (sent.id, goal.viterbi_deriv().french_tree()))
        french_parse_file.flush()
    if english_parse_file:
        english_parse_file.write("%s ||| %s\n" % (sent.id, goal.viterbi_deriv().english_tree()))
        english_parse_file.flush()

    if log.level >= 1:
        gc.collect()
        log.write(" done decoding, memory=%s\n" % monitor.memory())
        log.write(" features: %s; %s\n" % (bestv, thedecoder.weights.dot(bestv)))

    sent.ewords = best
    return sent
def show(ded, antvalues):
    """Format a deduction as a tuple of tokens framed by posterior brackets.

    ``ded`` must provide ``rule`` and ``dcost``; ``antvalues`` holds the
    values already built for the deduction's antecedents.

    With a rule present, the value is ``ded.rule.e.subst((), antvalues)``;
    without one, it is ``antvalues[0]``.  The value is then framed by a
    leading ``"[<p>"`` token -- ``cost.prob(ded.dcost['posterior'])`` with
    three decimals -- and a trailing ``"]"`` token.

    The value must be a tuple in both cases because the frame is attached by
    tuple concatenation.
    """
    if not ded.rule:
        # No rule on this deduction: forward the single antecedent value.
        inner = antvalues[0]
    else:
        inner = ded.rule.e.subst((), antvalues)

    opening = "[%.3f" % cost.prob(ded.dcost['posterior'])
    return (opening,) + inner + ("]",)
def process(sent):
    """Decode one sentence and write every configured side output.

    Steps, in order:

    1. Translate ``sent`` with ``thedecoder`` and hand the result to
       ``thedecoder.process_output``.
    2. Return ``None`` when decoding failed (``goal is None``).
    3. If ``opts.forest_dir`` is set, dump the forest as gzipped JSON.
    4. If ``opts.rule_posterior_dir`` is set, write one line per deduction
       with its posterior, store that posterior in ``ded.dcost``, reweight
       the forest to use only the posterior, and write the resulting best
       derivation with bracketed posteriors.
    5. Write the n-best list and the French/English parse trees to the
       module-level files that are open.
    6. Log memory use and the best feature vector when ``log.level >= 1``.

    Output words are converted with ``sym.tostring`` before being written or
    stored.  The forest and posterior files are managed with ``with`` blocks
    so that they are closed even if writing or any intermediate computation
    raises.

    Returns ``sent`` with ``sent.ewords`` set to the best output as strings,
    or ``None`` when decoding failed.
    """
    goal = thedecoder.translate(sent)

    # Called before the failure check, so it also runs when goal is None.
    thedecoder.process_output(sent, goal)

    if goal is None:
        return None

    if opts.forest_dir:
        forest_path = os.path.join(opts.forest_dir, "forest.%s.gz" % sent.id)
        with gzip.open(forest_path, "w") as forest_file:
            forest_file.write(
                forest.forest_to_json(
                    goal,
                    fwords=sent.fwords,
                    mode='english',
                    models=thedecoder.models,
                    weights=thedecoder.weights,
                )
            )

    if opts.rule_posterior_dir:
        rule_posterior_path = os.path.join(
            opts.rule_posterior_dir, "rule_posterior.%s" % sent.id
        )
        with open(rule_posterior_path, "w") as rule_posterior_file:
            beta = 1.
            insides = goal.compute_inside(thedecoder.weights, beta=beta)
            outsides = goal.compute_outside(thedecoder.weights, insides, beta=beta)
            z = insides[id(goal)]
            for item in goal.bottomup():
                for ded in item.deds:
                    # outside(item) + weight of this deduction + inside of
                    # each antecedent, normalised by the goal's inside score.
                    c = outsides[id(item)]
                    c += thedecoder.weights.dot(ded.dcost)
                    c += sum(insides[id(ant)] for ant in ded.ants)
                    c -= z
                    rule_posterior_file.write(
                        "%s ||| span=%s posterior=%s\n"
                        % (ded.rule, (item.i, item.j), cost.prob(c))
                    )
                    # Kept on the deduction so the reweighting below and
                    # show() can read it back.
                    ded.dcost['posterior'] = c

        max_posterior_path = os.path.join(
            opts.rule_posterior_dir, "max_posterior.%s" % sent.id
        )
        with open(max_posterior_path, "w") as max_posterior_file:
            # Score derivations by the posterior feature alone.
            goal.reweight(svector.Vector('posterior=1'))
            max_posterior = goal.viterbi_deriv()

            def show(ded, antvalues):
                """Wrap a deduction's value in "[<posterior>" ... "]" tokens."""
                if ded.rule:
                    value = ded.rule.e.subst((), antvalues)
                else:
                    value = antvalues[0]
                return ("[%.3f" % cost.prob(ded.dcost['posterior']),) + value + ("]",)

            value = max_posterior.value(show)
            # Integer entries are converted with sym.tostring; the bracket
            # tokens added by show() are already strings.
            s = " ".join((sym.tostring(e) if type(e) is int else e) for e in value)
            max_posterior_file.write("%s\n" % s)

    outputs = get_nbest(goal, n_best, ambiguity_limit)

    if n_best_file:
        for (v, e) in outputs:
            e = " ".join(sym.tostring(w) for w in e)
            #n_best_file.write("%s ||| %s ||| %s\n" % (sent.id, e, -thedecoder.weights.dot(v)))
            n_best_file.write("%s ||| %s ||| %s\n" % (sent.id, e, v))
        n_best_file.flush()

    # The first n-best entry is treated as the best hypothesis.
    (bestv, best) = outputs[0]

    if french_parse_file:
        french_parse_file.write("%s ||| %s\n" % (sent.id, goal.viterbi_deriv().french_tree()))
        french_parse_file.flush()
    if english_parse_file:
        english_parse_file.write("%s ||| %s\n" % (sent.id, goal.viterbi_deriv().english_tree()))
        english_parse_file.flush()

    if log.level >= 1:
        gc.collect()
        log.write(" done decoding, memory=%s\n" % monitor.memory())
        log.write(" features: %s; %s\n" % (bestv, thedecoder.weights.dot(bestv)))

    sent.ewords = [sym.tostring(e) for e in best]
    return sent