def inside_outside_speedup(E, root):
    """Inside-outside speed-up for computing second-order expectations on a
    hypergraph.

    """
    # Perform inside-outside in the "cheap" (first-order expectation) semiring,
    # where each edge carries the pair (p, p*r).
    g = Hypergraph()
    g.root = root
    for e, (p, r, _) in E.items():
        ke = ExpectationSemiring(p, p * r)
        g.edge(ke, *e)

    B = inside(g, zero=ExpectationSemiring.Zero)
    A = outside(g, B, zero=ExpectationSemiring.Zero, one=ExpectationSemiring.One)

    # The (s,t) component is an efficient linear combination with coefficients
    # from the cheap semiring.
    xs = LogValVector()
    xt = LogValVector()
    for e, (p, r, s) in E.items():
        x = e[0]
        # kebar is the cheap-semiring product of the outside value of the head
        # node e[0] with the inside values of the tail nodes e[1:].
        kebar = ExpectationSemiring.Zero()
        kebar.p = A[x].p
        kebar.r = A[x].r
        for u in e[1:]:
            # Expectation-semiring product: (p1,r1)*(p2,r2) = (p1*p2, p1*r2 + p2*r1).
            kebar.r = B[u].p * kebar.r + kebar.p * B[u].r
            kebar.p *= B[u].p
        xs += s * p * kebar.p
        xt += s * p * (r * kebar.p + kebar.r)

    return (B[g.root].p, B[g.root].r, xs, xt)
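# For reference: with p_d = prod_{e in d} p_e, r_d = sum_{e in d} r_e and
# s_d = sum_{e in d} s_e over the edges of a derivation d, the four components
# returned above are intended to match, componentwise,
#
#   Z    = sum_d p_d
#   rbar = sum_d p_d * r_d
#   sbar = sum_d p_d * s_d
#   tbar = sum_d p_d * r_d * s_d
#
# which is exactly what brute_force() below computes by explicit enumeration.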
def brute_force(derivations, E):
    "Brute-force enumeration method for computing (Z, rbar, sbar, tbar)."
    Z = LogVal(0)
    rbar = LogVal(0)
    sbar = LogValVector()
    tbar = LogValVector()
    for d in derivations:
        #print
        #print 'Derivation:', d
        rd = LogVal(0)
        pd = LogVal(1)
        sd = LogValVector()
        for [x, [y, z]] in tree_edges(d):
            (p, r, s) = E[x, y, z]
            #print (x,y,z), tuple(x.to_real() for x in (p, r, s))
            pd *= p
            rd += r
            sd += s
        #print 'p=%s,r=%s,s=%s' % tuple(x.to_real() for x in (pd, rd, sd))
        Z += pd
        rbar += pd * rd
        sbar += pd * sd
        tbar += pd * rd * sd
    return Semiring2(Z, rbar, sbar, tbar)
def small():
    # Define the set of valid derivations.
    D = [
        Tree('(0,3)', [Tree('(0,2)', ['(0,1)', '(1,2)']), '(2,3)']),
        Tree('(0,3)', ['(0,1)', Tree('(1,3)', ['(1,2)', '(2,3)'])]),
    ]

    # Define the set of edges and the associated (p,r,s) values that are local
    # to each edge.
    E = {
        ('(0,3)', '(0,2)', '(2,3)'): (
            LogVal(10),
            LogVal(1),
            LogValVector({'023': LogVal(1), '23': LogVal(1)}),
        ),
        ('(0,2)', '(0,1)', '(1,2)'): (
            LogVal(10),
            LogVal(1),
            LogValVector({'012': LogVal(1)}),
        ),
        ('(0,3)', '(0,1)', '(1,3)'): (
            LogVal(20),
            LogVal(1),
            LogValVector({'013': LogVal(1)}),
        ),
        ('(1,3)', '(1,2)', '(2,3)'): (
            LogVal(10),
            LogVal(3),
            LogValVector({'123': LogVal(1), '23': LogVal(1)}),
        ),
        ('(0,1)',): (LogVal(1), LogVal(0), LogValVector()),
        ('(1,2)',): (LogVal(1), LogVal(0), LogValVector()),
        ('(2,3)',): (LogVal(1), LogVal(0), LogValVector()),
    }

    # Define the root node of the hypergraph; all derivations share it.
    root = '(0,3)'

    assert all(d.label() == root for d in D), \
        'All derivations must have a common root node.'
    assert all(isinstance(k, tuple) for k in E)

    return root, D, E
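# Hedged sanity-check sketch (not part of the original code): cross-check the
# speed-up against brute-force enumeration on the small() example.  The helper
# name `_check_small` is hypothetical, and it assumes inside_outside_speedup,
# brute_force and small all live in this module.
def _check_small():
    root, D, E = small()
    got = inside_outside_speedup(E, root)   # 4-tuple (Z, rbar, sbar, tbar)
    want = brute_force(D, E)                # Semiring2 over the same quantities
    print('speed-up:   ', got)
    print('brute force:', want)
    return got, want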
def Zero(cls):
    return cls(LogValVector.Zero(), LogValVector.Zero())
def One():
    return Semiring2(LogVal.One(), LogVal.Zero(), LogValVector(), LogValVector())
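# Hedged sketch (not from the original source): One() above is the identity
# <1, 0, 0, 0> of the second-order expectation semiring (Li & Eisner, 2009).
# For reference, its product is sketched below over plain (p, r, s, t) tuples,
# since the actual Semiring2 class and its attribute names are not shown here;
# semiring addition is simply componentwise.
def semiring2_times(a, b):
    (p1, r1, s1, t1) = a
    (p2, r2, s2, t2) = b
    # <p1,r1,s1,t1> * <p2,r2,s2,t2>
    return (p1 * p2,
            p1 * r2 + p2 * r1,
            p1 * s2 + p2 * s1,
            p1 * t2 + p2 * t1 + r1 * s2 + r2 * s1)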
def fdcheck(E, root, eps=1e-4):
    """Finite-difference check of the gradients of the numerator (rbar),
    denominator (Z), and risk with respect to the feature weights W.

    """

    def fn(W):
        "Evaluate numerator and denominator of risk."
        g = Hypergraph()
        g.root = root
        for e, [_, r, f] in list(E.items()):
            p = LogVal(np.exp(f.dot(W).to_real()))
            g.edge(Semiring1(p, p * r), *e)
        B = g.inside(Semiring1.Zero)
        Q = B[g.root]
        return Q.p.to_real(), Q.r.to_real(), (Q.r / Q.p).to_real()

    features = {k for [_, _, f] in E.values() for k in f}
    W = LogValVector({k: LogVal(np.random.uniform(-1, 1)) for k in features})

    # For gradient of risk we use <p, p*r, D[p], r*D[p]>, but my code computes
    # <p, p*r, p*s, p*r*s>, so we pass in s = D[p]/p.
    #
    #   D[p] = D[exp(f.dot(W))] = exp(f.dot(W)) * D[f.dot(W)] = p * f
    #
    # therefore D[p]/p = f.
    if 0:
        E1 = {}
        for e, [_, r, f] in list(E.items()):
            p = LogVal(np.exp(f.dot(W).to_real()))
            E1[e] = (p, r, f * p)
        #S = secondorder_expectation_semiring(E, root)
        from hypergraphs.insideout3 import inside_outside_speedup
        khat, xhat = inside_outside_speedup(E1, root)
    else:
        E1 = {}
        for e, [_, r, f] in list(E.items()):
            p = LogVal(np.exp(f.dot(W).to_real()))
            E1[e] = (p, r, f)
        #S = secondorder_expectation_semiring(E, root)
        from hypergraphs.insideout import inside_outside_speedup
        khat, xhat = inside_outside_speedup(E1, root)

    ad_Z = xhat.s
    ad_rbar = xhat.t
    Z = khat.p
    rbar = khat.r
    # Gradient of risk (= rbar/Z) by the quotient rule.
    ad_risk = ad_rbar / Z - rbar * ad_Z / Z / Z

    dd = []
    for k in features:
        was = W[k]

        W.x[k] = was + LogVal(eps)
        b_Z, b_rbar, b_risk = fn(W)

        W.x[k] = was - LogVal(eps)
        a_Z, a_rbar, a_risk = fn(W)

        W.x[k] = was

        # Central finite-difference estimates.
        fd_rbar = (b_rbar - a_rbar) / (2 * eps)
        fd_Z = (b_Z - a_Z) / (2 * eps)
        fd_risk = (b_risk - a_risk) / (2 * eps)

        dd.append({
            'key': k,
            'ad_risk': ad_risk[k].to_real(),
            'fd_risk': fd_risk,
            'ad_Z': ad_Z[k].to_real(),
            'fd_Z': fd_Z,
            'ad_rbar': ad_rbar[k].to_real(),
            'fd_rbar': fd_rbar,
        })

    from arsenal.maths import compare
    from pandas import DataFrame
    df = DataFrame(dd)
    compare(df.fd_Z, df.ad_Z, alphabet=df.key).show()
    compare(df.fd_rbar, df.ad_rbar, alphabet=df.key).show()
    compare(df.fd_risk, df.ad_risk, alphabet=df.key).show()
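# Hedged usage sketch (not in the original source): run the finite-difference
# check on the small() example.  The helper name `_demo_fdcheck` is
# hypothetical; it assumes small() is available in this module and relies on
# fdcheck() ignoring the stored edge probability, recomputing it from
# exp(f.dot(W)) inside fn() as shown above.
def _demo_fdcheck():
    root, _, E = small()
    fdcheck(E, root)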