def quick_fdcheck(func, w, g, n_checks=20, eps=1e-5, verbose=1, progressbar=1):
    """Check gradient along random directions (a faster alternative to
    axis-aligned directions).

    Tim Vieira (2017) "How to test gradient implementations"
    https://timvieira.github.io/blog/post/2017/04/21/how-to-test-gradient-implementations/

    """
    keys = ['rand_%s' % i for i in range(n_checks)]
    H = {}
    G = {}

    was = w.flatten()
    w = np.asarray(w.flat)
    g = np.asarray(g.flat)

    dim = len(w)

    for k in (iterview(keys) if progressbar else keys):
        d = spherical(dim)
        G[k] = g.dot(d)

        w[:] = was + eps*d
        b = func()

        w[:] = was - eps*d
        a = func()

        w[:] = was
        H[k] = (b-a) / (2*eps)

    return compare(H, G, verbose=verbose)
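# Usage sketch for the directional check above. quick_fdcheck depends on
# spherical, iterview, and compare from the arsenal package; the spherical
# below is a self-contained stand-in for arsenal.maths.spherical, and the
# test function f(w) = 0.5*||w||^2 (exact gradient: w) is purely
# illustrative.
import numpy as np

def spherical(n):
    # Uniformly random direction on the unit sphere in R^n.
    d = np.random.randn(n)
    return d / np.linalg.norm(d)

w = np.random.randn(10)
g = w.copy()   # exact gradient of f(w) = 0.5*||w||^2
for _ in range(20):
    d = spherical(len(w))
    eps = 1e-5
    # The centered difference of f along direction d should match g.dot(d).
    fd = (0.5*(w + eps*d) @ (w + eps*d) - 0.5*(w - eps*d) @ (w - eps*d)) / (2*eps)
    assert np.isclose(fd, g.dot(d))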
def test_gradients(M):
    π = random_dist(M.S, M.A)
    J = lambda: M.J(π)
    r = M.r

    # The policy gradient theorem.
    fdcheck(
        J, π,
        1 / (1 - M.γ) * M.d(π)[:, None] * M.Q(π),   # Note: Q is not interchangeable with the advantage here!
    ) #.show(title='policy gradient v1.')
    print('[policy gradient theorem] ok')

    # Jacobians of the implicit d(π) and v(π) functions.
    z = spherical(M.S)
    _d, d_grad = M.d(π, jac=True)
    fdcheck(lambda: z @ M.d(π), π, d_grad(z)) #.show(title='implicit d')
    _v, v_grad = M.V(π, jac=True)
    fdcheck(lambda: z @ M.V(π), π, v_grad(z)) #.show(title='implicit v')

    # Check that the implicit functions are consistent with the other methods
    # for computing them.
    assert np.allclose(_d, M.d(π))
    assert np.allclose(_v, M.V(π))

    # The policy gradient theorem, advantage form.
    # fdcheck(J, π,
    #         1/(1-M.γ) * (
    #             np.einsum('s,sa->sa', M.d(π), M.Advantage(π))
    #             + (M.d(π) * M.V(π))[:,None]
    #         )
    # ) #.show(title='policy gradient v1.')

    # Extract the full Jacobian of d(π); flatten the SA dimensions of the policy.
    Jdp = np.zeros((M.S, M.S * M.A))
    for s in range(M.S):
        Jdp[s, :] = d_grad(onehot(s, M.S)).flat

    # Chain rule from J through d(π) to the policy parameters.
    fdcheck(J, π,
            1 / (1 - M.γ) * (np.einsum('sa,sa->s', r, π) @ Jdp
                             + np.einsum('s,sa->sa', M.d(π), r).flatten())
    ) #.show(title='policy gradient v2.')

    # Extract the full Jacobian of V(π); flatten the SA dimensions of the policy.
    Jvp = np.zeros((M.S, M.S * M.A))
    for s in range(M.S):
        Jvp[s, :] = v_grad(onehot(s, M.S)).flat

    fdcheck(J, π, M.s0 @ Jvp)      #.show(title='policy gradient v2a.')
    fdcheck(J, π, v_grad(M.s0))    #.show(title='policy gradient v2b.')
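# test_gradients leans on fdcheck, the axis-aligned counterpart of
# quick_fdcheck above (imported from arsenal in the original code), plus an
# MDP object M with methods J, d, V, Q. Below is a minimal, self-contained
# sketch of the finite-difference pattern fdcheck performs; the name
# fdcheck_sketch and its boolean return value are illustrative assumptions,
# and w must be the float array that func closes over.
import numpy as np

def fdcheck_sketch(func, w, g, eps=1e-5):
    fd = np.zeros_like(np.asarray(g, dtype=float))
    flat = w.reshape(-1)   # view onto w, so func sees each perturbation
    for i in range(flat.size):
        was = flat[i]
        flat[i] = was + eps
        b = func()
        flat[i] = was - eps
        a = func()
        flat[i] = was      # restore
        fd.flat[i] = (b - a) / (2 * eps)
    return np.allclose(fd, np.asarray(g, dtype=float), rtol=1e-4, atol=1e-6)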
def test():
    from arsenal.maths import spherical

    # Note: this first (A, B) pair is unused; it is immediately shadowed below.
    A = np.array([[ 0.06052216, 1.93689366],
                  [-0.84213754, 1.19210556],
                  [-0.16304193, 1.48302313]])
    B = np.array([-0.6249662, -1.85593953, -0.82883232])

    A = [[0.22032854, 0.65650877],
         [0.78670232, 0.24127292],
         [0.08878384, 0.68543539]]
    B = [1.47872332, 0.24802336, 1.60300663]

    # A *= -1
    # B *= -1

    A = np.array(A)
    B = np.array(B)

    for _ in range(10):
        # Sample a random halfspace a*x + b*y <= c.
        [a, b, c] = spherical(3)
        print([a, b, c])

        # for i in range(len(B)):
        #     [a, b], c = A[i], B[i]
        #     print('>>>>', a, b, c)

        H = Halfspaces([[a, b]], [c])

        r = 10
        H.viz([-r, r], [-r, r])

        # Scatter random points: green if inside the halfspace, red otherwise.
        pts = np.random.uniform(-r, r, size=(10, 2))
        for x, y in pts:
            pl.scatter([x], [y], c='g' if H([x, y]) else 'r')

        pl.title(f'{a} x + {b} y <= {c}')
        pl.show()
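# The Halfspaces class is not defined in this file. Below is a minimal
# sketch consistent with how it is used above -- constructor
# Halfspaces(A, b), membership test via __call__, and a viz method that
# draws the boundary lines -- entirely inferred from the test, not the
# project's actual implementation.
import numpy as np
import matplotlib.pyplot as pl

class Halfspaces:
    "Sketch: the region {x : A @ x <= b}."

    def __init__(self, A, b):
        self.A = np.atleast_2d(np.asarray(A, dtype=float))
        self.b = np.asarray(b, dtype=float)

    def __call__(self, x):
        # A point is inside iff it satisfies every inequality.
        return bool(np.all(self.A @ np.asarray(x, dtype=float) <= self.b))

    def viz(self, xlim, ylim):
        # Draw each boundary line a*x + b*y = c within the viewing box.
        xs = np.linspace(*xlim, 100)
        for (a, b), c in zip(self.A, self.b):
            if abs(b) > 1e-12:
                pl.plot(xs, (c - a*xs) / b, 'k-')
            else:
                pl.axvline(c / a, color='k')
        pl.xlim(*xlim)
        pl.ylim(*ylim)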
def main():
    from arsenal.integerizer import FeatureHashing
    from arsenal.maths import spherical

    bits = 8
    D = 2**bits
    alphabet = FeatureHashing(lambda x: abs(hash(x)), bits)
    weights = spherical(D)
    direction = spherical(D)

    #sentence = 'Papa ate the caviar with the spoon in the park .'.split()
    sentence = 'Papa ate the caviar with the spoon .'.split()

    grammar = load_grammar("""
    S    X .
    X    X X
    X    Papa
    X    ate
    X    the
    X    caviar
    X    with
    X    spoon
    X    in
    X    park
    """)

    if 1:
        # This code branch enumerates all (exponentially many) valid
        # derivations.
        root = semiring_enumeration(sentence, grammar)
        for d in root:
            print(d)
            # print(post_process(lambda e: e, d))
        assert len(list(root)) == len(set(root))

    def binary_features(_, X, Y, Z, i, j, k):
        return alphabet(['%s -> %s %s [%s,%s,%s]' % (X, Y, Z, i, j, k)])

    def unary_features(_, X, Y, i, k):
        return alphabet(['%s -> %s [%s,%s]' % (X, Y, i, k)])

    root = semiring_mert(sentence, grammar, weights, direction,
                         binary_features, unary_features)

    mert_derivations = []   #set()
    for x in root:
        print(x)
        mert_derivations.append(derivation(x))
    assert len(mert_derivations) == len(set(mert_derivations))
    #root.draw()

    # Compare the set of derivations found by the MERT semiring to 'brute
    # force' linesearch. Note: linesearch might only find a subset of the
    # derivations found by MERT if the grid isn't fine enough.
    brute_derivations = set()
    for step in np.linspace(-20, 20, 1000):
        root = semiring_linesearch(sentence, grammar, weights, step, direction,
                                   binary_features, unary_features)
        d = derivation(root)
        brute_derivations.add(d)

    # NOTE: need to take the upper hull of MERT (so it's currently an
    # overestimate).
    print('mert:', len(mert_derivations))
    print('brute:', len(brute_derivations))
    assert brute_derivations.issubset(mert_derivations)
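# This demo hashes rule features into D = 2**bits buckets via
# arsenal.integerizer.FeatureHashing. A tiny stand-in with the call
# signature used above (callable on a list of feature strings, returning
# bucket indices) might look like the following; this is an assumption
# inferred from usage, not the library's implementation.
class FeatureHashingSketch:
    def __init__(self, h, bits):
        self.h = h
        self.mask = (1 << bits) - 1   # keep the low `bits` bits of each hash

    def __call__(self, features):
        return [self.h(f) & self.mask for f in features]

# e.g., alphabet = FeatureHashingSketch(lambda x: abs(hash(x)), 8)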
def main():
    from arsenal.alphabet import Alphabet
    from arsenal.maths import spherical

    D = 100
    alphabet = Alphabet(random_int=D)
    weights = spherical(D)
    direction = spherical(D)

    #sentence = 'Papa ate the caviar with the spoon in the park .'.split()
    sentence = 'Papa ate the caviar with the spoon .'.split()

    if 0:
        grammar = """
        S    S .
        S    NP VP
        NP   D N
        NP   NP PP
        VP   V NP
        VP   VP PP
        PP   P NP
        NP   Papa
        N    caviar
        N    spoon
        N    park
        V    ate
        P    with
        P    in
        D    the
        """
    else:
        grammar = """
        S    X .
        X    X X
        X    Papa
        X    ate
        X    the
        X    caviar
        X    with
        X    spoon
        X    in
        X    park
        """

    rhs = load_grammar(grammar)

    if 1:
        # This code branch enumerates all (exponentially many) valid
        # derivations.
        root = semiring_enumeration(sentence, rhs)
        for d in root.x:
            print(post_process(d))
        assert len(root.x) == len(set(root.x))

    def binary_features(sentence, X, Y, Z, i, j, k):
        return alphabet.map(['%s -> %s %s [%s,%s,%s]' % (X, Y, Z, i, j, k)])

    def unary_features(sentence, X, Y, i, k):
        return alphabet.map(['%s -> %s [%s,%s]' % (X, Y, i, k)])

    root = semiring_mert(sentence, rhs, weights, direction,
                         binary_features, unary_features)

    mert_derivations = []   #set()
    for x in root.points:
        print(x)
        d = x.derivation()
        mert_derivations.append(d)
    assert len(mert_derivations) == len(set(mert_derivations))
    #root.draw()

    # Compare the set of derivations found by the MERT semiring to 'brute
    # force' linesearch. Note: linesearch might only find a subset of the
    # derivations found by MERT if the grid isn't fine enough.
    brute_derivations = set()
    for step in np.linspace(-20, 20, 1000):
        root = semiring_linesearch(sentence, rhs, weights, step, direction,
                                   binary_features, unary_features)
        d = root.derivation()
        brute_derivations.add(d)

    # NOTE: need to take the upper hull of MERT (so it's currently an
    # overestimate).
    print('mert:', len(mert_derivations))
    print('brute:', len(brute_derivations))
    assert brute_derivations.issubset(mert_derivations)
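# Both versions of main() above rest on the same geometric fact: along a
# search direction, each derivation's score is a line
# score(t) = weights·φ + t*(direction·φ), so the best derivation as a
# function of the step size t is the upper envelope of a set of lines. The
# MERT semiring computes that envelope exactly; the brute-force loop samples
# it on a grid. Below is a self-contained sketch of the envelope computation
# (the convex hull trick), independent of the semiring machinery; names and
# the (slope, intercept, label) representation are illustrative.
import numpy as np

def upper_envelope(lines):
    """Upper envelope of lines y = m*t + b, given as (m, b, label) triples.
    Returns the sublist that attains the max for some t, in slope order."""
    lines = sorted(lines)
    hull = []
    for m, b, label in lines:
        if hull and hull[-1][0] == m:
            hull.pop()   # parallel lines: sorted order means the new b is >=
        while len(hull) >= 2:
            (m1, b1, _), (m2, b2, _) = hull[-2], hull[-1]
            # hull[-1] never attains the max if the new line overtakes
            # hull[-2] no later than hull[-1] does: t13 <= t12.
            if (b1 - b) * (m2 - m1) <= (b1 - b2) * (m - m1):
                hull.pop()
            else:
                break
        hull.append((m, b, label))
    return hull

if __name__ == '__main__':
    # Random lines standing in for derivation scores along a direction.
    rng = np.random.default_rng(0)
    lines = [(m, b, i) for i, (m, b) in enumerate(rng.normal(size=(50, 2)))]
    hull = upper_envelope(lines)
    # Grid linesearch (mirroring semiring_linesearch above) finds a subset
    # of the lines on the exact envelope.
    winners = {max(lines, key=lambda l: l[0]*t + l[1])[2]
               for t in np.linspace(-20, 20, 1000)}
    assert winners <= {label for m, b, label in hull}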