def test_ddp_sorting():
    """Check that every DiscreteDP instance exposes sorted state-action data.

    The same problem is supplied with its state-action pairs in sorted and
    in shuffled order, each with a dense and a sparse ``Q``; all four
    instances are compared against the sorted reference arrays.
    """
    discount = 0.95

    # Reference data, ordered by (state, action)
    s_sorted = [0, 0, 1]
    a_sorted = [0, 1, 0]
    indptr_sorted = [0, 2, 3]
    R_sorted = [0, 1, 2]
    Q_sorted = [(1, 0), (1 / 2, 1 / 2), (0, 1)]

    # The same pairs listed out of order
    s_mixed = [0, 1, 0]
    a_mixed = [0, 0, 1]
    R_mixed = [0, 2, 1]
    Q_mixed = [(1, 0), (0, 1), (1 / 2, 1 / 2)]

    instances = [
        DiscreteDP(R_sorted, Q_sorted, discount, s_sorted, a_sorted),
        DiscreteDP(R_sorted, sparse.csr_matrix(Q_sorted), discount,
                   s_sorted, a_sorted),
        DiscreteDP(R_mixed, Q_mixed, discount, s_mixed, a_mixed),
        DiscreteDP(R_mixed, sparse.csr_matrix(Q_mixed), discount,
                   s_mixed, a_mixed),
    ]

    for ddp in instances:
        assert_array_equal(ddp.s_indices, s_sorted)
        assert_array_equal(ddp.a_indices, a_sorted)
        assert_array_equal(ddp.a_indptr, indptr_sorted)
        assert_array_equal(ddp.R, R_sorted)
        # Densify a sparse Q so it can be compared element-wise
        Q_dense = ddp.Q.toarray() if sparse.issparse(ddp.Q) else ddp.Q
        assert_array_equal(Q_dense, Q_sorted)
def test_ddp_beta_0():
    """Solve a problem with ``beta = 0`` using every method and every form.

    The expected policy picks the action with the largest reward in each
    state.  For instances flagged ``_sparse`` the ``'lp'`` method is
    expected to raise ``NotImplementedError`` instead of returning.
    """
    n, m = 3, 2
    R = np.array([[0, 1], [1, 0], [0, 1]])
    Q = np.full((n, m, n), 1 / n)  # uniform transition probabilities
    beta = 0

    expected_sigma = [1, 0, 1]
    expected_v = [1, 1, 1]
    v0 = [0, 0, 0]

    product_form = DiscreteDP(R, Q, beta)
    candidates = [
        product_form,
        product_form.to_sa_pair_form(),
        product_form.to_sa_pair_form(sparse=False),
    ]

    for ddp in candidates:
        for method in ('vi', 'pi', 'mpi', 'lp'):
            if method == 'lp' and ddp._sparse:
                assert_raises(NotImplementedError, ddp.solve,
                              method=method, v_init=v0)
                continue
            res = ddp.solve(method=method, v_init=v0)
            assert_array_equal(res.sigma, expected_sigma)
            assert_array_equal(res.v, expected_v)
def test_ddp_to_sa_and_to_product():
    """Convert a DiscreteDP between product and state-action-pair forms.

    Checks that ``to_sa_pair_form`` and ``to_product_form`` preserve ``R``,
    ``Q`` and ``beta`` (up to the infeasible pair noted below), and that
    every converted instance yields the same solution as the original for
    the 'pi', 'vi' and 'mpi' methods.
    """
    n, m = 3, 2
    R = np.array([[0, 1], [1, 0], [-np.inf, 1]])
    Q = np.empty((n, m, n))
    Q[:] = 1/n
    Q[0, 0, 0] = 0
    Q[0, 0, 1] = 2/n
    beta = 0.95

    # Expected state-action-pair data: one row per feasible (s, a) pair
    sparse_R = np.array([0, 1, 1, 0, 1])
    _Q = np.full((5, 3), 1/3)
    _Q[0, 0] = 0
    _Q[0, 1] = 2/n

    ddp = DiscreteDP(R, Q, beta)
    ddp_sa = ddp.to_sa_pair_form()
    ddp_sa2 = ddp_sa.to_sa_pair_form()
    ddp_sa3 = ddp.to_sa_pair_form(sparse=False)
    ddp2 = ddp_sa.to_product_form()
    ddp3 = ddp_sa2.to_product_form()
    ddp4 = ddp.to_product_form()

    # make sure conversion worked
    for ddp_s in [ddp_sa, ddp_sa2, ddp_sa3]:
        assert_allclose(ddp_s.R, sparse_R)
        # assert_allclose doesn't work on sparse matrices, so densify Q
        # first.  The comparison must be asserted; as a bare expression
        # its result would be silently discarded.
        Q_dense = ddp_s.Q.toarray() if sparse.issparse(ddp_s.Q) else ddp_s.Q
        assert np.max(np.abs(_Q - Q_dense)) < 1e-15
        assert_allclose(ddp_s.beta, beta)

    # these two will have probability 0 in state 2, action 0 because
    # of the infeasibility in R
    funky_Q = np.empty((n, m, n))
    funky_Q[:] = 1/n
    funky_Q[0, 0, 0] = 0
    funky_Q[0, 0, 1] = 2/n
    funky_Q[2, 0, :] = 0
    for ddp_f in [ddp2, ddp3]:
        assert_allclose(ddp_f.R, ddp.R)
        assert_allclose(ddp_f.Q, funky_Q)
        assert_allclose(ddp_f.beta, ddp.beta)

    # this one is just the original one.
    assert_allclose(ddp4.R, ddp.R)
    assert_allclose(ddp4.Q, ddp.Q)
    assert_allclose(ddp4.beta, ddp.beta)

    # Every form must produce the same solution as the original problem
    for method in ["pi", "vi", "mpi"]:
        sol1 = ddp.solve(method=method)
        for ddp_other in [ddp_sa, ddp_sa2, ddp_sa3, ddp2, ddp3, ddp4]:
            sol2 = ddp_other.solve(method=method)
            for k in ["v", "sigma", "num_iter"]:
                assert_allclose(sol1[k], sol2[k])
def prices_to_capital_stock(am, r):
    """Return the capital stock induced by interest rate ``r``.

    The wage is derived from ``r`` via ``r_to_w`` and both prices are set
    on ``am`` (mutating it).  The resulting dynamic program is solved by
    policy iteration and the asset values are weighted by the marginal
    asset distribution of the first stationary distribution.

    The discount factor ``β`` is not a parameter; it is read from the
    enclosing scope.
    """
    am.set_prices(r, r_to_w(r))

    # Optimal policy for the household at these prices
    solution = DiscreteDP(am.R, am.Q, β).solve(method='policy_iteration')

    # Stationary distribution over states, reduced to assets only
    state_dist = solution.mc.stationary_distributions[0]
    marginal = asset_marginal(state_dist, am.a_size, am.z_size)

    # K: asset values weighted by their marginal probabilities
    return np.sum(marginal * am.a_vals)
def voter_dpp(self, etah, etal, tauy, pipar, prg, prb, Pm, pol_br, etam, beta, delta):
    """Build and solve the voter's discrete dynamic program.

    Rewards come from ``self.rewardv(etam, beta)`` and transitions from
    ``self.populate_Q(...)``.  Note that ``delta`` (not ``beta``) is what
    is passed to ``DiscreteDP`` as its ``beta`` argument.

    Returns a ``DPPsol`` wrapping the policy-iteration results together
    with the reward and transition arrays used.
    """
    rewards = self.rewardv(etam, beta)
    transitions = self.populate_Q(etah, etal, tauy, pipar, prg, prb, Pm, pol_br)
    problem = DiscreteDP(
        R=rewards,
        Q=transitions,
        beta=delta,
        s_indices=self.state_indices,
        a_indices=self.action_indices,
    )
    solved = problem.solve(method='policy_iteration')
    return DPPsol(solved, rewards, transitions)
def test_ddp_beta_1_not_implemented_error():
    """With ``beta = 1`` policy evaluation and the solvers must refuse to run.

    The product form and both state-action-pair forms are each expected to
    raise ``NotImplementedError``.
    """
    n, m = 3, 2
    R = np.array([[0, 1], [1, 0], [0, 1]])
    Q = np.full((n, m, n), 1/n)  # uniform transition probabilities
    beta = 1

    base = DiscreteDP(R, Q, beta)
    variants = (
        base,
        base.to_sa_pair_form(),
        base.to_sa_pair_form(sparse=False),
    )
    solver_names = (
        'value_iteration',
        'policy_iteration',
        'modified_policy_iteration',
    )

    for ddp in variants:
        assert_raises(NotImplementedError, ddp.evaluate_policy, np.zeros(n))
        for name in solver_names:
            # Each solver is invoked with no arguments
            assert_raises(NotImplementedError, getattr(ddp, name))
def test_ddp_beta_1_not_implemented_error():
    """Check that ``beta = 1`` is rejected by evaluation and solver methods.

    Runs against the product form, the default state-action-pair form and
    the ``sparse=False`` state-action-pair form.
    """
    n, m = 3, 2
    R = np.array([[0, 1], [1, 0], [0, 1]])
    Q = np.full((n, m, n), 1 / n)  # every next state equally likely
    beta = 1

    original = DiscreteDP(R, Q, beta)
    forms = [
        original,
        original.to_sa_pair_form(),
        original.to_sa_pair_form(sparse=False),
    ]
    solvers = ['value_iteration', 'policy_iteration',
               'modified_policy_iteration']

    for ddp in forms:
        assert_raises(NotImplementedError, ddp.evaluate_policy, np.zeros(n))
        for solver in solvers:
            bound_method = getattr(ddp, solver)
            assert_raises(NotImplementedError, bound_method)
def test_ddp_beta_0():
    """Solve a ``beta = 0`` problem in product and state-action-pair form.

    Each of 'vi', 'pi' and 'mpi' must return the policy that picks the
    largest reward in every state, with value equal to that reward.
    """
    n, m = 3, 2
    R = np.array([[0, 1], [1, 0], [0, 1]])
    Q = np.full((n, m, n), 1/n)  # uniform transition probabilities
    beta = 0

    expected_sigma = [1, 0, 1]
    expected_v = [1, 1, 1]
    v0 = [0, 0, 0]

    product_form = DiscreteDP(R, Q, beta)
    candidates = (product_form, product_form.to_sa_pair_form())

    for ddp in candidates:
        for method in ('vi', 'pi', 'mpi'):
            res = ddp.solve(method=method, v_init=v0)
            assert_array_equal(res.sigma, expected_sigma)
            assert_array_equal(res.v, expected_v)
def test_ddp_beta_0():
    """Check all iterative solvers on a problem with zero discount factor.

    Both the product form and its state-action-pair conversion are solved
    with 'vi', 'pi' and 'mpi' and compared against the known optimum.
    """
    n, m = 3, 2
    R = np.array([[0, 1], [1, 0], [0, 1]])
    Q = np.full((n, m, n), 1 / n)  # every next state equally likely
    beta = 0

    best_policy = [1, 0, 1]
    best_value = [1, 1, 1]
    start_value = [0, 0, 0]

    original = DiscreteDP(R, Q, beta)
    problems = [original, original.to_sa_pair_form()]

    for ddp in problems:
        for method in ['vi', 'pi', 'mpi']:
            outcome = ddp.solve(method=method, v_init=start_value)
            assert_array_equal(outcome.sigma, best_policy)
            assert_array_equal(outcome.v, best_value)
def setUp(self):
    """Create ``self.ddp`` for the inventory-control example with ``beta = 1``.

    The problem is given in state-action-pair form: entry ``i`` of the
    reward list and row ``i`` of the transition matrix belong to the pair
    ``(pair_states[i], pair_actions[i])``.
    """
    # From Puterman 2005, Section 3.2, Section 4.6.1
    # "single-product stochastic inventory control"
    pair_states = [0, 0, 0, 0, 1, 1, 1, 2, 2, 3]
    pair_actions = [0, 1, 2, 3, 0, 1, 2, 0, 1, 0]
    rewards = [0., -1., -2., -5., 5., 0., -3., 6., -1., 5.]
    transitions = [
        [1., 0., 0., 0.],
        [0.75, 0.25, 0., 0.],
        [0.25, 0.5, 0.25, 0.],
        [0., 0.25, 0.5, 0.25],
        [0.75, 0.25, 0., 0.],
        [0.25, 0.5, 0.25, 0.],
        [0., 0.25, 0.5, 0.25],
        [0.25, 0.5, 0.25, 0.],
        [0., 0.25, 0.5, 0.25],
        [0., 0.25, 0.5, 0.25],
    ]
    no_discounting = 1
    self.ddp = DiscreteDP(rewards, transitions, no_discounting,
                          pair_states, pair_actions)
def test_ddp_beta_1_not_implemented_error():
    """Check that ``beta = 1`` raises ``NotImplementedError`` everywhere.

    The state-action-pair instances are built by hand from the product
    arrays (dense ``Q`` and CSR ``Q``) rather than by conversion methods.
    """
    n, m = 3, 2
    R = np.array([[0, 1], [1, 0], [0, 1]])
    Q = np.full((n, m, n), 1 / n)  # uniform transition probabilities
    beta = 1

    # Keep only the pairs whose reward is not -inf
    s_indices, a_indices = np.where(R > -np.inf)
    R_pairs = R[s_indices, a_indices]
    Q_pairs = Q[s_indices, a_indices]

    variants = (
        DiscreteDP(R, Q, beta),
        DiscreteDP(R_pairs, Q_pairs, beta, s_indices, a_indices),
        DiscreteDP(R_pairs, sparse.csr_matrix(Q_pairs), beta,
                   s_indices, a_indices),
    )
    solver_names = (
        'value_iteration',
        'policy_iteration',
        'modified_policy_iteration',
    )

    for ddp in variants:
        assert_raises(NotImplementedError, ddp.evaluate_policy, np.zeros(n))
        for name in solver_names:
            assert_raises(NotImplementedError, getattr(ddp, name))
def test_ddp_beta_0():
    """Solve a ``beta = 0`` problem in product and hand-built pair form.

    The state-action-pair instance is assembled directly from the product
    arrays; both instances must give the same known optimum for 'vi',
    'pi' and 'mpi'.
    """
    n, m = 3, 2
    R = np.array([[0, 1], [1, 0], [0, 1]])
    Q = np.full((n, m, n), 1 / n)  # uniform transition probabilities
    beta = 0

    expected_sigma = [1, 0, 1]
    expected_v = [1, 1, 1]
    v0 = [0, 0, 0]

    # Keep only the pairs whose reward is not -inf
    s_indices, a_indices = np.where(R > -np.inf)
    R_pairs = R[s_indices, a_indices]
    Q_pairs = Q[s_indices, a_indices]

    candidates = (
        DiscreteDP(R, Q, beta),
        DiscreteDP(R_pairs, Q_pairs, beta, s_indices, a_indices),
    )

    for ddp in candidates:
        for method in ('vi', 'pi', 'mpi'):
            res = ddp.solve(method=method, v_init=v0)
            assert_array_equal(res.sigma, expected_sigma)
            assert_array_equal(res.v, expected_v)
def prices_to_capital_stock(am, r):
    """
    Compute the capital stock induced by a given interest rate.

    The wage implied by `r` is obtained from `r_to_w`, both prices are set
    on `am` (mutating it), and the resulting dynamic program is solved by
    policy iteration.  The discount factor `beta` is read from the
    enclosing scope.

    Parameters:
    ----------

    am : Household
        An instance of an aiyagari_household.Household
    r : float
        The interest rate

    Returns:
    -------

    The sum of asset values weighted by the marginal asset distribution
    of the first stationary distribution.
    """
    am.set_prices(r, r_to_w(r))

    # Optimal policy for the household at these prices
    solution = DiscreteDP(am.R, am.Q, beta).solve(method='policy_iteration')

    # Stationary distribution over states, reduced to assets only
    state_dist = solution.mc.stationary_distributions[0]
    marginal = asset_marginal(state_dist, am.a_size, am.z_size)

    # K: asset values weighted by their marginal probabilities
    return np.sum(marginal * am.a_vals)
def setUp(self):
    """Set up one two-state problem in three equivalent representations.

    Stores on ``self``:
    ``ddps`` (product form, sparse pair form, dense pair form, each with
    ``max_iter`` set to 200), ``epsilon``, and the analytical solution
    ``v_star`` / ``sigma_star``.
    """
    # From Puterman 2005, Section 3.1
    beta = 0.95

    # Formulation with R: n x m, Q: n x m x n  (2 states, 2 actions)
    n_states = 2
    R = [[5, 10], [-1, -np.inf]]
    Q = np.array([
        [[0.5, 0.5], [0, 1]],
        [[0, 1], [0, 1]],
    ])
    product_ddp = DiscreteDP(R, Q, beta)

    # Formulation with state-action pairs; (1, 1) is left out
    s_indices = [0, 0, 1]
    a_indices = [0, 1, 0]
    pairs = list(zip(s_indices, a_indices))
    R_sa = [R[s][a] for s, a in pairs]
    Q_sa = sparse.lil_matrix((len(pairs), n_states))
    for row, (s, a) in enumerate(pairs):
        Q_sa[row, :] = Q[s, a, :]

    sparse_ddp = DiscreteDP(R_sa, Q_sa, beta, s_indices, a_indices)
    dense_ddp = DiscreteDP(R_sa, Q_sa.toarray(), beta,
                           s_indices, a_indices)

    self.ddps = [product_ddp, sparse_ddp, dense_ddp]
    for ddp in self.ddps:
        ddp.max_iter = 200

    self.epsilon = 1e-2

    # Analytical solution for beta > 10/11, Example 6.2.1
    v_state_0 = (5 - 5.5 * beta) / ((1 - 0.5 * beta) * (1 - beta))
    v_state_1 = -1 / (1 - beta)
    self.v_star = [v_state_0, v_state_1]
    self.sigma_star = [0, 0]
def test_ddp_to_sa_and_to_product():
    """Convert a DiscreteDP between product and state-action-pair forms.

    Checks that ``to_sa_pair_form`` and ``to_product_form`` preserve ``R``,
    ``Q`` and ``beta`` (up to the infeasible pair noted below), and that
    every converted instance yields the same solution as the original for
    the 'pi', 'vi' and 'mpi' methods.
    """
    n, m = 3, 2
    R = np.array([[0, 1], [1, 0], [-np.inf, 1]])
    Q = np.empty((n, m, n))
    Q[:] = 1 / n
    Q[0, 0, 0] = 0
    Q[0, 0, 1] = 2 / n
    beta = 0.95

    # Expected state-action-pair data: one row per feasible (s, a) pair
    sparse_R = np.array([0, 1, 1, 0, 1])
    _Q = np.full((5, 3), 1 / 3)
    _Q[0, 0] = 0
    _Q[0, 1] = 2 / n

    ddp = DiscreteDP(R, Q, beta)
    ddp_sa = ddp.to_sa_pair_form()
    ddp_sa2 = ddp_sa.to_sa_pair_form()
    ddp_sa3 = ddp.to_sa_pair_form(sparse=False)
    ddp2 = ddp_sa.to_product_form()
    ddp3 = ddp_sa2.to_product_form()
    ddp4 = ddp.to_product_form()

    # make sure conversion worked
    for ddp_s in [ddp_sa, ddp_sa2, ddp_sa3]:
        assert_allclose(ddp_s.R, sparse_R)
        # assert_allclose doesn't work on sparse matrices, so densify Q
        # first.  The comparison must be asserted; as a bare expression
        # its result would be silently discarded.
        Q_dense = ddp_s.Q.toarray() if sparse.issparse(ddp_s.Q) else ddp_s.Q
        assert np.max(np.abs(_Q - Q_dense)) < 1e-15
        assert_allclose(ddp_s.beta, beta)

    # these two will have probability 0 in state 2, action 0 because
    # of the infeasibility in R
    funky_Q = np.empty((n, m, n))
    funky_Q[:] = 1 / n
    funky_Q[0, 0, 0] = 0
    funky_Q[0, 0, 1] = 2 / n
    funky_Q[2, 0, :] = 0
    for ddp_f in [ddp2, ddp3]:
        assert_allclose(ddp_f.R, ddp.R)
        assert_allclose(ddp_f.Q, funky_Q)
        assert_allclose(ddp_f.beta, ddp.beta)

    # this one is just the original one.
    assert_allclose(ddp4.R, ddp.R)
    assert_allclose(ddp4.Q, ddp.Q)
    assert_allclose(ddp4.beta, ddp.beta)

    # Every form must produce the same solution as the original problem
    for method in ["pi", "vi", "mpi"]:
        sol1 = ddp.solve(method=method)
        for ddp_other in [ddp_sa, ddp_sa2, ddp_sa3, ddp2, ddp3, ddp4]:
            sol2 = ddp_other.solve(method=method)
            for k in ["v", "sigma", "num_iter"]:
                assert_allclose(sol1[k], sol2[k])
def prices_to_capital_stock(am, r):
    """Return the capital stock induced by interest rate ``r``.

    Sets the prices on ``am`` (mutating it), solves the resulting dynamic
    program by policy iteration, and weights the asset values by the
    marginal asset distribution of the first stationary distribution.
    The discount factor ``β`` is read from the enclosing scope.
    """
    am.set_prices(r, r_to_w(r))

    # Optimal policy for the household at these prices
    solution = DiscreteDP(am.R, am.Q, β).solve(method='policy_iteration')

    # Stationary distribution over states, reduced to assets only
    state_dist = solution.mc.stationary_distributions[0]
    marginal = asset_marginal(state_dist, am.a_size, am.z_size)

    # K: asset values weighted by their marginal probabilities
    return np.sum(marginal * am.a_vals)


# Household instance and the dynamic program built from it
am = Household(a_max=30)
am_ddp = DiscreteDP(am.R, am.Q, am.β)

# Grid of interest rates at which capital supply is evaluated
num_points = 20
r_vals = np.linspace(0.005, 0.04, num_points)

# Capital supplied at each interest rate on the grid
k_vals = np.array([prices_to_capital_stock(am, r) for r in r_vals])

# Plot supply against the demand curve given by rd
fig, ax = plt.subplots()
curve_style = dict(lw=2, alpha=0.6)
ax.plot(k_vals, r_vals, label='Supply', **curve_style)
ax.plot(k_vals, rd(k_vals), label='Demand', **curve_style)
ax.set_xlabel('K')
ax.set_ylabel('r')
ax.legend()
aiyagari_ddp = DiscreteDP(am.R, am.Q, beta) # Compute the optimal policy results = aiyagari_ddp.solve(method='policy_iteration') # Compute the stationary distribution stationary_probs = results.mc.stationary_distributions[0] # Extract the marginal distribution for assets asset_probs = asset_marginal(stationary_probs, am.a_size, am.z_size) # Return K return np.sum(asset_probs * am.a_vals) # Create an instance of Household am = Household(a_max=20) # Use the instance to build a discrete dynamic program am_ddp = DiscreteDP(am.R, am.Q, am.beta) # Create a grid of r values at which to compute demand and supply of capital num_points = 20 r_vals = np.linspace(0.005, 0.04, num_points) # Compute supply of capital k_vals = np.empty(num_points) for i, r in enumerate(r_vals): k_vals[i] = prices_to_capital_stock(am, r) # Plot against demand for capital by firms fig, ax = plt.subplots(figsize=(11, 8)) ax.plot(k_vals, r_vals, lw=2, alpha=0.6, label='supply of capital') ax.plot(k_vals, rd(k_vals), lw=2, alpha=0.6, label='demand for capital') ax.grid()
aiyagari_ddp = DiscreteDP(am.R, am.Q, β) # Compute the optimal policy results = aiyagari_ddp.solve(method='policy_iteration') # Compute the stationary distribution stationary_probs = results.mc.stationary_distributions[0] # Extract the marginal distribution for assets asset_probs = asset_marginal(stationary_probs, am.a_size, am.z_size) # Return K return np.sum(asset_probs * am.a_vals) # Create an instance of Household am = Household(a_max=30) # Use the instance to build a discrete dynamic program am_ddp = DiscreteDP(am.R, am.Q, am.β) # Create a grid of r values at which to compute demand and supply of capital num_points = 30 r_vals = np.linspace(0.005, 0.04, num_points) # Compute supply of capital k_vals = np.empty(num_points) for i, r in enumerate(r_vals): k_vals[i] = prices_to_capital_stock(am, r) # Plot against demand for capital by firms fig, ax = plt.subplots(figsize=(11, 8)) ax.plot(k_vals, r_vals, lw=2, alpha=0.6, color='b', label='supply of capital') ax.plot(k_vals, rd(k_vals),
import numpy as np
import quantecon as qe
import matplotlib.pyplot as plt
from aiyagari_household import Household
from quantecon.markov import DiscreteDP

# Example prices
r = 0.03
w = 0.956

# Household at the example prices, and the dynamic program built from it
am = Household(a_max=20, r=r, w=w)
am_ddp = DiscreteDP(am.R, am.Q, am.beta)

# Solve using policy function iteration
results = am_ddp.solve(method='policy_iteration')

# Short aliases for grid sizes and grid values
z_size, a_size = am.z_size, am.a_size
z_vals, a_vals = am.z_vals, am.a_vals
n = a_size * z_size

# Optimal asset choice for every state, arranged with one row per z index
# and one column per a index.  State index s_i decomposes as
# s_i = a_i * z_size + z_i.
a_star = np.empty((z_size, a_size))
for s_i in range(n):
    a_i, z_i = divmod(s_i, z_size)
    a_star[z_i, a_i] = a_vals[results.sigma[s_i]]