def value_iteration(model, grid=None, tol=1e-6, maxit=500, maxit_howard=20, verbose=False):
    """
    Solve for the value function and associated Markov decision rule by
    iterating over the value function.

    Parameters:
    -----------
    model :
        "dtmscc" model. Must contain a 'felicity' function (as well as
        'transition', 'controls_lb' and 'controls_ub').
    grid : dict or None
        grid options, forwarded as keyword arguments to ``model.get_grid``.
        ``None`` (the default) means "no options".
    tol : float
        the iterations stop once the value-function error is below ``tol``
        and the policy error is below the internal tolerance ``1e-5``.
    maxit : int
        maximum number of value function iterations.
    maxit_howard : int
        accepted for compatibility; not used by this implementation.
    verbose : bool
        forwarded to the iterations printer.

    Returns:
    --------
    mdr : Markov decision rule
        The solved decision rule/policy function
    mdrv: decision rule
        The solved value function
    """

    # A fresh dict per call instead of a shared mutable default argument.
    if grid is None:
        grid = {}

    transition = model.functions['transition']
    felicity = model.functions['felicity']
    controls_lb = model.functions['controls_lb']
    controls_ub = model.functions['controls_ub']

    parms = model.calibration['parameters']
    discount = model.calibration['beta']
    n_x = len(model.calibration['controls'])

    dprocess = model.exogenous.discretize()
    n_ms = dprocess.n_nodes()  # number of exogenous states

    endo_grid = model.get_grid(**grid)
    exo_grid = dprocess.grid

    mdrv = DecisionRule(exo_grid, endo_grid)
    nodes = mdrv.endo_grid.nodes()
    N = nodes.shape[0]

    # Initial guess for the controls: the constant policy evaluated on the grid.
    mdr = constant_policy(model)
    controls_0 = np.zeros((n_ms, N, n_x))
    for i_ms in range(n_ms):
        controls_0[i_ms, :, :] = mdr.eval_is(i_ms, nodes)

    values_0 = np.zeros((n_ms, N, 1))

    mdr = DecisionRule(exo_grid, endo_grid)

    # value function iterations until convergence
    it = 0
    err_v = 100
    err_v_0 = 0
    err_x = 100
    err_x_0 = 0
    tol_x = 1e-5

    itprint = IterationsPrinter(
        ('N', int), ('Error_V', float), ('Gain_V', float),
        ('Error_x', float), ('Gain_x', float), ('Eval_n', int),
        ('Time', float),
        verbose=verbose)
    itprint.print_header('Start value function iterations.')

    while (it < maxit) and (err_v > tol or err_x > tol_x):

        t_start = time.time()
        it += 1

        # Evaluate the current policy; from the third pass on, warm-start
        # the evaluation with the previous value function.
        mdr.set_values(controls_0)
        if it > 2:
            ev = evaluate_policy(model, mdr, initial_guess=mdrv,
                                 verbose=False, details=True)
        else:
            ev = evaluate_policy(model, mdr, verbose=False, details=True)

        mdrv = ev.solution
        for i_ms in range(n_ms):
            values_0[i_ms, :, :] = mdrv.eval_is(i_ms, nodes)

        values = values_0.copy()
        controls = controls_0.copy()

        # Improvement step: maximize the choice value node by node.
        for i_m in range(n_ms):
            m = dprocess.node(i_m)
            for n in range(N):
                s = nodes[n, :]
                x_init = controls[i_m, n, :]
                lb = controls_lb(m, s, parms)
                ub = controls_ub(m, s, parms)
                bnds = list(zip(lb, ub))

                def valfun(xx):
                    return -choice_value(transition, felicity, i_m, s, xx,
                                         mdrv, dprocess, parms, discount)[0]

                res = scipy.optimize.minimize(valfun, x_init, bounds=bnds)
                controls[i_m, n, :] = res.x
                # Value at the optimum (the original read it through a view
                # of `controls` that had just been overwritten with res.x).
                values[i_m, n, 0] = -valfun(res.x)

        # compute error, update value and dr
        err_x = abs(controls - controls_0).max()
        err_v = abs(values - values_0).max()
        elapsed = time.time() - t_start

        values_0 = values
        controls_0 = controls

        # No gain can be computed while the previous error is zero (always
        # the case on the first pass); report NaN instead of dividing by 0.
        gain_x = err_x / err_x_0 if err_x_0 > 0 else np.nan
        gain_v = err_v / err_v_0 if err_v_0 > 0 else np.nan

        err_x_0 = err_x
        err_v_0 = err_v

        itprint.print_iteration(N=it, Error_V=err_v, Gain_V=gain_v,
                                Error_x=err_x, Gain_x=gain_x,
                                Eval_n=ev.iterations, Time=elapsed)

    itprint.print_finished()

    mdr = DecisionRule(exo_grid, endo_grid)

    # `controls_0` is the latest policy after every pass and is also defined
    # when the loop body never ran (maxit <= 0).
    mdr.set_values(controls_0)
    mdrv.set_values(values_0)

    return mdr, mdrv
def simulate(
    model: Model,
    dr: DecisionRule,
    *,
    process=None,
    N=1,
    T=40,
    s0=None,
    i0=None,
    m0=None,
    driving_process=None,
    seed=42,
    stochastic=True,
):
    """Simulate a model using the specified decision rule.

    Parameters
    ----------

    model: Model

    dr: decision rule
        an ``AlgoResult`` is also accepted, in which case its ``dr``
        attribute is used

    process:
        exogenous process to simulate when ``driving_process`` is None.
        Defaults to ``dr.dprocess`` if it exists and has a ``simulate``
        method, otherwise to ``model.exogenous``.

    N: int
        number of simulations

    T: int
        horizon for the simulations

    s0: ndarray
        initial state where all simulations start (defaults to the
        calibrated states)

    i0: int
        initial index of the exogenous process when it is discrete
        (defaults to 0); only used when the process is simulated here

    m0: ndarray
        initial value of the exogenous process when it is continuous
        (defaults to the calibrated exogenous values); only used when the
        process is simulated here

    driving_process: ndarray
        realization of exogenous driving process (drawn randomly if None).
        A 3-dimensional array is read as values of a continuous process, a
        2-dimensional array as node indices of a discrete process.

    seed: int
        used to initialize the random number generator. Use it to replicate
        exact same results among simulations

    stochastic: bool
        forwarded to ``process.simulate``

    Returns
    -------
    xarray.DataArray:
        returns a ``T x N x n_v`` array where ``n_v``
        is the number of variables. Variables are states and controls when
        the model has no "auxiliary" function, and exogenous, states,
        controls and auxiliaries otherwise. For discrete processes the node
        index ``_i_m`` is prepended.

    Raises
    ------
    Exception
        if ``driving_process`` is neither 2- nor 3-dimensional.
    """

    if isinstance(dr, AlgoResult):
        dr = dr.dr

    # Seed *before* the exogenous process is simulated: seeding afterwards
    # (as was done previously) cannot make the drawn shocks reproducible.
    # NOTE(review): assumes process.simulate draws from numpy's global RNG.
    numpy.random.seed(seed)

    calib = model.calibration
    parms = numpy.array(calib["parameters"])

    if s0 is None:
        s0 = calib["states"]

    n_x = len(model.symbols["controls"])
    n_s = len(model.symbols["states"])

    s_simul = numpy.zeros((T, N, n_s))
    x_simul = numpy.zeros((T, N, n_x))

    s_simul[0, :, :] = s0[None, :]

    # Period-0 controls are not pre-computed here: the main loop below
    # evaluates the decision rule for every period, including the first.

    # are we simulating a markov chain or a continuous process ?
    if driving_process is not None:
        n_dims = len(driving_process.shape)
        if n_dims == 3:
            m_simul = driving_process
            sim_type = "continuous"
        elif n_dims == 2:
            i_simul = driving_process
            m_simul = dr.exo_grid.nodes[i_simul]
            sim_type = "discrete"
        else:
            raise Exception("Incorrect specification of driving values.")
        m0 = m_simul[0, :, :]
    else:
        from dolo.numeric.processes import DiscreteProcess

        if process is None:
            if hasattr(dr, "dprocess") and hasattr(dr.dprocess, "simulate"):
                process = dr.dprocess
            else:
                process = model.exogenous

        # detect type of simulation
        if isinstance(process, DiscreteProcess):
            sim_type = "discrete"
            if i0 is None:
                i0 = 0
            m_simul = process.simulate(N, T, i0=i0, stochastic=stochastic)
            i_simul = find_index(m_simul, process.values)
            m0 = process.node(i0)
        else:
            sim_type = "continuous"
            m_simul = process.simulate(N, T, m0=m0, stochastic=stochastic)
            if isinstance(m_simul, xr.DataArray):
                m_simul = m_simul.data
            if m0 is None:
                m0 = model.calibration["exogenous"]

    g = model.functions["transition"]

    mp = m0  # exogenous values of the previous period
    for i in range(T):
        m = m_simul[i, :, :]
        s = s_simul[i, :, :]
        if sim_type == "discrete":
            # each simulation may sit on a different node: evaluate row by row
            x = np.vstack(
                [
                    dr.eval_is(i_node, s_row[None, :])[0, :]
                    for i_node, s_row in zip(i_simul[i, :], s)
                ]
            )
        else:
            x = dr.eval_ms(m, s)

        x_simul[i, :, :] = x

        # the state following the last period is not stored, so skip it
        if i < T - 1:
            s_simul[i + 1, :, :] = g(mp, s, x, m, parms)
        mp = m

    if "auxiliary" not in model.functions:  # TODO: find a better test than this
        blocks = [s_simul, x_simul]
        varnames = model.symbols["states"] + model.symbols["controls"]
    else:
        aux = model.functions["auxiliary"]
        a_simul = aux(
            m_simul.reshape((N * T, -1)),
            s_simul.reshape((N * T, -1)),
            x_simul.reshape((N * T, -1)),
            parms,
        )
        a_simul = a_simul.reshape(T, N, -1)
        blocks = [m_simul, s_simul, x_simul, a_simul]
        varnames = (
            model.symbols["exogenous"]
            + model.symbols["states"]
            + model.symbols["controls"]
            + model.symbols["auxiliaries"]
        )

    simul = numpy.concatenate(blocks, axis=2)

    if sim_type == "discrete":
        varnames = ["_i_m"] + varnames
        simul = np.concatenate([i_simul[:, :, None], simul], axis=2)

    data = xr.DataArray(
        simul,
        dims=["T", "N", "V"],
        coords={"T": range(T), "N": range(N), "V": varnames},
    )

    return data
def value_iteration(model, grid=None, tol=1e-6, maxit=500, maxit_howard=20, verbose=False, details=True):
    """
    Solve for the value function and associated Markov decision rule by
    iterating over the value function.

    Parameters:
    -----------
    model :
        "dtmscc" model. Must contain a 'felicity' function (as well as
        'transition', 'controls_lb' and 'controls_ub').
    grid : dict or None
        grid options, forwarded as keyword arguments to ``model.get_grid``.
        ``None`` (the default) means "no options".
    tol : float
        tolerance on the value-function error; the iterations stop once
        this error is at most ``tol`` and the policy error is at most the
        internal tolerance ``1e-5``.
    maxit : int
        maximum number of value function iterations.
    maxit_howard : int
        accepted for compatibility; not used by this implementation.
    verbose : bool
        forwarded to the iterations printer.
    details : bool
        if False, return the bare ``(mdr, mdrv)`` pair; otherwise return a
        ``ValueIterationResult``.

    Returns:
    --------
    If ``details`` is False:

    mdr : Markov decision rule
        The solved decision rule/policy function
    mdrv: decision rule
        The solved value function

    Otherwise a ``ValueIterationResult`` built from, in order: ``mdr``,
    ``mdrv``, the number of iterations, the discretized process, then for
    the policy and for the value function a convergence flag, the tolerance
    and the last error, and finally two ``None`` placeholders (log, trace).
    """

    # A fresh dict per call instead of a shared mutable default argument.
    if grid is None:
        grid = {}

    transition = model.functions['transition']
    felicity = model.functions['felicity']
    controls_lb = model.functions['controls_lb']
    controls_ub = model.functions['controls_ub']

    parms = model.calibration['parameters']
    discount = model.calibration['beta']
    n_x = len(model.calibration['controls'])

    dprocess = model.exogenous.discretize()
    n_ms = dprocess.n_nodes()  # number of exogenous states

    endo_grid = model.get_grid(**grid)
    exo_grid = dprocess.grid

    mdrv = DecisionRule(exo_grid, endo_grid)
    nodes = mdrv.endo_grid.nodes()
    N = nodes.shape[0]

    # Initial guess for the controls: the constant policy evaluated on the grid.
    mdr = constant_policy(model)
    controls_0 = np.zeros((n_ms, N, n_x))
    for i_ms in range(n_ms):
        controls_0[i_ms, :, :] = mdr.eval_is(i_ms, nodes)

    values_0 = np.zeros((n_ms, N, 1))

    mdr = DecisionRule(exo_grid, endo_grid)

    # value function iterations until convergence
    it = 0
    err_v = 100
    err_v_0 = 0
    err_x = 100
    err_x_0 = 0
    tol_x = 1e-5
    # Report convergence against the tolerance the loop actually uses, so
    # that a run which met its stopping rule is never flagged as failed.
    tol_v = tol

    itprint = IterationsPrinter(
        ('N', int), ('Error_V', float), ('Gain_V', float),
        ('Error_x', float), ('Gain_x', float), ('Eval_n', int),
        ('Time', float),
        verbose=verbose)
    itprint.print_header('Start value function iterations.')

    while (it < maxit) and (err_v > tol_v or err_x > tol_x):

        t_start = time.time()
        it += 1

        # Evaluate the current policy; from the third pass on, warm-start
        # the evaluation with the previous value function.
        mdr.set_values(controls_0)
        if it > 2:
            ev = evaluate_policy(
                model, mdr, initial_guess=mdrv, verbose=False, details=True)
        else:
            ev = evaluate_policy(model, mdr, verbose=False, details=True)

        mdrv = ev.solution
        for i_ms in range(n_ms):
            values_0[i_ms, :, :] = mdrv.eval_is(i_ms, nodes)

        values = values_0.copy()
        controls = controls_0.copy()

        # Improvement step: maximize the choice value node by node.
        for i_m in range(n_ms):
            m = dprocess.node(i_m)
            for n in range(N):
                s = nodes[n, :]
                x_init = controls[i_m, n, :]
                lb = controls_lb(m, s, parms)
                ub = controls_ub(m, s, parms)
                bnds = list(zip(lb, ub))

                def valfun(xx):
                    return -choice_value(transition, felicity, i_m, s, xx,
                                         mdrv, dprocess, parms, discount)[0]

                res = scipy.optimize.minimize(valfun, x_init, bounds=bnds)
                controls[i_m, n, :] = res.x
                # Value at the optimum (the original read it through a view
                # of `controls` that had just been overwritten with res.x).
                values[i_m, n, 0] = -valfun(res.x)

        # compute error, update value and dr
        err_x = abs(controls - controls_0).max()
        err_v = abs(values - values_0).max()
        elapsed = time.time() - t_start

        values_0 = values
        controls_0 = controls

        # No gain can be computed while the previous error is zero (always
        # the case on the first pass); report NaN instead of dividing by 0.
        gain_x = err_x / err_x_0 if err_x_0 > 0 else np.nan
        gain_v = err_v / err_v_0 if err_v_0 > 0 else np.nan

        err_x_0 = err_x
        err_v_0 = err_v

        itprint.print_iteration(
            N=it,
            Error_V=err_v,
            Gain_V=gain_v,
            Error_x=err_x,
            Gain_x=gain_x,
            Eval_n=ev.iterations,
            Time=elapsed)

    itprint.print_finished()

    mdr = DecisionRule(exo_grid, endo_grid)

    # `controls_0` is the latest policy after every pass and is also defined
    # when the loop body never ran (maxit <= 0).
    mdr.set_values(controls_0)
    mdrv.set_values(values_0)

    if not details:
        return mdr, mdrv

    return ValueIterationResult(
        mdr,  # decision rule
        mdrv,  # value function
        it,  # number of iterations
        dprocess,  # discretized process
        err_x <= tol_x,  # policy converged (same test as the stopping rule)
        tol_x,
        err_x,
        err_v <= tol_v,  # value converged (same test as the stopping rule)
        tol_v,
        err_v,
        None,  # log
        None,  # trace
    )
def value_iteration(
    model: Model,
    *,
    verbose: bool = False,
    details: bool = True,
    tol=1e-6,
    maxit=500,
    maxit_howard=20,
) -> ValueIterationResult:
    """
    Solve for the value function and associated Markov decision rule by
    iterating over the value function.

    Parameters:
    -----------
    model :
        model to be solved. Must provide the 'transition', 'felicity',
        'controls_lb' and 'controls_ub' functions.
    verbose : bool
        forwarded to the iterations printer.
    details : bool
        if False, return the bare ``(mdr, mdrv)`` pair; otherwise return a
        ``ValueIterationResult``.
    tol : float
        tolerance on the value-function error; the iterations stop once
        this error is at most ``tol`` and the policy error is at most the
        internal tolerance ``1e-5``.
    maxit : int
        maximum number of value function iterations.
    maxit_howard : int
        accepted for compatibility; not used by this implementation.

    Returns:
    --------
    If ``details`` is False:

    mdr : Markov decision rule
        The solved decision rule/policy function
    mdrv: decision rule
        The solved value function

    Otherwise a ``ValueIterationResult`` built from, in order: ``mdr``,
    ``mdrv``, the number of iterations, the discretized process, then for
    the policy and for the value function a convergence flag, the tolerance
    and the last error, and finally two ``None`` placeholders (log, trace).
    """

    transition = model.functions["transition"]
    felicity = model.functions["felicity"]
    controls_lb = model.functions["controls_lb"]
    controls_ub = model.functions["controls_ub"]

    parms = model.calibration["parameters"]
    discount = model.calibration["beta"]
    n_x = len(model.calibration["controls"])

    grid, dprocess = model.discretize()
    endo_grid = grid["endo"]
    exo_grid = grid["exo"]

    n_ms = dprocess.n_nodes  # number of exogenous states

    mdrv = DecisionRule(exo_grid, endo_grid)

    s = mdrv.endo_grid.nodes
    N = s.shape[0]

    # Initial guess for the controls: the constant policy evaluated on the grid.
    mdr = constant_policy(model)
    controls_0 = np.zeros((n_ms, N, n_x))
    for i_ms in range(n_ms):
        controls_0[i_ms, :, :] = mdr.eval_is(i_ms, s)

    values_0 = np.zeros((n_ms, N, 1))

    mdr = DecisionRule(exo_grid, endo_grid)

    # value function iterations until convergence
    it = 0
    err_v = 100
    err_v_0 = 0
    err_x = 100
    err_x_0 = 0
    tol_x = 1e-5
    # Report convergence against the tolerance the loop actually uses, so
    # that a run which met its stopping rule is never flagged as failed.
    tol_v = tol

    itprint = IterationsPrinter(
        ("N", int),
        ("Error_V", float),
        ("Gain_V", float),
        ("Error_x", float),
        ("Gain_x", float),
        ("Eval_n", int),
        ("Time", float),
        verbose=verbose,
    )
    itprint.print_header("Start value function iterations.")

    while (it < maxit) and (err_v > tol_v or err_x > tol_x):

        t_start = time.time()
        it += 1

        # Evaluate the current policy; from the third pass on, warm-start
        # the evaluation with the previous value function.
        mdr.set_values(controls_0)
        if it > 2:
            ev = evaluate_policy(model, mdr, dr0=mdrv, verbose=False, details=True)
        else:
            ev = evaluate_policy(model, mdr, verbose=False, details=True)

        mdrv = ev.solution
        for i_ms in range(n_ms):
            values_0[i_ms, :, :] = mdrv.eval_is(i_ms, s)

        values = values_0.copy()
        controls = controls_0.copy()

        # Improvement step: maximize the choice value node by node.
        for i_m in range(n_ms):
            m = dprocess.node(i_m)
            for n in range(N):
                s_ = s[n, :]
                x_init = controls[i_m, n, :]
                lb = controls_lb(m, s_, parms)
                ub = controls_ub(m, s_, parms)
                bnds = list(zip(lb, ub))

                def valfun(xx):
                    return -choice_value(
                        transition,
                        felicity,
                        i_m,
                        s_,
                        xx,
                        mdrv,
                        dprocess,
                        parms,
                        discount,
                    )[0]

                res = scipy.optimize.minimize(valfun, x_init, bounds=bnds)
                controls[i_m, n, :] = res.x
                # Value at the optimum (the original read it through a view
                # of `controls` that had just been overwritten with res.x).
                values[i_m, n, 0] = -valfun(res.x)

        # compute error, update value and dr
        err_x = abs(controls - controls_0).max()
        err_v = abs(values - values_0).max()
        elapsed = time.time() - t_start

        values_0 = values
        controls_0 = controls

        # No gain can be computed while the previous error is zero (always
        # the case on the first pass); report NaN instead of dividing by 0.
        gain_x = err_x / err_x_0 if err_x_0 > 0 else np.nan
        gain_v = err_v / err_v_0 if err_v_0 > 0 else np.nan

        err_x_0 = err_x
        err_v_0 = err_v

        itprint.print_iteration(
            N=it,
            Error_V=err_v,
            Gain_V=gain_v,
            Error_x=err_x,
            Gain_x=gain_x,
            Eval_n=ev.iterations,
            Time=elapsed,
        )

    itprint.print_finished()

    mdr = DecisionRule(exo_grid, endo_grid)

    # `controls_0` is the latest policy after every pass and is also defined
    # when the loop body never ran (maxit <= 0).
    mdr.set_values(controls_0)
    mdrv.set_values(values_0)

    if not details:
        return mdr, mdrv

    return ValueIterationResult(
        mdr,  # decision rule
        mdrv,  # value function
        it,  # number of iterations
        dprocess,  # discretized process
        err_x <= tol_x,  # policy converged (same test as the stopping rule)
        tol_x,
        err_x,
        err_v <= tol_v,  # value converged (same test as the stopping rule)
        tol_v,
        err_v,
        None,  # log
        None,  # trace
    )
def improved_time_iteration(
    model: Model,
    *,
    dr0: DecisionRule = None,  #
    verbose: bool = True,  #
    details: bool = True,  #
    ignore_constraints=False,  #
    method="jac",
    dprocess=None,
    interp_method="cubic",
    mu=2,
    maxbsteps=10,
    tol=1e-8,
    smaxit=500,
    maxit=1000,
    compute_radius=False,
    invmethod="iti",
    # obsolete
    complementarities=None
) -> ImprovedTimeIterationResult:
    """Solve for the decision rule of `model` with improved time iterations.

    Each outer iteration evaluates the residuals of the model's "arbitrage"
    equations (through ``euler_residuals``) together with their derivatives,
    computes an update direction (``invert_jac``, or GMRES when
    ``invmethod == "gmres"``) and applies it with a backtracking line search.

    Parameters
    ----------
    model : Model
        must provide the "arbitrage", "transition", "arbitrage_lb" and
        "arbitrage_ub" functions.
    dr0 : DecisionRule, optional
        initial guess; it is evaluated on the endogenous grid nodes with
        ``eval_is``. When None, the calibrated controls are used everywhere.
    verbose : bool or str
        forwarded to the iterations printer; the value ``"full"`` also turns
        on the verbosity of ``invert_jac``.
    details : bool
        if False, only the decision rule is returned.
    ignore_constraints : bool
        when ``complementarities`` is None, complementarities are enabled
        exactly when this flag is False.
    method, mu :
        not read by this function.
    dprocess :
        discretized exogenous process; defaults to the one returned by
        ``model.discretize()``.
    interp_method : str
        "cubic" or "linear"; anything else raises.
    maxbsteps : int
        number of step lengths ``0.5 ** k`` tried in the line search.
    tol : float
        stopping tolerance on the residual norm, also forwarded to
        ``invert_jac``.
    smaxit : int
        ``maxit`` argument forwarded to ``invert_jac``.
    maxit : int
        maximum number of outer iterations.
    compute_radius : bool
        if True (and ``details`` is True), the leading eigenvalue of the
        operator ``L`` is computed with ``scipy.sparse.linalg.eigs`` and its
        modulus is reported; otherwise NaN is reported.
    invmethod : str
        ``"gmres"`` selects the GMRES branch, any other value ``invert_jac``.
    complementarities : bool, optional
        obsolete; overrides ``ignore_constraints`` when not None.

    Returns
    -------
    The decision rule ``ddr`` if ``details`` is False. Otherwise an
    ``ImprovedTimeIterationResult`` built from, in order: ``ddr``, the index
    of the last iteration, the last residual norm, the norm of the last
    update direction, the convergence flag ``err_0 < tol``, the
    complementarities flag, the computed radius (or NaN), ``None`` and the
    operator ``L``.

    Raises
    ------
    Exception
        if ``interp_method`` is neither "cubic" nor "linear".
    """

    # obsolete
    if complementarities is not None:
        # TODO: warning
        pass
    else:
        complementarities = not ignore_constraints

    # NOTE(review): vprint is defined but never called in this function.
    def vprint(*args, **kwargs):
        if verbose:
            print(*args, **kwargs)

    itprint = IterationsPrinter(
        ("N", int),
        ("f_x", float),
        ("d_x", float),
        ("Time_residuals", float),
        ("Time_inversion", float),
        ("Time_search", float),
        ("Lambda_0", float),
        ("N_invert", int),
        ("N_search", int),
        verbose=verbose,
    )
    itprint.print_header("Start Improved Time Iterations.")

    f = model.functions["arbitrage"]
    g = model.functions["transition"]
    x_lb = model.functions["arbitrage_lb"]
    x_ub = model.functions["arbitrage_ub"]

    parms = model.calibration["parameters"]

    grid, dprocess_ = model.discretize()

    # a user-supplied discretized process takes precedence over the model's
    if dprocess is None:
        dprocess = dprocess_

    endo_grid = grid["endo"]
    exo_grid = grid["exo"]

    # at least one exogenous node is assumed, even if the process reports none
    n_m = max(dprocess.n_nodes, 1)
    n_s = len(model.symbols["states"])

    if interp_method in ("cubic", "linear"):
        # ddr holds the current solution; ddr_filt is handed to the inversion
        # routines (invert_jac / Operator) below.
        ddr = DecisionRule(
            exo_grid, endo_grid, dprocess=dprocess, interp_method=interp_method
        )
        ddr_filt = DecisionRule(
            exo_grid, endo_grid, dprocess=dprocess, interp_method=interp_method
        )
    else:
        raise Exception("Unsupported interpolation method.")

    # s = ddr.endo_grid
    s = endo_grid.nodes
    N = s.shape[0]
    n_x = len(model.symbols["controls"])

    # initial controls: the calibrated values repeated to shape (n_m, N, n_x)
    x0 = (
        model.calibration["controls"][
            None,
            None,
        ]
        .repeat(n_m, axis=0)
        .repeat(N, axis=1)
    )

    if dr0 is not None:
        for i_m in range(n_m):
            x0[i_m, :, :] = dr0.eval_is(i_m, s)
    ddr.set_values(x0)

    # candidate step lengths for the line search: 1, 1/2, 1/4, ...
    steps = 0.5 ** numpy.arange(maxbsteps)

    # lower/upper bounds on the controls at every (exogenous node, grid node)
    lb = x0.copy()
    ub = x0.copy()
    for i_m in range(n_m):
        m = dprocess.node(i_m)
        lb[i_m, :] = x_lb(m, s, parms)
        ub[i_m, :] = x_ub(m, s, parms)

    x = x0

    # both affect the precision

    ddr.set_values(x)

    ## memory allocation

    n_im = dprocess.n_inodes(0)  # we assume it is constant for now

    jres = numpy.zeros((n_m, n_im, N, n_x, n_x))
    S_ij = numpy.zeros((n_m, n_im, N, n_s))

    # NOTE(review): if maxit == 0 the loop body never runs and `it`, `err_0`,
    # `err_2`, `res`, `dres`, `fut_S` used after the loop are undefined.
    for it in range(maxit):

        # reset the preallocated buffers, which are reused across iterations
        jres[...] = 0.0
        S_ij[...] = 0.0

        t1 = time.time()

        # compute derivatives and residuals:
        # res: residuals
        # dres: derivatives w.r.t. x
        # jres: derivatives w.r.t. ~x
        # fut_S: future states

        ddr.set_values(x)
        #
        # ub[ub>100000] = 100000
        # lb[lb<-100000] = -100000
        #
        # sh_x = x.shape
        # rr =euler_residuals(f,g,s,x,ddr,dp,parms, diff=False, with_jres=False,set_dr=True)
        # print(rr.shape)
        #
        # from iti.fb import smooth_
        # jj = smooth_(rr, x, lb, ub)
        #
        # print("Errors with complementarities")
        # print(abs(jj.max()))
        #
        # exit()

        #
        from dolo.numeric.optimize.newton import SerialDifferentiableFunction

        sh_x = x.shape
        # residuals as a function of the flattened controls, holding the
        # decision rule used for future controls fixed (set_dr=False)
        ff = SerialDifferentiableFunction(
            lambda u: euler_residuals(
                f,
                g,
                s,
                u.reshape(sh_x),
                ddr,
                dprocess,
                parms,
                diff=False,
                with_jres=False,
                set_dr=False,
            ).reshape((-1, sh_x[2]))
        )
        res, dres = ff(x.reshape((-1, sh_x[2])))
        res = res.reshape(sh_x)
        dres = dres.reshape((sh_x[0], sh_x[1], sh_x[2], sh_x[2]))
        # second pass only to obtain jres and fut_S; its residuals are ignored
        junk, jres, fut_S = euler_residuals(
            f,
            g,
            s,
            x,
            ddr,
            dprocess,
            parms,
            diff=False,
            with_jres=True,
            set_dr=False,
            jres=jres,
            S_ij=S_ij,
        )

        # if there are complementerities, we modify derivatives
        if complementarities:
            # lower bound first, then the upper bound on the sign-flipped
            # system; the second sign flip restores the original orientation
            res, dres, jres = smooth(res, dres, jres, x - lb)
            res[...] *= -1
            dres[...] *= -1
            jres[...] *= -1
            res, dres, jres = smooth(res, dres, jres, ub - x, pos=-1.0)
            res[...] *= -1
            dres[...] *= -1
            jres[...] *= -1

        # residual norm at the current iterate
        err_0 = abs(res).max()

        # premultiply by A
        jres[...] *= -1.0

        # NOTE(review): `sol` is never read here; presumably solve_tensor
        # updates M (a view into jres) in place — confirm.
        for i_m in range(n_m):
            for j_m in range(n_im):
                M = jres[i_m, j_m, :, :, :]
                X = dres[i_m, :, :, :].copy()
                sol = solve_tensor(X, M)

        t2 = time.time()

        # new version
        if invmethod == "gmres":
            ddx = solve_gu(dres.copy(), res.copy())
            L = Operator(jres, fut_S, ddr_filt)
            n0 = L.counter
            L.addid = True
            ttol = err_0 / 100
            # NOTE(review): recent SciPy releases renamed gmres' `tol`
            # keyword to `rtol`; check against the supported SciPy version.
            sol = scipy.sparse.linalg.gmres(
                L, ddx.ravel(), tol=ttol
            )  # , maxiter=1, restart=smaxit)
            lam0 = 0.01
            nn = L.counter - n0
            tot = sol[0].reshape(ddx.shape)
        else:
            # compute inversion
            tot, nn, lam0 = invert_jac(
                res,
                dres,
                jres,
                fut_S,
                ddr_filt,
                tol=tol,
                maxit=smaxit,
                verbose=(verbose == "full"),
            )

        # lam, lam_max, lambdas = radius_jac(res,dres,jres,fut_S,tol=tol,maxit=1000,verbose=(verbose=='full'),filt=ddr_filt)

        # backsteps
        t3 = time.time()
        # try shrinking steps along `tot` until the residual norm improves
        # NOTE(review): with maxbsteps == 0 `new_x` and `i_bckstps` stay
        # undefined; if no step improves, the last candidate is kept.
        for i_bckstps, lam in enumerate(steps):
            new_x = x - tot * lam
            new_err = euler_residuals(
                f, g, s, new_x, ddr, dprocess, parms, diff=False, set_dr=True
            )

            if complementarities:
                new_err = smooth_nodiff(new_err, new_x - lb)
                new_err = smooth_nodiff(-new_err, ub - new_x)

            new_err = abs(new_err).max()
            if new_err < err_0:
                break

        # size of the (unscaled) update direction
        err_2 = abs(tot).max()

        t4 = time.time()
        itprint.print_iteration(
            N=it,
            f_x=err_0,
            d_x=err_2,
            Time_residuals=t2 - t1,
            Time_inversion=t3 - t2,
            Time_search=t4 - t3,
            Lambda_0=lam0,
            N_invert=nn,
            N_search=i_bckstps,
        )
        # converged at the current iterate: keep x and do not apply the step
        if err_0 < tol:
            break

        x = new_x

    ddr.set_values(x)

    itprint.print_finished()

    # if compute_radius:
    #     return ddx,L
    #     lam, lam_max, lambdas = radius_jac(res,dres,jres,fut_S,ddr_filt,tol=tol,maxit=smaxit,verbose=(verbose=='full'))
    #     return ddr, lam, lam_max, lambdas
    # else:
    if not details:
        return ddr
    else:
        # rebuild the operator from the derivatives of the last iteration
        ddx = solve_gu(dres.copy(), res.copy())
        L = Operator(jres, fut_S, ddr_filt)

        if compute_radius:
            # modulus of the single eigenvalue requested from eigs (k=1)
            lam = scipy.sparse.linalg.eigs(L, k=1, return_eigenvectors=False)
            lam = abs(lam[0])
        else:
            lam = np.nan
        # lam, lam_max, lambdas = radius_jac(res,dres,jres,fut_S,ddr_filt,tol=tol,maxit=smaxit,verbose=(verbose=='full'))
        return ImprovedTimeIterationResult(
            ddr, it, err_0, err_2, err_0 < tol, complementarities, lam, None, L
        )