def evaluate_policy(model, mdr, tol=1e-8, maxit=2000, verbose=False,
                    hook=None, integration_orders=None):
    """Iterate on the value equation of an "mfga" model until it settles.

    Starting from a constant guess, every sweep stores the current values
    in a ``MarkovDecisionRule`` and calls ``update_value`` to obtain the
    next ones.  Iterations stop when the largest absolute change is at most
    ``tol`` or after ``maxit`` sweeps.

    Parameters
    ----------
    model :
        Model whose ``model_type`` is ``'mfga'``.  It must expose
        ``markov_chain``, ``calibration``,
        ``options['approximation_space']`` and the ``'transition'``,
        ``'auxiliary'`` and ``'value'`` functions.
    mdr :
        Decision rule; its ``grid`` attribute supplies the nodes and the
        object itself is forwarded to ``update_value``.
    tol : float
        Convergence threshold on the sup-norm of successive values.
    maxit : int
        Maximum number of sweeps.
    verbose : bool
        If true, print one table row per sweep.
    hook, integration_orders :
        Accepted for signature compatibility; not used by this function.

    Returns
    -------
    MarkovDecisionRule
        Rule built on the model's approximation space, holding the last
        computed values.
    """
    assert (model.model_type == 'mfga')

    import time
    from dolo.numeric.decision_rules_markov import MarkovDecisionRule

    [P, Q] = model.markov_chain
    n_ms = P.shape[0]  # number of markov states

    x0 = model.calibration['controls']
    # NOTE(review): the starting values are read from the 'controls'
    # calibration, exactly as the original code did.  This looks like it was
    # meant to be a dedicated 'values' entry -- confirm before changing.
    v0 = model.calibration['controls']
    parms = model.calibration['parameters']
    n_x = len(x0)
    n_v = len(v0)

    approx = model.options['approximation_space']
    mdrv = MarkovDecisionRule(n_ms, approx['a'], approx['b'], approx['orders'])

    grid = mdr.grid
    N = grid.shape[0]

    # Constant initial arrays, one slice per markov state.
    controls = numpy.zeros((n_ms, N, n_x))
    controls[:, :, :] = v0[None, None, :]
    values_0 = numpy.zeros((n_ms, N, n_v))
    values_0[:, :, :] = v0[None, None, :]

    gg = model.functions['transition']
    aa = model.functions['auxiliary']
    vaval = model.functions['value']

    # The raw model functions expect auxiliary variables as explicit
    # arguments; these wrappers compute them on the fly.
    def g(m, s, x, M, p):
        return gg(m, s, x, aa(m, s, x, p), M, p)

    def val(m, s, x, v, M, S, X, V, p):
        return vaval(m, s, x, aa(m, s, x, p), v,
                     M, S, X, aa(M, S, X, p), V, p)

    sh_v = values_0.shape

    if verbose:
        headline = '|{0:^4} | {1:10} | {2:8} | {3:8} |'.format(
            'N', ' Error', 'Gain', 'Time')
        stars = '-' * len(headline)
        print(stars)
        print(headline)
        print(stars)

    t1 = time.time()

    err = 10
    err_0 = numpy.nan  # no previous error yet: the first gain prints as nan
    it = 0

    while err > tol and it < maxit:
        it += 1
        t_start = time.time()

        mdrv.set_values(values_0)
        # Keep the iterate in its (n_ms, N, n_v) layout so that it can be
        # compared with, and substituted for, ``values_0`` directly.
        values = update_value(val, g, grid, controls, values_0,
                              mdr, mdrv, P, Q, parms).reshape(sh_v)

        err = abs(values - values_0).max()
        err_SA = err / err_0
        err_0 = err
        values_0 = values

        elapsed = time.time() - t_start

        if verbose:
            # There is no inner solver here, hence no 'nit' column.
            print('|{0:4} | {1:10.3e} | {2:8.3f} | {3:8.3f} |'.format(
                it, err, err_SA, elapsed))

    # Make the returned rule reflect the last iterate (also covers the case
    # where the loop body never ran).
    mdrv.set_values(values_0)

    t2 = time.time()

    if verbose:
        print(stars)
        print("Elapsed: {} seconds.".format(t2 - t1))
        print(stars)

    return mdrv
def evaluate_policy(model, mdr, tol=1e-8, maxit=2000, verbose=False,
                    hook=None, integration_orders=None):
    """Compute the value function of an "mfga" model by successive sweeps.

    A ``MarkovDecisionRule`` is loaded with the current values, then
    ``update_value`` produces new values on the grid.  This repeats until
    the largest absolute change is at most ``tol`` or ``maxit`` sweeps have
    been performed.

    Parameters
    ----------
    model :
        Model whose ``model_type`` is ``'mfga'``; it must provide
        ``markov_chain``, ``calibration``,
        ``options['approximation_space']`` and the ``'transition'``,
        ``'auxiliary'`` and ``'value'`` functions.
    mdr :
        Decision rule.  ``mdr.grid`` gives the evaluation nodes and ``mdr``
        is passed through to ``update_value``.
    tol : float
        Stop once the sup-norm change between sweeps is at most ``tol``.
    maxit : int
        Upper bound on the number of sweeps.
    verbose : bool
        If true, print a progress table.
    hook, integration_orders :
        Present in the signature but not used here.

    Returns
    -------
    MarkovDecisionRule
        Rule on the model's approximation space, set to the last computed
        values.
    """
    assert(model.model_type == 'mfga')

    import time
    from dolo.numeric.decision_rules_markov import MarkovDecisionRule

    [P, Q] = model.markov_chain
    n_ms = P.shape[0]  # number of markov states

    parms = model.calibration['parameters']
    x0 = model.calibration['controls']
    # NOTE(review): as in the original, the initial values come from the
    # 'controls' calibration; a separate 'values' entry may have been
    # intended -- confirm.
    v0 = model.calibration['controls']
    n_x = len(x0)
    n_v = len(v0)

    space = model.options['approximation_space']
    mdrv = MarkovDecisionRule(n_ms, space['a'], space['b'], space['orders'])

    grid = mdr.grid
    N = grid.shape[0]

    # Every markov state starts from the same constant row.
    controls = numpy.zeros((n_ms, N, n_x))
    values_0 = numpy.zeros((n_ms, N, n_v))
    for i_m in range(n_ms):
        controls[i_m, :, :] = v0[None, :]
        values_0[i_m, :, :] = v0[None, :]

    gg = model.functions['transition']
    aa = model.functions['auxiliary']
    vaval = model.functions['value']

    # Wrappers that fill in the auxiliary-variable arguments.
    def g(m, s, x, M, p):
        return gg(m, s, x, aa(m, s, x, p), M, p)

    def val(m, s, x, v, M, S, X, V, p):
        return vaval(m, s, x, aa(m, s, x, p), v,
                     M, S, X, aa(M, S, X, p), V, p)

    sh_v = values_0.shape

    if verbose:
        headline = '|{0:^4} | {1:10} | {2:8} | {3:8} |'.format(
            'N', ' Error', 'Gain', 'Time')
        stars = '-'*len(headline)
        print(stars)
        print(headline)
        print(stars)

    t1 = time.time()

    it = 0
    err = 10
    err_0 = numpy.nan  # first gain is undefined and shows up as nan

    while err > tol and it < maxit:
        it += 1
        t_start = time.time()

        mdrv.set_values(values_0)
        # Reshape to the 3-D layout of ``values_0``; a flat 2-D result cannot
        # be subtracted from it when there is more than one markov state.
        values = update_value(val, g, grid, controls, values_0,
                              mdr, mdrv, P, Q, parms).reshape(sh_v)

        err = abs(values - values_0).max()
        err_SA = err/err_0
        err_0 = err
        values_0 = values

        t_finish = time.time()
        elapsed = t_finish - t_start

        if verbose:
            # No inner solver is involved, so no iteration-count column.
            print('|{0:4} | {1:10.3e} | {2:8.3f} | {3:8.3f} |'.format(
                it, err, err_SA, elapsed))

    # Load the final iterate so the returned rule is never stale or empty.
    mdrv.set_values(values_0)

    t2 = time.time()

    if verbose:
        print(stars)
        print("Elapsed: {} seconds.".format(t2-t1))
        print(stars)

    return mdrv
def solve_mfg_model(model, maxit=1000, initial_guess=None,
                    with_complementarities=True, verbose=True, orders=None,
                    output_type='dr', tol=1e-8, inner_maxit=50):
    """Solve an "mfga" model by iterating on its arbitrage residuals.

    Each outer iteration loads the current controls into a
    ``MarkovDecisionRule`` and solves the system defined by ``residuals``
    with ``ncpsolve`` (when bounds are used) or ``newton``.  Iterations stop
    when the largest absolute change in controls is at most ``tol`` or after
    ``maxit`` outer iterations.

    Parameters
    ----------
    model :
        Model whose ``model_type`` is ``'mfga'``.
    maxit : int
        Maximum number of outer iterations.
    initial_guess : callable, optional
        Called as ``initial_guess(i_m, grid)`` for every markov state index
        to fill the starting controls.  When omitted, the calibrated
        controls are used at every node.
    with_complementarities : bool
        If true and the model defines ``'arbitrage_lb'``, bounds from
        ``'arbitrage_lb'`` / ``'arbitrage_ub'`` are passed to ``ncpsolve``.
    verbose : bool or 'full'
        If true, print a progress table; ``'full'`` additionally sets
        ``verbose=True`` on the inner solver.
    orders : optional
        Overrides ``approximation_space['orders']``.
    output_type : {'dr', 'controls'}
        Return the decision rule or the array of controls on the grid.
    tol : float
        Convergence threshold for the outer loop.
    inner_maxit : int
        ``maxit`` passed to the inner solver.

    Returns
    -------
    MarkovDecisionRule or ndarray
        Depending on ``output_type``.

    Raises
    ------
    Exception
        If ``output_type`` is neither ``'dr'`` nor ``'controls'``.
    """
    assert(model.model_type == 'mfga')

    # Reject a bad request before doing any expensive work.
    if output_type not in ('dr', 'controls'):
        raise Exception("Unsupported ouput type {}.".format(output_type))

    import time
    from dolo.numeric.decision_rules_markov import MarkovDecisionRule
    from dolo.numeric.optimize.newton import newton, SerialDifferentiableFunction
    from dolo.numeric.optimize.ncpsolve import ncpsolve

    [P, Q] = model.markov_chain
    n_ms = P.shape[0]  # number of markov states

    x0 = model.calibration['controls']
    parms = model.calibration['parameters']
    n_x = len(x0)

    approx = model.options['approximation_space']
    if orders is None:
        orders = approx['orders']
    mdr = MarkovDecisionRule(n_ms, approx['a'], approx['b'], orders)

    grid = mdr.grid
    N = grid.shape[0]

    controls_0 = numpy.zeros((n_ms, N, n_x))
    if initial_guess is None:
        controls_0[:, :, :] = x0[None, None, :]
    else:
        for i_m in range(n_ms):
            controls_0[i_m, :, :] = initial_guess(i_m, grid)

    ff = model.functions['arbitrage']
    gg = model.functions['transition']
    aa = model.functions['auxiliary']

    # Wrappers that supply the auxiliary-variable arguments.
    def f(m, s, x, M, S, X, p):
        return ff(m, s, x, aa(m, s, x, p), M, S, X, aa(M, S, X, p), p)

    def g(m, s, x, M, p):
        return gg(m, s, x, aa(m, s, x, p), M, p)

    use_bounds = bool(with_complementarities) and 'arbitrage_lb' in model.functions
    if use_bounds:
        lb_fun = model.functions['arbitrage_lb']
        ub_fun = model.functions['arbitrage_ub']
        lb = numpy.full(controls_0.shape, numpy.nan)
        ub = numpy.full(controls_0.shape, numpy.nan)
        p = numpy.repeat(parms[None, :], N, axis=0)  # same for every state
        for i_m in range(n_ms):
            m = numpy.repeat(P[i_m, :][None, :], N, axis=0)
            lb[i_m, :, :] = lb_fun(m, grid, p)
            ub[i_m, :, :] = ub_fun(m, grid, p)
        lb = lb.reshape((-1, n_x))
        ub = ub.reshape((-1, n_x))

    # The solvers work on a flat (n_ms*N, n_x) array.
    sh_c = controls_0.shape
    controls_0 = controls_0.reshape((-1, n_x))

    def fn(x):
        return residuals(f, g, grid, x.reshape(sh_c),
                         mdr, P, Q, parms).reshape((-1, n_x))

    if verbose:
        if use_bounds:
            print("Solving WITH complementarities.")
        headline = '|{0:^4} | {1:10} | {2:8} | {3:8} | {4:3} |'.format(
            'N', ' Error', 'Gain', 'Time', 'nit')
        stars = '-'*len(headline)
        print(stars)
        print(headline)
        print(stars)

    t1 = time.time()

    err = 10
    err_0 = numpy.nan  # first gain is undefined and prints as nan
    it = 0
    verbit = (verbose == 'full')

    while err > tol and it < maxit:
        it += 1
        t_start = time.time()

        mdr.set_values(controls_0.reshape(sh_c))
        dfn = SerialDifferentiableFunction(fn)

        if use_bounds:
            [controls, nit] = ncpsolve(dfn, lb, ub, controls_0,
                                       verbose=verbit, maxit=inner_maxit)
        else:
            [controls, nit] = newton(dfn, controls_0,
                                     verbose=verbit, maxit=inner_maxit)

        err = abs(controls - controls_0).max()
        err_SA = err/err_0
        err_0 = err
        controls_0 = controls

        elapsed = time.time() - t_start

        if verbose:
            print('|{0:4} | {1:10.3e} | {2:8.3f} | {3:8.3f} | {4:3} |'.format(
                it, err, err_SA, elapsed, nit))

    # ``controls_0`` always exists, even if the loop body never ran.
    controls_0 = controls_0.reshape(sh_c)
    # Keep the returned rule in step with the returned controls.
    mdr.set_values(controls_0)

    t2 = time.time()

    if verbose:
        print(stars)
        print("Elapsed: {} seconds.".format(t2-t1))
        print(stars)

    if output_type == 'dr':
        return mdr
    return controls_0
def time_iteration(model, initial_guess=None, with_complementarities=True,
                   verbose=True, orders=None, output_type='dr', maxit=1000,
                   inner_maxit=10, tol=1e-6, hook=None):
    """Solve a "dtmscc" model by iterating on its arbitrage residuals.

    Each outer iteration loads the current controls into a
    ``MarkovDecisionRule`` and solves the system defined by ``residuals``
    with ``ncpsolve`` (when bounds are used) or ``newton``.  Iterations stop
    when the largest absolute change in controls is at most ``tol`` or after
    ``maxit`` outer iterations.

    Parameters
    ----------
    model :
        Model whose ``model_type`` is ``'dtmscc'``.
    initial_guess : callable, optional
        Called as ``initial_guess(i_m, grid)`` for every markov state index
        to fill the starting controls.  When omitted, the calibrated
        controls are used at every node.
    with_complementarities : bool
        If true and the model defines ``'controls_lb'``, bounds from
        ``'controls_lb'`` / ``'controls_ub'`` are passed to ``ncpsolve``.
    verbose : bool or 'full'
        If true, print a progress table; ``'full'`` additionally sets
        ``verbose=True`` on the inner solver.
    orders : optional
        Overrides ``approximation_space['orders']``.
    output_type : {'dr', 'controls'}
        Return the decision rule or the array of controls on the grid.
    maxit : int
        Maximum number of outer iterations.
    inner_maxit : int
        ``maxit`` passed to the inner solver.
    tol : float
        Convergence threshold for the outer loop.
    hook : callable, optional
        Called without arguments once per outer iteration, right before the
        inner solve.

    Returns
    -------
    MarkovDecisionRule or ndarray
        Depending on ``output_type``.

    Raises
    ------
    Exception
        If ``output_type`` is neither ``'dr'`` nor ``'controls'``.
    """
    assert (model.model_type == 'dtmscc')

    # Reject a bad request before doing any expensive work.
    if output_type not in ('dr', 'controls'):
        raise Exception("Unsupported ouput type {}.".format(output_type))

    import time
    from dolo.numeric.decision_rules_markov import MarkovDecisionRule
    from dolo.numeric.optimize.newton import newton, SerialDifferentiableFunction
    from dolo.numeric.optimize.ncpsolve import ncpsolve

    [P, Q] = model.markov_chain
    n_ms = P.shape[0]  # number of markov states

    x0 = model.calibration['controls']
    parms = model.calibration['parameters']
    n_x = len(x0)

    approx = model.options['approximation_space']
    if orders is None:
        orders = approx['orders']
    mdr = MarkovDecisionRule(n_ms, approx['a'], approx['b'], orders)

    grid = mdr.grid
    N = grid.shape[0]

    controls_0 = numpy.zeros((n_ms, N, n_x))
    if initial_guess is None:
        controls_0[:, :, :] = x0[None, None, :]
    else:
        for i_m in range(n_ms):
            controls_0[i_m, :, :] = initial_guess(i_m, grid)

    f = model.functions['arbitrage']
    g = model.functions['transition']

    use_bounds = bool(with_complementarities) and 'controls_lb' in model.functions
    if use_bounds:
        lb_fun = model.functions['controls_lb']
        ub_fun = model.functions['controls_ub']
        lb = numpy.full(controls_0.shape, numpy.nan)
        ub = numpy.full(controls_0.shape, numpy.nan)
        p = numpy.repeat(parms[None, :], N, axis=0)  # same for every state
        for i_m in range(n_ms):
            m = numpy.repeat(P[i_m, :][None, :], N, axis=0)
            lb[i_m, :, :] = lb_fun(m, grid, p)
            ub[i_m, :, :] = ub_fun(m, grid, p)
        lb = lb.reshape((-1, n_x))
        ub = ub.reshape((-1, n_x))

    # The solvers work on a flat (n_ms*N, n_x) array.
    sh_c = controls_0.shape
    controls_0 = controls_0.reshape((-1, n_x))

    def fn(x):
        return residuals(f, g, grid, x.reshape(sh_c),
                         mdr, P, Q, parms).reshape((-1, n_x))

    if verbose:
        if use_bounds:
            print("Solving WITH complementarities.")
        headline = '|{0:^4} | {1:10} | {2:8} | {3:8} | {4:3} |'.format(
            'N', ' Error', 'Gain', 'Time', 'nit')
        stars = '-' * len(headline)
        print(stars)
        print(headline)
        print(stars)

    t1 = time.time()

    err = 10
    err_0 = numpy.nan  # first gain is undefined and prints as nan
    it = 0
    verbit = (verbose == 'full')

    while err > tol and it < maxit:
        it += 1
        t_start = time.time()

        mdr.set_values(controls_0.reshape(sh_c))
        dfn = SerialDifferentiableFunction(fn)

        if hook:
            hook()

        if use_bounds:
            [controls, nit] = ncpsolve(dfn, lb, ub, controls_0,
                                       verbose=verbit, maxit=inner_maxit)
        else:
            [controls, nit] = newton(dfn, controls_0,
                                     verbose=verbit, maxit=inner_maxit)

        err = abs(controls - controls_0).max()
        err_SA = err / err_0
        err_0 = err
        controls_0 = controls

        elapsed = time.time() - t_start

        if verbose:
            print('|{0:4} | {1:10.3e} | {2:8.3f} | {3:8.3f} | {4:3} |'.format(
                it, err, err_SA, elapsed, nit))

    # ``controls_0`` always exists, even if the loop body never ran.
    controls_0 = controls_0.reshape(sh_c)
    # Keep the returned rule in step with the returned controls.
    mdr.set_values(controls_0)

    t2 = time.time()

    if verbose:
        print(stars)
        print("Elapsed: {} seconds.".format(t2 - t1))
        print(stars)

    if output_type == 'dr':
        return mdr
    return controls_0
def time_iteration(model, initial_guess=None, with_complementarities=True,
                   verbose=True, grid={}, output_type='dr', maxit=1000,
                   inner_maxit=10, tol=1e-6, hook=None):
    '''
    Finds a global solution for ``model`` using backward time-iteration.

    This algorithm iterates on the residuals of the arbitrage equations.

    Parameters
    ----------
    model : NumericModel
        "dtmscc" model to be solved
    initial_guess : callable, optional
        initial guess for the decision rule, called as
        ``initial_guess(i_m, nodes)`` for every markov state index; when
        omitted the calibrated controls are used at every node
    with_complementarities : boolean (True)
        if False, complementarity conditions are ignored; they are also
        ignored when the model has no ``'controls_lb'`` function
    verbose : boolean or 'full'
        if True, display iterations; ``'full'`` also sets ``verbose=True``
        on the inner solver
    grid : dict
        grid options, forwarded as keyword arguments to ``model.get_grid``
        (the default dict is only read, never modified)
    output_type : {'dr', 'controls'}
        return the decision rule or the array of controls on the grid
    maxit : int
        maximum number of iterations
    inner_maxit : int
        maximum number of iteration for inner solver
    tol : float
        tolerance criterium for successive approximations
    hook : callable, optional
        called without arguments once per iteration, right before the inner
        solve

    Returns
    -------
    decision rule or ndarray :
        approximated solution, in the form selected by ``output_type``

    Raises
    ------
    Exception
        if ``output_type`` is neither ``'dr'`` nor ``'controls'``
    '''
    assert (model.model_type == 'dtmscc')

    # Reject a bad request before doing any expensive work.
    if output_type not in ('dr', 'controls'):
        raise Exception("Unsupported ouput type {}.".format(output_type))

    import time
    from dolo.numeric.decision_rules_markov import MarkovDecisionRule
    from dolo.numeric.optimize.newton import newton, SerialDifferentiableFunction
    from dolo.numeric.optimize.ncpsolve import ncpsolve

    [P, Q] = model.markov_chain
    n_ms = P.shape[0]  # number of markov states

    x0 = model.calibration['controls']
    parms = model.calibration['parameters']
    n_x = len(x0)

    approx = model.get_grid(**grid)
    mdr = MarkovDecisionRule(n_ms, approx.a, approx.b, approx.orders)

    # Separate name so the ``grid`` options argument is not overwritten.
    nodes = mdr.grid
    N = nodes.shape[0]

    controls_0 = numpy.zeros((n_ms, N, n_x))
    if initial_guess is None:
        controls_0[:, :, :] = x0[None, None, :]
    else:
        for i_m in range(n_ms):
            controls_0[i_m, :, :] = initial_guess(i_m, nodes)

    f = model.functions['arbitrage']
    g = model.functions['transition']

    use_bounds = bool(with_complementarities) and 'controls_lb' in model.functions
    if use_bounds:
        lb_fun = model.functions['controls_lb']
        ub_fun = model.functions['controls_ub']
        lb = numpy.full(controls_0.shape, numpy.nan)
        ub = numpy.full(controls_0.shape, numpy.nan)
        p = numpy.repeat(parms[None, :], N, axis=0)  # same for every state
        for i_m in range(n_ms):
            m = numpy.repeat(P[i_m, :][None, :], N, axis=0)
            lb[i_m, :, :] = lb_fun(m, nodes, p)
            ub[i_m, :, :] = ub_fun(m, nodes, p)
        lb = lb.reshape((-1, n_x))
        ub = ub.reshape((-1, n_x))

    # The solvers work on a flat (n_ms*N, n_x) array.
    sh_c = controls_0.shape
    controls_0 = controls_0.reshape((-1, n_x))

    def fn(x):
        return residuals(f, g, nodes, x.reshape(sh_c),
                         mdr, P, Q, parms).reshape((-1, n_x))

    if verbose:
        if use_bounds:
            print("Solving WITH complementarities.")
        headline = '|{0:^4} | {1:10} | {2:8} | {3:8} | {4:3} |'.format(
            'N', ' Error', 'Gain', 'Time', 'nit')
        stars = '-' * len(headline)
        print(stars)
        print(headline)
        print(stars)

    t1 = time.time()

    err = 10
    err_0 = numpy.nan  # first gain is undefined and prints as nan
    it = 0
    verbit = (verbose == 'full')

    while err > tol and it < maxit:
        it += 1
        t_start = time.time()

        mdr.set_values(controls_0.reshape(sh_c))
        dfn = SerialDifferentiableFunction(fn)

        if hook:
            hook()

        if use_bounds:
            [controls, nit] = ncpsolve(dfn, lb, ub, controls_0,
                                       verbose=verbit, maxit=inner_maxit)
        else:
            [controls, nit] = newton(dfn, controls_0,
                                     verbose=verbit, maxit=inner_maxit)

        err = abs(controls - controls_0).max()
        err_SA = err / err_0
        err_0 = err
        controls_0 = controls

        elapsed = time.time() - t_start

        if verbose:
            print('|{0:4} | {1:10.3e} | {2:8.3f} | {3:8.3f} | {4:3} |'.format(
                it, err, err_SA, elapsed, nit))

    # ``controls_0`` always exists, even if the loop body never ran.
    controls_0 = controls_0.reshape(sh_c)
    # Keep the returned rule in step with the returned controls.
    mdr.set_values(controls_0)

    t2 = time.time()

    if verbose:
        print(stars)
        print("Elapsed: {} seconds.".format(t2 - t1))
        print(stars)

    if output_type == 'dr':
        return mdr
    return controls_0
def solve_policy(model, grid={}, tol=1e-6, maxit=500, maxit_howard=20, verbose=False):
    """
    Solve for the value function and associated Markov decision rule by
    iterating over the value function.

    The computation runs in three phases: at most 10 value-function
    iterations, then at most ``maxit_howard`` Howard improvement steps
    (values updated with the controls held fixed), then value-function
    iterations until the change in values is at most ``tol`` or ``maxit``
    iterations have been done.

    Parameters:
    -----------
    model :
        "dtmscc" model. Must contain a 'felicity' function, as well as
        'transition', 'controls_lb' and 'controls_ub', and a 'beta'
        calibration entry used as discount factor.
    grid :
        grid options, forwarded as keyword arguments to ``model.get_grid``
        (the default dict is only read, never modified)
    tol :
        convergence threshold on the sup-norm change of the values
    maxit :
        maximum number of iterations of the final phase
    maxit_howard :
        maximum number of Howard improvement steps
    verbose :
        if True, print progress

    Returns:
    --------
    mdr : Markov decision rule
        The solved decision rule/policy function
    mdrv: decision rule
        The solved value function
    """

    assert(model.model_type == 'dtmscc')

    # Imported locally, as the other solvers in this file do.
    from dolo.numeric.decision_rules_markov import MarkovDecisionRule

    transition = model.functions['transition']
    felicity = model.functions['felicity']
    controls_lb = model.functions['controls_lb']
    controls_ub = model.functions['controls_ub']

    parms = model.calibration['parameters']
    discount = model.calibration['beta']

    x0 = model.calibration['controls']
    m0 = model.calibration['markov_states']
    s0 = model.calibration['states']
    r0 = felicity(m0, s0, x0, parms)

    [P, Q] = model.markov_chain
    n_ms = P.shape[0]  # number of markov states

    approx = model.get_grid(**grid)
    a = approx.a
    b = approx.b
    orders = approx.orders

    mdrv = MarkovDecisionRule(n_ms, a, b, orders)  # values

    # Separate name so the ``grid`` options argument is not overwritten.
    nodes = mdrv.grid
    N = nodes.shape[0]
    n_x = len(x0)

    controls_0 = np.zeros((n_ms, N, n_x))
    controls_0[:, :, :] = x0[None, None, :]
    # Initial values: calibrated felicity divided by (1 - discount).
    values_0 = np.zeros((n_ms, N, 1))
    values_0[:, :, :] = r0/(1-discount)

    itprint = IterationsPrinter(('N', int),
                                ('Error', float),
                                ('Gain', float),
                                ('Time', float),
                                verbose=verbose)
    itprint.print_header('Evaluating value of initial guess')

    def node_value(x, i_m, s):
        # Value of choosing controls ``x`` at node ``s`` in markov state
        # ``i_m``, given the values currently stored in ``mdrv``.
        return choice_value(transition, felicity, i_m, s, x,
                            mdrv, P, Q, parms, discount)[0]

    def negative_node_value(x, i_m, s):
        # Objective handed to the minimizer.
        return -node_value(x, i_m, s)

    def maximize_on_grid(values_in, controls_in):
        # One sweep: maximize the choice value at every node, within the
        # control bounds, starting from the current controls.
        values = values_in.copy()
        controls = controls_in.copy()
        for i_m in range(n_ms):
            m = P[i_m, :]
            for n in range(N):
                s = nodes[n, :]
                lb = controls_lb(m, s, parms)
                ub = controls_ub(m, s, parms)
                bnds = list(zip(lb, ub))
                res = scipy.optimize.minimize(negative_node_value,
                                              controls[i_m, n, :],
                                              args=(i_m, s), bounds=bnds)
                controls[i_m, n, :] = res.x
                values[i_m, n, 0] = node_value(res.x, i_m, s)
        return values, controls

    def value_iterations(values_cur, controls_cur, max_sweeps):
        # Repeat maximization sweeps until the values move by at most
        # ``tol`` or ``max_sweeps`` sweeps have been done.
        it = 0
        err_v = 100
        err_v_0 = np.nan  # no previous error: the first gain prints as nan
        while it < max_sweeps and err_v > tol:
            t_start = time.time()
            it += 1
            # update interpolation object with current values
            mdrv.set_values(values_cur)
            values, controls = maximize_on_grid(values_cur, controls_cur)
            err_v = abs(values - values_cur).max()
            elapsed = time.time() - t_start
            values_cur, controls_cur = values, controls
            gain_v = err_v / err_v_0
            err_v_0 = err_v
            itprint.print_iteration(N=it, Error=err_v, Gain=gain_v,
                                    Time=elapsed)
        return values_cur, controls_cur

    # FIRST: value function iterations, 10 iterations to start
    if verbose:
        print('-----')
        print('Starting value function iteration')
        print('-----')
    values_0, controls_0 = value_iterations(values_0, controls_0, 10)

    # SECOND: Howard improvement step, controls held fixed
    if verbose:
        print('-----')
        print('Starting Howard improvement step')
        print('-----')
    it = 0
    err_v = 100
    err_v_0 = np.nan
    while it < maxit_howard and err_v > tol:
        t_start = time.time()
        it += 1
        # update interpolation object with current values
        mdrv.set_values(values_0)
        values = values_0.copy()
        for i_m in range(n_ms):
            for n in range(N):
                values[i_m, n, 0] = node_value(controls_0[i_m, n, :],
                                               i_m, nodes[n, :])
        # compute error, update value function
        err_v = abs(values - values_0).max()
        values_0 = values
        elapsed = time.time() - t_start
        gain_v = err_v / err_v_0
        err_v_0 = err_v
        itprint.print_iteration(N=it, Error=err_v, Gain=gain_v, Time=elapsed)

    # THIRD: value function iterations until convergence
    if verbose:
        print('-----')
        print('Starting value function iteration')
        print('-----')
    values_0, controls_0 = value_iterations(values_0, controls_0, maxit)

    itprint.print_finished()

    # NOTE(review): this second printer only emits a header and a footer; no
    # iteration is ever reported through it.  Kept so that the verbose output
    # is unchanged -- it looks like the remnant of an unimplemented phase.
    itprint = IterationsPrinter(('N', int),
                                ('Error_V', float),
                                ('Gain_V', float),
                                ('Error_x', float),
                                ('Gain_x', float),
                                ('Time', float),
                                verbose=verbose)
    itprint.print_header('Start value function iterations.')

    # final value function and decision rule
    mdr = MarkovDecisionRule(n_ms, a, b, orders)
    mdr.set_values(controls_0)
    mdrv.set_values(values_0)

    itprint.print_finished()

    return mdr, mdrv
def evaluate_policy(model, mdr, tol=1e-8, maxit=2000, grid={}, verbose=True,
                    initial_guess=None, hook=None, integration_orders=None):
    """Compute value function corresponding to policy ``mdr``

    The controls implied by ``mdr`` are evaluated once on the grid; the
    values are then updated with ``update_value`` until the largest absolute
    change is at most ``tol`` or ``maxit`` iterations have been done.

    Parameters:
    -----------
    model:
        "dtmscc" model. Must contain a 'value' function.
    mdr:
        decision rule to evaluate, called as ``mdr(i_m, nodes)``
    tol:
        convergence threshold on the sup-norm change of the values
    maxit:
        maximum number of iterations
    grid:
        grid options, forwarded as keyword arguments to ``model.get_grid``
        (the default dict is only read, never modified)
    verbose:
        if True, print one table row per iteration
    initial_guess:
        optional callable ``initial_guess(i_m, nodes)`` giving the starting
        values; when omitted the calibrated 'values' are used at every node
    hook, integration_orders:
        accepted for signature compatibility; not used by this function

    Returns:
    --------
    decision rule:
        value function (a function of the space similar to a decision
        rule object), holding the last computed values
    """

    assert(model.model_type == 'dtmscc')

    from dolo.numeric.decision_rules_markov import MarkovDecisionRule

    [P, Q] = model.markov_chain
    n_ms = P.shape[0]  # number of markov states

    x0 = model.calibration['controls']
    v0 = model.calibration['values']
    parms = model.calibration['parameters']
    n_x = len(x0)
    n_v = len(v0)

    approx = model.get_grid(**grid)
    mdrv = MarkovDecisionRule(n_ms, approx.a, approx.b, approx.orders)  # values

    # Separate name so the ``grid`` options argument is not overwritten.
    nodes = mdrv.grid
    N = nodes.shape[0]

    # Controls chosen by the policy under evaluation, fixed for the whole run.
    controls = np.zeros((n_ms, N, n_x))
    for i_m in range(n_ms):
        controls[i_m, :, :] = mdr(i_m, nodes)

    values_0 = np.zeros((n_ms, N, n_v))
    for i_m in range(n_ms):
        if initial_guess is None:
            values_0[i_m, :, :] = v0[None, :]
        else:
            values_0[i_m, :, :] = initial_guess(i_m, nodes)

    val = model.functions['value']
    g = model.functions['transition']

    sh_v = values_0.shape

    if verbose:
        headline = '|{0:^4} | {1:10} | {2:8} | {3:8} |'.format(
            'N', ' Error', 'Gain', 'Time')
        stars = '-'*len(headline)
        print(stars)
        print(headline)
        print(stars)

    t1 = time.time()

    err = 10
    err_0 = np.nan  # first gain is undefined and prints as nan
    it = 0

    while err > tol and it < maxit:
        it += 1
        t_start = time.time()

        mdrv.set_values(values_0)
        values = update_value(val, g, nodes, controls, values_0,
                              mdr, mdrv, P, Q, parms).reshape(sh_v)

        err = abs(values - values_0).max()
        err_SA = err/err_0
        err_0 = err
        values_0 = values

        elapsed = time.time() - t_start

        if verbose:
            print('|{0:4} | {1:10.3e} | {2:8.3f} | {3:8.3f} |'.format(
                it, err, err_SA, elapsed))

    # Load the final iterate so the returned rule is never stale or empty.
    mdrv.set_values(values_0)

    t2 = time.time()

    if verbose:
        print(stars)
        print("Elapsed: {} seconds.".format(t2-t1))
        print(stars)

    return mdrv