Python MarkovDecisionRule Exemples, dolo.numeric.decision_rules_markov.MarkovDecisionRule Python Exemples

Exemple #1

0

Afficher le fichier

def evaluate_policy(model,
                    mdr,
                    tol=1e-8,
                    maxit=2000,
                    verbose=False,
                    hook=None,
                    integration_orders=None):

    assert (model.model_type == 'mfga')

    [P, Q] = model.markov_chain

    n_ms = P.shape[0]  # number of markov states
    n_mv = P.shape[1]  # number of markov variables

    x0 = model.calibration['controls']
    v0 = model.calibration['controls']
    parms = model.calibration['parameters']
    n_x = len(x0)
    n_v = len(v0)
    n_s = len(model.symbols['states'])

    approx = model.options['approximation_space']
    a = approx['a']
    b = approx['b']
    orders = approx['orders']

    from dolo.numeric.decision_rules_markov import MarkovDecisionRule
    mdrv = MarkovDecisionRule(n_ms, a, b, orders)  # values

    grid = mdr.grid
    N = grid.shape[0]

    controls = numpy.zeros((n_ms, N, n_x))
    for i_m in range(n_ms):
        controls[i_m, :, :] = v0[None, :]

    values_0 = numpy.zeros((n_ms, N, n_v))
    for i_m in range(n_ms):
        values_0[i_m, :, :] = v0[None, :]

    ff = model.functions['arbitrage']
    gg = model.functions['transition']
    aa = model.functions['auxiliary']
    vaval = model.functions['value']

    f = lambda m, s, x, M, S, X, p: ff(m, s, x, aa(m, s, x, p), M, S, X,
                                       aa(M, S, X, p), p)
    g = lambda m, s, x, M, p: gg(m, s, x, aa(m, s, x, p), M, p)
    val = lambda m, s, x, v, M, S, X, V, p: vaval(m, s, x, aa(
        m, s, x, p), v, M, S, X, aa(M, S, X, p), V, p)

    sh_v = values_0.shape

    err = 10
    tol = 1e-8
    inner_maxit = 50
    it = 0

    if verbose:
        headline = '|{0:^4} | {1:10} | {2:8} | {3:8} | {4:3} |'.format(
            'N', ' Error', 'Gain', 'Time', 'nit')
        stars = '-' * len(headline)
        print(stars)
        print(headline)
        print(stars)

    import time
    t1 = time.time()

    err_0 = numpy.nan

    verbit = (verbose == 'full')

    while err > tol and it < maxit:

        it += 1

        t_start = time.time()

        mdrv.set_values(values_0.reshape(sh_v))

        values = update_value(val, g, grid, controls, values_0, mdr, mdrv, P,
                              Q, parms).reshape((-1, n_x))

        err = abs(values - values_0).max()

        err_SA = err / err_0
        err_0 = err

        values_0 = values

        t_finish = time.time()
        elapsed = t_finish - t_start

        if verbose:
            print('|{0:4} | {1:10.3e} | {2:8.3f} | {3:8.3f} | {4:3} |'.format(
                it, err, err_SA, elapsed, nit))

    # values_0 = values.reshape(sh_v)

    t2 = time.time()

    if verbose:
        print(stars)
        print("Elapsed: {} seconds.".format(t2 - t1))
        print(stars)

    return mdrv

Exemple #2

0

Afficher le fichier

Fichier : value_iteration.py Projet : spencerlyon2/dolo

def evaluate_policy(model, mdr, tol=1e-8,  maxit=2000, verbose=False, hook=None, integration_orders=None):

    assert(model.model_type == 'mfga')

    [P, Q] = model.markov_chain

    n_ms = P.shape[0]   # number of markov states
    n_mv = P.shape[1] # number of markov variables

    x0 = model.calibration['controls']
    v0 = model.calibration['controls']
    parms = model.calibration['parameters']
    n_x = len(x0)
    n_v = len(v0)
    n_s = len(model.symbols['states'])

    approx = model.options['approximation_space']
    a = approx['a']
    b = approx['b']
    orders = approx['orders']

    from dolo.numeric.decision_rules_markov import MarkovDecisionRule
    mdrv = MarkovDecisionRule(n_ms, a, b, orders) # values

    grid = mdr.grid
    N = grid.shape[0]

    controls = numpy.zeros((n_ms, N, n_x))
    for i_m in range(n_ms):
        controls[i_m,:,:] = v0[None,:]

    values_0 = numpy.zeros((n_ms, N, n_v))
    for i_m in range(n_ms):
        values_0[i_m,:,:] = v0[None,:]


    ff = model.functions['arbitrage']
    gg = model.functions['transition']
    aa = model.functions['auxiliary']
    vaval = model.functions['value']


    f = lambda m,s,x,M,S,X,p: ff(m,s,x,aa(m,s,x,p),M,S,X,aa(M,S,X,p),p)
    g = lambda m,s,x,M,p: gg(m,s,x,aa(m,s,x,p),M,p)
    val = lambda m,s,x,v,M,S,X,V,p: vaval(m,s,x,aa(m,s,x,p),v,M,S,X,aa(M,S,X,p),V,p)


    sh_v = values_0.shape

    err = 10
    tol = 1e-8
    inner_maxit = 50
    it = 0


    if verbose:
        headline = '|{0:^4} | {1:10} | {2:8} | {3:8} | {4:3} |'.format( 'N',' Error', 'Gain','Time',  'nit' )
        stars = '-'*len(headline)
        print(stars)
        print(headline)
        print(stars)

    import time
    t1 = time.time()

    err_0 = numpy.nan

    verbit = (verbose == 'full')

    while err>tol and it<maxit:

        it += 1

        t_start = time.time()

        mdrv.set_values(values_0.reshape(sh_v))

        values = update_value(val, g, grid, controls, values_0, mdr, mdrv, P, Q, parms).reshape((-1,n_x))

        err = abs(values-values_0).max()

        err_SA = err/err_0
        err_0 = err

        values_0 = values

        t_finish = time.time()
        elapsed = t_finish - t_start

        if verbose:
            print('|{0:4} | {1:10.3e} | {2:8.3f} | {3:8.3f} | {4:3} |'.format( it, err, err_SA, elapsed, nit  ))

    # values_0 = values.reshape(sh_v)

    t2 = time.time()

    if verbose:
        print(stars)
        print("Elapsed: {} seconds.".format(t2-t1))
        print(stars)


    return mdrv

Exemple #3

0

Afficher le fichier

Fichier : time_iteration.py Projet : spencerlyon2/dolo

def solve_mfg_model(model, maxit=1000, initial_guess=None, with_complementarities=True, verbose=True, orders=None, output_type='dr'):

    assert(model.model_type == 'mfga')

    [P, Q] = model.markov_chain

    n_ms = P.shape[0]   # number of markov states
    n_mv = P.shape[1] # number of markov variables

    x0 = model.calibration['controls']
    parms = model.calibration['parameters']
    n_x = len(x0)
    n_s = len(model.symbols['states'])

    approx = model.options['approximation_space']
    a = approx['a']
    b = approx['b']
    if orders is None:
        orders = approx['orders']

    from dolo.numeric.decision_rules_markov import MarkovDecisionRule

    mdr = MarkovDecisionRule(n_ms, a, b, orders)

    grid = mdr.grid
    N = grid.shape[0]



#    if isinstance(initial_guess, numpy.ndarray):
#        print("Using initial guess (1)")
#        controls = initial_guess
#    elif isinstance(initial_guess, dict):
#        print("Using initial guess (2)")
#        controls_0 = initial_guess['controls']
#        ap_space = initial_guess['approximation_space']
#        if False in (approx['orders']==orders):
#            print("Interpolating initial guess")
#            old_dr = MarkovDecisionRule(controls_0.shape[0], ap_space['smin'], ap_space['smax'], ap_space['orders'])
#            old_dr.set_values(controls_0)
#            controls_0 = numpy.zeros( (n_ms, N, n_x) )
#            for i in range(n_ms):
#                e = old_dr(i,grid)
#                controls_0[i,:,:] = e
#    else:
#        controls_0 = numpy.zeros((n_ms, N, n_x))



    controls_0 = numpy.zeros((n_ms, N, n_x))

    if initial_guess is None:
        controls_0[:,:,:] = x0[None,None,:]
    else:
        for i_m in range(n_ms):
            m = P[i_m,:][None,:]
            controls_0[i_m,:,:] = initial_guess(i_m, grid)

    ff = model.functions['arbitrage']
    gg = model.functions['transition']
    aa = model.functions['auxiliary']

    if 'arbitrage_lb' in model.functions and with_complementarities==True:
        lb_fun = model.functions['arbitrage_lb']
        ub_fun = model.functions['arbitrage_ub']
        lb = numpy.zeros_like(controls_0)*numpy.nan
        ub = numpy.zeros_like(controls_0)*numpy.nan
        for i_m in range(n_ms):
            m = P[i_m,:][None,:]
            p = parms[None,:]
            m = numpy.repeat(m, N, axis=0)
            p = numpy.repeat(p, N, axis=0)

            lb[i_m,:,:] = lb_fun(m, grid, p)
            ub[i_m,:,:] = ub_fun(m, grid, p)

    else:
        with_complementarities = False


    f = lambda m,s,x,M,S,X,p: ff(m,s,x,aa(m,s,x,p),M,S,X,aa(M,S,X,p),p)
    g = lambda m,s,x,M,p: gg(m,s,x,aa(m,s,x,p),M,p)

    # mdr.set_values(controls)

    sh_c = controls_0.shape

    controls_0 = controls_0.reshape( (-1,n_x) )


    from dolo.numeric.optimize.newton import newton, SerialDifferentiableFunction
    from dolo.numeric.optimize.ncpsolve import ncpsolve

    err = 10
    tol = 1e-8
    inner_maxit = 50
    it = 0


    if with_complementarities:
        print("Solving WITH complementarities.")
        lb = lb.reshape((-1,n_x))
        ub = ub.reshape((-1,n_x))


    if verbose:
        headline = '|{0:^4} | {1:10} | {2:8} | {3:8} | {4:3} |'.format( 'N',' Error', 'Gain','Time',  'nit' )
        stars = '-'*len(headline)
        print(stars)
        print(headline)
        print(stars)

    import time
    t1 = time.time()

    err_0 = numpy.nan

    verbit = (verbose == 'full')

    while err>tol and it<maxit:

        it += 1

        t_start = time.time()

        mdr.set_values(controls_0.reshape(sh_c))

        fn = lambda x: residuals(f, g, grid, x.reshape(sh_c), mdr, P, Q, parms).reshape((-1,n_x))
        dfn = SerialDifferentiableFunction(fn)

        if with_complementarities:
            [controls,nit] = ncpsolve(dfn, lb, ub, controls_0, verbose=verbit, maxit=inner_maxit)
        else:
            [controls, nit] = newton(dfn, controls_0, verbose=verbit, maxit=inner_maxit)

        err = abs(controls-controls_0).max()

        err_SA = err/err_0
        err_0 = err

        controls_0 = controls

        t_finish = time.time()
        elapsed = t_finish - t_start

        if verbose:
            print('|{0:4} | {1:10.3e} | {2:8.3f} | {3:8.3f} | {4:3} |'.format( it, err, err_SA, elapsed, nit  ))

    controls_0 = controls.reshape(sh_c)

    t2 = time.time()

    if verbose:
        print(stars)
        print("Elapsed: {} seconds.".format(t2-t1))
        print(stars)


    if output_type == 'dr':
        return mdr
    elif output_type == 'controls':
        return controls_0
    else:
        raise Exception("Unsupported ouput type {}.".format(output_type))

Exemple #4

0

Afficher le fichier

def time_iteration(model,
                   initial_guess=None,
                   with_complementarities=True,
                   verbose=True,
                   orders=None,
                   output_type='dr',
                   maxit=1000,
                   inner_maxit=10,
                   tol=1e-6,
                   hook=None):

    assert (model.model_type == 'dtmscc')

    def vprint(t):
        if verbose:
            print(t)

    [P, Q] = model.markov_chain

    n_ms = P.shape[0]  # number of markov states
    n_mv = P.shape[1]  # number of markov variables

    x0 = model.calibration['controls']
    parms = model.calibration['parameters']
    n_x = len(x0)
    n_s = len(model.symbols['states'])

    approx = model.options['approximation_space']
    a = approx['a']
    b = approx['b']
    if orders is None:
        orders = approx['orders']

    from dolo.numeric.decision_rules_markov import MarkovDecisionRule

    mdr = MarkovDecisionRule(n_ms, a, b, orders)

    grid = mdr.grid
    N = grid.shape[0]

    #    if isinstance(initial_guess, numpy.ndarray):
    #        print("Using initial guess (1)")
    #        controls = initial_guess
    #    elif isinstance(initial_guess, dict):
    #        print("Using initial guess (2)")
    #        controls_0 = initial_guess['controls']
    #        ap_space = initial_guess['approximation_space']
    #        if False in (approx['orders']==orders):
    #            print("Interpolating initial guess")
    #            old_dr = MarkovDecisionRule(controls_0.shape[0], ap_space['smin'], ap_space['smax'], ap_space['orders'])
    #            old_dr.set_values(controls_0)
    #            controls_0 = numpy.zeros( (n_ms, N, n_x) )
    #            for i in range(n_ms):
    #                e = old_dr(i,grid)
    #                controls_0[i,:,:] = e
    #    else:
    #        controls_0 = numpy.zeros((n_ms, N, n_x))

    controls_0 = numpy.zeros((n_ms, N, n_x))

    if initial_guess is None:
        controls_0[:, :, :] = x0[None, None, :]
    else:
        for i_m in range(n_ms):
            m = P[i_m, :][None, :]
            controls_0[i_m, :, :] = initial_guess(i_m, grid)

    f = model.functions['arbitrage']
    g = model.functions['transition']

    if 'controls_lb' in model.functions and with_complementarities == True:
        lb_fun = model.functions['controls_lb']
        ub_fun = model.functions['controls_ub']
        lb = numpy.zeros_like(controls_0) * numpy.nan
        ub = numpy.zeros_like(controls_0) * numpy.nan
        for i_m in range(n_ms):
            m = P[i_m, :][None, :]
            p = parms[None, :]
            m = numpy.repeat(m, N, axis=0)
            p = numpy.repeat(p, N, axis=0)

            lb[i_m, :, :] = lb_fun(m, grid, p)
            ub[i_m, :, :] = ub_fun(m, grid, p)

    else:
        with_complementarities = False

    # mdr.set_values(controls)

    sh_c = controls_0.shape

    controls_0 = controls_0.reshape((-1, n_x))

    from dolo.numeric.optimize.newton import newton, SerialDifferentiableFunction
    from dolo.numeric.optimize.ncpsolve import ncpsolve

    err = 10
    it = 0

    if with_complementarities:
        vprint("Solving WITH complementarities.")
        lb = lb.reshape((-1, n_x))
        ub = ub.reshape((-1, n_x))

    if verbose:
        headline = '|{0:^4} | {1:10} | {2:8} | {3:8} | {4:3} |'.format(
            'N', ' Error', 'Gain', 'Time', 'nit')
        stars = '-' * len(headline)
        print(stars)
        print(headline)
        print(stars)

    import time
    t1 = time.time()

    err_0 = numpy.nan

    verbit = (verbose == 'full')

    while err > tol and it < maxit:

        it += 1

        t_start = time.time()

        mdr.set_values(controls_0.reshape(sh_c))

        fn = lambda x: residuals(f, g, grid, x.reshape(sh_c), mdr, P, Q, parms
                                 ).reshape((-1, n_x))
        dfn = SerialDifferentiableFunction(fn)

        if hook:
            hook()

        if with_complementarities:
            [controls, nit] = ncpsolve(dfn,
                                       lb,
                                       ub,
                                       controls_0,
                                       verbose=verbit,
                                       maxit=inner_maxit)
        else:
            [controls, nit] = newton(dfn,
                                     controls_0,
                                     verbose=verbit,
                                     maxit=inner_maxit)

        err = abs(controls - controls_0).max()

        err_SA = err / err_0
        err_0 = err

        controls_0 = controls

        t_finish = time.time()
        elapsed = t_finish - t_start

        if verbose:
            print('|{0:4} | {1:10.3e} | {2:8.3f} | {3:8.3f} | {4:3} |'.format(
                it, err, err_SA, elapsed, nit))

    controls_0 = controls.reshape(sh_c)

    t2 = time.time()

    if verbose:
        print(stars)
        print("Elapsed: {} seconds.".format(t2 - t1))
        print(stars)

    if output_type == 'dr':
        return mdr
    elif output_type == 'controls':
        return controls_0
    else:
        raise Exception("Unsupported ouput type {}.".format(output_type))

Exemple #5

0

Afficher le fichier

def time_iteration(model,
                   initial_guess=None,
                   with_complementarities=True,
                   verbose=True,
                   grid={},
                   output_type='dr',
                   maxit=1000,
                   inner_maxit=10,
                   tol=1e-6,
                   hook=None):
    '''
    Finds a global solution for ``model`` using backward time-iteration.

    This algorithm iterates on the residuals of the arbitrage equations

    Parameters
    ----------
    model : NumericModel
        "dtmscc" model to be solved
    verbose : boolean
        if True, display iterations
    initial_dr : decision rule
        initial guess for the decision rule
    with_complementarities : boolean (True)
        if False, complementarity conditions are ignored
    grid: grid options
    maxit: maximum number of iterations
    inner_maxit: maximum number of iteration for inner solver
    tol: tolerance criterium for successive approximations

    Returns
    -------
    decision rule :
        approximated solution
    '''

    assert (model.model_type == 'dtmscc')

    def vprint(t):
        if verbose:
            print(t)

    [P, Q] = model.markov_chain

    n_ms = P.shape[0]  # number of markov states
    n_mv = P.shape[1]  # number of markov variables

    x0 = model.calibration['controls']
    parms = model.calibration['parameters']
    n_x = len(x0)
    n_s = len(model.symbols['states'])

    approx = model.get_grid(**grid)
    a = approx.a
    b = approx.b
    orders = approx.orders
    interp_type = approx.interpolation  # unused

    from dolo.numeric.decision_rules_markov import MarkovDecisionRule

    mdr = MarkovDecisionRule(n_ms, a, b, orders)

    grid = mdr.grid
    N = grid.shape[0]

    controls_0 = numpy.zeros((n_ms, N, n_x))

    if initial_guess is None:
        controls_0[:, :, :] = x0[None, None, :]
    else:
        for i_m in range(n_ms):
            m = P[i_m, :][None, :]
            controls_0[i_m, :, :] = initial_guess(i_m, grid)

    f = model.functions['arbitrage']
    g = model.functions['transition']

    if 'controls_lb' in model.functions and with_complementarities == True:
        lb_fun = model.functions['controls_lb']
        ub_fun = model.functions['controls_ub']
        lb = numpy.zeros_like(controls_0) * numpy.nan
        ub = numpy.zeros_like(controls_0) * numpy.nan
        for i_m in range(n_ms):
            m = P[i_m, :][None, :]
            p = parms[None, :]
            m = numpy.repeat(m, N, axis=0)
            p = numpy.repeat(p, N, axis=0)

            lb[i_m, :, :] = lb_fun(m, grid, p)
            ub[i_m, :, :] = ub_fun(m, grid, p)

    else:
        with_complementarities = False

    # mdr.set_values(controls)

    sh_c = controls_0.shape

    controls_0 = controls_0.reshape((-1, n_x))

    from dolo.numeric.optimize.newton import newton, SerialDifferentiableFunction
    from dolo.numeric.optimize.ncpsolve import ncpsolve

    err = 10
    it = 0

    if with_complementarities:
        vprint("Solving WITH complementarities.")
        lb = lb.reshape((-1, n_x))
        ub = ub.reshape((-1, n_x))

    if verbose:
        headline = '|{0:^4} | {1:10} | {2:8} | {3:8} | {4:3} |'.format(
            'N', ' Error', 'Gain', 'Time', 'nit')
        stars = '-' * len(headline)
        print(stars)
        print(headline)
        print(stars)

    import time
    t1 = time.time()

    err_0 = numpy.nan

    verbit = (verbose == 'full')

    while err > tol and it < maxit:

        it += 1

        t_start = time.time()

        mdr.set_values(controls_0.reshape(sh_c))

        fn = lambda x: residuals(f, g, grid, x.reshape(sh_c), mdr, P, Q, parms
                                 ).reshape((-1, n_x))
        dfn = SerialDifferentiableFunction(fn)

        if hook:
            hook()

        if with_complementarities:
            [controls, nit] = ncpsolve(dfn,
                                       lb,
                                       ub,
                                       controls_0,
                                       verbose=verbit,
                                       maxit=inner_maxit)
        else:
            [controls, nit] = newton(dfn,
                                     controls_0,
                                     verbose=verbit,
                                     maxit=inner_maxit)

        err = abs(controls - controls_0).max()

        err_SA = err / err_0
        err_0 = err

        controls_0 = controls

        t_finish = time.time()
        elapsed = t_finish - t_start

        if verbose:
            print('|{0:4} | {1:10.3e} | {2:8.3f} | {3:8.3f} | {4:3} |'.format(
                it, err, err_SA, elapsed, nit))

    controls_0 = controls.reshape(sh_c)

    t2 = time.time()

    if verbose:
        print(stars)
        print("Elapsed: {} seconds.".format(t2 - t1))
        print(stars)

    if output_type == 'dr':
        return mdr
    elif output_type == 'controls':
        return controls_0
    else:
        raise Exception("Unsupported ouput type {}.".format(output_type))

Exemple #6

0

Afficher le fichier

def solve_policy(model, grid={}, tol=1e-6, maxit=500,
                 maxit_howard=20, verbose=False):
    """
    Solve for the value function and associated Markov decision rule by iterating over
    the value function.

    Parameters:
    -----------
    model :
        "dtmscc" model. Must contain a 'felicity' function.
    grid :
        grid options
    dr :
        decision rule to evaluate

    Returns:
    --------
    mdr : Markov decision rule
        The solved decision rule/policy function
    mdrv: decision rule
        The solved value function
    """

    assert(model.model_type == 'dtmscc')

    transition = model.functions['transition']
    felicity = model.functions['felicity']
    controls_lb = model.functions['controls_lb']
    controls_ub = model.functions['controls_ub']

    parms = model.calibration['parameters']
    discount = model.calibration['beta']

    x0 = model.calibration['controls']
    m0 = model.calibration['markov_states']
    s0 = model.calibration['states']
    r0 = felicity(m0, s0, x0, parms)

    [P, Q] = model.markov_chain
    n_ms = P.shape[0]   # number of markov states

    approx = model.get_grid(**grid)
    a = approx.a
    b = approx.b
    orders = approx.orders

    MarkovDecisionRule
    mdrv = MarkovDecisionRule(n_ms, a, b, orders)  # values

    grid = mdrv.grid
    N = grid.shape[0]
    n_x = len(x0)

    controls_0 = np.zeros((n_ms, N, n_x))
    controls_0[:, :, :] = model.calibration['controls'][None, None, :]
    #
    values_0 = np.zeros((n_ms, N, 1))
    values_0[:, :, :] = r0/(1-discount)

    itprint = IterationsPrinter(('N', int), ('Error', float), ('Gain', float),
                                ('Time', float), verbose=verbose)
    itprint.print_header('Evaluating value of initial guess')

    # FIRST: value function iterations, 10 iterations to start
    it = 0
    err_v = 100
    err_v_0 = 0.0
    gain_v = 0.0
    err_x = 100
    err_x_0 = 100

    if verbose:
        print('-----')
        print('Starting value function iteration')
        print('-----')

    while it < 10 and err_v > tol:

        t_start = time.time()
        it += 1

        # update interpolation object with current values
        mdrv.set_values(values_0)

        values = values_0.copy()
        controls = controls_0.copy()

        for i_m in range(n_ms):
            for n in range(N):
                m = P[i_m, :]
                s = grid[n, :]
                x = controls[i_m, n, :]
                lb = controls_lb(m, s, parms)
                ub = controls_ub(m, s, parms)
                bnds = [e for e in zip(lb, ub)]

                def valfun(xx):
                    return -choice_value(transition, felicity, i_m, s, xx,
                                         mdrv, P, Q, parms, discount)[0]
                res = scipy.optimize.minimize(valfun, x, bounds=bnds)

                controls[i_m, n, :] = res.x
                values[i_m, n, 0] = -valfun(res.x)

        # compute error, update value and dr
        err_x = abs(controls - controls_0).max()
        err_v = abs(values - values_0).max()
        t_end = time.time()
        elapsed = t_end-t_start

        values_0 = values
        controls_0 = controls

        gain_x = err_x / err_x_0
        gain_v = err_v / err_v_0

        err_x_0 = err_x
        err_v_0 = err_v

        itprint.print_iteration(N=it, Error=err_v, Gain=gain_v,
                                Time=elapsed)

    # SECOND: Howard improvement step, 10-20 iterations
    it = 0
    err_v = 100
    err_v_0 = 0.0
    gain_v = 1.0

    if verbose:
        print('-----')
        print('Starting Howard improvement step')
        print('-----')

    while it < maxit_howard and err_v > tol:

        t_start = time.time()
        it += 1

        # update interpolation object with current values
        mdrv.set_values(values_0)
        values = values_0.copy()

        for i_m in range(n_ms):
            for n in range(N):
                m = P[i_m, :]
                s = grid[n, :]
                x = controls_0[i_m, n, :]
                values[i_m, n, 0] = choice_value(transition, felicity, i_m, s, x, mdrv, P, Q, parms, discount)

        # compute error, update value function
        err_v = abs(values - values_0).max()
        values_0 = values

        t_end = time.time()
        elapsed = t_end-t_start

        gain_v = err_v / err_v_0
        err_v_0 = err_v
        itprint.print_iteration(N=it, Error=err_v, Gain=gain_v, Time=elapsed)
        # vprint(fmt_str.format(it, err_v, gain_v, elapsed))

    # THIRD: value function iterations until convergence
    it = 0
    err_v = 100
    err_v_0 = 0.0
    gain_v = 0.0
    err_x = 100
    err_x_0 = 100

    if verbose:
        print('-----')
        print('Starting value function iteration')
        print('-----')

    while it < maxit and err_v > tol:

        t_start = time.time()
        it += 1

        # update interpolation object with current values
        mdrv.set_values(values_0)

        values = values_0.copy()
        controls = controls_0.copy()

        for i_m in range(n_ms):
            for n in range(N):
                m = P[i_m, :]
                s = grid[n, :]
                x = controls[i_m, n, :]
                lb = controls_lb(m, s, parms)
                ub = controls_ub(m, s, parms)
                bnds = [e for e in zip(lb, ub)]

                def valfun(xx):
                    return -choice_value(transition, felicity, i_m, s, xx, mdrv, P, Q, parms, discount)[0]
                res = scipy.optimize.minimize(valfun, x, bounds=bnds)

                controls[i_m, n, :] = res.x
                values[i_m, n, 0] = -valfun(res.x)

        # compute error, update value and dr
        err_x = abs(controls - controls_0).max()
        err_v = abs(values - values_0).max()
        t_end = time.time()
        elapsed = t_end-t_start

        values_0 = values
        controls_0 = controls

        gain_x = err_x / err_x_0
        gain_v = err_v / err_v_0

        err_x_0 = err_x
        err_v_0 = err_v

        itprint.print_iteration(N=it,
                                Error=err_v,
                                Gain=gain_v,
                                Time=elapsed)

    itprint.print_finished()


    itprint = IterationsPrinter(('N', int), ('Error_V', float), ('Gain_V', float),
                                ('Error_x', float), ('Gain_x', float), ('Time', float), verbose=verbose)
    itprint.print_header('Start value function iterations.')

    # if verbose:
    #     print('Finished iterating on value function only. Starting value with policy iteration.')

    # final value function and decision rule
    mdr = MarkovDecisionRule(n_ms, a, b, orders)  # values
    mdr.set_values(controls)
    mdrv.set_values(values_0)

    itprint.print_finished()

    return mdr, mdrv

Exemple #7

0

Afficher le fichier

def evaluate_policy(model, mdr, tol=1e-8,  maxit=2000, grid={}, verbose=True, initial_guess=None, hook=None, integration_orders=None):

    """Compute value function corresponding to policy ``dr``

    Parameters:
    -----------

    model:
        "dtcscc" model. Must contain a 'value' function.

    mdr:
        decision rule to evaluate

    Returns:
    --------

    decision rule:
        value function (a function of the space similar to a decision rule
        object)

    """

    assert(model.model_type == 'dtmscc')

    [P, Q] = model.markov_chain

    n_ms = P.shape[0]   # number of markov states
    n_mv = P.shape[1]   # number of markov variables

    x0 = model.calibration['controls']
    v0 = model.calibration['values']
    parms = model.calibration['parameters']
    n_x = len(x0)
    n_v = len(v0)
    n_s = len(model.symbols['states'])

    approx = model.get_grid(**grid)
    a = approx.a
    b = approx.b
    orders = approx.orders

    from dolo.numeric.decision_rules_markov import MarkovDecisionRule
    mdrv = MarkovDecisionRule(n_ms, a, b, orders) # values

    grid = mdrv.grid
    N = grid.shape[0]

    controls = np.zeros((n_ms, N, n_x))
    for i_m in range(n_ms):
        controls[i_m, :, :] = mdr(i_m, grid) #x0[None,:]

    values_0 = np.zeros((n_ms, N, n_v))
    if initial_guess is None:
        for i_m in range(n_ms):
            values_0[i_m, :, :] = v0[None, :]
    else:
        for i_m in range(n_ms):
            values_0[i_m, :, :] = initial_guess(i_m, grid)

    val = model.functions['value']
    g = model.functions['transition']

    sh_v = values_0.shape

    err = 10
    inner_maxit = 50
    it = 0

    if verbose:
        headline = '|{0:^4} | {1:10} | {2:8} | {3:8} |'.format( 'N',' Error', 'Gain','Time')
        stars = '-'*len(headline)
        print(stars)
        print(headline)
        print(stars)

    t1 = time.time()

    err_0 = np.nan

    verbit = (verbose == 'full')

    while err>tol and it<maxit:

        it += 1

        t_start = time.time()

        mdrv.set_values(values_0.reshape(sh_v))

        values = update_value(val, g, grid, controls, values_0, mdr, mdrv, P, Q, parms).reshape((-1,n_v))

        err = abs(values.reshape(sh_v)-values_0).max()

        err_SA = err/err_0
        err_0 = err

        values_0 = values.reshape(sh_v)

        t_finish = time.time()
        elapsed = t_finish - t_start

        if verbose:
            print('|{0:4} | {1:10.3e} | {2:8.3f} | {3:8.3f} |'.format( it, err, err_SA, elapsed  ))

    # values_0 = values.reshape(sh_v)

    t2 = time.time()

    if verbose:
        print(stars)
        print("Elapsed: {} seconds.".format(t2-t1))
        print(stars)

    return mdrv