def __init__(self, algo, pol, phi=None, update_params=dict()):
    self.algo = algo
    self.pol = pol
    # set the feature function
    if phi is None:
        self.phi = lambda x: x
    else:
        self.phi = phi
    # default parameter functions to use for updates
    self.param_funcs = {k: parametric.to_parameter(v)
                        for k, v in update_params.items()}
def __init__(self, algo, target, behavior, phi=None, update_params=dict()):
    self.algo = algo
    self.target = target
    self.behavior = behavior
    # set the feature function
    if phi is None:
        self.phi = lambda x: x
    else:
        self.phi = phi
    # default parameters to use for updating
    self.param_funcs = {k: parametric.to_parameter(v)
                        for k, v in update_params.items()}
def __init__(self, algo, behavior, phi=None, update_params=dict()):
    self.algo = algo
    self.behavior = behavior
    # set the feature function
    if phi is None:
        self.phi = lambda x: x
    else:
        self.phi = phi
    # default parameters to use for updating
    self.param_funcs = {k: parametric.to_parameter(v)
                        for k, v in update_params.items()}
    # in the on-policy setting, `rho` is always equal to one
    self.rho = 1
def stepwise_return(lst, gamma):
    """Compute the return at each step of a trajectory.

    Uses the fact that the return at each step, working backwards from the
    end of the trajectory, is the immediate reward plus the discounted
    return from the next state.
    """
    # convert gamma to a state-dependent parameter
    gamma = to_parameter(gamma)
    rewards = get_rewards(lst)
    gmlst = get_gammas(lst, gamma)
    # accumulate returns backwards from the end of the trajectory
    ret = []
    tmp = 0
    for r, gm in reversed(list(zip(rewards, gmlst))):
        tmp *= gm
        tmp += r
        ret.append(tmp)
    return list(reversed(ret))
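# A small worked example of `stepwise_return` (a sketch, assuming `get_rewards`
# and `get_states` read the 'r' and 's' keys of the context dicts produced by
# `run_policy_verbose`): with rewards [1, 1, 1] and a constant gamma of 0.5,
# the backwards recursion gives G_2 = 1, then G_1 = 1 + 0.5*1 = 1.5, and
# finally G_0 = 1 + 0.5*1.5 = 1.75.
#
#     traj = [{'s': 0, 'r': 1.0}, {'s': 1, 'r': 1.0}, {'s': 2, 'r': 1.0}]
#     stepwise_return(traj, 0.5)  # --> [1.75, 1.5, 1.0]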
def run_policy_verbose(pol, env, max_steps, param_funcs=dict()):
    """Run a policy in an environment for a specified number of steps.

    Provides enough information to run the online algorithms offline by
    recording each step's entire context, potentially including the values
    of parameter functions at each point in time.
    """
    ret = []
    t = 0
    # convert parameter functions to `Parameter` type, if needed
    param_funcs = {k: to_parameter(v) for k, v in param_funcs.items()}
    # reset the environment and get the initial state
    env.reset()
    s = env.state
    while not env.is_terminal() and t < max_steps:
        # record the context of the time step
        actions = env.actions
        a = pol.choose(s, actions)
        r, sp = env.do(a)
        # record the transition information
        ctx = {'s': s, 'a': a, 'r': r, 'sp': sp, 'actions': actions}
        # record values of parameters for the transition
        for name, func in param_funcs.items():
            ctx[name] = func(s, a, sp)
        # log the context of the transition
        ret.append(ctx)
        # prepare for the next iteration
        s = sp
        t += 1
    return ret
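# A hedged usage sketch for `run_policy_verbose`. It assumes only what the
# function itself requires: a policy with a `choose(s, actions)` method and an
# environment exposing `reset()`, `state`, `actions`, `is_terminal()`, and
# `do(a)`. Parameter values supplied via `param_funcs` (here a constant
# `gamma`) are recorded in each context dict alongside the transition:
#
#     traj = run_policy_verbose(pol, env, max_steps=1000,
#                               param_funcs={'gamma': 0.99})
#     traj[0]
#     # {'s': ..., 'a': ..., 'r': ..., 'sp': ..., 'actions': [...], 'gamma': 0.99}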
def stepwise_params(lst, param):
    """Evaluate a parameter function at each state in a trajectory."""
    # convert to a state-dependent parameter, if needed
    param = to_parameter(param)
    return [param(s) for s in get_states(lst)]
def get_gammas(lst, gamma):
    """Evaluate the discount parameter at each state in a trajectory."""
    # convert gamma to a state-dependent parameter
    gamma = to_parameter(gamma)
    return [gamma(s) for s in get_states(lst)]
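# Both constants and callables should work anywhere a parameter is expected,
# assuming `to_parameter` wraps constants into functions that ignore their
# arguments (consistent with its use above, where parameters are called with
# one argument in `get_gammas` but three in `run_policy_verbose`). `TERMINAL`
# below is a hypothetical sentinel state used only for illustration:
#
#     get_gammas(traj, 0.9)                               # fixed discount rate
#     get_gammas(traj, lambda s: 0.9 * (s != TERMINAL))   # state-dependent discount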