class Single(immutable('x')):
    """Record type whose base class is generated by ``immutable('x')``.

    The only declared member is ``x``; the subclass adds nothing of its own.
    """
class DerivedWithNew(immutable(['x', 'y'])):
    """Subclass of ``immutable(['x', 'y'])`` that supplies its own ``__new__``.

    The override forwards both members unchanged to the generated base class.
    """

    def __new__(cls, x, y):
        # Explicit two-argument super() keeps the original call form.
        generated_base = super(DerivedWithNew, cls)
        return generated_base.__new__(cls, x, y)
class Empty(immutable(verbose=True)):
    """Record type built from ``immutable`` with no members declared.

    ``verbose=True`` is passed through to ``immutable`` unchanged.
    """
def make_default_env():
    """Build and return the default global environment.

    A fresh ``AttrDict`` is populated, in order, with the Python builtins
    (minus ``exec``), the public contents of ``functools``, ``itertools`` and
    ``operator``, the persistent-collection helpers, unbound
    ``MutableSequence`` / ``MutableMapping`` methods, symbolic operator
    aliases, the actor primitives and a few module-level dunder values.
    Later assignments overwrite earlier entries that share a key.

    Returns:
        The populated ``AttrDict``.
    """
    env = AttrDict()

    # ``__builtins__`` may be either a dict or a module object; handle both.
    # The merge is done exactly once.
    if isinstance(__builtins__, dict):
        env.update(__builtins__)
    else:
        env.update(__builtins__.__dict__)
    env['Symbol'] = Symbol
    env['Keyword'] = Keyword

    del env['exec']
    # del env['globals']
    # del env['locals']

    # Whole-module exports. These also copy module dunders such as
    # ``__doc__``; the ones that matter are reset explicitly further down.
    env.update(functools.__dict__)
    env.update(itertools.__dict__)
    env.update(operator.__dict__)

    # NOTE(review): the source indentation was lost; only the ``__spec__``
    # line is taken to be version-gated -- confirm against the original.
    if GE_PYTHON_33:
        env['__spec__'] = sys.modules[__name__].__spec__

    # Abstract base classes, registered under their own class names.
    env[Iterable.__name__] = Iterable
    env[Sequence.__name__] = Sequence
    env[Mapping.__name__] = Mapping

    # Persistent collection constructors, types and helpers.
    env['v'] = v
    env['pvector'] = pvector
    env['CheckedPVector'] = CheckedPVector
    env['m'] = m
    env['pmap'] = pmap
    env['s'] = s
    env['pset'] = pset
    env['l'] = l
    env['plist'] = plist
    env['b'] = b
    env['pbag'] = pbag
    env['dq'] = dq
    env['pdeque'] = pdeque
    env['thaw'] = thaw
    env['freeze'] = freeze
    env['immutable'] = immutable
    env['PVector'] = PVector
    env['PMap'] = PMap
    env['PSet'] = PSet
    env['PList'] = PList
    env['PBag'] = PBag

    # NOTE(review): the extent of this branch is reconstructed from collapsed
    # source; these five names are assumed to be the ones unavailable on
    # PyPy -- confirm against the original.
    if not IS_PYPY:
        env['union'] = union
        env['options'] = options
        env['optional'] = optional
        env['only'] = only
        env['predicate'] = predicate
    env[Number.__name__] = Number

    # Unbound MutableSequence methods.
    env['append'] = MutableSequence.append
    # env['clear'] = MutableSequence.clear  # not supported (pypy)
    env['seq_count'] = MutableSequence.count
    env['extend'] = MutableSequence.extend
    env['insert'] = MutableSequence.insert
    env['pop'] = MutableSequence.pop
    env['remove'] = MutableSequence.remove
    env['reverse'] = MutableSequence.reverse

    # Unbound MutableMapping methods.
    # NOTE(review): from here on the keys 'update', 'pop', 'items', ... exist
    # in env; if AttrDict exposes keys as attributes, ``env.update`` may be
    # shadowed, so only item assignment is used below.
    env['mapping_get'] = MutableMapping.get
    env['items'] = MutableMapping.items
    env['values'] = MutableMapping.values
    env['keys'] = MutableMapping.keys
    env['mapping_pop'] = MutableMapping.pop
    env['popitem'] = MutableMapping.popitem
    env['setdefault'] = MutableMapping.setdefault
    env['update'] = MutableMapping.update

    env['doall'] = pvector
    env['nth'] = operator.getitem

    # Symbolic and keyword-like aliases for the operator functions.
    env['+'] = operator.add
    env['-'] = operator.sub
    env['/'] = operator.truediv
    env['*'] = operator.mul
    env['%'] = operator.mod
    env['**'] = operator.pow
    env['<<'] = operator.lshift
    env['>>'] = operator.rshift
    env['//'] = operator.floordiv
    env['=='] = operator.eq
    env['!='] = operator.ne
    env['>'] = operator.gt
    env['>='] = operator.ge
    env['<'] = operator.lt
    env['<='] = operator.le
    env['not'] = operator.not_
    env['and'] = operator.and_
    env['or'] = operator.or_
    env['is'] = operator.is_
    env['isnt'] = operator.is_not

    env['re'] = re
    env['True'] = True
    env['False'] = False
    env['None'] = None
    env['gensym'] = get_temp_name
    env['uniq'] = get_temp_name
    env['Record'] = immutable((), 'Record')

    # Actor primitives.
    env['spawn'] = actor.spawn
    env['spawn_with_mailbox'] = actor.spawn_with_mailbox
    env['send'] = actor.send
    env['recv'] = actor.recv
    env['ack_last_msg'] = actor.ack_last_msg
    env['ack'] = actor.ack
    env['link'] = actor.link
    env['unlink'] = actor.unlink
    env['kill'] = actor.kill
    env['cancel'] = actor.cancel
    env['self'] = actor.self
    env['sleep'] = actor.sleep
    env['wait_all'] = actor.wait_all
    env['wait'] = actor.wait

    # ``__loader__`` is not guaranteed to exist as a module global; a missing
    # name is the only failure that is deliberately ignored here.
    try:
        env['__loader__'] = __loader__
    except NameError:
        pass
    # Reset the dunders that the module-dict merges above overwrote.
    env['__package__'] = __package__
    env['__doc__'] = __doc__

    if IS_PYPY:
        # Imported lazily: ``_continuation`` is only importable on PyPy.
        from _continuation import continulet
        env['continulet'] = continulet
    return env
class FrozenMember(immutable('x, y_')):
    """Record type generated from ``immutable('x, y_')``.

    Declares the members ``x`` and ``y_``.
    NOTE(review): presumably the trailing underscore marks ``y_`` as a frozen
    member in the ``immutable`` factory -- confirm against its documentation.
    """
class Unmonitor(immutable('sender', name='_Unmonitor')):
    """Record type with the single member ``sender``.

    The base class is generated by ``immutable`` with ``name='_Unmonitor'``.
    """
def make_default_env():
    """Build and return the default global environment.

    A fresh ``AttrDict`` is populated, in order, with the Python builtins
    (minus ``exec``), the public contents of ``functools``, ``itertools`` and
    ``operator``, the persistent-collection helpers, unbound
    ``MutableSequence`` / ``MutableMapping`` methods, symbolic operator
    aliases and a few module-level dunder values. Later assignments
    overwrite earlier entries that share a key.

    Returns:
        The populated ``AttrDict``.
    """
    env = AttrDict()

    # ``__builtins__`` may be either a dict or a module object; handle both.
    # The merge is done exactly once.
    if isinstance(__builtins__, dict):
        env.update(__builtins__)
    else:
        env.update(__builtins__.__dict__)
    env['Symbol'] = Symbol
    env['Keyword'] = Keyword

    del env['exec']
    # del env['globals']
    # del env['locals']

    # Whole-module exports. These also copy module dunders such as
    # ``__doc__``; the ones that matter are reset explicitly further down.
    env.update(functools.__dict__)
    env.update(itertools.__dict__)
    env.update(operator.__dict__)
    # ``operator.__dict__`` carries its own 'pow'; rebind the builtin one.
    env['pow'] = pow

    # NOTE(review): the source indentation was lost; only the ``__spec__``
    # line is taken to be version-gated -- confirm against the original.
    if GE_PYTHON_33:
        env['__spec__'] = sys.modules[__name__].__spec__

    # Abstract base classes, registered under their own class names.
    env[Iterable.__name__] = Iterable
    env[Sequence.__name__] = Sequence
    env[Mapping.__name__] = Mapping

    # Persistent collection constructors, types and helpers.
    env['v'] = v
    env['pvector'] = pvector
    env['CheckedPVector'] = CheckedPVector
    env['m'] = m
    env['pmap'] = pmap
    env['s'] = s
    env['pset'] = pset
    env['l'] = l
    env['plist'] = plist
    env['b'] = b
    env['pbag'] = pbag
    env['dq'] = dq
    env['pdeque'] = pdeque
    env['thaw'] = thaw
    env['freeze'] = freeze
    env['immutable'] = immutable
    env['PVector'] = PVector
    env['PMap'] = PMap
    env['PSet'] = PSet
    env['PList'] = PList
    env['PBag'] = PBag

    # NOTE(review): the extent of this branch is reconstructed from collapsed
    # source; these five names are assumed to be the ones unavailable on
    # PyPy -- confirm against the original.
    if not IS_PYPY:
        env['union'] = union
        env['options'] = options
        env['optional'] = optional
        env['only'] = only
        env['predicate'] = predicate
    env[Number.__name__] = Number

    # Unbound MutableSequence methods.
    env['append'] = MutableSequence.append
    # env['clear'] = MutableSequence.clear  # not supported (pypy)
    env['seq_count'] = MutableSequence.count
    env['extend'] = MutableSequence.extend
    env['insert'] = MutableSequence.insert
    env['pop'] = MutableSequence.pop
    env['remove'] = MutableSequence.remove
    env['reverse'] = MutableSequence.reverse

    # Unbound MutableMapping methods.
    # NOTE(review): from here on the keys 'update', 'pop', 'items', ... exist
    # in env; if AttrDict exposes keys as attributes, ``env.update`` may be
    # shadowed, so only item assignment is used below.
    env['mapping_get'] = MutableMapping.get
    env['items'] = MutableMapping.items
    env['values'] = MutableMapping.values
    env['keys'] = MutableMapping.keys
    env['mapping_pop'] = MutableMapping.pop
    env['popitem'] = MutableMapping.popitem
    env['setdefault'] = MutableMapping.setdefault
    env['update'] = MutableMapping.update

    env['doall'] = pvector
    env['nth'] = operator.getitem

    # Symbolic and keyword-like aliases for the operator functions.
    env['+'] = operator.add
    env['-'] = operator.sub
    env['/'] = operator.truediv
    env['*'] = operator.mul
    env['%'] = operator.mod
    env['**'] = operator.pow
    env['<<'] = operator.lshift
    env['>>'] = operator.rshift
    env['//'] = operator.floordiv
    env['=='] = operator.eq
    env['!='] = operator.ne
    env['>'] = operator.gt
    env['>='] = operator.ge
    env['<'] = operator.lt
    env['<='] = operator.le
    env['not'] = operator.not_
    env['and'] = operator.and_
    env['or'] = operator.or_
    env['is'] = operator.is_
    env['isnt'] = operator.is_not

    env['re'] = re
    env['True'] = True
    env['False'] = False
    env['None'] = None
    env['gensym'] = get_temp_name
    env['uniq'] = get_temp_name
    env['Record'] = immutable((), 'Record')
    # Python's own eval is exposed under a distinct key.
    env['py_eval'] = eval

    # ``__loader__`` is not guaranteed to exist as a module global; a missing
    # name is the only failure that is deliberately ignored here.
    try:
        env['__loader__'] = __loader__
    except NameError:
        pass
    # Reset the dunders that the module-dict merges above overwrote.
    env['__package__'] = __package__
    env['__doc__'] = __doc__

    if IS_PYPY:
        # Imported lazily: ``_continuation`` is only importable on PyPy.
        from _continuation import continulet
        env['continulet'] = continulet
    return env
class Monitor(immutable('sender', name='_Monitor')):
    """Record type with the single member ``sender``.

    The base class is generated by ``immutable`` with ``name='_Monitor'``.
    """
class Down(immutable('sender, reason', name='_Down')):
    """Record type with the members ``sender`` and ``reason``.

    The base class is generated by ``immutable`` and named ``_Down``,
    following the ``_<ClassName>`` pattern used for the generated base of the
    sibling record classes.
    """
class ForkWithMonitor(immutable('sender, func, args, kwargs', name='_ForkWithMonitor')):
    """Record type with the members ``sender``, ``func``, ``args``, ``kwargs``.

    The base class is generated by ``immutable`` with
    ``name='_ForkWithMonitor'``.
    """
class ForkResponse(immutable('new_actor ', name='_ForkResponse')):
    """Record type with the single member ``new_actor``.

    The base class is generated by ``immutable`` with
    ``name='_ForkResponse'``.
    """
class Fork(immutable('sender, func, args, kwargs', name='_Fork')):
    """Record type with the members ``sender``, ``func``, ``args``, ``kwargs``.

    The base class is generated by ``immutable`` with ``name='_Fork'``.
    """
class Kill(immutable('sender', name='_Kill')):
    """Record type with the single member ``sender``.

    The base class is generated by ``immutable`` with ``name='_Kill'``.
    """
class Cancel(immutable('sender', name='_Cancel')):
    """Record type with the single member ``sender``.

    The base class is generated by ``immutable`` with ``name='_Cancel'``.
    """
if not true_with_prob(e.epsi): return choose_greedy(e, o) else: epsi_a = e.act_space.sample() epsi_feat = e.feature_vec(o, epsi_a) return epsi_feat.dot(e.theta), epsi_a, epsi_feat #### Main # Note: I realize that this is getting unwieldy. At some point I should turn # this into a learner object that modifies itself, but returns stuff that is not # modified. Maybe. LinfaExperience = pyrsistent.immutable( 'feature_vec, theta, E, epsi, init_alpha, p_alpha, lmbda,' ' gamma, p_obs, p_act, p_feat, act_space,' ' is_use_alpha_bounds, map_obs, choose_action') # pylint: disable=too-many-arguments def init(lmbda, init_alpha, epsi, feature_vec, n_weights, act_space, theta=None, is_use_alpha_bounds=False, map_obs=lambda x: x, choose_action=choose_action_Sarsa, gamma=1.0): """ Arguments: feature_vec - function mapping (observation, action) pairs to FeatureVecs n_weights - number of weights == length of the feature vectors act_space - the OpenAI gym.spaces.discrete.Discrete action space of the problem """
def grouper(iterable, n, fillvalue=None): "Collect data into fixed-length chunks or blocks" # grouper('ABCDEFG', 3, 'x') --> ABC DEF Gxx args = [iter(iterable)] * n return itertools.izip_longest(fillvalue=fillvalue, *args) def pairwise(iterable): "s -> (s0,s1), (s1,s2), (s2, s3), ..." a, b = itertools.tee(iterable) next(b, None) return izip(a, b) # Timestep that the driver sees DTimestep = pyrsistent.immutable( 'observation, reward, done, experience, action', name='DTimestep') # pylint: disable=too-many-arguments, too-many-locals def train(env, learner, experience, n_episodes, max_steps, is_render=False, is_continuing_env=False): steps_per_episode = np.zeros(n_episodes, dtype=np.int32) alpha_per_episode = np.empty(n_episodes) for n_episode in xrange(n_episodes): observation = env.reset() reward = 0 done = False t = 0