def wrap_policy(a_net, wrapper):
    """Wraps actor network with desired randomization.

    Args:
        a_net: Actor network handed to the selected policy class.
        wrapper: Indexable spec whose first element names the wrapper kind
            and whose remaining elements are that kind's parameters:
            ('none',), ('eps', eps), ('gaussian', std) or
            ('gaussianeps', std, eps).

    Returns:
        The policy object constructed around `a_net`.

    Raises:
        ValueError: If `wrapper[0]` is not one of the supported kinds.
    """
    kind = wrapper[0]
    if kind == 'none':
        policy = policies.RandomSoftPolicy(a_net)
    elif kind == 'eps':
        policy = policies.EpsilonGreedyRandomSoftPolicy(a_net, wrapper[1])
    elif kind == 'gaussian':
        policy = policies.GaussianRandomSoftPolicy(a_net, std=wrapper[1])
    elif kind == 'gaussianeps':
        policy = policies.GaussianEpsilonGreedySoftPolicy(
            a_net, std=wrapper[1], eps=wrapper[2])
    else:
        # Without this branch `policy` would be unbound and the return below
        # would fail with an UnboundLocalError that hides the real cause.
        raise ValueError(
            f"Unknown policy wrapper kind {kind!r}; expected one of "
            "'none', 'eps', 'gaussian', 'gaussianeps'.")
    return policy
def _build_online_policy(self):
    """Returns a RandomSoftPolicy built on the agent module's `p_net`."""
    actor_network = self._agent_module.p_net
    return policies.RandomSoftPolicy(a_network=actor_network)