def __init__(self, config=None):
    """Initialize the replay memory: build the Experience record type and
    start with an empty buffer.

    :param config: configuration object; when omitted, a fresh ``default()``
        config is created per instance.
    """
    # Fix: the original signature was `config=default()`. A call expression in
    # a default is evaluated ONCE when `def` executes, so every instance would
    # silently share the same config object. Evaluate it per call instead.
    if config is None:
        config = default()
    super().__init__(config)
    # Record type for one transition stored in the buffer.
    self.experience = namedtuple(
        "Experience",
        field_names=["state", "action", "reward", "next_state", "done"])
    logger.info(describe(self))
    # Start from an empty memory.
    self.clear()
def __init__(self, config, input_state_size, output_action_size, fc1_units=400, fc2_units=300):
    """Build the four DDPG networks: local/target actor and local/target critic.

    :param config: configuration object forwarded to the base class and nets
    :param input_state_size: dimensionality of the observation vector
    :param output_action_size: dimensionality of the action vector
    :param fc1_units: width of the first fully-connected layer (default 400)
    :param fc2_units: width of the second fully-connected layer (default 300)
    """
    super(DDPG_Networks, self).__init__(config, input_state_size, output_action_size)
    # Local networks are trained directly; target networks are their
    # slowly-tracking copies used for stable TD targets.
    self.actor_local = LowDimActor(config, self.input_state_size, self.output_action_size, fc1_units, fc2_units)
    self.actor_target = LowDimActor(config, self.input_state_size, self.output_action_size, fc1_units, fc2_units)
    self.critic_local = LowDimCritic(config, self.input_state_size, self.output_action_size, fc1_units, fc2_units)
    self.critic_target = LowDimCritic(config, self.input_state_size, self.output_action_size, fc1_units, fc2_units)
    # Fix: logger.warn() is a deprecated alias of warning() in the stdlib
    # logging module; same level, supported spelling.
    logger.warning('DDPG_Networks is initialized!')
    logger.info(utils.describe(self))
def __init__(self, config):
    """Create and wrap an OpenAI Gym environment named by the config."""
    super(GymEnv, self).__init__(config)
    # Build the underlying gym env once and keep a local handle for setup.
    env = gym.make(self.name)
    self._env = env
    # Mirror the wrapped env's attributes (spaces etc.) onto this wrapper.
    self._set_attr_from_u_env(env)
    # Seed for reproducibility before first use.
    env.seed(self.seed)
    logger.info(utils.describe(self))
def __init__(self, config):
    """Create and wrap a Unity ML-Agents environment named by the config.

    :param config: configuration object providing ``name`` and ``seed``
    """
    super(UnityEnv, self).__init__(config)
    self._env = UnityEnvironment(file_name=get_env_path(self.name), seed=self.seed)
    # Standardize the Unity env to gym-style observation/action spaces.
    self.patch_gym_spaces(self._env)
    self._set_attr_from_u_env(self._env)
    # Fix (resolves the former "TODO: Logging"): emit the description through
    # the logging framework instead of a bare print() to stdout.
    import logging
    logging.getLogger(__name__).info(utils.describe(self))
def patch_gym_spaces(self, env):
    r"""
    For standardization, use gym spaces to represent observation and action
    spaces for Unity.

    Iterates through the multiple brains (multi-agent), then constructs and
    returns lists of observation_spaces and action_spaces. The first brain's
    spaces are additionally set on ``env`` itself for the singleton case.

    :param env: Unity environment exposing ``brain_names``
    :return: ``(observation_spaces, action_spaces)`` lists, one entry per brain
    """
    observation_spaces = []
    action_spaces = []
    for brain_index in range(len(env.brain_names)):
        brain = self._get_brain(env, brain_index)
        # TODO: Logging — the describe() result is currently discarded.
        utils.describe(brain)
        observation_shape = (brain.get_observable_dim()['state'],)
        action_dim = (brain.get_action_dim(),)
        if brain.is_discrete():
            action_space = spaces.Discrete(brain.get_action_dim())
        else:
            action_space = spaces.Box(low=0.0, high=1.0, shape=action_dim, dtype=np.float32)
        # Fix: the original reused the action branch's dtype for the
        # observation Box, so discrete-action brains got an int32 observation
        # space. The observation here is a fixed-length state vector, so its
        # dtype must not depend on whether the *action* space is discrete.
        observation_space = spaces.Box(low=0, high=1, shape=observation_shape, dtype=np.float32)
        utils.set_gym_space_attr(observation_space)
        utils.set_gym_space_attr(action_space)
        observation_spaces.append(observation_space)
        action_spaces.append(action_space)
    # set for singleton
    env.observation_space = observation_spaces[0]
    env.action_space = action_spaces[0]
    return observation_spaces, action_spaces
def __init__(self, env: BaseEnv, models: tuple, optims: tuple,
             noise: OUNoise, configs: Configs = None):
    """Initialize the DDPG agent with its networks, optimizers and noise.

    :param env: environment the agent interacts with
    :param models: (actor, actor_target, critic, critic_target) networks
    :param optims: (actor_optimizer, critic_optimizer)
    :param noise: Ornstein-Uhlenbeck exploration noise process
    :param configs: configuration; when omitted, a fresh ``default()``
        config is created per instance.
    """
    # Fix: the original default was `configs=default()`, which is evaluated
    # once at definition time, so every agent would share one Configs object.
    if configs is None:
        configs = default()
    super(DDPGAgent, self).__init__(env, configs)
    self.actor, self.actor_target, self.critic, self.critic_target = models
    self.actor_optimizer, self.critic_optimizer = optims
    # Placeholder until the first environment reset provides a real state.
    self.curr_state = np.zeros((1,))
    self.noise = noise
    # Step/episode counters.
    self.total_step = 0
    self.episode_step = 0
    self.i_episode = 0
    # Optionally resume from a checkpoint path given in the global config.
    if configs.glob.load_from is not None and os.path.exists(configs.glob.load_from):
        self.load_params(configs.glob.load_from)
    self._initialize()
    logger.info(describe(self))