import itertools

import gym
import numpy as np
# animalai v1.x import paths (assumed to match the package used by this snippet)
from animalai.envs import UnityEnvironment
from animalai.envs.arena_config import ArenaConfig


class AnimalAIWrapper(gym.Env):
    def __init__(self, worker_id, env_path, config_path,
                 reduced_actions=False, docker_training=False):
        super(AnimalAIWrapper, self).__init__()
        self.config = ArenaConfig(config_path)
        self.time_limit = self.config.arenas[0].t
        self.env = UnityEnvironment(
            file_name=env_path,
            worker_id=worker_id,
            seed=worker_id,
            n_arenas=1,
            arenas_configurations=self.config,
            docker_training=docker_training,
        )

        # Build a lookup table from a discrete action index to the 2-branch
        # AnimalAI action (forward/backward, left/right) expected by Unity.
        lookup_func = lambda a: {"Learner": np.array([a], dtype=float)}
        if reduced_actions:
            lookup = itertools.product([0, 1], [0, 1, 2])
        else:
            lookup = itertools.product([0, 1, 2], repeat=2)
        lookup = dict(enumerate(map(lookup_func, lookup)))
        self.action_map = lambda a: lookup[a]

        self.observation_space = gym.spaces.Box(0, 255, [84, 84, 3], dtype=np.uint8)
        self.action_space = gym.spaces.Discrete(len(lookup))
        self.t = 0

    def _process_state(self, state):
        img = 255 * state["Learner"].visual_observations[0][0]
        vec = state["Learner"].vector_observations[0]
        r = state["Learner"].rewards[0]
        done = state["Learner"].local_done[0]
        return np.uint8(img), vec, r, done

    def reset(self):
        self.t = 0
        img, vec, r, done = self._process_state(
            self.env.reset(arenas_configurations=self.config))
        # Keep resetting until we get a state that is not already terminal.
        while done:
            img, vec, r, done = self._process_state(
                self.env.reset(arenas_configurations=self.config))
        return img

    def step(self, action):
        obs, vec, r, done = self._process_state(
            self.env.step(vector_action=self.action_map(action.item())))
        self.t += 1
        done = done or self.t >= self.time_limit
        return obs, r, done, {}
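# --- Usage sketch (not from the original source) ----------------------------
# A minimal random rollout through AnimalAIWrapper. The binary path and YAML
# file below are placeholders; actions are wrapped in np.array so that the
# .item() call inside step() works regardless of what sample() returns.
if __name__ == '__main__':
    env = AnimalAIWrapper(worker_id=1,
                          env_path='env/AnimalAI',            # placeholder binary path
                          config_path='configs/1-Food.yaml',  # placeholder arena config
                          reduced_actions=False)
    obs = env.reset()
    episode_reward, done = 0.0, False
    while not done:
        action = np.array(env.action_space.sample())  # random policy for illustration
        obs, reward, done, _ = env.step(action)
        episode_reward += reward
    print('episode reward:', episode_reward)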
def _create_environment(config_filepath):
    # Try worker_ids 0-9 until one is free (each id maps to a distinct port).
    worker_id = 0
    while worker_id < 10:
        try:
            env = UnityEnvironment(
                file_name=ENVIRONMENT_FILEPATH,      # Path to the environment
                worker_id=worker_id,                 # Unique ID for running the environment (used for connection)
                seed=int(os.getenv('ENV_SEED', 0)),  # The random seed
                docker_training=False,               # Whether or not you are training inside a docker container
                n_arenas=1,                          # Number of arenas in your environment
                play=False,                          # Set to False for training
                inference=False,                     # Set to True to watch your agent in action
                resolution=None                      # Int: resolution of the agent's square camera (in [4, 512], default 84)
            )
            break
        except UnityWorkerInUseException:
            worker_id += 1
            print('Increasing worker_id: %i' % worker_id)

    arena_config = ArenaConfig(config_filepath)
    env.reset(arenas_configurations=arena_config, train_mode=True)
    return env, arena_config
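# --- Usage sketch (not from the original source) ----------------------------
# _create_environment retries worker_ids 0-9 until it finds a free connection,
# then resets with the given ArenaConfig. ENVIRONMENT_FILEPATH is a
# module-level constant in the original script; the YAML path is a placeholder.
if __name__ == '__main__':
    env, arena_config = _create_environment('configs/1-Food.yaml')
    info = env.reset(arenas_configurations=arena_config, train_mode=True)['Learner']
    print('visual obs shape:', info.visual_observations[0].shape)  # (n_arenas, 84, 84, 3)
    env.close()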
class AnimalAIEnv(gym.Env):
    """
    Provides a Gym wrapper for Unity Learning Environments.
    Multi-agent environments use lists for object types, as done here:
    https://github.com/openai/multiagent-particle-envs
    """

    def __init__(self,
                 environment_filename: str,
                 worker_id=0,
                 docker_training=False,
                 n_arenas=1,
                 seed=0,
                 arenas_configurations=None,
                 greyscale=False,
                 retro=True,
                 inference=False,
                 resolution=None):
        """
        Environment initialization
        :param environment_filename: The UnityEnvironment path or file to be wrapped in the gym.
        :param worker_id: Worker number for environment.
        :param docker_training: Whether this is running within a docker environment and should
                use a virtual frame buffer (xvfb).
        :param n_arenas: Number of arenas to create in the environment (one agent per arena).
        :param seed: Random seed for the Unity environment.
        :param arenas_configurations: An ArenaConfig describing the items present in each arena;
                objects are spawned randomly if not provided.
        :param greyscale: Whether the visual observations should be greyscaled or not.
        :param retro: Resize visual observations to 84x84 (uint8) and flatten the action space.
        :param inference: Run the environment in inference mode (watch the agent act).
        :param resolution: Resolution of the agent's square camera.
        """
        self._env = UnityEnvironment(file_name=environment_filename,
                                     worker_id=worker_id,
                                     seed=seed,
                                     docker_training=docker_training,
                                     n_arenas=n_arenas,
                                     arenas_configurations=arenas_configurations,
                                     inference=inference,
                                     resolution=resolution)
        # self.name = self._env.academy_name
        self.vector_obs = None
        self.inference = inference
        self.resolution = resolution
        self._current_state = None
        self._n_agents = None
        self._flattener = None
        self._greyscale = greyscale or retro
        # self._seed = None
        self.retro = retro
        # Hidden flag used by Atari environments to determine if the game is over
        self.game_over = False
        self.arenas_configurations = arenas_configurations

        self.flatten_branched = self.retro
        self.uint8_visual = self.retro

        # Check brain configuration
        if len(self._env.brains) != 1:
            raise UnityGymException(
                "There can only be one brain in a UnityEnvironment "
                "if it is wrapped in a gym.")
        self.brain_name = self._env.external_brain_names[0]
        brain = self._env.brains[self.brain_name]
        if brain.number_visual_observations == 0:
            raise UnityGymException("Environment provides no visual observations.")
        if brain.num_stacked_vector_observations != 1:
            raise UnityGymException("Environment provides no vector observations.")

        # Check for number of agents in scene.
        initial_info = self._env.reset(arenas_configurations=arenas_configurations)[self.brain_name]
        self._check_agents(len(initial_info.agents))
        if self.retro and self._n_agents > 1:
            raise UnityGymException("Only one agent is allowed in retro mode, set n_agents to 1.")

        # Set observation and action spaces
        if len(brain.vector_action_space_size) == 1:
            self._action_space = spaces.Discrete(brain.vector_action_space_size[0])
        else:
            if self.flatten_branched:
                self._flattener = ActionFlattener(brain.vector_action_space_size)
                self._action_space = self._flattener.action_space
            else:
                self._action_space = spaces.MultiDiscrete(brain.vector_action_space_size)
        # high = np.array([np.inf] * brain.vector_observation_space_size)
        self.action_meanings = brain.vector_action_descriptions

        # if self.visual_obs:
        if self._greyscale:
            depth = 1
        else:
            depth = 3

        if self.retro:
            image_space_max = 255
            image_space_dtype = np.uint8
            camera_height = 84
            camera_width = 84

            image_space = spaces.Box(
                0, image_space_max,
                dtype=image_space_dtype,
                shape=(camera_height, camera_width, depth)
            )
            self._observation_space = image_space
        else:
            image_space_max = 1.0
            image_space_dtype = np.float32
            camera_height = brain.camera_resolutions[0]["height"]
            camera_width = brain.camera_resolutions[0]["width"]
            max_float = np.finfo(np.float32).max

            image_space = spaces.Box(
                0, image_space_max,
                dtype=image_space_dtype,
                shape=(self._n_agents, camera_height, camera_width, depth)
            )
            vector_space = spaces.Box(-max_float, max_float,
                                      shape=(self._n_agents, brain.vector_observation_space_size))
            self._observation_space = spaces.Tuple((image_space, vector_space))

    def reset(self, arenas_configurations=None):
        """Resets the state of the environment and returns an initial observation.
        In the case of multi-agent environments, this is a list.
        Returns:
            observation (object/list): the initial observation of the space.
        """
        info = self._env.reset(arenas_configurations=arenas_configurations)[self.brain_name]
        n_agents = len(info.agents)
        self._check_agents(n_agents)
        self.game_over = False

        if self._n_agents == 1:
            obs, reward, done, info = self._single_step(info)
        else:
            obs, reward, done, info = self._multi_step(info)
        return obs

    def step(self, action):
        """Run one timestep of the environment's dynamics. When the end of an
        episode is reached, you are responsible for calling `reset()`
        to reset this environment's state.
        Accepts an action and returns a tuple (observation, reward, done, info).
        In the case of multi-agent environments, these are lists.
        Args:
            action (object/list): an action provided by the agent
        Returns:
            observation (object/list): agent's observation of the current environment
            reward (float/list): amount of reward returned after previous action
            done (boolean/list): whether the episode has ended
            info (dict): contains auxiliary diagnostic information, including BrainInfo
        """
        # Use random actions for all other agents in environment.
        if self._n_agents > 1:
            if not isinstance(action, list):
                raise UnityGymException("The environment was expecting `action` to be a list.")
            if len(action) != self._n_agents:
                raise UnityGymException(
                    "The environment was expecting a list of {} actions.".format(self._n_agents))
            else:
                if self._flattener is not None:
                    # Action space is discrete and flattened - we expect a list of scalars
                    action = [self._flattener.lookup_action(_act) for _act in action]
                action = np.array(action)
        else:
            if self._flattener is not None:
                # Translate action into list
                action = self._flattener.lookup_action(action)

        info = self._env.step(action)[self.brain_name]
        n_agents = len(info.agents)
        self._check_agents(n_agents)
        self._current_state = info

        if self._n_agents == 1:
            obs, reward, done, info = self._single_step(info)
            self.game_over = done
        else:
            obs, reward, done, info = self._multi_step(info)
            self.game_over = all(done)
        return obs, reward, done, info

    def _single_step(self, info):
        self.visual_obs = self._preprocess_single(info.visual_observations[0][0, :, :, :])
        self.vector_obs = info.vector_observations[0]
        if self._greyscale:
            self.visual_obs = self._greyscale_obs_single(self.visual_obs)
        if self.retro:
            self.visual_obs = self._resize_observation(self.visual_obs)
            default_observation = self.visual_obs
        else:
            default_observation = self.visual_obs, self.vector_obs

        return default_observation, info.rewards[0], info.local_done[0], {
            "text_observation": info.text_observations[0],
            "brain_info": info}

    def _preprocess_single(self, single_visual_obs):
        if self.uint8_visual:
            return (255.0 * single_visual_obs).astype(np.uint8)
        else:
            return single_visual_obs

    def _multi_step(self, info):
        self.visual_obs = self._preprocess_multi(info.visual_observations)
        self.vector_obs = info.vector_observations
        if self._greyscale:
            self.visual_obs = self._greyscale_obs_multi(self.visual_obs)
        default_observation = self.visual_obs
        return list(default_observation), info.rewards, info.local_done, {
            "text_observation": info.text_observations,
            "brain_info": info}

    def _preprocess_multi(self, multiple_visual_obs):
        if self.uint8_visual:
            return [(255.0 * _visual_obs).astype(np.uint8)
                    for _visual_obs in multiple_visual_obs]
        else:
            return multiple_visual_obs

    def render(self, mode='rgb_array'):
        return self.visual_obs

    def close(self):
        """Override _close in your subclass to perform any necessary cleanup.
        Environments will automatically close() themselves when
        garbage collected or when the program exits.
        """
        self._env.close()

    def get_action_meanings(self):
        return self.action_meanings

    def seed(self, seed=None):
        """Sets the seed for this env's random number generator(s).
        Currently not implemented.
""" logger.warning("Could not seed environment %s", self.name) return @staticmethod def _resize_observation(observation): """ Re-sizes visual observation to 84x84 """ obs_image = Image.fromarray(observation) obs_image = obs_image.resize((84, 84), Image.NEAREST) return np.array(obs_image) def _greyscale_obs_single(self, obs): new_obs = np.floor(np.expand_dims(np.mean(obs, axis=2), axis=2)).squeeze().astype(np.uint8) return new_obs def _greyscale_obs_multi(self, obs): new_obs = [np.floor(np.expand_dims(np.mean(o, axis=2), axis=2)).squeeze().astype(np.uint8) for o in obs] return new_obs def _check_agents(self, n_agents): # if n_agents > 1: # raise UnityGymException( # "The environment was launched as a single-agent environment, however" # "there is more than one agent in the scene.") # elif self._multiagent and n_agents <= 1: # raise UnityGymException( # "The environment was launched as a mutli-agent environment, however" # "there is only one agent in the scene.") if self._n_agents is None: self._n_agents = n_agents logger.info("{} agents within environment.".format(n_agents)) elif self._n_agents != n_agents: raise UnityGymException("The number of agents in the environment has changed since " "initialization. This is not supported.") @property def metadata(self): return {'render.modes': ['rgb_array']} @property def reward_range(self): return -float('inf'), float('inf') @property def spec(self): return None @property def action_space(self): return self._action_space @property def observation_space(self): return self._observation_space @property def number_agents(self): return self._n_agents
            .replace('.exe', '')
            .replace('.x86_64', '')
            .replace('.x86', ''))
docker_training = docker_target_name is not None

env = UnityEnvironment(
    n_arenas=n_arenas,
    file_name=env_path,
    worker_id=worker_id,
    seed=seed,
    docker_training=docker_training,
    play=False
)

arena_config_in = ArenaConfig('configs/lightsOff.yaml')
env.reset(arenas_configurations_input=arena_config_in)

# Set up a 2x2 grid of imshow panels, one per arena.
fig, axes = plt.subplots(2, 2)
imshows = []
for i in range(2):
    for j in range(2):
        axes[i, j].set_title('Arena ' + str(i * 2 + j))
        axes[i, j].axis('off')
        imshows.append(axes[i, j].imshow(np.zeros((84, 84, 3))))


def initialize_animation():
    for i in range(4):
        imshows[i].set_data(np.zeros((84, 84, 3)))


def run_step_imshow(step):
q_main = Rainbow_DQN_Conv(step_size, channel, height, width, action_dim, lr,
                          device)
q_target = Rainbow_DQN_Conv(step_size, channel, height, width, action_dim, lr, device)

# duju_utils.torch_network_load(q_main, "/home/duju/animal_ai_olympics/duju_animal_ai_olympics/trained/Rainbow Double-PER-Duel DQN -Animal Food_q_main_1270.torch")
# duju_utils.torch_network_load(q_target, "/home/duju/animal_ai_olympics/duju_animal_ai_olympics/trained/Rainbow Double-PER-Duel DQN -Animal Food_q_target_1270.torch")

target_initialize(q_main, q_target)

with open(txt_path, "a") as f:
    f.write(str(q_main) + "\n")

train_episodic_reward = []
eval_episodic_reward = []

info = env.reset(arenas_configurations=arena_config)["Learner"]

for epi_i in range(1, max_episode + 1):
    initial_frame = info.visual_observations[0][0]  # [height, width, channel]
    end = info.local_done[0]

    # Fill the frame buffer with the initial frame before building the first state.
    for _ in range(step_size):
        image_buffer.animal_add(initial_frame)
    s_idx = image_buffer.get_current_index()
    input_state = image_buffer.get_state(s_idx)

    ep_reward = 0.0
    ep_count = 0

    # Linearly decaying exploration rate.
    epsilon = max(1.0 * (1 - epi_i / 100), 0.05 * (1 - epi_i / 1000))
class Worker(object):
    def __init__(self, name, globalAC):
        env_id = int(name[-1])
        self.env = UnityEnvironment(file_name='env/AnimalAI',
                                    worker_id=env_id,
                                    seed=0,
                                    docker_training=False,
                                    n_arenas=1,
                                    play=False,
                                    inference=True,
                                    resolution=None)
        reset = self.env.reset(train_mode=True)
        self.name = name
        self.AC = ACNet(name, globalAC)

    def work(self):
        global GLOBAL_RUNNING_R, GLOBAL_EP
        total_step = 1
        buffer_s, buffer_a, buffer_r = [], [], []
        while not COORD.should_stop() and GLOBAL_EP < MAX_GLOBAL_EP:
            # reset = self.env.reset(train_mode=True)
            reset = self.env.reset(train_mode=True, arenas_configurations=ARENA)
            brain = reset['Learner']
            s = np.array(brain.visual_observations,
                         dtype='float32').reshape(84, 84, 3).flatten()[np.newaxis, :]
            ep_r = 0
            rnn_state = self.AC.state_init
            for ep_t in range(MAX_STEPS):
                a = self.AC.choose_action(s, rnn_state)
                rnn_state = a[2]
                if a[0] == 0:
                    # Repeat the "forward" action for 30 frames and keep the last result.
                    info = [self.env.step(vector_action=[0, 1]) for _ in range(30)][-1]
                else:
                    info = self.env.step(vector_action=a[1])
                brain = info['Learner']
                s_ = np.array(brain.visual_observations,
                              dtype='float32').reshape(84, 84, 3).flatten()[np.newaxis, :]
                r = brain.rewards[0]
                done = brain.local_done[0]  # unused: episodes end after MAX_STEPS
                end = (ep_t == MAX_STEPS - 1)
                if r == 0:
                    r = -0.0125
                ep_r += r
                buffer_s.append(s)
                buffer_a.append(a[0])
                buffer_r.append(r)

                if total_step % UPDATE_GLOBAL_ITER == 0 or end:  # update the global network
                    if end:
                        v_s_ = 0
                    else:
                        v_s_ = SESS.run(
                            self.AC.v, {
                                self.AC.s: s_,
                                self.AC.state_in[0]: rnn_state[0],
                                self.AC.state_in[1]: rnn_state[1]
                            })[0, 0]
                    buffer_v_target = []
                    for r in buffer_r[::-1]:
                        v_s_ = r + GAMMA * v_s_
                        buffer_v_target.append(v_s_)
                    buffer_v_target.reverse()

                    buffer_s, buffer_a, buffer_v_target = np.vstack(buffer_s), np.array(buffer_a), np.vstack(buffer_v_target)
                    feed_dict = {
                        self.AC.s: buffer_s,
                        self.AC.a_his: buffer_a,
                        self.AC.v_target: buffer_v_target,
                        self.AC.state_in[0]: rnn_state[0],
                        self.AC.state_in[1]: rnn_state[1]
                    }
                    self.AC.update_global(feed_dict)
                    buffer_s, buffer_a, buffer_r = [], [], []
                    self.AC.pull_global()

                s = s_
                total_step += 1
                if end:
                    if len(GLOBAL_RUNNING_R) == 0:  # record the running episode reward
                        GLOBAL_RUNNING_R.append(ep_r)
                    else:
                        GLOBAL_RUNNING_R.append(0.99 * GLOBAL_RUNNING_R[-1] + 0.01 * ep_r)
                    break
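# --- Launch sketch (not from the original source) ----------------------------
# A typical TF 1.x driver for the Worker class above, assuming the globals it
# relies on (SESS, COORD, GLOBAL_EP, GLOBAL_RUNNING_R, ARENA, ACNet, N_WORKERS,
# MAX_GLOBAL_EP, ...) are defined elsewhere in the script; the ACNet constructor
# call for the global network is an assumption. Worker names end in a digit,
# which becomes the Unity worker_id.
import threading
import tensorflow as tf

if __name__ == '__main__':
    SESS = tf.Session()
    with tf.device('/cpu:0'):
        GLOBAL_AC = ACNet('Global_Net')  # assumed constructor for the global network
        workers = [Worker('W_%i' % i, GLOBAL_AC) for i in range(N_WORKERS)]
    COORD = tf.train.Coordinator()
    SESS.run(tf.global_variables_initializer())

    threads = []
    for worker in workers:
        t = threading.Thread(target=worker.work)
        t.start()
        threads.append(t)
    COORD.join(threads)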
def convert_action(action):
    # Map a Discrete(9) index to the 2-branch AnimalAI action (forward/back, left/right).
    actions_array = np.array([[0, 0], [0, 1], [0, 2],
                              [1, 0], [1, 1], [1, 2],
                              [2, 0], [2, 1], [2, 2]])
    return actions_array[action]


if __name__ == '__main__':
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    env_path = 'env/AnimalAI'
    brain_name = 'Learner'
    arena_config_in = ArenaConfig(args.env_field)

    model = DQN(action_size=9).to(device)
    model.eval()
    model.load_state_dict(torch.load("./models/dqn/dqn.pt"))

    env = UnityEnvironment(file_name=env_path)

    # Reset the environment
    action_info = env.reset(arenas_configurations_input=arena_config_in, train_mode=False)
    obs = action_info[brain_name].visual_observations[0][0]
    state = get_state(obs)

    for step in range(1000):
        time.sleep(0.05)

        # Greedy action from the trained model
        action_values = model(state)
        action = np.argmax(action_values.cpu().data.numpy())
        conv_action = convert_action(action)

        action_info = env.step(conv_action)
        obs = action_info[brain_name].visual_observations[0][0]
        reward = action_info[brain_name].rewards[0]
        done = action_info[brain_name].local_done[0]
        max_reach = action_info[brain_name].max_reached
        state = get_state(obs)  # update the state for the next action selection