Python UnityEnvironment.step Examples

Programming Language: Python

Namespace/Package Name: animalai.envs

Class/Type: UnityEnvironment

Method/Function: step

Examples at hotexamples.com: 5

Python UnityEnvironment.step - 5 examples found. These are the top-rated real-world Python examples of animalai.envs.UnityEnvironment.step, extracted from open-source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

UnityEnvironment(12)

reset(7)

step(5)

close(2)

Example #1

Show file

class AnimalAIWrapper(gym.Env):
    """Single-arena Gym wrapper around an AnimalAI ``UnityEnvironment``.

    Observations are the first visual observation of the ``"Learner"`` brain,
    scaled by 255 and cast to ``uint8``; the declared observation space is an
    84x84x3 image. Actions are indices into a discrete table built from the
    Cartesian product of the two action branches.
    """

    def __init__(
        self,
        worker_id,
        env_path,
        config_path,
        reduced_actions=False,
        docker_training=False,
    ):
        """Start the Unity environment and build the action lookup table.

        :param worker_id: Worker number; also used as the environment seed.
        :param env_path: Path of the Unity executable to launch.
        :param config_path: Path handed to ``ArenaConfig``.
        :param reduced_actions: If true, the first action branch is limited
            to the values ``{0, 1}`` (6 actions instead of 9).
        :param docker_training: Forwarded to ``UnityEnvironment``.
        """
        super(AnimalAIWrapper, self).__init__()
        self.config = ArenaConfig(config_path)
        # Episode length is taken from the first arena of the configuration.
        # NOTE(review): if `t` can be 0 to mean "no time limit", `step` would
        # report done on every step -- confirm against the arena config docs.
        self.time_limit = self.config.arenas[0].t

        self.env = UnityEnvironment(
            file_name=env_path,
            worker_id=worker_id,
            seed=worker_id,
            n_arenas=1,
            arenas_configurations=self.config,
            docker_training=docker_training,
        )

        if reduced_actions:
            branch_values = itertools.product([0, 1], [0, 1, 2])
        else:
            branch_values = itertools.product([0, 1, 2], repeat=2)
        # Map each discrete index to the keyword payload expected by
        # `UnityEnvironment.step`: one row holding both branch values.
        lookup = {
            index: {"Learner": np.array([pair], dtype=float)}
            for index, pair in enumerate(branch_values)
        }
        self.action_map = lookup.__getitem__

        self.observation_space = gym.spaces.Box(0,
                                                255, [84, 84, 3],
                                                dtype=np.uint8)
        self.action_space = gym.spaces.Discrete(len(lookup))
        self.t = 0

    def _process_state(self, state):
        """Unpack a brain-info dict into ``(image, vector, reward, done)``."""
        learner = state["Learner"]
        img = 255 * learner.visual_observations[0][0]
        vec = learner.vector_observations[0]
        r = learner.rewards[0]
        done = learner.local_done[0]
        return np.uint8(img), vec, r, done

    def reset(self):
        """Reset the arena and return the first image observation.

        The environment is reset again for as long as the state it returns
        is already flagged as done.
        """
        self.t = 0
        while True:
            img, _, _, done = self._process_state(
                self.env.reset(arenas_configurations=self.config))
            if not done:
                return img

    def step(self, action):
        """Apply one discrete action.

        :param action: Index into the action table. May be a plain ``int``
            or any object exposing ``.item()`` (NumPy scalar, one-element
            array, tensor).
        :return: ``(observation, reward, done, info)``; ``done`` is also set
            once ``time_limit`` steps have elapsed since the last reset.
        """
        # Plain Python ints have no `.item()`; accept them as-is.
        index = action.item() if hasattr(action, "item") else action
        obs, _, r, done = self._process_state(
            self.env.step(vector_action=self.action_map(index)))
        self.t += 1
        done = done or self.t >= self.time_limit
        return obs, r, done, {}

Example #2

Show file

File: environment.py Project: zbwby819/AnimalAI-Olympics

class AnimalAIEnv(gym.Env):
    """
    Provides Gym wrapper for Unity Learning Environments.
    Multi-agent environments use lists for object types, as done here:
    https://github.com/openai/multiagent-particle-envs
    """

    def __init__(self,
                 environment_filename: str,
                 worker_id=0,
                 docker_training=False,
                 n_arenas=1,
                 seed=0,
                 arenas_configurations=None,
                 greyscale=False,
                 retro=True,
                 inference=False,
                 resolution=None):
        """
        Environment initialization
        :param environment_filename: The UnityEnvironment path or file to be wrapped in the gym.
        :param worker_id: Worker number for environment.
        :param docker_training: Whether this is running within a docker environment and should use a virtual
            frame buffer (xvfb).
        :param n_arenas: number of arenas to create in the environment (one agent per arena)
        :param seed: seed forwarded to the UnityEnvironment.
        :param arenas_configurations: an ArenaConfig to configure the items present in each arena, will spawn random
            objects randomly if not provided
        :param greyscale: whether the visual observations should be grayscaled or not
        :param retro: Resize visual observation to 84x84 (uint8) and flattens action space. Retro mode also
            forces greyscale observations.
        :param inference: forwarded to the UnityEnvironment.
        :param resolution: forwarded to the UnityEnvironment.
        :raises UnityGymException: if the environment does not expose exactly one brain, has no visual
            observation, stacks vector observations, or has several agents in retro mode.
        """
        self._env = UnityEnvironment(file_name=environment_filename,
                                     worker_id=worker_id,
                                     seed=seed,
                                     docker_training=docker_training,
                                     n_arenas=n_arenas,
                                     arenas_configurations=arenas_configurations,
                                     inference=inference,
                                     resolution=resolution)
        # self.name = self._env.academy_name
        # Initialised up front so that render() before the first reset()/step() returns None
        # instead of raising AttributeError.
        self.visual_obs = None
        self.vector_obs = None
        self.inference = inference
        self.resolution = resolution
        self._current_state = None
        self._n_agents = None
        self._flattener = None
        self._greyscale = greyscale or retro
        # self._seed = None
        self.retro = retro
        self.game_over = False  # Hidden flag used by Atari environments to determine if the game is over
        self.arenas_configurations = arenas_configurations

        self.flatten_branched = self.retro
        self.uint8_visual = self.retro

        # Check brain configuration
        if len(self._env.brains) != 1:
            raise UnityGymException(
                "There can only be one brain in a UnityEnvironment "
                "if it is wrapped in a gym.")
        self.brain_name = self._env.external_brain_names[0]
        brain = self._env.brains[self.brain_name]

        if brain.number_visual_observations == 0:
            raise UnityGymException("Environment provides no visual observations.")

        if brain.num_stacked_vector_observations != 1:
            # The condition is about stacking, so the message says so.
            raise UnityGymException(
                "There can only be one stacked vector observation in a UnityEnvironment "
                "if it is wrapped in a gym.")

        # Check for number of agents in scene.
        initial_info = self._env.reset(arenas_configurations=arenas_configurations)[self.brain_name]
        self._check_agents(len(initial_info.agents))

        if self.retro and self._n_agents > 1:
            # The constructor argument controlling the agent count is `n_arenas`.
            raise UnityGymException("Only one agent is allowed in retro mode, set n_arenas to 1.")

        # Set observation and action spaces
        if len(brain.vector_action_space_size) == 1:
            self._action_space = spaces.Discrete(brain.vector_action_space_size[0])
        else:
            if self.flatten_branched:
                self._flattener = ActionFlattener(brain.vector_action_space_size)
                self._action_space = self._flattener.action_space
            else:
                self._action_space = spaces.MultiDiscrete(brain.vector_action_space_size)

        # high = np.array([np.inf] * brain.vector_observation_space_size)
        self.action_meanings = brain.vector_action_descriptions

        # if self.visual_obs:
        if self._greyscale:
            depth = 1
        else:
            depth = 3

        if self.retro:
            # Retro mode: a single fixed-size uint8 image, no vector observation.
            image_space_max = 255
            image_space_dtype = np.uint8
            camera_height = 84
            camera_width = 84

            image_space = spaces.Box(
                0, image_space_max,
                dtype=image_space_dtype,
                shape=(camera_height, camera_width, depth)
            )

            self._observation_space = image_space
        else:
            # Non-retro mode: float images at the camera's own resolution plus the vector observation.
            image_space_max = 1.0
            image_space_dtype = np.float32
            camera_height = brain.camera_resolutions[0]["height"]
            camera_width = brain.camera_resolutions[0]["width"]
            max_float = np.finfo(np.float32).max

            image_space = spaces.Box(
                0, image_space_max,
                dtype=image_space_dtype,
                shape=(self._n_agents, camera_height, camera_width, depth)
            )
            vector_space = spaces.Box(-max_float, max_float,
                                      shape=(self._n_agents, brain.vector_observation_space_size))
            self._observation_space = spaces.Tuple((image_space, vector_space))

    def reset(self, arenas_configurations=None):
        """Resets the state of the environment and returns an initial observation.
        In the case of multi-agent environments, this is a list.
        Args:
            arenas_configurations: optional configuration forwarded to the underlying environment reset.
        Returns: observation (object/list): the initial observation of the
            space.
        """
        info = self._env.reset(arenas_configurations=arenas_configurations)[self.brain_name]
        n_agents = len(info.agents)
        self._check_agents(n_agents)
        self.game_over = False

        if self._n_agents == 1:
            obs, _, _, _ = self._single_step(info)
        else:
            obs, _, _, _ = self._multi_step(info)
        return obs

    def step(self, action):
        """Run one timestep of the environment's dynamics. When end of
        episode is reached, you are responsible for calling `reset()`
        to reset this environment's state.
        Accepts an action and returns a tuple (observation, reward, done, info).
        In the case of multi-agent environments, these are lists.
        Args:
            action (object/list): an action provided by the environment
        Returns:
            observation (object/list): agent's observation of the current environment
            reward (float/list) : amount of reward returned after previous action
            done (boolean/list): whether the episode has ended.
            info (dict): contains auxiliary diagnostic information, including BrainInfo.
        Raises:
            UnityGymException: in multi-agent mode, if `action` is not a list with one entry per agent.
        """

        # Validate the action container and, when the action space was flattened,
        # translate each scalar back into its branched form.
        if self._n_agents > 1:
            if not isinstance(action, list):
                raise UnityGymException("The environment was expecting `action` to be a list.")
            if len(action) != self._n_agents:
                raise UnityGymException(
                    "The environment was expecting a list of {} actions.".format(self._n_agents))
            else:
                if self._flattener is not None:
                    # Action space is discrete and flattened - we expect a list of scalars
                    action = [self._flattener.lookup_action(_act) for _act in action]
                action = np.array(action)
        else:
            if self._flattener is not None:
                # Translate action into list
                action = self._flattener.lookup_action(action)

        info = self._env.step(action)[self.brain_name]
        n_agents = len(info.agents)
        self._check_agents(n_agents)
        self._current_state = info

        if self._n_agents == 1:
            obs, reward, done, info = self._single_step(info)
            self.game_over = done
        else:
            obs, reward, done, info = self._multi_step(info)
            self.game_over = all(done)
        return obs, reward, done, info

    def _single_step(self, info):
        """Convert a BrainInfo into a single-agent (obs, reward, done, info) tuple.

        Also caches the processed observations on `self.visual_obs` / `self.vector_obs`.
        """
        self.visual_obs = self._preprocess_single(info.visual_observations[0][0, :, :, :])
        self.vector_obs = info.vector_observations[0]

        if self._greyscale:
            self.visual_obs = self._greyscale_obs_single(self.visual_obs)

        if self.retro:
            # Retro mode exposes only the resized image.
            self.visual_obs = self._resize_observation(self.visual_obs)
            default_observation = self.visual_obs
        else:
            default_observation = self.visual_obs, self.vector_obs

        return default_observation, info.rewards[0], info.local_done[0], {
            "text_observation": info.text_observations[0],
            "brain_info": info}

    def _preprocess_single(self, single_visual_obs):
        """Scale an image by 255 and cast to uint8 when `uint8_visual` is set; otherwise return it as is."""
        if self.uint8_visual:
            return (255.0 * single_visual_obs).astype(np.uint8)
        else:
            return single_visual_obs

    def _multi_step(self, info):
        """Convert a BrainInfo into multi-agent (obs, rewards, dones, info) lists.

        Only the visual observations are returned as the observation; the vector
        observations are cached on `self.vector_obs`.
        """
        self.visual_obs = self._preprocess_multi(info.visual_observations)
        self.vector_obs = info.vector_observations

        if self._greyscale:
            self.visual_obs = self._greyscale_obs_multi(self.visual_obs)

        default_observation = self.visual_obs

        return list(default_observation), info.rewards, info.local_done, {
            "text_observation": info.text_observations,
            "brain_info": info}

    def _preprocess_multi(self, multiple_visual_obs):
        """Apply the uint8 scaling of `_preprocess_single` to every entry of a list of observations."""
        if self.uint8_visual:
            return [(255.0 * _visual_obs).astype(np.uint8) for _visual_obs in multiple_visual_obs]
        else:
            return multiple_visual_obs

    def render(self, mode='rgb_array'):
        """Return the most recent processed visual observation (None before the first reset/step)."""
        return self.visual_obs

    def close(self):
        """Close the wrapped UnityEnvironment."""
        self._env.close()

    def get_action_meanings(self):
        """Return the brain's vector action descriptions."""
        return self.action_meanings

    def seed(self, seed=None):
        """Sets the seed for this env's random number generator(s).
        Currently not implemented.
        """
        # `self.name` is not assigned by __init__ (the assignment is commented out),
        # so fall back to the class name rather than raising AttributeError.
        logger.warning("Could not seed environment %s",
                       getattr(self, "name", type(self).__name__))
        return

    @staticmethod
    def _resize_observation(observation):
        """
        Re-sizes visual observation to 84x84
        """
        obs_image = Image.fromarray(observation)
        obs_image = obs_image.resize((84, 84), Image.NEAREST)
        return np.array(obs_image)

    def _greyscale_obs_single(self, obs):
        """Average `obs` over axis 2, floor, drop singleton axes and cast to uint8."""
        new_obs = np.floor(np.expand_dims(np.mean(obs, axis=2), axis=2)).squeeze().astype(np.uint8)
        return new_obs

    def _greyscale_obs_multi(self, obs):
        """Apply `_greyscale_obs_single` to every element of `obs`."""
        return [self._greyscale_obs_single(o) for o in obs]

    def _check_agents(self, n_agents):
        """Record the agent count on first call; raise if it later changes."""
        # if n_agents > 1:
        #     raise UnityGymException(
        #         "The environment was launched as a single-agent environment, however"
        #         "there is more than one agent in the scene.")
        # elif self._multiagent and n_agents <= 1:
        #     raise UnityGymException(
        #         "The environment was launched as a mutli-agent environment, however"
        #         "there is only one agent in the scene.")
        if self._n_agents is None:
            self._n_agents = n_agents
            logger.info("{} agents within environment.".format(n_agents))
        elif self._n_agents != n_agents:
            raise UnityGymException("The number of agents in the environment has changed since "
                                    "initialization. This is not supported.")

    @property
    def metadata(self):
        return {'render.modes': ['rgb_array']}

    @property
    def reward_range(self):
        return -float('inf'), float('inf')

    @property
    def spec(self):
        return None

    @property
    def action_space(self):
        return self._action_space

    @property
    def observation_space(self):
        return self._observation_space

    @property
    def number_agents(self):
        return self._n_agents

Example #3

Show file

File: animal_rainbow_baseline.py Project: ehddnr747/duju_animal_ai_olympics

    # Excerpt from a training routine: roll out one episode with an
    # epsilon-greedy policy and push every transition into the replay buffer.
    # The enclosing function and the loop's exit condition are not part of
    # this excerpt.
    s_idx = image_buffer.get_current_index()
    input_state = image_buffer.get_state(s_idx)

    # NOTE(review): ep_reward is initialised here but not updated anywhere in
    # the visible excerpt -- confirm it is accumulated further down.
    ep_reward = 0.0
    ep_count = 0

    # Exploration rate: the larger of two linear schedules in `epi_i`
    # (presumably the episode index): one falling from 1.0 to 0 at 100,
    # the other falling from 0.05 to 0 at 1000.
    epsilon = max(1.0 * (1 - epi_i / 100), 0.05 * (1 - epi_i / 1000))

    while True:
        # Sample an action category from the Q-network using a batch of one.
        a_category = q_main.epsilon_sample(
            torch.FloatTensor(input_state).to(device).view(
                1, input_channel_size, height, width), epsilon)
        # Translate the category into the action passed to the environment.
        a_deploy = action_dict[a_category]

        info = env.step(a_deploy)["Learner"]

        end = info.local_done[0]

        ep_count += 1
        r = info.rewards[0] * reward_scale
        s2_frame = info.visual_observations[0][0]

        # The image buffer hands back an index for the new frame; the replay
        # buffer stores those indices (s_idx, s2_idx) rather than the states.
        image_buffer.animal_add(s2_frame)
        s2_idx = image_buffer.get_current_index()
        input_state = image_buffer.get_state(s2_idx)

        replay_buffer.store(np.array([s_idx]), np.array([a_category]),
                            np.array([r]), np.array([end]), np.array([s2_idx]))

        s_idx = s2_idx

Example #4

Show file

class Worker(object):
    """Actor-critic worker that owns one AnimalAI environment.

    Each worker collects transitions with its local ``ACNet`` and, every
    ``UPDATE_GLOBAL_ITER`` steps or at the end of an episode, pushes an
    update to the global network and pulls the global weights back.
    """

    def __init__(self, name, globalAC):
        """Launch the environment and build the local network.

        The last character of ``name`` is parsed as the Unity worker id.
        """
        self.env = UnityEnvironment(file_name='env/AnimalAI',
                                    worker_id=int(name[-1]),
                                    seed=0,
                                    docker_training=False,
                                    n_arenas=1,
                                    play=False,
                                    inference=True,
                                    resolution=None)

        # Initial reset; the returned state is not needed here.
        self.env.reset(train_mode=True)

        self.name = name
        self.AC = ACNet(name, globalAC)

    @staticmethod
    def _frame_row(brain_info):
        """Flatten the brain's visual observation into a (1, 84*84*3) float32 row."""
        frame = np.array(brain_info.visual_observations, dtype='float32')
        return frame.reshape(84, 84, 3).flatten()[np.newaxis, :]

    def work(self):
        """Run episodes until the coordinator stops or the episode budget is used."""
        global GLOBAL_RUNNING_R, GLOBAL_EP
        step_counter = 1
        state_buf, action_buf, reward_buf = [], [], []

        while not COORD.should_stop() and GLOBAL_EP < MAX_GLOBAL_EP:
            first = self.env.reset(train_mode=True,
                                   arenas_configurations=ARENA)
            s = self._frame_row(first['Learner'])
            ep_r = 0

            rnn_state = self.AC.state_init

            for ep_t in range(MAX_STEPS):
                choice = self.AC.choose_action(s, rnn_state)
                rnn_state = choice[2]

                if choice[0] == 0:
                    # Action 0 repeats the same fixed step 30 times; only the
                    # last result is kept.
                    for _ in range(30):
                        info = self.env.step(vector_action=[0, 1])
                else:
                    info = self.env.step(vector_action=choice[1])
                learner = info['Learner']
                s_ = self._frame_row(learner)
                r = learner.rewards[0]
                done = learner.local_done[0]  # read but not used below

                # An episode ends only when the step budget is exhausted.
                end = ep_t == MAX_STEPS - 1
                if r == 0:
                    r = -0.0125
                ep_r += r

                state_buf.append(s)
                action_buf.append(choice[0])
                reward_buf.append(r)

                if step_counter % UPDATE_GLOBAL_ITER == 0 or end:  # network update
                    if end:
                        bootstrap = 0
                    else:
                        bootstrap = SESS.run(
                            self.AC.v, {
                                self.AC.s: s_,
                                self.AC.state_in[0]: rnn_state[0],
                                self.AC.state_in[1]: rnn_state[1]
                            })[0, 0]
                    # Discounted returns, accumulated from the newest reward
                    # backwards and then put back in chronological order.
                    targets = []
                    for reward in reversed(reward_buf):
                        bootstrap = reward + GAMMA * bootstrap
                        targets.append(bootstrap)
                    targets.reverse()

                    batch_s = np.vstack(state_buf)
                    batch_a = np.array(action_buf)
                    batch_v = np.vstack(targets)
                    self.AC.update_global({
                        self.AC.s: batch_s,
                        self.AC.a_his: batch_a,
                        self.AC.v_target: batch_v,
                        self.AC.state_in[0]: rnn_state[0],
                        self.AC.state_in[1]: rnn_state[1]
                    })

                    state_buf, action_buf, reward_buf = [], [], []
                    self.AC.pull_global()

                s = s_
                step_counter += 1

                if end:
                    # Record the episode reward as an exponential moving average.
                    if len(GLOBAL_RUNNING_R) == 0:
                        smoothed = ep_r
                    else:
                        smoothed = 0.99 * GLOBAL_RUNNING_R[-1] + 0.01 * ep_r
                    GLOBAL_RUNNING_R.append(smoothed)
                    break

Example #5

Show file

File: evaluate.py Project: neuroph12/Animal-AI

    # Excerpt from an evaluation script: load the saved DQN weights and run
    # the model in the environment for a fixed number of steps.
    model.load_state_dict(torch.load("./models/dqn/dqn.pt"))

    env=UnityEnvironment(file_name=env_path)
    # Reset the environment
    # NOTE(review): the keyword here is `arenas_configurations_input`, not
    # `arenas_configurations` -- confirm it matches the installed animalai version.
    action_info = env.reset(arenas_configurations_input=arena_config_in, train_mode=False)
    obs = action_info[brain_name].visual_observations[0][0]
    state = get_state(obs)

    for step in range(1000):
        # Pause between steps.
        time.sleep(0.05)
        # Choose the action with the largest model output (the original
        # comment read "random action", which does not match the code).
        action_values = model(state)
        action = np.argmax(action_values.cpu().data.numpy())
        conv_action = convert_action(action)
    
        action_info = env.step(conv_action)
        obs = action_info[brain_name].visual_observations[0][0]
        reward = action_info[brain_name].rewards[0]
        # `done` and `max_reach` are read but the loop does not act on them;
        # it always runs the full 1000 steps.
        done   = action_info[brain_name].local_done[0]
        max_reach=action_info[brain_name].max_reached
        next_state = get_state(obs)
        state = next_state
        # Display
        #print('\n ===== {} step ======'.format(step))
        #print('\naction=', action)
        #print('\nstate=', state.shape)
        #print('\nreward=', reward)
        #print('\ndone=', done)
        #print('\nmax_reach=', max_reach)

    #plt.imshow(state[0][0])