KOE: Here we take the action, observe the reward and done flag, and skip ahead.

game.set_action(a_t.tolist())
skiprate = agent.frame_per_action
game.advance_action(skiprate)  # Repeats the action skiprate times and returns the state after that.

game_state = game.get_state()  # Observe again after we take the action
is_terminated = game.is_episode_finished()
r_t = game.get_last_reward()
'''
# KOEComment: My Unity agent also skips 5 frames between actions, controlled in the Unity interface.
# The action space in Unity has 4 branches, with multiple actions in each! Those can also be combined!
# I need the ANN output to be able to select all combinations.
# TODO: I believe step() just wants the index of the action.
observation, reward, done, info = env.step(action_idx)
# print("obs after step: ", total_size(observation))
if reward != 0:
    print("Got reward: ", reward)
    print("Taking action ", action_idx)
# TODO: How to step ahead multiple steps? I asked on GitHub - check what they suggest.

# observation is the image; vector_observations are the measurements:
# battery, eaten_poison, eaten_food
meas = info['brain_info'].vector_observations

if done:
    print("Game done at timestep ", t)
    if (food - poison) > max_reward:
        max_reward = food - poison
    GAME += 1
    reward_buffer.append(food - poison)
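One way to let a single network output cover every combination of branched actions is to enumerate the Cartesian product of the branches and index into it. The sketch below is not from the original code: the branch sizes are placeholders, and the real values would come from the Unity behavior's action spec (env.action_space.nvec for a MultiDiscrete space). Note that gym_unity's flatten_branched=True option performs this same MultiDiscrete-to-Discrete flattening automatically.

import itertools

# Hypothetical branch sizes; replace with the sizes reported by the environment.
branch_sizes = [3, 3, 3, 2]
all_combinations = list(itertools.product(*[range(n) for n in branch_sizes]))

def index_to_branched_action(action_idx):
    # The network has len(all_combinations) outputs; the chosen index picks
    # one concrete setting for every branch at once.
    return list(all_combinations[action_idx])

print(len(all_combinations), index_to_branched_action(0))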
import math

import h5py
import numpy as np
from gym_unity.envs.unity_env import UnityEnv


def sampleTrajectory():
    action_repeat = 300
    action_range_around_zero = 20  # should be even
    # action_range_around_zero = [-9, -8, -7, -6, -5, 0, 5, 6, 7, 8, 9]
    period = 1
    if action_range_around_zero % 2 != 0:
        return False
    env = UnityEnv(
        "/homes/gkumar/Documents/UnityProjects/mazeContinuousTarget_fixed_camera_data_collection/Build/mazeContinuousTarget_fixed_camera_data_collection",
        0,
        use_visual=True,
        uint8_visual=True)
    list_of_data = []
    for i in range(int(-1 * action_range_around_zero / 2),
                   int(action_range_around_zero / 2 + 1),
                   period):  # velocity X [-5, -3, -1, 1, 3, 5]
        for j in range(int(-1 * action_range_around_zero / 2),
                       int(action_range_around_zero / 2 + 1),
                       period):  # velocity Y [-5, -3, -1, 1, 3, 5]
            print(i, j)
            for k in range(int(-1 * action_range_around_zero / 2),
                           int(action_range_around_zero / 2 + 1),
                           period):  # action X [-5, -3, -1, 1, 3, 5]
                for l in range(int(-1 * action_range_around_zero / 2),
                               int(action_range_around_zero / 2 + 1),
                               period):  # action Y [-5, -3, -1, 1, 3, 5]
                    single_tuple = np.zeros(4 + 2 * action_repeat)
                    obs_fovea = env.reset()
                    obs_fovea_next, reward, done, info = env.step([[i], [j], [k], [l]])
                    # action
                    single_tuple[0] = i
                    single_tuple[1] = j
                    # velocity
                    single_tuple[2] = k
                    single_tuple[3] = l
                    for m in range(0, action_repeat):
                        single_tuple[3 + m * 2 + 1] = info["brain_info"].vector_observations[0][2]
                        single_tuple[3 + m * 2 + 2] = info["brain_info"].vector_observations[0][3]
                        x_vel_new = info["brain_info"].vector_observations[0][6]
                        y_vel_new = info["brain_info"].vector_observations[0][7]
                        if math.sqrt(
                                math.pow(single_tuple[3 + m * 2 + 1] - single_tuple[4], 2) +
                                math.pow(single_tuple[3 + m * 2 + 2] - single_tuple[5], 2)) < 6:
                            obs_fovea_next, reward, done, info = env.step(
                                [[i], [j], [x_vel_new], [y_vel_new]])
                        else:
                            for n in range(m, action_repeat):
                                single_tuple[3 + n * 2 + 1] = single_tuple[3 + (m - 1) * 2 + 1]
                                single_tuple[3 + n * 2 + 2] = single_tuple[3 + (m - 1) * 2 + 2]
                            break
                    list_of_data.append(single_tuple)
    h5f = h5py.File('data.h5', 'w')
    h5f.create_dataset('dataset_1', data=list_of_data)
    h5f.close()
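For reference, a hedged sketch (not part of the original script) of reading the recorded file back: each row stores the four loop parameters followed by action_repeat pairs of x/y observation values, so the layout below follows directly from how single_tuple is filled above.

import h5py
import numpy as np

with h5py.File('data.h5', 'r') as h5f:
    data = np.array(h5f['dataset_1'])

params = data[:, :4]     # the i, j, k, l values used for each rollout
readings = data[:, 4:]   # alternating x/y observations, action_repeat pairs per row
xs = readings[:, 0::2]
ys = readings[:, 1::2]
print(data.shape, xs.shape, ys.shape)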
class UnityEnvWrapper:
    def __init__(self, env_config=None, use_eval=False, rpc_mode=False):
        self.env = None
        if not rpc_mode:
            assert env_config is not None
            self.launch(env_config, use_eval)

    def launch(self, env_config, use_eval=False):
        environment_path = (env_config["environment_path_eval"]
                            if use_eval else env_config["environment_path"])
        port = env_config.get("port", 0)
        if use_eval and port:
            port += 2
        use_visual = env_config.get("use_visual", False)
        use_vector = env_config.get("use_vector", True)
        multiagent = env_config.get("multiagent", False)
        uint8_visual = env_config.get("uint8_visual", True)
        flatten_branched = env_config.get("flatten_branched", True)
        self.env = UnityEnv(
            environment_path,
            port,
            use_visual=use_visual,
            use_vector=use_vector,
            uint8_visual=uint8_visual,
            multiagent=multiagent,
            flatten_branched=flatten_branched,
        )
        self.action_space = self.env._action_space
        self.observation_space = self.env._observation_space
        # Agent names must be unique among **all** agents.
        self.agent_name = [
            f'{port}_{i}' for i in range(self.env.number_agents)
        ]

    def _transform_list_to_dict(self, objs):
        return {name: obj for name, obj in zip(self.agent_name, objs)}

    def _transform_dict_to_list(self, objs):
        return [objs[name] for name in self.agent_name]

    def step(self, act, action_settings=None):
        action = np.stack(self._transform_dict_to_list(act)).tolist()
        observation, reward, done, info = self.env.step(action)
        transform = self._transform_list_to_dict
        info = list(map(json.loads, info['text_observation']))
        for i, x in enumerate(info):
            x['done'] = done[i]
        done = [False] * 4
        done_dict = transform(done)
        done_dict['__all__'] = False  # no early termination (for logging)
        return transform(observation), transform(reward), done_dict, transform(info)

    def reset(self, reset_settings=None):
        obs = self.env.reset()
        return self._transform_list_to_dict(obs)

    def get_env_spaces(self):
        spaces = self.action_space, self.observation_space, self.agent_name
        p = pickle.dumps(spaces)
        z = zlib.compress(p)
        return z

    def get_action_count(self):
        if isinstance(self.env.action_space, gym.spaces.Discrete):
            return self.env.action_space.n
        elif isinstance(self.env.action_space, gym.spaces.MultiDiscrete):
            return self.env.action_space.nvec.tolist()
        raise NotImplementedError

    def sample(self):
        return self.env.action_space.sample()

    def number_agents(self):
        return self.env.number_agents

    def env_close(self):
        if self.env:
            self.env.close()
            self.env = None

    def close(self):
        self.env_close()

    def hello(self):
        print('Hello World')
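A hedged usage sketch of the wrapper follows. The build path, port, and configuration values are placeholders rather than values from the original code, and it assumes the Unity environment emits one JSON text observation per agent (which step() parses) and matches the hardcoded agent count in step().

env_config = {
    "environment_path": "/path/to/build/my_unity_env",  # placeholder path
    "port": 5005,
    "use_visual": True,
    "use_vector": False,
    "multiagent": True,
    "uint8_visual": True,
    "flatten_branched": True,
}

wrapper = UnityEnvWrapper(env_config=env_config)
obs = wrapper.reset()  # dict keyed by the '<port>_<index>' agent names
actions = {name: wrapper.sample() for name in wrapper.agent_name}
obs, rewards, dones, infos = wrapper.step(actions)
wrapper.close()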
from baselines import deepq, logger
import time
from gym_unity.envs.unity_env import UnityEnv
import subprocess as sp
import os

env = UnityEnv("../unity_envs/kais_banana", 0, use_visual=True,
               uint8_visual=True, flatten_branched=True)

act = deepq.learn(
    env,
    network='cnn',
    total_timesteps=0,
    load_path="logs_backup/model")  # "unity_model.pkl"

# Visualizing
# TODO: Maybe slow down the simulation by inserting some delays here.
while True:
    obs, done = env.reset(), False
    episode_rew = 0
    while not done:
        env.render()
        obs, rew, done, _ = env.step(act(obs[None])[0])
        episode_rew += rew
        time.sleep(0.05)
    print("Episode reward", episode_rew)
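For completeness, a minimal sketch of how a checkpoint such as logs_backup/model could be produced before running the visualization script above. The hyperparameters are illustrative assumptions, not values from the original code; only the environment path and network type are taken from it.

from baselines import deepq
from gym_unity.envs.unity_env import UnityEnv

env = UnityEnv("../unity_envs/kais_banana", 0, use_visual=True,
               uint8_visual=True, flatten_branched=True)
# Train a DQN with a CNN policy and save the variables where the
# visualization script expects to find them via load_path.
act = deepq.learn(env, network='cnn', lr=1e-4, total_timesteps=100000,
                  buffer_size=50000, exploration_fraction=0.1,
                  exploration_final_eps=0.02, print_freq=10)
act.save("logs_backup/model")
env.close()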