Example #1
    def launch(self, env_config, use_eval=False):
        environment_path = (env_config["environment_path_eval"]
                            if use_eval else env_config["environment_path"])

        port = env_config.get("port", 0)
        if use_eval and port:
            port += 2
        use_visual = env_config.get("use_visual", False)
        use_vector = env_config.get("use_vector", True)
        multiagent = env_config.get("multiagent", False)
        uint8_visual = env_config.get("uint8_visual", True)
        flatten_branched = env_config.get("flatten_branched", True)

        self.env = UnityEnv(
            environment_path,
            port,
            use_visual=use_visual,
            use_vector=use_vector,
            uint8_visual=uint8_visual,
            multiagent=multiagent,
            flatten_branched=flatten_branched,
        )
        self.action_space = self.env._action_space
        self.observation_space = self.env._observation_space
        # agent name must be unique among **all** agents
        self.agent_name = [
            f'{port}_{i}' for i in range(self.env.number_agents)
        ]
Example #2
def main():
    env = UnityEnv(
        "/homes/gkumar/Documents/UnityProjects/maze/Build/mazeBasic_Discrete_imageOnly",
        0,
        use_visual=True,
        uint8_visual=True)

    HOSTNAME = os.uname()[1]

    logger.configure('./logs/' + HOSTNAME)  # Change to log in a different directory

    act = deepq.learn(
        env,
        "cnn",  # conv_only is also a good choice for GridWorld
        lr=2.5e-4,
        total_timesteps=1000000,
        buffer_size=50000,
        exploration_fraction=0.05,
        exploration_final_eps=0.1,
        print_freq=20,
        train_freq=5,
        learning_starts=20000,
        target_network_update_freq=50,
        gamma=0.99,
        prioritized_replay=False,
        checkpoint_freq=1000,
        checkpoint_path='./logs',  # Change to save model in a different directory
        dueling=True)
    print("Saving model to unity_model.pkl")
    act.save("unity_model.pkl")
Example #3
    def get_gym_env(self, unity_file):
        """
            @param unity_file   the full path of Unity environment

            Returns an Open AI Gym that wraps given Unity environment
            based on selected trainer parameters.
        """
        return UnityEnv(unity_file,
                        self.params.get('worker_id', 0),
                        use_visual=self.params.get('use_visual', False),
                        uint8_visual=self.params.get('use_uint8_visual', False),
                        multiagent=self.params.get('multiagent', False),
                        flatten_branched=self.params.get('flatten_branched', False),
                        allow_multiple_visual_obs=self.params.get('allow_multiple_visual_obs', False),
                        no_graphics=self.params.get('no_graphics', True))
Example #4
def wrap_unity_env(env_path, frame_skip=0, frame_stack=False, chw_style=False, **unity_config):

    worker_id = unity_config.get('port', 9527)
    use_visual = unity_config.get('use_visual', True)
    uint8_visual = unity_config.get('uint8_visual', True)
    flatten_branched = unity_config.get('flatten_branched', True)
    multiagent = unity_config.get('multiagent', False)

    env = UnityEnv(env_path,
                   worker_id=worker_id,
                   use_visual=use_visual,
                   uint8_visual=uint8_visual,
                   flatten_branched=flatten_branched,
                   multiagent=multiagent)

    # Be careful with `Decision Interval` in the agent script of Unity ML-Agents
    if frame_skip > 0:
        env = MaxAndSkipEnv(env, frame_skip)
    if chw_style:
        env = CHWStyle(env)
    if frame_stack:
        env = FrameStack(env, frame_stack, chw_style)

    return env
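
A minimal call sketch for the helper above; the build path and port are placeholders, and MaxAndSkipEnv, CHWStyle, and FrameStack are assumed to be the wrapper classes imported by the surrounding module:

env = wrap_unity_env(
    "./Build/MyUnityEnv",   # placeholder build path
    frame_skip=4,           # wraps with MaxAndSkipEnv(env, 4)
    frame_stack=4,          # wraps with FrameStack(env, 4, chw_style)
    chw_style=True,         # wraps with CHWStyle (presumably HWC -> CHW observations)
    port=9530,              # forwarded to UnityEnv as the worker_id
)

obs = env.reset()
obs, reward, done, info = env.step(env.action_space.sample())
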
Example #5
def main():
    mask_unused_gpus()
    env = UnityEnv("../unity_envs/kais_banana", 0, use_visual=True, uint8_visual=True, flatten_branched=True)
    logger.configure('./logs')  # Change to log in a different directory
    act = deepq.learn(
        env,
        "cnn", # conv_only is also a good choice for GridWorld
        lr=2.5e-4,
        total_timesteps=100000,  # trailing zero commented out (was 1000000)
        buffer_size=50000,
        exploration_fraction=0.05,
        exploration_final_eps=0.1,
        print_freq=20,
        train_freq=5,
        learning_starts=20000,
        target_network_update_freq=50,
        gamma=0.99,
        prioritized_replay=False,
        checkpoint_freq=1000,
        checkpoint_path='./logs', # Change to save model in a different directory
        dueling=True
    )
    print("Saving model to unity_model.pkl")
    act.save("unity_model.pkl")
Example #6
    start = time.time()
    mask_unused_gpus()

    # Prevent TensorFlow from eating up all of the GPU memory
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    K.set_session(sess)

    # Setting up the env
    # TODO: Worker_id can be changed to run in parallel
    # Flatten_branched gives us a one-hot encoding of all 54 action combinations.
    print("Opening unity env")
    env = UnityEnv(
        "../unity_envs/kais_banana2",
        worker_id=39,
        use_visual=True,
        flatten_branched=True
    )  #KOE: Note: If I accept images as uint8_visual=True, I have to convert to float later.

    print("Resetting env")
    initial_observation = env.reset()
    #KOETODO This would have to be manually configured for each environment.
    #KOE: What is this misc??

    #misc = game_state.game_variables  # [Health]
    #prev_misc = misc
    #KOE: I think this should be the same as my battery measure.
    misc = 100  # [Health]
    prev_misc = misc

    # game.get_available_buttons_size() # [Turn Left, Turn Right, Move Forward]
Example #7
    start = time.time()
    mask_unused_gpus()

    # Prevent TensorFlow from eating up all of the GPU memory
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    K.set_session(sess)

    # Setting up the env
    # TODO: Worker_id can be changed to run in parallel
    # Flatten_branched gives us a one-hot encoding of all 54 action combinations.
    print("Opening unity env")
    env = UnityEnv("../unity_envs/kais_banana3",
                   worker_id=24,
                   use_visual=True,
                   uint8_visual=True,
                   flatten_branched=True)

    print("Resetting env")
    initial_observation = env.reset()
    #KOETODO This would have to be manually configured for each environment.
    #KOE: What is this misc??

    #misc = game_state.game_variables  # [Health]
    #prev_misc = misc
    #KOE: I think this should be the same as my battery measure.
    misc = 100  # [Health]
    prev_misc = misc

    # game.get_available_buttons_size() # [Turn Left, Turn Right, Move Forward]
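
The flatten_branched comment in the two snippets above refers to ML-Agents' branched discrete actions being collapsed into a single Discrete space. A small sketch of the counting, with made-up branch sizes chosen so they multiply to 54:

import numpy as np
from gym import spaces

# Hypothetical branched action space: 3 * 3 * 3 * 2 = 54 combinations.
branched = spaces.MultiDiscrete([3, 3, 3, 2])
flattened = spaces.Discrete(int(np.prod(branched.nvec)))

print(flattened.n)  # 54 discrete actions, one per combination
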
Example #8
class UnityEnvWrapper:
    def __init__(self, env_config=None, use_eval=False, rpc_mode=False):
        self.env = None
        if not rpc_mode:
            assert (env_config is not None)
            self.launch(env_config, use_eval)

    def launch(self, env_config, use_eval=False):
        environment_path = (env_config["environment_path_eval"]
                            if use_eval else env_config["environment_path"])

        port = env_config.get("port", 0)
        if use_eval and port:
            port += 2
        use_visual = env_config.get("use_visual", False)
        use_vector = env_config.get("use_vector", True)
        multiagent = env_config.get("multiagent", False)
        uint8_visual = env_config.get("uint8_visual", True)
        flatten_branched = env_config.get("flatten_branched", True)

        self.env = UnityEnv(
            environment_path,
            port,
            use_visual=use_visual,
            use_vector=use_vector,
            uint8_visual=uint8_visual,
            multiagent=multiagent,
            flatten_branched=flatten_branched,
        )
        self.action_space = self.env._action_space
        self.observation_space = self.env._observation_space
        # agent name must be unique among **all** agents
        self.agent_name = [
            f'{port}_{i}' for i in range(self.env.number_agents)
        ]

    def _transform_list_to_dict(self, objs):
        return {name: obj for name, obj in zip(self.agent_name, objs)}

    def _transform_dict_to_list(self, objs):
        return [objs[name] for name in self.agent_name]

    def step(self, act, action_settings=None):
        action = np.stack(self._transform_dict_to_list(act)).tolist()
        observation, reward, done, info = self.env.step(action)
        transform = self._transform_list_to_dict
        info = list(map(json.loads, info['text_observation']))
        for i, x in enumerate(info):
            x['done'] = done[i]
        done = [False] * 4  # hard-coded for four agents; the real per-agent dones were stored in info above
        done_dict = transform(done)
        done_dict['__all__'] = False  # no early termination (for logging)
        return transform(observation), transform(reward), done_dict, transform(
            info)

    def reset(self, reset_settings=None):
        obs = self.env.reset()
        return self._transform_list_to_dict(obs)

    def get_env_spaces(self):
        spaces = self.action_space, self.observation_space, self.agent_name
        p = pickle.dumps(spaces)
        z = zlib.compress(p)
        return z

    def get_action_count(self):
        if isinstance(self.env.action_space, gym.spaces.Discrete):
            return self.env.action_space.n
        elif isinstance(self.env.action_space, gym.spaces.MultiDiscrete):
            return self.env.action_space.nvec.tolist()
        raise NotImplementedError

    def sample(self):
        return self.env.action_space.sample()

    def number_agents(self):
        return self.env.number_agents

    def env_close(self):
        if self.env:
            self.env.close()
            self.env = None

    def close(self):
        self.env_close()

    def hello(self):
        print('Hello World')
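
A minimal usage sketch for the wrapper above. The config keys mirror the ones read in launch(); the build paths and port are placeholders, and the sketch assumes the Unity build publishes per-agent JSON text observations, since step() parses info['text_observation'] with json.loads:

env_config = {
    "environment_path": "./Build/MyUnityEnv",            # placeholder
    "environment_path_eval": "./Build/MyUnityEnv_eval",  # placeholder
    "port": 9000,
    "use_visual": False,
    "use_vector": True,
    "multiagent": True,
}

wrapper = UnityEnvWrapper(env_config)

# reset()/step() use dicts keyed by the per-agent names, e.g. "9000_0", "9000_1", ...
obs = wrapper.reset()
actions = {name: wrapper.sample() for name in wrapper.agent_name}
obs, rewards, dones, infos = wrapper.step(actions)
wrapper.close()
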
Example #9
import cv2

np.set_printoptions(threshold=np.inf)

# create grid and write to file
maze_trials, max_episode, max_steps = 1, 100000, 900
action_repeat = 30

buffer = ReplayBuffer(10000)
learner = Learner(buffer)

# assume that Unity reads that file and generates the maze dynamically
env = UnityEnv(
    "/home/gaurav/MySharedRepository/mazeContinuousTarget_fixed_camera/Build/mazeContinuousTarget_fixed_camera",
    0,
    use_visual=True,
    uint8_visual=True)


def drawTrajectory():

    action_repeat = 150

    # size of experience
    #   11 velocities on x axis
    #   11 velocities on y axis
    #   11 actions on x axis
    #   11 actions on y axis
    #   150 action repeats
    #   2 position values
Example #10
def sampleTrajectory():

    action_repeat = 300

    action_range_around_zero = 20  # should be even

    #action_range_around_zero = [-9, -8, -7, -6, -5, 0, 5, 6, 7, 8, 9]

    period = 1

    if action_range_around_zero % 2 != 0:
        return False

    env = UnityEnv(
        "/homes/gkumar/Documents/UnityProjects/mazeContinuousTarget_fixed_camera_data_collection/Build/mazeContinuousTarget_fixed_camera_data_collection",
        0,
        use_visual=True,
        uint8_visual=True)

    list_of_data = []

    for i in range(int(-1 * action_range_around_zero / 2),
                   int(action_range_around_zero / 2 + 1),
                   period):  # velocity X [-5, -3, -1, 1, 3, 5]
        for j in range(int(-1 * action_range_around_zero / 2),
                       int(action_range_around_zero / 2 + 1),
                       period):  # velocity Y [-5, -3, -1, 1, 3, 5]
            print(i, j)
            for k in range(int(-1 * action_range_around_zero / 2),
                           int(action_range_around_zero / 2 + 1),
                           period):  # action X [-5, -3, -1, 1, 3, 5]
                for l in range(int(-1 * action_range_around_zero / 2),
                               int(action_range_around_zero / 2 + 1),
                               period):  # action Y [-5, -3, -1, 1, 3, 5]

                    single_tuple = np.zeros(4 + 2 * action_repeat)

                    obs_fovea = env.reset()
                    obs_fovea_next, reward, done, info = env.step([[i], [j],
                                                                   [k], [l]])

                    # action
                    single_tuple[0] = i
                    single_tuple[1] = j
                    # velocity
                    single_tuple[2] = k
                    single_tuple[3] = l

                    for m in range(0, action_repeat):

                        vec_obs = info["brain_info"].vector_observations[0]
                        single_tuple[3 + m * 2 + 1] = vec_obs[2]
                        single_tuple[3 + m * 2 + 2] = vec_obs[3]

                        x_vel_new = vec_obs[6]
                        y_vel_new = vec_obs[7]

                        # distance of the current position from the first recorded one
                        dist = math.hypot(single_tuple[3 + m * 2 + 1] - single_tuple[4],
                                          single_tuple[3 + m * 2 + 2] - single_tuple[5])
                        if dist < 6:

                            obs_fovea_next, reward, done, info = env.step(
                                [[i], [j], [x_vel_new], [y_vel_new]])

                        else:

                            # fill the remaining slots with the position from the previous step
                            for n in range(m, action_repeat):
                                single_tuple[3 + n * 2 + 1] = single_tuple[3 + (m - 1) * 2 + 1]
                                single_tuple[3 + n * 2 + 2] = single_tuple[3 + (m - 1) * 2 + 2]

                            break

                    list_of_data.append(single_tuple)

    h5f = h5py.File('data.h5', 'w')
    h5f.create_dataset('dataset_1', data=list_of_data)
    h5f.close()
Example #11
import gym

from baselines import deepq
from baselines import logger
import time

from gym_unity.envs.unity_env import UnityEnv

import subprocess as sp
import os

env = UnityEnv("../unity_envs/kais_banana",
               0,
               use_visual=True,
               uint8_visual=True,
               flatten_branched=True)

act = deepq.learn(env,
                  network='cnn',
                  total_timesteps=0,
                  load_path="logs_backup/model")  #"unity_model.pkl")

# Visualizing
# TODO: Maybe slow down the simulation by inserting some delays here.
while True:
    obs, done = env.reset(), False
    episode_rew = 0
    while not done:
        env.render()
        obs, rew, done, _ = env.step(act(obs[None])[0])
        episode_rew += rew