Example 1
def test_gym_wrapper_single_visual_and_vector(use_uint8):
    mock_env = mock.MagicMock()
    mock_spec = create_mock_group_spec(
        number_visual_observations=1,
        vector_observation_space_size=3,
        vector_action_space_size=[2],
    )
    mock_decision_step, mock_terminal_step = create_mock_vector_steps(
        mock_spec, number_visual_observations=1)
    setup_mock_unityenvironment(mock_env, mock_spec, mock_decision_step,
                                mock_terminal_step)

    env = UnityToGymWrapper(mock_env,
                            uint8_visual=use_uint8,
                            allow_multiple_obs=True)
    assert isinstance(env, UnityToGymWrapper)
    assert isinstance(env.observation_space, spaces.Tuple)
    assert len(env.observation_space) == 2
    reset_obs = env.reset()
    assert isinstance(reset_obs, list)
    assert len(reset_obs) == 2
    assert all(isinstance(ob, np.ndarray) for ob in reset_obs)
    assert reset_obs[-1].shape == (3, )
    assert len(reset_obs[0].shape) == 3
    actions = env.action_space.sample()
    assert actions.shape == (2, )
    obs, rew, done, info = env.step(actions)
    assert isinstance(obs, list)
    assert len(obs) == 2
    assert all(isinstance(ob, np.ndarray) for ob in obs)
    assert obs[-1].shape == (3, )
    assert isinstance(rew, float)
    assert isinstance(done, (bool, np.bool_))
    assert isinstance(info, dict)

    # check behaviour for allow_multiple_obs = False
    env = UnityToGymWrapper(mock_env,
                            uint8_visual=use_uint8,
                            allow_multiple_obs=False)
    assert isinstance(env, UnityToGymWrapper)
    assert isinstance(env.observation_space, spaces.Box)
    reset_obs = env.reset()
    assert isinstance(reset_obs, np.ndarray)
    assert len(reset_obs.shape) == 3
    actions = env.action_space.sample()
    assert actions.shape == (2, )
    obs, rew, done, info = env.step(actions)
    assert isinstance(obs, np.ndarray)
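
Note: in the original suite these wrapper tests are presumably parametrized over use_uint8 via pytest; a minimal sketch of that decorator (an assumption, not shown in this excerpt):

import pytest

@pytest.mark.parametrize("use_uint8", [True, False], ids=["float", "uint8"])
def test_gym_wrapper_single_visual_and_vector(use_uint8):
    ...  # body as in Example 1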
Example 2
def main():
    # Simulation path:
    env_location = "./simu_envs/SingleAgentVisualization/scene.x86_64"
    # Loading the unity environment:
    unity_env = UnityEnvironment(env_location)
    # Wrapping with the Gym Wrapper:
    env = UnityToGymWrapper(unity_env, allow_multiple_obs=True)
    
    # We reset the environment and get our initial state:
    state = env.reset()
    while True:
        # We select an action based on a policy and state:
        action = sample_policy(state)
        # We perform an action in the environment, receiving
        # the next state, the reward and a flag indicating
        # if the episode has ended:
        next_state, reward, ended, _ = env.step(action)
        # If the episode ended, we reset the environment,
        # otherwise we continue execution:
        if ended:
            state = env.reset()
        else:
            state = next_state
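
Note: sample_policy is not defined in this excerpt. A minimal stand-in that ignores the observation and draws a random action from the wrapper's Gym action space could look like this (hypothetical helper; env assumed visible in scope):

def sample_policy(state):
    # Placeholder policy: ignore the state and return a random action
    # sampled from the UnityToGymWrapper action space.
    return env.action_space.sample()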
Example 3
    def __init__(self, env_config):
        self.worker_index = 0

        if "SM_CHANNEL_TRAIN" in os.environ:
            env_name = os.environ["SM_CHANNEL_TRAIN"] + "/" + env_config["env_name"]
            os.chmod(env_name, 0o755)
            print("Changed environment binary into executable mode.")
            # Try connecting to the Unity3D game instance.
            while True:
                try:
                    unity_env = UnityEnvironment(
                        env_name,
                        no_graphics=True,
                        worker_id=self.worker_index,
                        additional_args=["-logFile", "unity.log"],
                    )
                except UnityWorkerInUseException:
                    self.worker_index += 1
                else:
                    break
        else:
            env_name = env_config["env_name"]
            while True:
                try:
                    unity_env = default_registry[env_name].make(
                        no_graphics=True,
                        worker_id=self.worker_index,
                        additional_args=["-logFile", "unity.log"],
                    )
                except UnityWorkerInUseException:
                    self.worker_index += 1
                else:
                    break

        self.env = UnityToGymWrapper(unity_env)
        self.action_space = self.env.action_space
        self.observation_space = self.env.observation_space
Example 4
def test_gym_wrapper_visual(use_uint8):
    mock_env = mock.MagicMock()
    mock_spec = create_mock_group_spec(
        number_visual_observations=1, vector_observation_space_size=0
    )
    mock_decision_step, mock_terminal_step = create_mock_vector_steps(
        mock_spec, number_visual_observations=1
    )
    setup_mock_unityenvironment(
        mock_env, mock_spec, mock_decision_step, mock_terminal_step
    )

    env = UnityToGymWrapper(mock_env, uint8_visual=use_uint8)
    assert isinstance(env.observation_space, spaces.Box)
    assert isinstance(env, UnityToGymWrapper)
    assert isinstance(env.reset(), np.ndarray)
    actions = env.action_space.sample()
    assert actions.shape[0] == 2
    obs, rew, done, info = env.step(actions)
    assert env.observation_space.contains(obs)
    assert isinstance(obs, np.ndarray)
    assert isinstance(rew, float)
    assert isinstance(done, (bool, np.bool_))
    assert isinstance(info, dict)
Example 5
 def __init__(
     self,
     env_path: Optional[str] = None,
     imprint_video: Optional[str] = None,
     test_video: Optional[str] = None,
     log_dir: Optional[str] = None,
     input_resolution: int = 64,
     episode_steps: int = 1000,
     seed: int = 0,
     test_mode: bool = False,
     base_port: int = UnityEnvironment.BASE_ENVIRONMENT_PORT,
     time_scale: int = 20,
     capture_frame_rate: int = 60,
     width: int = 80,
     height: int = 80,
     use_visual: bool = True,
     **kwargs,
 ):
     engine_config = EngineConfig(
         width=width,
         height=height,
         quality_level=5,
         time_scale=time_scale,
         target_frame_rate=-1,
         capture_frame_rate=capture_frame_rate,
     )
     env_args = _build_chickAI_env_args(input_resolution=input_resolution,
                                        episode_steps=episode_steps,
                                        imprint_video=imprint_video,
                                        test_video=test_video,
                                        log_dir=log_dir,
                                        test_mode=test_mode)
     agent_info_channel = FloatPropertiesChannel()
     unity_env = _make_unity_env(env_path=env_path,
                                 port=base_port,
                                 seed=seed,
                                 env_args=env_args,
                                 engine_config=engine_config,
                                 side_channels=[agent_info_channel])
     env = UnityToGymWrapper(unity_env,
                             flatten_branched=True,
                             use_visual=use_visual)
     super().__init__(env)
     self.env = env
     self.agent_info_channel = agent_info_channel
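
Note: _build_chickAI_env_args and _make_unity_env are project helpers that are not shown. A plausible sketch of _make_unity_env, assuming it simply applies the EngineConfig through an EngineConfigurationChannel and launches the build (names and argument handling are assumptions based on the call site):

from mlagents_envs.environment import UnityEnvironment
from mlagents_envs.side_channel.engine_configuration_channel import EngineConfigurationChannel

def _make_unity_env(env_path, port, seed, env_args, engine_config, side_channels):
    # Hypothetical helper: push the engine configuration via a side channel and
    # start the Unity build (or connect to the Editor when env_path is None).
    engine_channel = EngineConfigurationChannel()
    engine_channel.set_configuration(engine_config)
    return UnityEnvironment(
        file_name=env_path,
        base_port=port,
        seed=seed,
        additional_args=env_args,
        side_channels=[engine_channel] + list(side_channels),
    )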
Example 6
def start_unity_baselines():
    # Set to FALSE for CIP-Pool execution
    # env = make_unity_env('./envs/worm_dynamic_one_agent/linux/worm_dynamic', 1, False)
    # InitialTrainingExample.start_training(env)
    # env.close()

    unity_env = UnityEnvironment(
        './envs/worm_dynamic_one_agent/linux/worm_dynamic', no_graphics=True)
    env = UnityToGymWrapper(unity_env, uint8_visual=False)
    env = Monitor(env, 'results/')
    # The noise objects for TD3
    n_actions = env.action_space.shape[-1]
    action_noise = NormalActionNoise(mean=np.zeros(n_actions),
                                     sigma=0.1 * np.ones(n_actions))

    model = TD3_Baselines(MlpPolicy, env, action_noise=action_noise, verbose=1)
    model.learn(total_timesteps=int(2e6), log_interval=10)
    model.save("td3_worm")
Example 7
def main(params):
    config = vars(parser.parse_args())

    channel = EngineConfigurationChannel()
    unity_env = UnityEnvironment(file_name=None, side_channels=[channel])
    channel.set_configuration_parameters(time_scale=20.0)

    env = UnityToGymWrapper(unity_env)

    agent = DDQN(env, cfg['agent'])
    tag = 'DDQN'

    # Initiate the tracker for stats
    tracker = Tracker("TurtleBot3", tag, seed, cfg['agent'],
                      ['Epoch', 'Ep_Reward'])

    # Train the agent
    agent.train(tracker,
                n_episodes=config['epochs'],
                verbose=config['verbose'],
                params=cfg['agent'],
                hyperp=config)
Example 8
def unity_env_fn(agent_file, time_scale, no_graphics, worker_id):
    """Wrapper function for making unity environment with custom
    speed and graphics options.

    Args:
        agent_file (str): path to the environment binary
        time_scale (float): speed at which to run the simulation
        no_graphics (bool): whether or not to show the simulation
        worker_id (int): worker index (port offset) so several environments can run in parallel

    Returns:
        Gym environment.
    """
    channel = EngineConfigurationChannel()
    unity_env = UnityEnvironment(
        file_name=agent_file,
        no_graphics=no_graphics,
        side_channels=[channel],
        worker_id=worker_id,
    )
    channel.set_configuration_parameters(time_scale=time_scale, )
    env = UnityToGymWrapper(unity_env)
    return env
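
A brief usage sketch for unity_env_fn (the binary path and values are placeholders):

env = unity_env_fn(
    agent_file="./builds/MyAgent/MyAgent.x86_64",  # placeholder path
    time_scale=20.0,      # run the simulation 20x faster than real time
    no_graphics=True,     # headless execution
    worker_id=0,          # increment when running several environments in parallel
)
obs = env.reset()
obs, reward, done, info = env.step(env.action_space.sample())
env.close()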
Example 9
def make_unity_env(config):
    # setup environment
    if sys.platform == "win32":
        env_build = "../env/FreeFallVer2/windows/FreeFall.exe"
    elif sys.platform == "linux":
        env_build = "../env/FreeFallVer2/linux/FreeFall.x86_64"
    elif sys.platform == "darwin":
        env_build = "../env/FreeFallVer2/mac.app"
    else:
        raise AttributeError("{} platform is not supported.".format(
            sys.platform))
    channel = EnvironmentParametersChannel()
    unity_env = UnityEnvironment(env_build,
                                 side_channels=[channel],
                                 additional_args=["-batchmode"])
    env = UnityToGymWrapper(unity_env,
                            uint8_visual=True,
                            allow_multiple_obs=True)
    env = DistanceWrapper(env)
    env = MatplotlibWrapper(env)
    assign_config(channel, config)

    return env
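
Note: assign_config, DistanceWrapper and MatplotlibWrapper are project-specific and not shown here. Judging from the fragment at the top of Example 14, assign_config walks a (possibly nested) config dict and forwards each leaf value to the parameters channel; a minimal sketch under that assumption:

def assign_config(_channel, config, prefix=None):
    # Recursively push every scalar entry of a nested config dict to the
    # EnvironmentParametersChannel as a float parameter.
    for k, v in config.items():
        if isinstance(v, dict):
            assign_config(_channel, v, k)
        else:
            _channel.set_float_parameter(k, float(v))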
Example 10
def main():
    unity_env = UnityEnvironment("./envs/earlyRender")
    env = UnityToGymWrapper(unity_env, uint8_visual=True)
    # logger.configure('./logs')  # Change to log in a different directory
    act = deepq.learn(
        env,
        "cnn",  # conv_only is also a good choice for GridWorld
        lr=2.5e-4,
        total_timesteps=1000000,
        buffer_size=50000,
        exploration_fraction=0.05,
        exploration_final_eps=0.1,
        print_freq=20,
        train_freq=5,
        learning_starts=20000,
        target_network_update_freq=50,
        gamma=0.99,
        prioritized_replay=False,
        checkpoint_freq=1000,
        checkpoint_path='./logs',  # Change to save model in a different directory
        dueling=True)
    print("Saving model to unity_model.pkl")
    act.save("unity_model.pkl")
Example 11
def main():
    agent_file = "3DBall_single/3DBall_single.x86_64"
    no_graphics = True
    channel = EngineConfigurationChannel()
    unity_env = UnityEnvironment(file_name=agent_file,
                                 seed=1,
                                 no_graphics=no_graphics,
                                 side_channels=[channel])
    channel.set_configuration_parameters(time_scale=50., )
    env = UnityToGymWrapper(unity_env)
    l1, l2 = 64, 64
    activation = nn.ReLU
    output_activation = nn.Tanh
    ac = TD3ActorCritic(env.observation_space,
                        env.action_space,
                        l1,
                        l2,
                        activation=activation)

    params = dict(
        gamma=0.99,
        polyak=0.995,
        act_noise=0.1,
        target_noise=0.2,
        epochs=100,
        steps_per_epoch=4000,
        start_steps=10000,
        batch_size=256,
        update_after=10000,
        update_every=50,
        policy_delay=2,
        lr=1e-3,
    )

    model = TD3(ac=ac, env=env, **params)
    model.train()
Example 12
from gym_unity.envs import UnityToGymWrapper
from mlagents_envs.environment import UnityEnvironment
from mlagents_envs.side_channel.engine_configuration_channel import EngineConfigurationChannel


ENV_ID = "../../Robotic-RL-Env/Build/Robotic-RL-Env"

channel = EngineConfigurationChannel()
unity_env = UnityEnvironment(ENV_ID, seed=1, side_channels=[channel])
channel.set_configuration_parameters(time_scale=1.0)
env = UnityToGymWrapper(unity_env, allow_multiple_obs=True)

env.reset()
print(f"Name of the behavior : {env.name}")

# Print the number of observations
print("Number of observations : ", env.observation_space)

# How many actions are possible ?
print(f"There are {env.action_size} action(s)")
Example 13
print("Params:", params)
torch.manual_seed(seed)
np.random.seed(seed)

game_path = os.path.expanduser(
    "/data4/pdp/grantsrb/loc_games/LocationGame2dLinux_9/LocationGame2dLinux.x86_64"
)
channel = EngineConfigurationChannel()
env_channel = EnvironmentParametersChannel()
env = UnityEnvironment(file_name=game_path,
                       side_channels=[channel, env_channel],
                       seed=seed)
channel.set_configuration_parameters(time_scale=1)
for k, v in params.items():
    env_channel.set_float_parameter(k, v)
env = UnityToGymWrapper(env, allow_multiple_obs=True)
print("Environment created!")

#matplotlib.use("tkagg")
obs = env.reset()
#plt.imshow(obs[0])
#plt.show()
done = False
while True:
    print("stepping")
    x, z = [float(y.strip()) for y in str(input("action: ")).split(",")]
    # The obs is a list of length 2 in which the first element is the image and the second is the goal coordinate
    # Reward in this case is the difference between the action location and the nearest object to the action location
    obs, rew, done, _ = env.step([x, z])
    plt.imsave("sample.png", obs[0])
    print("targ:", obs[1])
Example 14
            assign_config(_channel, v, k)
        else:
            _channel.set_float_parameter(k, v)

# setup environment
if sys.platform == "win32":
    env_build = "../env/FreeFall/windows/FreeFall.exe"
elif sys.platform == "linux":
    env_build = "../env/FreeFall/linux/FreeFall.x86_64"
elif sys.platform == "darwin":
    env_build = "../env/FreeFall/mac.app"
else:
    raise AttributeError("{} platform is not supported.".format(sys.platform))
channel = EnvironmentParametersChannel()
unity_env = UnityEnvironment(env_build, side_channels=[channel])
env = UnityToGymWrapper(unity_env, uint8_visual=True, allow_multiple_obs=True)
assign_config(channel, config)

# interface
key_ws = np.array([False] * 2)
def key_press(event): # NOTE: cannot handle multiple key presses at the same time
    global key_ws
    try:
        key = event.key.lower()
    except:
        key = event.key
    key_ws[0] = True if key in ['w', 'up'] else False
    key_ws[1] = True if key in ['s', 'down'] else False
    if key == 'q':
        env.close()
        sys.exit()
Example 15
 def __init__(self, **config):
     self.config = config
     self.env = UnityToGymWrapper(
         UnityEnvironment(),
         allow_multiple_obs=True,  # return all observations (visual + vector) as a list instead of a single array
     )
Example 16
def objective(trial):
    # Domain setup
    # windows_path = "../crawler_single/UnityEnvironment"
    # build_path = windows_path
    linux_path = "../crawler_single/linux/dynamic_server/crawler_dynamic.x86_64"
    build_path = linux_path
    unity_env = UnityEnvironment(file_name=build_path,
                                 seed=1,
                                 side_channels=[],
                                 no_graphics=False)
    env = UnityToGymWrapper(unity_env=unity_env)

    training_episodes = 10000

    params = {}

    params["nr_output_features"] = env.action_space.shape[0]
    params["nr_input_features"] = env.observation_space.shape[0]
    params["env"] = env

    params["lr"] = 3e-4
    params["clip"] = 0.2
    params["hidden_units"] = 512
    params["update_episodes"] = 10
    params["minibatch_size"] = 32
    params["tau"] = 0.95
    params["std"] = 0.35

    params["update_episodes"] = trial.suggest_int(name='update_episodes',
                                                  low=5,
                                                  high=30,
                                                  step=5)
    params["ppo_epochs"] = trial.suggest_int(name='ppo_epochs',
                                             low=2,
                                             high=10,
                                             step=2)
    params["gamma"] = trial.suggest_float(name='gamma',
                                          low=0.98,
                                          high=0.99,
                                          log=True)
    params["beta"] = trial.suggest_float(name='beta',
                                         low=0.08,
                                         high=0.12,
                                         log=True)

    print(params)

    time_str = time.strftime("%y%m%d_%H")
    t = "{}_{}".format(worker_id, time_str)
    print(t)
    writer = SummaryWriter(log_dir='runs/alex/{}'.format(time_str),
                           filename_suffix=t)
    agent = a.PPOLearner(params, writer)

    returns = [
        episode(env, agent, params, writer, i)
        for i in range(training_episodes)
    ]

    torch.save(
        agent.ppo_net, "../Net_Crawler/Alex/PPONet_crawler{}_{}.pt".format(
            worker_id, time_str))
    mean_reward, std_reward = evaluate_model(agent.ppo_net,
                                             env,
                                             n_eval_episodes=10)
    print("{}, {}".format(mean_reward, std_reward))

    writer.close()
    env.close()
    return mean_reward
Example 17
import numpy as np
import matplotlib.pyplot as plt
import torch

from mlagents_envs.environment import UnityEnvironment
from gym_unity.envs import UnityToGymWrapper

from agent import Agent
from ddpg_learning import ddpg

# Initialize the Environment
unity_env = UnityEnvironment(file_name="Visual3DBall\\UnityEnvironment.app")
env = UnityToGymWrapper(unity_env)

# Get the action size
action_size = 2

# Get the state size
state_shape = (84, 84, 12)

# Get number of agents
num_agents = 1

#Initialize the Agent with given hyperparameters

BUFFER_SIZE = int(3e4)  # replay buffer size
BATCH_SIZE = 128  # batch size
GAMMA = 0.99  # discount factor
TAU = 1e-2  # for soft update of target parameters
LR_ACTOR = 5e-4  # learning rate of the actor
Example 18
    return state[idx]

state = torch.load('D:/RL_project/FInal Project/RLCar/Path_folder/46305_0.172707200050354.pth')
def get_action(state):
    if len(state) == 34:
        state = get_il_state(state)
    with torch.no_grad():
        state = torch.Tensor(state).view(1,-1).to(device)
        print("state.shape=",state.shape)
        action = model_req(state)
    return action.cpu().numpy()

def il_eval():
    state = env.reset()
    score = 0
    max_t = 10000
    for t in range(max_t):
        action = get_action(state)
        next_state, reward, done, _ = env.step(action)
        next_state = get_il_state(next_state)
        state = next_state
        score += reward
        if done:
            break 
        
#env = UnityToGymWrapper(UnityEnvironment(base_port=5004), 0)
env = UnityToGymWrapper(UnityEnvironment('D:/RL_project/FInal Project/RLCar/Build/RLCar.exe'), 0)

il_eval()

env.close()
Example 19
class ActorUnity(Actor, RoadworkActorInterface):
    def __init__(self, ctx, actor_id):
        super(ActorUnity, self).__init__(ctx, actor_id)
        self.env = None # Placeholder
        self.actor_id = actor_id

    async def sim_call_method(self, data) -> object:
        method = data['method']
        args = data['args'] # Array of arguments - [] 
        kwargs = data['kwargs'] # Dict 

        return getattr(self.env, method)(*args, **kwargs)

    async def sim_get_state(self, data) -> object:
        key = data['key']
        has_value, val = await self._state_manager.try_get_state(key)
        return val

    async def sim_set_state(self, data) -> None:
        key = data['key']
        value = data['value']

        print(f'Setting Sim State for key {key}', flush=True)
        await self._state_manager.set_state(key, value)
        await self._state_manager.save_state()

    async def _on_activate(self) -> None:
        """An callback which will be called whenever actor is activated."""
        print(f'Activate {self.__class__.__name__} actor!', flush=True)

    async def _on_deactivate(self) -> None:
        """An callback which will be called whenever actor is deactivated."""
        print(f'Deactivate {self.__class__.__name__} actor!', flush=True)

    # see behavior_spec: https://github.com/Unity-Technologies/ml-agents/blob/release_4_docs/docs/Python-API.md#interacting-with-a-unity-environment
    # behavior_spec.action_type and behavior_spec.action_shape is what we need here
    async def sim_action_space(self) -> object:
        behavior_names = list(self.env.behavior_specs.keys()) # the behavior_names which map to a Behavior Spec with observation_shapes, action_type, action_shape
        behavior_idx = 0 # we currently support only 1 behavior spec! even though Unity can support multiple (@TODO)
        behavior_spec = self.env.behavior_specs[behavior_names[behavior_idx]]

        print(f"Action Type: {behavior_spec.action_type}", flush=True)
        print(f"Action Shape: {behavior_spec.action_shape}", flush=True)

        # We can use /src/Lib/python/roadwork/roadwork/json/unserializer.py as an example
        # Currently only ActionType.DISCRETE implemented, all ActionTypes can be found here: https://github.com/Unity-Technologies/ml-agents/blob/3901bad5b0b4e094e119af2f9d0d1304ad3f97ae/ml-agents-envs/mlagents_envs/base_env.py#L247
        # Note: Unity supports DISCRETE or CONTINUOUS action spaces @TODO: implement continuous in a specific env (which one??)
        if behavior_spec.is_action_discrete():
            self.env.action_space = spaces.Discrete(behavior_spec.action_shape[0])

        print(f"Converted Action Space: {self.env.action_space}", flush=True)

        res = Serializer.serializeMeta(self.env.action_space)

        return res
        
    # see behavior_spec: https://github.com/Unity-Technologies/ml-agents/blob/release_4_docs/docs/Python-API.md#interacting-with-a-unity-environment
    # behavior_spec.observation_shapes is what we need, this is an array of tuples [ (), (), (), ... ] which represents variables? (@TODO: Confirm) (e.g. https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Learning-Environment-Examples.md#basic)
    # @TODO: This sounds as a MultiDiscrete environment (https://github.com/openai/gym/blob/master/gym/spaces/multi_discrete.py) so we map to this currently
    async def sim_observation_space(self) -> object:
        behavior_names = list(self.env.behavior_specs.keys()) # the behavior_names which map to a Behavior Spec with observation_shapes, action_type, action_shape
        behavior_idx = 0 # we currently support only 1 behavior spec! even though Unity can support multiple (@TODO)
        behavior_spec = self.env.behavior_specs[behavior_names[behavior_idx]]

        print(f"Observation Shapes: {behavior_spec.observation_shapes}", flush=True)
        observation_space_n_vec = []

        for i in range(0, len(behavior_spec.observation_shapes)):
          observation_space_n_vec.append(behavior_spec.observation_shapes[i][0]) # Get el 0 from the tuple, containing the size

        print(f"Converted Observation Space: {observation_space_n_vec}", flush=True)

        self.env.observation_space = spaces.MultiDiscrete(observation_space_n_vec)
        res = Serializer.serializeMeta(self.env.observation_space)

        return res

    async def sim_create(self, data) -> None:
        """An actor method to create a sim environment."""
        env_id = data['env_id']
        # seed = data['seed']

        print(f'Creating sim with value {env_id}', flush=True)
        print(f"Current dir: {os.getcwd()}", flush=True)
        try:
            print("[Server] Creating Unity Environment", flush=True)
            self.env = UnityEnvironment(f"{os.getcwd()}/src/Server/Unity/envs/{env_id}/{env_id}")

            print("[Server] Resetting environment already", flush=True)
            self.env.reset() # we need to reset first in Unity

            # self.unity_env = UnityEnvironment("./environments/GridWorld")
            # self.env = gym.make(env_id)

            # if seed:
            #     self.env.seed(seed)
        except gym.error.Error as e:
            print(e)
            raise Exception("Attempted to look up malformed environment ID '{}'".format(env_id))
        except Exception as e:
            print(e)
            raise Exception(e)
        except:
            print(sys.exc_info())
            traceback.print_tb(sys.exc_info()[2])
            raise

    async def sim_reset(self) -> object:
        observation = self.env.reset()

        # observation is a ndarray, we need to serialize this
        # therefore, change it to list type which is serializable
        if isinstance(observation, np.ndarray):
            observation = observation.tolist()

        return observation

    async def sim_render(self) -> None:
        self.env.render()

    async def sim_monitor_start(self, data) -> None:
        episodeInterval = 10 # Create a recording every X episodes

        if data['episode_interval']:
            episodeInterval = int(data['episode_interval'])

        v_c = lambda count: count % episodeInterval == 0 # Create every X episodes
        #self.env = gym.wrappers.Monitor(self.env, f'./output/{self.actor_id}', resume=False, force=True, video_callable=v_c)
        #self.env = UnityToGymWrapper(self.unity_environment)

        #defaults to BaseEnv
        self.env = UnityToGymWrapper()

    async def sim_monitor_stop(self) -> None:
        self.env.close()

    async def sim_action_sample(self) -> object:
        action = self.env.action_space.sample()
        return action

    async def sim_step(self, data) -> object:
        action = data['action']

        # Unity requires us to set the action with env.set_actions(behavior_name, action) where action is an array
        behavior_names = list(self.env.behavior_specs.keys()) # the behavior_names which map to a Behavior Spec with observation_shapes, action_type, action_shape
        behavior_idx = 0 # we currently support only 1 behavior spec! even though Unity can support multiple (@TODO)
        behavior_name = behavior_names[behavior_idx]
        self.env.set_actions(behavior_name, np.array([ [ action ] ])) # first dimension = number of agents, second dimension = action?
        self.env.step() # step does not return in Unity

        # Get the DecisionSteps and TerminalSteps
        # -> they both contain: 
        # DecisionSteps: Which agents need an action this step? (Note: contains action masks!)
        # E.g.: DecisionStep(obs=[array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.], dtype=float32)], reward=-0.01, agent_id=0, action_mask=[array([False, False, False])])
        # TerminalSteps: Which agents' episodes ended this step?
        decision_steps, terminal_steps = self.env.get_steps(behavior_names[behavior_idx])


        # print(decision_steps, flush=True)
        # print(terminal_steps, flush=True)
        # print(decision_steps[0], flush=True)
        # print(terminal_steps[0], flush=True)

        # We support 1 decision step currently, get its observation
        # TODO
        decision_step_idx = 0
        decision_step = decision_steps[decision_step_idx]
        obs, reward, agent_id, action_mask = decision_step

        observation = obs[decision_step_idx]
        reward = float(reward)
        isDone = False
        info = {}

        # @TODO: terminal_steps should be implemented, it requires a reset

        # observation is a ndarray, we need to serialize this
        # therefore, change it to list type which is serializable
        if isinstance(observation, np.ndarray):
            observation = observation.tolist()

        return observation, reward, isDone, info
Example 20
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)

    engine_configuration_channel = EngineConfigurationChannel()
    unity_env = UnityEnvironment(side_channels=[engine_configuration_channel],
                                 file_name=args.env)
    engine_configuration_channel.set_configuration_parameters(
        width=200,
        height=200,
        quality_level=5,
        time_scale=1 if args.show else 20,
        target_frame_rate=-1,
        capture_frame_rate=60)

    env = UnityToGymWrapper(unity_env=unity_env)
    env.seed(args.seed)
    env.action_space.seed(args.seed)
    train_tools.EVAL_SEED = args.seed

    obs_dim = env.observation_space.shape[0]
    act_dim = env.action_space.shape[0]
    act_bound = env.action_space.high[0]

    # create nets
    actor_net = DDPGMLPActor(obs_dim=obs_dim,
                             act_dim=act_dim,
                             act_bound=act_bound,
                             hidden_size=[400, 300],
                             hidden_activation=nn.ReLU)
Example 21
from gym_unity.envs import UnityToGymWrapper
from mlagents_envs.environment import UnityEnvironment as UE

if __name__ == "__main__":

    parser = argparse.ArgumentParser()
    parser.add_argument('executable', type=str, help='path to exec')
    parser.add_argument('--num_timesteps',
                        type=int,
                        default=100,
                        help='set number of training episodes')
    args = parser.parse_args()

    env_name = args.executable
    env = UE(file_name=env_name, seed=1, side_channels=[])
    env = UnityToGymWrapper(env)

    # Create log dir
    time_int = int(time.time())
    log_dir = "stable_results/basic_env_{}/".format(time_int)
    os.makedirs(log_dir, exist_ok=True)

    model = PPO('MlpPolicy', env, verbose=1)
    #model.learn(total_timesteps=args.num_timesteps)

    obs = env.reset()
    for i in range(50):
        print(f'\nEPISODE:{i}\n')
        obs = env.reset()
        total, step, done = 0, 0, False
        while not done:
Example 22

# setup environment
if sys.platform == 'win32':
    env_build = "../env/FlyCamera/windows/FlyCamera.exe"
elif sys.platform == 'linux':
    env_build = "../env/FlyCamera/linux/FlyCamera.x86_64"
elif sys.platform == "darwin":
    env_build = "../env/FlyCamera/mac.app"
else:
    raise AttributeError("{} platform is not supported.".format(sys.platform))
channel = EnvironmentParametersChannel()
unity_env = UnityEnvironment(env_build, side_channels=[channel])
channel.set_float_parameter("key_speed", 10.0)
channel.set_float_parameter("cam_sens", 0.25)
env = UnityToGymWrapper(unity_env, uint8_visual=True)

# interface
max_mouse_move = 10 # in pixel; to limit mouse "jump" due to slow in-loop process
mouse_position = np.zeros((2,))
def mouse_move(event):
    global mouse_position
    x, y = event.xdata, event.ydata
    mouse_position = np.array([x, y])

key_wasd = np.array([False] * 4)
def key_press(event): # NOTE: cannot handle multiple key presses at the same time
    global key_wasd
    try:
        key = event.key.lower()
    except:
Example 23
def train(path):
    # env = gym.make("LunarLander-v2")
    # env = wrappers.Monitor(env, "tmp/lunar-lander", video_callable=lambda episode_id: True, force=True)
    unityenv = UnityEnvironment(path)
    env = UnityToGymWrapper(unity_env=unityenv, flatten_branched=True)
    ddqnAgent = DDQNAgent(alpha=0.0001,
                          gamma=0.99,
                          nActions=7,
                          epsilon=1.0,
                          batchSize=512,
                          inputShape=210)
    nEpisodes = 1000
    ddqnScores = []
    ddqnAverageScores = []
    epsilonHistory = []
    stepsPerEpisode = []
    for episode in range(nEpisodes):
        StartTime = time.time()
        done = False
        score = 0
        steps = 0
        observation = env.reset()
        while not done:
            action = ddqnAgent.chooseAction(observation)
            observationNew, reward, done, info = env.step(action)
            score += reward
            ddqnAgent.remember(state=observation,
                               stateNew=observationNew,
                               action=action,
                               reward=reward,
                               done=done)
            observation = observationNew
            ddqnAgent.learn()
            steps += 1
        epsilonHistory.append(ddqnAgent.epsilon)
        ddqnScores.append(score)
        averageScore = np.mean(ddqnScores)
        ddqnAverageScores.append(averageScore)
        stepsPerEpisode.append(steps)
        ElapsedTime = time.time() - StartTime
        ElapsedTime = ElapsedTime / 60
        print("Episode:", episode, "Score: %.2f" % score,
              "Average Score: %.2f" % averageScore, "Run Time:", ElapsedTime,
              "Minutes", "Epsilon:", ddqnAgent.epsilon, "Steps:", steps)
        if episode > 1 and episode % 9 == 0:
            ddqnAgent.saveModel()
    env.close()
    x = [i for i in range(nEpisodes)]
    fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(10, 10))
    fig.suptitle("DDQN Hallway")
    ax1.plot(x, ddqnScores, "C1")
    ax1.set_title('Episodes vs Scores')
    ax1.set(xlabel='Episodes', ylabel='Scores')
    ax2.plot(x, ddqnAverageScores, "C2")
    ax2.set_title('Episodes vs Average Scores')
    ax2.set(xlabel='Episodes', ylabel='Average Scores')
    ax3.plot(x, epsilonHistory, "C3")
    ax3.set_title('Episodes vs Epsilon Decay')
    ax3.set(xlabel='Episodes', ylabel='Epsilon Decay')
    ax4.plot(x, stepsPerEpisode, "C4")
    ax4.set_title('Episodes vs Steps Per Episode')
    ax4.set(xlabel='Episodes', ylabel='Steps')
    plt.savefig('Hallway.png')
Example 24
def test_closing(env_name):
    """
    Run the gym test and closes the environment multiple times
    :param env_name: Name of the Unity environment binary to launch
    """

    try:
        env1 = UnityToGymWrapper(
            UnityEnvironment(env_name, worker_id=1, no_graphics=True))
        env1.close()
        env1 = UnityToGymWrapper(
            UnityEnvironment(env_name, worker_id=1, no_graphics=True))
        env2 = UnityToGymWrapper(
            UnityEnvironment(env_name, worker_id=2, no_graphics=True))
        env2.reset()
    finally:
        env1.close()
        env2.close()
Example 25
class FooCarEnv(gym.Env):
	_channel = EnvironmentParametersChannel()

	PathSpace = {
		'xyz': 0,
		'xy': 2,
		'yz': 2,
		'xz': 2
	}

	def __init__(self, no_graphics:bool=False, seed:int=1, **config):
		self._config = config
		worker_id = 0
		if 'worker_id' in config:
			worker_id = config['worker_id']
		self._unity_env = UnityEnvironment(
			file_name=UNITY_ENV_EXE_FILE,
			# file_name=None, # Unity Editor Mode (debug)
			no_graphics=no_graphics,
			seed=seed, 
			side_channels=[self._channel],
			worker_id=worker_id
		)
		for key, value in config.items():
			self._channel.set_float_parameter(key, float(value))
		
		self._gym_env = UnityToGymWrapper(self._unity_env)

	def step(self, action):
		obs, reward, done, info = self._gym_env.step(action)
		size = self.observation_size

		return obs[:size], reward, done, info

	def reset(self):
		obs = self._gym_env.reset()
		size = self.observation_size
		return obs[:size]

	def render(self, mode="rgb_array"):
		return self._gym_env.render(mode=mode)
	
	def seed(self, seed=None):
		self._gym_env.seed(seed=seed) # it will throw a warning

	def close(self):
		self._gym_env.close()

	@property
	def metadata(self):
		return self._gym_env.metadata

	@property
	def reward_range(self) -> Tuple[float, float]:
		return self._gym_env.reward_range

	@property
	def action_space(self):
		return self._gym_env.action_space

	@property
	def observation_space(self):
		config = self._config
		space = self.PathSpace

		path_space = config['path_space'] if 'path_space' in config else space['xz']
		r = config['radius_anchor_circle'] if 'radius_anchor_circle' in config else 8.0
		r_e = config['radius_epsilon_ratio'] if 'radius_epsilon_ratio' in config else 0.7
		h = config['max_anchor_height'] if 'max_anchor_height' in config else 1.0

		xyz_mode = (path_space == space['xyz'])
		bound = max(r * (1 + r_e), h if xyz_mode else 0)
		shape = (self.observation_size,)
		
		return gym.spaces.Box(-bound, +bound, dtype=np.float32, shape=shape)
		
	@property
	def observation_size(self):
		# Reference: readonly variable (Unity)FooCar/CarAgent.ObservationSize
		config = self._config
		space = self.PathSpace

		path_space = config['path_space'] if 'path_space' in config else space['xz']
		ticker_end = config['ticker_end'] if 'ticker_end' in config else 5
		ticker_start = config['ticker_start'] if 'ticker_start' in config else -3

		xyz_mode = (path_space == space['xyz'])
		basic_num = 6
		point_dim = 3 if xyz_mode else 2

		return basic_num + 2 * point_dim * (ticker_end - ticker_start + 1)
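
For instance, with the defaults above (path_space 'xz', so point_dim = 2, and tickers from -3 to 5, i.e. 9 samples), observation_size = 6 + 2 * 2 * 9 = 42, and observation_space is a 42-dimensional Box bounded by max(8.0 * 1.7, 0) = 13.6.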
Example 26
    ppo_var = 0  # 0 no minibatch, >=1 minibatch

    if len(sys.argv) > 1:
        worker_id = int(sys.argv[1])
    if len(sys.argv) > 2:
        ppo_var = int(sys.argv[2])

    # Domain setup

    # windows_path = "../crawler_build/windows/dynamic/UnityEnvironment"
    # build_path = windows_path
    linux_path = "../crawler_build/linux/dynamic_server/crawler_dynamic.x86_64"
    build_path = linux_path

    unity_env = UnityEnvironment(file_name=build_path, worker_id=worker_id)
    crawler_env = UnityToGymWrapper(unity_env=unity_env)

    training_episodes = 10000

    params = {}

    params["nr_output_features"] = crawler_env.action_space.shape[0]
    params["nr_input_features"] = crawler_env.observation_space.shape[0]
    params["env"] = crawler_env

    params["update_episodes"] = 10
    params["ppo_epochs"] = 4
    params["minibatch_size"] = 32

    params["lr"] = 3e-4
    params["clip"] = 0.2
Example 27
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("-m",
                        "--model",
                        default=MODEL,
                        help="Model file to load")
    parser.add_argument("-e",
                        "--env",
                        default=ENV_ID,
                        help="Environment name to use, default=" + ENV_ID)
    args = parser.parse_args()

    channel = EngineConfigurationChannel()
    unity_env = UnityEnvironment(ENV_ID, seed=1, side_channels=[channel])
    channel.set_configuration_parameters(time_scale=1.0)
    env = UnityToGymWrapper(unity_env, allow_multiple_obs=True)

    net = model.DDPGActor(env.observation_space.shape[0],
                          env.action_space.shape[0])
    net.load_state_dict(torch.load(args.model))

    obs = env.reset()
    total_reward = 0.0
    total_steps = 0
    while True:
        obs_v = torch.FloatTensor([obs])
        mu_v = net(obs_v)
        action = mu_v.squeeze(dim=0).data.numpy()
        action = np.clip(action, -1, 1)
        obs, reward, done, _ = env.step(action)
        total_reward += reward
Example 28
import time

MIN_THROTTLE = 0.45
MAX_THROTTLE = 0.6
MAX_STEERING_DIFF = 0.15
JERK_REWARD_WEIGHT = 0.0
MAX_STEERING = 1
MIN_STEERING = -MAX_STEERING
STEERING_GAIN = 1
STEERING_BIAS = 0

from gym_unity.envs import UnityToGymWrapper
from mlagents_envs.environment import UnityEnvironment

unity_gym_env = UnityToGymWrapper(
    UnityEnvironment(),
    allow_multiple_obs=True,  # return all observations (visual + vector) as a list instead of a single array
)


class Envrionment(object):
    def __init__(
        self,
        vae=None,
        min_throttle=0.45,
        max_throttle=0.6,
        n_command_history=0,
        frame_skip=1,
        n_stack=1,
        action_lambda=0.5,
    ):
        # copy args