def test_gym_wrapper_single_visual_and_vector(use_uint8):
    mock_env = mock.MagicMock()
    mock_spec = create_mock_group_spec(
        number_visual_observations=1,
        vector_observation_space_size=3,
        vector_action_space_size=[2],
    )
    mock_decision_step, mock_terminal_step = create_mock_vector_steps(
        mock_spec, number_visual_observations=1
    )
    setup_mock_unityenvironment(
        mock_env, mock_spec, mock_decision_step, mock_terminal_step
    )

    env = UnityToGymWrapper(mock_env, uint8_visual=use_uint8, allow_multiple_obs=True)
    assert isinstance(env, UnityToGymWrapper)
    assert isinstance(env.observation_space, spaces.Tuple)
    assert len(env.observation_space) == 2
    reset_obs = env.reset()
    assert isinstance(reset_obs, list)
    assert len(reset_obs) == 2
    assert all(isinstance(ob, np.ndarray) for ob in reset_obs)
    assert reset_obs[-1].shape == (3,)
    assert len(reset_obs[0].shape) == 3
    actions = env.action_space.sample()
    assert actions.shape == (2,)
    obs, rew, done, info = env.step(actions)
    assert isinstance(obs, list)
    assert len(obs) == 2
    assert all(isinstance(ob, np.ndarray) for ob in obs)
    assert obs[-1].shape == (3,)
    assert isinstance(rew, float)
    assert isinstance(done, (bool, np.bool_))
    assert isinstance(info, dict)

    # Check behaviour for allow_multiple_obs = False
    env = UnityToGymWrapper(mock_env, uint8_visual=use_uint8, allow_multiple_obs=False)
    assert isinstance(env, UnityToGymWrapper)
    assert isinstance(env.observation_space, spaces.Box)
    reset_obs = env.reset()
    assert isinstance(reset_obs, np.ndarray)
    assert len(reset_obs.shape) == 3
    actions = env.action_space.sample()
    assert actions.shape == (2,)
    obs, rew, done, info = env.step(actions)
    assert isinstance(obs, np.ndarray)
def main():
    # Simulation path:
    env_location = "./simu_envs/SingleAgentVisualization/scene.x86_64"
    # Loading the Unity environment:
    unity_env = UnityEnvironment(env_location)
    # Wrapping with the Gym Wrapper:
    env = UnityToGymWrapper(unity_env, allow_multiple_obs=True)
    # We reset the environment and get our initial state:
    state = env.reset()
    while True:
        # We select an action based on a policy and state:
        action = sample_policy(state)
        # We perform an action in the environment, receiving
        # the next state, the reward and a flag indicating
        # if the episode has ended:
        next_state, reward, ended, _ = env.step(action)
        # If the episode ended, we reset the environment,
        # otherwise we continue execution:
        if ended:
            state = env.reset()
        else:
            state = next_state
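# `sample_policy` is not defined in this snippet. A minimal placeholder
# sketch, assuming a uniform-random policy over the wrapped environment's
# action space (hypothetical helper; a trained policy would go here):
def sample_policy(state):
    # Placeholder: ignore the state and draw a uniform-random action.
    # Assumes `env` is visible at call time (e.g., a module-level global).
    return env.action_space.sample()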
def __init__(self, env_config):
    self.worker_index = 0
    if "SM_CHANNEL_TRAIN" in os.environ:
        env_name = os.environ["SM_CHANNEL_TRAIN"] + "/" + env_config["env_name"]
        os.chmod(env_name, 0o755)
        print("Changed environment binary into executable mode.")
        # Try connecting to the Unity3D game instance.
        while True:
            try:
                unity_env = UnityEnvironment(
                    env_name,
                    no_graphics=True,
                    worker_id=self.worker_index,
                    additional_args=["-logFile", "unity.log"],
                )
            except UnityWorkerInUseException:
                self.worker_index += 1
            else:
                break
    else:
        env_name = env_config["env_name"]
        while True:
            try:
                unity_env = default_registry[env_name].make(
                    no_graphics=True,
                    worker_id=self.worker_index,
                    additional_args=["-logFile", "unity.log"],
                )
            except UnityWorkerInUseException:
                self.worker_index += 1
            else:
                break

    self.env = UnityToGymWrapper(unity_env)
    self.action_space = self.env.action_space
    self.observation_space = self.env.observation_space
def test_gym_wrapper_visual(use_uint8):
    mock_env = mock.MagicMock()
    mock_spec = create_mock_group_spec(
        number_visual_observations=1, vector_observation_space_size=0
    )
    mock_decision_step, mock_terminal_step = create_mock_vector_steps(
        mock_spec, number_visual_observations=1
    )
    setup_mock_unityenvironment(
        mock_env, mock_spec, mock_decision_step, mock_terminal_step
    )

    env = UnityToGymWrapper(mock_env, uint8_visual=use_uint8)
    assert isinstance(env.observation_space, spaces.Box)
    assert isinstance(env, UnityToGymWrapper)
    assert isinstance(env.reset(), np.ndarray)
    actions = env.action_space.sample()
    assert actions.shape[0] == 2
    obs, rew, done, info = env.step(actions)
    assert env.observation_space.contains(obs)
    assert isinstance(obs, np.ndarray)
    assert isinstance(rew, float)
    assert isinstance(done, (bool, np.bool_))
    assert isinstance(info, dict)
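# Note: both tests above take a `use_uint8` argument with no fixture shown.
# In the upstream ml-agents test suite such tests are parametrized over both
# pixel formats with a pytest decorator along these lines (assumed here):
#
#     @pytest.mark.parametrize("use_uint8", [True, False], ids=["float", "uint8"])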
def __init__(
    self,
    env_path: Optional[str] = None,
    imprint_video: Optional[str] = None,
    test_video: Optional[str] = None,
    log_dir: Optional[str] = None,
    input_resolution: int = 64,
    episode_steps: int = 1000,
    seed: int = 0,
    test_mode: bool = False,
    base_port: int = UnityEnvironment.BASE_ENVIRONMENT_PORT,
    time_scale: int = 20,
    capture_frame_rate: int = 60,
    width: int = 80,
    height: int = 80,
    use_visual: bool = True,
    **kwargs,
):
    engine_config = EngineConfig(
        width=width,
        height=height,
        quality_level=5,
        time_scale=time_scale,
        target_frame_rate=-1,
        capture_frame_rate=capture_frame_rate,
    )
    env_args = _build_chickAI_env_args(
        input_resolution=input_resolution,
        episode_steps=episode_steps,
        imprint_video=imprint_video,
        test_video=test_video,
        log_dir=log_dir,
        test_mode=test_mode,
    )
    agent_info_channel = FloatPropertiesChannel()
    unity_env = _make_unity_env(
        env_path=env_path,
        port=base_port,
        seed=seed,
        env_args=env_args,
        engine_config=engine_config,
        side_channels=[agent_info_channel],
    )
    env = UnityToGymWrapper(unity_env, flatten_branched=True, use_visual=use_visual)
    super().__init__(env)
    self.env = env
    self.agent_info_channel = agent_info_channel
def start_unity_baselines():
    # Set to FALSE for CIP-Pool execution
    # env = make_unity_env('./envs/worm_dynamic_one_agent/linux/worm_dynamic', 1, False)
    # InitialTrainingExample.start_training(env)
    # env.close()
    unity_env = UnityEnvironment(
        './envs/worm_dynamic_one_agent/linux/worm_dynamic', no_graphics=True)
    env = UnityToGymWrapper(unity_env, uint8_visual=False)
    env = Monitor(env, 'results/')

    # The noise objects for TD3
    n_actions = env.action_space.shape[-1]
    action_noise = NormalActionNoise(mean=np.zeros(n_actions),
                                     sigma=0.1 * np.ones(n_actions))

    model = TD3_Baselines(MlpPolicy, env, action_noise=action_noise, verbose=1)
    model.learn(total_timesteps=int(2e6), log_interval=10)
    model.save("td3_worm")
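# A short evaluation sketch for the saved model, assuming `TD3_Baselines` is
# an alias for Stable-Baselines3's TD3 (load/predict below is the standard
# SB3 API; the helper itself is illustrative):
def evaluate_saved_worm(env):
    model = TD3_Baselines.load("td3_worm")
    obs = env.reset()
    done = False
    episode_reward = 0.0
    while not done:
        action, _states = model.predict(obs, deterministic=True)
        obs, reward, done, info = env.step(action)
        episode_reward += reward
    return episode_reward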
def main(params):
    config = vars(parser.parse_args())
    channel = EngineConfigurationChannel()
    unity_env = UnityEnvironment(file_name=None, side_channels=[channel])
    channel.set_configuration_parameters(time_scale=20.0)
    env = UnityToGymWrapper(unity_env)
    agent = DDQN(env, cfg['agent'])
    tag = 'DDQN'

    # Initiate the tracker for stats
    tracker = Tracker("TurtleBot3", tag, seed, cfg['agent'], ['Epoch', 'Ep_Reward'])

    # Train the agent
    agent.train(tracker,
                n_episodes=config['epochs'],
                verbose=config['verbose'],
                params=cfg['agent'],
                hyperp=config)
def unity_env_fn(agent_file, time_scale, no_graphics, worker_id):
    """Wrapper function for making a Unity environment with custom
    speed and graphics options.

    Args:
        agent_file (str): path to the environment binary
        time_scale (float): speed at which to run the simulation
        no_graphics (bool): whether or not to show the simulation
        worker_id (int): offset for the port used to talk to the Unity
            instance, so several environments can run in parallel

    Returns:
        Gym environment.
    """
    channel = EngineConfigurationChannel()
    unity_env = UnityEnvironment(
        file_name=agent_file,
        no_graphics=no_graphics,
        side_channels=[channel],
        worker_id=worker_id,
    )
    channel.set_configuration_parameters(time_scale=time_scale)
    env = UnityToGymWrapper(unity_env)
    return env
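# Minimal usage sketch (the binary path and settings are placeholders):
env = unity_env_fn("envs/MyAgent.x86_64", time_scale=20.0,
                   no_graphics=True, worker_id=0)
obs = env.reset()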
def make_unity_env(config):
    # setup environment
    if sys.platform == "win32":
        env_build = "../env/FreeFallVer2/windows/FreeFall.exe"
    elif sys.platform == "linux":
        env_build = "../env/FreeFallVer2/linux/FreeFall.x86_64"
    elif sys.platform == "darwin":
        env_build = "../env/FreeFallVer2/mac.app"
    else:
        raise AttributeError("{} platform is not supported.".format(sys.platform))
    channel = EnvironmentParametersChannel()
    unity_env = UnityEnvironment(env_build,
                                 side_channels=[channel],
                                 additional_args=["-batchmode"])
    env = UnityToGymWrapper(unity_env, uint8_visual=True, allow_multiple_obs=True)
    env = DistanceWrapper(env)
    env = MatplotlibWrapper(env)
    assign_config(channel, config)
    return env
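# `assign_config` is defined elsewhere; a plausible reconstruction, assuming
# it recursively flattens a possibly nested config dict into float
# side-channel parameters (a recursive else-branch of this shape appears in
# a later fragment; the exact signature is a guess):
def assign_config(_channel, config, prefix=None):
    for k, v in config.items():
        if isinstance(v, dict):
            # Recurse into nested sections, passing the key as context.
            assign_config(_channel, v, k)
        else:
            _channel.set_float_parameter(k, float(v))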
def main():
    unity_env = UnityEnvironment("./envs/earlyRender")
    env = UnityToGymWrapper(unity_env, uint8_visual=True)
    # logger.configure('./logs')  # Change to log in a different directory
    act = deepq.learn(
        env,
        "cnn",  # conv_only is also a good choice for GridWorld
        lr=2.5e-4,
        total_timesteps=1000000,
        buffer_size=50000,
        exploration_fraction=0.05,
        exploration_final_eps=0.1,
        print_freq=20,
        train_freq=5,
        learning_starts=20000,
        target_network_update_freq=50,
        gamma=0.99,
        prioritized_replay=False,
        checkpoint_freq=1000,
        checkpoint_path='./logs',  # Change to save model in a different directory
        dueling=True,
    )
    print("Saving model to unity_model.pkl")
    act.save("unity_model.pkl")
def main():
    agent_file = "3DBall_single/3DBall_single.x86_64"
    no_graphics = True
    channel = EngineConfigurationChannel()
    unity_env = UnityEnvironment(file_name=agent_file,
                                 seed=1,
                                 no_graphics=no_graphics,
                                 side_channels=[channel])
    channel.set_configuration_parameters(time_scale=50.0)
    env = UnityToGymWrapper(unity_env)

    l1, l2 = 64, 64
    activation = nn.ReLU
    output_activation = nn.Tanh
    ac = TD3ActorCritic(env.observation_space, env.action_space,
                        l1, l2, activation=activation)
    params = dict(
        gamma=0.99,
        polyak=0.995,
        act_noise=0.1,
        target_noise=0.2,
        epochs=100,
        steps_per_epoch=4000,
        start_steps=10000,
        batch_size=256,
        update_after=10000,
        update_every=50,
        policy_delay=2,
        lr=1e-3,
    )
    model = TD3(ac=ac, env=env, **params)
    model.train()
from gym_unity.envs import UnityToGymWrapper
from mlagents_envs.environment import UnityEnvironment
from mlagents_envs.side_channel.engine_configuration_channel import EngineConfigurationChannel

ENV_ID = "../../Robotic-RL-Env/Build/Robotic-RL-Env"

channel = EngineConfigurationChannel()
unity_env = UnityEnvironment(ENV_ID, seed=1, side_channels=[channel])
channel.set_configuration_parameters(time_scale=1.0)
env = UnityToGymWrapper(unity_env, allow_multiple_obs=True)
env.reset()

print(f"Name of the behavior : {env.name}")

# Print the number of observations
print("Number of observations : ", env.observation_space)

# How many actions are possible?
print(f"There are {env.action_size} action(s)")
print("Params:", params) torch.manual_seed(seed) np.random.seed(seed) game_path = os.path.expanduser( "/data4/pdp/grantsrb/loc_games/LocationGame2dLinux_9/LocationGame2dLinux.x86_64" ) channel = EngineConfigurationChannel() env_channel = EnvironmentParametersChannel() env = UnityEnvironment(file_name=game_path, side_channels=[channel, env_channel], seed=seed) channel.set_configuration_parameters(time_scale=1) for k, v in params.items(): env_channel.set_float_parameter(k, v) env = UnityToGymWrapper(env, allow_multiple_obs=True) print("Environment created!") #matplotlib.use("tkagg") obs = env.reset() #plt.imshow(obs[0]) #plt.show() done = False while True: print("stepping") x, z = [float(y.strip()) for y in str(input("action: ")).split(",")] # The obs is a list of length 2 in which the first element is the image and the second is the goal coordinate # Reward in this case is the difference between the action location and the nearest object to the action location obs, rew, done, _ = env.step([x, z]) plt.imsave("sample.png", obs[0]) print("targ:", obs[1])
        assign_config(_channel, v, k)
    else:
        _channel.set_float_parameter(k, v)

# setup environment
if sys.platform == "win32":
    env_build = "../env/FreeFall/windows/FreeFall.exe"
elif sys.platform == "linux":
    env_build = "../env/FreeFall/linux/FreeFall.x86_64"
elif sys.platform == "darwin":
    env_build = "../env/FreeFall/mac.app"
else:
    raise AttributeError("{} platform is not supported.".format(sys.platform))
channel = EnvironmentParametersChannel()
unity_env = UnityEnvironment(env_build, side_channels=[channel])
env = UnityToGymWrapper(unity_env, uint8_visual=True, allow_multiple_obs=True)
assign_config(channel, config)

# interface
key_ws = np.array([False] * 2)

def key_press(event):
    # NOTE: cannot handle multiple key presses at the same time
    global key_ws
    try:
        key = event.key.lower()
    except:
        key = event.key
    key_ws[0] = True if key in ['w', 'up'] else False
    key_ws[1] = True if key in ['s', 'down'] else False
    if key == 'q':
        env.close()
        sys.exit()
def __init__(self, **config):
    self.config = config
    self.env = UnityToGymWrapper(
        UnityEnvironment(),
        allow_multiple_obs=True,  # return all visual obs plus the vector obs as a list
    )
def objective(trial):
    # Domain setup
    # windows_path = "../crawler_single/UnityEnvironment"
    # build_path = windows_path
    linux_path = "../crawler_single/linux/dynamic_server/crawler_dynamic.x86_64"
    build_path = linux_path
    unity_env = UnityEnvironment(file_name=build_path,
                                 seed=1,
                                 side_channels=[],
                                 no_graphics=False)
    env = UnityToGymWrapper(unity_env=unity_env)
    training_episodes = 10000

    params = {}
    params["nr_output_features"] = env.action_space.shape[0]
    params["nr_input_features"] = env.observation_space.shape[0]
    params["env"] = env
    params["lr"] = 3e-4
    params["clip"] = 0.2
    params["hidden_units"] = 512
    params["minibatch_size"] = 32
    params["tau"] = 0.95
    params["std"] = 0.35
    # Hyperparameters sampled by Optuna:
    params["update_episodes"] = trial.suggest_int(name='update_episodes', low=5, high=30, step=5)
    params["ppo_epochs"] = trial.suggest_int(name='ppo_epochs', low=2, high=10, step=2)
    params["gamma"] = trial.suggest_float(name='gamma', low=0.98, high=0.99, log=True)
    params["beta"] = trial.suggest_float(name='beta', low=0.08, high=0.12, log=True)
    print(params)

    time_str = time.strftime("%y%m%d_%H")
    t = "{}_{}".format(worker_id, time_str)
    print(t)
    writer = SummaryWriter(log_dir='runs/alex/{}'.format(time_str), filename_suffix=t)
    agent = a.PPOLearner(params, writer)

    returns = [
        episode(env, agent, params, writer, i) for i in range(training_episodes)
    ]
    torch.save(
        agent.ppo_net,
        "../Net_Crawler/Alex/PPONet_crawler{}_{}.pt".format(worker_id, time_str))

    mean_reward, std_reward = evaluate_model(agent.ppo_net, env, n_eval_episodes=10)
    print("{}, {}".format(mean_reward, std_reward))
    writer.close()
    env.close()
    return mean_reward
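# A minimal sketch of how this objective might be driven, using the standard
# Optuna API (direction and trial count are placeholders):
import optuna

study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=20)
print("Best params:", study.best_params)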
import numpy as np
import matplotlib.pyplot as plt
import torch
from mlagents_envs.environment import UnityEnvironment
from gym_unity.envs import UnityToGymWrapper

from agent import Agent
from ddpg_learning import ddpg

# Initialize the Environment
unity_env = UnityEnvironment(file_name="Visual3DBall\\UnityEnvironment.app")
env = UnityToGymWrapper(unity_env)

# Get the action size
action_size = 2
# Get the state size
state_shape = (84, 84, 12)
# Get number of agents
num_agents = 1

# Initialize the Agent with given hyperparameters
BUFFER_SIZE = int(3e4)  # replay buffer size
BATCH_SIZE = 128        # batch size
GAMMA = 0.99            # discount factor
TAU = 1e-2              # for soft update of target parameters
LR_ACTOR = 5e-4         # learning rate of the actor
    return state[idx]

state = torch.load('D:/RL_project/FInal Project/RLCar/Path_folder/46305_0.172707200050354.pth')

def get_action(state):
    if len(state) == 34:
        state = get_il_state(state)
    with torch.no_grad():
        state = torch.Tensor(state).view(1, -1).to(device)
        print("state.shape=", state.shape)
        action = model_req(state)
    return action.cpu().numpy()

def il_eval():
    state = env.reset()
    score = 0
    max_t = 10000
    for t in range(max_t):
        action = get_action(state)
        next_state, reward, done, _ = env.step(action)
        next_state = get_il_state(next_state)
        state = next_state
        score += reward
        if done:
            break

# env = UnityToGymWrapper(UnityEnvironment(base_port=5004), 0)
env = UnityToGymWrapper(UnityEnvironment('D:/RL_project/FInal Project/RLCar/Build/RLCar.exe'), 0)
il_eval()
env.close()
class ActorUnity(Actor, RoadworkActorInterface):
    def __init__(self, ctx, actor_id):
        super(ActorUnity, self).__init__(ctx, actor_id)
        self.env = None  # Placeholder
        self.actor_id = actor_id

    async def sim_call_method(self, data) -> object:
        method = data['method']
        args = data['args']      # Array of arguments - []
        kwargs = data['kwargs']  # Dict
        return getattr(self.env, method)(*args, **kwargs)

    async def sim_get_state(self, data) -> object:
        key = data['key']
        has_value, val = await self._state_manager.try_get_state(key)
        return val

    async def sim_set_state(self, data) -> None:
        key = data['key']
        value = data['value']
        print(f'Setting Sim State for key {key}', flush=True)
        await self._state_manager.set_state(key, value)
        await self._state_manager.save_state()

    async def _on_activate(self) -> None:
        """A callback which will be called whenever the actor is activated."""
        print(f'Activate {self.__class__.__name__} actor!', flush=True)

    async def _on_deactivate(self) -> None:
        """A callback which will be called whenever the actor is deactivated."""
        print(f'Deactivate {self.__class__.__name__} actor!', flush=True)

    # See behavior_spec: https://github.com/Unity-Technologies/ml-agents/blob/release_4_docs/docs/Python-API.md#interacting-with-a-unity-environment
    # behavior_spec.action_type and behavior_spec.action_shape is what we need here
    async def sim_action_space(self) -> object:
        # The behavior_names map to a Behavior Spec with observation_shapes, action_type, action_shape
        behavior_names = list(self.env.behavior_specs.keys())
        behavior_idx = 0  # we currently support only 1 behavior spec, even though Unity can support multiple (@TODO)
        behavior_spec = self.env.behavior_specs[behavior_names[behavior_idx]]

        print(f"Action Type: {behavior_spec.action_type}", flush=True)
        print(f"Action Shape: {behavior_spec.action_shape}", flush=True)

        # We can use /src/Lib/python/roadwork/roadwork/json/unserializer.py as an example
        # Currently only ActionType.DISCRETE is implemented; all ActionTypes can be found here:
        # https://github.com/Unity-Technologies/ml-agents/blob/3901bad5b0b4e094e119af2f9d0d1304ad3f97ae/ml-agents-envs/mlagents_envs/base_env.py#L247
        # Note: Unity supports DISCRETE or CONTINUOUS action spaces
        # @TODO: implement continuous in a specific env (which one??)
        if behavior_spec.is_action_discrete():
            self.env.action_space = spaces.Discrete(behavior_spec.action_shape[0])

        print(f"Converted Action Space: {self.env.action_space}", flush=True)

        res = Serializer.serializeMeta(self.env.action_space)
        return res

    # See behavior_spec: https://github.com/Unity-Technologies/ml-agents/blob/release_4_docs/docs/Python-API.md#interacting-with-a-unity-environment
    # behavior_spec.observation_shapes is what we need; this is an array of tuples [ (), (), (), ... ]
    # which represents variables? (@TODO: Confirm)
    # (e.g. https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Learning-Environment-Examples.md#basic)
    # @TODO: This sounds like a MultiDiscrete environment (https://github.com/openai/gym/blob/master/gym/spaces/multi_discrete.py)
    # so we map to this currently
    async def sim_observation_space(self) -> object:
        behavior_names = list(self.env.behavior_specs.keys())
        behavior_idx = 0  # we currently support only 1 behavior spec, even though Unity can support multiple (@TODO)
        behavior_spec = self.env.behavior_specs[behavior_names[behavior_idx]]

        print(f"Observation Shapes: {behavior_spec.observation_shapes}", flush=True)

        observation_space_n_vec = []
        for i in range(0, len(behavior_spec.observation_shapes)):
            # Get element 0 from the tuple, containing the size
            observation_space_n_vec.append(behavior_spec.observation_shapes[i][0])

        print(f"Converted Observation Space: {observation_space_n_vec}", flush=True)
        self.env.observation_space = spaces.MultiDiscrete(observation_space_n_vec)

        res = Serializer.serializeMeta(self.env.observation_space)
        return res

    async def sim_create(self, data) -> None:
        """An actor method to create a sim environment."""
        env_id = data['env_id']
        # seed = data['seed']
        print(f'Creating sim with value {env_id}', flush=True)
        print(f"Current dir: {os.getcwd()}", flush=True)

        try:
            print("[Server] Creating Unity Environment", flush=True)
            self.env = UnityEnvironment(f"{os.getcwd()}/src/Server/Unity/envs/{env_id}/{env_id}")
            print("[Server] Resetting environment already", flush=True)
            self.env.reset()  # we need to reset first in Unity
            # self.unity_env = UnityEnvironment("./environments/GridWorld")
            # self.env = gym.make(env_id)
            # if seed:
            #     self.env.seed(seed)
        except gym.error.Error as e:
            print(e)
            raise Exception("Attempted to look up malformed environment ID '{}'".format(env_id))
        except Exception as e:
            print(e)
            raise Exception(e)
        except:
            print(sys.exc_info())
            traceback.print_tb(sys.exc_info()[2])
            raise

    async def sim_reset(self) -> object:
        observation = self.env.reset()
        # observation is an ndarray; we need to serialize it,
        # so change it to a list type, which is serializable
        if isinstance(observation, np.ndarray):
            observation = observation.tolist()
        return observation

    async def sim_render(self) -> None:
        self.env.render()

    async def sim_monitor_start(self, data) -> None:
        episodeInterval = 10  # Create a recording every X episodes
        if data['episode_interval']:
            episodeInterval = int(data['episode_interval'])
        v_c = lambda count: count % episodeInterval == 0  # Create every X episodes
        # self.env = gym.wrappers.Monitor(self.env, f'./output/{self.actor_id}', resume=False, force=True, video_callable=v_c)
        # self.env = UnityToGymWrapper(self.unity_environment)  # defaults to BaseEnv
        self.env = UnityToGymWrapper()

    async def sim_monitor_stop(self) -> None:
        self.env.close()

    async def sim_action_sample(self) -> object:
        action = self.env.action_space.sample()
        return action

    async def sim_step(self, data) -> object:
        action = data['action']

        # Unity requires us to set the action with env.set_actions(behavior_name, action)
        # where action is an array
        behavior_names = list(self.env.behavior_specs.keys())
        behavior_idx = 0  # we currently support only 1 behavior spec, even though Unity can support multiple (@TODO)
        behavior_name = behavior_names[behavior_idx]

        # first dimension = number of agents, second dimension = action?
        self.env.set_actions(behavior_name, np.array([[action]]))
        self.env.step()  # step does not return in Unity

        # Get the DecisionSteps and TerminalSteps, which contain:
        # DecisionSteps: Which agents need an action this step? (Note: contains action masks!)
        #   E.g.: DecisionStep(obs=[array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.], dtype=float32)], reward=-0.01, agent_id=0, action_mask=[array([False, False, False])])
        # TerminalSteps: Which agents' episodes ended?
        decision_steps, terminal_steps = self.env.get_steps(behavior_names[behavior_idx])
        # print(decision_steps, flush=True)
        # print(terminal_steps, flush=True)
        # print(decision_steps[0], flush=True)
        # print(terminal_steps[0], flush=True)

        # We support 1 decision step currently; get its observation  # TODO
        decision_step_idx = 0
        decision_step = decision_steps[decision_step_idx]
        obs, reward, agent_id, action_mask = decision_step
        observation = obs[decision_step_idx]
        reward = float(reward)
        isDone = False
        info = {}
        # @TODO: terminal_steps should be implemented; it requires a reset

        # observation is an ndarray; we need to serialize it,
        # so change it to a list type, which is serializable
        if isinstance(observation, np.ndarray):
            observation = observation.tolist()

        return observation, reward, isDone, info
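# Hypothetical completion of the @TODO in sim_step above: if the agent's
# episode ended this step, take the observation/reward from the TerminalStep
# instead and flag done so the caller resets (a sketch, not the project's
# actual handling):
if len(terminal_steps) > 0:
    terminal_step = terminal_steps[terminal_steps.agent_id[0]]
    observation = terminal_step.obs[0]
    reward = float(terminal_step.reward)
    isDone = True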
torch.manual_seed(args.seed)
np.random.seed(args.seed)

engine_configuration_channel = EngineConfigurationChannel()
unity_env = UnityEnvironment(side_channels=[engine_configuration_channel],
                             file_name=args.env)
engine_configuration_channel.set_configuration_parameters(
    width=200,
    height=200,
    quality_level=5,
    time_scale=1 if args.show else 20,
    target_frame_rate=-1,
    capture_frame_rate=60)

env = UnityToGymWrapper(unity_env=unity_env)
env.seed(args.seed)
env.action_space.seed(args.seed)
train_tools.EVAL_SEED = args.seed

obs_dim = env.observation_space.shape[0]
act_dim = env.action_space.shape[0]
act_bound = env.action_space.high[0]

# create nets
actor_net = DDPGMLPActor(obs_dim=obs_dim, act_dim=act_dim,
                         act_bound=act_bound, hidden_size=[400, 300],
                         hidden_activation=nn.ReLU)
from gym_unity.envs import UnityToGymWrapper
from mlagents_envs.environment import UnityEnvironment as UE

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('executable', type=str, help='path to exec')
    parser.add_argument('--num_timesteps', type=int, default=100,
                        help='set number of training timesteps')
    args = parser.parse_args()

    env_name = args.executable
    env = UE(file_name=env_name, seed=1, side_channels=[])
    env = UnityToGymWrapper(env)

    # Create log dir
    time_int = int(time.time())
    log_dir = "stable_results/basic_env_{}/".format(time_int)
    os.makedirs(log_dir, exist_ok=True)

    model = PPO('MlpPolicy', env, verbose=1)
    # model.learn(total_timesteps=args.num_timesteps)

    obs = env.reset()
    for i in range(50):
        print(f'\nEPISODE:{i}\n')
        obs = env.reset()
        total, step, done = 0, 0, False
        while not done:
# setup environment
if sys.platform == 'win32':
    env_build = "../env/FlyCamera/windows/FlyCamera.exe"
elif sys.platform == 'linux':
    env_build = "../env/FlyCamera/linux/FlyCamera.x86_64"
elif sys.platform == "darwin":
    env_build = "../env/FlyCamera/mac.app"
else:
    raise AttributeError("{} platform is not supported.".format(sys.platform))
channel = EnvironmentParametersChannel()
unity_env = UnityEnvironment(env_build, side_channels=[channel])
channel.set_float_parameter("key_speed", 10.0)
channel.set_float_parameter("cam_sens", 0.25)
env = UnityToGymWrapper(unity_env, uint8_visual=True)

# interface
max_mouse_move = 10  # in pixels; to limit mouse "jump" due to slow in-loop process
mouse_position = np.zeros((2,))

def mouse_move(event):
    global mouse_position
    x, y = event.xdata, event.ydata
    mouse_position = np.array([x, y])

key_wasd = np.array([False] * 4)

def key_press(event):
    # NOTE: cannot handle multiple key presses at the same time
    global key_wasd
    try:
        key = event.key.lower()
    except:
def train(path):
    # env = gym.make("LunarLander-v2")
    # env = wrappers.Monitor(env, "tmp/lunar-lander", video_callable=lambda episode_id: True, force=True)
    unityenv = UnityEnvironment(path)
    env = UnityToGymWrapper(unity_env=unityenv, flatten_branched=True)
    ddqnAgent = DDQNAgent(alpha=0.0001, gamma=0.99, nActions=7, epsilon=1.0,
                          batchSize=512, inputShape=210)
    nEpisodes = 1000
    ddqnScores = []
    ddqnAverageScores = []
    epsilonHistory = []
    stepsPerEpisode = []

    for episode in range(nEpisodes):
        StartTime = time.time()
        done = False
        score = 0
        steps = 0
        observation = env.reset()
        while not done:
            action = ddqnAgent.chooseAction(observation)
            observationNew, reward, done, info = env.step(action)
            score += reward
            ddqnAgent.remember(state=observation, stateNew=observationNew,
                               action=action, reward=reward, done=done)
            observation = observationNew
            ddqnAgent.learn()
            steps += 1
        epsilonHistory.append(ddqnAgent.epsilon)
        ddqnScores.append(score)
        averageScore = np.mean(ddqnScores)
        ddqnAverageScores.append(averageScore)
        stepsPerEpisode.append(steps)
        ElapsedTime = (time.time() - StartTime) / 60
        print("Episode:", episode, "Score: %.2f" % score,
              "Average Score: %.2f" % averageScore,
              "Run Time:", ElapsedTime, "Minutes",
              "Epsilon:", ddqnAgent.epsilon, "Steps:", steps)
        if episode > 1 and episode % 9 == 0:
            ddqnAgent.saveModel()

    env.close()

    x = [i for i in range(nEpisodes)]
    fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(10, 10))
    fig.suptitle("DDQN Hallway")
    ax1.plot(x, ddqnScores, "C1")
    ax1.set_title('Episodes vs Scores')
    ax1.set(xlabel='Episodes', ylabel='Scores')
    ax2.plot(x, ddqnAverageScores, "C2")
    ax2.set_title('Episodes vs Average Scores')
    ax2.set(xlabel='Episodes', ylabel='Average Scores')
    ax3.plot(x, epsilonHistory, "C3")
    ax3.set_title('Episodes vs Epsilon Decay')
    ax3.set(xlabel='Episodes', ylabel='Epsilon Decay')
    ax4.plot(x, stepsPerEpisode, "C4")
    ax4.set_title('Episodes vs Steps Per Episode')
    ax4.set(xlabel='Episodes', ylabel='Steps')
    plt.savefig('Hallway.png')
def test_closing(env_name):
    """
    Run the gym test and close the environment multiple times
    :param env_name: Name of the Unity environment binary to launch
    """
    try:
        env1 = UnityToGymWrapper(
            UnityEnvironment(env_name, worker_id=1, no_graphics=True))
        env1.close()
        env1 = UnityToGymWrapper(
            UnityEnvironment(env_name, worker_id=1, no_graphics=True))
        env2 = UnityToGymWrapper(
            UnityEnvironment(env_name, worker_id=2, no_graphics=True))
        env2.reset()
    finally:
        env1.close()
        env2.close()
class FooCarEnv(gym.Env):
    _channel = EnvironmentParametersChannel()
    PathSpace = {
        'xyz': 0,
        'xy': 2,
        'yz': 2,
        'xz': 2,
    }

    def __init__(self, no_graphics: bool = False, seed: int = 1, **config):
        self._config = config
        worker_id = config.get('worker_id', 0)

        self._unity_env = UnityEnvironment(
            file_name=UNITY_ENV_EXE_FILE,
            # file_name=None,  # Unity Editor Mode (debug)
            no_graphics=no_graphics,
            seed=seed,
            side_channels=[self._channel],
            worker_id=worker_id,
        )

        for key, value in config.items():
            self._channel.set_float_parameter(key, float(value))

        self._gym_env = UnityToGymWrapper(self._unity_env)

    def step(self, action):
        obs, reward, done, info = self._gym_env.step(action)
        size = self.observation_size
        return obs[:size], reward, done, info

    def reset(self):
        obs = self._gym_env.reset()
        size = self.observation_size
        return obs[:size]

    def render(self, mode="rgb_array"):
        return self._gym_env.render(mode=mode)

    def seed(self, seed=None):
        self._gym_env.seed(seed=seed)  # it will throw a warning

    def close(self):
        self._gym_env.close()

    @property
    def metadata(self):
        return self._gym_env.metadata

    @property
    def reward_range(self) -> Tuple[float, float]:
        return self._gym_env.reward_range

    @property
    def action_space(self):
        return self._gym_env.action_space

    @property
    def observation_space(self):
        config = self._config
        space = self.PathSpace

        path_space = config.get('path_space', space['xz'])
        r = config.get('radius_anchor_circle', 8.0)
        r_e = config.get('radius_epsilon_ratio', 0.7)
        h = config.get('max_anchor_height', 1.0)

        xyz_mode = (path_space == space['xyz'])
        bound = max(r * (1 + r_e), h if xyz_mode else 0)
        shape = (self.observation_size,)
        return gym.spaces.Box(-bound, +bound, dtype=np.float32, shape=shape)

    @property
    def observation_size(self):
        # Reference: readonly variable (Unity)FooCar/CarAgent.ObservationSize
        config = self._config
        space = self.PathSpace

        path_space = config.get('path_space', space['xz'])
        ticker_end = config.get('ticker_end', 5)
        ticker_start = config.get('ticker_start', -3)

        xyz_mode = (path_space == space['xyz'])
        basic_num = 6
        point_dim = 3 if xyz_mode else 2
        return basic_num + 2 * point_dim * (ticker_end - ticker_start + 1)
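# Brief usage sketch; the keyword arguments mirror the config keys read
# above, and the values are illustrative only:
env = FooCarEnv(no_graphics=True, seed=1, path_space=2, ticker_end=5)
obs = env.reset()
obs, reward, done, info = env.step(env.action_space.sample())
env.close()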
ppo_var = 0  # 0: no minibatch, >=1: minibatch
if len(sys.argv) > 1:
    worker_id = int(sys.argv[1])
if len(sys.argv) > 2:
    ppo_var = int(sys.argv[2])

# Domain setup
# windows_path = "../crawler_build/windows/dynamic/UnityEnvironment"
# build_path = windows_path
linux_path = "../crawler_build/linux/dynamic_server/crawler_dynamic.x86_64"
build_path = linux_path
unity_env = UnityEnvironment(file_name=build_path, worker_id=worker_id)
crawler_env = UnityToGymWrapper(unity_env=unity_env)
training_episodes = 10000

params = {}
params["nr_output_features"] = crawler_env.action_space.shape[0]
params["nr_input_features"] = crawler_env.observation_space.shape[0]
params["env"] = crawler_env
params["update_episodes"] = 10
params["ppo_epochs"] = 4
params["minibatch_size"] = 32
params["lr"] = 3e-4
params["clip"] = 0.2
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("-m", "--model", default=MODEL, help="Model file to load")
    parser.add_argument("-e", "--env", default=ENV_ID,
                        help="Environment name to use, default=" + ENV_ID)
    args = parser.parse_args()

    channel = EngineConfigurationChannel()
    unity_env = UnityEnvironment(args.env, seed=1, side_channels=[channel])
    channel.set_configuration_parameters(time_scale=1.0)
    env = UnityToGymWrapper(unity_env, allow_multiple_obs=True)

    net = model.DDPGActor(env.observation_space.shape[0], env.action_space.shape[0])
    net.load_state_dict(torch.load(args.model))

    obs = env.reset()
    total_reward = 0.0
    total_steps = 0
    while True:
        obs_v = torch.FloatTensor([obs])
        mu_v = net(obs_v)
        action = mu_v.squeeze(dim=0).data.numpy()
        action = np.clip(action, -1, 1)
        obs, reward, done, _ = env.step(action)
        total_reward += reward
import time

MIN_THROTTLE = 0.45
MAX_THROTTLE = 0.6
MAX_STEERING_DIFF = 0.15
JERK_REWARD_WEIGHT = 0.0
MAX_STEERING = 1
MIN_STEERING = -MAX_STEERING
STEERING_GAIN = 1
STEERING_BIAS = 0

from gym_unity.envs import UnityToGymWrapper
from mlagents_envs.environment import UnityEnvironment

unity_gym_env = UnityToGymWrapper(
    UnityEnvironment(),
    allow_multiple_obs=True,  # return all visual obs plus the vector obs as a list
)

class Envrionment(object):
    def __init__(
        self,
        vae=None,
        min_throttle=0.45,
        max_throttle=0.6,
        n_command_history=0,
        frame_skip=1,
        n_stack=1,
        action_lambda=0.5,
    ):
        # copy args