def __init__(self, environment_filename, no_graphics):
    """Launch a Unity environment and derive action/observation spaces.

    Args:
        environment_filename: Forwarded to ``UnityEnvironment`` as
            ``file_name``.
        no_graphics: Forwarded to ``UnityEnvironment`` as ``no_graphics``.
    """
    config_channel = EngineConfigurationChannel()
    self.env = UnityEnvironment(
        file_name=environment_filename,
        side_channels=[config_channel],
        no_graphics=no_graphics,
    )
    self.env.reset()

    self.brain_name = self.env.get_agent_groups()
    self.group_spec = self.env.get_agent_group_spec(self.brain_name[0])
    config_channel.set_configuration_parameters(
        width=640, height=480, time_scale=3.0)
    # NOTE: group_name aliases the whole result of get_agent_groups(),
    # not just its first entry.
    self.group_name = self.brain_name

    # Build the action space from the group spec.
    spec = self.group_spec
    if not spec.is_action_discrete():
        bound = np.array([1] * spec.action_shape)
        self._action_space = spaces.Box(-bound, bound, dtype=np.float32)
    else:
        self._action_space = []
        branches = spec.discrete_action_branches
        # Two identical lists of per-branch Discrete spaces are stored.
        for _ in range(2):
            per_branch = [spaces.Discrete(size) for size in branches]
            self._action_space.append(per_branch)

    # Observations are unbounded in both directions.
    obs_bound = np.array([np.inf] * self._get_vec_obs_size())
    self._observation_space = spaces.Box(
        -obs_bound, obs_bound, dtype=np.float32)
def train():
    """Train a DQN agent on the PushBlock build and checkpoint the model."""
    # Run the simulation at 20x time scale.
    channel = EngineConfigurationChannel()
    channel.set_configuration_parameters(time_scale=20.0)
    unity_env = UnityEnvironment(
        "./ml-agents/Project/PushBlock", side_channels=[channel])
    env = UnityToGymWrapper(unity_env, 0, flatten_branched=True)
    logger.configure('./logs')

    # Train with DQN.
    dqn_options = dict(
        seed=0,
        lr=2.5e-4,
        total_timesteps=400000,
        buffer_size=50000,
        exploration_fraction=0.05,
        exploration_final_eps=0.1,
        print_freq=20,
        train_freq=5,
        learning_starts=20000,
        target_network_update_freq=50,
        gamma=0.99,
        prioritized_replay=False,
        checkpoint_freq=1000,
        dueling=True,
        checkpoint_path=None,
        load_path="./model",
    )
    model = deepq.learn(env, "mlp", **dqn_options)

    # Save the trained model, keeping only the most recent checkpoint.
    checkpoint = tf.train.Checkpoint(model=model)
    saver = tf.train.CheckpointManager(checkpoint, "./model", max_to_keep=1)
    saver.save()
def __init__(self, unity_env, time_scale=1.0, width=720, height=480,
             target_frame_rate=60, quality_level=5):
    """Reset the environment, apply engine settings and cache group info.

    :param unity_env: (UnityEnvironment) Environment where the game is played
    :param time_scale: (float) Speed of the game
    :param width: (int) Window width
    :param height: (int) Window height
    :param target_frame_rate: (int) Frame rate
    :param quality_level: (int) Visual quality
    """
    self.unity_env = unity_env
    self.unity_env.reset()

    engine_settings = dict(
        time_scale=time_scale,
        width=width,
        height=height,
        target_frame_rate=target_frame_rate,
        quality_level=quality_level,
    )
    config_channel = EngineConfigurationChannel()
    config_channel.set_configuration_parameters(**engine_settings)
    # The channel is installed directly under key 2 of the environment's
    # side-channel container.
    # NOTE(review): presumably 2 is the engine-configuration channel id for
    # the mlagents version in use; confirm before upgrading.
    self.unity_env.side_channels[2] = config_channel

    first_group = unity_env.get_agent_groups()[0]
    self.group_name = first_group
    self.group_spec = unity_env.get_agent_group_spec(first_group)
    step_result = self.unity_env.get_step_result(first_group)
    self.n_agents = step_result.n_agents()
    self.action_size = self.group_spec.action_size
def initialize_env(self, config, env_file) -> Environment:
    """Create the environment and push engine/float parameters to it.

    The base port is read interactively from standard input.

    Args:
        config: Mapping providing ``time_scale``, ``width`` and ``height``;
            it is also handed to ``env.set_float_parameters`` as a whole.
        env_file: Environment file passed to ``Environment`` as ``file_name``.

    Returns:
        The configured ``Environment`` instance.
    """
    base_port = int(input("Enter base port: "))
    engine_settings = {
        "time_scale": int(config.get("time_scale")),
        "quality_level": 1,
        "width": int(config.get("width")),
        "height": int(config.get("height")),
    }

    engine_channel = EngineConfigurationChannel()
    params_channel = EnvironmentParametersChannel()
    env = Environment(
        file_name=env_file,
        base_port=base_port,
        side_channels=[engine_channel, params_channel],
    )

    engine_channel.set_configuration_parameters(**engine_settings)
    env.set_float_parameters(config)
    return env
def test_engine_configuration():
    """Check what EngineConfigurationChannel sends and that it rejects input."""
    sender = EngineConfigurationChannel()
    # A raw-bytes channel sharing the sender's id is used to interpret the
    # data the sender emits.
    receiver = RawBytesChannel(sender.channel_id)

    def relay(target):
        """Generate the sender's pending messages and feed them to *target*."""
        payload = SideChannelManager([sender]).generate_side_channel_messages()
        SideChannelManager([target]).process_side_channel_message(payload)

    sender.set_configuration(EngineConfig.default_config())
    relay(receiver)
    # 5 different messages, one for each setting.
    assert len(receiver.get_and_clear_received_messages()) == 5

    sent_time_scale = 4.5
    sender.set_configuration_parameters(time_scale=sent_time_scale)
    relay(receiver)
    first_message = receiver.get_and_clear_received_messages()[0]
    message = IncomingMessage(first_message)
    message.read_int32()  # consume the leading int32 before the float value
    assert message.read_float32() == sent_time_scale

    with pytest.raises(UnitySideChannelException):
        sender.set_configuration_parameters(width=None, height=42)

    with pytest.raises(UnityCommunicationException):
        # Try to send data *to* the EngineConfigurationChannel.
        sender.set_configuration_parameters(time_scale=sent_time_scale)
        relay(sender)
def __init__(self, worker_id, realtime_mode=False):
    """Start the food-collector build and configure it for the chosen mode.

    Args:
        worker_id: Passed positionally to ``UnityEnvironment`` after the path.
        realtime_mode: When true, use time scale 1.0 and set "train-mode" to
            0.0; otherwise use time scale 20.0 and set "train-mode" to 1.0.
    """
    self.reset_parameters = EnvironmentParametersChannel()
    self.engine_config = EngineConfigurationChannel()

    env_path = "C:/myDesktop/source/gridworld_imitation/food_collector_4"
    channels = [self.reset_parameters, self.engine_config]
    self._env = UnityEnvironment(env_path, worker_id, side_channels=channels)
    self._env.reset()

    # Use the first registered behavior.
    self.behavior_name = list(self._env.behavior_specs)[0]
    behavior_spec = self._env.behavior_specs[self.behavior_name]
    print(behavior_spec)

    time_scale, train_flag = (1.0, 0.0) if realtime_mode else (20.0, 1.0)
    self.engine_config.set_configuration_parameters(time_scale=time_scale)
    self.reset_parameters.set_float_parameter("train-mode", train_flag)

    self._flattener = ActionFlattener(
        behavior_spec.action_spec.discrete_branches)
def _create_engine_channel(self):
    """Return an engine configuration channel preset for the current mode.

    The ``EngineConfig`` values are positional.
    NOTE(review): presumably (width, height, quality_level, time_scale,
    target_frame_rate); confirm against the installed EngineConfig.
    """
    channel = EngineConfigurationChannel()
    if self.train_mode:
        settings = EngineConfig(80, 80, 1, 4.0, 30 * 4)
    else:
        settings = EngineConfig(1280, 720, 1, 1.0, 60)
    channel.set_configuration(settings)
    return channel
def make_unity_env(self, env_name, float_params=None, time_scale=1,
                   seed=None, worker_id=None, **kwargs):
    """Create a gym environment from a Unity game.

    Args:
        env_name: str
            Path to the game; ``~`` is expanded.
        float_params: dict or None
            Float parameters sent to the Unity environment. Keys vary by
            environment. ``None`` means no parameters.
        time_scale: float
            Unity time scale applied through the engine configuration
            channel.
        seed: int or None
            Seed for randomness. When ``None`` the current time is used,
            evaluated on every call rather than once at import time.
        worker_id: int or None
            Unique worker id for each Unity process on this machine. When
            ``None`` it is derived from the seed.
        **kwargs: Accepted for call-site compatibility; unused.

    Returns:
        The environment wrapped in ``UnityToGymWrapper`` with
        ``allow_multiple_obs=True``.

    Raises:
        RuntimeError: If no environment could be created after 50 attempts.
    """
    if float_params is None:
        float_params = {}
    if seed is None:
        seed = time.time()
    # The seed also feeds the worker-id arithmetic below, so force an int.
    seed = int(seed)

    path = os.path.expanduser(env_name)
    channel = EngineConfigurationChannel()
    env_channel = EnvironmentParametersChannel()
    channel.set_configuration_parameters(time_scale=time_scale)
    for k, v in float_params.items():
        if k == "validation" and v >= 1:
            print("Game in validation mode")
        env_channel.set_float_parameter(k, float(v))

    if worker_id is None:
        worker_id = seed % 500 + 1

    env = None
    last_error = None
    max_attempts = 50
    for _ in range(max_attempts):
        try:
            env = UnityEnvironment(file_name=path,
                                   side_channels=[channel, env_channel],
                                   worker_id=worker_id,
                                   seed=seed)
            break
        except Exception as err:  # KeyboardInterrupt etc. still propagate
            last_error = err
            print("Error encountered making environment, "
                  "trying new worker_id")
            # Jump to a random nearby id; the failure may be a port clash.
            worker_id = (worker_id + 1 + int(np.random.random() * 100)) % 500

    if env is None:
        raise RuntimeError(
            "Could not create Unity environment after "
            "{} attempts".format(max_attempts)) from last_error

    return UnityToGymWrapper(env, allow_multiple_obs=True)
class FQ_Env(object):
    """Multi-behavior wrapper around a ``UnityEnvironment``.

    Each behavior name is treated as one agent; only the first entry of the
    first observation of each behavior is exposed.
    """

    def __init__(self):
        """Connect to Unity, set the time scale and record per-agent sizes."""
        self.engine_configuration_channel = EngineConfigurationChannel()
        self.env = UnityEnvironment(
            side_channels=[self.engine_configuration_channel])
        # Only the time scale is configured; other engine settings keep
        # their defaults.
        self.engine_configuration_channel.set_configuration_parameters(
            time_scale=1)
        self.reset()
        self.n = self.agent_num()

        self.state_shapes = []
        for behavior_name in self.env.get_behavior_names():
            spec = self.env.get_behavior_spec(behavior_name)
            self.state_shapes.append(spec.observation_shapes[0][0])

        self.action_dims = []
        for behavior_name in self.env.get_behavior_names():
            spec = self.env.get_behavior_spec(behavior_name)
            self.action_dims.append(spec.action_shape)

    def agent_num(self):
        """Return the number of behavior names reported by the environment."""
        return len(self.env.get_behavior_names())

    def reset(self):
        """Reset the environment and return one observation per behavior."""
        self.env.reset()
        observations = []
        for behavior_name in self.env.get_behavior_names():
            decision, _terminal = self.env.get_steps(behavior_name)
            observations.append(decision.obs[0][0])
        return observations

    def step(self, actions):
        """Apply one action per behavior, advance, and collect results.

        Args:
            actions: Sequence indexed in the order of
                ``env.get_behavior_names()``.

        Returns:
            Tuple ``(next_state, reward, done)`` of per-behavior lists. A
            behavior is done when its terminal steps contain a reward.
        """
        for index, behavior_name in enumerate(self.env.get_behavior_names()):
            batched_action = np.asarray([actions[index]])
            self.env.set_actions(behavior_name=behavior_name,
                                 action=batched_action)
        self.env.step()

        next_state, reward, done = [], [], []
        for behavior_name in self.env.get_behavior_names():
            decision, terminal = self.env.get_steps(behavior_name)
            finished = len(terminal.reward) != 0
            source = terminal if finished else decision
            next_state.append(source.obs[0][0])
            reward.append(source.reward[0])
            done.append(finished)
        return next_state, reward, done

    def close(self):
        """Close the underlying Unity environment."""
        self.env.close()
def main():
    """Drive the "Mummy" environment with uniformly random actions.

    ``UnityEnvironment`` arguments used here:

    file_name: name of the environment binary (located in the root directory
        of the python project).
    seed: seed used when generating random numbers, for reproducible runs in
        deterministic environments.
    side_channels: channels for exchanging data with the Unity simulation
        that is not part of the reinforcement learning loop, e.g.
        configuration. See "Modifying the environment from Python"
        (https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Python-API.md#modifying-the-environment-from-python).

    The loop runs until it is interrupted or an error is raised; the
    environment is closed on the way out in either case.
    """
    behavior_name = "Mummy?team=0"
    channel = EngineConfigurationChannel()
    filename = "Mummy"
    env = UnityEnvironment(file_name=filename, seed=1, side_channels=[channel])
    try:
        channel.set_configuration_parameters(time_scale=2.0)
        env.reset()

        for name in env.behavior_specs.keys():
            print('behavior_name:', name)

        decision_steps, terminal_steps = env.get_steps(
            behavior_name=behavior_name)

        while True:
            for i in decision_steps.agent_id:
                # Agents that also appear in the terminal steps get no action.
                if i in terminal_steps.agent_id:
                    continue
                env.set_action_for_agent(
                    behavior_name=behavior_name,
                    agent_id=i,
                    action=np.random.uniform(-1.0, 1.0, size=(2, )))
            env.step()
            decision_steps, terminal_steps = env.get_steps(
                behavior_name=behavior_name)
    finally:
        # Always release the Unity process and its port, even on Ctrl-C.
        env.close()
class UnityWrapper(object):
    """Thin wrapper that builds a ``UnityEnvironment`` from an args mapping.

    Attribute lookups not found on the wrapper are forwarded to the wrapped
    environment, except names starting with an underscore.
    """

    def __init__(self, env_args):
        """Configure the side channels and launch or connect to Unity.

        Args:
            env_args: Mapping of settings. Keys read here: ``train_mode``,
                ``train_time_scale`` or (``width``, ``height``,
                ``quality_level``, ``inference_time_scale``,
                ``target_frame_rate``), ``file_path``, ``env_seed``,
                ``reset_config``, and, when ``file_path`` is set, ``port``,
                ``render`` plus optional ``env_name`` and ``env_num``.
        """
        engine_channel = EngineConfigurationChannel()
        self.engine_configuration_channel = engine_channel
        if not env_args['train_mode']:
            engine_channel.set_configuration_parameters(
                width=env_args['width'],
                height=env_args['height'],
                quality_level=env_args['quality_level'],
                time_scale=env_args['inference_time_scale'],
                target_frame_rate=env_args['target_frame_rate'])
        else:
            engine_channel.set_configuration_parameters(
                time_scale=env_args['train_time_scale'])

        float_channel = EnvironmentParametersChannel()
        self.float_properties_channel = float_channel
        channels = [engine_channel, float_channel]

        if env_args['file_path'] is not None:
            # Scene names are looked up in a YAML file under the current
            # working directory.
            yaml_path = '/'.join(
                [os.getcwd(), 'rls', 'envs', 'unity_env_dict.yaml'])
            unity_env_dict = load_yaml(yaml_path)
            self._env = UnityEnvironment(
                file_name=env_args['file_path'],
                base_port=env_args['port'],
                no_graphics=not env_args['render'],
                seed=env_args['env_seed'],
                side_channels=channels,
                additional_args=[
                    '--scene',
                    str(unity_env_dict.get(
                        env_args.get('env_name', 'Roller'), 'None')),
                    '--n_agents',
                    str(env_args.get('env_num', 1)),
                ])
        else:
            # No file path given: no binary is launched by name.
            self._env = UnityEnvironment(base_port=5004,
                                         seed=env_args['env_seed'],
                                         side_channels=channels)
        self.reset_config = env_args['reset_config']

    def reset(self, **kwargs):
        """Send the reset parameters and reset the environment.

        Args:
            **kwargs: An optional truthy ``reset_config`` mapping overrides
                the configuration stored at construction time.
        """
        override = kwargs.get('reset_config', None)
        active_config = override if override else self.reset_config
        for key, value in active_config.items():
            self.float_properties_channel.set_float_parameter(key, value)
        self._env.reset()

    def __getattr__(self, name):
        """Forward unknown public attributes to the wrapped environment."""
        if not name.startswith('_'):
            return getattr(self._env, name)
        raise AttributeError(
            "attempted to get missing private attribute '{}'".format(name))
def _create_env(self, env_file, time_scale, no_graphics):
    """Launch a Unity environment for this process and set its time scale.

    Args:
        env_file: Passed to ``UnityEnvironment`` as ``file_name``.
        time_scale: Time scale sent through the engine configuration channel.
        no_graphics: Passed to ``UnityEnvironment`` as ``no_graphics``.

    Returns:
        The created ``UnityEnvironment``.
    """
    time_channel = EngineConfigurationChannel()
    launch_options = dict(
        file_name=env_file,
        no_graphics=no_graphics,
        side_channels=[time_channel],
        # The worker id comes from proc_id(), an experiment in running
        # several agents side by side.
        worker_id=proc_id(),
    )
    env = UnityEnvironment(**launch_options)
    time_channel.set_configuration_parameters(time_scale=time_scale)
    return env
def _make_unity_env(
        env_path: Optional[str] = None,
        port: int = UnityEnvironment.BASE_ENVIRONMENT_PORT,
        seed: int = -1,
        env_args: Optional[List[str]] = None,
        engine_config: Optional[EngineConfig] = None,
        side_channels: Optional[List[SideChannel]] = None) -> UnityEnvironment:
    """Create a UnityEnvironment, probing upwards for a free port.

    Args:
        env_path: Path of the environment to launch. When ``None`` the Unity
            Editor is used and ``port`` is replaced by the editor port.
        port: First port to try; incremented while the port is in use.
        seed: Seed forwarded to ``UnityEnvironment``.
        env_args: Extra arguments forwarded as ``args``.
        engine_config: Engine settings; defaults to
            ``EngineConfig.default_config()``.
        side_channels: Additional side channels. The caller's list is not
            modified; the engine configuration channel is added to a copy.

    Returns:
        The connected ``UnityEnvironment``.

    Raises:
        UnityEnvironmentException: If ``env_path`` matches no environment.
    """
    # Use Unity Editor if env file is not provided.
    if env_path is None:
        port = UnityEnvironment.DEFAULT_EDITOR_PORT
    else:
        launch_string = UnityEnvironment.validate_environment_path(env_path)
        if launch_string is None:
            raise UnityEnvironmentException(
                f"Couldn't launch the {env_path} environment. Provided filename does not match any environments."
            )
        logger.info(f"Starting environment from {env_path}.")

    # Configure Unity Engine.
    if engine_config is None:
        engine_config = EngineConfig.default_config()

    engine_configuration_channel = EngineConfigurationChannel()
    engine_configuration_channel.set_configuration(engine_config)

    # Copy rather than append in place, so a list reused by the caller does
    # not accumulate one engine channel per call.
    all_channels = list(side_channels) if side_channels is not None else []
    all_channels.append(engine_configuration_channel)

    # Find an available port to connect to Unity environment.
    # NOTE: the search has no upper bound; it stops on the first free port.
    while True:
        try:
            env = UnityEnvironment(
                file_name=env_path,
                seed=seed,
                base_port=port,
                args=env_args,
                side_channels=all_channels,
            )
        except UnityWorkerInUseException:
            logger.debug(f"port {port} in use.")
            port += 1
        else:
            logger.info(f"Connected to environment using port {port}.")
            break

    return env
def initialize_all_side_channels(self, initialize_config, engine_config):
    """Initialize all communication channels.

    Args:
        initialize_config: Mapping of float parameters sent after the
            ``env_copies`` entry.
        engine_config: Mapping expanded as keyword arguments to
            ``set_configuration_parameters``.

    Returns:
        Dict with keys ``engine_configuration_channel`` and
        ``float_properties_channel``.
    """
    engine_channel = EngineConfigurationChannel()
    engine_channel.set_configuration_parameters(**engine_config)

    float_channel = EnvironmentParametersChannel()
    float_channel.set_float_parameter('env_copies', self._n_copies)
    for name, value in initialize_config.items():
        float_channel.set_float_parameter(name, value)

    return {
        'engine_configuration_channel': engine_channel,
        'float_properties_channel': float_channel,
    }
def __init__(self, n_episodes, env_name, model):
    """Build the environment and network, load weights, and run the test.

    Args:
        n_episodes: Number of episodes, stored on the instance.
        env_name: Environment name passed to ``UnityEnv``.
        model: Passed to ``self.load_model``.
    """
    # Number of episodes
    self.n_episodes = n_episodes

    # Environment
    self.env_name = env_name
    side_channel = EngineConfigurationChannel()
    self.env = UnityEnv(
        self.env_name,
        worker_id=0,
        use_visual=False,
        side_channels=[side_channel],
        no_graphics=False,
        multiagent=False,
    )
    sizes = Utils.getActionStateSize(self.env)
    self.action_size, self.state_size = sizes

    # Model
    network = ActorCritic(self.state_size, self.action_size, seed=0)
    self.model = network.to(device)

    # Step counter (for updating every "update_every" time steps)
    self.t_step = 1

    # Loading the weights and running the test both happen during
    # construction.
    self.load_model(model)
    self.test()
def get_env_info(env_name, args):
    """Open the environment and summarise its sizes.

    Args:
        env_name: Name of the build under ``envs/``; ignored when
            ``args.run_unity_editor`` is ``True``.
        args: Object exposing ``run_unity_editor``.

    Returns:
        Tuple ``(env, env_arg, engine_configuration_channel)`` where
        ``env_arg`` holds ``n_agents``, ``n_actions``, ``state_shape``,
        ``obs_shape`` and ``episode_limit``.
    """
    engine_configuration_channel = EngineConfigurationChannel()
    if args.run_unity_editor is not True:
        file_name = "envs/{0}".format(env_name)
    else:
        file_name = None
    env = UnityEnvironment(
        file_name=file_name,
        side_channels=[engine_configuration_channel])
    env.reset()

    behavior_names = list(env.behavior_specs.keys())

    # First behavior: coordinator, whose observation is the global state.
    coordinator_steps, _ = env.get_steps(behavior_names[0])
    state = coordinator_steps.obs[0]

    # Second behavior: agent, providing observations and the action mask.
    agent_steps, _ = env.get_steps(behavior_names[1])
    obs = agent_steps.obs[0]
    first_mask = agent_steps.action_mask[0]

    env_arg = {
        "n_agents": first_mask.shape[0],
        "n_actions": agent_steps.action_mask[0].shape[1],
        "state_shape": state.shape[1],
        "obs_shape": obs.shape[1],
        "episode_limit": 160,
    }
    return env, env_arg, engine_configuration_channel
def __init__(self):
    """Set hyperparameters, open the multi-agent environment, build the model."""
    # --- Hyperparameters ---
    self.learning_rate = 0.0003
    self.betas = (0.9, 0.999)
    self.gamma = 0.99
    self.eps_clip = 0.2
    self.buffer_size = 2048
    self.batch_size = 256
    self.K_epochs = 3
    self.max_steps = 100000
    self.tau = 0.95
    self.entropy_coef = 0.001
    self.value_loss_coef = 0.5
    self.summary_freq = 1000

    # --- Environment ---
    self.env_name = "Environments/env1/Unity Environment"
    time_channel = EngineConfigurationChannel()
    self.env = UnityEnv(
        self.env_name,
        worker_id=0,
        use_visual=False,
        side_channels=[time_channel],
        no_graphics=False,
        multiagent=True,
    )
    time_channel.set_configuration_parameters(time_scale=100)

    sizes = Utils.getActionStateSize(self.env)
    self.action_size, self.state_size = sizes
    self.n_agents = self.env.number_agents
    print("Nº of Agents: ", self.n_agents)

    # --- Model ---
    network = ActorCritic(self.state_size, self.action_size, seed=0)
    self.model = network.to(device)
    self.optimizer = optim.Adam(
        self.model.parameters(), lr=self.learning_rate, betas=self.betas)
    self.MseLoss = nn.MSELoss()

    # --- Buffer memory: one buffer per agent ---
    self.memory = [Buffer() for _ in range(self.n_agents)]

    # Step counter (for updating when buffer_size is full)
    self.t_step = 1
def __init__(self):
    """Set hyperparameters, open the environment, build local/target networks."""
    # --- Hyperparameters ---
    self.learning_rate = 0.0003
    self.buffer_size = 10240
    self.batch_size = 1024
    self.gamma = 0.99
    self.update_every = 64
    self.max_steps = 100000
    self.epsilon = 1.0
    self.epsilon_end = 0.01
    self.epsilon_decay = 0.995
    self.tau = 0.01
    self.summary_freq = 1000

    # --- Environment ---
    self.env_name = "Environments/env1/Unity Environment"
    time_channel = EngineConfigurationChannel()
    self.env = UnityEnv(
        self.env_name,
        worker_id=0,
        use_visual=False,
        side_channels=[time_channel],
        no_graphics=False,
        multiagent=False,
    )
    time_channel.set_configuration_parameters(time_scale=100)

    sizes = Utils.getActionStateSize(self.env)
    self.action_size, self.state_size = sizes
    self.n_agents = self.env.number_agents

    # --- Models: two identically constructed Q-networks ---
    local_net = QNetwork(self.state_size, self.action_size, seed=0)
    self.local_model = local_net.to(device)
    target_net = QNetwork(self.state_size, self.action_size, seed=0)
    self.target_model = target_net.to(device)
    # Only the local network's parameters are handed to the optimizer.
    self.optimizer = optim.Adam(
        self.local_model.parameters(), lr=self.learning_rate)

    # --- Buffer memory ---
    self.memory = Buffer(
        self.buffer_size, self.batch_size, seed=0, device=device)

    # Step counter (for updating every "update_every" time steps)
    self.t_step = 0
def initialize_all_side_channels(self, kwargs):
    """Initialize all communication channels.

    Args:
        kwargs: Mapping that must contain ``width``, ``height``,
            ``quality_level``, ``target_frame_rate`` and
            ``capture_frame_rate``. ``time_scale`` is required unless
            ``inference`` is truthy, in which case 1 is used. Optional
            ``initialize_config`` supplies float parameters.

    Returns:
        Dict with keys ``engine_configuration_channel`` and
        ``float_properties_channel``.
    """
    engine_settings = {
        'width': kwargs['width'],
        'height': kwargs['height'],
        'quality_level': kwargs['quality_level'],
        # Inference always runs at time scale 1.
        'time_scale': (1 if bool(kwargs.get('inference', False))
                       else kwargs['time_scale']),
        'target_frame_rate': kwargs['target_frame_rate'],
        'capture_frame_rate': kwargs['capture_frame_rate'],
    }
    engine_channel = EngineConfigurationChannel()
    engine_channel.set_configuration_parameters(**engine_settings)

    float_channel = EnvironmentParametersChannel()
    for name, value in kwargs.get('initialize_config', {}).items():
        float_channel.set_float_parameter(name, value)

    return {
        'engine_configuration_channel': engine_channel,
        'float_properties_channel': float_channel,
    }
def play():
    """Load the saved DQN model and run it on PushBlock indefinitely."""

    def as_batch(observation):
        """Add a leading batch axis to a single observation."""
        return np.expand_dims(np.array(observation), axis=0)

    # Run the simulation at 10x time scale.
    channel = EngineConfigurationChannel()
    channel.set_configuration_parameters(time_scale=10.0)
    unity_env = UnityEnvironment(
        "./ml-agents/Project/PushBlock", side_channels=[channel])
    env = UnityToGymWrapper(unity_env, 0, flatten_branched=True)

    # Load the model; zero timesteps means no further training.
    model = deepq.learn(env, "mlp", total_timesteps=0, load_path="./model")

    obs = as_batch(env.reset())
    while True:
        action, _, _, _ = model.step(tf.constant(obs))
        action = action[0].numpy()
        obs, rew, done, _ = env.step(action)
        if done:
            obs = env.reset()
        obs = as_batch(obs)
def __init__(self):
    """Connect to Unity, set the time scale and record per-behavior sizes."""
    self.engine_configuration_channel = EngineConfigurationChannel()
    self.env = UnityEnvironment(
        side_channels=[self.engine_configuration_channel])
    # Only the time scale is configured; other engine settings keep their
    # defaults.
    self.engine_configuration_channel.set_configuration_parameters(
        time_scale=1)
    self.reset()
    self.n = self.agent_num()

    self.state_shapes = []
    for behavior_name in self.env.get_behavior_names():
        spec = self.env.get_behavior_spec(behavior_name)
        self.state_shapes.append(spec.observation_shapes[0][0])

    self.action_dims = []
    for behavior_name in self.env.get_behavior_names():
        spec = self.env.get_behavior_spec(behavior_name)
        self.action_dims.append(spec.action_shape)
def initialise_environment(self):
    """Initialise and reset the Unity environment, then set its time scale."""
    engine_channel = EngineConfigurationChannel()
    self.float_properties_channel = FloatPropertiesChannel()
    channels = [engine_channel, self.float_properties_channel]
    self.env = UnityEnvironment(
        file_name=self.env_path, base_port=5004, side_channels=channels)

    # Reset the environment
    self.env.reset()

    # Work with the first agent group as the default brain
    first_group = self.env.get_agent_groups()[0]
    self.group_name = first_group
    self.group_spec = self.env.get_agent_group_spec(first_group)

    # Set the time scale of the engine
    engine_channel.set_configuration_parameters(time_scale=self.time_scale)
def test_set_action_multi_agent():
    """Step the registry ball environment with all-ones continuous actions.

    Runs 3 resets of 50 steps each at time scale 100. The environment is
    closed in a ``finally`` so that a failure mid-test does not leave the
    Unity process running and holding the port for later tests.
    """
    engine_config_channel = EngineConfigurationChannel()
    env = default_registry[BALL_ID].make(
        base_port=6001,
        worker_id=0,
        no_graphics=True,
        side_channels=[engine_config_channel],
    )
    try:
        engine_config_channel.set_configuration_parameters(time_scale=100)
        for _ in range(3):
            env.reset()
            behavior_name = list(env.behavior_specs.keys())[0]
            d, t = env.get_steps(behavior_name)
            for _ in range(50):
                # One row of two continuous actions per agent awaiting a
                # decision.
                action = np.ones((len(d), 2))
                action_tuple = ActionTuple()
                action_tuple.add_continuous(action)
                env.set_actions(behavior_name, action_tuple)
                env.step()
                d, t = env.get_steps(behavior_name)
    finally:
        env.close()
def create_engine_config_side_channel(self) -> EngineConfigurationChannel:
    """Return an engine configuration channel preset for the current mode.

    The ``play`` variant of each setting is used when ``self.play`` or
    ``self.inference`` is truthy; otherwise the ``train`` variant is used.
    """
    mode = "play" if (self.play or self.inference) else "train"
    engine_configuration = EngineConfig(
        width=getattr(self.WINDOW_WIDTH, mode),
        height=getattr(self.WINDOW_HEIGHT, mode),
        quality_level=getattr(self.QUALITY_LEVEL, mode),
        time_scale=getattr(self.TIMESCALE, mode),
        target_frame_rate=getattr(self.TARGET_FRAME_RATE, mode),
    )
    channel = EngineConfigurationChannel()
    channel.set_configuration(engine_configuration)
    return channel
def __init__(self):
    """Set hyperparameters, open the environment, build shared/agent models."""
    # --- Hyperparameters ---
    self.learning_rate = 0.0003
    self.gamma = 0.99
    self.batch_size = 256
    self.max_steps = 100000
    self.tau = 0.95
    self.entropy_coef = 0.001
    self.value_loss_coef = 0.5
    self.summary_freq = 1000

    # --- Environment ---
    self.env_name = "Environments/env1/Unity Environment"
    time_channel = EngineConfigurationChannel()
    self.env = UnityEnv(
        self.env_name,
        worker_id=0,
        use_visual=False,
        side_channels=[time_channel],
        no_graphics=False,
        multiagent=True,
    )
    time_channel.set_configuration_parameters(time_scale=100)

    sizes = Utils.getActionStateSize(self.env)
    self.action_size, self.state_size = sizes
    self.n_agents = self.env.number_agents
    print("Nº of Agents: ", self.n_agents)

    # --- Shared model ---
    shared_net = ActorCritic(self.state_size, self.action_size, seed=0)
    self.shared_model = shared_net.to(device)

    # --- Per-agent models, each with its own optimizer ---
    self.agent_model = []
    self.optimizer = []
    for _ in range(self.n_agents):
        agent_net = ActorCritic(
            self.state_size, self.action_size, seed=0).to(device)
        self.agent_model.append(agent_net)
        self.optimizer.append(
            optim.Adam(agent_net.parameters(), lr=self.learning_rate))

    # --- Buffer memory: one buffer per agent ---
    self.memory = [Buffer() for _ in range(self.n_agents)]

    # Step counter (for updating every "batch_size" time steps)
    self.t_step = 1
def __init__(self, env_args):
    """Configure the side channels and launch or connect to Unity.

    Args:
        env_args: Mapping of settings. Keys read here: ``train_mode``,
            ``train_time_scale`` or (``width``, ``height``,
            ``quality_level``, ``inference_time_scale``,
            ``target_frame_rate``), ``file_path``, ``env_seed``,
            ``reset_config``, and, when ``file_path`` is set, ``port``,
            ``render`` plus optional ``env_name`` and ``env_num``.
    """
    engine_channel = EngineConfigurationChannel()
    self.engine_configuration_channel = engine_channel
    if not env_args['train_mode']:
        engine_channel.set_configuration_parameters(
            width=env_args['width'],
            height=env_args['height'],
            quality_level=env_args['quality_level'],
            time_scale=env_args['inference_time_scale'],
            target_frame_rate=env_args['target_frame_rate'])
    else:
        engine_channel.set_configuration_parameters(
            time_scale=env_args['train_time_scale'])

    float_channel = EnvironmentParametersChannel()
    self.float_properties_channel = float_channel
    channels = [engine_channel, float_channel]

    if env_args['file_path'] is not None:
        # Scene names are looked up in a YAML file under the current working
        # directory.
        yaml_path = '/'.join(
            [os.getcwd(), 'rls', 'envs', 'unity_env_dict.yaml'])
        unity_env_dict = load_yaml(yaml_path)
        self._env = UnityEnvironment(
            file_name=env_args['file_path'],
            base_port=env_args['port'],
            no_graphics=not env_args['render'],
            seed=env_args['env_seed'],
            side_channels=channels,
            additional_args=[
                '--scene',
                str(unity_env_dict.get(
                    env_args.get('env_name', 'Roller'), 'None')),
                '--n_agents',
                str(env_args.get('env_num', 1)),
            ])
    else:
        # No file path given: no binary is launched by name.
        self._env = UnityEnvironment(base_port=5004,
                                     seed=env_args['env_seed'],
                                     side_channels=channels)
    self.reset_config = env_args['reset_config']
def unity_env_fn(agent_file, time_scale, no_graphics, worker_id):
    """Build a gym-wrapped Unity environment with custom speed and graphics.

    Args:
        agent_file (str): path to the environment binary
        time_scale (float): speed at which to run the simulation
        no_graphics (bool): forwarded to ``UnityEnvironment`` as
            ``no_graphics``
        worker_id (int): forwarded to ``UnityEnvironment`` as ``worker_id``

    Returns:
        Gym environment.
    """
    time_channel = EngineConfigurationChannel()
    launch_options = dict(
        file_name=agent_file,
        no_graphics=no_graphics,
        side_channels=[time_channel],
        worker_id=worker_id,
    )
    unity_env = UnityEnvironment(**launch_options)
    time_channel.set_configuration_parameters(time_scale=time_scale)
    return UnityToGymWrapper(unity_env)
def main(params):
    """Train a DDQN agent against a Unity environment.

    Args:
        params: Not used by this function; settings come from the
            module-level ``parser`` and ``cfg``.
    """
    config = vars(parser.parse_args())

    # file_name=None: no environment binary is named.
    time_channel = EngineConfigurationChannel()
    unity_env = UnityEnvironment(file_name=None, side_channels=[time_channel])
    time_channel.set_configuration_parameters(time_scale=20.0)
    env = UnityToGymWrapper(unity_env)

    agent_cfg = cfg['agent']
    agent = DDQN(env, agent_cfg)
    tag = 'DDQN'

    # Initiate the tracker for stats
    tracker = Tracker(
        "TurtleBot3", tag, seed, agent_cfg, ['Epoch', 'Ep_Reward'])

    # Train the agent
    agent.train(
        tracker,
        n_episodes=config['epochs'],
        verbose=config['verbose'],
        params=agent_cfg,
        hyperp=config,
    )
def __init__(self, config=DEFAULT_ENV_CONFIG):
    """Create the side channels and initialise the parent environment.

    :param config: Configuration of the environment.
    """
    # Side channels, kept on the instance for later use.
    self.env_param_channel = EnvironmentParametersChannel()
    self.engine_channel = EngineConfigurationChannel()
    self.color_pool_channel = IntListPropertiesChannel()

    # Flag: has the config been applied to the environment yet?
    self.is_already_initialized = False

    # Create the environment with config and side channels.
    super().__init__(
        config,
        DEFAULT_ENV_CONFIG,
        side_channels=[
            self.env_param_channel,
            self.engine_channel,
            self.color_pool_channel,
        ],
    )
def __init__(self,
             train_mode=True,
             file_name=None,
             base_port=5005,
             seed=None,
             scene=None,
             n_agents=1):
    """Launch or connect to Unity and apply train/inference engine settings.

    Args:
        train_mode: When true, use a small low-quality window at time scale
            100; otherwise a large high-quality window at time scale 5.
        file_name: Forwarded to ``UnityEnvironment`` as ``file_name``.
        base_port: Forwarded to ``UnityEnvironment`` as ``base_port``.
        seed: Environment seed; a random value in [0, 65536) when ``None``.
        scene: Scene name passed as ``--scene``. The flag is omitted when
            ``None`` so that no ``None`` entry ends up in the argument list.
        n_agents: Value passed as ``--n_agents``.
    """
    seed = seed if seed is not None else np.random.randint(0, 65536)

    self.engine_configuration_channel = EngineConfigurationChannel()
    self.environment_parameters_channel = EnvironmentParametersChannel()

    launch_args = ['--n_agents', str(n_agents)]
    if scene is not None:
        launch_args = ['--scene', scene] + launch_args

    self._env = UnityEnvironment(
        file_name=file_name,
        base_port=base_port,
        seed=seed,
        args=launch_args,
        side_channels=[
            self.engine_configuration_channel,
            self.environment_parameters_channel
        ])

    if train_mode:
        self.engine_configuration_channel.set_configuration_parameters(
            width=200, height=200, quality_level=0, time_scale=100)
    else:
        self.engine_configuration_channel.set_configuration_parameters(
            width=1028,
            height=720,
            quality_level=5,
            time_scale=5,
            target_frame_rate=60)

    self._env.reset()
    # The misspelled attribute is kept for existing callers; the correctly
    # spelled alias refers to the same value.
    self.bahavior_name = self._env.get_behavior_names()[0]
    self.behavior_name = self.bahavior_name