def test_ppo_get_value_estimates(mock_communicator, mock_launcher, dummy_config):
    tf.reset_default_graph()
    mock_communicator.return_value = MockCommunicator(
        discrete_action=False, visual_inputs=0
    )
    env = UnityEnvironment(" ")
    brain_infos = env.reset()
    brain_info = brain_infos[env.brain_names[0]]

    trainer_parameters = dummy_config
    model_path = env.brain_names[0]
    trainer_parameters["model_path"] = model_path
    trainer_parameters["keep_checkpoints"] = 3
    policy = PPOPolicy(
        0, env.brains[env.brain_names[0]], trainer_parameters, False, False
    )
    run_out = policy.get_value_estimates(brain_info, 0, done=False)
    for key, val in run_out.items():
        assert type(key) is str
        assert type(val) is float

    run_out = policy.get_value_estimates(brain_info, 0, done=True)
    for key, val in run_out.items():
        assert type(key) is str
        assert val == 0.0

    # Check if we ignore terminal states properly
    policy.reward_signals["extrinsic"].use_terminal_states = False
    run_out = policy.get_value_estimates(brain_info, 0, done=True)
    for key, val in run_out.items():
        assert type(key) is str
        assert val != 0.0

    env.close()
def __init__(self, env_path, worker_id, train_mode=True, n_str=16, n_goalie=16):
    # Use the caller-supplied worker_id rather than a hard-coded 0
    self.env = UnityEnvironment(file_name=env_path, worker_id=worker_id)
    self.striker_brain_name, self.goalie_brain_name = self.env.brain_names
    self.striker_brain = self.env.brains[self.striker_brain_name]
    self.goalie_brain = self.env.brains[self.goalie_brain_name]
    self.done_str = [False] * n_str
    self.done_goalie = [False] * n_goalie
    self.train_mode = train_mode
    self.done_hist_str = [False] * n_str
    self.done_hist_goalie = [False] * n_goalie
    self.episode_str_rewards = 0
    self.episode_goalie_rewards = 0
    self.n_str = n_str
    self.n_goalie = n_goalie
    self.act_str_hist = [[] for _ in range(n_str)]
    self.act_goalie_hist = [[] for _ in range(n_goalie)]
    self.observation_str_hist = [[] for _ in range(SIZE_OBSERVATION)]
    self.observation_goalie_hist = [[] for _ in range(SIZE_OBSERVATION)]
    self.observation_str = None
    self.observation_goalie = None
def main():
    # evaluate = True
    # train = True
    # test = False
    evaluate = False
    train = True
    test = False
    train_file_name = "/homes/gkumar/Documents/UnityProjects/mazeContinuousTarget_fullMaze/Build/mazeBasic_Continuous_fixedGoal_test1_100X"
    test_file_name = "/homes/gkumar/Documents/UnityProjects/mazeContinuousTarget_fullMaze/Build/mazeBasic_Continuous_fixedGoal_test1_realtime"
    evaluate_file_name = "/homes/gkumar/Documents/UnityProjects/mazeContinuousTarget_fullMaze/Build/mazeBasic_Continuous_fixedGoal_test1_realtime"
    # train_file_name = "/homes/gkumar/Documents/UnityProjects/mazeContinuousTarget_1/Build/mazeBasic_fullDynamic_fullSpeed"
    # test_file_name = "/homes/gkumar/Documents/UnityProjects/mazeContinuousTarget_1/Build/mazeBasic_fullDynamic_fullSpeed_test"
    # evaluate_file_name = "/homes/gkumar/Documents/UnityProjects/mazeContinuousTarget_1/Build/mazeBasic_fullDynamic_fullSpeed"

    if train:
        env = UnityEnvironment(file_name=train_file_name, worker_id=1)
    elif test:
        env = UnityEnvironment(file_name=test_file_name, worker_id=1)
    elif evaluate:
        env = UnityEnvironment(file_name=evaluate_file_name, worker_id=1)
    else:
        print("Decide between train, test, and evaluate mode")
        exit(0)
    print("Created Env")

    host_name = os.uname()[1]
    os.system('mkdir -p ./logs/' + host_name)
    logger.configure('./logs/' + host_name)  # Change to log in a different directory

    if train or test:
        # The train and test branches were identical, so they share one call
        act = ddpg_unity_her_kl.learn(
            "mlp",  # conv_only is also a good choice for GridWorld
            env,
            nb_epochs=1000,
            nb_epoch_cycles=100,
            nb_rollout_steps=500,
            # total_timesteps=10000000,
            test=test,
            train=train)
    elif evaluate:
        act = ddpg_unity_her_kl.evaluate(
            "mlp",  # conv_only is also a good choice for GridWorld
            env)
    print("Saving model to unity_model.pkl")
    act.save("unity_model.pkl")
def test(self):
    from mlagents.envs import UnityEnvironment
    num_worker = 20
    state_size = 33
    output_size = 4
    n_step = 128
    ep = 0
    score = 0

    saver = tf.train.Saver()
    saver.restore(self.sess, 'model/model')

    env = UnityEnvironment(file_name='env/walker', worker_id=2)
    default_brain = env.brain_names[0]
    brain = env.brains[default_brain]
    env_info = env.reset()

    states = np.zeros([num_worker, state_size])
    while True:
        inference = [self.get_action(s) for s in states]
        actions = [inf[0] for inf in inference]
        env_info = env.step(actions)[default_brain]
        states = env_info.vector_observations
def test_ppo_model_cc_vector_rnn(mock_communicator, mock_launcher):
    tf.reset_default_graph()
    with tf.Session() as sess:
        with tf.variable_scope("FakeGraphScope"):
            mock_communicator.return_value = MockCommunicator(
                discrete_action=False, visual_inputs=0)
            env = UnityEnvironment(' ')
            memory_size = 128
            model = PPOModel(env.brains["RealFakeBrain"],
                             use_recurrent=True, m_size=memory_size)
            init = tf.global_variables_initializer()
            sess.run(init)

            run_list = [model.output, model.all_log_probs, model.value,
                        model.entropy, model.learning_rate, model.memory_out]
            feed_dict = {model.batch_size: 1,
                         model.sequence_length: 2,
                         model.memory_in: np.zeros((1, memory_size)),
                         model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
                                                    [3, 4, 5, 3, 4, 5]]),
                         model.epsilon: np.array([[0, 1]])}
            sess.run(run_list, feed_dict=feed_dict)
            env.close()
def test_ppo_model_dc_vector_curio(mock_communicator, mock_launcher):
    tf.reset_default_graph()
    with tf.Session() as sess:
        with tf.variable_scope("FakeGraphScope"):
            mock_communicator.return_value = MockCommunicator(
                discrete_action=True, visual_inputs=0)
            env = UnityEnvironment(' ')
            model = PPOModel(env.brains["RealFakeBrain"], use_curiosity=True)
            init = tf.global_variables_initializer()
            sess.run(init)

            run_list = [model.output, model.all_log_probs, model.value,
                        model.entropy, model.learning_rate,
                        model.intrinsic_reward]
            feed_dict = {model.batch_size: 2,
                         model.sequence_length: 1,
                         model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
                                                    [3, 4, 5, 3, 4, 5]]),
                         model.next_vector_in: np.array([[1, 2, 3, 1, 2, 3],
                                                         [3, 4, 5, 3, 4, 5]]),
                         model.action_holder: [[0], [0]],
                         model.action_masks: np.ones([2, 2])}
            sess.run(run_list, feed_dict=feed_dict)
            env.close()
class UnityEnv:
    def __init__(self, env_name, **kwargs) -> None:
        super().__init__()
        filename = unity_filename(env_name)
        self.unity_env = UnityEnvironment(file_name=filename, **kwargs)
        brain_name = self.unity_env.brain_names[0]
        self.name = brain_name.replace("Brain", "")
        brain = self.unity_env.brains[brain_name]
        env_info = self.unity_env.reset(train_mode=True)[brain_name]

        self.brain_name = brain_name
        self.num_agents = len(env_info.agents)
        self.num_actions = list(brain.vector_action_space_size)[0]
        self.states = env_info.vector_observations
        self.num_states = self.states.shape[1]

    def reset(self, train_mode=False):
        env_info = self.unity_env.reset(train_mode=train_mode)[self.brain_name]
        return env_info.vector_observations

    def step(self, actions):
        env_info = self.unity_env.step(actions)[self.brain_name]
        next_states = env_info.vector_observations
        rewards = env_info.rewards
        dones = env_info.local_done
        return np.asarray(next_states), np.asarray(rewards), np.asarray(dones), env_info
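# A minimal usage sketch for the wrapper above. The environment name
# ("Reacher") and the random continuous-action policy are illustrative
# assumptions, not part of the original code; np is numpy.
env = UnityEnv("Reacher", worker_id=0)
states = env.reset(train_mode=True)
for _ in range(100):
    # One random action per agent, assuming a continuous action space
    actions = np.random.uniform(-1, 1, (env.num_agents, env.num_actions))
    next_states, rewards, dones, _ = env.step(actions)
    states = next_states
    if np.any(dones):
        states = env.reset(train_mode=True)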
def test_ppo_model_cc_visual(mock_communicator, mock_launcher):
    tf.reset_default_graph()
    with tf.Session() as sess:
        with tf.variable_scope("FakeGraphScope"):
            mock_communicator.return_value = MockCommunicator(
                discrete_action=False, visual_inputs=2)
            env = UnityEnvironment(' ')
            model = PPOModel(env.brains["RealFakeBrain"])
            init = tf.global_variables_initializer()
            sess.run(init)

            run_list = [model.output, model.log_probs, model.value,
                        model.entropy, model.learning_rate]
            feed_dict = {model.batch_size: 2,
                         model.sequence_length: 1,
                         model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
                                                    [3, 4, 5, 3, 4, 5]]),
                         model.visual_in[0]: np.ones([2, 40, 30, 3]),
                         model.visual_in[1]: np.ones([2, 40, 30, 3]),
                         model.epsilon: np.array([[0, 1], [2, 3]])}
            sess.run(run_list, feed_dict=feed_dict)
            env.close()
def __init__(self, environment_filename: str, worker_id=0, use_visual=False, multiagent=False):
    """
    Environment initialization
    :param environment_filename: The UnityEnvironment path or file to be wrapped in the gym.
    :param worker_id: Worker number for environment.
    :param use_visual: Whether to use visual observation or vector observation.
    :param multiagent: Whether to run in multi-agent mode (lists of obs, reward, done).
    """
    self._env = UnityEnvironment(environment_filename, worker_id)
    self.name = self._env.academy_name
    self.visual_obs = None
    self._current_state = None
    self._n_agents = None
    self._multiagent = multiagent

    # Check brain configuration
    if len(self._env.brains) != 1:
        raise UnityGymException(
            "There can only be one brain in a UnityEnvironment "
            "if it is wrapped in a gym.")
    self.brain_name = self._env.external_brain_names[0]
    brain = self._env.brains[self.brain_name]

    if use_visual and brain.number_visual_observations == 0:
        raise UnityGymException("`use_visual` was set to True, however there are no"
                                " visual observations as part of this environment.")
    self.use_visual = brain.number_visual_observations == 1 and use_visual

    if brain.num_stacked_vector_observations != 1:
        raise UnityGymException(
            "There can only be one stacked vector observation in a UnityEnvironment "
            "if it is wrapped in a gym.")

    # Check for number of agents in scene.
    initial_info = self._env.reset()[self.brain_name]
    self._check_agents(len(initial_info.agents))

    # Set observation and action spaces
    if brain.vector_action_space_type == "discrete":
        if len(brain.vector_action_space_size) == 1:
            self._action_space = spaces.Discrete(brain.vector_action_space_size[0])
        else:
            self._action_space = spaces.MultiDiscrete(brain.vector_action_space_size)
    else:
        high = np.array([1] * brain.vector_action_space_size[0])
        self._action_space = spaces.Box(-high, high, dtype=np.float32)
    high = np.array([np.inf] * brain.vector_observation_space_size)
    self.action_meanings = brain.vector_action_descriptions
    if self.use_visual:
        if brain.camera_resolutions[0]["blackAndWhite"]:
            depth = 1
        else:
            depth = 3
        self._observation_space = spaces.Box(
            0, 1, dtype=np.float32,
            shape=(brain.camera_resolutions[0]["height"],
                   brain.camera_resolutions[0]["width"],
                   depth))
    else:
        self._observation_space = spaces.Box(-high, high, dtype=np.float32)
def test_close(mock_communicator, mock_launcher):
    comm = MockCommunicator(discrete_action=False, visual_inputs=0)
    mock_communicator.return_value = comm
    env = UnityEnvironment(' ')
    assert env._loaded
    env.close()
    assert not env._loaded
    assert comm.has_been_closed
def run(self):
    from mlagents.envs import UnityEnvironment
    writer = SummaryWriter('runs/td3')
    num_worker = 20
    state_size = 33
    output_size = 4
    epsilon = 1.0
    ep = 0
    train_size = 5

    env = UnityEnvironment(file_name='env/training', worker_id=0)
    default_brain = env.brain_names[0]
    brain = env.brains[default_brain]
    initial_observation = env.reset()

    step = 0
    score = 0
    while True:
        ep += 1
        env_info = env.reset()
        states = np.zeros([num_worker, state_size])
        terminal = False
        self.noise.reset()
        if epsilon > 0.001:
            epsilon = -ep * 0.005 + 1.0
        while not terminal:
            step += 1
            actions = [self.get_action(s, epsilon) for s in states]
            env_info = env.step(actions)[default_brain]
            next_states = env_info.vector_observations
            rewards = env_info.rewards
            dones = env_info.local_done
            terminal = dones[0]
            for s, ns, r, d, a in zip(states, next_states, rewards, dones, actions):
                self.memory.append(s, ns, r, d, a)
            score += sum(rewards)
            states = next_states
            if step % train_size == 0:
                self.update()
        if ep < 1000:
            print('episode :', ep, '| score : ', score, '| epsilon :', epsilon)
            writer.add_scalar('data/reward', score, ep)
            writer.add_scalar('data/epsilon', epsilon, ep)
            writer.add_scalar('data/memory_size', len(self.memory.memory), ep)
            score = 0
def loadEnv(self, wid):
    # Load the environment
    env_name = ENV_LOCATION
    self.env = UnityEnvironment(env_name, worker_id=wid)

    # Set the default brain to work with
    self.default_brain = self.env.brain_names[0]
    self.brain = self.env.brains[self.default_brain]

    # Reset the environment - train mode enabled
    env_info = self.env.reset(train_mode=True)[self.default_brain]
def walking_iterator():
    env = UnityEnvironment(file_name=env_name)

    # Set the default brain to work with
    default_brain = env.brain_names[0]
    brain = env.brains[default_brain]

    # Reset the environment
    env_info = env.reset(train_mode=train_mode)[default_brain]

    # Examine the state space for the default brain
    print("Agent vector observations look like: \n{}".format(env_info.vector_observations[0]))

    # Examine the observation space for the default brain
    print("Agent visual observations look like:")
    for i, vo in enumerate(env_info.visual_observations):
        print("Visual observation", i, ":", vo[0].shape)

    turning_sign = 1
    while True:
        # Interpret and yield sensory input
        rgb_image = env_info.visual_observations[0][0]
        depth_image = depth_rgb_to_float(env_info.visual_observations[1][0])
        pose = env_info.vector_observations[0][:4]
        forward_clear_dist = env_info.vector_observations[0][4]
        yield {
            'image': rgb_image,
            'depth': np.clip(depth_image * 1000, 0, 65535).astype(np.uint16),
            'pose': pose
        }

        # Decide on actions
        # First action dim is forward motion, second is rotation
        actions = np.zeros([len(env_info.agents), brain.vector_action_space_size[0]], np.float32)
        if forward_clear_dist > 3.0:
            turning_sign = -1 * turning_sign
        if forward_clear_dist > 1.0:
            # Forward is clear, go forward
            actions[0, 0] = np.random.uniform(0.05, 0.5)
            actions[0, 1] = np.random.uniform(0.0, 0.01) * turning_sign
        elif forward_clear_dist < 0.1:
            # Back up!
            actions[0, 0] = np.random.uniform(-0.05, -0.5)
        else:
            # Just a little distance. Turn.
            actions[0, 1] = np.random.uniform(0.01, 0.05) * turning_sign

        env_info = env.step(actions)[default_brain]
        if env_info.local_done[0]:
            env_info = env.reset(train_mode=train_mode)
            if type(env_info) is dict:  # This happens sometimes, not sure why
                env_info = env_info['SlamWalkerLearning']
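# A sketch of consuming the generator above, which closes over env_name and
# train_mode from its enclosing scope. Each yielded frame carries an RGB
# image, a uint16 depth map, and a pose; the 10-frame budget is an
# illustrative assumption.
for i, frame in enumerate(walking_iterator()):
    if i >= 10:
        break
    print(frame['pose'], frame['image'].shape, frame['depth'].dtype)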
def make_unity_env(env_args):
    if env_args['file_path'] is None:
        env = UnityEnvironment()
    else:
        env = UnityEnvironment(file_name=env_args['file_path'],
                               base_port=env_args['port'],
                               no_graphics=not env_args['render'])
    env = InfoWrapper(env)
    env = UnityReturnWrapper(env)
    env = SamplerWrapper(env, env_args)
    return env
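# A hypothetical env_args dict for the factory above. The keys mirror the
# ones the function itself reads; the values are made up for illustration,
# and SamplerWrapper may read additional keys not shown here.
env_args = {
    'file_path': './builds/3DBall',  # None would attach to the Unity editor
    'port': 5005,
    'render': False,
}
env = make_unity_env(env_args)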
def _init_env(self, critic_config, ppo_config, model_root_path):
    config = self.config
    if config['build_path'] is None or config['build_path'] == '':
        self.env = UnityEnvironment()
    else:
        self.env = UnityEnvironment(file_name=config['build_path'],
                                    no_graphics=self.train_mode,
                                    base_port=config['port'],
                                    args=['--scene', config['scene']])

    self.default_brain_name = self.env.brain_names[0]
    brain_params = self.env.brains[self.default_brain_name]
    state_dim = brain_params.vector_observation_space_size
    action_dim = brain_params.vector_action_space_size[0]

    ppo_module = importlib.import_module(config['ppo'])

    class Critic(ppo_module.Critic_Custom, Critic_Base):
        pass

    class PPO(ppo_module.PPO_Custom, PPO_Base):
        pass

    self.critic = Critic(state_dim=state_dim,
                         model_root_path=model_root_path,
                         seed=config['seed'],
                         **critic_config)

    self.ppos = list()
    for i in range(config['policies_num']):
        if config['policies_num'] > 1:
            tmp_model_root_path = f'{model_root_path}/{i}'
        else:
            tmp_model_root_path = model_root_path

        if config['seed'] is None:
            seed = None
        else:
            seed = i + self.config['seed']

        logger.info(tmp_model_root_path)
        ppo = PPO(state_dim=state_dim,
                  action_dim=action_dim,
                  model_root_path=tmp_model_root_path,
                  seed=seed,
                  addition_objective=config['addition_objective'],
                  **ppo_config)
        ppo.get_v = lambda s: self.critic.get_v(s)
        self.ppos.append(ppo)
def __init__(self, env_name="", seed=0): self.env_name = env_name # Start ML Agents Environment | Without filename in editor training is started log("ML AGENTS INFO") if self.env_name == "": self.env = UnityEnvironment(file_name=None, seed=seed) else: self.env = UnityEnvironment(file_name=env_name, seed=seed) log("END ML AGENTS INFO") self.info = self.env.reset()[self.default_brain_name]
def reset(self, **kwargs):
    # Reset the environment
    params = {}
    self.dead = []
    self.states = []
    if 'params' in kwargs:
        params = kwargs['params']

    if not TanksWorldStackedEnv._env:
        try:
            print('WARNING: seed not set, using default')
            TanksWorldStackedEnv._env = UnityEnvironment(
                file_name=self._filename,
                worker_id=self._workerid,
                seed=1234,
                timeout_wait=500)
            print('finished initializing environment')
            TanksWorldStackedEnv._env_params['filename'] = self._filename
            TanksWorldStackedEnv._env_params['workerid'] = self._workerid
        except Exception:
            print('ERROR: could not initialize unity environment; is the filename '
                  'correct, and is the workerid not already in use by another '
                  'Unity instance?')
            raise

    # Set the default brain to work with
    self._default_brain = self._env.brain_names[0]
    print("number of brains ", len(self._env.brain_names))
    brain = self._env.brains[self._default_brain]
    self._env_info = self._env.reset(train_mode=0, config=params)[self._default_brain]

    state = self.get_state()
    return state
def test_ppo_policy_evaluate(mock_communicator, mock_launcher, dummy_config):
    tf.reset_default_graph()
    mock_communicator.return_value = MockCommunicator(
        discrete_action=False, visual_inputs=0)
    env = UnityEnvironment(' ')
    brain_infos = env.reset()
    brain_info = brain_infos[env.brain_names[0]]

    trainer_parameters = dummy_config
    model_path = env.brain_names[0]
    trainer_parameters['model_path'] = model_path
    trainer_parameters['keep_checkpoints'] = 3
    policy = PPOPolicy(0, env.brains[env.brain_names[0]],
                       trainer_parameters, False, False)
    run_out = policy.evaluate(brain_info)
    assert run_out['action'].shape == (3, 2)
    env.close()
class Drone:
    spec = None
    name = None
    action_space = None
    observation_space = None

    def __init__(
        self,
        env_path: str,
        env_name: str,
        cfg: dict,
        train_mode: bool = True,
        worker_id: int = 1,
    ):
        self.env = UnityEnvironment(file_name=env_path, worker_id=worker_id)
        self.default_brain = self.env.brain_names[0]
        self.cfg = cfg
        self.name = env_name
        self.action_space = spaces.Box(low=-1, high=1, shape=(3,), dtype=np.float32)
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(9,), dtype=np.float32)
        self.train_mode = train_mode

    def reset(self):
        env_info = self.env.reset(train_mode=self.train_mode,
                                  config=self.cfg)[self.default_brain]
        return env_info.vector_observations[0]

    def step(self, action):
        env_info = self.env.step(action.tolist())[self.default_brain]
        observation = env_info.vector_observations[0]
        reward = env_info.rewards[0]
        done = env_info.local_done[0]
        info = None
        return observation, reward, done, info

    def close(self):
        self.env.close()

    def seed(self, seed):
        pass
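# A minimal rollout sketch for the Drone wrapper above; the build path,
# name, and empty config dict are illustrative assumptions.
drone = Drone(env_path='./builds/Drone', env_name='Drone', cfg={})
obs = drone.reset()
done = False
while not done:
    action = drone.action_space.sample()  # random 3-dim continuous action
    obs, reward, done, info = drone.step(action)
drone.close()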
def initialize_env_model(filepath, algo, name, port):
    env = UnityEnvironment(
        file_name=filepath,
        base_port=port,
        no_graphics=True
    )
    if algo == 'pg':
        algorithm_config = Algorithms.pg_config
        model = Algorithms.PG
        policy_mode = 'ON'
    elif algo == 'ppo':
        algorithm_config = Algorithms.ppo_config
        model = Algorithms.PPO
        policy_mode = 'ON'
    elif algo == 'ddpg':
        algorithm_config = Algorithms.ddpg_config
        model = Algorithms.DDPG
        policy_mode = 'OFF'
    elif algo == 'td3':
        algorithm_config = Algorithms.td3_config
        model = Algorithms.TD3
        policy_mode = 'OFF'
    elif algo == 'sac':
        algorithm_config = Algorithms.sac_config
        model = Algorithms.SAC
        policy_mode = 'OFF'
    elif algo == 'sac_no_v':
        algorithm_config = Algorithms.sac_no_v_config
        model = Algorithms.SAC_NO_V
        policy_mode = 'OFF'
    else:
        raise Exception("Unknown algorithm.")

    env_dir = os.path.split(filepath)[0]
    sys.path.append(env_dir)
    import env_config
    reset_config = env_config.reset_config
    max_step = env_config.max_step
    env_name = os.path.join(*fix_path(env_dir).split('/')[-2:])
    base_dir = os.path.join(
        r'C:/RLData' if platform.system() == "Windows" else r'/RLData',
        env_name, algo, name)

    brain_names = env.external_brain_names
    brains = env.brains
    models = [model(
        s_dim=brains[i].vector_observation_space_size * brains[i].num_stacked_vector_observations,
        a_counts=brains[i].vector_action_space_size[0],
        action_type=brains[i].vector_action_space_type,
        cp_dir=os.path.join(base_dir, i, 'model'),
        log_dir=os.path.join(base_dir, i, 'log'),
        excel_dir=os.path.join(base_dir, i, 'excel'),
        logger2file=False,
        out_graph=False,
        **algorithm_config
    ) for i in brain_names]
    [save_config(os.path.join(base_dir, i, 'config'), algorithm_config) for i in brain_names]

    begin_episode = models[0].get_init_step()
    max_episode = models[0].get_max_episode()
    return env, brain_names, models, policy_mode, reset_config, max_step
def test_cc_bc_model(mock_communicator, mock_launcher):
    tf.reset_default_graph()
    with tf.Session() as sess:
        with tf.variable_scope("FakeGraphScope"):
            mock_communicator.return_value = MockCommunicator(
                discrete_action=False, visual_inputs=0)
            env = UnityEnvironment(' ')
            model = BehavioralCloningModel(env.brains["RealFakeBrain"])
            init = tf.global_variables_initializer()
            sess.run(init)

            run_list = [model.sample_action, model.policy]
            feed_dict = {model.batch_size: 2,
                         model.sequence_length: 1,
                         model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
                                                    [3, 4, 5, 3, 4, 5]])}
            sess.run(run_list, feed_dict=feed_dict)
            env.close()
def __init__(self, env_path, worker_id, train_mode=True,
             n_striker=16, n_goalie=16, render=True):
    # Mappings between agent indices in the Unity build and
    # field/team orderings, plus their inverses
    self._striker_map = {
        'field': [8, 0, 4, 2, 14, 10, 12, 6, 9, 1, 5, 3, 15, 11, 13, 7],
        'team': [12, 8, 10, 9, 15, 13, 14, 11, 4, 0, 2, 1, 7, 5, 6, 3]
    }
    self._goalie_map = {
        'field': [8, 0, 4, 2, 14, 10, 12, 6, 13, 7, 11, 3, 15, 9, 5, 1],
        'team': [12, 8, 10, 9, 15, 13, 14, 11, 6, 3, 5, 1, 7, 4, 2, 0]
    }
    self._striker_inv_map = {
        'field': np.argsort(self._striker_map['field']),
        'team': np.argsort(self._striker_map['team'])
    }
    self._goalie_inv_map = {
        'field': np.argsort(self._goalie_map['field']),
        'team': np.argsort(self._goalie_map['team'])
    }
    # Use the caller-supplied worker_id rather than a hard-coded 0
    self.env = UnityEnvironment(file_name=env_path, worker_id=worker_id,
                                no_graphics=not render)
    self.striker_brain_name, self.goalie_brain_name = self.env.brain_names
    self.striker_brain = self.env.brains[self.striker_brain_name]
    self.goalie_brain = self.env.brains[self.goalie_brain_name]
    self.done_striker = [False] * 16
    self.done_goalie = [False] * 16
    self.train_mode = train_mode
    self.done_hist_striker = [False] * 16
    self.done_hist_goalie = [False] * 16
    self.episode_striker_rewards = 0
    self.episode_goalie_rewards = 0
    self.n_striker = n_striker
    self.n_goalie = n_goalie
    self.observation_striker = None
    self.observation_goalie = None
def create_unity_environment(worker_id: int) -> UnityEnvironment:
    env_seed = seed
    if not env_seed:
        # Draw a seed from the pool when none was given explicitly
        env_seed = seed_pool[worker_id % len(seed_pool)]
    return UnityEnvironment(file_name=env_path,
                            worker_id=worker_id,
                            seed=env_seed,
                            docker_training=docker_training,
                            no_graphics=no_graphics,
                            base_port=start_port)
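# The factory above closes over seed, seed_pool, env_path, docker_training,
# no_graphics, and start_port from its enclosing scope. A sketch of using it
# to launch several workers (each gets port start_port + worker_id); the
# worker count is an illustrative assumption.
envs = [create_unity_environment(worker_id=i) for i in range(4)]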
def test_reset(mock_communicator, mock_launcher):
    mock_communicator.return_value = MockCommunicator(
        discrete_action=False, visual_inputs=0)
    env = UnityEnvironment(' ')
    brain = env.brains['RealFakeBrain']
    brain_info = env.reset()
    env.close()
    assert not env.global_done
    assert isinstance(brain_info, dict)
    assert isinstance(brain_info['RealFakeBrain'], BrainInfo)
    assert isinstance(brain_info['RealFakeBrain'].visual_observations, list)
    assert isinstance(brain_info['RealFakeBrain'].vector_observations, np.ndarray)
    assert len(brain_info['RealFakeBrain'].visual_observations) == \
        brain.number_visual_observations
    assert len(brain_info['RealFakeBrain'].vector_observations) == \
        len(brain_info['RealFakeBrain'].agents)
    assert len(brain_info['RealFakeBrain'].vector_observations[0]) == \
        brain.vector_observation_space_size * brain.num_stacked_vector_observations
def createDiscreteActionsEnv(executableFullPath, envType='simple', seed=0, workerID=0):
    _unityEnv = UnityEnvironment(executableFullPath, seed=seed, worker_id=workerID)
    if envType == 'simple':
        return SingleAgentDiscreteActionsEnv(_unityEnv, executableFullPath)
    elif envType == 'visual':
        return VisualBananaEnv(_unityEnv, executableFullPath)
    else:
        print('ERROR> multi-simulations with MPI not supported yet')
        sys.exit(1)
def test_ppo_policy_evaluate(mock_communicator, mock_launcher):
    tf.reset_default_graph()
    with tf.Session() as sess:
        mock_communicator.return_value = MockCommunicator(
            discrete_action=False, visual_inputs=0)
        env = UnityEnvironment(' ')
        brain_infos = env.reset()
        brain_info = brain_infos[env.brain_names[0]]

        trainer_parameters = dummy_config()
        graph_scope = env.brain_names[0]
        trainer_parameters['graph_scope'] = graph_scope
        policy = PPOPolicy(0, env.brains[env.brain_names[0]],
                           trainer_parameters, sess, False)
        init = tf.global_variables_initializer()
        sess.run(init)
        run_out = policy.evaluate(brain_info)
        assert run_out['action'].shape == (3, 2)
        env.close()
def loadUnityEnvironment(options):
    # Check Python version: ML-Agents Toolkit (v0.3 onwards) requires Python 3
    # print("Python version:", sys.version)
    # if sys.version_info[0] < 3:
    #     raise Exception("ERROR: ML-Agents Toolkit (v0.3 onwards) requires Python 3")
    env = UnityEnvironment(file_name=options.env_name,
                           worker_id=options.workerid,
                           seed=1)
    return env
class Sokoban:
    spec = None
    name = None
    action_space = None
    observation_space = None

    def __init__(
        self,
        env_path: str,
        env_name: str,
        cfg: dict,
        train_mode=True,
        worker_id: int = 1,
    ):
        self.env = UnityEnvironment(file_name=env_path, worker_id=worker_id)
        self.default_brain = self.env.brain_names[0]
        self.cfg = cfg
        self.name = env_name
        self.action_space = spaces.Discrete(5)
        self.observation_space = spaces.Box(low=0, high=255, shape=(3, 84, 84), dtype=np.uint8)
        self.train_mode = train_mode

    def reset(self):
        env_info = self.env.reset(train_mode=self.train_mode,
                                  config=self.cfg)[self.default_brain]
        # Convert the HWC visual observation to CHW; transpose, unlike
        # reshape, preserves the pixel layout
        return env_info.visual_observations[0][0].transpose(2, 0, 1)

    def step(self, action):
        env_info = self.env.step(action.tolist())[self.default_brain]
        observation = env_info.visual_observations[0][0].transpose(2, 0, 1)
        reward = env_info.rewards[0]
        done = env_info.local_done[0]
        info = None
        return observation, reward, done, info

    def close(self):
        self.env.close()

    def seed(self, seed):
        pass
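# A rollout sketch for the Sokoban wrapper above; the build path and name
# are illustrative assumptions.
sok = Sokoban(env_path='./builds/Sokoban', env_name='Sokoban', cfg={})
obs = sok.reset()  # CHW image, shape (3, 84, 84)
done = False
while not done:
    action = np.array(sok.action_space.sample())  # random discrete action
    obs, reward, done, info = sok.step(action)
sok.close()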