def test_ppo_get_value_estimates(mock_communicator, mock_launcher, dummy_config):
    tf.reset_default_graph()
    mock_communicator.return_value = MockCommunicator(
        discrete_action=False, visual_inputs=0
    )
    env = UnityEnvironment(" ")
    brain_infos = env.reset()
    brain_info = brain_infos[env.brain_names[0]]

    trainer_parameters = dummy_config
    model_path = env.brain_names[0]
    trainer_parameters["model_path"] = model_path
    trainer_parameters["keep_checkpoints"] = 3
    policy = PPOPolicy(
        0, env.brains[env.brain_names[0]], trainer_parameters, False, False
    )
    run_out = policy.get_value_estimates(brain_info, 0, done=False)
    for key, val in run_out.items():
        assert type(key) is str
        assert type(val) is float

    run_out = policy.get_value_estimates(brain_info, 0, done=True)
    for key, val in run_out.items():
        assert type(key) is str
        assert val == 0.0

    # Check if we ignore terminal states properly
    policy.reward_signals["extrinsic"].use_terminal_states = False
    run_out = policy.get_value_estimates(brain_info, 0, done=True)
    for key, val in run_out.items():
        assert type(key) is str
        assert val != 0.0

    env.close()
Example #2
 def __init__(self,
              env_path,
              worker_id,
              train_mode=True,
              n_str=16,
              n_goalie=16):
     self.env = UnityEnvironment(file_name=env_path, worker_id=worker_id)
     self.striker_brain_name, self.goalie_brain_name = self.env.brain_names
     self.striker_brain = self.env.brains[self.striker_brain_name]
     self.goalie_brain = self.env.brains[self.goalie_brain_name]
     self.done_str = [False] * 16
     self.done_goalie = [False] * 16
     self.train_mode = train_mode
     self.done_hist_str = [False] * 16
     self.done_hist_goalie = [False] * 16
     self.episode_str_rewards = 0
     self.episode_goalie_rewards = 0
     self.n_str = n_str
     self.n_goalie = n_goalie
     self.act_str_hist = [[] for x in range(n_str)]
     self.act_goalie_hist = [[] for x in range(n_goalie)]
     self.observation_str_hist = [[] for x in range(SIZE_OBSERVATION)]
     self.observation_goalie_hist = [[] for x in range(SIZE_OBSERVATION)]
     self.observation_str = None
     self.observation_goalie = None
     return
Example #3
def main():

    #evaluate = True
    #train = True
    #test = False

    evaluate = False
    train = True
    test = False

    train_file_name = "/homes/gkumar/Documents/UnityProjects/mazeContinuousTarget_fullMaze/Build/mazeBasic_Continuous_fixedGoal_test1_100X"
    test_file_name = "/homes/gkumar/Documents/UnityProjects/mazeContinuousTarget_fullMaze/Build/mazeBasic_Continuous_fixedGoal_test1_realtime"
    evaluate_file_name = "/homes/gkumar/Documents/UnityProjects/mazeContinuousTarget_fullMaze/Build/mazeBasic_Continuous_fixedGoal_test1_realtime"

    #train_file_name="/homes/gkumar/Documents/UnityProjects/mazeContinuousTarget_1/Build/mazeBasic_fullDynamic_fullSpeed"
    #test_file_name="/homes/gkumar/Documents/UnityProjects/mazeContinuousTarget_1/Build/mazeBasic_fullDynamic_fullSpeed_test"
    #evaluate_file_name="/homes/gkumar/Documents/UnityProjects/mazeContinuousTarget_1/Build/mazeBasic_fullDynamic_fullSpeed"

    if train:
        env = UnityEnvironment(file_name=train_file_name, worker_id=1)
    elif test:
        env = UnityEnvironment(file_name=test_file_name, worker_id=1)
    elif evaluate:
        env = UnityEnvironment(file_name=evaluate_file_name, worker_id=1)
    else:
        print("decide between test and train mode")
        exit(0)

    print("Created Env")

    host_name = os.uname()[1]
    os.system('mkdir -p ./logs/' + host_name)
    logger.configure('./logs/' + host_name)  # Change to log in a different directory

    if train or test:
        act = ddpg_unity_her_kl.learn(
            "mlp",  # conv_only is also a good choice for GridWorld
            env,
            nb_epochs=1000,
            nb_epoch_cycles=100,
            nb_rollout_steps=500,  # total_timesteps=10000000,
            test=test,
            train=train)
    elif evaluate:
        act = ddpg_unity_her_kl.evaluate(
            "mlp",  # conv_only is also a good choice for GridWorld
            env)

    print("Saving model to unity_model.pkl")
    act.save("unity_model.pkl")
Example #4
    def test(self):
        from mlagents.envs import UnityEnvironment
        num_worker = 20
        state_size = 33
        output_size = 4
        n_step = 128
        ep = 0
        score = 0
        saver = tf.train.Saver()
        saver.restore(self.sess, 'model/model')

        env = UnityEnvironment(file_name='env/walker', worker_id=2)
        default_brain = env.brain_names[0]
        brain = env.brains[default_brain]
        initial_observation = env.reset()

        env_info = env.reset()
        states = np.zeros([num_worker, state_size])

        while True:
            inference = [self.get_action(s) for s in states]
            actions = [inf[0] for inf in inference]
            env_info = env.step(actions)[default_brain]

            states = env_info.vector_observations
Example #5
def test_ppo_model_cc_vector_rnn(mock_communicator, mock_launcher):
    tf.reset_default_graph()
    with tf.Session() as sess:
        with tf.variable_scope("FakeGraphScope"):
            mock_communicator.return_value = MockCommunicator(
                discrete_action=False, visual_inputs=0)
            env = UnityEnvironment(' ')
            memory_size = 128
            model = PPOModel(env.brains["RealFakeBrain"],
                             use_recurrent=True,
                             m_size=memory_size)
            init = tf.global_variables_initializer()
            sess.run(init)

            run_list = [
                model.output, model.all_log_probs, model.value, model.entropy,
                model.learning_rate, model.memory_out
            ]
            feed_dict = {
                model.batch_size: 1,
                model.sequence_length: 2,
                model.memory_in: np.zeros((1, memory_size)),
                model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
                                           [3, 4, 5, 3, 4, 5]]),
                model.epsilon: np.array([[0, 1]])
            }
            sess.run(run_list, feed_dict=feed_dict)
            env.close()
Example #6
def test_ppo_model_dc_vector_curio(mock_communicator, mock_launcher):
    tf.reset_default_graph()
    with tf.Session() as sess:
        with tf.variable_scope("FakeGraphScope"):
            mock_communicator.return_value = MockCommunicator(
                discrete_action=True, visual_inputs=0)
            env = UnityEnvironment(' ')
            model = PPOModel(env.brains["RealFakeBrain"], use_curiosity=True)
            init = tf.global_variables_initializer()
            sess.run(init)

            run_list = [
                model.output, model.all_log_probs, model.value, model.entropy,
                model.learning_rate, model.intrinsic_reward
            ]
            feed_dict = {
                model.batch_size: 2,
                model.sequence_length: 1,
                model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
                                           [3, 4, 5, 3, 4, 5]]),
                model.next_vector_in: np.array([[1, 2, 3, 1, 2, 3],
                                                [3, 4, 5, 3, 4, 5]]),
                model.action_holder: [[0], [0]],
                model.action_masks: np.ones([2, 2])
            }
            sess.run(run_list, feed_dict=feed_dict)
            env.close()
Example #7
class UnityEnv:
    def __init__(self, env_name, **kwargs) -> None:
        super().__init__()
        filename = unity_filename(env_name)
        self.unity_env = UnityEnvironment(file_name=filename, **kwargs)
        brain_name = self.unity_env.brain_names[0]
        self.name = brain_name.replace("Brain", "")
        brain = self.unity_env.brains[brain_name]

        env_info = self.unity_env.reset(train_mode=True)[brain_name]

        self.brain_name = brain_name
        self.num_agents = len(env_info.agents)
        self.num_actions = list(brain.vector_action_space_size)[0]
        self.states = env_info.vector_observations
        self.num_states = self.states.shape[1]

    def reset(self, train_mode=False):
        env_info = self.unity_env.reset(train_mode=train_mode)[self.brain_name]
        return env_info.vector_observations

    def step(self, actions):
        env_info = self.unity_env.step(actions)
        env_info = env_info[self.brain_name]
        next_states = env_info.vector_observations
        rewards = env_info.rewards
        dones = env_info.local_done

        return (np.asarray(next_states), np.asarray(rewards),
                np.asarray(dones), env_info)
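A minimal rollout sketch for a wrapper like the one above; the environment name, step budget, and random policy are illustrative assumptions rather than part of the original example.

import numpy as np

# Hypothetical usage of the UnityEnv wrapper above; "Reacher" and the step budget are placeholders.
env = UnityEnv("Reacher", no_graphics=True)
states = env.reset(train_mode=False)
for _ in range(200):
    # one random continuous action per agent, clipped to [-1, 1]
    actions = np.clip(np.random.randn(env.num_agents, env.num_actions), -1, 1)
    next_states, rewards, dones, _ = env.step(actions)
    states = next_states
    if np.any(dones):
        states = env.reset(train_mode=False)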
Example #8
def test_ppo_model_cc_visual(mock_communicator, mock_launcher):
    tf.reset_default_graph()
    with tf.Session() as sess:
        with tf.variable_scope("FakeGraphScope"):
            mock_communicator.return_value = MockCommunicator(
                discrete_action=False, visual_inputs=2)
            env = UnityEnvironment(' ')

            model = PPOModel(env.brains["RealFakeBrain"])
            init = tf.global_variables_initializer()
            sess.run(init)

            run_list = [
                model.output, model.log_probs, model.value, model.entropy,
                model.learning_rate
            ]
            feed_dict = {
                model.batch_size: 2,
                model.sequence_length: 1,
                model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
                                           [3, 4, 5, 3, 4, 5]]),
                model.visual_in[0]: np.ones([2, 40, 30, 3]),
                model.visual_in[1]: np.ones([2, 40, 30, 3]),
                model.epsilon: np.array([[0, 1], [2, 3]])
            }
            sess.run(run_list, feed_dict=feed_dict)
            env.close()
Example #9
    def __init__(self, environment_filename: str, worker_id=0, use_visual=False, multiagent=False):
        """
        Environment initialization
        :param environment_filename: The UnityEnvironment path or file to be wrapped in the gym.
        :param worker_id: Worker number for environment.
        :param use_visual: Whether to use visual observation or vector observation.
        :param multiagent: Whether to run in multi-agent mode (lists of obs, reward, done).
        """
        self._env = UnityEnvironment(environment_filename, worker_id)
        self.name = self._env.academy_name
        self.visual_obs = None
        self._current_state = None
        self._n_agents = None
        self._multiagent = multiagent

        # Check brain configuration
        if len(self._env.brains) != 1:
            raise UnityGymException(
                "There can only be one brain in a UnityEnvironment "
                "if it is wrapped in a gym.")
        self.brain_name = self._env.external_brain_names[0]
        brain = self._env.brains[self.brain_name]

        if use_visual and brain.number_visual_observations == 0:
            raise UnityGymException("`use_visual` was set to True, however there are no"
                                    " visual observations as part of this environment.")
        self.use_visual = brain.number_visual_observations == 1 and use_visual

        if brain.num_stacked_vector_observations != 1:
            raise UnityGymException(
                "There can only be one stacked vector observation in a UnityEnvironment "
                "if it is wrapped in a gym.")

        # Check for number of agents in scene.
        initial_info = self._env.reset()[self.brain_name]
        self._check_agents(len(initial_info.agents))

        # Set observation and action spaces
        if brain.vector_action_space_type == "discrete":
            if len(brain.vector_action_space_size) == 1:
                self._action_space = spaces.Discrete(brain.vector_action_space_size[0])
            else:
                self._action_space = spaces.MultiDiscrete(brain.vector_action_space_size)
        else:
            high = np.array([1] * brain.vector_action_space_size[0])
            self._action_space = spaces.Box(-high, high, dtype=np.float32)
        high = np.array([np.inf] * brain.vector_observation_space_size)
        self.action_meanings = brain.vector_action_descriptions
        if self.use_visual:
            if brain.camera_resolutions[0]["blackAndWhite"]:
                depth = 1
            else:
                depth = 3
            self._observation_space = spaces.Box(0, 1, dtype=np.float32,
                                                 shape=(brain.camera_resolutions[0]["height"],
                                                        brain.camera_resolutions[0]["width"],
                                                        depth))
        else:
            self._observation_space = spaces.Box(-high, high, dtype=np.float32)
Example #10
def test_close(mock_communicator, mock_launcher):
    comm = MockCommunicator(discrete_action=False, visual_inputs=0)
    mock_communicator.return_value = comm
    env = UnityEnvironment(' ')
    assert env._loaded
    env.close()
    assert not env._loaded
    assert comm.has_been_closed
Example #11
    def run(self):
        from mlagents.envs import UnityEnvironment

        writer = SummaryWriter('runs/td3')
        num_worker = 20
        state_size = 33
        output_size = 4
        epsilon = 1.0
        ep = 0
        train_size = 5

        env = UnityEnvironment(file_name='env/training', worker_id=0)
        default_brain = env.brain_names[0]
        brain = env.brains[default_brain]
        initial_observation = env.reset()

        step = 0
        score = 0

        while True:
            ep += 1
            env_info = env.reset()
            states = np.zeros([num_worker, state_size])
            terminal = False
            self.noise.reset()
            if epsilon > 0.001:
                epsilon = -ep * 0.005 + 1.0
            while not terminal:
                step += 1

                actions = [self.get_action(s, epsilon) for s in states]
                env_info = env.step(actions)[default_brain]

                next_states = env_info.vector_observations
                rewards = env_info.rewards
                dones = env_info.local_done

                terminal = dones[0]

                for s, ns, r, d, a in zip(states, next_states, rewards, dones,
                                          actions):
                    self.memory.append(s, ns, r, d, a)

                score += sum(rewards)

                states = next_states

                if step % train_size == 0:
                    self.update()

            if ep < 1000:
                print('episode :', ep, '| score : ', score, '| epsilon :',
                      epsilon)
                writer.add_scalar('data/reward', score, ep)
                writer.add_scalar('data/epsilon', epsilon, ep)
                writer.add_scalar('data/memory_size', len(self.memory.memory),
                                  ep)
                score = 0
Example #12
 def loadEnv(self, wid):
     # load env
     env_name = ENV_LOCATION
     self.env = UnityEnvironment(env_name, worker_id=wid)
     # Set the default brain to work with
     self.default_brain = self.env.brain_names[0]
     self.brain = self.env.brains[self.default_brain]
     # Reset the environment - train mode enabled
     env_info = self.env.reset(train_mode=True)[self.default_brain]
Example #13
def walking_iterator():
    env = UnityEnvironment(file_name=env_name)

    # Set the default brain to work with
    default_brain = env.brain_names[0]
    brain = env.brains[default_brain]

    # Reset the environment
    env_info = env.reset(train_mode=train_mode)[default_brain]

    # Examine the state space for the default brain
    print("Agent vector observations look like: \n{}".format(env_info.vector_observations[0]))

    # Examine the observation space for the default brain
    print("Agent visual observations look like:")
    for i, vo in enumerate(env_info.visual_observations):
        print("Visual observation", i, ":", vo[0].shape)

    turning_sign = 1
    while True:
        # Interpret and yield sensory input
        rgb_image = env_info.visual_observations[0][0]
        depth_image = depth_rgb_to_float(env_info.visual_observations[1][0])
        pose = env_info.vector_observations[0][:4]
        forward_clear_dist = env_info.vector_observations[0][4]

        yield {
            'image': rgb_image,
            'depth': np.clip(depth_image * 1000, 0, 65535).astype(np.uint16),
            'pose': pose
        }

        # Decide on actions
        # First action dim is forward motion, second is rotation
        actions = np.zeros([len(env_info.agents), brain.vector_action_space_size[0]], np.float32)

        if forward_clear_dist > 3.0:
            turning_sign = -1 * turning_sign

        if forward_clear_dist > 1.0:
            # Forward is clear, go forward
            actions[0,0] = np.random.uniform(0.05, 0.5)
            actions[0,1] = np.random.uniform(0.0, 0.01) * turning_sign
        elif forward_clear_dist < 0.1:
            # Back up!
            actions[0,0] = np.random.uniform(-0.5, -0.05)
        else:
            # Just a little distance. Turn
            actions[0,1] = np.random.uniform(0.01, 0.05) * turning_sign

        env_info = env.step(actions)[default_brain]
        if env_info.local_done[0]:
            # reset() returns a dict keyed by brain name, so index the default brain
            env_info = env.reset(train_mode=train_mode)[default_brain]
Example #14
def make_unity_env(env_args):
    if env_args['file_path'] is None:
        env = UnityEnvironment()
    else:
        env = UnityEnvironment(file_name=env_args['file_path'],
                               base_port=env_args['port'],
                               no_graphics=not env_args['render'])
    env = InfoWrapper(env)
    env = UnityReturnWrapper(env)
    env = SamplerWrapper(env, env_args)
    return env
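A sketch of how this factory might be called; the dictionary keys are the ones make_unity_env reads above, the values are placeholders, and the SamplerWrapper from the surrounding project may consume additional keys.

# Illustrative call only; the build path is a placeholder (file_path=None would attach to the Unity editor).
env_args = {
    'file_path': './builds/MyEnv',  # placeholder path
    'port': 5005,
    'render': False,                # render=False maps to no_graphics=True
}
env = make_unity_env(env_args)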
Example #15
    def _init_env(self, critic_config, ppo_config, model_root_path):
        config = self.config

        if config['build_path'] is None or config['build_path'] == '':
            self.env = UnityEnvironment()
        else:
            self.env = UnityEnvironment(file_name=config['build_path'],
                                        no_graphics=self.train_mode,
                                        base_port=config['port'],
                                        args=['--scene', config['scene']])

        self.default_brain_name = self.env.brain_names[0]

        brain_params = self.env.brains[self.default_brain_name]
        state_dim = brain_params.vector_observation_space_size
        action_dim = brain_params.vector_action_space_size[0]

        ppo_module = importlib.import_module(config['ppo'])

        class Critic(ppo_module.Critic_Custom, Critic_Base):
            pass

        class PPO(ppo_module.PPO_Custom, PPO_Base):
            pass

        self.critic = Critic(state_dim=state_dim,
                             model_root_path=model_root_path,
                             seed=config['seed'],
                             **critic_config)

        self.ppos = list()

        for i in range(config['policies_num']):
            if config['policies_num'] > 1:
                tmp_model_root_path = f'{model_root_path}/{i}'
            else:
                tmp_model_root_path = model_root_path

            if config['seed'] is None:
                seed = None
            else:
                seed = i + self.config['seed']

            logger.info(tmp_model_root_path)
            ppo = PPO(state_dim=state_dim,
                      action_dim=action_dim,
                      model_root_path=tmp_model_root_path,
                      seed=seed,
                      addition_objective=config['addition_objective'],
                      **ppo_config)
            ppo.get_v = lambda s: self.critic.get_v(s)
            self.ppos.append(ppo)
Example #16
    def __init__(self, env_name="", seed=0):

        self.env_name = env_name

        # Start ML Agents Environment | Without filename in editor training is started
        log("ML AGENTS INFO")
        if self.env_name == "":
            self.env = UnityEnvironment(file_name=None, seed=seed)
        else:
            self.env = UnityEnvironment(file_name=env_name, seed=seed)
        log("END ML AGENTS INFO")

        self.default_brain_name = self.env.brain_names[0]
        self.info = self.env.reset()[self.default_brain_name]
Example #17
    def __init__(self, env_name, **kwargs) -> None:
        super().__init__()
        filename = unity_filename(env_name)
        self.unity_env = UnityEnvironment(file_name=filename, **kwargs)
        brain_name = self.unity_env.brain_names[0]
        self.name = brain_name.replace("Brain", "")
        brain = self.unity_env.brains[brain_name]

        env_info = self.unity_env.reset(train_mode=True)[brain_name]

        self.brain_name = brain_name
        self.num_agents = len(env_info.agents)
        self.num_actions = list(brain.vector_action_space_size)[0]
        self.states = env_info.vector_observations
        self.num_states = self.states.shape[1]
Example #18
    def reset(self, **kwargs):
        # Reset the environment
        params = {}
        self.dead = []
        self.states = []
        if 'params' in kwargs:
            params = kwargs['params']
        if not TanksWorldStackedEnv._env:
            try:
                print('WARNING: seed not set, using default')
                TanksWorldStackedEnv._env = UnityEnvironment(
                    file_name=self._filename,
                    worker_id=self._workerid,
                    seed=1234,
                    timeout_wait=500)
                print('finished initializing environment')
                TanksWorldStackedEnv._env_params['filename'] = self._filename
                TanksWorldStackedEnv._env_params['workerid'] = self._workerid
            except Exception:
                print(
                    'ERROR: could not initialize the Unity environment. Is the file name '
                    'correct, and is the worker_id not already in use by another Unity instance?'
                )
                raise

        # Set the default brain to work with
        self._default_brain = self._env.brain_names[0]
        print("number of brains ", len(self._env.brain_names))
        brain = self._env.brains[self._default_brain]
        self._env_info = self._env.reset(train_mode=0,
                                         config=params)[self._default_brain]

        state = self.get_state()

        return state
Example #19
def test_ppo_policy_evaluate(mock_communicator, mock_launcher, dummy_config):
    tf.reset_default_graph()
    mock_communicator.return_value = MockCommunicator(discrete_action=False,
                                                      visual_inputs=0)
    env = UnityEnvironment(' ')
    brain_infos = env.reset()
    brain_info = brain_infos[env.brain_names[0]]

    trainer_parameters = dummy_config
    model_path = env.brain_names[0]
    trainer_parameters['model_path'] = model_path
    trainer_parameters['keep_checkpoints'] = 3
    policy = PPOPolicy(0, env.brains[env.brain_names[0]], trainer_parameters,
                       False, False)
    run_out = policy.evaluate(brain_info)
    assert run_out['action'].shape == (3, 2)
    env.close()
Example #20
class Drone:
    spec = None
    name = None
    action_space = None
    observation_space = None

    def __init__(
        self,
        env_path: str,
        env_name: str,
        cfg: dict,
        train_mode: bool = True,
        worker_id: int = 1,
    ):
        self.env = UnityEnvironment(file_name=env_path, worker_id=worker_id)
        self.default_brain = self.env.brain_names[0]
        self.cfg = cfg
        self.name = env_name
        self.action_space = spaces.Box(low=-1,
                                       high=1,
                                       shape=(3, ),
                                       dtype=np.float32)
        self.observation_space = spaces.Box(low=-np.inf,
                                            high=np.inf,
                                            shape=(9, ),
                                            dtype=np.float32)
        self.train_mode = train_mode

    def reset(self):
        env_info = self.env.reset(train_mode=self.train_mode,
                                  config=self.cfg)[self.default_brain]
        return env_info.vector_observations[0]

    def step(self, action):
        env_info = self.env.step(action.tolist())[self.default_brain]
        observation = env_info.vector_observations[0]
        reward = env_info.rewards[0]
        done = env_info.local_done[0]
        info = None
        return observation, reward, done, info

    def close(self):
        self.env.close()

    def seed(self, seed):
        pass
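A hypothetical smoke test for the Drone wrapper above; the build path and empty config dict are placeholders.

# Hypothetical usage; env_path and cfg are placeholders.
drone = Drone(env_path="./builds/Drone", env_name="Drone", cfg={}, train_mode=False)
obs = drone.reset()
for _ in range(10):
    action = drone.action_space.sample()          # random 3-dimensional continuous action
    obs, reward, done, info = drone.step(action)  # step() converts the array with .tolist()
    if done:
        obs = drone.reset()
drone.close()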
Example #21
def initialize_env_model(filepath, algo, name, port):
    env = UnityEnvironment(
        file_name=filepath,
        base_port=port,
        no_graphics=True
    )
    if algo == 'pg':
        algorithm_config = Algorithms.pg_config
        model = Algorithms.PG
        policy_mode = 'ON'
    elif algo == 'ppo':
        algorithm_config = Algorithms.ppo_config
        model = Algorithms.PPO
        policy_mode = 'ON'
    elif algo == 'ddpg':
        algorithm_config = Algorithms.ddpg_config
        model = Algorithms.DDPG
        policy_mode = 'OFF'
    elif algo == 'td3':
        algorithm_config = Algorithms.td3_config
        model = Algorithms.TD3
        policy_mode = 'OFF'
    elif algo == 'sac':
        algorithm_config = Algorithms.sac_config
        model = Algorithms.SAC
        policy_mode = 'OFF'
    elif algo == 'sac_no_v':
        algorithm_config = Algorithms.sac_no_v_config
        model = Algorithms.SAC_NO_V
        policy_mode = 'OFF'
    else:
        raise Exception("Unknown algorithm: " + algo)
    env_dir = os.path.split(filepath)[0]
    sys.path.append(env_dir)
    import env_config
    reset_config = env_config.reset_config
    max_step = env_config.max_step
    env_name = os.path.join(*fix_path(env_dir).split('/')[-2:])
    base_dir = os.path.join(r'C:/RLData' if platform.system() == "Windows" else r'/RLData', env_name, algo, name)
    brain_names = env.external_brain_names
    brains = env.brains
    models = [model(
        s_dim=brains[i].vector_observation_space_size * brains[i].num_stacked_vector_observations,
        a_counts=brains[i].vector_action_space_size[0],
        action_type=brains[i].vector_action_space_type,
        cp_dir=os.path.join(base_dir, i, 'model'),
        log_dir=os.path.join(base_dir, i, 'log'),
        excel_dir=os.path.join(base_dir, i, 'excel'),
        logger2file=False,
        out_graph=False,
        **algorithm_config
    ) for i in brain_names]
    [save_config(os.path.join(base_dir, i, 'config'), algorithm_config) for i in brain_names]

    begin_episode = models[0].get_init_step()
    max_episode = models[0].get_max_episode()
    return env, brain_names, models, policy_mode, reset_config, max_step
Example #22
def test_cc_bc_model(mock_communicator, mock_launcher):
    tf.reset_default_graph()
    with tf.Session() as sess:
        with tf.variable_scope("FakeGraphScope"):
            mock_communicator.return_value = MockCommunicator(
                discrete_action=False, visual_inputs=0)
            env = UnityEnvironment(' ')
            model = BehavioralCloningModel(env.brains["RealFakeBrain"])
            init = tf.global_variables_initializer()
            sess.run(init)

            run_list = [model.sample_action, model.policy]
            feed_dict = {model.batch_size: 2,
                         model.sequence_length: 1,
                         model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
                                                    [3, 4, 5, 3, 4, 5]])}
            sess.run(run_list, feed_dict=feed_dict)
            env.close()
Example #23
 def __init__(
     self,
     env_path: str,
     env_name: str,
     cfg: dict,
     train_mode=True,
     worker_id: int = 1,
 ):
     self.env = UnityEnvironment(file_name=env_path, worker_id=worker_id)
     self.default_brain = self.env.brain_names[0]
     self.cfg = cfg
     self.name = env_name
     self.action_space = spaces.Discrete(5)
     self.observation_space = spaces.Box(low=0,
                                         high=255,
                                         shape=(3, 84, 84),
                                         dtype=np.uint8)
     self.train_mode = train_mode
Example #24
    def __init__(self,
                 env_path,
                 worker_id,
                 train_mode=True,
                 n_striker=16,
                 n_goalie=16,
                 render=True):
        self._striker_map = {
            'field': [8, 0, 4, 2, 14, 10, 12, 6, 9, 1, 5, 3, 15, 11, 13, 7],
            'team': [12, 8, 10, 9, 15, 13, 14, 11, 4, 0, 2, 1, 7, 5, 6, 3]
        }
        self._goalie_map = {
            'field': [8, 0, 4, 2, 14, 10, 12, 6, 13, 7, 11, 3, 15, 9, 5, 1],
            'team': [12, 8, 10, 9, 15, 13, 14, 11, 6, 3, 5, 1, 7, 4, 2, 0]
        }

        self._striker_inv_map = {
            'field': np.argsort(self._striker_map['field']),
            'team': np.argsort(self._striker_map['team'])
        }
        self._goalie_inv_map = {
            'field': np.argsort(self._goalie_map['field']),
            'team': np.argsort(self._goalie_map['team'])
        }

        self.env = UnityEnvironment(file_name=env_path,
                                    worker_id=worker_id,
                                    no_graphics=not render)
        self.striker_brain_name, self.goalie_brain_name = self.env.brain_names
        self.striker_brain = self.env.brains[self.striker_brain_name]
        self.goalie_brain = self.env.brains[self.goalie_brain_name]
        self.done_striker = [False] * 16
        self.done_goalie = [False] * 16
        self.train_mode = train_mode
        self.done_hist_striker = [False] * 16
        self.done_hist_goalie = [False] * 16
        self.episode_striker_rewards = 0
        self.episode_goalie_rewards = 0
        self.n_striker = n_striker
        self.n_goalie = n_goalie

        self.observation_striker = None
        self.observation_goalie = None
        return
Example #25
 def create_unity_environment(worker_id: int) -> UnityEnvironment:
     env_seed = seed
     if not env_seed:
         env_seed = seed_pool[worker_id % len(seed_pool)]
     return UnityEnvironment(file_name=env_path,
                             worker_id=worker_id,
                             seed=env_seed,
                             docker_training=docker_training,
                             no_graphics=no_graphics,
                             base_port=start_port)
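The factory above closes over seed, seed_pool, env_path and the other launch settings of its enclosing scope; a sketch of using it to start several workers (the worker count is illustrative) might look like this.

# Illustrative only: spin up a few environments on consecutive worker ids, then shut them down.
envs = [create_unity_environment(worker_id=i) for i in range(4)]
all_brain_infos = [env.reset() for env in envs]
for env in envs:
    env.close()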
Example #26
def test_reset(mock_communicator, mock_launcher):
    mock_communicator.return_value = MockCommunicator(discrete_action=False,
                                                      visual_inputs=0)
    env = UnityEnvironment(' ')
    brain = env.brains['RealFakeBrain']
    brain_info = env.reset()
    env.close()
    assert not env.global_done
    assert isinstance(brain_info, dict)
    assert isinstance(brain_info['RealFakeBrain'], BrainInfo)
    assert isinstance(brain_info['RealFakeBrain'].visual_observations, list)
    assert isinstance(brain_info['RealFakeBrain'].vector_observations,
                      np.ndarray)
    assert len(brain_info['RealFakeBrain'].visual_observations
               ) == brain.number_visual_observations
    assert len(brain_info['RealFakeBrain'].vector_observations) == \
           len(brain_info['RealFakeBrain'].agents)
    assert len(brain_info['RealFakeBrain'].vector_observations[0]) == \
           brain.vector_observation_space_size * brain.num_stacked_vector_observations
Example #27
def createDiscreteActionsEnv( executableFullPath, envType = 'simple', seed = 0, workerID = 0 ) :
    _unityEnv = UnityEnvironment( executableFullPath, seed = seed, worker_id = workerID )

    if envType == 'simple' :
        return SingleAgentDiscreteActionsEnv( _unityEnv, executableFullPath )
    elif envType == 'visual' :
        return VisualBananaEnv( _unityEnv, executableFullPath )
    else :
        print( 'ERROR> multi-simulations with MPI not supported yet' )
        sys.exit( 1 )
Example #28
def test_ppo_policy_evaluate(mock_communicator, mock_launcher):
    tf.reset_default_graph()
    with tf.Session() as sess:
        mock_communicator.return_value = MockCommunicator(
            discrete_action=False, visual_inputs=0)
        env = UnityEnvironment(' ')
        brain_infos = env.reset()
        brain_info = brain_infos[env.brain_names[0]]

        trainer_parameters = dummy_config()
        graph_scope = env.brain_names[0]
        trainer_parameters['graph_scope'] = graph_scope
        policy = PPOPolicy(0, env.brains[env.brain_names[0]],
                           trainer_parameters, sess, False)
        init = tf.global_variables_initializer()
        sess.run(init)
        run_out = policy.evaluate(brain_info)
        assert run_out['action'].shape == (3, 2)
        env.close()
Example #29
def loadUnityEnvironment(options):
    #print("Python version:")
    #print(sys.version)
    # check Python version
    #if (sys.version_info[0] < 3):
    #raise Exception("ERROR: ML-Agents Toolkit (v0.3 onwards) requires Python 3")
    env = UnityEnvironment(file_name=options.env_name,
                           worker_id=options.workerid,
                           seed=1)
    return env
Example #30
class Sokoban:
    spec = None
    name = None
    action_space = None
    observation_space = None

    def __init__(
        self,
        env_path: str,
        env_name: str,
        cfg: dict,
        train_mode=True,
        worker_id: int = 1,
    ):
        self.env = UnityEnvironment(file_name=env_path, worker_id=worker_id)
        self.default_brain = self.env.brain_names[0]
        self.cfg = cfg
        self.name = env_name
        self.action_space = spaces.Discrete(5)
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(3, 84, 84),
                                            dtype=np.uint8)
        self.train_mode = train_mode

    def reset(self):
        env_info = self.env.reset(train_mode=self.train_mode,
                                  config=self.cfg)[self.default_brain]
        return env_info.visual_observations[0][0].reshape(3, 84, 84)

    def step(self, action):
        env_info = self.env.step(action.tolist())[self.default_brain]
        observation = env_info.visual_observations[0][0].reshape(3, 84, 84)
        reward = env_info.rewards[0]
        done = env_info.local_done[0]
        info = None
        return observation, reward, done, info

    def close(self):
        self.env.close()

    def seed(self, seed):
        pass
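A hypothetical usage sketch for the Sokoban wrapper above; the build path is a placeholder, and the sampled action is wrapped in a NumPy array so that step() can call .tolist() on it.

import numpy as np

# Hypothetical usage; env_path is a placeholder.
env = Sokoban(env_path="./builds/Sokoban", env_name="Sokoban", cfg={}, train_mode=False)
obs = env.reset()                                 # (3, 84, 84) image tensor
done = False
while not done:
    action = np.array(env.action_space.sample())  # wrapped so step() can call .tolist()
    obs, reward, done, info = env.step(action)
env.close()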