def test_step():
    with mock.patch('subprocess.Popen'):
        with mock.patch('socket.socket') as mock_socket:
            with mock.patch('glob.glob') as mock_glob:
                mock_glob.return_value = ['FakeLaunchPath']
                mock_socket.return_value.accept.return_value = (mock_socket, 0)
                mock_socket.recv.return_value.decode.return_value = dummy_start
                env = UnityEnvironment(' ')
                brain = env.brains['RealFakeBrain']
                mock_socket.recv.side_effect = dummy_reset
                brain_info = env.reset()
                mock_socket.recv.side_effect = dummy_step
                brain_info = env.step([0] * brain.vector_action_space_size * len(brain_info['RealFakeBrain'].agents))
                with pytest.raises(UnityActionException):
                    env.step([0])
                brain_info = env.step([0] * brain.vector_action_space_size * len(brain_info['RealFakeBrain'].agents))
                with pytest.raises(UnityActionException):
                    env.step([0] * brain.vector_action_space_size * len(brain_info['RealFakeBrain'].agents))
                env.close()
                assert env.global_done
                assert isinstance(brain_info, dict)
                assert isinstance(brain_info['RealFakeBrain'], BrainInfo)
                assert isinstance(brain_info['RealFakeBrain'].visual_observations, list)
                assert isinstance(brain_info['RealFakeBrain'].vector_observations, np.ndarray)
                assert len(brain_info['RealFakeBrain'].visual_observations) == brain.number_visual_observations
                assert brain_info['RealFakeBrain'].vector_observations.shape[0] == \
                       len(brain_info['RealFakeBrain'].agents)
                assert brain_info['RealFakeBrain'].vector_observations.shape[1] == \
                       brain.vector_observation_space_size * brain.num_stacked_vector_observations
                assert not brain_info['RealFakeBrain'].local_done[0]
                assert brain_info['RealFakeBrain'].local_done[2]
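# Note: dummy_start, dummy_reset and dummy_step above are module-level strings describing
# a fake academy and brain (the d_action_c_state_start string in a later example is the
# same idea); they are defined elsewhere in the original test module. The mock_communicator
# and mock_launcher parameters of the tests below are presumably supplied by @mock.patch
# decorators that are not shown in these excerpts.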
def test_ppo_model_cc_visual_curio(mock_communicator, mock_launcher):
    tf.reset_default_graph()
    with tf.Session() as sess:
        with tf.variable_scope("FakeGraphScope"):
            mock_communicator.return_value = MockCommunicator(
                discrete_action=False, visual_inputs=2)
            env = UnityEnvironment(' ')
            model = PPOModel(env.brains["RealFakeBrain"], use_curiosity=True)
            init = tf.global_variables_initializer()
            sess.run(init)

            run_list = [model.output, model.all_probs, model.value, model.entropy,
                        model.learning_rate, model.intrinsic_reward]
            feed_dict = {model.batch_size: 2,
                         model.sequence_length: 1,
                         model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
                                                    [3, 4, 5, 3, 4, 5]]),
                         model.next_vector_in: np.array([[1, 2, 3, 1, 2, 3],
                                                         [3, 4, 5, 3, 4, 5]]),
                         model.output: [[0.0, 0.0], [0.0, 0.0]],
                         model.visual_in[0]: np.ones([2, 40, 30, 3]),
                         model.visual_in[1]: np.ones([2, 40, 30, 3]),
                         model.next_visual_in[0]: np.ones([2, 40, 30, 3]),
                         model.next_visual_in[1]: np.ones([2, 40, 30, 3])
                         }
            sess.run(run_list, feed_dict=feed_dict)
            env.close()
def test_initialization():
    with mock.patch('subprocess.Popen'):
        with mock.patch('socket.socket') as mock_socket:
            with mock.patch('glob.glob') as mock_glob:
                mock_glob.return_value = ['FakeLaunchPath']
                mock_socket.return_value.accept.return_value = (mock_socket, 0)
                mock_socket.recv.return_value.decode.return_value = dummy_start
                env = UnityEnvironment(' ')
                with pytest.raises(UnityActionException):
                    env.step([0])
                assert env.brain_names[0] == 'RealFakeBrain'
                env.close()
def test_close():
    with mock.patch('subprocess.Popen'):
        with mock.patch('socket.socket') as mock_socket:
            with mock.patch('glob.glob') as mock_glob:
                mock_glob.return_value = ['FakeLaunchPath']
                mock_socket.return_value.accept.return_value = (mock_socket, 0)
                mock_socket.recv.return_value.decode.return_value = dummy_start
                env = UnityEnvironment(' ')
                assert env._loaded
                env.close()
                assert not env._loaded
                mock_socket.close.assert_called_once()
Example #5
def test_ppo_model_discrete():
    d_action_c_state_start = '''{
      "AcademyName": "RealFakeAcademy",
      "resetParameters": {},
      "brainNames": ["RealFakeBrain"],
      "externalBrainNames": ["RealFakeBrain"],
      "logPath":"RealFakePath",
      "apiNumber":"API-3",
      "brainParameters": [{
          "vectorObservationSize": 3,
          "numStackedVectorObservations": 2,
          "vectorActionSize": 2,
          "memorySize": 0,
          "cameraResolutions": [{"width":30,"height":40,"blackAndWhite":false}],
          "vectorActionDescriptions": ["",""],
          "vectorActionSpaceType": 0,
          "vectorObservationSpaceType": 1
          }]
    }'''.encode()

    tf.reset_default_graph()
    with mock.patch('subprocess.Popen'):
        with mock.patch('socket.socket') as mock_socket:
            with mock.patch('glob.glob') as mock_glob:
                # End of mock
                with tf.Session() as sess:
                    with tf.variable_scope("FakeGraphScope"):
                        mock_glob.return_value = ['FakeLaunchPath']
                        mock_socket.return_value.accept.return_value = (mock_socket, 0)
                        mock_socket.recv.return_value.decode.return_value = d_action_c_state_start
                        env = UnityEnvironment(' ')
                        model = PPOModel(env.brains["RealFakeBrain"])
                        init = tf.global_variables_initializer()
                        sess.run(init)

                        run_list = [model.output, model.all_probs, model.value, model.entropy,
                                    model.learning_rate]
                        feed_dict = {model.batch_size: 2,
                                     model.sequence_length: 1,
                                     model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
                                                               [3, 4, 5, 3, 4, 5]]),
                                     model.visual_in[0]: np.ones([2, 40, 30, 3])
                                     }
                        sess.run(run_list, feed_dict=feed_dict)
                        env.close()
def test_cc_bc_model(mock_communicator, mock_launcher):
    tf.reset_default_graph()
    with tf.Session() as sess:
        with tf.variable_scope("FakeGraphScope"):
            mock_communicator.return_value = MockCommunicator(
                discrete_action=False, visual_inputs=0)
            env = UnityEnvironment(' ')
            model = BehavioralCloningModel(env.brains["RealFakeBrain"])
            init = tf.global_variables_initializer()
            sess.run(init)

            run_list = [model.sample_action, model.policy]
            feed_dict = {model.batch_size: 2,
                         model.sequence_length: 1,
                         model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
                                                   [3, 4, 5, 3, 4, 5]])}
            sess.run(run_list, feed_dict=feed_dict)
            env.close()
def test_ppo_model_dc_vector(mock_communicator, mock_launcher):
    tf.reset_default_graph()
    with tf.Session() as sess:
        with tf.variable_scope("FakeGraphScope"):
            mock_communicator.return_value = MockCommunicator(
                discrete_action=True, visual_inputs=0)
            env = UnityEnvironment(' ')
            model = PPOModel(env.brains["RealFakeBrain"])
            init = tf.global_variables_initializer()
            sess.run(init)

            run_list = [model.output, model.all_probs, model.value, model.entropy,
                        model.learning_rate]
            feed_dict = {model.batch_size: 2,
                         model.sequence_length: 1,
                         model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
                                                    [3, 4, 5, 3, 4, 5]])}
            sess.run(run_list, feed_dict=feed_dict)
            env.close()
Example #8
def test_cc_bc_model():
    c_action_c_state_start = '''{
      "AcademyName": "RealFakeAcademy",
      "resetParameters": {},
      "brainNames": ["RealFakeBrain"],
      "externalBrainNames": ["RealFakeBrain"],
      "logPath":"RealFakePath",
      "apiNumber":"API-3",
      "brainParameters": [{
          "vectorObservationSize": 3,
          "numStackedVectorObservations": 2,
          "vectorActionSize": 2,
          "memorySize": 0,
          "cameraResolutions": [],
          "vectorActionDescriptions": ["",""],
          "vectorActionSpaceType": 1,
          "vectorObservationSpaceType": 1
          }]
    }'''.encode()

    tf.reset_default_graph()
    with mock.patch('subprocess.Popen'):
        with mock.patch('socket.socket') as mock_socket:
            with mock.patch('glob.glob') as mock_glob:
                # End of mock
                with tf.Session() as sess:
                    with tf.variable_scope("FakeGraphScope"):
                        mock_glob.return_value = ['FakeLaunchPath']
                        mock_socket.return_value.accept.return_value = (mock_socket, 0)
                        mock_socket.recv.return_value.decode.return_value = c_action_c_state_start
                        env = UnityEnvironment(' ')

                        model = BehavioralCloningModel(env.brains["RealFakeBrain"])
                        init = tf.global_variables_initializer()
                        sess.run(init)

                        run_list = [model.sample_action, model.policy]
                        feed_dict = {model.batch_size: 2,
                                     model.sequence_length: 1,
                                     model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
                                                               [3, 4, 5, 3, 4, 5]])}
                        sess.run(run_list, feed_dict=feed_dict)
                        env.close()
Example #10
    plt.plot(scores["index"],
             scores["scores_avg"],
             color=sns.xkcd_rgb["amber"])
    plt.legend(["Scores", "MA(%d)" % window_size])

    if ARGS.double_dqn:
        plt.savefig(os.path.join(ARGS.figure_dir, "score_plot_double_dqn.png"))
    elif ARGS.dueling_dqn:
        plt.savefig(os.path.join(ARGS.figure_dir,
                                 "score_plot_dueling_dqn.png"))
    else:
        plt.savefig(os.path.join(ARGS.figure_dir, "score_plot_dqn.png"))


if __name__ == "__main__":
    env = UnityEnvironment(file_name="Banana_Linux_NoVis/Banana.x86_64",
                           worker_id=1,
                           seed=1)

    # get the default brain
    brain_name = env.brain_names[0]
    brain = env.brains[brain_name]

    # initialize an agent
    agent = Agent(state_size=37, action_size=4, seed=1, args=ARGS)

    # training a dqn agent
    scores = dqn(n_episodes=3000, max_t=1000)

    # visualization
    plot_scores(scores, window_size=ARGS.window_size)
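The dqn function called above is not included in this excerpt; a minimal sketch of what such an epsilon-greedy training loop might look like (assumptions: the Agent exposes act(state, eps) and step(state, action, reward, next_state, done), numpy and collections.deque are imported, and the function is defined above the __main__ block in the original file):

def dqn(n_episodes=2000, max_t=1000, eps_start=1.0, eps_end=0.01, eps_decay=0.995):
    """Hypothetical DQN training loop returning the score dict plotted above."""
    scores = {"index": [], "scores": [], "scores_avg": []}
    scores_window = deque(maxlen=ARGS.window_size)  # moving-average window
    eps = eps_start  # epsilon-greedy exploration rate
    for i_episode in range(1, n_episodes + 1):
        env_info = env.reset(train_mode=True)[brain_name]
        state = env_info.vector_observations[0]
        score = 0
        for _ in range(max_t):
            action = agent.act(state, eps)              # assumed Agent API
            env_info = env.step(action)[brain_name]
            next_state = env_info.vector_observations[0]
            reward = env_info.rewards[0]
            done = env_info.local_done[0]
            agent.step(state, action, reward, next_state, done)
            state = next_state
            score += reward
            if done:
                break
        scores_window.append(score)
        scores["index"].append(i_episode)
        scores["scores"].append(score)
        scores["scores_avg"].append(np.mean(scores_window))
        eps = max(eps_end, eps_decay * eps)
    return scores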
Example #11
# Imports
import agent
from unityagents import UnityEnvironment
import train_model as tm

env = UnityEnvironment(file_name="./Banana_Windows_x86_64/Banana.exe")

# Get the default brain
brain_name = env.brain_names[0]
brain = env.brains[brain_name]

# Reset environment
env_info = env.reset(train_mode=True)[brain_name]

# Size of task (parameters we need)
state = env_info.vector_observations[0]
state_size = len(state)
action_space_size = brain.vector_action_space_size

# Initialise an agent
agent = agent.Agent(state_size, action_space_size)

# Train agent with environment and rewards
scores = tm.deep_q_learning(agent, env, brain_name)

# Plot scores
tm.plot_scores(scores)

# Properly close environment
env.close()
Example #12
                    default=5e-4,
                    metavar='L',
                    help='discount factor (default: 0.99)')
parser.add_argument('--seed',
                    type=int,
                    default=123,
                    metavar='N',
                    help='random seed (default: 123)')
parser.add_argument('--render',
                    action='store_true',
                    help='render the environment')

args = parser.parse_args()

# creating Banana unity environment instance
env = UnityEnvironment(file_name="Banana_Linux/Banana.x86_64",
                       no_graphics=True)

brain_name = env.brain_names[0]
brain = env.brains[brain_name]

env_info = env.reset(train_mode=True)[brain_name]
action_size = brain.vector_action_space_size
state = env_info.vector_observations[0]
state_size = len(state)

# policy instance (model)
policy = Policy(state_size, action_size)

# load the pre-trained weight file
# policy.load_state_dict(torch.load('checkpoint_re.pth'))
# policy.train()
# python test_agent.py --model checkpoint.pth
import argparse
import sys
import os

from unityagents import UnityEnvironment
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import torch

from dqn_agent import Agent

if __name__ == '__main__':
    
    env = UnityEnvironment(file_name="VisualBanana.app")

    # get the default brain
    brain_name = env.brain_names[0]
    brain = env.brains[brain_name]

    # reset the environment
    env_info = env.reset(train_mode=True)[brain_name]

    # number of agents in the environment
    print('Number of agents:', len(env_info.agents))

    # number of actions
    action_size = brain.vector_action_space_size
    print('Number of actions:', action_size)
Example #14
def train_agent(
    env: unityagents.UnityEnvironment,
    agent: agents.DDPGAgent,
    n_episodes: int = 200,
    mean_score_threshold: float = 30.0,
    max_t: int = 1000,
    has_ou_noise: bool = True,
    scores_maxlen: int = 100,
    ou_noise_sigma_start: float = 0.5,
    ou_noise_sigma_end: float = 0.01,
    ou_noise_sigma_decay: float = 0.99,
    n_random_episodes: int = 100,
    logging_freq: int = 10,
    checkpoints_dir: typing.Optional[pathlib.Path] = None,
    checkpoints_freq: int = 50,
) -> pd.DataFrame:
    """
    Train agent for Unity Tennis environment and return results.

    Parameters
    ----------
    env
        Unity environment
    agent
        An instance of a Deep Reinforcement Learning agent from the drl_ctrl.agents module
    n_episodes
        Maximum number of episodes
    mean_score_threshold
        Threshold for the mean of the last 100 episode scores; training stops and results are saved once it is reached
    max_t
        Maximum number of time steps per episode
    has_ou_noise
        If True, Ornstein-Uhlenbeck noise is added to actions
    scores_maxlen
        Maximum length of scores window
    ou_noise_sigma_start
        Ornstein-Uhlenbeck noise sigma starting value per episode
    ou_noise_sigma_end
        Ornstein-Uhlenbeck noise sigma minimum value per episode
    ou_noise_sigma_decay
        Ornstein-Uhlenbeck noise sigma multiplicative decay
    n_random_episodes
        Number of random episodes to gather experience
    logging_freq
        Logging frequency
    checkpoints_dir
        Model checkpoints output directory
    checkpoints_freq
        Frequency (in episodes) at which model checkpoints are saved once the average score threshold has been reached

    """

    logger = logging.getLogger(__name__)

    scores = []
    scores_avg100 = []
    scores_window = deque(maxlen=scores_maxlen)
    time_started = time.time()
    times_total = []
    times_per_episode = []
    time_steps = []

    i_last_checkpoint = 0
    for i_episode in range(1, (n_random_episodes + n_episodes + 1)):

        time_started_episode = time.time()

        brain_name = env.brain_names[0]
        env_info = env.reset(train_mode=True)[brain_name]
        agent.reset()
        states = env_info.vector_observations
        num_agents = len(env_info.agents)
        agent_scores = np.zeros(num_agents)

        ou_noise_sigma = ou_noise_sigma_start

        t = 1
        while True:
            # choose action (for each agent)
            if i_episode <= n_random_episodes:
                action_size = env.brains[brain_name].vector_action_space_size
                actions = np.random.randn(num_agents, action_size)
                actions = np.clip(actions, -1, 1)
            else:
                actions = agent.act(states,
                                    ou_noise_sigma=ou_noise_sigma,
                                    add_noise=has_ou_noise)
            ou_noise_sigma = max(ou_noise_sigma_end,
                                 ou_noise_sigma * ou_noise_sigma_decay)

            # take action in the environment(for each agent)
            env_info = env.step(actions)[brain_name]

            # get next state (for each agent)
            next_states = env_info.vector_observations

            # see if episode finished
            dones = env_info.local_done

            # update the score (for each agent)
            agent_scores += env_info.rewards

            if i_episode <= n_random_episodes:
                agent.memory.add_batch(states, actions, env_info.rewards,
                                       next_states, dones)
            else:
                agent.step(states, actions, env_info.rewards, next_states,
                           dones)

            # roll over states to next time step
            states = next_states

            # exit loop if episode finished
            if np.any(dones):
                break
            t += 1

        score = float(np.max(agent_scores))
        scores_window.append(score)
        scores.append(score)
        scores_avg100.append(np.mean(scores_window))

        times_total.append(time.time() - time_started)
        times_per_episode.append(time.time() - time_started_episode)
        time_steps.append(t)

        if i_episode % logging_freq == 0:
            logger.info(f'\rEp: {i_episode}'
                        f'\tSigma({t}): {ou_noise_sigma:.3f}'
                        f'\tScore: {score:.2f}'
                        f'\tAvg. Score: {np.mean(scores_window):.2f}'
                        f'\tTime_e: {times_per_episode[-1]:.3f}s'
                        f'\tTime: {times_total[-1]:.3f}s')

        if len(scores_window) == scores_maxlen and np.mean(
                scores_window) >= mean_score_threshold:
            if (checkpoints_dir is not None and
                ((i_episode - i_last_checkpoint) % checkpoints_freq) == 0):
                checkpoint_dir = checkpoints_dir.joinpath(
                    f"episode_{i_episode}")
                checkpoint_dir.mkdir(parents=True, exist_ok=True)

                torch.save(
                    agent.actor_local.state_dict(),
                    str(path_util.mk_path_weights_actor_local(checkpoint_dir)))
                torch.save(
                    agent.actor_target.state_dict(),
                    str(path_util.mk_path_weights_actor_target(
                        checkpoint_dir)))
                torch.save(
                    agent.critic_local.state_dict(),
                    str(path_util.mk_path_weights_critic_local(
                        checkpoint_dir)))
                torch.save(
                    agent.critic_target.state_dict(),
                    str(
                        path_util.mk_path_weights_critic_target(
                            checkpoint_dir)))

                logger.info(
                    f'\nSaved model checkpoint to {str(checkpoint_dir)}')
            else:
                logger.info(
                    f'\nEnvironment solved in {i_episode - 100:d} episodes!'
                    f'\nScore: {score:.2f}'
                    f'\tAverage Score: {np.mean(scores_window):.2f}'
                    f'\tAverage Time_e: {np.mean(times_per_episode):.3f}s'
                    f'\tTotal Time: {times_total[-1]:.3f}s')
                break

    return pd.DataFrame.from_records(
        zip(range(len(scores)), scores, scores_avg100, time_steps,
            times_per_episode, times_total),
        columns=[
            cfg.COL_EPISODE, cfg.COL_SCORE, cfg.COL_SCORE_AVG100,
            cfg.COL_N_TIME_STEPS, cfg.COL_TIME_PER_EPISODE, cfg.COL_TIME_TOTAL
        ])
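A hypothetical call, assuming a Tennis UnityEnvironment and a DDPGAgent constructed as in the neighbouring examples (0.5 is the usual Tennis score target):

# results_df = train_agent(env, agent, n_episodes=2000, mean_score_threshold=0.5,
#                          checkpoints_dir=pathlib.Path('checkpoints'))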
Example #15
from unityagents import UnityEnvironment
from utils import sim_act
from model import DeterministicActor
import numpy as np
import torch

# sim options
NUM_SIMS = 5  # Maximum number of training episodes

# observation and action specs of each agent
osize = 24
asize = 2

# create environment
env = UnityEnvironment(file_name='tennis.app')

# get the default brain
brain_name = env.brain_names[0]
brain = env.brains[brain_name]

# initialize actors and critics
actor = DeterministicActor(osize, asize, seed=0)
actor.load_state_dict(torch.load('checkpoint_actor_cpu.pth'))

# ------  Train loop -------

for ep_count in range(NUM_SIMS):

    # reset the environment
    env_info = env.reset(train_mode=False)[brain_name]
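    # The rest of the loop is cut off in this excerpt; a hypothetical continuation that
    # feeds each observation through the loaded actor (the imported sim_act helper's
    # signature is not shown here, so plain forward passes are used instead):
    states = env_info.vector_observations
    scores = np.zeros(len(env_info.agents))
    while True:
        with torch.no_grad():
            actions = actor(torch.from_numpy(states).float()).numpy()
        actions = np.clip(actions, -1, 1)
        env_info = env.step(actions)[brain_name]
        states = env_info.vector_observations
        scores += env_info.rewards
        if np.any(env_info.local_done):
            break
    print('Simulation {}: max score {:.2f}'.format(ep_count + 1, np.max(scores)))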
Example #16
import torch
import numpy as np
from collections import deque
import matplotlib.pyplot as plt
#%matplotlib inline


from unityagents import UnityEnvironment
from drl.agent import Agent

# get environment
env = UnityEnvironment(file_name='Tennis_Linux/Tennis.x86_64')

# get the default brain
brain_name = env.brain_names[0]
brain = env.brains[brain_name]

env_info = env.reset(train_mode=False)[brain_name]
num_agents = len(env_info.agents)
action_size = brain.vector_action_space_size
states = env_info.vector_observations
state_size = states.shape[1]

agent = Agent(state_size=state_size, action_size=action_size, random_seed=1337)
agent.load(torch.load("checkpoints/checkpoint_actor_v2_solved.pth"),
           torch.load("checkpoints/checkpoint_critic_v2_solved.pth"));

print_every=100

scores_deque = deque(maxlen=print_every)
scores_total = []
Example #17
# # Multiple env classes simultaneously
# env_path = environment.get_env_path('3dball')
# env_1 = UnityEnvironment(file_name=env_path, worker_id=1)
# env_path = environment.get_env_path('gridworld')
# env_2 = UnityEnvironment(file_name=env_path, worker_id=2)
# env_1.reset(train_mode=False)
# env_2.reset(train_mode=False)

env_path = util.get_env_path('gridworld')
# use train_mode = False to debug, i.e. render env at real size, real time
train_mode = False

# UnityEnvironment interfaces python with Unity,
# and contains brains for controlling connected agents.
env = UnityEnvironment(file_name=env_path)
print(str(env))

# get the default brain
default_brain = env.brain_names[0]
brain = env.brains[default_brain]
env_info = env.reset(train_mode=train_mode)[default_brain]
'''
is_continuous = (brain.action_space_type == 'continuous')
use_observations = (brain.number_observations > 0)
use_states = (brain.state_space_size > 0)

- reset env with param, returns dict of {brain: BrainInfo}
env.reset(train_mode=train_mode)
env_info = env.reset(train_mode=train_mode)[default_brain]
Example #18
        config = JobConfig(filename)
        train_ddpg_agent_job(config)

    elif command == "hyperopt":
        if args[0] == '-f':
            metaconf_file = args[1]
            path = generate_random_configuration_files(metaconf_file)

        else:
            raise ValueError(f"Unknown parameter {args[0]}")
    elif command == "demo":
        try:
            checkpoint_path = args[0]
            conf = JobConfig(checkpoint_path + "config.yml")

            env = UnityEnvironment(
                file_name="./resources/Tennis_Linux/Tennis.x86_64")
            brain_name = env.brain_names[0]
            brain = env.brains[brain_name]

            env_info = env.reset(train_mode=False)[brain_name]
            num_agents = len(env_info.agents)
            action_size = brain.vector_action_space_size

            states = env_info.vector_observations
            state_size = states.shape[1]

            agent = DDPGAgent(state_size, action_size, conf.random_seed,
                              conf.buffer_size, conf.batch_size, conf.gamma,
                              conf.tau, conf.lr_actor, conf.lr_critic,
                              conf.weight_decay, conf.sigma,
                              conf.actor_nn_size, conf.critic_nn_size,
Example #19
save_freq = int(options['--save-freq'])
env_name = options['<env>']

# Algorithm-specific parameters for tuning
gamma = float(options['--gamma'])
lambd = float(options['--lambd'])
time_horizon = int(options['--time-horizon'])
beta = float(options['--beta'])
num_epoch = int(options['--num-epoch'])
epsilon = float(options['--epsilon'])
buffer_size = int(options['--buffer-size'])
learning_rate = float(options['--learning-rate'])
hidden_units = int(options['--hidden-units'])
batch_size = int(options['--batch-size'])

env = UnityEnvironment(file_name=env_name)
print(str(env))
brain_name = env.brain_names[0]

tf.reset_default_graph()

# Create the Tensorflow model graph
ppo_model = create_agent_model(env, lr=learning_rate,
                               h_size=hidden_units, epsilon=epsilon,
                               beta=beta)

is_continuous = (env.brains[brain_name].action_space_type == "continuous")
use_observations = (env.brains[brain_name].number_observations > 0)

if not os.path.exists(model_path):
    os.makedirs(model_path)
from unityagents import UnityEnvironment
from tqdm import tqdm
import numpy as np
import torch
from torch import FloatTensor, LongTensor, cuda
import sys

DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

if sys.platform == "darwin":
    env = UnityEnvironment(file_name="./Banana.app")
else:
    env = UnityEnvironment(file_name="/data/Banana_Linux_NoVis/Banana.x86_64")
    # env = UnityEnvironment(file_name="./Banana_Linux")

# get the default brain
brain_name = env.brain_names[0]
brain = env.brains[brain_name]

print(brain_name)
print(brain)

state_size = brain.vector_observation_space_size
print("State size: ", state_size)

# reset the environment
env_info = env.reset(train_mode=True)[brain_name]

# number of agents in the environment
print('Number of agents:', len(env_info.agents))
Example #21
from unityagents import UnityEnvironment
import numpy as np
import torch
import matplotlib.pyplot as plt
from agent import Agent

num_agents = 1

# please do not modify the line below
env = UnityEnvironment(file_name="Reacher_Windows_x86_64/Reacher.exe")

# get the default brain
brain_name = env.brain_names[0]
brain = env.brains[brain_name]


# reset the environment
env_info = env.reset(train_mode=True)[brain_name]

# number of agents in the environment
print('Number of agents:', len(env_info.agents))

# number of actions
action_size = brain.vector_action_space_size
print('Number of actions:', action_size)

# examine the state space 
state = env_info.vector_observations[0]
print('States look like:', state)
state_size = len(state)
Example #22
from unityagents import UnityEnvironment
import numpy as np
from tqdm import tqdm
from agent import DQAgent
from utils import draw

unity_environment_path = "./Banana_Linux/Banana.x86_64"
best_model_path = "./best_model.checkpoint"

if __name__ == "__main__":
    # prepare environment
    env = UnityEnvironment(file_name=unity_environment_path)
    brain_name = env.brain_names[0]
    brain = env.brains[brain_name]
    env_info = env.reset(train_mode=True)[brain_name]

    # number of actions
    action_size = brain.vector_action_space_size

    # examine the state space
    state = env_info.vector_observations[0]
    state_size = len(state)

    agent = DQAgent(state_size, action_size)
    agent.load(best_model_path)

    test_scores = []
    for i_episode in tqdm(range(1, 101)):
        score = 0  # initialize the score
        env_info = env.reset(
            train_mode=True)[brain_name]  # reset the environment
Example #23
class UnityEnv:
    '''
    Class for all Envs.
    Standardizes the UnityEnv design to work in Lab.
    Access Agents properties by: Agents - AgentSpace - AEBSpace - EnvSpace - Envs
    '''

    def __init__(self, env_spec, env_space, e=0):
        self.env_spec = env_spec
        self.env_space = env_space
        self.info_space = env_space.info_space
        self.e = e
        util.set_attr(self, self.env_spec)
        self.name = self.env_spec['name']
        self.body_e = None
        self.nanflat_body_e = None  # nanflatten version of bodies
        self.body_num = None

        worker_id = int(f'{os.getpid()}{self.e+int(ps.unique_id())}'[-4:])
        self.u_env = UnityEnvironment(file_name=util.get_env_path(self.name), worker_id=worker_id)
        # spaces for NN auto input/output inference
        logger.warn('Unity environment observation_space and action_space are constructed with invalid range. Use only their shapes.')
        self.observation_spaces = []
        self.action_spaces = []
        for a in range(len(self.u_env.brain_names)):
            observation_shape = (self.get_observable_dim(a)['state'],)
            if self.get_brain(a).state_space_type == 'discrete':
                observation_space = gym.spaces.Box(low=0, high=1, shape=observation_shape, dtype=np.int32)
            else:
                observation_space = gym.spaces.Box(low=0.0, high=1.0, shape=observation_shape, dtype=np.float32)
            self.observation_spaces.append(observation_space)
            if self.is_discrete(a):
                action_space = gym.spaces.Discrete(self.get_action_dim(a))
            else:
                action_space = gym.spaces.Box(low=0.0, high=1.0, shape=(1,), dtype=np.float32)
            self.action_spaces.append(action_space)
        for observation_space, action_space in zip(self.observation_spaces, self.action_spaces):
            set_gym_space_attr(observation_space)
            set_gym_space_attr(action_space)

        # TODO experiment to find out optimal benchmarking max_timestep, set
        # TODO ensure clock_speed from env_spec
        self.clock_speed = 1
        self.clock = Clock(self.clock_speed)
        self.done = False

    def check_u_brain_to_agent(self):
        '''Check the size match between unity brain and agent'''
        u_brain_num = self.u_env.number_brains
        agent_num = len(self.body_e)
        assert u_brain_num == agent_num, f'There must be a Unity brain for each agent. e:{self.e}, brain: {u_brain_num} != agent: {agent_num}.'

    def check_u_agent_to_body(self, env_info_a, a):
        '''Check the size match between unity agent and body'''
        u_agent_num = len(env_info_a.agents)
        body_num = util.count_nonan(self.body_e[a])
        assert u_agent_num == body_num, f'There must be a Unity agent for each body; a:{a}, e:{self.e}, agent_num: {u_agent_num} != body_num: {body_num}.'

    def get_brain(self, a):
        '''Get the unity-equivalent of agent, i.e. brain, to access its info'''
        name_a = self.u_env.brain_names[a]
        brain_a = self.u_env.brains[name_a]
        return brain_a

    def get_env_info(self, env_info_dict, a):
        name_a = self.u_env.brain_names[a]
        env_info_a = env_info_dict[name_a]
        return env_info_a

    @lab_api
    def post_body_init(self):
        '''Run init for components that need bodies to exist first, e.g. memory or architecture.'''
        self.nanflat_body_e = util.nanflatten(self.body_e)
        for idx, body in enumerate(self.nanflat_body_e):
            body.nanflat_e_idx = idx
        self.body_num = len(self.nanflat_body_e)
        self.check_u_brain_to_agent()
        logger.info(util.self_desc(self))

    def is_discrete(self, a):
        '''Check if an agent (brain) is subject to discrete actions'''
        return self.get_brain(a).is_discrete()

    def get_action_dim(self, a):
        '''Get the action dim for an agent (brain) in env'''
        return self.get_brain(a).get_action_dim()

    def get_action_space(self, a):
        return self.action_spaces[a]

    def get_observable_dim(self, a):
        '''Get the observable dim for an agent (brain) in env'''
        return self.get_brain(a).get_observable_dim()

    def get_observable_types(self, a):
        '''Get the observable for an agent (brain) in env'''
        return self.get_brain(a).get_observable_types()

    def get_observation_space(self, a):
        return self.observation_spaces[a]

    @lab_api
    def reset(self):
        self.done = False
        env_info_dict = self.u_env.reset(train_mode=(util.get_lab_mode() != 'dev'), config=self.env_spec.get('unity'))
        _reward_e, state_e, done_e = self.env_space.aeb_space.init_data_s(ENV_DATA_NAMES, e=self.e)
        for (a, b), body in util.ndenumerate_nonan(self.body_e):
            env_info_a = self.get_env_info(env_info_dict, a)
            self.check_u_agent_to_body(env_info_a, a)
            state = env_info_a.states[b]
            state_e[(a, b)] = state
            done_e[(a, b)] = self.done
        return _reward_e, state_e, done_e

    @lab_api
    def step(self, action_e):
        # TODO implement clock_speed: step only if self.clock.to_step()
        if self.done:
            return self.reset()
        action_e = util.nanflatten(action_e)
        env_info_dict = self.u_env.step(action_e)
        reward_e, state_e, done_e = self.env_space.aeb_space.init_data_s(ENV_DATA_NAMES, e=self.e)
        for (a, b), body in util.ndenumerate_nonan(self.body_e):
            env_info_a = self.get_env_info(env_info_dict, a)
            reward_e[(a, b)] = env_info_a.rewards[b]
            state_e[(a, b)] = env_info_a.states[b]
            done_e[(a, b)] = env_info_a.local_done[b]
        self.done = (util.nonan_all(done_e) or self.clock.get('t') > self.max_timestep)
        return reward_e, state_e, done_e

    @lab_api
    def close(self):
        self.u_env.close()
Example #24
            score = 0
            env_info = env.reset(train_mode=False)[brain_name]
            state = env_info.vector_observations[0]
            while True:
                action = agent.act(state)
                env_info = env.step(action)[brain_name]
                next_state = env_info.vector_observations[
                    0]  # get the next state
                reward = env_info.rewards[0]  # get the reward
                done = env_info.local_done[0]  # see if episode has finished
                state = next_state
                score += reward
                if done:
                    break
            print('episode %d score: %d' % (i, score))
    env.close()


if __name__ == '__main__':
    env = UnityEnvironment(file_name="./Banana_Env/Banana_Linux/Banana.x86_64")
    brain_name = env.brain_names[0]
    brain = env.brains[brain_name]
    env_info = env.reset(train_mode=True)[brain_name]
    # number of actions
    action_size = brain.vector_action_space_size
    # examine the state space
    state = env_info.vector_observations[0]
    state_size = len(state)
    agent = getattr(agent, cfgs.AGENT_TYPE)(state_size, action_size, seed=0)
    show_results(env, brain_name, agent)
    parser.add_argument('--model',
                        type=str,
                        help='Path to trained model',
                        default='checkpoint.pth')
    parser.add_argument('--type',
                        type=str,
                        help='NN type - NoisyDueling, Dueling or Q',
                        default='NoisyDueling')
    parser.add_argument('--cuda', dest='cuda', action='store_true')
    parser.add_argument('--no_cuda', dest='cuda', action='store_false')
    parser.set_defaults(cuda=True)

    print('Testing')
    args = parser.parse_args()

    env = UnityEnvironment(file_name=args.environment)

    # get the default brain
    brain_name = env.brain_names[0]
    brain = env.brains[brain_name]

    # reset the environment
    env_info = env.reset(train_mode=False)[brain_name]

    # initialize agent
    action_size = brain.vector_action_space_size
    state = env_info.vector_observations[0]
    state_size = len(state)
    agent = Agent(state_size=state_size,
                  action_size=action_size,
                  seed=0,
Example #26
# ### 1. Start the Environment
#
# Run the next code cell to install a few packages.  This line will take a few minutes to run!

# Both versions of the environment are already saved in the Workspace and can be accessed at the file paths provided below.
#
# Please select one of the two options below for loading the environment.

# In[3]:

from unityagents import UnityEnvironment
import numpy as np

# select this option to load version 1 (with a single agent) of the environment

env = UnityEnvironment(file_name='Tennis_Linux/Tennis.x86_64',
                       no_graphics=True)

# Environments contain **_brains_** which are responsible for deciding the actions of their associated agents. Here we check for the first brain available, and set it as the default brain we will be controlling from Python.

# In[4]:

# get the default brain
brain_name = env.brain_names[0]
brain = env.brains[brain_name]
for brains in env.brain_names:
    print(brains)

# ### 2. Examine the State and Action Spaces
#
# Run the code cell below to print some information about the environment.
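# The cell itself is cut off in this excerpt; a minimal sketch of what it typically
# contains (an assumption, using the default brain selected above):

env_info = env.reset(train_mode=True)[brain_name]
num_agents = len(env_info.agents)
print('Number of agents:', num_agents)
action_size = brain.vector_action_space_size
print('Size of each action:', action_size)
states = env_info.vector_observations
print('There are {} agents. Each observes a state with length: {}'.format(
    states.shape[0], states.shape[1]))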
Example #27
def evaluate(agent_dir: Path,
             number_of_episodes: int = 1000,
             maximum_timesteps: int = 1000,
             environment_path: str = DEFAULT_ENVIRONMENT_EXECUTABLE_PATH):
    """Evaluate an agent on some episodes. Note that the agent is not trained during the evaluation and the
    exploration is set to 0. Thus the results really reflect the final performance of the agent."""
    agent_path = agent_dir / 'checkpoint.pth'
    if not agent_path.exists():
        logging.warning(f'No saved parameters found for agent in {agent_dir}.')
        return
    hist_path = agent_dir / 'evaluation_histogram.png'
    scores_path = agent_dir / 'scores_evaluation.csv'

    env = UnityEnvironment(file_name=environment_path, no_graphics=True)
    brain_name = env.brain_names[0]
    brain = env.brains[brain_name]

    action_size = brain.vector_action_space_size
    env_info = env.reset(train_mode=True)[brain_name]
    state_size = len(env_info.vector_observations[0])

    agent = DqnAgent(state_size=state_size,
                     action_size=action_size,
                     device=DEVICE)
    agent.load(agent_path)

    scores = []

    for _ in tqdm(list(range(1, number_of_episodes + 1))):
        env_info = env.reset(train_mode=True)[brain_name]
        state = env_info.vector_observations[0]
        score = 0
        for t in range(maximum_timesteps):
            action = agent.act(state, epsilon=0.0)

            env_info = env.step(action)[brain_name]
            next_state = env_info.vector_observations[0]
            reward = env_info.rewards[0]
            done = env_info.local_done[0]

            state = next_state
            score += reward
            if done:
                break
        scores.append(score)

    scores_ts = pd.Series(scores)

    plt.hist(scores, bins=100, color='steelblue')
    ylim = plt.ylim()
    med = scores_ts.median()
    plt.vlines(med,
               *ylim,
               linewidth=2,
               linestyle='--',
               color='orange',
               label=f'median: {med}')
    plt.legend()

    plt.savefig(hist_path)
    scores_ts.to_csv(scores_path, index=False)
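A hypothetical invocation of evaluate (the command-line code that normally drives it is not part of this excerpt; assumes `from pathlib import Path`):

# evaluate(Path('runs/dqn_agent'), number_of_episodes=100)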
args = vars(parser.parse_args())
print(args)

for key, value in args.items():
    exec(f'{key} = {value}')

os.system(f'mkdir -p results/model-{model_num}')
with open(f'results/model-{model_num}/training_params.json', 'w') as outfile:
    json.dump(args, outfile)

# env = UnityEnvironment(file_name="Tennis_Linux/Tennis.x86_64", worker_id=int(f'4{model_num}'))
# env = UnityEnvironment(file_name="Tennis_Linux_NoVis/Tennis.x86_64", worker_id=int(f'{model_num}'))
# env = UnityEnvironment(file_name="Soccer_Linux/Soccer.x86_64", worker_id=int(f'5{model_num}'))
# env = UnityEnvironment(file_name="Soccer_Linux_NoVis/Soccer.x86_64", worker_id=int(f'5{model_num}'))
env = UnityEnvironment(file_name="Tennis.app", worker_id=1000)

brain_name = env.brain_names[0]
brain = env.brains[brain_name]

env_info = env.reset(train_mode=True)[brain_name]
num_agents = len(env_info.agents)
action_size = brain.vector_action_space_size
states = env_info.vector_observations
state_size = states.shape[1]

agent = Agents(state_size=state_size, action_size=action_size, num_agents=num_agents,
               random_seed=seed, fc1_units=fc1_units, fc2_units=fc2_units, BUFFER_SIZE=BUFFER_SIZE,
               BATCH_SIZE=BATCH_SIZE, GAMMA=GAMMA, TAU=TAU, LR_ACTOR=LR_ACTOR,
               LR_CRITIC=LR_CRITIC, CRITIC_WEIGHT_DECAY=CRITIC_WEIGHT_DECAY)
Example #29
def main():
    env = UnityEnvironment(file_name="./Tennis_Linux/Tennis.x86_64")
    print_env_info(env)
    random_play(env)
    env.close()
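print_env_info and random_play are not defined in this excerpt; a hypothetical sketch of what they might look like for the Tennis environment:

import numpy as np

def print_env_info(env):
    # list each brain and its action-space size
    for name in env.brain_names:
        print(name, env.brains[name].vector_action_space_size)

def random_play(env, n_episodes=3):
    # roll out a few episodes with uniform random actions clipped to [-1, 1]
    brain_name = env.brain_names[0]
    action_size = env.brains[brain_name].vector_action_space_size
    for _ in range(n_episodes):
        env_info = env.reset(train_mode=False)[brain_name]
        num_agents = len(env_info.agents)
        scores = np.zeros(num_agents)
        while True:
            actions = np.clip(np.random.randn(num_agents, action_size), -1, 1)
            env_info = env.step(actions)[brain_name]
            scores += env_info.rewards
            if np.any(env_info.local_done):
                break
        print('Max score this episode:', np.max(scores))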
                    default='cuda',
                    help="Select device for training and inference")
parser.add_argument('--discount-rate', type=float, default=0.99, help='')
parser.add_argument('--tau', type=float, default=0.95, help='')
parser.add_argument('--gradient-clip', type=float, default=5, help='')
parser.add_argument('--rollout-length', type=int, default=2048, help='')
parser.add_argument('--ppo-epochs', type=int, default=10, help='')
parser.add_argument('--ppo-clip', type=float, default=2.0, help='')
parser.add_argument('--batch-size', type=int, default=32, help='')
parser.add_argument('--entropy-coefficent', type=float, default=1E-2, help='')
parser.add_argument('--required-reward', type=float, default=30, help='')
parser.add_argument('--learning-rate', type=float, default=3E-4, help='')
parser.add_argument('--hidden-units', type=int, default=512, help='')
args = parser.parse_args()

env = UnityEnvironment(file_name='../Reacher_Linux/Reacher.x86_64')
brain_name = env.brain_names[0]
brain = env.brains[brain_name]
env_info = env.reset(train_mode=True)[brain_name]

_STATE_SIZE = env_info.vector_observations.shape[1]
_NUM_ACTIONS = brain.vector_action_space_size
_NUM_AGENTS = len(env_info.agents)


def play(policy, args):
    env_info = env.reset(train_mode=True)[brain_name]
    states = env_info.vector_observations
    scores = np.zeros(_NUM_AGENTS)
    while True:
        actions, _, _, _ = policy(torch.FloatTensor(states).to(args.device))
                   '--eps_start',
                   help='starting value of epsilon',
                   nargs=1)
parse.add_argument('-e', '--eps_end', help='minimum value of epsilon', nargs=1)
parse.add_argument(
    '-d',
    '--eps_decay',
    help='multiplicative factor (per episode) for decreasing epsilon',
    nargs=1)

# set parameters for agent training
n_episodes, max_t, eps_start, eps_end, eps_decay = arg_parser(
    parse.parse_args())

# start environment
env = UnityEnvironment(file_name=ENVIRONMENT_PATH)

# get the default brain
brain_name = env.brain_names[0]
brain = env.brains[brain_name]

# reset the environment
env_info = env.reset(train_mode=True)[brain_name]

# number of actions available to the agent
action_size = brain.vector_action_space_size

# examine state space
state = env_info.vector_observations[0]

# get size of the state
state_size = len(state)
import gym
import random
import torch
import numpy as np
import sys

from tqdm import tqdm
from unityagents import UnityEnvironment
from collections import deque
import matplotlib.pyplot as plt

from ddpg_agent import Agent, ReplayBuffer, OUNoise

env = UnityEnvironment(file_name='app/Reacher.app')
print('Loaded env')

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("Using device: {}".format(device))

# Get the default brain
brain_name = env.brain_names[0]
brain = env.brains[brain_name]
print(brain)

# Reset the environment
env_info = env.reset(train_mode=True)[brain_name]

# Number of agents
num_agents = len(env_info.agents)
print('Number of agents:{}'.format(num_agents))
class TrainerController(object):
    def __init__(self, env_path, run_id, save_freq, curriculum_file, fast_simulation, load, train,
                 worker_id, keep_checkpoints, lesson, seed, docker_target_name, trainer_config_path,
                 no_graphics):
        """
        :param env_path: Location to the environment executable to be loaded.
        :param run_id: The sub-directory name for model and summary statistics
        :param save_freq: Frequency at which to save model
        :param curriculum_file: Curriculum json file for environment
        :param fast_simulation: Whether to run the game at training speed
        :param load: Whether to load the model or randomly initialize
        :param train: Whether to train model, or only run inference
        :param worker_id: Number to add to communication port (5005). Used for multi-environment
        :param keep_checkpoints: How many model checkpoints to keep
        :param lesson: Start learning from this lesson
        :param seed: Random seed used for training.
        :param docker_target_name: Name of docker volume that will contain all data.
        :param trainer_config_path: Fully qualified path to location of trainer configuration file
        :param no_graphics: Whether to run the Unity simulator in no-graphics mode
        """
        self.trainer_config_path = trainer_config_path
        if env_path is not None:
            env_path = (env_path.strip()
                        .replace('.app', '')
                        .replace('.exe', '')
                        .replace('.x86_64', '')
                        .replace('.x86', ''))  # Strip out executable extensions if passed
        # Recognize and use docker volume if one is passed as an argument
        if docker_target_name == '':
            self.docker_training = False
            self.model_path = './models/{run_id}'.format(run_id=run_id)
            self.curriculum_file = curriculum_file
            self.summaries_dir = './summaries'
        else:
            self.docker_training = True
            self.model_path = '/{docker_target_name}/models/{run_id}'.format(
                docker_target_name=docker_target_name,
                run_id=run_id)
            if env_path is not None:
                env_path = '/{docker_target_name}/{env_name}'.format(docker_target_name=docker_target_name,
                                                                     env_name=env_path)
            if curriculum_file is None:
                self.curriculum_file = None
            else:
                self.curriculum_file = '/{docker_target_name}/{curriculum_file}'.format(
                    docker_target_name=docker_target_name,
                    curriculum_file=curriculum_file)
            self.summaries_dir = '/{docker_target_name}/summaries'.format(docker_target_name=docker_target_name)
        self.logger = logging.getLogger("unityagents")
        self.run_id = run_id
        self.save_freq = save_freq
        self.lesson = lesson
        self.fast_simulation = fast_simulation
        self.load_model = load
        self.train_model = train
        self.worker_id = worker_id
        self.keep_checkpoints = keep_checkpoints
        self.trainers = {}
        if seed == -1:
            seed = np.random.randint(0, 999999)
        self.seed = seed
        np.random.seed(self.seed)
        tf.set_random_seed(self.seed)
        self.env = UnityEnvironment(file_name=env_path, worker_id=self.worker_id,
                                    curriculum=self.curriculum_file, seed=self.seed,
                                    docker_training=self.docker_training,
                                    no_graphics=no_graphics)
        if env_path is None:
            self.env_name = 'editor_'+self.env.academy_name
        else:
            self.env_name = os.path.basename(os.path.normpath(env_path))  # Extract out name of environment

    def _get_progress(self):
        if self.curriculum_file is not None:
            progress = 0
            if self.env.curriculum.measure_type == "progress":
                for brain_name in self.env.external_brain_names:
                    progress += self.trainers[brain_name].get_step / self.trainers[brain_name].get_max_steps
                return progress / len(self.env.external_brain_names)
            elif self.env.curriculum.measure_type == "reward":
                for brain_name in self.env.external_brain_names:
                    progress += self.trainers[brain_name].get_last_reward
                return progress
            else:
                return None
        else:
            return None

    def _process_graph(self):
        nodes = []
        scopes = []
        for brain_name in self.trainers.keys():
            if self.trainers[brain_name].graph_scope is not None:
                scope = self.trainers[brain_name].graph_scope + '/'
                if scope == '/':
                    scope = ''
                scopes += [scope]
                if self.trainers[brain_name].parameters["trainer"] == "imitation":
                    nodes += [scope + x for x in ["action"]]
                else:
                    nodes += [scope + x for x in ["action", "value_estimate", "action_probs"]]
                if self.trainers[brain_name].parameters["use_recurrent"]:
                    nodes += [scope + x for x in ["recurrent_out", "memory_size"]]
        if len(scopes) > 1:
            self.logger.info("List of available scopes :")
            for scope in scopes:
                self.logger.info("\t" + scope)
        self.logger.info("List of nodes to export :")
        for n in nodes:
            self.logger.info("\t" + n)
        return nodes

    def _save_model(self, sess, saver, steps=0):
        """
        Saves current model to checkpoint folder.
        :param sess: Current Tensorflow session.
        :param steps: Current number of steps in training process.
        :param saver: Tensorflow saver for session.
        """
        last_checkpoint = self.model_path + '/model-' + str(steps) + '.cptk'
        saver.save(sess, last_checkpoint)
        tf.train.write_graph(sess.graph_def, self.model_path, 'raw_graph_def.pb', as_text=False)
        self.logger.info("Saved Model")

    def _export_graph(self):
        """
        Exports latest saved model to .bytes format for Unity embedding.
        """
        target_nodes = ','.join(self._process_graph())
        ckpt = tf.train.get_checkpoint_state(self.model_path)
        freeze_graph.freeze_graph(input_graph=self.model_path + '/raw_graph_def.pb',
                                  input_binary=True,
                                  input_checkpoint=ckpt.model_checkpoint_path,
                                  output_node_names=target_nodes,
                                  output_graph=self.model_path + '/' + self.env_name + "_" + self.run_id + '.bytes',
                                  clear_devices=True, initializer_nodes="", input_saver="",
                                  restore_op_name="save/restore_all", filename_tensor_name="save/Const:0")

    def _initialize_trainers(self, trainer_config, sess):
        trainer_parameters_dict = {}
        self.trainers = {}
        for brain_name in self.env.external_brain_names:
            trainer_parameters = trainer_config['default'].copy()
            if len(self.env.external_brain_names) > 1:
                graph_scope = re.sub('[^0-9a-zA-Z]+', '-', brain_name)
                trainer_parameters['graph_scope'] = graph_scope
                trainer_parameters['summary_path'] = '{basedir}/{name}'.format(
                    basedir=self.summaries_dir,
                    name=str(self.run_id) + '_' + graph_scope)
            else:
                trainer_parameters['graph_scope'] = ''
                trainer_parameters['summary_path'] = '{basedir}/{name}'.format(
                    basedir=self.summaries_dir,
                    name=str(self.run_id))
            if brain_name in trainer_config:
                _brain_key = brain_name
                while not isinstance(trainer_config[_brain_key], dict):
                    _brain_key = trainer_config[_brain_key]
                for k in trainer_config[_brain_key]:
                    trainer_parameters[k] = trainer_config[_brain_key][k]
            trainer_parameters_dict[brain_name] = trainer_parameters.copy()
        for brain_name in self.env.external_brain_names:
            if trainer_parameters_dict[brain_name]['trainer'] == "imitation":
                self.trainers[brain_name] = BehavioralCloningTrainer(sess, self.env, brain_name,
                                                                     trainer_parameters_dict[brain_name],
                                                                     self.train_model, self.seed)
            elif trainer_parameters_dict[brain_name]['trainer'] == "ppo":
                self.trainers[brain_name] = PPOTrainer(sess, self.env, brain_name, trainer_parameters_dict[brain_name],
                                                       self.train_model, self.seed)
            else:
                raise UnityEnvironmentException("The trainer config contains an unknown trainer type for brain {}"
                                                .format(brain_name))

    def _load_config(self):
        try:
            with open(self.trainer_config_path) as data_file:
                trainer_config = yaml.load(data_file)
                return trainer_config
        except IOError:
            raise UnityEnvironmentException("""Parameter file could not be found here {}.
                                            Will use default Hyper parameters"""
                                            .format(self.trainer_config_path))
        except UnicodeDecodeError:
            raise UnityEnvironmentException("There was an error decoding Trainer Config from this path : {}"
                                            .format(self.trainer_config_path))

    @staticmethod
    def _create_model_path(model_path):
        try:
            if not os.path.exists(model_path):
                os.makedirs(model_path)
        except Exception:
            raise UnityEnvironmentException("The folder {} containing the generated model could not be accessed."
                                            " Please make sure the permissions are set correctly."
                                            .format(model_path))

    def start_learning(self):
        self.env.curriculum.set_lesson_number(self.lesson)
        trainer_config = self._load_config()
        self._create_model_path(self.model_path)

        tf.reset_default_graph()

        with tf.Session() as sess:
            self._initialize_trainers(trainer_config, sess)
            for k, t in self.trainers.items():
                self.logger.info(t)
            init = tf.global_variables_initializer()
            saver = tf.train.Saver(max_to_keep=self.keep_checkpoints)
            # Instantiate model parameters
            if self.load_model:
                self.logger.info('Loading Model...')
                ckpt = tf.train.get_checkpoint_state(self.model_path)
                if ckpt is None:
                    raise UnityEnvironmentException(
                        'The model {0} could not be found. Make sure you specified the right '
                        '--run-id'.format(self.model_path))
                saver.restore(sess, ckpt.model_checkpoint_path)
            else:
                sess.run(init)
            global_step = 0  # This is only for saving the model
            self.env.curriculum.increment_lesson(self._get_progress())
            curr_info = self.env.reset(train_mode=self.fast_simulation)
            if self.train_model:
                for brain_name, trainer in self.trainers.items():
                    trainer.write_tensorboard_text('Hyperparameters', trainer.parameters)
            try:
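                # Main training loop: keep stepping until every trainer has reached its
                # max_steps (or run indefinitely when only doing inference).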
                while any([t.get_step <= t.get_max_steps for k, t in self.trainers.items()]) or not self.train_model:
                    if self.env.global_done:
                        self.env.curriculum.increment_lesson(self._get_progress())
                        curr_info = self.env.reset(train_mode=self.fast_simulation)
                        for brain_name, trainer in self.trainers.items():
                            trainer.end_episode()
                    # Decide and take an action
                    take_action_vector, take_action_memories, take_action_text, take_action_outputs = {}, {}, {}, {}
                    for brain_name, trainer in self.trainers.items():
                        (take_action_vector[brain_name],
                         take_action_memories[brain_name],
                         take_action_text[brain_name],
                         take_action_outputs[brain_name]) = trainer.take_action(curr_info)
                    new_info = self.env.step(vector_action=take_action_vector, memory=take_action_memories,
                                             text_action=take_action_text)
                    for brain_name, trainer in self.trainers.items():
                        trainer.add_experiences(curr_info, new_info, take_action_outputs[brain_name])
                        trainer.process_experiences(curr_info, new_info)
                        if trainer.is_ready_update() and self.train_model and trainer.get_step <= trainer.get_max_steps:
                            # Perform gradient descent with experience buffer
                            trainer.update_model()
                        # Write training statistics to Tensorboard.
                        trainer.write_summary(self.env.curriculum.lesson_number)
                        if self.train_model and trainer.get_step <= trainer.get_max_steps:
                            trainer.increment_step_and_update_last_reward()
                    if self.train_model:
                        global_step += 1
                    if global_step % self.save_freq == 0 and global_step != 0 and self.train_model:
                        # Save Tensorflow model
                        self._save_model(sess, steps=global_step, saver=saver)
                    curr_info = new_info
                # Final save Tensorflow model
                if global_step != 0 and self.train_model:
                    self._save_model(sess, steps=global_step, saver=saver)
            except KeyboardInterrupt:
                print('--------------------------Now saving model-------------------------')
                if self.train_model:
                    self.logger.info("Learning was interrupted. Please wait while the graph is generated.")
                    self._save_model(sess, steps=global_step, saver=saver)
        self.env.close()
        if self.train_model:
            self._export_graph()


def load_env(env_path: str, no_graphics: bool = False) -> UnityEnvironment:
    return UnityEnvironment(file_name=env_path, no_graphics=no_graphics)
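
# Hypothetical usage of load_env (the path below is illustrative, not from the
# original script):
# env = load_env('./Reacher_Linux/Reacher.x86_64', no_graphics=True)
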
ACTORNET_PATH = './checkpoint_actor.pth'
NUM_EPISODES_TEST = 100
MAX_T_TEST = 200


def get_lr(optimizer):
    for param_group in optimizer.param_groups:
        return param_group['lr']
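

# Usage sketch for get_lr (the optimizer below is hypothetical and not part of
# this script):
#   opt = torch.optim.Adam(net.parameters(), lr=1e-3)
#   print(get_lr(opt))  # -> 0.001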


if not VIS:
    path_prefix = '_NoVis'
else:
    path_prefix = ''

env = UnityEnvironment(file_name='Tennis_Linux' + path_prefix +
                       '/Tennis.x86_64')

# get the default brain
brain_name = env.brain_names[0]
brain = env.brains[brain_name]

# reset the environment
env_info = env.reset(train_mode=True)[brain_name]

# number of agents
num_agents = len(env_info.agents)
print('Number of agents:', num_agents)

# size of each action
action_size = brain.vector_action_space_size
print('Size of each action:', action_size)
Example #36
import argparse
from time import sleep

from unityagents import UnityEnvironment

from maddpg import MADDPGAgent
from config import config
from train import training_loop
from play import play_loop


parser = argparse.ArgumentParser()
parser.add_argument('--train', action='store_true', dest='train', help='Set the train mode')
parser.add_argument('--file_prefix', default=None, help='Set the file for agent to load weights with using prefix')
parser.add_argument('--playthroughs', default=10, type=int, help='Number of playthroughs played in a play mode')
parser.add_argument('--sleep', default=0, type=int, help='Time before environment starts in a play mode [seconds]')
arguments = parser.parse_args()

env = UnityEnvironment(file_name='./Tennis.app', seed=config.general.seed)
brain_name = env.brain_names[0]
agent = MADDPGAgent(config=config, file_prefix=arguments.file_prefix)

if arguments.train:
    print('Train mode \n')
    training_loop(env, brain_name, agent, config)
else:
    print('Play mode \n')
    sleep(arguments.sleep)
    play_loop(env, brain_name, agent, playthrougs=arguments.playthroughs)
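
# Example invocations (the script filename is illustrative):
#   python main.py --train
#   python main.py --file_prefix best --playthroughs 5 --sleep 3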
Example #37
              end="")

        if i_episode % 100 == 0:
            print('\rEpisode {}\tAverage Score: {:.2f}'.format(
                i_episode, np.mean(recent_scores)))

        if np.mean(recent_scores) >= 13.0:
            print(
                '\nEnvironment solved in {:d} episodes!\tAverage Score: {:.2f}'
                .format(i_episode - 100, np.mean(recent_scores)))
            torch.save(model.local_network.state_dict(), 'banana_ckpt.pth')
            break


if __name__ == "__main__":
    # Environment setup
    env = UnityEnvironment(file_name="Banana.app")
    brain_name = env.brain_names[0]
    brain = env.brains[brain_name]
    env_info = env.reset(train_mode=True)[brain_name]
    state = env_info.vector_observations[0]
    state_size = len(state)
    action_size = brain.vector_action_space_size

    # Load model
    model = DQN(state_size=state_size, action_size=action_size, seed=0)

    # Start training
    train_dqn(model)

    env.close()
Example #38
import os
import torch
from unityagents import UnityEnvironment

import sys
sys.path.append("../")

from agent import Agent

os.environ["CUDA_VISIBLE_DEVICES"] = "1"

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("CUDA available {}".format(torch.cuda.is_available()))

env = UnityEnvironment(file_name="../Reacher_Linux/Reacher.x86_64", no_graphics=False)

# get the default brain
brain_name = env.brain_names[0]
brain = env.brains[brain_name]

# reset the environment
env_info = env.reset(train_mode=False)[brain_name]

# number of agents in the environment
print('Number of agents:', len(env_info.agents))

# number of actions
action_size = brain.vector_action_space_size
print('Number of actions:', action_size)

# examine the state space
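# (The snippet is truncated here; a typical continuation, as in the Reacher
#  example further below, would be:)
# states = env_info.vector_observations
# print('States have shape:', states.shape)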
Example #39
# ##############################################################################
#                                                  SETUP
# ##############################################################################
# Generate Output Directory Paths
model_dir = os.path.join("models", MODEL_NAME)
snapshots_dir = os.path.join(model_dir, "snapshots")

# SET SEEDS FOR REPRODUCIBILITY
np.random.seed(SEED)
torch.manual_seed(SEED)

# ##############################################################################
#                                                  ENVIRONMENT
# ##############################################################################
env = UnityEnvironment(file_name=ENV_FILE, seed=SEED)

# get the default brain
brain_name = env.brain_names[0]
brain = env.brains[brain_name]

# ##############################################################################
#                                                  AGENT
# ##############################################################################
# INITIALIZE AGENT, AND LOAD WEIGHTS FROM BEST SNAPSHOT
maddpg = MADDPG(
    actor_layer_sizes=ACTOR_LAYER_SIZES,
    critic_layer_sizes=CRITIC_LAYER_SIZES,
    clamp_actions=CLAMP_ACTIONS,
    logger=None,
)
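
# (Snippet truncated: the comment above mentions loading weights from the best
#  snapshot, but that code is not shown. With plain PyTorch state dicts it
#  would look roughly like the following -- the attribute and file names are
#  hypothetical:)
# maddpg.actor.load_state_dict(
#     torch.load(os.path.join(snapshots_dir, "best_actor.pth")))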
Example #40
class UnityEnv(gym.Env):
    def __init__(self, app_name=None, idx=0):
        # parameter
        app_path = os.path.join(os.path.dirname(__file__), 'assets', app_name)
        idx = idx
        no_graphics = False
        self.num_envs = 1

        # create environment
        self._env = UnityEnvironment(file_name=app_path,
                                     worker_id=idx,
                                     no_graphics=no_graphics)
        self.name = app_name

        # only accept environments with a single brain
        assert len(self._env.brains) == 1
        self.brain_name = self._env.external_brain_names[0]
        self.brain = self._env.brains[self.brain_name]

        # visualization
        self.use_visual = (self.brain.number_visual_observations == 1)

        # action space dimension
        if self.brain.vector_action_space_type == "discrete":
            self._a_dim = Discrete(1)
        else:
            high = np.array([np.inf] * (self.brain.vector_action_space_size))
            self._a_dim = Box(-high, high)

        # observation space dimension
        # NOTE: the visual branch is short-circuited with `False`, so the vector
        # observation space below is always used (reset()/step() return vector
        # observations).
        if self.use_visual and False and no_graphics:
            high = np.array([np.inf] *
                            self.brain.camera_resolutions[0]["height"] *
                            self.brain.camera_resolutions[0]["width"] * 3)
            self._ob_dim = Box(-high, high)
        else:
            high = np.array([np.inf] *
                            self.brain.vector_observation_space_size)
            self._ob_dim = Box(-high, high)

        # video buffer
        self.frames = []

    def reset(self):
        self.frames = []
        info = self._env.reset()[self.brain_name]
        state = info.vector_observations[0]
        return np.array([state])

    def step(self, action):
        info = self._env.step([action])[self.brain_name]

        state = info.vector_observations[0]
        reward = info.rewards[0]
        done = info.local_done[0]

        self._collect_frames(info.visual_observations[0])
        return (np.array([state]), np.array([reward]),
                np.array([done]), np.array([None]))

    def close(self):
        self._env.close()

    def _collect_frames(self, frame):
        if self.use_visual:
            self.frames.append(frame)

    @property
    def action_space(self):
        return self._a_dim

    @property
    def observation_space(self):
        return self._ob_dim
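
# Minimal usage sketch (the app name is illustrative; it assumes a Unity build
# sits in the module's assets/ directory):
# env = UnityEnv(app_name='3DBall.x86_64', idx=0)
# state = env.reset()
# next_state, reward, done, _ = env.step(env.action_space.sample())
# env.close()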
Example #41
from unityagents import UnityEnvironment
import numpy as np

env = UnityEnvironment(file_name='Reacher.exe')

# get the default brain
brain_name = env.brain_names[0]
brain = env.brains[brain_name]

# reset the environment
env_info = env.reset(train_mode=True)[brain_name]

# number of agents
num_agents = len(env_info.agents)
print('Number of agents:', num_agents)

# size of each action
action_size = brain.vector_action_space_size
print('Size of each action:', action_size)

# examine the state space
states = env_info.vector_observations
state_size = states.shape[1]
print('There are {} agents. Each observes a state with length: {}'.format(
    num_agents, state_size))
print('The state for the first agent looks like:', states[0])

env_info = env.reset(train_mode=False)[brain_name]  # reset the environment
# states = env_info.vector_observations                  # get the current state (for each agent)
# scores = np.zeros(num_agents)                          # initialize the score (for each agent)
# while True:
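# A typical continuation of the commented-out rollout above (illustrative, kept
# commented out like the lines it extends):
#     actions = np.clip(np.random.randn(num_agents, action_size), -1, 1)
#     env_info = env.step(actions)[brain_name]
#     scores += env_info.rewards
#     states = env_info.vector_observations
#     if np.any(env_info.local_done):
#         break
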
 def __init__(self, env_path, run_id, save_freq, curriculum_file, fast_simulation, load, train,
              worker_id, keep_checkpoints, lesson, seed, docker_target_name, trainer_config_path,
              no_graphics):
     """
     :param env_path: Location to the environment executable to be loaded.
     :param run_id: The sub-directory name for model and summary statistics
     :param save_freq: Frequency at which to save model
     :param curriculum_file: Curriculum json file for environment
     :param fast_simulation: Whether to run the game at training speed
     :param load: Whether to load the model or randomly initialize
     :param train: Whether to train model, or only run inference
     :param worker_id: Number to add to communication port (5005). Used for multi-environment
     :param keep_checkpoints: How many model checkpoints to keep
     :param lesson: Start learning from this lesson
     :param seed: Random seed used for training.
     :param docker_target_name: Name of docker volume that will contain all data.
     :param trainer_config_path: Fully qualified path to location of trainer configuration file
     :param no_graphics: Whether to run the Unity simulator in no-graphics mode
     """
     self.trainer_config_path = trainer_config_path
     if env_path is not None:
         env_path = (env_path.strip()
                     .replace('.app', '')
                     .replace('.exe', '')
                     .replace('.x86_64', '')
                     .replace('.x86', ''))  # Strip out executable extensions if passed
     # Recognize and use docker volume if one is passed as an argument
     if docker_target_name == '':
         self.docker_training = False
         self.model_path = './models/{run_id}'.format(run_id=run_id)
         self.curriculum_file = curriculum_file
         self.summaries_dir = './summaries'
     else:
         self.docker_training = True
         self.model_path = '/{docker_target_name}/models/{run_id}'.format(
             docker_target_name=docker_target_name,
             run_id=run_id)
         if env_path is not None:
             env_path = '/{docker_target_name}/{env_name}'.format(docker_target_name=docker_target_name,
                                                                  env_name=env_path)
         if curriculum_file is None:
             self.curriculum_file = None
         else:
             self.curriculum_file = '/{docker_target_name}/{curriculum_file}'.format(
                 docker_target_name=docker_target_name,
                 curriculum_file=curriculum_file)
         self.summaries_dir = '/{docker_target_name}/summaries'.format(docker_target_name=docker_target_name)
     self.logger = logging.getLogger("unityagents")
     self.run_id = run_id
     self.save_freq = save_freq
     self.lesson = lesson
     self.fast_simulation = fast_simulation
     self.load_model = load
     self.train_model = train
     self.worker_id = worker_id
     self.keep_checkpoints = keep_checkpoints
     self.trainers = {}
     if seed == -1:
         seed = np.random.randint(0, 999999)
     self.seed = seed
     np.random.seed(self.seed)
     tf.set_random_seed(self.seed)
     self.env = UnityEnvironment(file_name=env_path, worker_id=self.worker_id,
                                 curriculum=self.curriculum_file, seed=self.seed,
                                 docker_training=self.docker_training,
                                 no_graphics=no_graphics)
     if env_path is None:
         self.env_name = 'editor_'+self.env.academy_name
     else:
         self.env_name = os.path.basename(os.path.normpath(env_path))  # Extract out name of environment
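
# This __init__ appears to belong to ML-Agents' TrainerController; a typical
# construction (argument values are illustrative) followed by training would be:
# tc = TrainerController(env_path='envs/3DBall', run_id='run-1', save_freq=50000,
#                        curriculum_file=None, fast_simulation=True, load=False,
#                        train=True, worker_id=0, keep_checkpoints=5, lesson=0,
#                        seed=-1, docker_target_name='',
#                        trainer_config_path='trainer_config.yaml',
#                        no_graphics=False)
# tc.start_learning()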
Example #43
from unityagents import UnityEnvironment
import numpy as np
import matplotlib.pyplot as plt

if __name__ == '__main__':
    env = UnityEnvironment(
        file_name="../Banana_Env/Banana_Linux_Pixels/Banana.x86_64")
    # get the default brain
    brain_name = env.brain_names[0]
    brain = env.brains[brain_name]
    # reset the environment
    env_info = env.reset(train_mode=True)[brain_name]

    # number of agents in the environment
    print('Number of agents:', len(env_info.agents))

    # number of actions
    action_size = brain.vector_action_space_size
    print('Number of actions:', action_size)

    # examine the state space
    state = env_info.visual_observations[0]
    print('States look like:')
    plt.imshow(np.squeeze(state))
    plt.show()
    state_size = state.shape
    print('States have shape:', state.shape)

    env_info = env.reset(train_mode=False)[brain_name]  # reset the environment
    state = env_info.visual_observations[0]  # get the current state
    score = 0  # initialize the score