Example no. 1
    def __init__(self,
                 state_size,
                 action_size,
                 mode: str = "train",
                 seed: int = 42,
                 hidden_size=10,
                 config: dict = None):  # avoid a shared mutable default
        """Initialize an Agent object.

        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            seed (int): random seed
        """
        self.config = config if config is not None else {}
        self.state_size = state_size
        self.action_size = action_size
        random.seed(seed)
        self.seed = seed
        self.mode = mode

        # Initialize time step (for updating every UPDATE_EVERY steps)
        self.t_step = 0
        self.losses = deque(maxlen=100)  # last 100 losses
        self.avg_loss = np.inf
        self.step_every_action = False

        # For a continuous problem, the network outputs the mean and the std of each continuous action
        if self.config.get("problem_type") == "continuous":
            action_size = action_size * 2
        self.network = SimpleNeuralNetHead(action_size,
                                           SimpleNeuralNetBody(
                                               state_size, (hidden_size, )),
                                           func=config.get("head_func"))
        logger.info(self.network)
        self.optimizer = optim.Adam(self.network.parameters(), lr=1e-2)
        self.saved_log_probs = []
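A minimal construction sketch for the initializer above (the full class appears as ReinforceAgent in Example no. 6; the CartPole-style sizes and the softmax head below are illustrative assumptions, not taken from the original code):

import torch.nn.functional as F

# Hypothetical instantiation for a discrete problem; head_func must map logits to
# probabilities because act() feeds the network output to Categorical.
agent = ReinforceAgent(state_size=4,     # assumed CartPole-like observation size
                       action_size=2,    # assumed CartPole-like action count
                       seed=42,
                       hidden_size=16,
                       config={"problem_type": "discrete",
                               "head_func": lambda x: F.softmax(x, dim=-1)})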
Example no. 2
def main(discount_factor=0.99, weight_decay=0.0001, batch_size=64):
    importlib.reload(unityagents)
    from unityagents import UnityEnvironment

    # ---------------------------------------------------------------------------------------------------
    #  Logger
    # ---------------------------------------------------------------------------------------------------
    path = Path(__file__).parent
    save_path = f"./results/Crawler_DDPG_{pd.Timestamp.utcnow().value}"
    os.makedirs(save_path, exist_ok=True)

    # logging.basicConfig(filename=f"{save_path}/logs_navigation_{pd.Timestamp.utcnow().value}.log",
    #                     format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    #                     level=logging.INFO)
    logger = logging.getLogger()
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s : %(message)s')

    handler = logging.FileHandler(
        f"{save_path}/logs_navigation_{pd.Timestamp.utcnow().value}.log")
    handler.setLevel(logging.DEBUG)
    handler.setFormatter(formatter)
    # stream_handler = logging.StreamHandler()
    # stream_handler.setFormatter(formatter)

    # logger.addHandler(stream_handler)
    logger.addHandler(handler)
    # ---------------------------------------------------------------------------------------------------
    #  Inputs
    # ---------------------------------------------------------------------------------------------------
    n_episodes = 500
    config = dict(
        # Environment parameters
        env_name="Crawler",
        n_episodes=n_episodes,
        length_episode=1500,
        save_every=100,
        save_path=save_path,
        mode="train",  # "train" or "test"
        evaluate_every=50,  # Number of training episodes before 1 evaluation episode
        eps_decay=0.995,  # Epsilon decay rate

        # Agent Parameters
        agent="DDPG",
        hidden_layers_actor=(40, 30, 20),  #
        hidden_layers_critic_body=(40, ),  #
        hidden_layers_critic_head=(30, 20),  #
        func_critic_body="F.relu",  #
        func_critic_head="F.relu",  #
        func_actor_body="F.relu",  #
        lr_scheduler={
            'scheduler_type': "multistep",  # "step", "exp", "decay" or "multistep"
            'gamma': 0.5,  # 0.99999,
            'step_size': 1,
            'milestones': [25 * 1000 * i for i in range(1, 6)],
            'max_epochs': n_episodes
        },
        TAU=1e-3,  # for soft update of target parameters
        BUFFER_SIZE=int(1e5),  # replay buffer size
        BATCH_SIZE=batch_size,  # minibatch size
        GAMMA=discount_factor,  # discount factor
        LR_ACTOR=1e-3,  # learning rate of the actor
        LR_CRITIC=1e-3,  # learning rate of the critic
        WEIGHT_DECAY=weight_decay,  # L2 weight decay
        UPDATE_EVERY=5,  # Number of actions before making a learning step
        action_noise="OU",  #
        action_noise_scale=0.01,
        weights_noise=None,  #
        state_normalizer="RunningMeanStd",  #
        warmup=1e4,  # Number of random actions to start with as a warm-up
        start_time=str(pd.Timestamp.utcnow()),
    )

    # ------------------------------------------------------------
    #  1. Initialization
    # ------------------------------------------------------------
    # 1. Start the Environment

    # env = UnityEnvironment(file_name=f'./Reacher_Linux/Reacher.x86_64')  # Linux
    env = UnityEnvironment(file_name=f'./{config["env_name"]}')  # mac OS

    # get the default brain
    brain_name = env.brain_names[0]
    brain = env.brains[brain_name]

    # reset the environment
    env_info = env.reset(train_mode=True)[brain_name]

    # number of agents
    num_agents = len(env_info.agents)
    print('Number of agents:', num_agents)

    # size of each action
    action_size = brain.vector_action_space_size
    print('Size of each action:', action_size)

    # examine the state space
    states = env_info.vector_observations
    state_size = states.shape[1]
    print('There are {} agents. Each observes a state with length: {}'.format(
        states.shape[0], state_size))
    print('The state for the first agent looks like:', states[0])
    config.update(dict(action_size=action_size, state_size=state_size))

    # ------------------------------------------------------------
    #  2. Training
    # ------------------------------------------------------------
    if config["mode"] == "train":
        # Actor model
        actor = SimpleNeuralNetHead(action_size,
                                    SimpleNeuralNetBody(
                                        state_size,
                                        config["hidden_layers_actor"]),
                                    func=torch.tanh)
        # Critic model
        critic = DeepNeuralNetHeadCritic(
            action_size,
            SimpleNeuralNetBody(state_size,
                                config["hidden_layers_critic_body"],
                                func=eval(config["func_critic_body"])),
            hidden_layers_sizes=config["hidden_layers_critic_head"],
            func=eval(config["func_critic_head"]),
            end_func=None)

        # DDPG Agent
        agent = DDPGAgent(
            state_size=state_size,
            action_size=action_size,
            model_actor=actor,
            model_critic=critic,
            action_space_low=[-1] * action_size,
            action_space_high=[1] * action_size,
            config=config,
        )

        # Unity Monitor
        monitor = UnityMonitor(env=env, config=config)

        # Training
        start = pd.Timestamp.utcnow()
        scores = monitor.run(agent)
        logger.info("Average Score last 100 episodes: {}".format(
            np.mean(scores[-100:])))
        elapsed_time = pd.Timedelta(pd.Timestamp.utcnow() -
                                    start).total_seconds()
        logger.info(f"Elapsed Time: {elapsed_time} seconds")

    # ------------------------------------------------------------
    #  3. Testing
    # ------------------------------------------------------------
    else:
        agent = DDPGAgent.load(filepath=config['save_path'], mode="test")
        config.update(dict(mode="test", n_episodes=10, length_episode=1e6))
        monitor = UnityMonitor(env=env, config=config)
        scores = monitor.run(agent)
        logger.info(
            f"Test Score over {len(scores)} episodes: {np.mean(scores)}")
        config["test_scores"] = scores
        config["best_test_score"] = max(scores)
        config["avg_test_score"] = np.mean(scores)

    # When finished, you can close the environment.
    logger.info("Closing...")
    env.close()
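The TAU entry in the config above controls DDPG's soft update of the target networks. A minimal sketch of what such a Polyak update typically looks like (an assumption about the mechanism; the DDPGAgent internals are not shown in these examples):

import torch

def soft_update(local_model: torch.nn.Module, target_model: torch.nn.Module, tau: float) -> None:
    # theta_target <- tau * theta_local + (1 - tau) * theta_target
    for target_param, local_param in zip(target_model.parameters(),
                                         local_model.parameters()):
        target_param.data.copy_(tau * local_param.data + (1.0 - tau) * target_param.data)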
Example no. 3
def main(seed=seed):
    # ---------------------------------------------------------------------------------------------------
    #  Logger
    # ---------------------------------------------------------------------------------------------------
    save_path = f"./results/Reacher_DDPG_{pd.Timestamp.utcnow().value}"
    os.makedirs(save_path, exist_ok=True)

    logger = logging.getLogger()
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s : %(message)s')

    handler = logging.FileHandler(
        f"{save_path}/logs_navigation_{pd.Timestamp.utcnow().value}.log")
    handler.setLevel(logging.DEBUG)
    handler.setFormatter(formatter)
    logger.addHandler(handler)

    # ---------------------------------------------------------------------------------------------------
    #  Inputs
    # ---------------------------------------------------------------------------------------------------
    n_episodes = 300
    config = dict(
        # Environment parameters
        env_name="Reacher",
        n_episodes=n_episodes,
        length_episode=1500,
        save_every=100,
        save_path=save_path,
        mode="train",  # "train" or "test"
        evaluate_every=5000,  # Number of training episodes before 1 evaluation episode
        eps_decay=1,  # Epsilon decay rate

        # Agent Parameters
        agent="DDPG",
        hidden_layers_actor=(200, 150),  # (50, 50, 15),  # (200, 150),  #
        hidden_layers_critic_body=(400, ),  # (50, 50,),  #
        hidden_layers_critic_head=(300, ),  # (50,),   # (300,)
        func_critic_body="F.leaky_relu",  #
        func_critic_head="F.leaky_relu",  #
        func_actor_body="F.leaky_relu",  #
        lr_scheduler=None,
        # lr_scheduler={'scheduler_type': "multistep",  # "step", "exp", "decay" or "multistep"
        #               'gamma': 0.5,  # 0.99999,
        #               'step_size': 1,
        #               'milestones': [15*1000 * i for i in range(1, 6)],
        #               'max_epochs': n_episodes},
        TAU=1e-3,  # for soft update of target parameters
        BUFFER_SIZE=int(1e6),  # replay buffer size
        BATCH_SIZE=128,  # minibatch size
        GAMMA=0.99,  # discount factor
        LR_ACTOR=1e-3,  # learning rate of the actor
        LR_CRITIC=1e-3,  # learning rate of the critic
        WEIGHT_DECAY=0,  # L2 weight decay
        UPDATE_EVERY=1,  # Number of actions before making a learning step
        action_noise="OU",  #
        action_noise_scale=1,
        weights_noise=None,  #
        state_normalizer="BatchNorm",  # "RunningMeanStd" or "BatchNorm"
        warmup=0,  # Number of random actions to start with as a warm-up
        start_time=str(pd.Timestamp.utcnow()),
        random_seed=seed,
        threshold=30)
    logger.warning("+=" * 90)
    logger.warning(f"  RUNNING SIMULATION WITH PARAMETERS config={config}")
    logger.warning("+=" * 90)

    # ------------------------------------------------------------
    #  1. Initialization
    # ------------------------------------------------------------
    # 1. Start the Environment

    # env = UnityEnvironment(file_name=f'./Reacher_Linux_2/Reacher.x86_64')  # Linux
    env = UnityEnvironment(file_name=f'./{config["env_name"]}')  # mac OS

    # get the default brain
    brain_name = env.brain_names[0]
    brain = env.brains[brain_name]

    # reset the environment
    env_info = env.reset(train_mode=True)[brain_name]

    # number of agents
    num_agents = len(env_info.agents)
    print('Number of agents:', num_agents)
    config["n_agents"] = num_agents

    # size of each action
    action_size = brain.vector_action_space_size
    print('Size of each action:', action_size)

    # examine the state space
    states = env_info.vector_observations
    state_size = states.shape[1]
    print('There are {} agents. Each observes a state with length: {}'.format(
        states.shape[0], state_size))
    print('The state for the first agent looks like:', states[0])
    config.update(dict(action_size=action_size, state_size=state_size))

    # ------------------------------------------------------------
    #  2. Training
    # ------------------------------------------------------------
    # Unity Monitor
    monitor = UnityMonitor(env=env, config=config)

    if config["mode"] == "train":
        # Actor model
        seed = 0
        actor = SimpleNeuralNetHead(action_size,
                                    SimpleNeuralNetBody(
                                        state_size,
                                        config["hidden_layers_actor"],
                                        seed=seed),
                                    func=torch.tanh,
                                    seed=seed)
        actor_target = SimpleNeuralNetHead(action_size,
                                           SimpleNeuralNetBody(
                                               state_size,
                                               config["hidden_layers_actor"],
                                               seed=seed),
                                           func=torch.tanh,
                                           seed=seed)
        # Critic model
        critic = DeepNeuralNetHeadCritic(
            action_size,
            SimpleNeuralNetBody(state_size,
                                config["hidden_layers_critic_body"],
                                func=eval(config["func_critic_body"]),
                                seed=seed),
            hidden_layers_sizes=config["hidden_layers_critic_head"],
            func=eval(config["func_critic_head"]),
            end_func=None,
            seed=seed)

        critic_target = DeepNeuralNetHeadCritic(
            action_size,
            SimpleNeuralNetBody(state_size,
                                config["hidden_layers_critic_body"],
                                func=eval(config["func_critic_body"]),
                                seed=seed),
            hidden_layers_sizes=config["hidden_layers_critic_head"],
            func=eval(config["func_critic_head"]),
            end_func=None,
            seed=seed)

        # DDPG Agent
        agent = DDPGAgent(
            state_size=state_size,
            action_size=action_size,
            model_actor=actor,
            model_critic=critic,
            # actor_target=actor_target, critic_target=critic_target,
            action_space_low=-1,
            action_space_high=1,
            config=config,
        )

        # Training
        start = pd.Timestamp.utcnow()
        scores = monitor.run(agent)
        logger.info("Average Score last 100 episodes: {}".format(
            np.mean(scores[-100:])))
        elapsed_time = pd.Timedelta(pd.Timestamp.utcnow() -
                                    start).total_seconds()
        logger.info(f"Elapsed Time: {elapsed_time} seconds")

    # ------------------------------------------------------------
    #  3. Testing
    # ------------------------------------------------------------
    else:
        agent = DDPGAgent.load(filepath=config['save_path'], mode="test")
        scores = monitor.run(agent)
        logger.info(
            f"Test Score over {len(scores)} episodes: {np.mean(scores)}")
        config["test_scores"] = scores
        config["best_test_score"] = max(scores)
        config["avg_test_score"] = np.mean(scores)

    # When finished, you can close the environment.
    logger.info("Closing...")
    env.close()
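The DDPG configs above select action_noise="OU". A small sketch of the Ornstein-Uhlenbeck process that name usually refers to (theta and sigma values are illustrative; the project's own noise class is not shown here):

import numpy as np

class OUNoise:
    """Temporally correlated exploration noise added to continuous actions."""

    def __init__(self, size, mu=0.0, theta=0.15, sigma=0.2, seed=0):
        self.mu = mu * np.ones(size)
        self.theta = theta
        self.sigma = sigma
        self.rng = np.random.default_rng(seed)
        self.reset()

    def reset(self):
        self.state = self.mu.copy()

    def sample(self):
        # Pull the state back toward mu and add Gaussian perturbation.
        dx = self.theta * (self.mu - self.state) + self.sigma * self.rng.standard_normal(self.mu.shape)
        self.state = self.state + dx
        return self.state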
Example no. 4
def main():
    # ---------------------------------------------------------------------------------------------------
    #  Logger
    # ---------------------------------------------------------------------------------------------------
    save_path = f"./results/Tennis_DDPG_{pd.Timestamp.utcnow().value}"
    os.makedirs(save_path, exist_ok=True)

    logger = logging.getLogger()
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s : %(message)s')

    handler = logging.FileHandler(
        f"{save_path}/logs_p3_{pd.Timestamp.utcnow().value}.log")
    handler.setLevel(logging.DEBUG)
    handler.setFormatter(formatter)
    logger.addHandler(handler)

    # ---------------------------------------------------------------------------------------------------
    #  Inputs
    # ---------------------------------------------------------------------------------------------------
    import json
    with open("./assets/best_agent/config.json", "r") as f:
        config = json.load(f)
    config["mode"] = "test"
    config["n_episodes"] = 10
    config["warmup"] = 0

    logger.warning("+=" * 90)
    logger.warning(f"  RUNNING SIMULATION WITH PARAMETERS config={config}")
    logger.warning("+=" * 90)

    # ------------------------------------------------------------
    #  1. Initialization
    # ------------------------------------------------------------
    # 1. Start the Environment
    env = UnityEnvironment(file_name=f'./{config["env_name"]}')  # mac OS

    # get the default brain
    brain_name = env.brain_names[0]
    brain = env.brains[brain_name]

    # reset the environment
    env_info = env.reset(train_mode=True)[brain_name]

    # number of agents
    num_agents = len(env_info.agents)
    print('Number of agents:', num_agents)
    config["n_agents"] = num_agents

    # size of each action
    action_size = brain.vector_action_space_size
    print('Size of each action:', action_size)

    # examine the state space
    states = env_info.vector_observations
    state_size = states.shape[1]
    print('There are {} agents. Each observes a state with length: {}'.format(
        states.shape[0], state_size))
    print('The state for the first agent looks like:', states[0])
    config.update(dict(action_size=action_size, state_size=state_size))

    # ------------------------------------------------------------
    #  2. Training
    # ------------------------------------------------------------
    # Unity Monitor
    monitor = UnityMonitor(env=env, config=config)

    # Actor model
    seed = 0
    actor = SimpleNeuralNetHead(action_size,
                                SimpleNeuralNetBody(
                                    state_size,
                                    config["hidden_layers_actor"],
                                    seed=seed),
                                func=torch.tanh,
                                seed=seed)
    # Critic model
    critic = DeepNeuralNetHeadCritic(
        action_size * num_agents,
        SimpleNeuralNetBody(state_size * num_agents,
                            config["hidden_layers_critic_body"],
                            func=eval(config["func_critic_body"]),
                            seed=seed),
        hidden_layers_sizes=config["hidden_layers_critic_head"],
        func=eval(config["func_critic_head"]),
        end_func=None,
        seed=seed)

    # MADDPG Agent
    agent = MADDPGAgent(
        state_size=state_size,
        action_size=action_size,
        model_actor=actor,
        model_critic=critic,
        action_space_low=-1,
        action_space_high=1,
        config=config,
    )

    # ------------------------------------------------------------
    #  3. Testing
    # ------------------------------------------------------------
    logger.warning("Entering Test Mode!")
    monitor.n_episodes = 100
    env.reset(train_mode=False)
    env.warmup = 0
    agent.warmup = 0
    for a in agent.agents:
        a.warmup = 0
    agent.load(filepath="./assets/best_agent", mode="test")
    scores = monitor.run(agent)
    logger.info(f"Test Score over {len(scores)} episodes: {np.mean(scores)}")
    config["test_scores"] = scores
    config["best_test_score"] = max(scores)
    config["avg_test_score"] = np.mean(scores)

    # When finished, you can close the environment.
    logger.info("Closing...")
    env.close()
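Here the critic is built with action_size * num_agents and state_size * num_agents inputs, i.e. a centralized critic that scores the joint observation and joint action of both Tennis players. A shape-only sketch of that concatenation (the sizes below are illustrative, not read from the environment):

import torch

num_agents, state_size, action_size = 2, 24, 2       # illustrative Tennis-like sizes
states = torch.rand(num_agents, state_size)           # one observation per agent
actions = torch.rand(num_agents, action_size)         # one action per agent
joint_state = states.reshape(1, -1)                   # (1, num_agents * state_size)
joint_action = actions.reshape(1, -1)                 # (1, num_agents * action_size)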
Example no. 5
def main(discount_factor=0.99, weight_decay=0.001, batch_size=128):
    # ---------------------------------------------------------------------------------------------------
    #  Inputs
    # ---------------------------------------------------------------------------------------------------
    n_episodes = 1000
    config = dict(
        # Environment parameters
        env_name='BipedalWalker-v3',
        n_episodes=n_episodes,
        length_episode=1500,
        save_every=100,
        save_path=f"./DDPG_Bipedal_{pd.Timestamp.utcnow().value}",
        mode="train",  # "train" or "test"
        evaluate_every=50,  # Number of training episodes before 1 evaluation episode
        eps_decay=0.9999,  # Epsilon decay rate
        render=True,

        # Agent Parameters
        agent="DDPG",
        hidden_layers_actor=(256, 128),  #
        hidden_layers_critic_body=(256, ),  #
        hidden_layers_critic_head=(128, ),  #
        func_critic_body="F.relu",  #
        func_critic_head="F.relu",  #
        func_actor_body="F.relu",  #
        lr_scheduler={
            'scheduler_type': "multistep",  # "step", "exp", "decay" or "multistep"
            'gamma': 0.5,  # 0.99999,
            'step_size': 1,
            'milestones': [25 * 1000 * i for i in range(1, 6)],
            'max_epochs': n_episodes
        },
        TAU=1e-3,  # for soft update of target parameters
        BUFFER_SIZE=int(1e6),  # replay buffer size
        BATCH_SIZE=batch_size,  # minibatch size
        GAMMA=discount_factor,  # discount factor
        LR_ACTOR=1e-4,  # learning rate of the actor
        LR_CRITIC=3e-4,  # learning rate of the critic
        WEIGHT_DECAY=weight_decay,  # L2 weight decay
        UPDATE_EVERY=1,  # Number of actions before making a learning step
        N_CONSECUTIVE_LEARNING_STEPS=2,
        action_noise="OU",  #
        action_noise_scale=1,
        weights_noise=None,  #
        state_normalizer=None,  # or "RunningMeanStd"
        warmup=5e3,  # Number of random actions to start with as a warm-up
        start_time=str(pd.Timestamp.utcnow()),
    )

    # ------------------------------------------------------------
    #  1. Initialization
    # ------------------------------------------------------------
    # 1. Start the Environment
    env = GymMonitor(config=config)

    # Actor model
    actor = SimpleNeuralNetHead(env.action_size,
                                SimpleNeuralNetBody(
                                    env.state_size,
                                    config["hidden_layers_actor"]),
                                func=torch.tanh)
    actor_target = SimpleNeuralNetHead(env.action_size,
                                       SimpleNeuralNetBody(
                                           env.state_size,
                                           config["hidden_layers_actor"]),
                                       func=torch.tanh)
    # Critic model
    critic = DeepNeuralNetHeadCritic(
        env.action_size,
        SimpleNeuralNetBody(env.state_size,
                            config["hidden_layers_critic_body"],
                            func=eval(config["func_critic_body"])),
        hidden_layers_sizes=config["hidden_layers_critic_head"],
        func=eval(config["func_critic_head"]),
        end_func=None)
    critic_target = DeepNeuralNetHeadCritic(
        env.action_size,
        SimpleNeuralNetBody(env.state_size,
                            config["hidden_layers_critic_body"],
                            func=eval(config["func_critic_body"])),
        hidden_layers_sizes=config["hidden_layers_critic_head"],
        func=eval(config["func_critic_head"]),
        end_func=None)

    # DDPG Agent
    agent = DDPGAgent(
        state_size=env.state_size,
        action_size=env.action_size,
        model_actor=actor,
        model_critic=critic,
        actor_target=actor_target,
        critic_target=critic_target,
        action_space_low=-1,
        action_space_high=1,
        config=config,
    )

    env.eps_decay = config["eps_decay"]
    env.run(agent)
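The lr_scheduler block above names a "multistep" scheduler with gamma=0.5 and a list of milestones. A minimal sketch of the standard PyTorch scheduler that configuration maps onto (small milestone values for illustration; the project's own scheduler wrapper is not shown):

import torch
from torch import optim

param = torch.nn.Parameter(torch.zeros(1))
optimizer = optim.Adam([param], lr=1e-4)
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[3, 6, 9], gamma=0.5)

for step in range(10):
    optimizer.step()      # one learning step
    scheduler.step()      # multiplies the learning rate by gamma at each milestone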
Example no. 6
class ReinforceAgent():
    """Interacts with and learns from the environment."""
    def __init__(self,
                 state_size,
                 action_size,
                 mode: str = "train",
                 seed: int = 42,
                 hidden_size=10,
                 config: dict = None):  # avoid a shared mutable default
        """Initialize an Agent object.

        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            seed (int): random seed
        """
        self.config = config if config is not None else {}
        self.state_size = state_size
        self.action_size = action_size
        random.seed(seed)
        self.seed = seed
        self.mode = mode

        # Initialize time step (for updating every UPDATE_EVERY steps)
        self.t_step = 0
        self.losses = deque(maxlen=100)  # last 100 losses
        self.avg_loss = np.inf
        self.step_every_action = False

        # For a continuous problem, the network outputs the mean and the std of each continuous action
        if self.config.get("problem_type") == "continuous":
            action_size = action_size * 2
        self.network = SimpleNeuralNetHead(action_size,
                                           SimpleNeuralNetBody(
                                               state_size, (hidden_size, )),
                                           func=config.get("head_func"))
        logger.info(self.network)
        self.optimizer = optim.Adam(self.network.parameters(), lr=1e-2)
        self.saved_log_probs = []

    def step(self, states, actions, rewards):
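        # REINFORCE update: R is the discounted episode return sum_t gamma^t * r_t;
        # each stored log-probability is weighted by -R so that minimizing the summed
        # loss performs gradient ascent on the expected return.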
        discounts = [GAMMA**i for i in range(len(rewards) + 1)]
        R = sum([a * b for a, b in zip(discounts, rewards)])

        policy_loss = []
        for log_prob in self.saved_log_probs:
            policy_loss.append(-log_prob * R)
        policy_loss = torch.cat(policy_loss).sum()

        self.optimizer.zero_grad()
        policy_loss.backward()
        self.optimizer.step()

        # reset saved_log_probs
        self.saved_log_probs = []

    def act(self, state, eps=0.):
        state = torch.from_numpy(state).float().unsqueeze(0).to(device)
        y = self.network.forward(state).cpu()
        if self.config.get("problem_type") == "discrete":
            m = Categorical(y)
            action = m.sample()
            delta = m.log_prob(action)
            action = action.item()
        elif self.config.get("problem_type") == "continuous":
            action = self.continuous_action(y.detach().numpy()[0])
            # Placeholder log-prob: the numpy-sampled continuous action is not differentiable.
            delta = torch.tensor([1])
        self.saved_log_probs.append(delta)
        return action

    @staticmethod
    def continuous_action(y):
        n_actions = int(len(y) / 2)
        print(f"n_actions={n_actions}")
        # even indexes contain the mean, odd contain std
        actions = [
            np.random.normal(y[i * 2], y[i * 2 + 1]) for i in range(n_actions)
        ]
        print(f"actions={actions}")
        return actions

    def save(self, filepath):
        pass

    @classmethod
    def load(cls, filepath, mode="train"):
        pass
def main(seed=seed):
    # ---------------------------------------------------------------------------------------------------
    #  Logger
    # ---------------------------------------------------------------------------------------------------
    results_path = "."

    logger = logging.getLogger()
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s : %(message)s')

    handler = logging.FileHandler(
        f"{results_path}/logs_test_{pd.Timestamp.utcnow().value}.log")
    handler.setLevel(logging.DEBUG)
    handler.setFormatter(formatter)
    logger.addHandler(handler)

    # ---------------------------------------------------------------------------------------------------
    #  Inputs
    # ---------------------------------------------------------------------------------------------------
    with open(f"{results_path}/config.json", "r") as f:
        config = json.load(f)
    #save_path = results_path,
    config["mode"] = "test"
    config["n_episodes"] = 10

    logger.warning("+=" * 90)
    logger.warning(f"  RUNNING SIMULATION WITH PARAMETERS config={config}")
    logger.warning("+=" * 90)

    # ------------------------------------------------------------
    #  1. Initialization
    # ------------------------------------------------------------
    # 1. Start the Environment

    # env = UnityEnvironment(file_name=f'./Reacher_Linux_2/Reacher.x86_64')  # Linux
    env = UnityEnvironment(file_name=f'./{config["env_name"]}')  # mac OS

    # get the default brain
    brain_name = env.brain_names[0]
    brain = env.brains[brain_name]

    # reset the environment
    env_info = env.reset(train_mode=False)[brain_name]

    # number of agents
    num_agents = len(env_info.agents)
    print('Number of agents:', num_agents)
    config["n_agents"] = num_agents

    # size of each action
    action_size = brain.vector_action_space_size
    print('Size of each action:', action_size)

    # examine the state space
    states = env_info.vector_observations
    state_size = states.shape[1]
    print('There are {} agents. Each observes a state with length: {}'.format(
        states.shape[0], state_size))
    print('The state for the first agent looks like:', states[0])
    config.update(dict(action_size=action_size, state_size=state_size))

    # ------------------------------------------------------------
    #  2. Training
    # ------------------------------------------------------------
    # Unity Monitor
    monitor = UnityMonitor(env=env, config=config)

    # Actor model
    seed = 0
    actor = SimpleNeuralNetHead(action_size,
                                SimpleNeuralNetBody(
                                    state_size,
                                    config["hidden_layers_actor"],
                                    seed=seed),
                                func=torch.tanh,
                                seed=seed)

    # Critic model
    critic = DeepNeuralNetHeadCritic(
        action_size,
        SimpleNeuralNetBody(state_size,
                            config["hidden_layers_critic_body"],
                            func=eval(config["func_critic_body"]),
                            seed=seed),
        hidden_layers_sizes=config["hidden_layers_critic_head"],
        func=eval(config["func_critic_head"]),
        end_func=None,
        seed=seed)

    # DDPG Agent
    agent = DDPGAgent(
        state_size=state_size,
        action_size=action_size,
        model_actor=actor,
        model_critic=critic,
        action_space_low=-1,
        action_space_high=1,
        config=config,
    )
    agent.load(results_path)

    # ------------------------------------------------------------
    #  3. Testing
    # ------------------------------------------------------------
    start = pd.Timestamp.utcnow()
    scores = monitor.run(agent)
    elapsed_time = pd.Timedelta(pd.Timestamp.utcnow() - start).total_seconds()
    logger.info(f"Elapsed Time: {elapsed_time} seconds")

    logger.info(f"Test Score over {len(scores)} episodes: {np.mean(scores)}")
    config["test_scores"] = scores
    config["best_test_score"] = np.max(np.mean(np.array(scores), axis=1))
    config["avg_test_score"] = np.mean(np.mean(np.array(scores), axis=1))

    # When finished, you can close the environment.
    logger.info("Closing...")
    env.close()
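The two aggregations above treat scores as one row per episode with one score per agent. A small shape sketch of that reduction (numbers are illustrative):

import numpy as np

scores = np.array([[30.1, 29.8],    # episode 1: one score per agent
                   [31.2, 30.5]])   # episode 2
per_episode = scores.mean(axis=1)   # average over agents -> one value per episode
best_test_score = per_episode.max()
avg_test_score = per_episode.mean()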