Code example #1
File: Tennis_test.py  Project: CangelosiQ/RL-Library
# Module-level imports used by this excerpt (the project-specific classes
# UnityMonitor, SimpleNeuralNetHead, SimpleNeuralNetBody, DeepNeuralNetHeadCritic
# and MADDPGAgent come from the RL-Library package itself; their exact module
# paths are not shown here).
import json
import logging
import os

import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F  # referenced indirectly via eval() on the config strings
from unityagents import UnityEnvironment  # Udacity ml-agents wrapper (assumed)


def main():
    # ---------------------------------------------------------------------------------------------------
    #  Logger
    # ---------------------------------------------------------------------------------------------------
    save_path = f"./results/Tennis_DDPG_{pd.Timestamp.utcnow().value}"
    os.makedirs(save_path, exist_ok=True)

    logger = logging.getLogger()
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s : %(message)s')

    handler = logging.FileHandler(
        f"{save_path}/logs_p3_{pd.Timestamp.utcnow().value}.log")
    handler.setLevel(logging.DEBUG)
    handler.setFormatter(formatter)
    logger.addHandler(handler)
    # the root logger defaults to WARNING; lower it so the INFO messages below reach the file handler
    logger.setLevel(logging.DEBUG)

    # ---------------------------------------------------------------------------------------------------
    #  Inputs
    # ---------------------------------------------------------------------------------------------------
    # configuration used to train the saved agent (json is imported at module level)
    with open("./assets/best_agent/config.json", "r") as f:
        config = json.load(f)
    config["mode"] = "test"
    config["n_episodes"] = 10
    config["warmup"] = 0

    logger.warning("+=" * 90)
    logger.warning(f"  RUNNING SIMULATION WITH PARAMETERS config={config}")
    logger.warning("+=" * 90)

    # ------------------------------------------------------------
    #  1. Initialization
    # ------------------------------------------------------------
    # 1. Start the Environment
    env = UnityEnvironment(file_name=f'./{config["env_name"]}')  # mac OS

    # get the default brain
    brain_name = env.brain_names[0]
    brain = env.brains[brain_name]

    # reset the environment
    env_info = env.reset(train_mode=True)[brain_name]

    # number of agents
    num_agents = len(env_info.agents)
    print('Number of agents:', num_agents)
    config["n_agents"] = num_agents

    # size of each action
    action_size = brain.vector_action_space_size
    print('Size of each action:', action_size)

    # examine the state space
    states = env_info.vector_observations
    state_size = states.shape[1]
    print('There are {} agents. Each observes a state with length: {}'.format(
        states.shape[0], state_size))
    print('The state for the first agent looks like:', states[0])
    config.update(dict(action_size=action_size, state_size=state_size))

    # ------------------------------------------------------------
    #  2. Monitor and Agent Setup
    # ------------------------------------------------------------
    # Unity Monitor
    monitor = UnityMonitor(env=env, config=config)

    # Actor model
    seed = 0
    actor = SimpleNeuralNetHead(action_size,
                                SimpleNeuralNetBody(
                                    state_size,
                                    config["hidden_layers_actor"],
                                    seed=seed),
                                func=torch.tanh,
                                seed=seed)
    # Critic model
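    # MADDPG uses a centralized critic: it receives the concatenated states and
    # actions of all agents, hence the `* num_agents` input sizes below.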
    critic = DeepNeuralNetHeadCritic(
        action_size * num_agents,
        SimpleNeuralNetBody(state_size * num_agents,
                            config["hidden_layers_critic_body"],
                            func=eval(config["func_critic_body"]),
                            seed=seed),
        hidden_layers_sizes=config["hidden_layers_critic_head"],
        func=eval(config["func_critic_head"]),
        end_func=None,
        seed=seed)

    # MADDPG Agent
    agent = MADDPGAgent(
        state_size=state_size,
        action_size=action_size,
        model_actor=actor,
        model_critic=critic,
        action_space_low=-1,
        action_space_high=1,
        config=config,
    )

    # ------------------------------------------------------------
    #  3. Testing
    # ------------------------------------------------------------
    logger.warning("Entering Test Mode!")
    monitor.n_episodes = 100
    env.reset(train_mode=False)
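    # disable the random warm-up actions used during training so the loaded policy acts from the first step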
    env.warmup = 0
    agent.warmup = 0
    for a in agent.agents:
        a.warmup = 0
    agent.load(filepath="./assets/best_agent", mode="test")
    scores = monitor.run(agent)
    logger.info(f"Test Score over {len(scores)} episodes: {np.mean(scores)}")
    config["test_scores"] = scores
    config["best_test_score"] = max(scores)
    config["avg_test_score"] = np.mean(scores)

    # When finished, you can close the environment.
    logger.info("Closing...")
    env.close()
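A minimal entry point for running this test excerpt as a script is sketched below; it is an assumption for illustration, not part of the original file, which may invoke main() differently.

if __name__ == "__main__":
    main()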
Code example #2
File: Tennis.py  Project: CangelosiQ/RL-Library
# As in Tennis_test.py above, the module-level imports (os, logging, numpy, pandas,
# torch and the RL-Library classes) are not shown in this excerpt. `seed` is a
# module-level variable defined elsewhere in the original file; 0 is assumed here.
seed = 0


def main(seed=seed):
    # ---------------------------------------------------------------------------------------------------
    #  Logger
    # ---------------------------------------------------------------------------------------------------
    save_path = f"./results/Tennis_DDPG_{pd.Timestamp.utcnow().value}"
    os.makedirs(save_path, exist_ok=True)

    logger = logging.getLogger()
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s : %(message)s')

    handler = logging.FileHandler(
        f"{save_path}/logs_p3_{pd.Timestamp.utcnow().value}.log")
    handler.setLevel(logging.DEBUG)
    handler.setFormatter(formatter)
    logger.addHandler(handler)
    # the root logger defaults to WARNING; lower it so the INFO messages below reach the file handler
    logger.setLevel(logging.DEBUG)

    # ---------------------------------------------------------------------------------------------------
    #  Inputs
    # ---------------------------------------------------------------------------------------------------
    n_episodes = 2500
    config = dict(
        # Environment parameters
        env_name="Tennis",
        n_episodes=n_episodes,
        length_episode=1500,
        save_every=500,
        save_path=save_path,
        mode="test",  # "train" or "test"
        evaluate_every=5000,  # Number of training episodes before 1 evaluation episode
        eps_decay=1,  # Epsilon decay rate

        # Agent Parameters
        agent="DDPG",
        hidden_layers_actor=(256, 128),  # (50, 50, 15), (200, 150)
        hidden_layers_critic_body=(256, ),  # (50, 50)
        hidden_layers_critic_head=(128, ),  # (50,), (300,)
        func_critic_body="F.leaky_relu",
        func_critic_head="F.leaky_relu",
        func_actor_body="F.leaky_relu",
        # lr_scheduler=None,
        lr_scheduler={
            'scheduler_type': "exp",  # "step", "exp", "decay" or "multistep"
            'gamma': 0.99999,  # 0.75,
            'step_size': 1,
            # 'milestones': [30 * 1000 * i for i in range(1, 6)],
            'max_epochs': n_episodes
        },
        TAU=1e-3,  # for soft update of target parameters
        BUFFER_SIZE=int(3e4),  # replay buffer size
        BATCH_SIZE=128,  # minibatch size
        GAMMA=0.99,  # discount factor
        LR_ACTOR=1e-4,  # learning rate of the actor
        LR_CRITIC=1e-4,  # learning rate of the critic
        WEIGHT_DECAY=0,  # L2 weight decay
        UPDATE_EVERY=1,  # Number of actions before making a learning step
        N_CONSECUTIVE_LEARNING_STEPS=2,
        action_noise="OU",  #
        action_noise_scale=1,
        weights_noise=None,  #
        state_normalizer="BatchNorm",  # "RunningMeanStd" or "BatchNorm"
        warmup=1e3,  # Number of random actions to start with as a warm-up
        start_time=str(pd.Timestamp.utcnow()),
        random_seed=seed,
        threshold=0.5)
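    # With scheduler_type="exp" and gamma=0.99999, and assuming one scheduler step
    # per episode (max_epochs=n_episodes), the learning rate after the 2500 episodes
    # is LR * 0.99999**2500, i.e. only about 2.5% lower than its starting value.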
    logger.warning("+=" * 90)
    logger.warning(f"  RUNNING SIMULATION WITH PARAMETERS config={config}")
    logger.warning("+=" * 90)

    # ------------------------------------------------------------
    #  1. Initialization
    # ------------------------------------------------------------
    # 1. Start the Environment
    env = UnityEnvironment(file_name=f'./{config["env_name"]}')  # mac OS

    # get the default brain
    brain_name = env.brain_names[0]
    brain = env.brains[brain_name]

    # reset the environment
    env_info = env.reset(train_mode=True)[brain_name]

    # number of agents
    num_agents = len(env_info.agents)
    print('Number of agents:', num_agents)
    config["n_agents"] = num_agents

    # size of each action
    action_size = brain.vector_action_space_size
    print('Size of each action:', action_size)

    # examine the state space
    states = env_info.vector_observations
    state_size = states.shape[1]
    print('There are {} agents. Each observes a state with length: {}'.format(
        states.shape[0], state_size))
    print('The state for the first agent looks like:', states[0])
    config.update(dict(action_size=action_size, state_size=state_size))

    # ------------------------------------------------------------
    #  2. Training
    # ------------------------------------------------------------
    # Unity Monitor
    monitor = UnityMonitor(env=env, config=config)

    # Actor model
    seed = 0  # note: this fixes the model seeds to 0 regardless of the `seed` argument passed to main()
    actor = SimpleNeuralNetHead(action_size,
                                SimpleNeuralNetBody(
                                    state_size,
                                    config["hidden_layers_actor"],
                                    seed=seed),
                                func=torch.tanh,
                                seed=seed)
    # Critic model
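    # centralized MADDPG critic: the input sizes are scaled by the number of agents
    # because it evaluates the joint state-action pair of both players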
    critic = DeepNeuralNetHeadCritic(
        action_size * num_agents,
        SimpleNeuralNetBody(state_size * num_agents,
                            config["hidden_layers_critic_body"],
                            func=eval(config["func_critic_body"]),
                            seed=seed),
        hidden_layers_sizes=config["hidden_layers_critic_head"],
        func=eval(config["func_critic_head"]),
        end_func=None,
        seed=seed)

    # MADDPG Agent
    agent = MADDPGAgent(
        state_size=state_size,
        action_size=action_size,
        model_actor=actor,
        model_critic=critic,
        action_space_low=-1,
        action_space_high=1,
        config=config,
    )

    if config["mode"] == "train":
        # Training
        start = pd.Timestamp.utcnow()
        scores = monitor.run(agent)
        logger.info("Average Score last 100 episodes: {}".format(
            np.mean(scores[-100:])))
        elapsed_time = pd.Timedelta(pd.Timestamp.utcnow() - start)
        logger.info(f"Elapsed Time: {elapsed_time}")

    # ------------------------------------------------------------
    #  3. Testing
    # ------------------------------------------------------------
    else:
        logger.warning("Entering Test Mode!")
        monitor.n_episodes = 100
        env.reset(train_mode=False)
        env.warmup = 0
        agent.warmup = 0
        for a in agent.agents:
            a.warmup = 0
        agent.load(filepath="./assets/best_agent", mode="test")
        scores = monitor.run(agent)
        logger.info(
            f"Test Score over {len(scores)} episodes: {np.mean(scores)}")
        config["test_scores"] = scores
        config["best_test_score"] = max(scores)
        config["avg_test_score"] = np.mean(scores)

    # When finished, you can close the environment.
    logger.info("Closing...")
    env.close()
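The TAU=1e-3 entry in the config above is described as the coefficient "for soft update of target parameters". As a reference, here is a minimal sketch of the standard DDPG soft-update rule that such a coefficient controls; the actual implementation lives inside the RL-Library agent classes and is not shown in these excerpts.

def soft_update(local_model, target_model, tau):
    """Blend local weights into the target network:
    theta_target <- tau * theta_local + (1 - tau) * theta_target
    """
    for target_param, local_param in zip(target_model.parameters(),
                                         local_model.parameters()):
        target_param.data.copy_(tau * local_param.data +
                                (1.0 - tau) * target_param.data)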