def run_train(args):
    # Create a per-trial save directory and persist the run arguments.
    save_path = os.path.join(args.logdir, args.agent + '@cps' + str(args.stage))
    args.save_path = init_trial_path(save_path)
    with open(os.path.join(args.save_path, 'args.json'), 'w') as f:
        json.dump(args.__dict__, f)
    print(args)
    config = init_config(args)
    agent = get_agent(args.agent)
    trainer = Trainer(config, [agent])
    results = trainer.run_games_for_agents()
    agent_name = agent.agent_name
    res_age = np.array(results[agent_name][0][0])
    # Score: average of the best episode reward and the mean reward of the last 10 episodes.
    score = (max(res_age) + np.mean(res_age[-10:])) / 2
    # First episode index at which the reward exceeds 0 (resp. 100); -1 if it never does.
    converge_step_1 = np.where(res_age > 0)[0]
    converge_step_2 = np.where(res_age > 100)[0]
    if len(converge_step_1) > 0:
        converge_step_1 = converge_step_1[0]
    else:
        converge_step_1 = -1
    if len(converge_step_2) > 0:
        converge_step_2 = converge_step_2[0]
    else:
        converge_step_2 = -1
    print("Train Done")
    print(f"Agent={agent_name}, Score={score}, Path={args.save_path}")
    print(f"Final Reward={np.mean(res_age[-10:])}, "
          f"Converge Step 0={converge_step_1}, Converge Step 100={converge_step_2}")
    agent = trainer.trained_agents[0][0]
    return agent
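
A minimal (hypothetical) driver for run_train, included only as a usage sketch: the flag names mirror the attributes the function reads (args.agent, args.logdir, args.stage), and the defaults are assumptions rather than part of the original script.

import argparse

# Hypothetical entry point; flag names inferred from the attribute accesses in run_train().
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--agent', default='DQN', help='agent name passed to get_agent()')
    parser.add_argument('--logdir', default='logs', help='root directory for trial outputs')
    parser.add_argument('--stage', type=int, default=1, help='stage identifier used in the save path')
    trained_agent = run_train(parser.parse_args())
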
def test_agent_solve_bit_flipping_game():
    AGENTS = [PPO, DDQN, DQN_With_Fixed_Q_Targets, DDQN_With_Prioritised_Experience_Replay, DQN, DQN_HER]
    trainer = Trainer(config, AGENTS)
    results = trainer.train()
    for agent in AGENTS:
        agent_results = results[agent.agent_name]
        agent_results = np.max(agent_results[0][1][50:])
        assert agent_results >= 0.0, "Failed for {} -- score {}".format(agent.agent_name, agent_results)
Example #3
def test_agent_solve_bit_flipping_game():
    AGENTS = [DQN]
    trainer = Trainer(config, AGENTS)
    results = trainer.run_games_for_agents()
    for agent in AGENTS:
        agent_results = results[agent.agent_name]
        agent_results = np.max(agent_results[0][1][50:])
        assert agent_results >= 0.0, "Failed for {} -- score {}".format(
            agent.agent_name, agent_results)
def test_agents_can_play_games_of_different_dimensions():
    config.num_episodes_to_run = 10
    config.hyperparameters["DQN_Agents"]["batch_size"] = 3
    AGENTS = [
        A2C, A3C, PPO, DDQN, DQN_With_Fixed_Q_Targets,
        DDQN_With_Prioritised_Experience_Replay, DQN
    ]
    trainer = Trainer(config, AGENTS)
    config.environment = gym.make("CartPole-v0")
    results = trainer.run_games_for_agents()
    for agent in AGENTS:
        assert agent.agent_name in results.keys()

    AGENTS = [SAC, TD3, PPO, DDPG]
    config.environment = gym.make("MountainCarContinuous-v0")
    trainer = Trainer(config, AGENTS)
    results = trainer.run_games_for_agents()
    for agent in AGENTS:
        assert agent.agent_name in results.keys()

    AGENTS = [DDQN, SNN_HRL]
    config.environment = Four_Rooms_Environment(
        15,
        15,
        stochastic_actions_probability=0.25,
        random_start_user_place=True,
        random_goal_place=False)
    trainer = Trainer(config, AGENTS)
    results = trainer.run_games_for_agents()
    for agent in AGENTS:
        assert agent.agent_name in results.keys()
Example #5
def test_get_mean_and_standard_deviation_difference_results():
    """Tests that get_mean_and_standard_deviation_difference_results method produces correct output"""
    results = [[1.0, 2.0, 3.0], [5.0, -33.0, 55.0], [2.5, 2.5, 2.5]]
    mean_results = [
        np.mean([1.0, 5.0, 2.5]),
        np.mean([2.0, -33.0, 2.5]),
        np.mean([3.0, 55.0, 2.5])
    ]
    std_results = [
        np.std([1.0, 5.0, 2.5]),
        np.std([2.0, -33.0, 2.5]),
        np.std([3.0, 55.0, 2.5])
    ]
    mean_minus_1_std = [
        mean - std_val for mean, std_val in zip(mean_results, std_results)
    ]
    mean_plus_1_std = [
        mean + std_val for mean, std_val in zip(mean_results, std_results)
    ]
    config = Config()
    config.standard_deviation_results = 1.0
    trainer = Trainer(config, [])
    mean_minus_x_std_guess, mean_results_guess, mean_plus_x_std_guess = trainer.get_mean_and_standard_deviation_difference_results(
        results)
    assert mean_results == mean_results_guess
    assert mean_minus_1_std == mean_minus_x_std_guess
    assert mean_plus_1_std == mean_plus_x_std_guess

    config.standard_deviation_results = 3.0
    trainer = Trainer(config, [])
    mean_minus_x_std_guess, mean_results_guess, mean_plus_x_std_guess = trainer.get_mean_and_standard_deviation_difference_results(
        results)
    mean_plus_3_std = [
        mean + 3.0 * std_val
        for mean, std_val in zip(mean_results, std_results)
    ]
    mean_minus_3_std = [
        mean - 3.0 * std_val
        for mean, std_val in zip(mean_results, std_results)
    ]
    assert mean_results == mean_results_guess
    assert mean_minus_3_std == mean_minus_x_std_guess
    assert mean_plus_3_std == mean_plus_x_std_guess
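
For reference, a standalone NumPy sketch of the arithmetic the test above verifies; this is not the Trainer implementation itself, just the same computation (per-timestep mean over runs, shifted down and up by config.standard_deviation_results standard deviations).

import numpy as np

def mean_and_std_difference(results, standard_deviation_results=1.0):
    """Sketch of the quantity checked above: mean +/- k std across runs at each timestep."""
    runs = np.array(results)          # shape: (num_runs, num_timesteps)
    means = runs.mean(axis=0)         # mean over runs at each timestep
    stds = runs.std(axis=0)           # population std (ddof=0), matching np.std in the test
    lower = means - standard_deviation_results * stds
    upper = means + standard_deviation_results * stds
    return lower.tolist(), means.tolist(), upper.tolist()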
Example #6
            "batch_norm": False,
            "buffer_size": 1000000,
            "tau": 0.005,
            "gradient_clipping_norm": 5,
            "initialiser": "Xavier"
        },
        "min_steps_before_learning": 400,
        "batch_size": 64,
        "discount_rate": 0.99,
        "mu": 0.0,  #for O-H noise
        "theta": 0.15,  #for O-H noise
        "sigma": 0.25,  #for O-H noise
        "action_noise_std": 0.2,  # for TD3
        "action_noise_clipping_range": 0.5,  # for TD3
        "update_every_n_steps": 1,
        "learning_updates_per_learning_session": 1,
        "automatically_tune_entropy_hyperparameter": True,
        "entropy_term_weight": None,
        "add_extra_noise": False,
        "do_evaluation_iterations": True
    }
}

if __name__ == "__main__":
    # Set test to True if you want to evaluate a specific saved network (point dictPath at it)
    test = False
    dictPath = None
    AGENTS = [SAC_Discrete]
    trainer = Trainer(config, AGENTS)
    trainer.run_games_for_agents(test, dictPath)
Example #7
            "batch_norm": False,
            "tau": 0.01,
            "gradient_clipping_norm": 5
        },
        "Critic": {
            "learning_rate": 0.01,
            "linear_hidden_units": [400, 300],
            "final_layer_activation": "None",
            "batch_norm": False,
            "buffer_size": 100000,
            "tau": 0.01,
            "gradient_clipping_norm": 5
        },
        "batch_size": 64,
        "discount_rate": 0.99,
        "mu": 0.0,  # for O-H noise
        "theta": 0.15,  # for O-H noise
        "sigma": 0.2,  # for O-H noise
        "action_noise_std": 0.2,  # for TD3
        "action_noise_clipping_range": 0.5,  # for TD3
        "update_every_n_steps": 1,
        "learning_updates_per_learning_session": 1,
        "clip_rewards": False
    }
}

if __name__ == "__main__":
    AGENTS = [DDPG, HIRO]
    trainer = Trainer(config, AGENTS)
    trainer.run_games_for_agents()
        "discount_rate": 0.99,
        "tau": 0.1,
        "alpha_prioritised_replay": 0.6,
        "beta_prioritised_replay": 0.4,
        "incremental_td_error": 1e-8,
        "update_every_n_steps": 3,
        "linear_hidden_units": [20, 20, 20],
        "final_layer_activation": "None",
        "batch_norm": False,
        "gradient_clipping_norm": 5,
        "HER_sample_proportion": 0.8,
        "clip_rewards": False
    }
}

trainer = Trainer(config, [DQN_HER])
config.hyperparameters = config.hyperparameters["DQN_Agents"]
agent = DQN_HER(config)
agent.reset_game()


def test_initiation():
    """Tests whether DQN_HER initiates correctly"""
    config.hyperparameters["batch_size"] = 64
    agent = DQN_HER(config)
    agent.reset_game()

    assert agent.ordinary_buffer_batch_size == int(0.2 * 64)
    assert agent.HER_buffer_batch_size == 64 - int(0.2 * 64)
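    # With batch_size=64 and HER_sample_proportion=0.8 (set in the config above), the
    # ordinary replay buffer supplies int(0.2 * 64) = 12 transitions per batch and the
    # HER buffer the remaining 64 - 12 = 52, which is exactly what these asserts check.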

    assert agent.q_network_local.input_dim == 8
        "batch_size": 128,
        "buffer_size": 100000,
        "epsilon": 1.0,
        "epsilon_decay_rate_denominator": 150,
        "discount_rate": 0.999,
        "alpha_prioritised_replay": 0.6,
        "beta_prioritised_replay": 0.1,
        "incremental_td_error": 1e-8,
        "update_every_n_steps": 15,
        "tau": 1e-2,
        "linear_hidden_units": [256, 256],
        "final_layer_activation": "softmax",
        # "y_range": (-1, 14),
        "batch_norm": False,
        "gradient_clipping_norm": 5,
        "HER_sample_proportion": 0.8,
        "learning_iterations": 1,
        "clip_rewards": False
    }
}

config.model = FCNN()

if __name__ == '__main__':
    AGENTS = [DQN, DRQN]  # also available: DDQN, Dueling_DDQN, DDQN_With_Prioritised_Experience_Replay

    trainer = Trainer(config, AGENTS)
    trainer.train()


            "final_layer_activation": None,
            "batch_norm": False,
            "buffer_size": 1000000,
            "tau": 0.005,
            "gradient_clipping_norm": 5,
            "initialiser": "Xavier"
        },
        "min_steps_before_learning": 400,
        "batch_size": 256,
        "discount_rate": 0.99,
        "mu": 0.0,  #for O-H noise
        "theta": 0.15,  #for O-H noise
        "sigma": 0.25,  #for O-H noise
        "action_noise_std": 0.2,  # for TD3
        "action_noise_clipping_range": 0.5,  # for TD3
        "update_every_n_steps": 1,
        "learning_updates_per_learning_session": 1,
        "automatically_tune_entropy_hyperparameter": True,
        "entropy_term_weight": None,
        "add_extra_noise": False,
        "do_evaluation_iterations": True
    }
}

if __name__ == "__main__":
    AGENTS = [DQN]
    trainer = Trainer(config, AGENTS)
    trainer.render_games_for_pretrained_agent(DQN,
                                              'Models\\DQN_local_network.pt',
                                              False)
config.hyperparameters = dict(
    # y_range=(-1, 14),
    HER_sample_proportion=0.8,
    alpha_prioritised_replay=0.6,
    batch_norm=False,
    batch_size=64,
    beta_prioritised_replay=0.1,
    buffer_size=1_000_000,
    clip_rewards=False,
    discount_rate=0.999,
    epsilon=1.0,
    epsilon_decay_rate_denominator=(config.num_episodes_to_run * 0.01) // (1 - 0.01),
    final_layer_activation="softmax",
    gradient_clipping_norm=5,
    incremental_td_error=1e-8,
    learning_iterations=1,
    learning_rate=0.01,
    random_episodes_to_run=0,
    tau=1e-2,
    update_every_n_steps=15,
    num_hidden_layers=2,
    hidden_layer_size=256,
    linear_hidden_units=None,  # Either set this, or the previous two.
)

if __name__ == '__main__':
    trainer = Trainer(config, [DQN])
    trainer.train()
Example #12
        "Actor": {
            "learning_rate": 0.0003,
            "linear_hidden_units": [128, 128, 32],
            "final_layer_activation": None,
            "batch_norm": False,
            "tau": 0.005,
            "gradient_clipping_norm": 5,
            "initialiser": "Xavier",
        },
        "Critic": {
            "learning_rate": 0.0003,
            "linear_hidden_units": [128, 128, 32],
            "final_layer_activation": None,
            "batch_norm": False,
            "buffer_size": 1000000,
            "tau": 0.005,
            "gradient_clipping_norm": 5,
            "initialiser": "Xavier",
        },
    }
}

if __name__ == "__main__":
    print('Rerun with the -h flag to see the available arguments, or check the README file.')

    trainer = Trainer(config, AGENTS)
    if config.eval:
        trainer.eval_model(config.num_episodes_to_run)
    else:
        trainer.run_games_for_agents()
Example #13
import argparse
import sys
from os.path import dirname, abspath

sys.path.append(dirname(dirname(abspath(__file__))))

from agents.Trainer import Trainer
from utilities.data_structures.Config import Config

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_path',
                        help='path of pkl data file',
                        required=True)
    parser.add_argument('--save_path',
                        help='path of saved result',
                        required=True)
    parser.add_argument('--title',
                        help='title of result image',
                        default='Result')
    args = parser.parse_args()
    pkl_path = args.data_path
    save_path = args.save_path

    config = Config()
    trainer = Trainer(config=config, agents=None)
    trainer.visualise_preexisting_results(save_image_path=save_path,
                                          data_path=pkl_path,
                                          title=args.title)
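
Assuming this script were saved as, say, visualise_results.py (the filename and the paths below are placeholders, not from the original), it could be invoked as:

python visualise_results.py --data_path results/data.pkl --save_path results/plot.png --title "DQN results"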