def hp_tuning_config(client_config: ClientConfig) -> ClientConfig:
    """
    Setup config for hparam tuning

    :param client_config: the client config
    :return: the updated client config
    """
    client_config.hp_tuning = True
    client_config.hp_tuning_config = HpTuningConfig(param_1="alpha", param_2="num_hidden_layers",
                                                    alpha=[0.000001, 0.00001, 0.0001, 0.001, 0.01],
                                                    num_hidden_layers=[1, 2, 4, 8, 16])
    client_config.run_many = False
    return client_config

def default_config() -> ClientConfig:
    """
    :return: Default configuration for the experiment
    """
    pg_agent_config = PolicyGradientAgentConfig(gamma=0.999, alpha_attacker=0.00001, epsilon=1, render=False,
                                                eval_sleep=0.9, min_epsilon=0.01, eval_episodes=100,
                                                train_log_frequency=100, epsilon_decay=0.9999, video=True,
                                                eval_log_frequency=1, video_fps=5,
                                                video_dir=default_output_dir() + "/results/videos",
                                                num_episodes=350001, eval_render=False, gifs=True,
                                                gif_dir=default_output_dir() + "/results/gifs",
                                                eval_frequency=10000, attacker=True, defender=False,
                                                video_frequency=101,
                                                save_dir=default_output_dir() + "/results/data",
                                                checkpoint_freq=5000, input_dim_attacker=88,
                                                output_dim_attacker=80, hidden_dim=64, num_hidden_layers=1,
                                                batch_size=32, gpu=False, tensorboard=True,
                                                tensorboard_dir=default_output_dir() + "/results/tensorboard",
                                                optimizer="Adam", lr_exp_decay=False, lr_decay_rate=0.999,
                                                state_length=1)
    env_name = "idsgame-minimal_defense-v7"
    client_config = ClientConfig(env_name=env_name, attacker_type=AgentType.ACTOR_CRITIC_AGENT.value,
                                 mode=RunnerMode.TRAIN_ATTACKER.value, pg_agent_config=pg_agent_config,
                                 output_dir=default_output_dir(),
                                 title="Actor-Critic vs DefendMinimalDefender",
                                 run_many=False, random_seeds=[0, 999, 299, 399, 499])
    # client_config = hp_tuning_config(client_config)
    return client_config

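# A minimal usage sketch (assumption, not part of the original scripts): uncommenting the
# hp_tuning_config(...) call above switches the experiment from a single training run to a
# hyperparameter sweep over alpha and num_hidden_layers. Runner.run is the entry point used by
# the test snippets further down in this section; the surrounding imports are assumed to be in place.
if __name__ == '__main__':
    config = default_config()
    config = hp_tuning_config(config)  # enables hp_tuning and sets the parameter grid
    Runner.run(config)
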
def default_config() -> ClientConfig:
    """
    :return: Default configuration for the experiment
    """
    q_agent_config = QAgentConfig(gamma=0.9, alpha=0.05, epsilon=1, render=False, eval_sleep=0.9,
                                  min_epsilon=0.1, eval_episodes=100, train_log_frequency=10,
                                  epsilon_decay=0.999, video=True, eval_log_frequency=1, video_fps=5,
                                  video_dir=default_output_dir() + "/videos", num_episodes=5000,
                                  eval_render=False, gifs=True, gif_dir=default_output_dir() + "/gifs",
                                  eval_frequency=1000, attacker=True, defender=False, video_frequency=101,
                                  save_dir=default_output_dir() + "/data")
    env_name = "idsgame-random_defense-v1"
    client_config = ClientConfig(env_name=env_name, attacker_type=AgentType.TABULAR_Q_AGENT.value,
                                 mode=RunnerMode.TRAIN_ATTACKER.value, q_agent_config=q_agent_config,
                                 output_dir=default_output_dir(),
                                 title="TrainingQAgent vs RandomDefender")
    return client_config

def default_config() -> ClientConfig:
    """
    :return: Default configuration for the experiment
    """
    simulation_config = SimulationConfig(render=True, sleep=0.8, video=True, log_frequency=1, video_fps=5,
                                         video_dir=default_output_dir() + "/videos", num_episodes=1000,
                                         gifs=True, gif_dir=default_output_dir() + "/gifs",
                                         video_frequency=1)
    q_agent_config = QAgentConfig(attacker_load_path=default_output_dir() + "/q_table/attacker_q_table.npy",
                                  defender_load_path=default_output_dir() + "/q_table/defender_q_table.npy")
    env_name = "idsgame-v3"
    client_config = ClientConfig(env_name=env_name, attacker_type=AgentType.TABULAR_Q_AGENT.value,
                                 defender_type=AgentType.TABULAR_Q_AGENT.value,
                                 mode=RunnerMode.SIMULATE.value, simulation_config=simulation_config,
                                 output_dir=default_output_dir(),
                                 title="TabularQAgentAttacker vs TabularQAgentDefender",
                                 q_agent_config=q_agent_config,
                                 initial_state_path=default_output_dir() + "/initial_state/initial_state.pkl")
    return client_config

def default_config() -> ClientConfig:
    """
    :return: Default configuration for the experiment
    """
    simulation_config = SimulationConfig(render=False, sleep=0.8, video=True, log_frequency=1, video_fps=5,
                                         video_dir=default_output_dir() + "/videos", num_episodes=1000,
                                         gifs=True, gif_dir=default_output_dir() + "/gifs",
                                         video_frequency=1)
    env_name = "idsgame-v3"
    client_config = ClientConfig(env_name=env_name, attacker_type=AgentType.RANDOM.value,
                                 defender_type=AgentType.DEFEND_MINIMAL_VALUE.value,
                                 mode=RunnerMode.SIMULATE.value, simulation_config=simulation_config,
                                 output_dir=default_output_dir(),
                                 title="RandomAttacker vs DefendMinimalDefender")
    return client_config

def default_config() -> ClientConfig:
    """
    :return: Default configuration for the experiment
    """
    pg_agent_config = PolicyGradientAgentConfig(gamma=0.999, alpha_attacker=0.001, epsilon=1, render=False,
                                                eval_sleep=0.9, min_epsilon=0.01, eval_episodes=100,
                                                train_log_frequency=100, epsilon_decay=0.9999, video=True,
                                                eval_log_frequency=1, video_fps=5,
                                                video_dir=default_output_dir() + "/results/videos",
                                                num_episodes=2200001, eval_render=False, gifs=True,
                                                gif_dir=default_output_dir() + "/results/gifs",
                                                eval_frequency=1000, attacker=True, defender=False,
                                                video_frequency=101,
                                                save_dir=default_output_dir() + "/results/data",
                                                checkpoint_freq=5000, input_dim_attacker=(4 + 2) * 3 * 1,
                                                output_dim_attacker=4 * 3, hidden_dim=32,
                                                num_hidden_layers=1, batch_size=8, gpu=True,
                                                tensorboard=True,
                                                tensorboard_dir=default_output_dir() + "/results/tensorboard",
                                                optimizer="Adam", lr_exp_decay=False, lr_decay_rate=0.999,
                                                state_length=1, normalize_features=False,
                                                merged_ad_features=True, zero_mean_features=False, gpu_id=0)
    # input_dim = (4 + 3) * 2, output_dim_attacker = 4 * 2,
    env_name = "idsgame-random_defense-v14"
    client_config = ClientConfig(env_name=env_name, attacker_type=AgentType.REINFORCE_AGENT.value,
                                 mode=RunnerMode.TRAIN_ATTACKER.value, pg_agent_config=pg_agent_config,
                                 output_dir=default_output_dir(),
                                 title="TrainingREINFORCEAgent vs RandomDefender",
                                 run_many=False, random_seeds=[0, 999, 299, 399, 499])
    client_config = hp_tuning_config(client_config)
    return client_config

def default_config() -> ClientConfig:
    """
    :return: Default configuration for the experiment
    """
    dqn_config = DQNConfig(input_dim=242, defender_output_dim=242, attacker_output_dim=220, hidden_dim=64,
                           replay_memory_size=10000, num_hidden_layers=1, replay_start_size=1000,
                           batch_size=32, target_network_update_freq=1000, gpu=True, tensorboard=True,
                           tensorboard_dir=default_output_dir() + "/results/tensorboard",
                           loss_fn="Huber", optimizer="Adam", lr_exp_decay=True, lr_decay_rate=0.9999)
    q_agent_config = QAgentConfig(gamma=0.999, alpha=0.00001, epsilon=1, render=False, eval_sleep=0.9,
                                  min_epsilon=0.01, eval_episodes=100, train_log_frequency=100,
                                  epsilon_decay=0.9999, video=True, eval_log_frequency=1, video_fps=5,
                                  video_dir=default_output_dir() + "/results/videos", num_episodes=20001,
                                  eval_render=False, gifs=True,
                                  gif_dir=default_output_dir() + "/results/gifs", eval_frequency=1000,
                                  attacker=True, defender=True, video_frequency=101,
                                  save_dir=default_output_dir() + "/results/data",
                                  dqn_config=dqn_config, checkpoint_freq=5000)
    env_name = "idsgame-v4"
    client_config = ClientConfig(env_name=env_name, attacker_type=AgentType.DQN_AGENT.value,
                                 defender_type=AgentType.DQN_AGENT.value,
                                 mode=RunnerMode.TRAIN_DEFENDER_AND_ATTACKER.value,
                                 q_agent_config=q_agent_config, output_dir=default_output_dir(),
                                 title="TrainingDQNAgent vs TrainingDQNAgent",
                                 run_many=True, random_seeds=[0, 999, 299, 399, 499])
    return client_config

def default_config() -> ClientConfig:
    """
    :return: Default configuration for the experiment
    """
    q_agent_config = QAgentConfig(gamma=0.999, alpha=0.0005, epsilon=1, render=False, eval_sleep=0.9,
                                  min_epsilon=0.01, eval_episodes=100, train_log_frequency=100,
                                  epsilon_decay=0.9999, video=True, eval_log_frequency=1, video_fps=5,
                                  video_dir=default_output_dir() + "/results/videos", num_episodes=20001,
                                  eval_render=False, gifs=True,
                                  gif_dir=default_output_dir() + "/results/gifs", eval_frequency=1000,
                                  attacker=False, defender=True, video_frequency=101,
                                  save_dir=default_output_dir() + "/results/data")
    env_name = "idsgame-maximal_attack-v8"
    client_config = ClientConfig(env_name=env_name, defender_type=AgentType.TABULAR_Q_AGENT.value,
                                 mode=RunnerMode.TRAIN_DEFENDER.value, q_agent_config=q_agent_config,
                                 output_dir=default_output_dir(),
                                 title="AttackMaximalAttacker vs TrainingQAgent",
                                 run_many=True, random_seeds=[0, 999, 299, 399, 499])
    # client_config.hp_tuning = True
    # client_config.hp_tuning_config = HpTuningConfig(param_1="alpha", param_2="epsilon_decay",
    #                                                 alpha=[0.0001, 0.001, 0.003, 0.05, 0.03],
    #                                                 epsilon_decay=[0.99999, 0.9999, 0.999, 0.99, 0.9])
    # client_config.run_many = False
    return client_config

def default_config() -> ClientConfig:
    """
    :return: Default configuration for the experiment
    """
    env_name = "idsgame-minimal_defense-v18"
    client_config = ClientConfig(env_name=env_name, attacker_type=AgentType.MANUAL_ATTACK.value,
                                 mode=RunnerMode.MANUAL_ATTACKER.value, output_dir=default_output_dir(),
                                 title="ManualAttacker vs DefendMinimalDefender")
    return client_config

def default_config() -> ClientConfig:
    """
    :return: Default configuration for the experiment
    """
    pg_agent_config = PolicyGradientAgentConfig(
        gamma=1, alpha_attacker=0.0001, epsilon=1, render=False, alpha_defender=0.0001,
        eval_sleep=0.9, min_epsilon=0.01, eval_episodes=1000, train_log_frequency=1,
        epsilon_decay=0.9999, video=True, eval_log_frequency=500, video_fps=5,
        video_dir=default_output_dir() + "/results/videos", num_episodes=100000000,
        eval_render=False, gifs=True, gif_dir=default_output_dir() + "/results/gifs",
        eval_frequency=550000000, attacker=False, defender=True, video_frequency=1001,
        save_dir=default_output_dir() + "/results/data", checkpoint_freq=100,
        input_dim_attacker=((4 + 2) * 4), output_dim_attacker=(4 + 1) * 4,
        input_dim_defender=((4 + 1) * 4), output_dim_defender=5 * 4,
        hidden_dim=128, num_hidden_layers=2, pi_hidden_layers=1, pi_hidden_dim=128,
        vf_hidden_layers=1, vf_hidden_dim=128, batch_size=2000, gpu=False, tensorboard=True,
        tensorboard_dir=default_output_dir() + "/results/tensorboard", optimizer="Adam",
        lr_exp_decay=False, lr_decay_rate=0.999, state_length=1, normalize_features=False,
        merged_ad_features=True, zero_mean_features=False, gpu_id=0, lstm_network=False,
        lstm_seq_length=4, num_lstm_layers=2, optimization_iterations=10, eps_clip=0.2,
        max_gradient_norm=0.5, gae_lambda=0.95, cnn_feature_extractor=False, features_dim=512,
        flatten_feature_planes=False, cnn_type=5, vf_coef=0.5, ent_coef=0.001,
        render_attacker_view=False, lr_progress_power_decay=4, lr_progress_decay=True,
        use_sde=False, sde_sample_freq=4, one_hot_obs=False, lstm_core=False, lstm_hidden_dim=32,
        multi_channel_obs=False,
        channel_1_dim=32, channel_1_layers=2, channel_1_input_dim=16,
        channel_2_dim=32, channel_2_layers=2, channel_2_input_dim=16,
        channel_3_dim=32, channel_3_layers=2, channel_3_input_dim=4,
        channel_4_dim=32, channel_4_layers=2, channel_4_input_dim=4,
        mini_batch_size=64, ar_policy=True,
        attacker_node_input_dim=((4 + 2) * 4), attacker_at_net_input_dim=(4 + 2),
        attacker_at_net_output_dim=(4 + 1), attacker_node_net_output_dim=4,
        defender_node_input_dim=((4 + 1) * 4), defender_at_net_input_dim=(4 + 1),
        defender_node_net_output_dim=4, defender_at_net_output_dim=5)
    env_name = "idsgame-maximal_attack-v20"
    client_config = ClientConfig(env_name=env_name, defender_type=AgentType.PPO_OPENAI_AGENT.value,
                                 mode=RunnerMode.TRAIN_DEFENDER.value, pg_agent_config=pg_agent_config,
                                 output_dir=default_output_dir(),
                                 title="AttackMaximalAttacker vs OpenAI-PPO",
                                 run_many=False, random_seeds=[0, 999, 299, 399, 499], random_seed=999)
    # client_config = hp_tuning_config(client_config)
    return client_config

def default_config() -> ClientConfig:
    """
    :return: Default configuration for the experiment
    """
    env_name = "idsgame-random_attack-v4"
    client_config = ClientConfig(env_name=env_name, defender_type=AgentType.MANUAL_DEFENSE.value,
                                 mode=RunnerMode.MANUAL_DEFENDER.value, output_dir=default_output_dir(),
                                 title="RandomAttacker vs ManualDefender")
    return client_config

def default_config() -> ClientConfig:
    """
    :return: Default configuration for the experiment
    """
    env_name = "idsgame-v14"
    pg_agent_config = PolicyGradientAgentConfig(
        gamma=0.999, alpha_attacker=0.001, epsilon=1, render=False, eval_sleep=0.9, min_epsilon=0.01,
        eval_episodes=100, train_log_frequency=100, epsilon_decay=0.9999, video=True,
        eval_log_frequency=1, video_fps=5, video_dir=default_output_dir() + "/results/videos",
        num_episodes=10000, eval_render=False, gifs=True,
        gif_dir=default_output_dir() + "/results/gifs", eval_frequency=1000, attacker=True,
        defender=False, video_frequency=101, save_dir=default_output_dir() + "/results/data",
        checkpoint_freq=1000, input_dim_attacker=(4 + 3) * 2, output_dim_attacker=4 * 2,
        hidden_dim=32, num_hidden_layers=1, batch_size=8, gpu=True, tensorboard=True,
        tensorboard_dir=default_output_dir() + "/results/tensorboard", optimizer="Adam",
        lr_exp_decay=False, lr_decay_rate=0.999, state_length=1, normalize_features=False,
        merged_ad_features=True, zero_mean_features=False, gpu_id=0,
        attacker_load_path="/Users/kimham/workspace/rl/gym-idsgame/experiments/training/v14/minimal_defense/reinforce/results/data/0/1589370657.761538_attacker_policy_network.pt")
    client_config = ClientConfig(env_name=env_name, attacker_type=AgentType.REINFORCE_AGENT.value,
                                 mode=RunnerMode.MANUAL_DEFENDER.value, output_dir=default_output_dir(),
                                 title="REINFORCE vs ManualDefender", pg_agent_config=pg_agent_config,
                                 bot_attacker=True)
    return client_config

def test_sim_random_vs_random(version) -> None:
    """
    Runs a short simulation of a random attacker against a random defender

    :param version: the idsgame environment version to test
    :return: None
    """
    simulation_config = SimulationConfig(log_frequency=1, num_episodes=10)
    env_name = "idsgame-v" + str(version)
    client_config = ClientConfig(env_name=env_name, attacker_type=AgentType.RANDOM.value,
                                 defender_type=AgentType.RANDOM.value, mode=RunnerMode.SIMULATE.value,
                                 simulation_config=simulation_config, output_dir=default_output_dir(),
                                 title="RandomAttacker vs RandomDefender")
    Runner.run(client_config)


def test_sim_random_vs_defend_minimal(version) -> None:
    """
    Runs a short simulation of a random attacker against the defend-minimal defender

    :param version: the idsgame environment version to test
    :return: None
    """
    simulation_config = SimulationConfig(log_frequency=1, num_episodes=10)
    env_name = "idsgame-v" + str(version)
    client_config = ClientConfig(env_name=env_name, attacker_type=AgentType.RANDOM.value,
                                 defender_type=AgentType.DEFEND_MINIMAL_VALUE.value,
                                 mode=RunnerMode.SIMULATE.value, simulation_config=simulation_config,
                                 output_dir=default_output_dir())
    Runner.run(client_config)


def test_train_random_attack_tabular_q_learning(version) -> None:
    """
    Runs a short training of a tabular Q-learning defender against a random attacker

    :param version: the idsgame environment version to test
    :return: None
    """
    q_agent_config = QAgentConfig(num_episodes=10, eval_frequency=100, attacker=False, defender=True)
    env_name = "idsgame-random_attack-v" + str(version)
    client_config = ClientConfig(env_name=env_name, defender_type=AgentType.TABULAR_Q_AGENT.value,
                                 mode=RunnerMode.TRAIN_DEFENDER.value, q_agent_config=q_agent_config,
                                 output_dir=default_output_dir())
    Runner.run(client_config)

def default_config() -> ClientConfig:
    """
    :return: Default configuration for the experiment
    """
    opponent_pool_config = OpponentPoolConfig(pool_maxsize=100000, pool_increment_period=300,
                                              head_to_head_period=1, quality_scores=True,
                                              quality_score_eta=0.01, initial_quality=1000, pool_prob=0.5)
    pg_agent_config = PolicyGradientAgentConfig(
        gamma=1, alpha_attacker=0.000008, epsilon=1, render=False, alpha_defender=0.000003,
        eval_sleep=0.9, min_epsilon=0.01, eval_episodes=1000, train_log_frequency=1,
        epsilon_decay=0.9999, video=True, eval_log_frequency=500, video_fps=5,
        video_dir=default_output_dir() + "/results/videos", num_episodes=100000000,
        eval_render=False, gifs=True, gif_dir=default_output_dir() + "/results/gifs",
        eval_frequency=175000, attacker=True, defender=True, video_frequency=1001,
        save_dir=default_output_dir() + "/results/data", checkpoint_freq=250,
        input_dim_attacker=((4 + 2) * 2) * 1, output_dim_attacker=(4 + 1) * 2,
        input_dim_defender=((4 + 1) * 3) * 1, output_dim_defender=5 * 3,
        hidden_dim=64, num_hidden_layers=4, batch_size=2000, gpu=False, tensorboard=True,
        tensorboard_dir=default_output_dir() + "/results/tensorboard", optimizer="Adam",
        lr_exp_decay=False, lr_decay_rate=0.999, state_length=1, normalize_features=False,
        merged_ad_features=True, zero_mean_features=False, gpu_id=0, lstm_network=False,
        lstm_seq_length=4, num_lstm_layers=2, optimization_iterations=10, eps_clip=0.2,
        max_gradient_norm=0.5, gae_lambda=0.95, cnn_feature_extractor=False, features_dim=512,
        flatten_feature_planes=False, cnn_type=5, vf_coef=0.5, ent_coef=0.01,
        render_attacker_view=False, lr_progress_power_decay=4, lr_progress_decay=True,
        use_sde=False, sde_sample_freq=4, opponent_pool=True,
        opponent_pool_config=opponent_pool_config, alternating_optimization=True,
        alternating_period=300, baselines_in_pool=True)
    env_name = "idsgame-v18"
    client_config = ClientConfig(env_name=env_name, attacker_type=AgentType.PPO_OPENAI_AGENT.value,
                                 defender_type=AgentType.PPO_OPENAI_AGENT.value,
                                 mode=RunnerMode.TRAIN_DEFENDER_AND_ATTACKER.value,
                                 pg_agent_config=pg_agent_config, output_dir=default_output_dir(),
                                 title="OpenAI-PPO vs OpenAI-PPO",
                                 run_many=False, random_seeds=[0, 999, 299, 399, 499])
    # client_config = hp_tuning_config(client_config)
    return client_config

def default_config() -> ClientConfig:
    """
    :return: Default configuration for the experiment
    """
    env_name = "idsgame-v19"
    opponent_pool_config = OpponentPoolConfig(pool_maxsize=100000, pool_increment_period=50,
                                              head_to_head_period=1, quality_scores=True,
                                              quality_score_eta=0.01, initial_quality=1000, pool_prob=0.5)
    pg_agent_config = PolicyGradientAgentConfig(
        gamma=1, alpha_attacker=0.00001, epsilon=1, render=False, alpha_defender=0.0001,
        eval_sleep=0.9, min_epsilon=0.01, eval_episodes=100, train_log_frequency=1,
        epsilon_decay=0.9999, video=True, eval_log_frequency=1, video_fps=5,
        video_dir=default_output_dir() + "/results/videos", num_episodes=100000000,
        eval_render=False, gifs=True, gif_dir=default_output_dir() + "/results/gifs",
        eval_frequency=100000, attacker=True, defender=False, video_frequency=101,
        save_dir=default_output_dir() + "/results/data", checkpoint_freq=5000,
        input_dim_attacker=(4 + 2) * 2, output_dim_attacker=(4 + 1) * 2,
        input_dim_defender=(4 + 2) * 3, output_dim_defender=5 * 3,
        hidden_dim=64, num_hidden_layers=4, batch_size=2000, gpu=False, tensorboard=True,
        tensorboard_dir=default_output_dir() + "/results/tensorboard", optimizer="Adam",
        lr_exp_decay=False, lr_decay_rate=0.999, state_length=1, normalize_features=False,
        merged_ad_features=True, zero_mean_features=False, gpu_id=0, lstm_network=False,
        lstm_seq_length=4, num_lstm_layers=2, optimization_iterations=10, eps_clip=0.2,
        max_gradient_norm=0.5, gae_lambda=0.95, cnn_feature_extractor=False, features_dim=512,
        flatten_feature_planes=False,
        attacker_load_path="/home/kim/storage/workspace/gym-idsgame/experiments/manual_play/v19/minimal_defense/manual_vs_openai_ppo/v4/1591164917.874881_attacker_policy_network.zip",
        ar_policy=True, attacker_node_input_dim=((4 + 2) * 4), attacker_at_net_input_dim=(4 + 2),
        attacker_at_net_output_dim=(4 + 1), attacker_node_net_output_dim=4)
    client_config = ClientConfig(env_name=env_name, attacker_type=AgentType.PPO_OPENAI_AGENT.value,
                                 mode=RunnerMode.MANUAL_DEFENDER.value, output_dir=default_output_dir(),
                                 title="OpenAI PPO vs ManualDefender", pg_agent_config=pg_agent_config,
                                 bot_attacker=True)
    return client_config

def setup_train(config: ClientConfig, random_seed):
    """
    Sets up the artefact directories, logger, and seed-specific paths for a training run

    :param config: the client config to update
    :param random_seed: the random seed of the run
    :return: the timestamp string used to name the run artefacts
    """
    time_str = str(time.time())
    util.create_artefact_dirs(config.output_dir, random_seed)
    logger = util.setup_logger("tabular_q_vs_random_defense-v3",
                               config.output_dir + "/results/logs/" + str(random_seed) + "/",
                               time_str=time_str)
    config.q_agent_config.save_dir = default_output_dir() + "/results/data/" + str(random_seed) + "/"
    config.q_agent_config.video_dir = default_output_dir() + "/results/videos/" + str(random_seed) + "/"
    config.q_agent_config.gif_dir = default_output_dir() + "/results/gifs/" + str(random_seed) + "/"
    # config.q_agent_config.dqn_config.tensorboard_dir = default_output_dir() + "/results/tensorboard/" \
    #                                                    + str(random_seed) + "/"
    config.logger = logger
    config.q_agent_config.logger = logger
    config.q_agent_config.random_seed = random_seed
    config.random_seed = random_seed
    config.q_agent_config.to_csv(config.output_dir + "/results/hyperparameters/" + str(random_seed) + "/"
                                 + time_str + ".csv")
    return time_str

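# A minimal driver sketch (assumption, not part of the original script): call setup_train() once per
# random seed to create the per-seed artefact directories and logger before starting training via
# Runner.run, mirroring how run_many/random_seeds are declared in the configs above. default_config
# and Runner are assumed to be available in this module's scope.
if __name__ == '__main__':
    config = default_config()
    seeds = config.random_seeds if config.run_many else [config.random_seed]
    for seed in seeds:
        setup_train(config, seed)
        Runner.run(config)
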
def setup_agent_defender():
    """
    Sets up the defender agent, client config, and environment for the experiment

    :return: the defender agent, the client config, and the created environment
    """
    q_agent_config = QAgentConfig(gamma=0.999, alpha=0.0005, epsilon=1, render=False, eval_sleep=0.9,
                                  min_epsilon=0.01, eval_episodes=100, train_log_frequency=50,
                                  epsilon_decay=0.9999, video=True, eval_log_frequency=1, video_fps=5,
                                  video_dir=default_output_dir() + "/results/videos", num_episodes=5001,
                                  eval_render=False, gifs=True,
                                  gif_dir=default_output_dir() + "/results/gifs", eval_frequency=1000,
                                  attacker=False, defender=True, video_frequency=101,
                                  save_dir=default_output_dir() + "/results/data",
                                  a_mu0=-10, a_lambda0=4.0, a_alpha0=3.0, a_beta0=3.0,
                                  num_mixture_samples=1000)
    # env_name = "idsgame-random_defense-v3"
    env_name = "idsgame-maximal_attack-v3"
    client_config = ClientConfig(env_name=env_name, defender_type=AgentType.TABULAR_Q_AGENT.value,
                                 mode=RunnerMode.TRAIN_DEFENDER.value, q_agent_config=q_agent_config,
                                 output_dir=default_output_dir(),
                                 title="TrainingQAgent vs DefendMinimalDefender",
                                 run_many=True, random_seeds=[0, 999, 299])
    env = gym.make(client_config.env_name, idsgame_config=client_config.idsgame_config,
                   save_dir=client_config.output_dir + "/results/data/" + str(client_config.random_seed),
                   initial_state_path=client_config.initial_state_path)
    defender = BQLAgent(env, client_config.q_agent_config)
    return defender, client_config, env

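# Usage sketch (assumption, not part of the original script): the Bayesian Q-learning defender
# returned by setup_agent_defender() is trained directly against the created environment. BQLAgent
# is assumed to expose a train() method like the other agents in this project; adapt the call if
# its interface differs.
if __name__ == '__main__':
    defender, client_config, env = setup_agent_defender()
    defender.train()
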
def default_config() -> ClientConfig:
    """
    :return: Default configuration for the experiment
    """
    pg_agent_config = PolicyGradientAgentConfig(
        gamma=0.999, alpha_attacker=0.00001, epsilon=1, render=False, eval_sleep=0.9, min_epsilon=0.01,
        eval_episodes=100, train_log_frequency=100, epsilon_decay=0.9999, video=True,
        eval_log_frequency=1, video_fps=5, video_dir=default_output_dir() + "/results/videos",
        num_episodes=200001, eval_render=False, gifs=True,
        gif_dir=default_output_dir() + "/results/gifs", eval_frequency=10000, attacker=False,
        defender=True, video_frequency=101, save_dir=default_output_dir() + "/results/data",
        checkpoint_freq=5000, input_dim_attacker=33, output_dim_defender=33, hidden_dim=64,
        num_hidden_layers=1, batch_size=32, gpu=False, tensorboard=True,
        tensorboard_dir=default_output_dir() + "/results/tensorboard", optimizer="Adam",
        lr_exp_decay=False, lr_decay_rate=0.999)
    env_name = "idsgame-maximal_attack-v8"
    client_config = ClientConfig(env_name=env_name, defender_type=AgentType.REINFORCE_AGENT.value,
                                 mode=RunnerMode.TRAIN_DEFENDER.value, pg_agent_config=pg_agent_config,
                                 output_dir=default_output_dir(),
                                 title="AttackMaximalAttacker vs REINFORCE",
                                 run_many=False, random_seeds=[0, 999, 299, 399, 499])
    client_config = hp_tuning_config(client_config)
    return client_config

def default_config() -> ClientConfig:
    """
    :return: Default configuration for the experiment
    """
    opponent_pool_config = OpponentPoolConfig(pool_maxsize=100000, pool_increment_period=50,
                                              head_to_head_period=1, quality_scores=True,
                                              quality_score_eta=0.01, initial_quality=1000, pool_prob=0.5)
    pg_agent_config = PolicyGradientAgentConfig(
        gamma=0.999, alpha_attacker=0.0001, alpha_defender=0.0001, epsilon=1, render=False,
        eval_sleep=0.9, min_epsilon=0.01, eval_episodes=100, train_log_frequency=1,
        epsilon_decay=0.9999, video=True, eval_log_frequency=1, video_fps=5,
        video_dir=default_output_dir() + "/results/videos", num_episodes=1350001,
        eval_render=False, gifs=True, gif_dir=default_output_dir() + "/results/gifs",
        eval_frequency=20000, attacker=True, defender=True, video_frequency=101,
        save_dir=default_output_dir() + "/results/data", checkpoint_freq=100,
        input_dim_attacker=((4 + 2) * 4), output_dim_attacker=(4 + 1) * 4,
        input_dim_defender=((4 + 1) * 4), output_dim_defender=5 * 4,
        hidden_dim=128, num_hidden_layers=2, batch_size=64, gpu=False, tensorboard=True,
        tensorboard_dir=default_output_dir() + "/results/tensorboard", optimizer="Adam",
        lr_exp_decay=True, lr_decay_rate=0.999, state_length=1, alternating_optimization=True,
        alternating_period=500, opponent_pool=True, opponent_pool_config=opponent_pool_config,
        normalize_features=False, merged_ad_features=True, zero_mean_features=False)
    env_name = "idsgame-v19"
    client_config = ClientConfig(env_name=env_name, attacker_type=AgentType.ACTOR_CRITIC_AGENT.value,
                                 defender_type=AgentType.ACTOR_CRITIC_AGENT.value,
                                 mode=RunnerMode.TRAIN_DEFENDER_AND_ATTACKER.value,
                                 pg_agent_config=pg_agent_config, output_dir=default_output_dir(),
                                 title="Actor-Critic vs Actor-Critic",
                                 run_many=False, random_seeds=[0, 999, 299, 399, 499], random_seed=0)
    # client_config = hp_tuning_config(client_config)
    return client_config

def default_config() -> ClientConfig:
    """
    :return: Default configuration for the experiment
    """
    q_agent_config = QAgentConfig(gamma=0.999, alpha=0.0005, epsilon=1, render=False, eval_sleep=0.9,
                                  min_epsilon=0.01, eval_episodes=100, train_log_frequency=100,
                                  epsilon_decay=0.999, video=True, eval_log_frequency=1, video_fps=5,
                                  video_dir=default_output_dir() + "/results/videos", num_episodes=20001,
                                  eval_render=False, gifs=True,
                                  gif_dir=default_output_dir() + "/results/gifs", eval_frequency=2000,
                                  attacker=True, defender=False, video_frequency=101,
                                  save_dir=default_output_dir() + "/results/data",
                                  tab_full_state_space=True)
    env_name = "idsgame-minimal_defense-v13"
    client_config = ClientConfig(env_name=env_name, attacker_type=AgentType.TABULAR_Q_AGENT.value,
                                 mode=RunnerMode.TRAIN_ATTACKER.value, q_agent_config=q_agent_config,
                                 output_dir=default_output_dir(),
                                 title="TrainingQAgent vs DefendMinimalDefender",
                                 run_many=False, random_seeds=[0, 999, 299, 399, 499])
    return client_config

def default_config() -> ClientConfig:
    """
    :return: Default configuration for the experiment
    """
    pg_agent_config = PolicyGradientAgentConfig(
        gamma=1, alpha_attacker=0.0001, epsilon=1, render=False, alpha_defender=0.0001,
        eval_sleep=0.9, min_epsilon=0.01, eval_episodes=100, train_log_frequency=1,
        epsilon_decay=0.9999, video=True, eval_log_frequency=1, video_fps=5,
        video_dir=default_output_dir() + "/results/videos", num_episodes=100000000,
        eval_render=False, gifs=True, gif_dir=default_output_dir() + "/results/gifs",
        eval_frequency=25000, attacker=False, defender=True, video_frequency=101,
        save_dir=default_output_dir() + "/results/data", checkpoint_freq=5000,
        input_dim_attacker=(4 + 1) * 3, output_dim_attacker=4 * 3,
        input_dim_defender=(4 + 1) * 3, output_dim_defender=5 * 3,
        hidden_dim=32, num_hidden_layers=2, batch_size=2000, gpu=False, tensorboard=True,
        tensorboard_dir=default_output_dir() + "/results/tensorboard", optimizer="Adam",
        lr_exp_decay=False, lr_decay_rate=0.999, state_length=1, normalize_features=False,
        merged_ad_features=False, zero_mean_features=False, gpu_id=0, lstm_network=False,
        lstm_seq_length=4, num_lstm_layers=2, optimization_iterations=10, eps_clip=0.2,
        max_gradient_norm=0.5, gae_lambda=0.95)
    env_name = "idsgame-maximal_attack-v17"
    client_config = ClientConfig(env_name=env_name, attacker_type=AgentType.PPO_OPENAI_AGENT.value,
                                 defender_type=AgentType.PPO_OPENAI_AGENT.value,
                                 mode=RunnerMode.TRAIN_DEFENDER.value, pg_agent_config=pg_agent_config,
                                 output_dir=default_output_dir(),
                                 title="AttackMaximalAttacker vs OpenAI-PPO",
                                 run_many=False, random_seeds=[0, 999, 299, 399, 499])
    # client_config = hp_tuning_config(client_config)
    return client_config

def default_config() -> ClientConfig:
    """
    :return: Default configuration for the experiment
    """
    env_name = "idsgame-v19"
    pg_agent_config = PolicyGradientAgentConfig(
        gamma=1, alpha_attacker=0.0001, epsilon=1, render=False, alpha_defender=0.0001,
        eval_sleep=0.9, min_epsilon=0.01, eval_episodes=1000, train_log_frequency=1,
        epsilon_decay=0.9999, video=True, eval_log_frequency=500, video_fps=5,
        video_dir=default_output_dir() + "/results/videos", num_episodes=100000000,
        eval_render=False, gifs=True, gif_dir=default_output_dir() + "/results/gifs",
        eval_frequency=55000, attacker=False, defender=True, video_frequency=1001,
        save_dir=default_output_dir() + "/results/data", checkpoint_freq=250,
        input_dim_attacker=((4 + 2) * 4), output_dim_attacker=(4 + 1) * 4,
        input_dim_defender=((4 + 1) * 4), output_dim_defender=5 * 4,
        hidden_dim=128, num_hidden_layers=2, pi_hidden_layers=1, pi_hidden_dim=128,
        vf_hidden_layers=1, vf_hidden_dim=128, batch_size=2000, gpu=False, tensorboard=True,
        tensorboard_dir=default_output_dir() + "/results/tensorboard", optimizer="Adam",
        lr_exp_decay=False, lr_decay_rate=0.999, state_length=1, normalize_features=False,
        merged_ad_features=True, zero_mean_features=False, gpu_id=0, lstm_network=False,
        lstm_seq_length=4, num_lstm_layers=2, optimization_iterations=10, eps_clip=0.2,
        max_gradient_norm=0.5, gae_lambda=0.95, cnn_feature_extractor=False, features_dim=512,
        flatten_feature_planes=False, cnn_type=5, vf_coef=0.5, ent_coef=0.001,
        render_attacker_view=False, lr_progress_power_decay=4, lr_progress_decay=True,
        use_sde=False, sde_sample_freq=4, one_hot_obs=False, lstm_core=False, lstm_hidden_dim=32,
        multi_channel_obs=False,
        channel_1_dim=32, channel_1_layers=2, channel_1_input_dim=16,
        channel_2_dim=32, channel_2_layers=2, channel_2_input_dim=16,
        channel_3_dim=32, channel_3_layers=2, channel_3_input_dim=4,
        channel_4_dim=32, channel_4_layers=2, channel_4_input_dim=4,
        mini_batch_size=64, ar_policy=True,
        attacker_node_input_dim=((4 + 2) * 4), attacker_at_net_input_dim=(4 + 2),
        attacker_at_net_output_dim=(4 + 1), attacker_node_net_output_dim=4,
        defender_node_input_dim=((4 + 1) * 4), defender_at_net_input_dim=(4 + 1),
        defender_node_net_output_dim=4, defender_at_net_output_dim=5,
        defender_load_path="/home/kim/workspace/gym-idsgame/experiments/manual_play/v19/maximal_attack/manual_vs_openai_ppo/1592125075.4390159_defender_node_policy_network.zip")
    client_config = ClientConfig(env_name=env_name, defender_type=AgentType.PPO_OPENAI_AGENT.value,
                                 mode=RunnerMode.MANUAL_ATTACKER.value, output_dir=default_output_dir(),
                                 title="ManualAttacker vs OpenAI PPO", pg_agent_config=pg_agent_config,
                                 bot_defender=True)
    return client_config

def default_config() -> ClientConfig:
    """
    :return: Default configuration for the experiment
    """
    env_name = "idsgame-v16"
    pg_agent_config = PolicyGradientAgentConfig(
        gamma=1, alpha_attacker=0.00001, epsilon=1, render=False, alpha_defender=0.0001,
        eval_sleep=0.9, min_epsilon=0.01, eval_episodes=100, train_log_frequency=1,
        epsilon_decay=0.9999, video=True, eval_log_frequency=1, video_fps=5,
        video_dir=default_output_dir() + "/results/videos", num_episodes=100000000,
        eval_render=False, gifs=True, gif_dir=default_output_dir() + "/results/gifs",
        eval_frequency=100000, attacker=True, defender=False, video_frequency=101,
        save_dir=default_output_dir() + "/results/data", checkpoint_freq=5000,
        input_dim_attacker=(4 + 2) * 3, output_dim_attacker=4 * 3,
        input_dim_defender=(4 + 2) * 3, output_dim_defender=5 * 3,
        hidden_dim=64, num_hidden_layers=4, batch_size=2000, gpu=False, tensorboard=True,
        tensorboard_dir=default_output_dir() + "/results/tensorboard", optimizer="Adam",
        lr_exp_decay=False, lr_decay_rate=0.999, state_length=1, normalize_features=False,
        merged_ad_features=True, zero_mean_features=False, gpu_id=1, lstm_network=False,
        lstm_seq_length=4, num_lstm_layers=2, optimization_iterations=10, eps_clip=0.2,
        max_gradient_norm=0.5, gae_lambda=0.95, cnn_feature_extractor=False, features_dim=512,
        flatten_feature_planes=False,
        attacker_load_path="/Users/kimham/workspace/rl/gym-idsgame/experiments/manual_play/v16/minimal_defense/manual_vs_openai_ppo/1590564756.5454807_attacker_policy_network.zip")
    client_config = ClientConfig(env_name=env_name, attacker_type=AgentType.PPO_OPENAI_AGENT.value,
                                 mode=RunnerMode.MANUAL_DEFENDER.value, output_dir=default_output_dir(),
                                 title="OpenAI PPO vs ManualDefender", pg_agent_config=pg_agent_config,
                                 bot_attacker=True)
    return client_config

def default_config() -> ClientConfig:
    """
    :return: Default configuration for the experiment
    """
    pg_agent_config = PolicyGradientAgentConfig(
        gamma=1, alpha_attacker=0.00003, epsilon=1, render=False, alpha_defender=0.0001,
        eval_sleep=0.9, min_epsilon=0.01, eval_episodes=1000, train_log_frequency=1,
        epsilon_decay=0.9999, video=True, eval_log_frequency=500, video_fps=5,
        video_dir=default_output_dir() + "/results/videos", num_episodes=100000000,
        eval_render=False, gifs=True, gif_dir=default_output_dir() + "/results/gifs",
        eval_frequency=175000, attacker=True, defender=False, video_frequency=1001,
        save_dir=default_output_dir() + "/results/data", checkpoint_freq=250,
        input_dim_attacker=((4 + 2) * 2), output_dim_attacker=(4 + 1) * 2,
        input_dim_defender=((4 + 1) * 3), output_dim_defender=5 * 3,
        hidden_dim=32, num_hidden_layers=2, batch_size=2000, gpu=True, tensorboard=True,
        tensorboard_dir=default_output_dir() + "/results/tensorboard", optimizer="Adam",
        lr_exp_decay=False, lr_decay_rate=0.999, state_length=1, normalize_features=False,
        merged_ad_features=True, zero_mean_features=False, gpu_id=0, lstm_network=False,
        lstm_seq_length=4, num_lstm_layers=2, optimization_iterations=10, eps_clip=0.2,
        max_gradient_norm=0.5, gae_lambda=0.95, cnn_feature_extractor=False, features_dim=512,
        flatten_feature_planes=False, cnn_type=5, vf_coef=0.5, ent_coef=0.00,
        render_attacker_view=True, lr_progress_power_decay=4, lr_progress_decay=True,
        use_sde=False, sde_sample_freq=4, one_hot_obs=False)
    # input_dim_attacker = (3, 3, 5),
    # output_dim_attacker = (5 * 2) * 3,
    # input_dim_defender = (3, 3, 5),
    # output_dim_defender = 6 * 3,
    # input_dim_attacker = ((5 * 2 + 1) * 3),
    # output_dim_attacker = (5 * 2) * 3,
    # input_dim_defender = ((5 + 1) * 3),
    # output_dim_defender = 6 * 3,
    env_name = "idsgame-minimal_defense-v18"
    client_config = ClientConfig(env_name=env_name, attacker_type=AgentType.PPO_OPENAI_AGENT.value,
                                 mode=RunnerMode.TRAIN_ATTACKER.value, pg_agent_config=pg_agent_config,
                                 output_dir=default_output_dir(),
                                 title="OpenAI-PPO vs DefendMinimalDefender",
                                 run_many=False, random_seeds=[0, 999, 299, 399, 499])
    # client_config = hp_tuning_config(client_config)
    return client_config