Example 1
def hp_tuning_config(client_config: ClientConfig) -> ClientConfig:
    """
    Sets up the client configuration for hyperparameter tuning

    :param client_config: the client config
    :return: the updated client config
    """
    client_config.hp_tuning = True
    client_config.hp_tuning_config = HpTuningConfig(param_1="alpha", param_2="num_hidden_layers",
                                                    alpha=[0.000001, 0.00001, 0.0001, 0.001, 0.01],
                                                    num_hidden_layers=[1, 2, 4, 8, 16])
    client_config.run_many = False
    return client_config
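
Examples 6 and 20 below apply hp_tuning_config on top of an existing experiment configuration. A minimal usage sketch under that assumption, with default_config() as defined in the other examples:

config = default_config()
config = hp_tuning_config(config)  # switches the run into hyperparameter-tuning mode over alpha and num_hidden_layers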
Example 2
def default_config() -> ClientConfig:
    """
    :return: Default configuration for the experiment
    """
    pg_agent_config = PolicyGradientAgentConfig(gamma=0.999, alpha_attacker=0.00001, epsilon=1, render=False, eval_sleep=0.9,
                                                min_epsilon=0.01, eval_episodes=100, train_log_frequency=100,
                                                epsilon_decay=0.9999, video=True, eval_log_frequency=1,
                                                video_fps=5, video_dir=default_output_dir() + "/results/videos",
                                                num_episodes=350001,
                                                eval_render=False, gifs=True,
                                                gif_dir=default_output_dir() + "/results/gifs",
                                                eval_frequency=10000, attacker=True, defender=False, video_frequency=101,
                                                save_dir=default_output_dir() + "/results/data",
                                                checkpoint_freq=5000, input_dim_attacker=88, output_dim_attacker=80, hidden_dim=64,
                                                num_hidden_layers=1, batch_size=32,
                                                gpu=False, tensorboard=True,
                                                tensorboard_dir=default_output_dir() + "/results/tensorboard",
                                                optimizer="Adam", lr_exp_decay=False, lr_decay_rate=0.999,
                                                state_length=1)
    env_name = "idsgame-minimal_defense-v7"
    client_config = ClientConfig(env_name=env_name, attacker_type=AgentType.ACTOR_CRITIC_AGENT.value,
                                 mode=RunnerMode.TRAIN_ATTACKER.value,
                                 pg_agent_config=pg_agent_config, output_dir=default_output_dir(),
                                 title="Actor-Critic vs DefendMinimalDefender",
                                 run_many=False, random_seeds=[0, 999, 299, 399, 499])
    #client_config = hp_tuning_config(client_config)
    return client_config
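
The default_config() functions in this collection only construct the configuration object; Examples 13-15 show configurations being executed with Runner.run. A minimal launch sketch for this example, assuming the same Runner import:

if __name__ == '__main__':
    config = default_config()
    Runner.run(config)  # mode is RunnerMode.TRAIN_ATTACKER, so this trains the actor-critic attacker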
Example 3
def default_config() -> ClientConfig:
    """
    :return: Default configuration for the experiment
    """
    q_agent_config = QAgentConfig(gamma=0.9,
                                  alpha=0.05,
                                  epsilon=1,
                                  render=False,
                                  eval_sleep=0.9,
                                  min_epsilon=0.1,
                                  eval_episodes=100,
                                  train_log_frequency=10,
                                  epsilon_decay=0.999,
                                  video=True,
                                  eval_log_frequency=1,
                                  video_fps=5,
                                  video_dir=default_output_dir() + "/videos",
                                  num_episodes=5000,
                                  eval_render=False,
                                  gifs=True,
                                  gif_dir=default_output_dir() + "/gifs",
                                  eval_frequency=1000,
                                  attacker=True,
                                  defender=False,
                                  video_frequency=101,
                                  save_dir=default_output_dir() + "/data")
    env_name = "idsgame-random_defense-v1"
    client_config = ClientConfig(env_name=env_name,
                                 attacker_type=AgentType.TABULAR_Q_AGENT.value,
                                 mode=RunnerMode.TRAIN_ATTACKER.value,
                                 q_agent_config=q_agent_config,
                                 output_dir=default_output_dir(),
                                 title="TrainingQAgent vs RandomDefender")
    return client_config
Example 4
def default_config() -> ClientConfig:
    """
    :return: Default configuration for the experiment
    """
    simulation_config = SimulationConfig(
        render=True,
        sleep=0.8,
        video=True,
        log_frequency=1,
        video_fps=5,
        video_dir=default_output_dir() + "/videos",
        num_episodes=1000,
        gifs=True,
        gif_dir=default_output_dir() + "/gifs",
        video_frequency=1)
    q_agent_config = QAgentConfig(attacker_load_path=default_output_dir() +
                                  "/q_table/attacker_q_table.npy",
                                  defender_load_path=default_output_dir() +
                                  "/q_table/defender_q_table.npy")
    env_name = "idsgame-v3"
    client_config = ClientConfig(
        env_name=env_name,
        attacker_type=AgentType.TABULAR_Q_AGENT.value,
        defender_type=AgentType.TABULAR_Q_AGENT.value,
        mode=RunnerMode.SIMULATE.value,
        simulation_config=simulation_config,
        output_dir=default_output_dir(),
        title="TabularQAgentAttacker vs TabularQAgentDefender",
        q_agent_config=q_agent_config,
        initial_state_path=default_output_dir() +
        "/initial_state/initial_state.pkl")
    return client_config
Example 5
def default_config() -> ClientConfig:
    """
    :return: Default configuration for the experiment
    """
    simulation_config = SimulationConfig(
        render=False,
        sleep=0.8,
        video=True,
        log_frequency=1,
        video_fps=5,
        video_dir=default_output_dir() + "/videos",
        num_episodes=1000,
        gifs=True,
        gif_dir=default_output_dir() + "/gifs",
        video_frequency=1)
    env_name = "idsgame-v3"
    client_config = ClientConfig(
        env_name=env_name,
        attacker_type=AgentType.RANDOM.value,
        defender_type=AgentType.DEFEND_MINIMAL_VALUE.value,
        mode=RunnerMode.SIMULATE.value,
        simulation_config=simulation_config,
        output_dir=default_output_dir(),
        title="RandomAttacker vs DefendMinimalDefender")
    return client_config
Example 6
def default_config() -> ClientConfig:
    """
    :return: Default configuration for the experiment
    """
    pg_agent_config = PolicyGradientAgentConfig(gamma=0.999, alpha_attacker=0.001, epsilon=1, render=False, eval_sleep=0.9,
                                                min_epsilon=0.01, eval_episodes=100, train_log_frequency=100,
                                                epsilon_decay=0.9999, video=True, eval_log_frequency=1,
                                                video_fps=5, video_dir=default_output_dir() + "/results/videos",
                                                num_episodes=2200001,
                                                eval_render=False, gifs=True,
                                                gif_dir=default_output_dir() + "/results/gifs",
                                                eval_frequency=1000, attacker=True, defender=False, video_frequency=101,
                                                save_dir=default_output_dir() + "/results/data",
                                                checkpoint_freq=5000, input_dim_attacker=(4 + 2) * 3 * 1, output_dim_attacker=4 * 3,
                                                hidden_dim=32,
                                                num_hidden_layers=1, batch_size=8,
                                                gpu=True, tensorboard=True,
                                                tensorboard_dir=default_output_dir() + "/results/tensorboard",
                                                optimizer="Adam", lr_exp_decay=False, lr_decay_rate=0.999,
                                                state_length=1, normalize_features=False, merged_ad_features=True,
                                                zero_mean_features=False, gpu_id=0)
    #input_dim = (4 + 3) * 2, output_dim_attacker = 4 * 2,
    env_name = "idsgame-random_defense-v14"
    client_config = ClientConfig(env_name=env_name, attacker_type=AgentType.REINFORCE_AGENT.value,
                                 mode=RunnerMode.TRAIN_ATTACKER.value,
                                 pg_agent_config=pg_agent_config, output_dir=default_output_dir(),
                                 title="TrainingREINFORCEAgent vs RandomDefender",
                                 run_many=False, random_seeds=[0, 999, 299, 399, 499])
    client_config = hp_tuning_config(client_config)
    return client_config
Example 7
def default_config() -> ClientConfig:
    """
    :return: Default configuration for the experiment
    """
    dqn_config = DQNConfig(input_dim=242, defender_output_dim=242, attacker_output_dim=220, hidden_dim=64,
                           replay_memory_size=10000,
                           num_hidden_layers=1,
                           replay_start_size=1000, batch_size=32, target_network_update_freq=1000,
                           gpu=True, tensorboard=True, tensorboard_dir=default_output_dir() + "/results/tensorboard",
                           loss_fn="Huber", optimizer="Adam", lr_exp_decay=True, lr_decay_rate=0.9999)
    q_agent_config = QAgentConfig(gamma=0.999, alpha=0.00001, epsilon=1, render=False, eval_sleep=0.9,
                                  min_epsilon=0.01, eval_episodes=100, train_log_frequency=100,
                                  epsilon_decay=0.9999, video=True, eval_log_frequency=1,
                                  video_fps=5, video_dir=default_output_dir() + "/results/videos", num_episodes=20001,
                                  eval_render=False, gifs=True, gif_dir=default_output_dir() + "/results/gifs",
                                  eval_frequency=1000, attacker=True, defender=True, video_frequency=101,
                                  save_dir=default_output_dir() + "/results/data", dqn_config=dqn_config,
                                  checkpoint_freq=5000)
    env_name = "idsgame-v4"
    client_config = ClientConfig(env_name=env_name, attacker_type=AgentType.DQN_AGENT.value,
                                 defender_type=AgentType.DQN_AGENT.value,
                                 mode=RunnerMode.TRAIN_DEFENDER_AND_ATTACKER.value,
                                 q_agent_config=q_agent_config, output_dir=default_output_dir(),
                                 title="TrainingDQNAgent vs TrainingDQNAgent",
                                 run_many=True, random_seeds=[0, 999, 299, 399, 499])
    return client_config
Example 8
def default_config() -> ClientConfig:
    """
    :return: Default configuration for the experiment
    """
    q_agent_config = QAgentConfig(gamma=0.999, alpha=0.0005, epsilon=1, render=False, eval_sleep=0.9,
                                  min_epsilon=0.01, eval_episodes=100, train_log_frequency=100,
                                  epsilon_decay=0.9999, video=True, eval_log_frequency=1,
                                  video_fps=5, video_dir=default_output_dir() + "/results/videos", num_episodes=20001,
                                  eval_render=False, gifs=True, gif_dir=default_output_dir() + "/results/gifs",
                                  eval_frequency=1000, attacker=False, defender=True,
                                  video_frequency=101,
                                  save_dir=default_output_dir() + "/results/data")
    env_name = "idsgame-maximal_attack-v8"
    client_config = ClientConfig(env_name=env_name, defender_type=AgentType.TABULAR_Q_AGENT.value,
                                 mode=RunnerMode.TRAIN_DEFENDER.value,
                                 q_agent_config=q_agent_config, output_dir=default_output_dir(),
                                 title="AttackMaximalAttacker vs TrainingQAgent",
                                 run_many=True, random_seeds=[0, 999, 299, 399, 499])

    # client_config.hp_tuning = True
    # client_config.hp_tuning_config = HpTuningConfig(param_1="alpha", param_2="epsilon_decay",
    #                                                 alpha=[0.0001, 0.001, 0.003, 0.05, 0.03],
    #                                                 epsilon_decay=[0.99999, 0.9999, 0.999, 0.99, 0.9])
    # client_config.run_many = False

    return client_config
Example 9
def default_config() -> ClientConfig:
    """
    :return: Default configuration for the experiment
    """
    env_name = "idsgame-minimal_defense-v18"
    client_config = ClientConfig(env_name=env_name, attacker_type=AgentType.MANUAL_ATTACK.value,
                                 mode=RunnerMode.MANUAL_ATTACKER.value, output_dir=default_output_dir(),
                                 title="ManualAttacker vs DefendMinimalDefender")
    return client_config
Example 10
def default_config() -> ClientConfig:
    """
    :return: Default configuration for the experiment
    """
    pg_agent_config = PolicyGradientAgentConfig(gamma=1, alpha_attacker=0.0001, epsilon=1, render=False,
                                                alpha_defender=0.0001,
                                                eval_sleep=0.9,
                                                min_epsilon=0.01, eval_episodes=1000, train_log_frequency=1,
                                                epsilon_decay=0.9999, video=True, eval_log_frequency=500,
                                                video_fps=5, video_dir=default_output_dir() + "/results/videos",
                                                num_episodes=100000000,
                                                eval_render=False, gifs=True,
                                                gif_dir=default_output_dir() + "/results/gifs",
                                                eval_frequency=550000000, attacker=False, defender=True,
                                                video_frequency=1001,
                                                save_dir=default_output_dir() + "/results/data",
                                                checkpoint_freq=100,
                                                input_dim_attacker=((4 + 2) * 4),
                                                output_dim_attacker=(4 + 1) * 4,
                                                input_dim_defender=((4 + 1) * 4),
                                                output_dim_defender=5 * 4,
                                                hidden_dim=128, num_hidden_layers=2,
                                                pi_hidden_layers=1, pi_hidden_dim=128, vf_hidden_layers=1,
                                                vf_hidden_dim=128,
                                                batch_size=2000,
                                                gpu=False, tensorboard=True,
                                                tensorboard_dir=default_output_dir() + "/results/tensorboard",
                                                optimizer="Adam", lr_exp_decay=False, lr_decay_rate=0.999,
                                                state_length=1, normalize_features=False, merged_ad_features=True,
                                                zero_mean_features=False, gpu_id=0, lstm_network=False,
                                                lstm_seq_length=4, num_lstm_layers=2, optimization_iterations=10,
                                                eps_clip=0.2, max_gradient_norm=0.5, gae_lambda=0.95,
                                                cnn_feature_extractor=False, features_dim=512,
                                                flatten_feature_planes=False, cnn_type=5, vf_coef=0.5, ent_coef=0.001,
                                                render_attacker_view=False, lr_progress_power_decay=4,
                                                lr_progress_decay=True, use_sde=False, sde_sample_freq=4,
                                                one_hot_obs=False, lstm_core=False, lstm_hidden_dim=32,
                                                multi_channel_obs=False,
                                                channel_1_dim=32, channel_1_layers=2, channel_1_input_dim=16,
                                                channel_2_dim=32, channel_2_layers=2, channel_2_input_dim=16,
                                                channel_3_dim=32, channel_3_layers=2, channel_3_input_dim=4,
                                                channel_4_dim=32, channel_4_layers=2, channel_4_input_dim=4,
                                                mini_batch_size=64, ar_policy=True,
                                                attacker_node_input_dim=((4 + 2) * 4),
                                                attacker_at_net_input_dim=(4 + 2), attacker_at_net_output_dim=(4 + 1),
                                                attacker_node_net_output_dim=4,
                                                defender_node_input_dim=((4+1)*4), defender_at_net_input_dim=(4+1),
                                                defender_node_net_output_dim=4, defender_at_net_output_dim=5)
    env_name = "idsgame-maximal_attack-v20"
    client_config = ClientConfig(env_name=env_name,
                                 defender_type=AgentType.PPO_OPENAI_AGENT.value,
                                 mode=RunnerMode.TRAIN_DEFENDER.value,
                                 pg_agent_config=pg_agent_config, output_dir=default_output_dir(),
                                 title="AttackMaximalAttacker vs OpenAI-PPO",
                                 run_many=False, random_seeds=[0, 999, 299, 399, 499], random_seed=999)
    # client_config = hp_tuning_config(client_config)
    return client_config
Example 11
def default_config() -> ClientConfig:
    """
    :return: Default configuration for the experiment
    """
    env_name = "idsgame-random_attack-v4"
    client_config = ClientConfig(env_name=env_name, defender_type=AgentType.MANUAL_DEFENSE.value,
                                 mode=RunnerMode.MANUAL_DEFENDER.value, output_dir=default_output_dir(),
                                 title="RandomAttacker vs ManualDefender")
    return client_config
Example 12
def default_config() -> ClientConfig:
    """
    :return: Default configuration for the experiment
    """
    env_name = "idsgame-v14"
    pg_agent_config = PolicyGradientAgentConfig(
        gamma=0.999,
        alpha_attacker=0.001,
        epsilon=1,
        render=False,
        eval_sleep=0.9,
        min_epsilon=0.01,
        eval_episodes=100,
        train_log_frequency=100,
        epsilon_decay=0.9999,
        video=True,
        eval_log_frequency=1,
        video_fps=5,
        video_dir=default_output_dir() + "/results/videos",
        num_episodes=10000,
        eval_render=False,
        gifs=True,
        gif_dir=default_output_dir() + "/results/gifs",
        eval_frequency=1000,
        attacker=True,
        defender=False,
        video_frequency=101,
        save_dir=default_output_dir() + "/results/data",
        checkpoint_freq=1000,
        input_dim_attacker=(4 + 3) * 2,
        output_dim_attacker=4 * 2,
        hidden_dim=32,
        num_hidden_layers=1,
        batch_size=8,
        gpu=True,
        tensorboard=True,
        tensorboard_dir=default_output_dir() + "/results/tensorboard",
        optimizer="Adam",
        lr_exp_decay=False,
        lr_decay_rate=0.999,
        state_length=1,
        normalize_features=False,
        merged_ad_features=True,
        zero_mean_features=False,
        gpu_id=0,
        attacker_load_path=
        "/Users/kimham/workspace/rl/gym-idsgame/experiments/training/v14/minimal_defense/reinforce/results/data/0/1589370657.761538_attacker_policy_network.pt"
    )
    client_config = ClientConfig(env_name=env_name,
                                 attacker_type=AgentType.REINFORCE_AGENT.value,
                                 mode=RunnerMode.MANUAL_DEFENDER.value,
                                 output_dir=default_output_dir(),
                                 title="REINFORCE vs ManualDefender",
                                 pg_agent_config=pg_agent_config,
                                 bot_attacker=True)
    return client_config
Example 13
def test_sim_random_vs_random(version) -> None:
    simulation_config = SimulationConfig(log_frequency=1, num_episodes=10)
    env_name = "idsgame-v" + str(version)
    client_config = ClientConfig(env_name=env_name,
                                 attacker_type=AgentType.RANDOM.value,
                                 defender_type=AgentType.RANDOM.value,
                                 mode=RunnerMode.SIMULATE.value,
                                 simulation_config=simulation_config,
                                 output_dir=default_output_dir(),
                                 title="RandomAttacker vs RandomDefender")
    Runner.run(client_config)
Example 14
def test_sim_random_vs_defend_minimal(version) -> None:
    simulation_config = SimulationConfig(log_frequency=1, num_episodes=10)
    env_name = "idsgame-v" + str(version)
    client_config = ClientConfig(
        env_name=env_name,
        attacker_type=AgentType.RANDOM.value,
        defender_type=AgentType.DEFEND_MINIMAL_VALUE.value,
        mode=RunnerMode.SIMULATE.value,
        simulation_config=simulation_config,
        output_dir=default_output_dir())
    Runner.run(client_config)
Example 15
def test_train_random_attack_tabular_q_learning(version) -> None:
    q_agent_config = QAgentConfig(num_episodes=10,
                                  eval_frequency=100,
                                  attacker=False,
                                  defender=True)
    env_name = "idsgame-random_attack-v" + str(version)
    client_config = ClientConfig(env_name=env_name,
                                 defender_type=AgentType.TABULAR_Q_AGENT.value,
                                 mode=RunnerMode.TRAIN_DEFENDER.value,
                                 q_agent_config=q_agent_config,
                                 output_dir=default_output_dir())
    Runner.run(client_config)
Example 16
def default_config() -> ClientConfig:
    """
    :return: Default configuration for the experiment
    """
    opponent_pool_config = OpponentPoolConfig(pool_maxsize=100000,
                                              pool_increment_period=300,
                                              head_to_head_period=1,
                                              quality_scores=True,
                                              quality_score_eta=0.01,
                                              initial_quality=1000,
                                              pool_prob=0.5)
    pg_agent_config = PolicyGradientAgentConfig(gamma=1, alpha_attacker=0.000008, epsilon=1, render=False,
                                                alpha_defender=0.000003,
                                                eval_sleep=0.9,
                                                min_epsilon=0.01, eval_episodes=1000, train_log_frequency=1,
                                                epsilon_decay=0.9999, video=True, eval_log_frequency=500,
                                                video_fps=5, video_dir=default_output_dir() + "/results/videos",
                                                num_episodes=100000000,
                                                eval_render=False, gifs=True,
                                                gif_dir=default_output_dir() + "/results/gifs",
                                                eval_frequency=175000, attacker=True, defender=True,
                                                video_frequency=1001,
                                                save_dir=default_output_dir() + "/results/data",
                                                checkpoint_freq=250,
                                                input_dim_attacker=((4 + 2) * 2)*1,
                                                output_dim_attacker=(4 + 1) * 2,
                                                input_dim_defender=((4 + 1) * 3)*1,
                                                output_dim_defender=5 * 3,
                                                hidden_dim=64,
                                                num_hidden_layers=4, batch_size=2000,
                                                gpu=False, tensorboard=True,
                                                tensorboard_dir=default_output_dir() + "/results/tensorboard",
                                                optimizer="Adam", lr_exp_decay=False, lr_decay_rate=0.999,
                                                state_length=1, normalize_features=False, merged_ad_features=True,
                                                zero_mean_features=False, gpu_id=0, lstm_network=False,
                                                lstm_seq_length=4, num_lstm_layers=2, optimization_iterations=10,
                                                eps_clip=0.2, max_gradient_norm=0.5, gae_lambda=0.95,
                                                cnn_feature_extractor=False, features_dim=512,
                                                flatten_feature_planes=False, cnn_type=5, vf_coef=0.5, ent_coef=0.01,
                                                render_attacker_view=False, lr_progress_power_decay=4,
                                                lr_progress_decay=True, use_sde=False, sde_sample_freq=4,
                                                opponent_pool=True, opponent_pool_config=opponent_pool_config,
                                                alternating_optimization=True, alternating_period=300,
                                                baselines_in_pool=True)
    env_name = "idsgame-v18"
    client_config = ClientConfig(env_name=env_name, attacker_type=AgentType.PPO_OPENAI_AGENT.value,
                                 defender_type=AgentType.PPO_OPENAI_AGENT.value,
                                 mode=RunnerMode.TRAIN_DEFENDER_AND_ATTACKER.value,
                                 pg_agent_config=pg_agent_config, output_dir=default_output_dir(),
                                 title="OpenAI-PPO vs OpenAI-PPO",
                                 run_many=False, random_seeds=[0, 999, 299, 399, 499])
    # client_config = hp_tuning_config(client_config)
    return client_config
Example 17
def default_config() -> ClientConfig:
    """
    :return: Default configuration for the experiment
    """
    env_name = "idsgame-v19"
    opponent_pool_config = OpponentPoolConfig(pool_maxsize=100000,
                                              pool_increment_period=50,
                                              head_to_head_period=1,
                                              quality_scores=True,
                                              quality_score_eta=0.01,
                                              initial_quality=1000,
                                              pool_prob=0.5)
    pg_agent_config = PolicyGradientAgentConfig(gamma=1, alpha_attacker=0.00001, epsilon=1, render=False,
                                                alpha_defender=0.0001,
                                                eval_sleep=0.9,
                                                min_epsilon=0.01, eval_episodes=100, train_log_frequency=1,
                                                epsilon_decay=0.9999, video=True, eval_log_frequency=1,
                                                video_fps=5, video_dir=default_output_dir() + "/results/videos",
                                                num_episodes=100000000,
                                                eval_render=False, gifs=True,
                                                gif_dir=default_output_dir() + "/results/gifs",
                                                eval_frequency=100000, attacker=True, defender=False,
                                                video_frequency=101,
                                                save_dir=default_output_dir() + "/results/data",
                                                checkpoint_freq=5000,
                                                input_dim_attacker=(4 + 2) * 2,
                                                output_dim_attacker=(4+1) * 2,
                                                input_dim_defender=(4 + 2) * 3,
                                                output_dim_defender=5 * 3,
                                                hidden_dim=64,
                                                num_hidden_layers=4, batch_size=2000,
                                                gpu=False, tensorboard=True,
                                                tensorboard_dir=default_output_dir() + "/results/tensorboard",
                                                optimizer="Adam", lr_exp_decay=False, lr_decay_rate=0.999,
                                                state_length=1, normalize_features=False, merged_ad_features=True,
                                                zero_mean_features=False, gpu_id=0, lstm_network=False,
                                                lstm_seq_length=4, num_lstm_layers=2, optimization_iterations=10,
                                                eps_clip=0.2, max_gradient_norm=0.5, gae_lambda=0.95,
                                                cnn_feature_extractor=False, features_dim=512,
                                                flatten_feature_planes=False,
                                                attacker_load_path="/home/kim/storage/workspace/gym-idsgame/experiments/manual_play/v19/minimal_defense/manual_vs_openai_ppo/v4/1591164917.874881_attacker_policy_network.zip",
                                                ar_policy=True, attacker_node_input_dim=((4 + 2) * 4),
                                                attacker_at_net_input_dim=(4 + 2), attacker_at_net_output_dim=(4 + 1),
                                                attacker_node_net_output_dim=4
                                                )
    client_config = ClientConfig(env_name=env_name, attacker_type=AgentType.PPO_OPENAI_AGENT.value,
                                 mode=RunnerMode.MANUAL_DEFENDER.value, output_dir=default_output_dir(),
                                 title="OpenAI PPO vs ManualDefender", pg_agent_config=pg_agent_config,
                                 bot_attacker=True)
    return client_config
Example 18
def setup_train(config: ClientConfig, random_seed):
    time_str = str(time.time())
    util.create_artefact_dirs(config.output_dir, random_seed)
    logger = util.setup_logger("tabular_q_vs_random_defense-v3",
                               config.output_dir + "/results/logs/" +
                               str(random_seed) + "/",
                               time_str=time_str)
    config.q_agent_config.save_dir = default_output_dir(
    ) + "/results/data/" + str(random_seed) + "/"
    config.q_agent_config.video_dir = default_output_dir(
    ) + "/results/videos/" + str(random_seed) + "/"
    config.q_agent_config.gif_dir = default_output_dir(
    ) + "/results/gifs/" + str(random_seed) + "/"
    # config.q_agent_config.dqn_config.tensorboard_dir = default_output_dir() + "/results/tensorboard/" \
    #                                                    + str(random_seed) + "/"
    config.logger = logger
    config.q_agent_config.logger = logger
    config.q_agent_config.random_seed = random_seed
    config.random_seed = random_seed
    config.q_agent_config.to_csv(config.output_dir +
                                 "/results/hyperparameters/" +
                                 str(random_seed) + "/" + time_str + ".csv")
    return time_str
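
setup_train prepares per-seed artefact directories, a logger, and a hyperparameter CSV for a given configuration. A sketch of a typical seed loop, assuming ClientConfig keeps the random_seeds list it is constructed with and that runs are launched via Runner.run as in Examples 13-15:

config = default_config()
for seed in config.random_seeds:
    time_str = setup_train(config, seed)  # per-seed directories, logger, and hyperparameter CSV
    Runner.run(config)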
Example 19
def setup_agent_defender():
    """
    :return: the BQLAgent defender, the client configuration for the experiment, and the environment
    """
    q_agent_config = QAgentConfig(
        gamma=0.999,
        alpha=0.0005,
        epsilon=1,
        render=False,
        eval_sleep=0.9,
        min_epsilon=0.01,
        eval_episodes=100,
        train_log_frequency=50,
        epsilon_decay=0.9999,
        video=True,
        eval_log_frequency=1,
        video_fps=5,
        video_dir=default_output_dir() + "/results/videos",
        num_episodes=5001,
        eval_render=False,
        gifs=True,
        gif_dir=default_output_dir() + "/results/gifs",
        eval_frequency=1000,
        attacker=False,
        defender=True,
        video_frequency=101,
        save_dir=default_output_dir() + "/results/data",
        a_mu0=-10,
        a_lambda0=4.0,
        a_alpha0=3.0,
        a_beta0=3.0,
        num_mixture_samples=1000)
    # env_name = "idsgame-random_defense-v3"
    env_name = "idsgame-maximal_attack-v3"
    client_config = ClientConfig(
        env_name=env_name,
        defender_type=AgentType.TABULAR_Q_AGENT.value,
        mode=RunnerMode.TRAIN_DEFENDER.value,
        q_agent_config=q_agent_config,
        output_dir=default_output_dir(),
        title="TrainingQAgent vs DefendMinimalDefender",
        run_many=True,
        random_seeds=[0, 999, 299])
    env = gym.make(client_config.env_name,
                   idsgame_config=client_config.idsgame_config,
                   save_dir=client_config.output_dir + "/results/data/" +
                   str(client_config.random_seed),
                   initial_state_path=client_config.initial_state_path)
    defender = BQLAgent(env, client_config.q_agent_config)
    return defender, client_config, env
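
Unlike the pure configuration functions above, this helper also creates the environment and the agent and returns all three. A hypothetical follow-up, assuming BQLAgent exposes a train() method (an assumption; the snippet itself does not show the agent's interface):

defender, client_config, env = setup_agent_defender()
defender.train()  # hypothetical: run Bayesian Q-learning for the defender in the created env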
Example 20
def default_config() -> ClientConfig:
    """
    :return: Default configuration for the experiment
    """
    pg_agent_config = PolicyGradientAgentConfig(
        gamma=0.999,
        alpha_attacker=0.00001,
        epsilon=1,
        render=False,
        eval_sleep=0.9,
        min_epsilon=0.01,
        eval_episodes=100,
        train_log_frequency=100,
        epsilon_decay=0.9999,
        video=True,
        eval_log_frequency=1,
        video_fps=5,
        video_dir=default_output_dir() + "/results/videos",
        num_episodes=200001,
        eval_render=False,
        gifs=True,
        gif_dir=default_output_dir() + "/results/gifs",
        eval_frequency=10000,
        attacker=False,
        defender=True,
        video_frequency=101,
        save_dir=default_output_dir() + "/results/data",
        checkpoint_freq=5000,
        input_dim_attacker=33,
        output_dim_defender=33,
        hidden_dim=64,
        num_hidden_layers=1,
        batch_size=32,
        gpu=False,
        tensorboard=True,
        tensorboard_dir=default_output_dir() + "/results/tensorboard",
        optimizer="Adam",
        lr_exp_decay=False,
        lr_decay_rate=0.999)
    env_name = "idsgame-maximal_attack-v8"
    client_config = ClientConfig(env_name=env_name,
                                 defender_type=AgentType.REINFORCE_AGENT.value,
                                 mode=RunnerMode.TRAIN_DEFENDER.value,
                                 pg_agent_config=pg_agent_config,
                                 output_dir=default_output_dir(),
                                 title="AttackMaximalAttacker vs REINFORCE",
                                 run_many=False,
                                 random_seeds=[0, 999, 299, 399, 499])
    client_config = hp_tuning_config(client_config)
    return client_config
Example 21
def default_config() -> ClientConfig:
    """
    :return: Default configuration for the experiment
    """
    opponent_pool_config = OpponentPoolConfig(pool_maxsize=100000,
                                              pool_increment_period=50,
                                              head_to_head_period=1,
                                              quality_scores=True,
                                              quality_score_eta=0.01,
                                              initial_quality=1000,
                                              pool_prob=0.5)

    pg_agent_config = PolicyGradientAgentConfig(gamma=0.999, alpha_attacker=0.0001, alpha_defender=0.0001,
                                                epsilon=1, render=False, eval_sleep=0.9,
                                                min_epsilon=0.01, eval_episodes=100, train_log_frequency=1,
                                                epsilon_decay=0.9999, video=True, eval_log_frequency=1,
                                                video_fps=5, video_dir=default_output_dir() + "/results/videos",
                                                num_episodes=1350001,
                                                eval_render=False, gifs=True,
                                                gif_dir=default_output_dir() + "/results/gifs",
                                                eval_frequency=20000, attacker=True, defender=True, video_frequency=101,
                                                save_dir=default_output_dir() + "/results/data",
                                                checkpoint_freq=100,
                                                input_dim_attacker=((4 + 2) * 4),
                                                output_dim_attacker=(4 + 1) * 4,
                                                input_dim_defender=((4 + 1) * 4),
                                                output_dim_defender=5 * 4,
                                                hidden_dim=128,
                                                num_hidden_layers=2, batch_size=64,
                                                gpu=False, tensorboard=True,
                                                tensorboard_dir=default_output_dir() + "/results/tensorboard",
                                                optimizer="Adam", lr_exp_decay=True, lr_decay_rate=0.999,
                                                state_length=1, alternating_optimization=True,
                                                alternating_period=500, opponent_pool=True,
                                                opponent_pool_config=opponent_pool_config,
                                                normalize_features=False, merged_ad_features=True,
                                                zero_mean_features=False)
    env_name = "idsgame-v19"
    client_config = ClientConfig(env_name=env_name, attacker_type=AgentType.ACTOR_CRITIC_AGENT.value,
                                 defender_type=AgentType.ACTOR_CRITIC_AGENT.value,
                                 mode=RunnerMode.TRAIN_DEFENDER_AND_ATTACKER.value,
                                 pg_agent_config=pg_agent_config, output_dir=default_output_dir(),
                                 title="Actor-Critic vs Actor-Critic",
                                 run_many=False, random_seeds=[0, 999, 299, 399, 499],
                                 random_seed=0)
    #client_config = hp_tuning_config(client_config)
    return client_config
Example 22
def default_config() -> ClientConfig:
    """
    :return: Default configuration for the experiment
    """
    q_agent_config = QAgentConfig(
        gamma=0.999,
        alpha=0.0005,
        epsilon=1,
        render=False,
        eval_sleep=0.9,
        min_epsilon=0.01,
        eval_episodes=100,
        train_log_frequency=100,
        epsilon_decay=0.999,
        video=True,
        eval_log_frequency=1,
        video_fps=5,
        video_dir=default_output_dir() + "/results/videos",
        num_episodes=20001,
        eval_render=False,
        gifs=True,
        gif_dir=default_output_dir() + "/results/gifs",
        eval_frequency=2000,
        attacker=True,
        defender=False,
        video_frequency=101,
        save_dir=default_output_dir() + "/results/data",
        tab_full_state_space=True)
    env_name = "idsgame-minimal_defense-v13"
    client_config = ClientConfig(
        env_name=env_name,
        attacker_type=AgentType.TABULAR_Q_AGENT.value,
        mode=RunnerMode.TRAIN_ATTACKER.value,
        q_agent_config=q_agent_config,
        output_dir=default_output_dir(),
        title="TrainingQAgent vs DefendMinimalDefender",
        run_many=False,
        random_seeds=[0, 999, 299, 399, 499])
    return client_config
Example 23
def default_config() -> ClientConfig:
    """
    :return: Default configuration for the experiment
    """
    pg_agent_config = PolicyGradientAgentConfig(gamma=1, alpha_attacker=0.0001, epsilon=1, render=False,
                                                alpha_defender=0.0001,
                                                eval_sleep=0.9,
                                                min_epsilon=0.01, eval_episodes=100, train_log_frequency=1,
                                                epsilon_decay=0.9999, video=True, eval_log_frequency=1,
                                                video_fps=5, video_dir=default_output_dir() + "/results/videos",
                                                num_episodes=100000000,
                                                eval_render=False, gifs=True,
                                                gif_dir=default_output_dir() + "/results/gifs",
                                                eval_frequency=25000, attacker=False, defender=True,
                                                video_frequency=101,
                                                save_dir=default_output_dir() + "/results/data",
                                                checkpoint_freq=5000, input_dim_attacker=(4 + 1) * 3,
                                                output_dim_attacker=4 * 3,
                                                input_dim_defender=(4 + 1) * 3,
                                                output_dim_defender=5 * 3,
                                                hidden_dim=32,
                                                num_hidden_layers=2, batch_size=2000,
                                                gpu=False, tensorboard=True,
                                                tensorboard_dir=default_output_dir() + "/results/tensorboard",
                                                optimizer="Adam", lr_exp_decay=False, lr_decay_rate=0.999,
                                                state_length=1, normalize_features=False, merged_ad_features=False,
                                                zero_mean_features=False, gpu_id=0, lstm_network=False,
                                                lstm_seq_length=4, num_lstm_layers=2, optimization_iterations=10,
                                                eps_clip=0.2, max_gradient_norm=0.5, gae_lambda=0.95)
    env_name = "idsgame-maximal_attack-v17"
    client_config = ClientConfig(env_name=env_name, attacker_type=AgentType.PPO_OPENAI_AGENT.value,
                                 defender_type=AgentType.PPO_OPENAI_AGENT.value,
                                 mode=RunnerMode.TRAIN_DEFENDER.value,
                                 pg_agent_config=pg_agent_config, output_dir=default_output_dir(),
                                 title="AttackMaximalAttacker vs OpenAI-PPO",
                                 run_many=False, random_seeds=[0, 999, 299, 399, 499])
    # client_config = hp_tuning_config(client_config)
    return client_config
Example 24
def default_config() -> ClientConfig:
    """
    :return: Default configuration for the experiment
    """
    env_name = "idsgame-v19"
    pg_agent_config = PolicyGradientAgentConfig(
        gamma=1,
        alpha_attacker=0.0001,
        epsilon=1,
        render=False,
        alpha_defender=0.0001,
        eval_sleep=0.9,
        min_epsilon=0.01,
        eval_episodes=1000,
        train_log_frequency=1,
        epsilon_decay=0.9999,
        video=True,
        eval_log_frequency=500,
        video_fps=5,
        video_dir=default_output_dir() + "/results/videos",
        num_episodes=100000000,
        eval_render=False,
        gifs=True,
        gif_dir=default_output_dir() + "/results/gifs",
        eval_frequency=55000,
        attacker=False,
        defender=True,
        video_frequency=1001,
        save_dir=default_output_dir() + "/results/data",
        checkpoint_freq=250,
        input_dim_attacker=((4 + 2) * 4),
        output_dim_attacker=(4 + 1) * 4,
        input_dim_defender=((4 + 1) * 4),
        output_dim_defender=5 * 4,
        hidden_dim=128,
        num_hidden_layers=2,
        pi_hidden_layers=1,
        pi_hidden_dim=128,
        vf_hidden_layers=1,
        vf_hidden_dim=128,
        batch_size=2000,
        gpu=False,
        tensorboard=True,
        tensorboard_dir=default_output_dir() + "/results/tensorboard",
        optimizer="Adam",
        lr_exp_decay=False,
        lr_decay_rate=0.999,
        state_length=1,
        normalize_features=False,
        merged_ad_features=True,
        zero_mean_features=False,
        gpu_id=0,
        lstm_network=False,
        lstm_seq_length=4,
        num_lstm_layers=2,
        optimization_iterations=10,
        eps_clip=0.2,
        max_gradient_norm=0.5,
        gae_lambda=0.95,
        cnn_feature_extractor=False,
        features_dim=512,
        flatten_feature_planes=False,
        cnn_type=5,
        vf_coef=0.5,
        ent_coef=0.001,
        render_attacker_view=False,
        lr_progress_power_decay=4,
        lr_progress_decay=True,
        use_sde=False,
        sde_sample_freq=4,
        one_hot_obs=False,
        lstm_core=False,
        lstm_hidden_dim=32,
        multi_channel_obs=False,
        channel_1_dim=32,
        channel_1_layers=2,
        channel_1_input_dim=16,
        channel_2_dim=32,
        channel_2_layers=2,
        channel_2_input_dim=16,
        channel_3_dim=32,
        channel_3_layers=2,
        channel_3_input_dim=4,
        channel_4_dim=32,
        channel_4_layers=2,
        channel_4_input_dim=4,
        mini_batch_size=64,
        ar_policy=True,
        attacker_node_input_dim=((4 + 2) * 4),
        attacker_at_net_input_dim=(4 + 2),
        attacker_at_net_output_dim=(4 + 1),
        attacker_node_net_output_dim=4,
        defender_node_input_dim=((4 + 1) * 4),
        defender_at_net_input_dim=(4 + 1),
        defender_node_net_output_dim=4,
        defender_at_net_output_dim=5,
        defender_load_path=
        "/home/kim/workspace/gym-idsgame/experiments/manual_play/v19/maximal_attack/manual_vs_openai_ppo/1592125075.4390159_defender_node_policy_network.zip"
    )
    client_config = ClientConfig(
        env_name=env_name,
        defender_type=AgentType.PPO_OPENAI_AGENT.value,
        mode=RunnerMode.MANUAL_ATTACKER.value,
        output_dir=default_output_dir(),
        title="ManualAttacker vs OpenAI PPO",
        pg_agent_config=pg_agent_config,
        bot_defender=True)
    return client_config
Example 25
def default_config() -> ClientConfig:
    """
    :return: Default configuration for the experiment
    """
    env_name = "idsgame-v16"
    pg_agent_config = PolicyGradientAgentConfig(
        gamma=1,
        alpha_attacker=0.00001,
        epsilon=1,
        render=False,
        alpha_defender=0.0001,
        eval_sleep=0.9,
        min_epsilon=0.01,
        eval_episodes=100,
        train_log_frequency=1,
        epsilon_decay=0.9999,
        video=True,
        eval_log_frequency=1,
        video_fps=5,
        video_dir=default_output_dir() + "/results/videos",
        num_episodes=100000000,
        eval_render=False,
        gifs=True,
        gif_dir=default_output_dir() + "/results/gifs",
        eval_frequency=100000,
        attacker=True,
        defender=False,
        video_frequency=101,
        save_dir=default_output_dir() + "/results/data",
        checkpoint_freq=5000,
        input_dim_attacker=(4 + 2) * 3,
        output_dim_attacker=4 * 3,
        input_dim_defender=(4 + 2) * 3,
        output_dim_defender=5 * 3,
        hidden_dim=64,
        num_hidden_layers=4,
        batch_size=2000,
        gpu=False,
        tensorboard=True,
        tensorboard_dir=default_output_dir() + "/results/tensorboard",
        optimizer="Adam",
        lr_exp_decay=False,
        lr_decay_rate=0.999,
        state_length=1,
        normalize_features=False,
        merged_ad_features=True,
        zero_mean_features=False,
        gpu_id=1,
        lstm_network=False,
        lstm_seq_length=4,
        num_lstm_layers=2,
        optimization_iterations=10,
        eps_clip=0.2,
        max_gradient_norm=0.5,
        gae_lambda=0.95,
        cnn_feature_extractor=False,
        features_dim=512,
        flatten_feature_planes=False,
        attacker_load_path=
        "/Users/kimham/workspace/rl/gym-idsgame/experiments/manual_play/v16/minimal_defense/manual_vs_openai_ppo/1590564756.5454807_attacker_policy_network.zip"
    )
    client_config = ClientConfig(
        env_name=env_name,
        attacker_type=AgentType.PPO_OPENAI_AGENT.value,
        mode=RunnerMode.MANUAL_DEFENDER.value,
        output_dir=default_output_dir(),
        title="OpenAI PPO vs ManualDefender",
        pg_agent_config=pg_agent_config,
        bot_attacker=True)
    return client_config
Example 26
def default_config() -> ClientConfig:
    """
    :return: Default configuration for the experiment
    """
    pg_agent_config = PolicyGradientAgentConfig(
        gamma=1,
        alpha_attacker=0.00003,
        epsilon=1,
        render=False,
        alpha_defender=0.0001,
        eval_sleep=0.9,
        min_epsilon=0.01,
        eval_episodes=1000,
        train_log_frequency=1,
        epsilon_decay=0.9999,
        video=True,
        eval_log_frequency=500,
        video_fps=5,
        video_dir=default_output_dir() + "/results/videos",
        num_episodes=100000000,
        eval_render=False,
        gifs=True,
        gif_dir=default_output_dir() + "/results/gifs",
        eval_frequency=175000,
        attacker=True,
        defender=False,
        video_frequency=1001,
        save_dir=default_output_dir() + "/results/data",
        checkpoint_freq=250,
        input_dim_attacker=((4 + 2) * 2),
        output_dim_attacker=(4 + 1) * 2,
        input_dim_defender=((4 + 1) * 3),
        output_dim_defender=5 * 3,
        hidden_dim=32,
        num_hidden_layers=2,
        batch_size=2000,
        gpu=True,
        tensorboard=True,
        tensorboard_dir=default_output_dir() + "/results/tensorboard",
        optimizer="Adam",
        lr_exp_decay=False,
        lr_decay_rate=0.999,
        state_length=1,
        normalize_features=False,
        merged_ad_features=True,
        zero_mean_features=False,
        gpu_id=0,
        lstm_network=False,
        lstm_seq_length=4,
        num_lstm_layers=2,
        optimization_iterations=10,
        eps_clip=0.2,
        max_gradient_norm=0.5,
        gae_lambda=0.95,
        cnn_feature_extractor=False,
        features_dim=512,
        flatten_feature_planes=False,
        cnn_type=5,
        vf_coef=0.5,
        ent_coef=0.00,
        render_attacker_view=True,
        lr_progress_power_decay=4,
        lr_progress_decay=True,
        use_sde=False,
        sde_sample_freq=4,
        one_hot_obs=False)
    # input_dim_attacker = (3, 3, 5),
    # output_dim_attacker = (5 * 2) * 3,
    # input_dim_defender = (3, 3, 5),
    # output_dim_defender = 6 * 3,
    # input_dim_attacker = ((5 * 2 + 1) * 3),
    # output_dim_attacker = (5 * 2) * 3,
    # input_dim_defender = ((5 + 1) * 3),
    # output_dim_defender = 6 * 3,
    env_name = "idsgame-minimal_defense-v18"
    client_config = ClientConfig(
        env_name=env_name,
        attacker_type=AgentType.PPO_OPENAI_AGENT.value,
        mode=RunnerMode.TRAIN_ATTACKER.value,
        pg_agent_config=pg_agent_config,
        output_dir=default_output_dir(),
        title="OpenAI-PPO vs DefendMinimalDefender",
        run_many=False,
        random_seeds=[0, 999, 299, 399, 499])
    # client_config = hp_tuning_config(client_config)
    return client_config