def test_agent_training() -> None:
    """Smoke-test training of a Deep Q-learning agent and a random agent.

    Both agents are run through ``learner.epsilon_greedy_search`` on the
    ``cyberbattlechain`` environment; the test only requires that each
    search returns a truthy result.
    """
    # Build the DQL policy up front so the search call stays readable.
    dql_policy = dqla.DeepQLearnerPolicy(
        ep=ep,
        gamma=0.015,
        replay_memory_size=10000,
        target_update=10,
        batch_size=512,
        learning_rate=0.01,  # torch default is 1e-2
    )
    dql_result = learner.epsilon_greedy_search(
        cyberbattle_gym_env=cyberbattlechain,
        environment_properties=ep,
        learner=dql_policy,
        title="DQL",
        episode_count=training_episode_count,
        iteration_count=iteration_count,
        render=False,
        verbosity=Verbosity.Quiet,
        epsilon=0.90,
        # Multiplicative decay is left disabled here:
        # epsilon_multdecay=0.75,  # 0.999,
        epsilon_exponential_decay=5000,  # 10000
        epsilon_minimum=0.10,
    )
    assert dql_result

    # Baseline: epsilon=1.0 together with RandomPolicy, i.e. purely random.
    random_result = learner.epsilon_greedy_search(
        cyberbattlechain,
        ep,
        learner=learner.RandomPolicy(),
        title="Random search",
        episode_count=training_episode_count,
        iteration_count=iteration_count,
        render=False,
        verbosity=Verbosity.Quiet,
        epsilon=1.0,  # purely random
    )
    assert random_result
def qlearning_run(gamma, gym_env):
    """Run the tabular Q-learning search once on ``gym_env`` for one ``gamma``.

    Returns whatever ``learner.epsilon_greedy_search`` returns for that run.
    """
    tabular_learner = a.QTabularLearner(
        ep,
        gamma=gamma,
        learning_rate=0.90,
        exploit_percentile=100,
    )
    # Fixed search settings; only gamma and the environment vary per call.
    search_settings = dict(
        episode_count=training_episode_count,
        iteration_count=iteration_count,
        epsilon=0.90,
        render=False,
        epsilon_multdecay=0.75,  # 0.999,
        epsilon_minimum=0.01,
        verbosity=Verbosity.Quiet,
        title="Q-learning",
    )
    return learner.epsilon_greedy_search(
        gym_env, ep, tabular_learner, **search_settings
    )
identifiers=ctf_env.identifiers
)

# %%
# Evaluate the Deep Q-learning agent
# Runs learner.epsilon_greedy_search on ctf_env with a freshly constructed
# DeepQLearnerPolicy and keeps the returned run in dqn_learning_run.
dqn_learning_run = learner.epsilon_greedy_search(
    cyberbattle_gym_env=ctf_env,
    environment_properties=ep,
    learner=dqla.DeepQLearnerPolicy(
        ep=ep,
        gamma=0.015,
        replay_memory_size=10000,
        target_update=5,
        batch_size=512,
        learning_rate=0.01  # torch default learning rate is 1e-2
    ),
    episode_count=training_episode_count,
    iteration_count=iteration_count,
    # Exploration settings handed to the search (meaning per parameter names).
    epsilon=0.90,
    epsilon_exponential_decay=5000,
    epsilon_minimum=0.10,
    verbosity=Verbosity.Quiet,
    # Rendering and the episode-length plot are both switched off for this run.
    render=False,
    plot_episodes_length=False,
    title="DQL"
)

# %%
# initialize the environment
# The value returned by reset() is kept in current_o.
current_o = ctf_env.reset()
# Episode budgets used by the cells below.
training_episode_count = 50
eval_episode_count = 10

# %%
# Run Deep Q-learning (gamma=0.015)
best_dqn_learning_run_10 = learner.epsilon_greedy_search(
    cyberbattle_gym_env=cyberbattlechain_10,
    environment_properties=ep,
    learner=dqla.DeepQLearnerPolicy(
        ep=ep,
        gamma=0.015,
        replay_memory_size=10000,
        target_update=10,
        batch_size=512,
        learning_rate=0.01,  # torch default is 1e-2
    ),
    title="DQL",
    episode_count=training_episode_count,
    iteration_count=iteration_count,
    render=False,
    verbosity=Verbosity.Quiet,
    epsilon=0.90,
    # Multiplicative decay is left disabled here:
    # epsilon_multdecay=0.75,  # 0.999,
    epsilon_exponential_decay=5000,  # 10000
    epsilon_minimum=0.10,
)

# %% Plot episode length
p.plot_episodes_length([best_dqn_learning_run_10])

# %%
plt.show() # %% Plot episode length p.plot_episodes_length(qlearning_results) # %% nolearning_results = learner.epsilon_greedy_search( cyberbattlechain_10, ep, learner=a.QTabularLearner(ep, trained=qlearning_bestrun_10['learner'], gamma=0.0, learning_rate=0.0, exploit_percentile=100), episode_count=eval_episode_count, iteration_count=iteration_count, epsilon=0.30, # 0.35, render=False, title="Exploiting Q-matrix", verbosity=Verbosity.Quiet ) # %% randomlearning_results = learner.epsilon_greedy_search( cyberbattlechain_10, ep, learner=a.QTabularLearner(ep, trained=qlearning_bestrun_10['learner'], gamma=0.0, learning_rate=0.0, exploit_percentile=100), episode_count=eval_episode_count, iteration_count=iteration_count, epsilon=1.0, # purely random
maximum_node_count=12,
identifiers=cyberbattlechain_10.identifiers
)

# Iteration and episode budgets used by the search cells below.
iteration_count = 9000
training_episode_count = 50
eval_episode_count = 5

# %%
# Search on cyberbattlechain_10 with a CredentialCacheExploiter learner,
# using the training episode budget.
credexplot = learner.epsilon_greedy_search(
    cyberbattlechain_10,
    learner=CredentialCacheExploiter(),
    environment_properties=ep,
    episode_count=training_episode_count,
    iteration_count=iteration_count,
    epsilon=0.90,
    render=False,
    epsilon_multdecay=0.75,  # 0.999,
    epsilon_minimum=0.01,
    verbosity=Verbosity.Quiet,
    title="Random+CredLookup"
)

# %%
# Same learner type, run with the evaluation episode budget and epsilon=1.0.
randomlearning_results = learner.epsilon_greedy_search(
    cyberbattlechain_10,
    environment_properties=ep,
    learner=CredentialCacheExploiter(),
    episode_count=eval_episode_count,
    iteration_count=iteration_count,
    epsilon=1.0,  # purely random
maximum_node_count=22,
identifiers=cyberbattlechain_defender.identifiers)

# Iteration and episode budgets used by the search cells below.
iteration_count = 600
training_episode_count = 10

# %%
# Run the Deep Q-learning search on the cyberbattlechain_defender environment.
dqn_with_defender = learner.epsilon_greedy_search(
    cyberbattle_gym_env=cyberbattlechain_defender,
    environment_properties=ep,
    learner=dqla.DeepQLearnerPolicy(
        ep=ep,
        gamma=0.15,
        replay_memory_size=10000,
        target_update=5,
        batch_size=256,
        learning_rate=0.01),
    episode_count=training_episode_count,
    iteration_count=iteration_count,
    epsilon=0.90,
    render=False,
    epsilon_exponential_decay=5000,
    epsilon_minimum=0.10,
    verbosity=Verbosity.Quiet,
    title="DQL")

# %%
# Second search on the same environment, reusing the learner stored under the
# 'learner' key of the run above.
dql_exploit_run = learner.epsilon_greedy_search(
    cyberbattlechain_defender,
    ep,
    learner=dqn_with_defender['learner'],
    episode_count=training_episode_count,
# Example encoder that combines two features through w.RavelEncoding.
fe_example = w.RavelEncoding(ep, [w.Feature_active_node_properties(ep), w.Feature_discovered_node_count(ep)])
# Wrap the observation o0, then call one feature and the example encoder on it;
# the return values of the two calls below are not stored.
a = w.StateAugmentation(o0)
w.Feature_discovered_ports(ep).get(a, None)
fe_example.encode_at(a, 0)

# %%
# Evaluate a random agent that opportunistically exploits
# credentials gathered in its local cache
credlookup_run = learner.epsilon_greedy_search(
    gym_env,
    ep,
    learner=rca.CredentialCacheExploiter(),
    episode_count=10,
    iteration_count=iteration_count,
    epsilon=0.90,
    render=False,
    epsilon_exponential_decay=10000,
    epsilon_minimum=0.10,
    verbosity=Verbosity.Quiet,
    title="Credential lookups (ϵ-greedy)"
)

# %%
# Evaluate a Tabular Q-learning agent
tabularq_run = learner.epsilon_greedy_search(
    gym_env,
    ep,
    learner=tqa.QTabularLearner(
        ep,
        gamma=0.015,
        learning_rate=0.01,
        exploit_percentile=100),
w.Feature_discovered_node_count(ep)
])
# Wrap the observation o0, then call one feature and the example encoder on it;
# the return values of the two calls below are not stored.
a = w.StateAugmentation(o0)
w.Feature_discovered_ports(ep).get(a, None)
fe_example.encode_at(a, 0)

# Iteration and episode budgets used by the search cells below.
iteration_count = 9000
training_episode_count = 50
eval_episode_count = 5

# %%
# Random baseline: RandomPolicy learner with epsilon=1.0, limited to 10 episodes.
random_run = learner.epsilon_greedy_search(
    cyberbattlechain_10,
    ep,
    learner=learner.RandomPolicy(),
    episode_count=10,  # training_episode_count,
    iteration_count=iteration_count,
    epsilon=1.0,
    render=False,
    verbosity=Verbosity.Quiet,
    title="Random")

# %%
# Search with the CredentialCacheExploiter learner, also limited to 10 episodes.
credlookup_run = learner.epsilon_greedy_search(
    cyberbattlechain_10,
    ep,
    learner=rca.CredentialCacheExploiter(),
    episode_count=10,
    iteration_count=iteration_count,
    epsilon=0.90,
    render=False,
    epsilon_exponential_decay=10000,