# (The opening of this call is elided in this excerpt; the assignment is
# restored since the run is referenced below as random_run.)
random_run = learner.epsilon_greedy_search(
    # ... (environment argument elided in this excerpt) ...
    ep,
    learner=learner.RandomPolicy(),
    episode_count=eval_episode_count,
    iteration_count=iteration_count,
    epsilon=1.0,  # purely random
    render=False,
    verbosity=Verbosity.Quiet,
    title="Random search")

# %%
# Plot averaged cumulative rewards for DQL vs Random vs DQL-Exploit
themodel = dqla.CyberBattleStateActionModel(ep)
p.plot_averaged_cummulative_rewards(
    all_runs=[best_dqn_learning_run_10,
              random_run,
              dql_exploit_run],
    title=f'Benchmark -- max_nodes={ep.maximum_node_count}, episodes={eval_episode_count},\n'
    f'State: {[f.name() for f in themodel.state_space.feature_selection]} '
    f'({len(themodel.state_space.feature_selection)})\n'
    f"Action: abstract_action ({themodel.action_space.flat_size()})")

# %%
# Plot cumulative rewards for all episodes
p.plot_all_episodes(best_dqn_learning_run_10)

##################################################
# %%
# Train a fresh DQL agent on the smaller 4-node chain environment
best_dqn_4 = learner.epsilon_greedy_search(
    cyberbattle_gym_env=cyberbattlechain_4,
    environment_properties=ep,
    # ... (learner and training arguments elided in this excerpt) ...
)
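# %%
# For reference, a sketch of how the elided arguments above are typically
# filled in: a deep Q-learner policy plus an epsilon schedule. The
# DeepQLearnerPolicy constructor and every hyperparameter value below are
# assumptions for illustration, not the exact configuration behind this
# benchmark.
best_dqn_4 = learner.epsilon_greedy_search(
    cyberbattle_gym_env=cyberbattlechain_4,
    environment_properties=ep,
    learner=dqla.DeepQLearnerPolicy(  # assumed constructor and hyperparameters
        ep=ep,
        gamma=0.015,
        replay_memory_size=10000,
        target_update=5,
        batch_size=512,
        learning_rate=0.01),
    episode_count=training_episode_count,  # assumed
    iteration_count=iteration_count,
    epsilon=0.90,  # assumed epsilon schedule
    epsilon_exponential_decay=5000,
    epsilon_minimum=0.10,
    render=False,
    verbosity=Verbosity.Quiet,
    title="DQL")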
# %%
# Plot averaged cumulative rewards for Q-learning vs Random vs Q-Exploit
all_runs = [*qlearning_results,
            randomlearning_results,
            nolearning_results]

Q_source_10 = cast(a.QTabularLearner, qlearning_bestrun_10['learner']).qsource
Q_attack_10 = cast(a.QTabularLearner, qlearning_bestrun_10['learner']).qattack

p.plot_averaged_cummulative_rewards(
    all_runs=all_runs,
    title=f'Benchmark -- max_nodes={ep.maximum_node_count}, episodes={eval_episode_count},\n'
    f'dimension={Q_source_10.state_space.flat_size()}x{Q_source_10.action_space.flat_size()}, '
    f'{Q_attack_10.state_space.flat_size()}x{Q_attack_10.action_space.flat_size()}\n'
    f'Q1={[f.name() for f in Q_source_10.state_space.feature_selection]} '
    f'-> {[f.name() for f in Q_source_10.action_space.feature_selection]}\n'
    f"Q2={[f.name() for f in Q_attack_10.state_space.feature_selection]} -> 'action'")

# %%
# Plot cumulative rewards for all episodes
p.plot_all_episodes(qlearning_results[0])

# %%
# Plot the Q-matrices

# %%
# Print the non-zero coordinates of the Q_source matrix
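# A minimal sketch, assuming each tabular Q-learner exposes its Q-values as a
# numpy array in a `qm` attribute (an assumption based on the QTabularLearner
# usage above, not a documented API):
import numpy as np

coords = np.nonzero(Q_source_10.qm)
print(list(zip(*coords)))      # (state, action) pairs with a learned value
print(Q_source_10.qm[coords])  # the corresponding non-zero Q-values
# (the same pattern applies to Q_attack_10.qm)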
title="Random+CredLookup" ) # %% randomlearning_results = learner.epsilon_greedy_search( cyberbattlechain_10, environment_properties=ep, learner=CredentialCacheExploiter(), episode_count=eval_episode_count, iteration_count=iteration_count, epsilon=1.0, # purely random render=False, verbosity=Verbosity.Quiet, title="Random search" ) # %% p.plot_episodes_length([credexplot]) p.plot_all_episodes(credexplot) all_runs = [credexplot, randomlearning_results ] p.plot_averaged_cummulative_rewards( title=f'Benchmark -- max_nodes={ep.maximum_node_count}, episodes={eval_episode_count},\n', all_runs=all_runs) # %%
# %%
# (The opening of this call is elided in this excerpt; the assignment is
# restored since the run is referenced below as credlookup_run.)
credlookup_run = learner.epsilon_greedy_search(
    # ... (environment arguments elided in this excerpt) ...
    learner=rca.CredentialCacheExploiter(),
    episode_count=10,
    iteration_count=iteration_count,
    epsilon=0.90,
    render=False,
    epsilon_exponential_decay=10000,
    epsilon_minimum=0.10,
    verbosity=Verbosity.Quiet,
    title="Credential lookups (ϵ-greedy)")

# %%
# Plots
all_runs = [credlookup_run,
            dqn_with_defender,
            dql_exploit_run]
p.plot_averaged_cummulative_rewards(
    all_runs=all_runs,
    title=f'Attacker agents vs Basic Defender -- rewards\n env={cyberbattlechain_defender.name}, episodes={training_episode_count}')

# p.plot_episodes_length(all_runs)

p.plot_averaged_availability(
    title=f"Attacker agents vs Basic Defender -- availability\n env={cyberbattlechain_defender.name}, episodes={training_episode_count}",
    all_runs=all_runs)
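# %%
# The ϵ-greedy searches above anneal exploration from the initial epsilon down
# to epsilon_minimum. A self-contained sketch of one common exponential
# schedule, min + (start - min) * exp(-step / decay), to illustrate what the
# epsilon_exponential_decay and epsilon_minimum parameters control; it is not
# necessarily the library's exact formula.
import math

def annealed_epsilon(step: int, start: float = 0.90, minimum: float = 0.10,
                     decay: float = 10000) -> float:
    """Exploration rate after `step` iterations under exponential decay."""
    return minimum + (start - minimum) * math.exp(-step / decay)

for step in (0, 1_000, 10_000, 50_000):
    print(step, round(annealed_epsilon(step), 3))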
# %%
# Compare and plot results for all the agents
all_runs = [
    random_run,
    credlookup_run,
    tabularq_run,
    tabularq_exploit_run,
    dql_run,
    dql_exploit_run
]

# Plot averaged cumulative rewards across all agents
themodel = dqla.CyberBattleStateActionModel(ep)
p.plot_averaged_cummulative_rewards(
    all_runs=all_runs,
    title=f'Benchmark -- max_nodes={ep.maximum_node_count}, episodes={eval_episode_count},\n'
    f'State: {[f.name() for f in themodel.state_space.feature_selection]} '
    f'({len(themodel.state_space.feature_selection)})\n'
    f"Action: abstract_action ({themodel.action_space.flat_size()})")

# %%
contenders = [
    credlookup_run,
    tabularq_run,
    dql_run,
    dql_exploit_run
]
p.plot_episodes_length(contenders)
p.plot_averaged_cummulative_rewards(
    title=f'Agent Benchmark top contenders\n'
    f'max_nodes:{ep.maximum_node_count}\n',
    all_runs=contenders)
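# %%
# Spell out the dimensions quoted in the benchmark title above, using only the
# model accessors already exercised by this script.
print('state features:', [f.name() for f in themodel.state_space.feature_selection])
print('state flat size:', themodel.state_space.flat_size())
print('abstract action count:', themodel.action_space.flat_size())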
# %%
# (The opening of this call is elided in this excerpt; the assignment is
# restored since the run is referenced below as dql_exploit_run.)
dql_exploit_run = learner.epsilon_greedy_search(
    # ... (environment and learner arguments elided in this excerpt) ...
    iteration_count=iteration_count,
    epsilon=0.00,  # no exploration: greedily exploit the learned policy
    epsilon_minimum=0.00,
    render=False,
    verbosity=Verbosity.Quiet,
    title="Exploiting DQL")

# %%
all_runs = [
    random_run,
    credlookup_run,
    tabularq_run,
    tabularq_exploit_run,
    dql_run,
    dql_exploit_run
]
p.plot_episodes_length(all_runs)
p.plot_averaged_cummulative_rewards(
    title=f'Agent Benchmark\n'
    f'max_nodes:{ep.maximum_node_count}\n',
    all_runs=all_runs)

# %%
contenders = [credlookup_run,
              tabularq_run,
              dql_run,
              dql_exploit_run]
p.plot_episodes_length(contenders)
p.plot_averaged_cummulative_rewards(
    title=f'Agent Benchmark top contenders\n'
    f'max_nodes:{ep.maximum_node_count}\n',
    all_runs=contenders)

# %%
# Per-episode cumulative reward plot for each top contender
for r in contenders:
    p.plot_all_episodes(r)
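# %%
# Rank the contenders numerically as a complement to the plots. As above, this
# sketch assumes each run is dict-like with 'title' and 'all_episodes_rewards'
# keys; those names are assumptions, not a documented API.
import numpy as np

def mean_total_reward(run) -> float:
    """Average cumulative reward over all episodes of a run."""
    return float(np.mean([np.sum(r) for r in run['all_episodes_rewards']]))

for rank, run in enumerate(sorted(contenders, key=mean_total_reward, reverse=True), 1):
    print(f"{rank}. {run['title']}: {mean_total_reward(run):.1f}")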