target_update=10,
batch_size=512,
learning_rate=0.01),  # torch default is 1e-2
    episode_count=training_episode_count,
    iteration_count=iteration_count,
    epsilon=0.90,  # initial exploration rate (decays — see epsilon_exponential_decay)
    render=False,
    # epsilon_multdecay=0.75,  # 0.999,
    epsilon_exponential_decay=5000,  # 10000
    epsilon_minimum=0.10,
    verbosity=Verbosity.Quiet,
    title="DQL")

# %% Plot episode length
p.plot_episodes_length([best_dqn_learning_run_10])

# %%
# Ensure the output directory for rendered episode images exists (no-op if already there).
if not os.path.exists("images"):
    os.mkdir("images")

# %%
# Re-run the trained DQL agent in pure exploitation mode:
# epsilon=0.0 means no random exploration — every action comes from the learned policy.
dql_exploit_run = learner.epsilon_greedy_search(
    cyberbattlechain_10,
    ep,
    learner=best_dqn_learning_run_10['learner'],
    episode_count=eval_episode_count,
    iteration_count=iteration_count,
    epsilon=0.0,  # 0.35,
    render=False,
    render_last_episode_rewards_to='images/chain10',
qlearning_results = [qlearning_run(gamma, cyberbattlechain_10) for gamma in gamma_sweep] qlearning_bestrun_10 = qlearning_results[0] # %% p.new_plot_loss() for results in qlearning_results: p.plot_all_episodes_loss(cast(a.QTabularLearner, results['learner']).loss_qsource.all_episodes, 'Q_source', results['title']) p.plot_all_episodes_loss(cast(a.QTabularLearner, results['learner']).loss_qattack.all_episodes, 'Q_attack', results['title']) plt.legend(loc="upper right") plt.show() # %% Plot episode length p.plot_episodes_length(qlearning_results) # %% nolearning_results = learner.epsilon_greedy_search( cyberbattlechain_10, ep, learner=a.QTabularLearner(ep, trained=qlearning_bestrun_10['learner'], gamma=0.0, learning_rate=0.0, exploit_percentile=100), episode_count=eval_episode_count, iteration_count=iteration_count, epsilon=0.30, # 0.35, render=False, title="Exploiting Q-matrix",
# Plot averaged cumulative rewards for DQL vs Random vs DQL-Exploit
themodel = dqla.CyberBattleStateActionModel(ep)
p.plot_averaged_cummulative_rewards(
    all_runs=all_runs,
    title=f'Benchmark -- max_nodes={ep.maximum_node_count}, episodes={eval_episode_count},\n'
    f'State: {[feat.name() for feat in themodel.state_space.feature_selection]} '
    # Fix: the closing parenthesis after the feature count was missing,
    # leaving an unbalanced '(' in the rendered plot title.
    f'({len(themodel.state_space.feature_selection)})\n'
    f"Action: abstract_action ({themodel.action_space.flat_size()})")

# %%
# Benchmark restricted to the strongest agents.
contenders = [
    credlookup_run,
    tabularq_run,
    dql_run,
    dql_exploit_run
]
p.plot_episodes_length(contenders)
p.plot_averaged_cummulative_rewards(
    title=f'Agent Benchmark top contenders\n'
    f'max_nodes:{ep.maximum_node_count}\n',
    all_runs=contenders)

# %%
# Plot cumulative rewards for all episodes
for r in contenders:
    p.plot_all_episodes(r)
# %%
title="Random+CredLookup" ) # %% randomlearning_results = learner.epsilon_greedy_search( cyberbattlechain_10, environment_properties=ep, learner=CredentialCacheExploiter(), episode_count=eval_episode_count, iteration_count=iteration_count, epsilon=1.0, # purely random render=False, verbosity=Verbosity.Quiet, title="Random search" ) # %% p.plot_episodes_length([credexplot]) p.plot_all_episodes(credexplot) all_runs = [credexplot, randomlearning_results ] p.plot_averaged_cummulative_rewards( title=f'Benchmark -- max_nodes={ep.maximum_node_count}, episodes={eval_episode_count},\n', all_runs=all_runs) # %%
learner=dql_run['learner'], episode_count=50, iteration_count=iteration_count, epsilon=0.00, epsilon_minimum=0.00, render=False, verbosity=Verbosity.Quiet, title="Exploiting DQL") # %% all_runs = [ random_run, credlookup_run, tabularq_run, tabularq_exploit_run, dql_run, dql_exploit_run ] p.plot_episodes_length(all_runs) p.plot_averaged_cummulative_rewards(title=f'Agent Benchmark\n' f'max_nodes:{ep.maximum_node_count}\n', all_runs=all_runs) # %% contenders = [credlookup_run, tabularq_run, dql_run, dql_exploit_run] p.plot_episodes_length(contenders) p.plot_averaged_cummulative_rewards(title=f'Agent Benchmark top contenders\n' f'max_nodes:{ep.maximum_node_count}\n', all_runs=contenders) # %% for r in contenders: p.plot_all_episodes(r)