def test_agent_training() -> None:
    """Smoke-test agent training on the chain environment.

    Trains a Deep Q-Learning agent via epsilon-greedy search, then runs a
    purely random baseline, asserting that both searches return a result.
    """
    # Build the DQL policy up front so the search call stays readable.
    dql_policy = dqla.DeepQLearnerPolicy(
        ep=ep,
        gamma=0.015,
        replay_memory_size=10000,
        target_update=10,
        batch_size=512,
        learning_rate=0.01,  # torch default is 1e-2
    )

    # DQL training run: start mostly greedy-exploring (epsilon 0.90) and
    # decay exponentially toward the floor of 0.10.
    dql_result = learner.epsilon_greedy_search(
        cyberbattle_gym_env=cyberbattlechain,
        environment_properties=ep,
        learner=dql_policy,
        episode_count=training_episode_count,
        iteration_count=iteration_count,
        epsilon=0.90,
        render=False,
        # epsilon_multdecay=0.75,  # 0.999,
        epsilon_exponential_decay=5000,  # 10000
        epsilon_minimum=0.10,
        verbosity=Verbosity.Quiet,
        title="DQL",
    )
    assert dql_result

    # Baseline for comparison: a random policy (epsilon fixed at 1.0).
    baseline_result = learner.epsilon_greedy_search(
        cyberbattlechain,
        ep,
        learner=learner.RandomPolicy(),
        episode_count=training_episode_count,
        iteration_count=iteration_count,
        epsilon=1.0,  # purely random
        render=False,
        verbosity=Verbosity.Quiet,
        title="Random search",
    )
    assert baseline_result
cyberbattlechain_10, ep, learner=best_dqn_learning_run_10['learner'], episode_count=eval_episode_count, iteration_count=iteration_count, epsilon=0.0, # 0.35, render=False, render_last_episode_rewards_to='images/chain10', title="Exploiting DQL", verbosity=Verbosity.Quiet) # %% random_run = learner.epsilon_greedy_search( cyberbattlechain_10, ep, learner=learner.RandomPolicy(), episode_count=eval_episode_count, iteration_count=iteration_count, epsilon=1.0, # purely random render=False, verbosity=Verbosity.Quiet, title="Random search") # %% # Plot averaged cumulative rewards for DQL vs Random vs DQL-Exploit themodel = dqla.CyberBattleStateActionModel(ep) p.plot_averaged_cummulative_rewards( all_runs=[best_dqn_learning_run_10, random_run, dql_exploit_run], title= f'Benchmark -- max_nodes={ep.maximum_node_count}, episodes={eval_episode_count},\n' f'State: {[f.name() for f in themodel.state_space.feature_selection]} '