# NOTE(review): whitespace-collapsed tail of a training script. The line starts
# with `#`, so as written the whole line parses as one comment. The loop that
# binds `e` and the definitions of `agent`, `manager`, `new_weights`, `loss`,
# `epsilon` and `test_steps` are outside this view, and the original line
# breaks and indentation (including where the `if e % 5 == 0:` suite ends) are
# lost -- restore them from the upstream file rather than guessing.
# Visible steps, in order: pass new_weights to agent.set_weights and
# manager.set_agent, re-fetch the agent from the manager, call manager.test and
# hand loss/time_steps to manager.update_aggregator, print a progress line,
# multiply epsilon by .9 under `e % 5 == 0`, then print "done" and call
# manager.test with render=True. The save_model/load_model calls are
# commented out in this variant.
# set new weights agent.set_weights(new_weights) manager.set_agent(new_weights) # get new agent agent = manager.get_agent() # update aggregator time_steps = manager.test(test_steps) manager.update_aggregator(loss=loss, time_steps=time_steps) # print progress print( f"epoch ::: {e} loss ::: {loss} avg env steps ::: {np.mean(time_steps)}" ) # you can also alter your manager's parameters if e % 5 == 0: epsilon = epsilon * .9 manager.set_epsilon(epsilon=epsilon) print(f"New epsilon: {epsilon}") # if e % saving_after == 0: # #you can save models # manager.save_model(saving_path, e) # and load models # manager.load_model(saving_path) print("done") print("testing optimized agent") manager.test(test_steps, test_episodes=10, render=True, do_print=True)
# NOTE(review): tail of a training script restored from a whitespace-collapsed
# line. The loop that binds `e` is outside this view, so the statements are
# laid out as one flat sequence; re-indent the per-epoch part (presumably
# everything before "test after training" -- confirm) under that loop.

# get and apply new weights
new_weights = agent.get_weights()
manager.set_agent(new_weights)

# get new agent
agent = manager.get_agent()

# update aggregator; the test run is rendered only when e is a multiple of 5
render_now = e % 5 == 0
time_steps = manager.test(test_steps, render=render_now)
manager.update_aggregator(loss=losses_list, time_steps=time_steps)

# report the mean over the per-entry mean losses and the mean of time_steps
epoch_loss = np.mean([np.mean(entry) for entry in losses_list])
epoch_steps = np.mean(time_steps)
print(f"epoch ::: {e} loss ::: {epoch_loss} avg env steps ::: {epoch_steps}")
manager.agg.save_graphic()
print("---")

# alter epsilon parameter: 0.90 plus an offset of 0.05 / (e + 1)
new_epsilon = 0.90 + 0.05 / (e + 1)
manager.set_epsilon(epsilon=new_epsilon)

# test after training
print("done")
print("testing optimized agent")
manager.test(
    test_steps,
    test_episodes=10,
    render=True,
    do_print=True,
    evaluation_measure="time_and_reward",
)
# NOTE(review): whitespace-collapsed fragment that begins inside a bracketed
# expression: the opening of the list closed by the leading
# `... for _ in range(1000) ]` is outside this view, as are the loop binding
# `e` and the definitions of `agent`, `manager`, `dummy_losses`, `test_steps`,
# `saving_after` and `saving_path`. Line breaks and indentation are lost, so
# it cannot be told from here whether manager.load_model sits inside the
# `if e % saving_after == 0:` suite -- restore from the upstream file.
# Visible steps, in order: read agent.model.get_weights(), pass the result to
# manager.set_agent, re-fetch the agent, call manager.test and hand
# dummy_losses/time_steps to manager.update_aggregator, print a progress line,
# set epsilon to 0.99, call save_model under `e % saving_after == 0`, call
# load_model, then print "done" and call manager.test with render=True.
np.mean(np.random.normal(size=(64, 100)), axis=0) for _ in range(1000) ] new_weights = agent.model.get_weights() # set new weights manager.set_agent(new_weights) # get new agent agent = manager.get_agent() # update aggregator time_steps = manager.test(test_steps) manager.update_aggregator(loss=dummy_losses, time_steps=time_steps) # print progress print( f"epoch ::: {e} loss ::: {np.mean([np.mean(l) for l in dummy_losses])} avg env steps ::: {np.mean(time_steps)}" ) # you can also alter your manager's parameters manager.set_epsilon(epsilon=0.99) if e % saving_after == 0: # you can save models manager.save_model(saving_path, e) # and load models manager.load_model(saving_path) print("done") print("testing optimized agent") manager.test(test_steps, test_episodes=10, render=True)
# NOTE(review): whitespace-collapsed tail of a training script. The line starts
# with `#`, so as written the whole line parses as one comment. The loop that
# binds `e` (and that the `break` below exits) and the definitions of
# `manager`, `loss`, `mean_list` and `test_steps` are outside this view; line
# breaks and indentation, including where each `if` suite ends, are lost --
# restore them from the upstream file rather than guessing.
# Visible steps, in order: re-fetch the agent, call manager.test with
# test_episodes=50 and render=False, update the aggregator, print a progress
# line, append np.mean(time_steps) to mean_list, break when
# np.mean(mean_list) > 195, multiply manager.kwargs['epsilon'] by 0.85 under
# `(e + 1) % 5 == 0`, then set epsilon to 0.0 and call manager.test with
# render=True.
# NOTE(review): the "5 times in a row" wording below depends on how mean_list
# is sized or trimmed, which is not visible here -- confirm against the caller.
# get new agent agent = manager.get_agent() # update aggregator time_steps = manager.test(test_steps, test_episodes=50, render=False) manager.update_aggregator(loss=loss, time_steps=time_steps) print( f"epoch ::: {e} loss ::: {loss.numpy()} avg env steps ::: {np.mean(time_steps)}" ) # store performance mean_list.append(np.mean(time_steps)) # if performance was good 5 times in a row, stop training if np.mean(mean_list) > 195: break # Annealing epsilon if (e + 1) % 5 == 0: new_epsilon = 0.85 * manager.kwargs['epsilon'] manager.set_epsilon(new_epsilon) print("New Epsilon: ", new_epsilon) print("Done!") print("Testing optimized agent...") manager.set_epsilon(0.0) manager.test(test_steps, test_episodes=10, render=True, do_print=True)