Example No. 1
def check_if_q_table_stayed_the_same(qtable1, qtable2):
    # Sanity check: the Q-table must not change during evaluation (test) episodes.
    q_variation = q_table_variation(qtable1, qtable2)
    if q_variation != 0:
        raise Exception("Q-table changed after test", q_variation)
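Every example here calls a q_table_variation helper that is not included in these excerpts. The following is a minimal sketch of such a helper, assuming it returns the normalized mean absolute difference between two Q-tables; the exact definition used by the original project may differ.

import numpy as np

def q_table_variation(old_q_table: np.ndarray, new_q_table: np.ndarray) -> float:
    # Hypothetical sketch: mean absolute change between the two tables,
    # normalized by the mean absolute value of the old table when non-zero.
    diff = np.abs(np.asarray(new_q_table) - np.asarray(old_q_table))
    denom = np.abs(np.asarray(old_q_table)).mean()
    if denom == 0:
        return float(diff.mean())
    return float(diff.mean() / denom)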
Example No. 2
     except NoActionPlayedError:
         print("\n!!! Agent was unbale to play an action !!!")
         av_reward = 0
     # except ServerDownError as e:
     #     print("\n!!! Server is Down !!!")
     #     print("iteration={}; trained_eps={}")
     #     print(str(e))
     #     break
     # check if agent trained correctly
     check_if_q_table_stayed_the_same(q_table_after_train, agent.q_table)
     sum_trained_eps = trained_eps_list[-1] + num_train_ep
     if agent.trained_eps != sum_trained_eps:
         raise Exception("Trained episodes and expected number do "
                         "not match")
     # Calc metrics:
     q_var = round(q_table_variation(prev_q_table, q_table_after_train), 4)
     print("<<TRAIN>> Q variation ", q_var)
     # Save metrics:
     trained_eps_list.append(sum_trained_eps)
     avr_epsilons_list.append(agent.epsilon)
     learning_rates.append(agent.learning_rate)
     avr_rewards_list.append(av_reward)
     qlearning_variation_list.append(q_var)
 print("\n\n!!!!!!!!! AGENT FINISHED !!!!!!!!!!!!\n\n")
 # Save and export metrics:
 save_model(q_table=agent.q_table,
            file_name="agent_model",
            directory=save_dir)
 export_metrics(trained_eps=trained_eps_list,
                rewards=avr_rewards_list,
                epsilons=avr_epsilons_list,
Example No. 3
                     features=features_manager,
                     actions=actions_manager,
                     reward_funct=reward_function)
    # Save metrics:
    trained_eps_list.append(0)
    avr_epsilons_list.append(agent.epsilon)
    avr_rewards_list.append(av_reward)
    q_tables_list.append(agent.q_table.copy().tolist())
    q_table_after_train = agent.q_table.copy()

    # Train - test iterations:
    for i in range(num_repetitions):
        print(">>>> {}/{} <<<<".format(i, num_repetitions))
        try:
            print("\n???? Check if q_table stayed the same after train: ",
                  q_table_variation(q_table_after_train, agent.q_table))
            prev_q_table = agent.q_table.copy()
            # Train:
            train(num_train_episodes=num_train_ep,
                  num_total_train_ep=num_train_ep * num_repetitions,
                  game_interface=hfo_interface,
                  features=features_manager,
                  agent=agent,
                  actions=actions_manager,
                  reward_funct=reward_function)
            q_table_after_train = agent.q_table.copy()
            q_tables_list.append(q_table_after_train.tolist())
            sum_trained_eps = trained_eps_list[-1] + num_train_ep
            print("???? Agent trained episodes ", sum_trained_eps)
            print("???? Difference between q tables after training: ",
                  q_table_variation(prev_q_table, q_table_after_train))
Example No. 4
 def save_metrics(self, old_q_table: np.ndarray, new_q_table: np.ndarray):
     self.rewards.append(self.cum_reward)
     self.eps_history.append(self.epsilon)
     self.lr_history.append(self.learning_rate)
     self.q_table_history.append(q_table_variation(old_q_table,
                                                   new_q_table))
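For context, a possible calling pattern for save_metrics is sketched below; do_training_step is a hypothetical placeholder for whatever code updates the agent's Q-table between the two snapshots, not a function from the original project.

# Hedged usage sketch (do_training_step is a made-up placeholder):
old_q_table = agent.q_table.copy()    # snapshot before the update
do_training_step(agent)               # hypothetical: any step that changes agent.q_table
agent.save_metrics(old_q_table=old_q_table,
                   new_q_table=agent.q_table)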
Example No. 5
                   game_interface=hfo_interface,
                   features=features_manager,
                   agent=agent,
                   actions=actions_manager,
                   reward_funct=reward_function,
                   save_metrics=False)
     # Test:
     av_reward = test(num_episodes=num_test_ep,
                      agent=agent,
                      game_interface=hfo_interface,
                      features=features_manager,
                      actions=actions_manager,
                      reward_funct=reward_function)
     # Save metrics:
     q_table_variation_history.append(
         q_table_variation(prev_qtable, agent.q_table))
     epsilons_history.append(agent.epsilon)
     num_ep_trained = (num_train_ep * i) + num_train_ep
     avr_rewards.append(av_reward)
 actions_name = [
     actions_manager.map_action_to_str(i, has_ball=True)
     for i in range(agent.num_actions)
 ]
 # Export:
 export_test_metrics(eps_history=epsilons_history,
                     rewards=avr_rewards,
                     save_dir=agent.save_dir,
                     q_table_variation=q_table_variation_history,
                     actions_name=actions_name,
                     visited_states_matrix=agent.visited_states_counter,
                     training=False)
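export_test_metrics is project-specific and its body is not part of these excerpts. A comparable exporter might simply serialize the collected per-iteration lists to disk; the sketch below assumes a JSON file inside save_dir and is only an illustration of the data being passed, not the original implementation.

import json
import os

import numpy as np

def export_test_metrics(eps_history, rewards, save_dir, q_table_variation,
                        actions_name, visited_states_matrix, training):
    # Hypothetical sketch: bundle the per-iteration metrics and write them
    # to train_metrics.json / test_metrics.json inside save_dir.
    metrics = {
        "training": training,
        "epsilons": list(eps_history),
        "avg_rewards": list(rewards),
        "q_table_variation": list(q_table_variation),
        "actions_name": list(actions_name),
        "visited_states": np.asarray(visited_states_matrix).tolist(),
    }
    file_name = "train_metrics.json" if training else "test_metrics.json"
    with open(os.path.join(save_dir, file_name), "w") as file:
        json.dump(metrics, file, indent=2)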