Example #1
def init_table(self) -> dict:
    """Returns a dict with reward statistics for every combination of oao|ar."""
    table = {}
    a_length = self.environment.action_length
    o_length = self.environment.observation_length
    if o_length == 0:
        # No observations: the empty string is the only observation key.
        table[''] = {}
        for action_idx in range(pow(2, a_length)):
            action = Utility.get_bitstring_from_decimal(action_idx, a_length)
            table[''][action] = {}
            table[''][action][''] = Utility.init_heap(a_length)
    else:
        # One heap per (last_observation, action, observation) combination.
        for last_observation_idx in range(pow(2, o_length)):
            last_observation = Utility.get_bitstring_from_decimal(
                last_observation_idx, o_length)
            table[last_observation] = {}
            for action_idx in range(pow(2, a_length)):
                action = Utility.get_bitstring_from_decimal(action_idx, a_length)
                table[last_observation][action] = {}
                for observation_idx in range(pow(2, o_length)):
                    observation = Utility.get_bitstring_from_decimal(
                        observation_idx, o_length)
                    table[last_observation][action][observation] = Utility.init_heap(a_length)
    return table
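A minimal lookup sketch for the table this method builds, assuming a 1-bit observation/action environment. The inline helpers below are hypothetical stand-ins for the project's Utility.get_bitstring_from_decimal and Utility.init_heap (here assumed to return zero-padded bitstrings and a list of (expected_reward, action, count) tuples, mirroring Example #2); they are not the original implementation.

import heapq

# Hypothetical stand-ins for the Utility helpers (assumptions, not the real code).
def get_bitstring_from_decimal(value: int, length: int) -> str:
    return format(value, f"0{length}b")

def init_heap(length: int) -> list:
    heap = []
    for idx in range(2 ** length):
        # (expected_reward, action, number_of_times), as in Example #2
        heapq.heappush(heap, (1, get_bitstring_from_decimal(idx, length), 0))
    return heap

# Build the same oao-keyed structure init_table produces for a 1-bit environment.
table = {}
for last_obs in ("0", "1"):
    table[last_obs] = {
        action: {obs: init_heap(1) for obs in ("0", "1")}
        for action in ("0", "1")
    }

# Statistics for taking action "1" after observing "0" and then observing "1".
print(table["0"]["1"]["1"])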
Example #2
import heapq

def init_double_heap(length: int):
    """Initialize a heap with two-action entries for the pi2forward agent."""
    reward_statistics = []
    for action_one_idx in range(pow(2, length)):
        action_one = Utility.get_bitstring_from_decimal(action_one_idx, length)
        for action_two_idx in range(pow(2, length)):
            action_two = Utility.get_bitstring_from_decimal(action_two_idx, length)
            # (expected_reward, actions, number_of_times)
            heapq.heappush(reward_statistics, (2, action_one + action_two, 0))
    return reward_statistics
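A usage sketch of what init_double_heap(1) would produce, assuming Utility.get_bitstring_from_decimal returns zero-padded binary strings (an assumption about the project's helper); the loop below simply inlines that assumption rather than calling the real function.

import heapq

# Sketch of init_double_heap(1) with 1-bit actions: one entry per ordered
# pair of actions, each initialized to expected reward 2 and count 0.
reward_statistics = []
for a1 in range(2):
    for a2 in range(2):
        actions = format(a1, "01b") + format(a2, "01b")
        # (expected_reward, actions, number_of_times)
        heapq.heappush(reward_statistics, (2, actions, 0))

print(len(reward_statistics))   # 4 entries: "00", "01", "10", "11"
print(reward_statistics[0])     # (2, '00', 0): heapq keeps the smallest tuple first;
                                # with equal rewards, ties break on the action string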