Ejemplo n.º 1
0
def trial(pair: Tuple[Type[Agent], Type[Environment]],
          sign_bit: str,
          num_trials: int = 100,
          num_cycles: int = 10000,
          seed: int = 1) -> Tuple[str, str, str, list]:
    """Run repeated trials of an agent class inside an environment class.

    Each trial re-instantiates the environment and agent with fresh seeds
    derived from a Park-Miller LCG, runs ``num_cycles`` percept/action
    cycles, and records the normalized cumulative reward.

    Returns a tuple of (agent name, environment name, sign label,
    list of per-trial normalized rewards).
    """
    agent_class, environment_class = pair
    sign = "positive" if sign_bit == "0" else "negative"
    # Park-Miller "minimal standard" generator constants for seed derivation.
    multiplier, modulus = 16807, pow(2, 31) - 1
    normalized_rewards = []
    for _ in range(num_trials):
        environment = environment_class(sign_bit, seed=seed)
        seed = (seed * multiplier) % modulus
        agent = agent_class(environment=environment, seed=seed)
        seed = (seed * multiplier) % modulus
        observation = "0" * environment.observation_length
        accumulated = 0
        for _ in range(num_cycles):
            action = agent.calculate_action(observation)
            observation, reward = environment.calculate_percept(action)
            accumulated += Utility.get_reward_from_bitstring(reward)
            agent.train(reward)
        # Normalize by the best average reward achievable over the run.
        accumulated /= num_cycles * environment_class.max_average_reward_per_cycle
        normalized_rewards.append(accumulated)
    return agent_class.__name__, environment_class.__name__, sign, normalized_rewards
Ejemplo n.º 2
0
 def calculate_action(self, observation: str) \
         -> str:
     """Feed percept into nn and calculate best activations action. Returns action."""
     chosen = ""
     best_reward = -2
     act_len = self.environment.action_length
     n_actions = pow(2, act_len)
     # Epsilon-greedy: with probability 1/10 explore a uniformly random action.
     if self.seeded_rand_range(0, 10) == 0:
         rand_idx = self.seeded_rand_range(0, n_actions)
         candidate = format(rand_idx, 'b').zfill(act_len)
         net_in = NNUtility.bitstr_to_narray(observation + candidate)
         net_out = self.nn.forward(net_in)
         chosen = candidate
         self.activations = net_out
     else:
         # Exploit: evaluate the network's predicted reward for every action.
         for idx in range(n_actions):
             candidate = format(idx, 'b').zfill(act_len)
             net_in = NNUtility.bitstr_to_narray(observation + candidate)
             net_out = self.nn.forward(net_in)
             predicted = Utility.get_reward_from_bitstring(
                 NNUtility.narray_to_bitstr(net_out[1][-1]))
             if predicted == best_reward:
                 # Tie with the incumbent: keep or replace it at random.
                 if self.seeded_rand_range() == 1:
                     chosen = candidate
                     self.activations = net_out
             elif predicted > best_reward:
                 chosen = candidate
                 best_reward = predicted
                 self.activations = net_out
     return chosen
Ejemplo n.º 3
0
 def train(self, reward: str):
     """Fold the environment's returned reward into the running statistic."""
     # Rewards are negated so the min-heap surfaces the best action first.
     value = -Utility.get_reward_from_bitstring(reward)
     mean, action, count = self.action_statistic
     # Incremental running-average update over count+1 observations.
     updated_mean = (mean * count + value) / (count + 1)
     heapq.heappush(self.table[self.observation], (updated_mean, action, count + 1))
Ejemplo n.º 4
0
 def train(self, reward: str):
     """Add returned reward to statistic.

     Updates the running-average expected reward of the matching entry in
     the action heap for the second-to-last observation/action context,
     then restores the heap invariant around the modified entry.
     """
     self.cnt += 1
     # Shift the two-step reward window: previous "next reward" becomes current.
     self.r = self.nr
     # Reward is negated — presumably so a min-heap surfaces the best action
     # first; confirm against the heap's consumer.
     self.nr = -1 * Utility.get_reward_from_bitstring(reward)
     # Heap of (expected_reward, action, count) tuples, keyed by
     # second-to-last observation, second-to-last action, last observation.
     action_heap = self.table[self.sl_o][self.sl_a][self.l_o]
     for idx in range(len(action_heap)):
         # Match the entry whose action equals the last+current action string.
         if action_heap[idx][1] == self.l_a + self.a:
             expected_reward = action_heap[idx][0]
             # NOTE(review): rebinding the `reward` parameter here shadows the
             # bitstring argument; works, but a distinct name would be clearer.
             reward = self.r + self.nr
             cnt = action_heap[idx][2]
             # Incremental running-average update over cnt+1 observations.
             new_expected_reward = (expected_reward * cnt + reward) / (cnt + 1)
             action_heap[idx] = (new_expected_reward, action_heap[idx][1], cnt + 1)
             # Re-establish heap order for the one changed entry: sift in the
             # direction the key moved (semantics depend on Utility's helpers).
             if expected_reward < reward:
                 Utility.heapq_siftup(action_heap, idx)
             else:
                 Utility.heapq_siftdown(action_heap, 0, idx)
             break