예제 #1
0
    def get_best_action(self, coordinates, shape):
        """Return one of the allowed actions with the highest Q-table value.

        Candidate actions come from
        ``Action.get_allowed_action(coordinates, shape)`` and each one is
        scored with ``self._q_table[action]``. When several actions share the
        highest value, one of them is picked with ``random.choice``.

        Args:
            coordinates: Forwarded unchanged to ``Action.get_allowed_action``.
            shape: Forwarded unchanged to ``Action.get_allowed_action``.

        Returns:
            An allowed action whose Q-table value is maximal.

        Raises:
            IndexError: If no action was selected (for example when there are
                no allowed actions), because ``random.choice`` rejects an
                empty sequence.
        """
        # Start from negative infinity instead of a large finite magic number,
        # so that arbitrarily negative Q-values can still become the maximum.
        highest_value = float("-inf")
        selected_actions = []
        for action in Action.get_allowed_action(coordinates, shape):
            value = self._q_table[action]
            if value > highest_value:
                # New strict maximum: restart the list of tied candidates.
                highest_value = value
                selected_actions = [action]
            elif value == highest_value:
                # Tie with the current maximum: keep it as a candidate.
                selected_actions.append(action)

        # Every remaining candidate has the same value, so pick one at random.
        return random.choice(selected_actions)
예제 #2
0
 def get_random_action(self, coordinates, shape):
     """Return a randomly chosen action from the allowed ones.

     The candidates are whatever
     ``Action.get_allowed_action(coordinates, shape)`` returns; the pick is
     made with ``random.choice``.
     """
     return random.choice(Action.get_allowed_action(coordinates, shape))