Beispiel #1
0
    def update(self, action, measurements, reward):
        """Record one observed transition and refresh the model's estimates.

        If the current state has no q-state for the given action, the call
        returns right after registering the visit and nothing else changes.

        Args:
            action (tuple): The action that was just executed
            measurements (dict): The measurements observed after the action
            reward (double): The reward obtained for executing the action

        Raises:
            StateNotSetError: If no current state has been set

        """
        origin = self._current_state
        if origin is None:
            raise StateNotSetError()

        # the visit is registered even when the action turns out to be unknown
        origin.visit()
        taken = origin.get_q_state(action)
        if taken is None:
            return

        # resolve the state we ended up in and record the transition towards it
        successor = self._get_state(measurements)
        taken.update(successor, reward)

        # refresh the values with whichever algorithm the model is configured for
        algorithm = self._update_algorithm
        if algorithm == SINGLE_UPDATE:
            self._q_update(taken)
            origin.update_value()
        elif algorithm == VALUE_ITERATION:
            self.value_iteration()
        elif algorithm == PRIORITIZED_SWEEPING:
            self.prioritized_sweeping()

        self._current_state = successor
    def suggest_action(self):
        """Return the action the current state considers optimal.

        Returns:
            action (tuple): The result of get_optimal_action() on the current state

        Raises:
            StateNotSetError: If no current state has been set

        """
        current = self._current_state
        if current is not None:
            return current.get_optimal_action()

        raise StateNotSetError(logger)
Beispiel #3
0
    def get_legal_actions(self):
        """Return the actions that are legal from the current state.

        Returns:
            legal_actions (list(tuple)): The result of get_legal_actions() on the
                current state

        Raises:
            StateNotSetError: If no current state has been set

        """
        current = self._current_state
        if current is not None:
            return current.get_legal_actions()

        raise StateNotSetError()
Beispiel #4
0
    def suggest_action(self):
        """Return the next action to take according to the greedy criterion.

        Returns:
            optimal_action (tuple(str, int)): The result of get_optimal_action()
                on the current state

        Raises:
            StateNotSetError: If no current state has been set

        """
        current = self._current_state
        if current is not None:
            return current.get_optimal_action()

        raise StateNotSetError()
    def prioritized_sweeping(self,
                             initial_state=None,
                             error=None,
                             max_updates=None,
                             debug=False):
        """Runs prioritized sweeping starting from the given state.

        Args:
            initial_state (State): The state to update first; defaults to the
                current state
            error (double): Sweeping stops once the highest priority is at or
                below this value; defaults to self._update_error
            max_updates (int): Upper bound on the number of state updates;
                defaults to self._max_updates
            debug (bool): Not used by this method; kept so existing callers
                that pass it keep working

        Raises:
            StateNotSetError: If neither initial_state nor a current state is set

        """
        if self._current_state is None and initial_state is None:
            raise StateNotSetError(logger)

        if initial_state is None:
            initial_state = self._current_state
        if error is None:
            error = self._update_error
        if max_updates is None:
            max_updates = self._max_updates

        # rebuild the reverse transition map for every state:
        # reverse_transitions[s][p] is the value that get_max_transitions()
        # of state p reports for state s
        reverse_transitions = [{} for _ in self._states]
        for predecessor in self._states:
            for state_num, t in predecessor.get_max_transitions().items():
                reverse_transitions[state_num][predecessor.state_num] = t

        state = initial_state
        for _ in range(max_updates):

            # update the state value
            old_value = state.get_value()
            self._v_update(state)
            new_value = state.get_value()
            delta = abs(new_value - old_value)

            # update the priorities of the predecessors
            rev_transitions = reverse_transitions[state.state_num]
            for state_num, t in rev_transitions.items():
                self._priorities[state_num] = max(t * delta,
                                                  self._priorities[state_num])

            # zero the updated state's priority
            self._priorities[state.state_num] = 0

            # Choose the next max priority state (first one wins on ties;
            # index 0 is kept when no priority is above zero)
            # TODO with Priority Queue - but needs to support item removal
            max_index, max_priority = 0, 0
            for index, priority in enumerate(self._priorities):
                if priority > max_priority:
                    max_index, max_priority = index, priority

            # stop if the priority gets below the supplied limit
            if max_priority <= error:
                break

            state = self._states[max_index]
    def update(self, action, measurements, reward):
        """Feed one observed transition into the model and refresh its values.

        The state matching the previously stored measurements is looked up
        again, the transition towards the state matching the new measurements
        is stored, and the configured update algorithm is run. A split is
        attempted afterwards when splitting is allowed.

        Args:
            action (tuple): The action that was just executed
            measurements (dict): The measurements observed after the action
            reward (double): The reward obtained for executing the action

        Raises:
            StateNotSetError: If no measurements have been stored yet

        """
        previous = self._current_measurements
        if previous is None:
            raise StateNotSetError(logger)

        # TODO move this where the splitting is decided
        origin = self._root.get_state(previous)
        self._current_state = origin

        # resolve the state reached by the action
        successor = self._root.get_state(measurements)
        successor_num = successor.state_num

        # keep the raw transition on the state we started from
        record = (previous, measurements, action, reward)
        origin.store_transition(record, successor_num)

        # refresh the q-state of the executed action
        taken = origin.get_q_state(action)
        taken.update(successor, reward)

        # refresh the values with whichever algorithm the model is configured for
        algorithm = self._update_algorithm
        if algorithm == SINGLE_UPDATE:
            self._q_update(taken)
            origin.update_value()
        elif algorithm == VALUE_ITERATION:
            self.value_iteration()
        elif algorithm == PRIORITIZED_SWEEPING:
            self.prioritized_sweeping()

        # give the model a chance to split once the values are updated
        if self._allow_splitting:
            self.split()

        # move to the new state and remember the measurements that led there
        self._current_state = successor
        self._current_measurements = measurements
    def get_legal_actions(self):
        """Return the actions that are legal from the current state.

        Returns:
            The result of get_legal_actions() on the current state

        Raises:
            StateNotSetError: If no current state has been set

        """
        current = self._current_state
        if current is not None:
            return current.get_legal_actions()

        raise StateNotSetError(logger)
Beispiel #8
0
    def prioritized_sweeping(self,
                             initial_state=None,
                             error=None,
                             max_updates=None):
        """Runs prioritized sweeping starting from the given state.

        Args:
            initial_state (State): The initial state in the prioritized sweeping
                process; defaults to the current state
            error (double): The updating error; sweeping stops once the highest
                priority is at or below it. Defaults to self._update_error
            max_updates (int): The max number of updates; defaults to
                self._max_updates

        Raises:
            StateNotSetError: If neither initial_state nor a current state is set

        """
        if self._current_state is None and initial_state is None:
            raise StateNotSetError()

        if initial_state is None:
            initial_state = self._current_state
        if error is None:
            error = self._update_error
        if max_updates is None:
            max_updates = self._max_updates

        # transition probabilities have changed for the initial state, so only
        # its entries in the stored reverse transition map are refreshed
        initial_s_num = initial_state.state_num
        for state_num, t in initial_state.get_max_transitions().items():
            self._reverse_transitions[state_num][initial_s_num] = t

        state = initial_state
        for _ in range(max_updates):

            # Update the state value
            old_value = state.value
            self._v_update(state)
            new_value = state.value
            delta = abs(new_value - old_value)

            # Update the priorities of the predecessors
            rev_transitions = self._reverse_transitions[state.state_num]
            for state_num, t in rev_transitions.items():
                self._priorities[state_num] = max(t * delta,
                                                  self._priorities[state_num])

            # zero the updated state's priority
            self._priorities[state.state_num] = 0

            # choose the next max priority state (first one wins on ties;
            # index 0 is kept when no priority is above zero)
            # TODO with Priority Queue - but needs to support item removal
            max_index, max_priority = 0, 0
            for index, priority in enumerate(self._priorities):
                if priority > max_priority:
                    max_index, max_priority = index, priority

            # stop if the priority gets below the supplied limit
            if max_priority <= error:
                break

            state = self._states[max_index]