Example #1
    def evaluate(self, state):
        """ Evaluates the policy for the given state """
        # If bias is desired, we simply append an additional dimension that
        # always takes the value 1
        if self.bias:
            dimensions = [dimension for dimension in state.dimensions]
            biasDimension = Dimension("zzz_bias", "continuous", [[0, 1]])
            dimensions.append(biasDimension)
            inputState = State(numpy.hstack((state, [1])), dimensions)
        else:  # Just create a copy of the state
            inputState = State(state, state.dimensions)

        # Scale state dimensions to range (-1, 1)
        inputState.scale(-1, 1)

        # Compute the activation (the preference of the policy) for each action.
        # The last action always has activation 0 (this removes redundant
        # representations of the same policy)
        actionActivations = []
        for actionIndex in range(self.numActions - 1):
            activation = numpy.dot(
                self.weights[self.inputDims * actionIndex:self.inputDims *
                             (actionIndex + 1)], inputState)
            actionActivations.append(activation)
        actionActivations.append(0.0)

        # Greedy action selection
        selectedAction = max(
            zip(actionActivations, range(len(actionActivations))))[1]

        return self.actions[selectedAction]
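The weight layout used above is easiest to see in isolation: the policy stores one block of inputDims weights per action, except the last action, whose activation is fixed at 0, and then picks the action with the largest dot product. Below is a minimal self-contained sketch of the same idea with plain numpy arrays instead of the framework's State class; all names and sizes are illustrative, not part of the original code.

    import numpy

    num_actions = 3   # illustrative
    input_dims = 4    # number of state dimensions (plus one if a bias entry is appended)

    weights = numpy.random.randn((num_actions - 1) * input_dims)
    state = numpy.random.uniform(-1.0, 1.0, input_dims)   # state already scaled to (-1, 1)

    # One linear activation per action; the last action is pinned to 0 so that
    # each greedy policy has a unique weight representation.
    activations = [numpy.dot(weights[a * input_dims:(a + 1) * input_dims], state)
                   for a in range(num_actions - 1)]
    activations.append(0.0)

    # Greedy action selection: index of the largest activation
    greedyAction = int(numpy.argmax(activations))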
Example #2
    def _jointStateAction(self, state, action):
        """ Create a joint state-action pseudo-state """
        dimensions = [dimension for dimension in state.dimensions]
        actionDimension = copy.deepcopy(
            self.actionSpace.getDimensions()[0])  # an assert guarantees there is only one
        dimensions.append(actionDimension)
        stateAction = State(numpy.hstack((state, action)), dimensions)
        stateAction.scale()
        return stateAction
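Stripped of the framework's State and Dimension bookkeeping, the joint pseudo-state is simply the state values with the single action value appended, which is then scaled before being handed to a function approximator. A rough sketch with plain numpy (all values are illustrative):

    import numpy

    state = numpy.array([0.3, -1.2, 0.7])   # illustrative state values
    action = numpy.array([0.5])             # a single (continuous) action value

    # Joint pseudo-state: state features followed by the action value
    stateAction = numpy.hstack((state, action))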
Example #3
    def getTile(self, state):
        """ Compute the activated tile for the given state """
        if state in self.stateToTileCache:
            return self.stateToTileCache[state]
        else:
            scaledState = State(state, copy.copy(
                state.dimensions))  # avoid side effects
            scaledState.scale(0, 1)
            tile = tuple(
                numpy.round((numpy.array(scaledState) + self.offset) *
                            self.tilesPerDimension).astype(int))
            self.stateToTileCache[state] = tile
            self.recentStatesOrder.appendleft(state)
            # Keep the cache bounded: evict the least recently added state
            if len(self.recentStatesOrder) > 50:
                oldestState = self.recentStatesOrder.pop()
                self.stateToTileCache.pop(oldestState)
            return tile
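Cache handling aside, the tile computation itself is a uniform grid discretization: scale the state into [0, 1], shift it by the tiling's offset, and round to integer grid coordinates. A small self-contained sketch of that step follows; the resolution and offset values are made up for illustration.

    import numpy

    tilesPerDimension = numpy.array([10, 10])   # grid resolution per state dimension
    offset = numpy.array([0.0, 0.05])           # shift of this particular tiling

    scaledState = numpy.array([0.42, 0.87])     # state already scaled to [0, 1]
    tile = tuple(numpy.round((scaledState + offset) * tilesPerDimension).astype(int))
    # -> (4, 9): the integer grid coordinates of the activated tile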
Example #4
    def evaluate(self, state):
        """ Evaluates the policy for the given state """
        # If bias is desired, we simply append an additional dimension that
        # always takes the value 1
        if self.bias:
            dimensions = [dimension for dimension in state.dimensions]
            biasDimension = Dimension("zzz_bias", "continuous", [[0, 1]])
            dimensions.append(biasDimension)
            state = State(numpy.hstack((state, [1])), dimensions)

        # Scale state dimensions to range (-1, 1)
        state.scale(-1, 1)

        # Compute the activation (the preference of the policy) for each action
        output = []
        for outputDimIndex in range(self.numActions):
            activation = numpy.dot(
                self.weights[self.inputDims * outputDimIndex:self.inputDims *
                             (outputDimIndex + 1)], state)
            output.append(activation)

        return output