def evaluate(self, state):
    """ Evaluates the policy for the given state """
    # If bias is desired, we simply append an additional dimension that
    # always takes the value 1
    if self.bias:
        dimensions = [dimension for dimension in state.dimensions]
        biasDimension = Dimension("zzz_bias", "continuous", [[0, 1]])
        dimensions.append(biasDimension)
        input = State(numpy.hstack((state, [1])), dimensions)
    else:
        # Just create a copy of the state
        input = State(state, state.dimensions)

    # Scale state dimensions to the range (-1, 1)
    input.scale(-1, 1)

    # Compute the activation (the preference of the policy) for each action.
    # The last action always has activation 0, which removes redundant
    # representations of the same policy.
    actionActivations = []
    for actionIndex in range(self.numActions - 1):
        activation = numpy.dot(
            self.weights[self.inputDims * actionIndex:
                         self.inputDims * (actionIndex + 1)],
            input)
        actionActivations.append(activation)
    actionActivations.append(0.0)

    # Greedy action selection: pick the action with maximal activation
    selectedAction = max(
        zip(actionActivations, range(len(actionActivations))))[1]

    return self.actions[selectedAction]
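# A minimal, self-contained sketch of the activation/argmax logic above,
# assuming a flat weight vector of length (numActions - 1) * inputDims and an
# input vector that is already scaled; the State/Dimension machinery and the
# bias handling are omitted. The helper name is hypothetical.
import numpy

def greedyActionIndex(weights, inputVector, numActions):
    inputDims = len(inputVector)
    # One dot product per action over the corresponding slice of the weights
    activations = [numpy.dot(weights[inputDims * i:inputDims * (i + 1)],
                             inputVector)
                   for i in range(numActions - 1)]
    activations.append(0.0)  # the last action is pinned to activation 0
    return int(numpy.argmax(activations))

# Example: 2 input dimensions, 3 actions (weights only for the first 2 actions)
# greedyActionIndex(numpy.array([0.5, -0.2, 0.1, 0.3]),
#                   numpy.array([0.4, -0.9]), numActions=3)  # -> 0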
def _jointStateAction(self, state, action):
    """ Create a joint state-action pseudo-state """
    dimensions = [dimension for dimension in state.dimensions]
    actionDimension = copy.deepcopy(
        self.actionSpace.getDimensions()[0])  # per assertion, there is only one
    dimensions.append(actionDimension)

    stateAction = State(numpy.hstack((state, action)), dimensions)
    stateAction.scale()

    return stateAction
def getTile(self, state):
    """ Compute the activated tile for the given state """
    if state in self.stateToTileCache:
        # Tile has already been computed for this state
        return self.stateToTileCache[state]
    else:
        # Scale a copy of the state to avoid side effects on the caller's state
        scaledState = State(state, copy.copy(state.dimensions))
        scaledState.scale(0, 1)
        # Discretize each (offset) dimension into tilesPerDimension tiles
        tile = tuple(
            numpy.round((numpy.array(scaledState) + self.offset)
                        * self.tilesPerDimension).astype(int))
        # Cache the result; keep only the 50 most recently added states
        self.stateToTileCache[state] = tile
        self.recentStatesOrder.appendleft(state)
        if len(self.recentStatesOrder) > 50:
            oldestState = self.recentStatesOrder.pop()
            self.stateToTileCache.pop(oldestState)
        return tile
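# A small illustration of the discretization above, stripped of the State
# class and the cache: a hypothetical helper that maps a state whose
# dimensions are already scaled to [0, 1] (plus a per-tiling offset) to one
# tile index per dimension.
import numpy

def tileForScaledState(scaledState, offset, tilesPerDimension):
    # e.g. scaledState=[0.42, 0.87], offset=0.05, tilesPerDimension=10
    #      -> (round(0.47 * 10), round(0.92 * 10)) == (5, 9)
    return tuple(
        numpy.round((numpy.array(scaledState) + offset)
                    * tilesPerDimension).astype(int))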
def evaluate(self, state):
    """ Evaluates the policy for the given state """
    # If bias is desired, we simply append an additional dimension that
    # always takes the value 1
    if self.bias:
        dimensions = [dimension for dimension in state.dimensions]
        biasDimension = Dimension("zzz_bias", "continuous", [[0, 1]])
        dimensions.append(biasDimension)
        state = State(numpy.hstack((state, [1])), dimensions)

    # Scale state dimensions to the range (-1, 1)
    state.scale(-1, 1)

    # Compute the activation (the preference of the policy) for each action
    output = []
    for outputDimIndex in range(self.numActions):
        activation = numpy.dot(
            self.weights[self.inputDims * outputDimIndex:
                         self.inputDims * (outputDimIndex + 1)],
            state)
        output.append(activation)

    return output