def _jointStateAction(self, state, action):
    """ Create a joint state-action pseudo-state """
    dimensions = [dimension for dimension in state.dimensions]
    # Per the assertion elsewhere, the action space has exactly one dimension
    actionDimension = copy.deepcopy(self.actionSpace.getDimensions()[0])
    dimensions.append(actionDimension)
    stateAction = State(numpy.hstack((state, action)), dimensions)
    stateAction.scale()
    return stateAction

def getStateList(self):
    """ Returns a list of all possible states.

    Even if this state space has more than one dimension, it returns a
    one-dimensional list that contains all possible states. This is achieved
    by creating the cross product of the values of all dimensions. It
    requires that all dimensions are discrete.
    """
    # Check that all dimensions are discrete
    for dimension in self.getDimensions():
        assert dimension.isDiscrete(), \
            "State lists are available only for discrete state spaces!"

    # Create the cross product of all possible dimension values
    crossProduct = lambda ss, row=[], level=0: \
        len(ss) > 1 \
        and reduce(lambda x, y: x + y,
                   [crossProduct(ss[1:], row + [i], level + 1) for i in ss[0]]) \
        or [row + [i] for i in ss[0]]

    listOfStateDimensionValues = [dimension.getValues()
                                  for dimension in self.getDimensions()]

    # Return the cross product of states
    return map(lambda value: State(value, self.getDimensions()),
               crossProduct(listOfStateDimensionValues))

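# The nested crossProduct lambda above is fairly dense. The following is a
# minimal, framework-independent sketch (assumption: the per-dimension values
# are plain Python lists) showing that it computes the same cross product as
# itertools.product:
import itertools

def crossProductSketch(listOfDimensionValues):
    """Return every combination of dimension values, one list per combination."""
    return [list(combination)
            for combination in itertools.product(*listOfDimensionValues)]

# Example: crossProductSketch([[0, 1], ["a", "b"]])
# -> [[0, 'a'], [0, 'b'], [1, 'a'], [1, 'b']]
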
def computeQ(self, state, action):
    """ Computes the Q-value of the given state-action pair

    The Q-value of the query state-action pair is computed as a weighted
    linear combination of the *k* nearest neighbors, where the weighting is
    based on the distance between the respective neighbor state and the
    query state.
    """
    if not action in self.actionsKDTree \
            or self.actionsKDTree[action] == None:
        return 0.0

    k = min(self.k, self.states[action].shape[0])

    indices, distances = self.actionsKDTree[action].knn(state, k)

    qValue = 0.0
    denominator = 0.0
    for index, distance in zip(indices[0], distances[0]):
        neighbor = State(self.states[action][index], state.dimensions)
        neighborsQValue = self.qValues[(neighbor, action)]
        weight = gaussian(distance, self.b_X)
        qValue += weight * neighborsQValue
        denominator += weight

    return qValue / denominator

def getNearestNeighbors(self, state, k, b):
    """ Determines the *k* most similar states to the given *state*

    Returns an iterator over (weight, neighbor) pairs, where weight is the
    Gaussian-weighted influence of the neighbor onto *state*. The weight is
    computed via exp(-dist/b**2) / sum_over_neighbors(exp(-dist_i/b**2)).
    Note that the weights sum to 1.
    """
    if self.states is not None:
        k = min(k, self.states.shape[1])
        if hasattr(self, "kdTree"):  # if we can use approximate nearest neighbor
            indices, distances = self.kdTree.knn(state, k=k)
            # Compute weights based on distance
            weights = numpy.exp(-distances[0] / (b ** 2))
            denominator = numpy.sum(weights)
            # If the distances become too large, then all values can become zero.
            # In this situation, we simply return the closest state and probability 1.
            if denominator == 0:
                import warnings
                warnings.warn("Too large distances, returning only closest example")
                indices[0] = [indices[0][0]]
                weights[0] = 1.0
            else:
                # Normalize weights
                weights = weights / denominator
            for index, weight in zip(indices[0], weights):
                yield weight, State(self.states.T[index], state.dimensions)
        else:
            assert k == 1
            minDist = numpy.inf
            closestSample = None
            for index in range(self.states.shape[1]):
                sampleState = self.states.T[index]
                dist = numpy.linalg.norm(state - sampleState)
                if dist < minDist:
                    minDist = dist
                    closestSample = sampleState
            yield 1.0, State(closestSample, state.dimensions)
    else:
        raise ModelNotInitialized("No state samples available")

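# Both computeQ and getNearestNeighbors weight neighbors by exp(-dist / b**2)
# and normalize so the weights sum to 1. A standalone sketch of that weighting
# (gaussianWeights is a hypothetical helper, not the framework's gaussian()):
import numpy

def gaussianWeights(distances, b):
    """Return normalized Gaussian weights for the given neighbor distances."""
    distances = numpy.asarray(distances, dtype=float)
    weights = numpy.exp(-distances / b ** 2)
    denominator = weights.sum()
    if denominator == 0.0:
        # All neighbors are too far away; fall back to the closest one
        weights = numpy.zeros_like(weights)
        weights[numpy.argmin(distances)] = 1.0
        return weights
    return weights / denominator

# Example: a weighted Q-value estimate from three neighbors
# weights = gaussianWeights([0.1, 0.2, 0.4], b=0.25)
# qValue = numpy.dot(weights, [1.0, 0.5, 0.0])
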
def getSuccessorDistribution(self, state):
    """ Return the successor distribution for the given *state*.

    Returns an iterator that yields pairs of states and their probabilities
    of being the successor of the given *state*.
    """
    if self.states == None:
        raise ModelNotInitialized()

    k = min(self.states.shape[0], self.k)

    if self.rebuildSucc:
        self.succKDTree = ann.kdtree(self.states)
        self.rebuildSucc = False

    indices, distances = self.succKDTree.knn(state, k)

    denominator = numpy.sum(numpy.exp(-distances[0] / (self.b_Sa ** 2)))
    # If the distances become too large, then all values can become zero.
    # In this situation, we simply return the closest state and probability 1.
    if denominator == 0 or numpy.isnan(denominator):
        import warnings
        warnings.warn("Too large distances, returning only closest example")
        indices[0] = [indices[0][0]]
        distances[0] = [0.0]
        denominator = numpy.exp(0.0 / (self.b_Sa ** 2))

    for index, distance in zip(indices[0], distances[0]):
        neighbor = State(self.states[index],
                         state.dimensions)  # TODO: not use state.dimensions
        succState, reward = self.successorSamples[neighbor]
        delta = succState - neighbor
        predictedSuccState = State(state + delta, state.dimensions)
        if not 0 <= gaussian(distance, self.b_Sa) / denominator <= 1:
            import warnings
            import sys
            warnings.warn("Invalid distances in KNN Model!")
            print distances
            sys.exit(0)
        yield predictedSuccState, gaussian(distance, self.b_Sa) / denominator

def getTile(self, state):
    """ Compute the activated tile for the given state """
    if state in self.stateToTileCache:
        return self.stateToTileCache[state]
    else:
        scaledState = State(state, copy.copy(state.dimensions))  # avoid side-effects
        scaledState.scale(0, 1)
        tile = tuple(numpy.round((numpy.array(scaledState) + self.offset)
                                 * self.tilesPerDimension).astype(numpy.int))
        self.stateToTileCache[state] = tile
        self.recentStatesOrder.appendleft(state)
        if len(self.recentStatesOrder) > 50:
            oldestState = self.recentStatesOrder.pop()
            self.stateToTileCache.pop(oldestState)
        return tile

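# getTile maps a scaled state onto a discrete tile. A minimal sketch of the
# same rounding step for a state already scaled to [0, 1] per dimension
# (offset and tilesPerDimension are example values, not framework defaults):
import numpy

def tileForScaledState(scaledState, offset=0.0, tilesPerDimension=10):
    """Map a [0, 1]-scaled state vector onto a discrete tile tuple."""
    return tuple(numpy.round((numpy.asarray(scaledState) + offset)
                             * tilesPerDimension).astype(int))

# Example: tileForScaledState([0.12, 0.87]) -> (1, 9)
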
def sampleState(self):
    """ Return a state drawn randomly """
    stateDensity = self.exampleSet.getStateDensity()
    if stateDensity != None:
        # TODO: Does it make sense to sample based on the data set?
        return State(stateDensity.resample(1).T[0])
    else:
        raise ModelNotInitialized()

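# sampleState draws from a density estimate of the observed states. A
# standalone sketch of the same resample-and-transpose pattern, assuming a
# scipy gaussian_kde-like density (the framework's density object may differ):
import numpy
from scipy.stats import gaussian_kde

stateSamples = numpy.random.rand(2, 50)      # 50 two-dimensional state samples
stateDensity = gaussian_kde(stateSamples)    # density estimate over the samples
drawnState = stateDensity.resample(1).T[0]   # one newly drawn 2-d state vector
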
def parseStateDict(self, stateDict):
    # Check whether the given state dict is a valid one
    assert self._isValidState(stateDict), "State %s is invalid!" % stateDict

    state = State([stateDict[key] for key in sorted(stateDict.keys())],
                  map(lambda name: self[name], sorted(stateDict.keys())))
    return state

def getStates(self):
    """ Return all states contained in this example set """
    if self.states is not None and self.states.shape[1] >= 1:
        return [State(self.states[:, i], self.stateDimensions)
                for i in range(self.states.shape[1])]
    else:
        return []

def sampleSuccessorState(self, state):
    """ Return a state drawn from the state's successor distribution """
    if self._retrainingRequired():
        self._updateModel()

    if self.succStateModel != None:
        return State(state + self.succStateModel.predict(state),
                     state.dimensions)
    else:
        raise ModelNotInitialized()

def redraw(self):
    # Update policy visualization
    for arrow in self.arrowInstances:
        arrow.remove()
    self.arrowInstances = []

    # Iterate over all states and compute the value of the observed function
    dimensions = [self.stateSpace[dimName] for dimName in ["column", "row"]]
    states = [State((column, row), dimensions)
              for column in range(self.maze.getColumns())
              for row in range(self.maze.getRows())]

    for state in states:
        # Evaluate function for this state
        actionValues = dict(
            (action, self.valueAccessFunction(state, (action,))
                     if self.valueAccessFunction is not None else 0.0)
            for action in ["up", "down", "left", "right"])
        maxValue = max(actionValues.values())
        axis = self.figPolicy.gca()
        for action in actionValues.keys():
            if actionValues[action] == maxValue:
                self._plotArrow(axis, (state[0], state[1]), action)

    # Update Q-function visualization
    for state in states:
        for action in ["up", "down", "left", "right"]:
            value = self.valueAccessFunction(state, (action,)) \
                        if self.valueAccessFunction is not None else 0.0
            if int(value) == value:
                valueString = "%s\n%s" % (int(value),
                                          self.samples[(state, action)])
            else:
                valueString = "%.1f\n%s" % (value,
                                            self.samples[(state, action)])
            if (state, action) not in self.textInstances.keys():
                if isinstance(action, tuple):
                    # For TD-agents that use the cross product of the action space
                    axis = self.figValueFunction[action[0]].gca()
                else:
                    axis = self.figValueFunction[action].gca()
                textInstance = axis.text(state[0] - 0.3, state[1],
                                         valueString, fontsize=8)
                self.textInstances[(state, action)] = textInstance
            else:
                self.textInstances[(state, action)].set_text(valueString)

    self.canvasPolicy.draw()
    for index, action in enumerate(self.actions):
        self.canvasValueFunction[action].draw()

def boundState(state):
    """ Return the given state with each dimension bounded to [0.0, 1.0] """
    assert type(state) == State
    dimensions = state.dimensions
    # Change to a numpy array (a copy) since the where operator does not work
    # on States
    state = numpy.array(state)
    state[numpy.where(state < 0.0)] = 0.0
    state[numpy.where(state > 1.0)] = 1.0
    # Change back to State
    state = State(state, dimensions)
    return state

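# boundState clamps every dimension to [0.0, 1.0]; a shorter sketch of the
# same clamping on a plain numpy array using numpy.clip:
import numpy

clipped = numpy.clip(numpy.array([1.3, -0.2, 0.5]), 0.0, 1.0)  # -> [1.0, 0.0, 0.5]
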
def evaluate(self, state):
    """ Evaluates the policy for the given state """
    # If a bias is desired, we simply append an additional dimension that
    # always takes the value 1
    if self.bias:
        dimensions = [dimension for dimension in state.dimensions]
        biasDimension = Dimension("zzz_bias", "continuous", [[0, 1]])
        dimensions.append(biasDimension)
        input = State(numpy.hstack((state, [1])), dimensions)
    else:
        # Just create a copy of the state
        input = State(state, state.dimensions)

    # Scale state dimensions to the range (-1, 1)
    input.scale(-1, 1)

    # Compute the activation (the preference of the policy) for each action.
    # The last action always has activation 0 (this removes redundant
    # representations of the same policy).
    actionActivations = []
    for actionIndex in range(self.numActions - 1):
        activation = numpy.dot(
            self.weights[self.inputDims * actionIndex:
                         self.inputDims * (actionIndex + 1)], input)
        actionActivations.append(activation)
    actionActivations.append(0.0)

    # Greedy action selection
    selectedAction = max(zip(actionActivations,
                             range(len(actionActivations))))[1]
    return self.actions[selectedAction]

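# A minimal sketch of the action selection above: each action's preference is
# linear in the (scaled) input, the last action is pinned to preference 0, and
# the greedy action is the argmax. The flat weight layout (one contiguous block
# of inputDims weights per action) mirrors the code; greedyAction is a
# hypothetical helper, not part of the framework.
import numpy

def greedyAction(weights, inputVector, numActions):
    """Return the index of the action with the highest linear preference."""
    inputDims = len(inputVector)
    activations = [numpy.dot(weights[a * inputDims:(a + 1) * inputDims],
                             inputVector)
                   for a in range(numActions - 1)] + [0.0]
    return int(numpy.argmax(activations))
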
def evaluate(self, state):
    """ Evaluates the policy for the given state """
    # If bias is desired, we simply append an additional dimension that
    # always takes the value 1
    if self.bias:
        dimensions = [dimension for dimension in state.dimensions]
        biasDimension = Dimension("zzz_bias", "continuous", [[0, 1]])
        dimensions.append(biasDimension)
        state = State(numpy.hstack((state, [1])), dimensions)

    # Scale state dimensions to range (-1, 1)
    state.scale(-1, 1)

    # Compute the activation (the preference of the policy) for each action
    output = []
    for outputDimIndex in range(self.numActions):
        activation = numpy.dot(
            self.weights[self.inputDims * outputDimIndex:
                         self.inputDims * (outputDimIndex + 1)], state)
        output.append(activation)

    return output

def getPredecessorDistribution(self, state):
    """ Return the predecessor distribution for the given *state*.

    Returns an iterator that yields possible predecessor states of *state*,
    drawn from the predecessor state distribution, together with their
    probability masses.
    """
    if self.succStates == None:
        raise ModelNotInitialized()

    k = min(self.states.shape[0], self.k)

    if self.rebuildPred:
        self.predKDTree = ann.kdtree(self.succStates)
        self.rebuildPred = False

    indices, distances = self.predKDTree.knn(state, k)

    denominator = numpy.sum(numpy.exp(-distances[0] / (self.b_Sa ** 2)))
    # If the distances become too large, then all values can become zero.
    # In this situation, we simply return the closest state and probability 1.
    if denominator == 0:
        import warnings
        warnings.warn("Too large distances, returning only closest example")
        indices[0] = [indices[0][0]]
        distances[0] = [0.0]
        denominator = numpy.exp(0.0 / (self.b_Sa ** 2))

    for index, distance in zip(indices[0], distances[0]):
        neighbor = State(self.succStates[index],
                         state.dimensions)  # TODO: not use state.dimensions
        predState, reward = self.predecessorSamples[neighbor]
        delta = predState - neighbor
        predictedPredState = State(state + delta, state.dimensions)
        yield predictedPredState, gaussian(distance, self.b_Sa) / denominator

def generate2dStateSlice(varyDimensions, stateSpace, defaultDimValues,
                         gridNodesPerDim, varyValueRanges=None):
    """ Generate a set of states that form a 2d slice through state space.

    The set of states consists of gridNodesPerDim**2 states. Each of them has
    the values given by defaultDimValues except for the two dimensions passed
    in varyDimensions. In these two dimensions, the value is determined based
    on a 2d grid that fills [0,1]x[0,1].
    """
    assert len(varyDimensions) == 2, \
        "We need two varyDimensions to create a 2d state space slice."
    assert (sorted(stateSpace.keys())
            == sorted(set(defaultDimValues.keys() + varyDimensions))), \
        "Cannot create a 2d state space slice since value definition is not " \
        "consistent with state space definition."

    if varyValueRanges == None:
        # Set default value ranges
        varyValueRanges = [stateSpace[varyDimensions[0]].getValueRanges()[0],
                           stateSpace[varyDimensions[1]].getValueRanges()[0]]

    # Sort state dimensions according to dimension name
    dimensions = [stateSpace[dimName] for dimName in sorted(stateSpace.keys())]
    defaultValue = [defaultDimValues[dimName]
                    for dimName in sorted(defaultDimValues.keys())]

    # The indices of the dimensions which vary
    varyDimensionIndex1 = sorted(stateSpace.keys()).index(varyDimensions[0])
    varyDimensionIndex2 = sorted(stateSpace.keys()).index(varyDimensions[1])

    # Create the 2d slice
    slice2d = {}
    for i, value1 in enumerate(numpy.linspace(varyValueRanges[0][0],
                                              varyValueRanges[0][1],
                                              gridNodesPerDim)):
        for j, value2 in enumerate(numpy.linspace(varyValueRanges[1][0],
                                                  varyValueRanges[1][1],
                                                  gridNodesPerDim)):
            # Instantiate default value for this grid node
            defaultValue[varyDimensionIndex1] = value1
            defaultValue[varyDimensionIndex2] = value2
            # Create state object
            slice2d[(i, j)] = State(defaultValue, dimensions)

    return slice2d

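# A hedged usage sketch of generate2dStateSlice: the dimension names
# "position" and "velocity" and the stateSpace object are assumptions made
# for illustration only.
#
#   slice2d = generate2dStateSlice(["position", "velocity"], stateSpace,
#                                  {"position": 0.5, "velocity": 0.5},
#                                  gridNodesPerDim=3)
#   # slice2d[(0, 0)] is the State at the lower-left grid node and
#   # slice2d[(2, 2)] the State at the upper-right grid node.
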
def getSuccessorDistribution(self, state):
    """ Return the successor distribution for the given state.

    Returns an iterator that yields pairs of grid nodes and their
    probabilities of being the successor of the given state.
    """
    if self._retrainingRequired():
        self._updateModel()

    if self.succStateModel != None:
        # This is a deterministic model!
        yield (State(state + self.succStateModel.predict(state),
                     state.dimensions), 1.0)
    else:
        raise ModelNotInitialized()

def updateValues(self, valueAccessFunction, actions):
    self.axisValueFunction.clear()
    for action in actions:
        actionValues = []
        for state in sorted(self.states):
            actionValues.append(
                valueAccessFunction(State([state], self.stateSpace.values()),
                                    action))
        self.axisValueFunction.plot(sorted(self.states), actionValues,
                                    label=str(action))
    self.axisValueFunction.set_xlabel('Sum of cards')
    self.axisValueFunction.set_ylabel('Value')
    self.axisValueFunction.legend()
    self.canvasValueFunction.draw()

def stateTransitionFct(self, state, action):
    """ Returns an iterator over the successor states of *action* in *state*. """
    # Applies the action and calculates the new position and velocity
    def minmax(item, limit1, limit2):
        "Bounds item to the range [limit1, limit2]"
        return max(limit1, min(limit2, item))

    # Get position and velocity
    position = state["position"]
    velocity = state["velocity"]

    # Determine acceleration factor
    if action == 'left':  # action is backward thrust
        factor = -1
    elif action == 'none':  # action is coast
        factor = 0
    else:  # action is forward thrust
        factor = 1

    # Do the actual state update
    velocityChange = self.configDict["accelerationFactor"] * factor \
                         - 0.0025 * cos(3 * position)
    velocity = minmax(velocity + velocityChange,
                      -self.maxVelocity, self.maxVelocity)
    position += velocity
    position = minmax(position, self.minPosition, self.maxPosition)

    if (position <= self.minPosition) and (velocity < 0):
        velocity = 0.0

    if position >= self.goalPosition \
            and abs(velocity) > self.configDict["maxGoalVelocity"]:
        velocity = -velocity

    yield State([position, velocity],
                [self.stateSpace["position"], self.stateSpace["velocity"]]), 1.0

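# A framework-independent sketch of the deterministic mountain-car update
# performed above. The constants (accelerationFactor, velocity and position
# limits) are illustrative assumptions, not necessarily the configured values.
from math import cos

def mountainCarStep(position, velocity, factor, accelerationFactor=0.001,
                    maxVelocity=0.07, minPosition=-1.2, maxPosition=0.6):
    """One transition; factor is -1 (left thrust), 0 (coast), or 1 (right thrust)."""
    velocity += accelerationFactor * factor - 0.0025 * cos(3 * position)
    velocity = max(-maxVelocity, min(maxVelocity, velocity))
    position = max(minPosition, min(maxPosition, position + velocity))
    if position <= minPosition and velocity < 0:
        velocity = 0.0  # the car hits the left wall and stops
    return position, velocity
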
def getExpectedReward(self, state):
    """ Returns the expected reward for the given state """
    if self.states == None:
        return 0.0

    k = min(self.states.shape[0], self.k)

    if self.rebuildSucc:
        self.succKDTree = ann.kdtree(self.states)
        self.rebuildSucc = False

    indices, distances = self.succKDTree.knn(state, k)

    denominator = numpy.sum(numpy.exp(-distances[0] / (self.b_Sa ** 2)))
    # If the distances become too large, then all values can become zero.
    # In this situation, we simply return the closest state and probability 1.
    if denominator == 0:
        import warnings
        warnings.warn("Too large distances, returning only closest example")
        indices[0] = [indices[0][0]]
        distances[0] = [0.0]
        denominator = numpy.exp(0.0 / (self.b_Sa ** 2))

    expectedReward = 0.0
    for index, distance in zip(indices[0], distances[0]):
        neighbor = State(self.states[index],
                         state.dimensions)  # TODO: not use state.dimensions
        succState, reward = self.successorSamples[neighbor]
        weight = gaussian(distance, self.b_Sa) / denominator
        expectedReward += reward * weight

    return expectedReward

def _updateSamples(self, state, action, reward, succState, episodeTerminated):
    # Determine color
    if self.colorCriterion == "Action":
        value = action
    elif self.colorCriterion == "Reward":
        value = reward
    elif self.colorCriterion == "Q-Value":
        if self.evalFunction is None:
            return
        queryState = State((succState['x'], succState['xdot'],
                            succState['y'], succState['ydot']),
                           self.dimensions)
        value = self.evalFunction(queryState)
        self.minValue = min(value, self.minValue)
        self.maxValue = max(value, self.maxValue)

    if self.drawingEnabledCheckbox.checkState():  # Immediate drawing
        # Remove ball patch if it is drawn currently
        if self.ballPatch != None:
            self.ballPatch.remove()
            self.ballPatch = None
        if self.drawStyle == "Current Position":
            # Remove old trajectory
            self._removeTrajectory()
            self.rememberedSegments = []
            # Plot ball
            self.ballPatch = Circle([state["x"], state["y"]],
                                    self.pinballMazeEnv.maze.ballRadius,
                                    facecolor='k')
            self.axis.add_patch(self.ballPatch)
            self.canvas.draw()
        elif self.drawStyle == "Online (All)":
            # If drawing was just reactivated
            self._drawRememberedSegments()
            # Draw current transition
            lines = self.axis.plot([state["x"], succState["x"]],
                                   [state["y"], succState["y"]],
                                   '-', color=self._determineColor(value))
            self.linePatches.extend(lines)
            self.canvas.draw()
        else:  # "Last Episode"
            # Remember state trajectory, it will be drawn at the end
            # of the episode
            self.rememberedSegments.append((state["x"], succState["x"],
                                            state["y"], succState["y"], value))
            if episodeTerminated:
                # Remove last trajectory, draw this episode's trajectory
                self._removeTrajectory()
                self._drawRememberedSegments()
                self.canvas.draw()
                # When coloring the trajectory based on real-valued criteria,
                # we have to update the legend now
                if self.colorCriterion == "Q-Value":
                    self.legendWidget.clear()
                    for value in numpy.logspace(0,
                                                numpy.log10(self.maxValue
                                                            - self.minValue + 1),
                                                10):
                        value = value - 1 + self.minValue
                        color = self._determineColor(value)
                        item = QtGui.QListWidgetItem(str(value),
                                                     self.legendWidget)
                        qColor = QtGui.QColor(int(color[0] * 255),
                                              int(color[1] * 255),
                                              int(color[2] * 255))
                        item.setTextColor(qColor)
                        self.legendWidget.addItem(item)
    else:
        if self.drawStyle != "Current Position":
            # Remember state trajectory, it will be drawn once drawing is
            # reenabled
            self.rememberedSegments.append((state["x"], succState["x"],
                                            state["y"], succState["y"], value))

def _extractState(self, stateAction):
    """ Extracts the state from the joint state-action pseudo-state """
    dimensions = [dimension for dimension in stateAction.dimensions][:-1]
    state = State(stateAction[:-1], dimensions)
    return state

def plot(self, ax, stateSpace, plotStateDims, dimValues, plotSamples,
         colorFct, **kwargs):
    # Determine the indices of the plot dimensions
    stateIndex1 = sorted(stateSpace.keys()).index(plotStateDims[0])
    stateIndex2 = sorted(stateSpace.keys()).index(plotStateDims[1])

    xValues = numpy.linspace(0, 1, dimValues[stateIndex1])
    yValues = numpy.linspace(0, 1, dimValues[stateIndex2])

    U = numpy.zeros((len(xValues), len(yValues)))
    V = numpy.zeros((len(xValues), len(yValues)))
    color = numpy.zeros((len(xValues), len(yValues)))
    for i in range(len(xValues)):
        for j in range(len(yValues)):
            numberOfDimensions = stateSpace.getNumberOfDimensions()
            node = numpy.zeros(numberOfDimensions)
            for k in range(numberOfDimensions):
                if k == stateIndex1:
                    node[k] = xValues[i]
                elif k == stateIndex2:
                    node[k] = yValues[j]
                else:
                    node[k] = (dimValues[k] / 2 + 0.5) / dimValues[k]
            node = State(node,
                         [Dimension(sorted(stateSpace.keys())[dimNum],
                                    "continuous", [[0, 1]])
                          for dimNum in range(numberOfDimensions)])

            # Find the most likely successor state
            p = 0.0
            maxSuccNode = node
            meanSuccNode = numpy.zeros(len(node))
            for succNode, prob in self.getSuccessorDistribution(node):
                meanSuccNode += succNode * prob
                if prob > p:
                    maxSuccNode = succNode
                    p = prob

            U[i, j] = meanSuccNode[stateIndex1] - node[stateIndex1]
            V[i, j] = meanSuccNode[stateIndex2] - node[stateIndex2]
            color[i, j] = colorFct(self, node, meanSuccNode)

    X, Y = numpy.meshgrid(xValues, yValues)
    ax.contourf(Y, X, color, 15)
    # pylab.colorbar()

    # Decide whether we plot the training samples or the predictions
    if plotSamples:
        ax.scatter(self.states[:, stateIndex1], self.states[:, stateIndex2],
                   marker='o', c='b', s=5)
    else:
        ax.quiver(Y, X, U, V)
    ax.plot(range(0))
    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)
    ax.set_xlabel(plotStateDims[0])
    ax.set_ylabel(plotStateDims[1])
    ax.set_xticklabels([])
    ax.set_yticklabels([])

def _plotFunction(self):
    if self.evalFunction is None:
        return

    self.lock.acquire()

    # Clean up old plot
    for patch in self.plottedPatches:
        patch.remove()
    self.plottedPatches = []

    self.colorMapping = dict()
    self.colors = cycle(["b", "g", "r", "c", "m", "y"])
    cmap = pylab.get_cmap("jet")

    # Check whether the observed function returns discrete or continuous values
    discreteFunction = isinstance(self.functionObservable,
                                  FunctionOverStateSpaceObservable) \
                            and self.functionObservable.discreteValues
    if not discreteFunction:
        # The values of the observed function over the 2d state space
        values = numpy.ma.array(numpy.zeros((self.maze.getColumns(),
                                             self.maze.getRows())),
                                mask=numpy.zeros((self.maze.getColumns(),
                                                  self.maze.getRows())))

    # Iterate over all states and compute the value of the observed function
    dimensions = [self.stateSpace[dimName] for dimName in ["column", "row"]]
    for column in range(self.maze.getColumns()):
        for row in range(self.maze.getRows()):
            # Create state object
            state = State((column, row), dimensions)
            # Evaluate function for this state
            if isinstance(self.functionObservable,
                          FunctionOverStateSpaceObservable):
                functionValue = self.evalFunction(state)
            else:  # StateActionValuesObservable
                # Determine chosen option first
                selectedOption = None
                for option in self.actions:
                    selectedOptionName = str(self.suboptionComboBox.currentText())
                    if str(option) == selectedOptionName:
                        selectedOption = option
                        break
                assert selectedOption is not None
                functionValue = self.evalFunction(state, option)

            # Map function value onto color value
            if discreteFunction:
                # Deal with situations where the function is only defined over
                # part of the state space
                if functionValue == None or functionValue in [numpy.nan,
                                                              numpy.inf,
                                                              -numpy.inf]:
                    continue
                # Determine color value for function value
                if not functionValue in self.colorMapping:
                    # Choose a color for a function value that occurs for the
                    # first time
                    self.colorMapping[functionValue] = self.colors.next()
                patch = self.maze.plotSquare(self.axis, (column, row),
                                             self.colorMapping[functionValue])
                self.plottedPatches.append(patch[0])
            else:
                # Remember values since we have to know the min and max value
                # before we can plot
                values[column, row] = functionValue
                if functionValue == None or functionValue in [numpy.nan,
                                                              numpy.inf,
                                                              -numpy.inf]:
                    values.mask[column, row] = True

    # Do the actual plotting for functions with continuous values
    if not discreteFunction:
        minValue = values.min()
        maxValue = values.max()
        for column in range(self.maze.getColumns()):
            for row in range(self.maze.getRows()):
                if values.mask[column, row]:
                    continue
                value = (values[column, row] - minValue) / (maxValue - minValue)
                patch = self.maze.plotSquare(self.axis, (column, row),
                                             cmap(value), zorder=0)
                self.plottedPatches.append(patch[0])

    # Set limits
    self.axis.set_xlim(0, len(self.maze.structure[0]) - 1)
    self.axis.set_ylim(0, len(self.maze.structure) - 1)

    # Update legend
    self.legendWidget.clear()
    if discreteFunction:
        for functionValue, colorValue in self.colorMapping.items():
            if isinstance(functionValue, tuple):
                functionValue = functionValue[0]  # deal with '(action,)'
            rgbaColor = matplotlib.colors.ColorConverter().to_rgba(colorValue)
            item = QtGui.QListWidgetItem(str(functionValue), self.legendWidget)
            color = QtGui.QColor(int(rgbaColor[0] * 255),
                                 int(rgbaColor[1] * 255),
                                 int(rgbaColor[2] * 255))
            item.setTextColor(color)
            self.legendWidget.addItem(item)
    else:
        for value in numpy.linspace(values.min(), values.max(), 10):
            rgbaColor = cmap((value - values.min())
                             / (values.max() - values.min()))
            item = QtGui.QListWidgetItem(str(value), self.legendWidget)
            color = QtGui.QColor(int(rgbaColor[0] * 255),
                                 int(rgbaColor[1] * 255),
                                 int(rgbaColor[2] * 255))
            item.setTextColor(color)
            self.legendWidget.addItem(item)

    self.canvas.draw()
    self.lock.release()

def plot(self, function, actions, fig, stateSpace, plotStateDims=None,
         plotActions=None, rasterPoints=100):
    """ Plots the Q-function for the case of a 2-dim subspace of the state space.

    plotStateDims : The 2 dimensions that should be plotted
    plotActions   : The actions that should be plotted
    rasterPoints  : How many raster points per dimension
    """
    # All actions that should be plotted
    if plotActions == None:
        plotActions = actions
    else:
        # Check whether the plot actions are valid actions
        for i in range(len(plotActions)):
            if plotActions[i] in actions:
                continue  # ok...
            try:
                plotActions[i] = eval(plotActions[i])
            except:
                raise Exception("Invalid plot action %s" % plotActions[i])

    # Determine the indices of the dimensions that should be plotted
    if plotStateDims == None or plotStateDims == []:
        if len(stateSpace.items()) != 2:
            warnings.warn("%s: Not two state space dimensions. "
                          "Please specify plotStateDims explicitly. "
                          % self.__class__.__name__)
            return
        plotStateDims = [stateSpace.keys()[0], stateSpace.keys()[1]]
    elif len(plotStateDims) != 2:
        warnings.warn("%s: StateActionValuesObservable logging only defined "
                      "when 2 plotStateDims are explicitly specified."
                      % self.__class__.__name__)
        return

    # Prepare plotting
    fig.subplots_adjust(left=0.05, right=0.95, bottom=0.05, top=0.95,
                        wspace=0.1, hspace=0.2)

    # Different plotting for discrete and continuous dimensions
    if stateSpace.hasContinuousDimensions():
        # Generate 2d state slice
        defaultDimValues = {}
        for dimensionName in stateSpace.keys():
            defaultDimValues[dimensionName] = 0.5
        stateSlice = generate2dStateSlice(plotStateDims, stateSpace,
                                          defaultDimValues,
                                          gridNodesPerDim=rasterPoints)

        rows = int(math.ceil(len(plotActions) / 2.0))
        data = dict()
        # Determine the absolute min and max to align the subplots' colormaps;
        # initialized such that they will be exceeded by any comparison
        absmin = float('inf')
        absmax = -float('inf')
        # For all actions that should be plotted
        for plotNum, action in enumerate(plotActions):
            # Compute values that should be plotted
            # (colorMapping == {} in the continuous case)
            values, colorMapping = \
                generate2dPlotArray(lambda state: function(state, action),
                                    stateSlice, True,
                                    shape=(rasterPoints, rasterPoints))
            # Check if there is something to plot
            if values.mask.all():
                continue

            # Update the colormap alignment bounds
            thismin = values.min()
            if thismin < absmin:
                absmin = thismin
            thismax = values.max()
            if thismax > absmax:
                absmax = thismax

            # Save the data to plot later, when the ranges are known
            data[(plotNum, action)] = values.T

        # Plot the data
        for plotNum, action in data.keys():
            # Add subplot
            subplot = fig.add_subplot(rows, 2, plotNum + 1)
            subplot.clear()
            # Create a pseudocolor plot in the current subplot
            polyCollection = fig.gca().pcolor(
                numpy.linspace(0.0, 1.0, rasterPoints),
                numpy.linspace(0.0, 1.0, rasterPoints),
                data[(plotNum, action)],
                vmin=absmin, vmax=absmax)
            # Add colorbar
            fig.colorbar(polyCollection)
            # Labeling etc.
            subplot.set_xlim(0, 1)
            subplot.set_ylim(0, 1)
            subplot.set_xlabel(plotStateDims[0])
            subplot.set_ylabel(plotStateDims[1])
            subplot.set_title(action)
    else:
        assert (len(stateSpace.items()) == 2), \
            "Discrete state spaces can only be plotted if they have two dimensions."

        valuesX = stateSpace[plotStateDims[0]]["dimensionValues"]
        valuesY = stateSpace[plotStateDims[1]]["dimensionValues"]

        stateSlice = {}
        from mmlf.framework.state import State
        for i, valueX in enumerate(valuesX):
            for j, valueY in enumerate(valuesY):
                # Create state object
                stateSlice[(i, j)] = State([valueX, valueY],
                                           [stateSpace[plotStateDims[0]],
                                            stateSpace[plotStateDims[1]]])

        rows = int(math.ceil(len(plotActions) / 2.0))
        # For all actions that should be plotted
        for plotNum, action in enumerate(plotActions):
            # Clear old plot
            subplot = fig.add_subplot(rows, 2, plotNum + 1)
            subplot.clear()
            # Compute values that should be plotted
            values, colorMapping = \
                generate2dPlotArray(lambda state: function(state, action),
                                    stateSlice, True,
                                    shape=(len(valuesX), len(valuesY)))
            # Check if there is something to plot
            if values.mask.all():
                continue

            # Do the actual plotting
            polyCollection = fig.gca().pcolor(numpy.array(valuesX) - 0.5,
                                              numpy.array(valuesY) - 0.5,
                                              values.T)
            # Add colorbar
            fig.colorbar(polyCollection)
            # Labeling etc.
            subplot.set_xlim(min(valuesX), max(valuesX))
            subplot.set_ylim(min(valuesY), max(valuesY))
            subplot.set_xlabel(plotStateDims[0])
            subplot.set_ylabel(plotStateDims[1])
            subplot.set_title(action)

def plot(self, function, fig, stateSpace, actionSpace, plotStateDims=None,
         rasterPoints=100):
    """ Creates a graphical representation of a FunctionOverStateSpace.

    Creates a plot of *function* in the 2D subspace of the state space
    spanned by the two dimensions given in *plotStateDims*.
    """
    # Determine the indices of the dimensions that should be plotted
    if plotStateDims == None or plotStateDims == []:
        if len(stateSpace.items()) != 2:
            warnings.warn("%s: Not two state space dimensions. "
                          "Please specify plotStateDims explicitly. "
                          % self.__class__.__name__)
            return
        plotStateDims = [stateSpace.keys()[0], stateSpace.keys()[1]]
    elif len(plotStateDims) != 2:
        warnings.warn("%s: FunctionOverStateSpace logging only defined when "
                      "2 plotStateDims are explicitly specified."
                      % self.__class__.__name__)
        return

    # Prepare plotting
    fig.subplots_adjust(left=0.05, right=0.95, bottom=0.05, top=0.95,
                        wspace=0.1, hspace=0.1)

    # Different plotting for discrete and continuous dimensions
    if stateSpace.hasContinuousDimensions():
        # Generate 2d state slice
        defaultDimValues = {}
        for dimensionName in stateSpace.keys():
            defaultDimValues[dimensionName] = 0.5
        stateSlice = generate2dStateSlice(plotStateDims, stateSpace,
                                          defaultDimValues,
                                          gridNodesPerDim=rasterPoints)

        # Compute values that should be plotted
        values, colorMapping = \
            generate2dPlotArray(function, stateSlice, not self.discreteValues,
                                shape=(rasterPoints, rasterPoints))
        # Check if there is something to plot
        if values.mask.all():
            return

        # Do the actual plotting
        polyCollection = fig.gca().pcolor(numpy.linspace(0.0, 1.0, rasterPoints),
                                          numpy.linspace(0.0, 1.0, rasterPoints),
                                          values.T)
        # Polishing of figure
        fig.gca().set_xlim(0.0, 1.0)
        fig.gca().set_ylim(0.0, 1.0)
    else:
        assert (len(stateSpace.items()) == 2), \
            "Discrete state spaces can only be plotted if they have two dimensions."

        valuesX = stateSpace[plotStateDims[0]]["dimensionValues"]
        valuesY = stateSpace[plotStateDims[1]]["dimensionValues"]

        stateSlice = {}
        from mmlf.framework.state import State
        for i, valueX in enumerate(valuesX):
            for j, valueY in enumerate(valuesY):
                # Create state object
                stateSlice[(i, j)] = State([valueX, valueY],
                                           [stateSpace[plotStateDims[0]],
                                            stateSpace[plotStateDims[1]]])

        # Compute values that should be plotted
        values, colorMapping = \
            generate2dPlotArray(function, stateSlice, not self.discreteValues,
                                shape=(len(valuesX), len(valuesY)))

        polyCollection = fig.gca().pcolor(numpy.array(valuesX) - 0.5,
                                          numpy.array(valuesY) - 0.5,
                                          values.T)
        # Polishing of figure
        fig.gca().set_xlim(min(valuesX), max(valuesX))
        fig.gca().set_ylim(min(valuesY), max(valuesY))
        fig.gca().set_xlabel(plotStateDims[0])
        fig.gca().set_ylabel(plotStateDims[1])

    # Create legend or colorbar, respectively
    if not self.discreteValues:
        fig.colorbar(polyCollection)
    else:
        # Some dummy code that creates patches that are not shown but allow
        # for a colorbar
        from matplotlib.patches import Rectangle
        linearSegmentedColorbar = polyCollection.get_cmap()
        patches = []
        functionValues = []
        for functionValue, colorValue in colorMapping.items():
            if isinstance(functionValue, tuple):
                functionValue = functionValue[0]  # deal with '(action,)'
            normValue = polyCollection.norm(colorValue)
            if isinstance(normValue, numpy.ndarray):
                normValue = normValue[0]  # happens when the function is constant
            rgbaColor = linearSegmentedColorbar(normValue)
            p = Rectangle((0, 0), 1, 1, fc=rgbaColor)
            functionValues.append(functionValue)
            patches.append(p)
        fig.gca().legend(patches, functionValues)