def __init__(self, useGUI, *args, **kwargs):
    """ Set up the PinballMaze environment.

    Reads the maze description named by ``configDict['MAZE']``, defines the
    state space (x, y, xdot, ydot) and the discrete action space, sets the
    current state and, if *useGUI* is true, registers the pinball viewers.

    useGUI -- if true, the pinball-specific viewers are added to VIEWERS
    args, kwargs -- passed through unchanged to the base class constructor
    """
    self.environmentInfo = EnvironmentInfo(versionNumber="0.3",
                                           environmentName="PinballMaze",
                                           discreteActionSpace=True,
                                           episodic=True,
                                           continuousStateSpace=True,
                                           continuousActionSpace=False,
                                           stochastic=False)

    super(PinballMazeEnvironment, self).__init__(useGUI=useGUI,
                                                 *args, **kwargs)

    # The maze file is addressed relative to the parent directory of the
    # directory that contains this module.
    mazeFileName = os.path.dirname(os.path.abspath(__file__)) + os.sep \
        + os.pardir + os.sep + self.configDict['MAZE']
    # Read inside a with-block so that the file handle is always closed
    # (the handle used to be left open until garbage collection).
    with open(mazeFileName, 'r') as mazeFile:
        mazeString = mazeFile.read()

    #The maze object is created from the description
    self.maze = PinballMaze.createMazeFromString(mazeString)

    #The state space of the pinball maze simulation
    oldStyleStateSpace = {
        "x": ("continuous", [(0.0, 1.0)]),
        "y": ("continuous", [(0.0, 1.0)]),
        "xdot": ("continuous", [(-1.0, 1.0)]),
        "ydot": ("continuous", [(-1.0, 1.0)]),
    }

    self.stateSpace = StateSpace()
    self.stateSpace.addOldStyleSpace(oldStyleStateSpace, limitType="soft")

    #The action space of the pinball maze simulation
    oldStyleActionSpace = {
        "action": ("discrete", ["xinc", "xdec", "yinc", "ydec", "none"])
    }

    self.actionSpace = ActionSpace()
    self.actionSpace.addOldStyleSpace(oldStyleActionSpace, limitType="soft")

    #The current state is initially set to the initial state
    self.currentState = self.getInitialState()

    if useGUI:
        # Add viewer specific for the pinball world
        # (GUI modules are imported only when a GUI is requested)
        from mmlf.gui.viewers import VIEWERS
        from mmlf.worlds.pinball_maze.environments.pinball_maze_trajectory_viewer \
            import PinballMazeTrajectoryViewer
        from mmlf.worlds.pinball_maze.environments.pinball_maze_function_viewer \
            import PinballMazeFunctionViewer

        VIEWERS.addViewer(
            lambda: PinballMazeTrajectoryViewer(self, self.stateSpace),
            'PinballMaze TrajectoryViewer')
        VIEWERS.addViewer(
            lambda: PinballMazeFunctionViewer(self, self.stateSpace),
            'PinballMaze FunctionViewer')
def __init__(self, useGUI, *args, **kwargs):
    """ Set up the Single Pole Balancing environment.

    Defines the four-dimensional continuous state space, the continuous
    "force" action space, the initial state and, if *useGUI* is true,
    registers the trajectory viewers.

    useGUI -- if true, viewers are added to VIEWERS
    args, kwargs -- passed through unchanged to the base class constructor
    """
    self.environmentInfo = EnvironmentInfo(
        versionNumber="0.3",
        environmentName="Single Pole Balancing",
        discreteActionSpace=False,
        episodic=True,
        continuousStateSpace=True,
        continuousActionSpace=True,
        stochastic=False,
    )

    super(SinglePoleBalancingEnvironment, self).__init__(
        useGUI=useGUI, *args, **kwargs)

    # State space: one continuous interval per state dimension
    stateDimensions = {
        "cartPosition": ("continuous", [(-3.125, 3.125)]),
        "cartVelocity": ("continuous", [(-0.5, 0.5)]),
        "poleAngularPosition": ("continuous", [(-1.13, 1.13)]),
        "poleAngularVelocity": ("continuous", [(-0.80, 0.80)]),
    }
    self.stateSpace = StateSpace()
    self.stateSpace.addOldStyleSpace(stateDimensions, limitType="soft")

    # Action space: a single continuous force
    actionDimensions = {"force": ("continuous", [(-2, 2)])}
    self.actionSpace = ActionSpace()
    self.actionSpace.addOldStyleSpace(actionDimensions, limitType="soft")

    # State every episode starts from.
    # Note that the values of this dict can be accessed directly as
    # attributes of the class (see the __getattr__ and __setattr__ methods)
    self.initialState = {
        "cartPosition": 0.0,
        "poleAngularPosition": 0.1,
        "cartVelocity": 0.0,
        "poleAngularVelocity": 0.0,
    }

    # Work on a copy so that the initial state itself is never modified
    self.currentState = deepcopy(self.initialState)

    if not useGUI:
        return

    from mmlf.gui.viewers import VIEWERS
    from mmlf.gui.viewers.trajectory_viewer import TrajectoryViewer
    from mmlf.worlds.single_pole_balancing.environments.spb_trajectory_viewer import SPBTrajectoryViewer

    # General trajectory viewer first, then the cart viewer
    viewerFactories = (
        (lambda: TrajectoryViewer(self.stateSpace), 'TrajectoryViewer'),
        (lambda: SPBTrajectoryViewer(), 'SPB Cart Viewer'),
    )
    for createViewer, viewerName in viewerFactories:
        VIEWERS.addViewer(createViewer, viewerName)
def stop(self):
    """ Halt the execution of the current world.

    Stops the interaction server (if there is one) and the environment and,
    when Qt is available, removes every registered viewer except the
    "FloatStreamViewer".
    """
    if self.iServ is not None:
        # stop the IServer Loop
        self.iServ.stop()

    # Stop the environment
    self.environment.stop()

    # Remove world-specific viewers
    if mmlf.QtGui:
        from mmlf.gui.viewers import VIEWERS
        # Iterate over a snapshot: removeViewer presumably deletes entries
        # from VIEWERS.allViewers (TODO confirm), and changing a dict while
        # iterating over a live view of it raises a RuntimeError.
        for viewerName, viewer in list(VIEWERS.allViewers.items()):
            if viewerName == "FloatStreamViewer":
                continue  # this viewer is kept
            VIEWERS.removeViewer(viewer, viewerName)
def __init__(self, useGUI, *args, **kwargs):
    """ Set up the "17 and 4" card game environment.

    Defines the discrete state space ("count") and action space
    ("continue"/"stop"), the deck of cards, the points observable and,
    if *useGUI* is true, registers the value function viewer.

    useGUI -- if true, the viewer is added to VIEWERS
    args, kwargs -- passed through unchanged to the base class constructor
    """
    self.environmentInfo = EnvironmentInfo(
        versionNumber="0.3",
        environmentName="17 and 4",
        discreteActionSpace=True,
        episodic=True,
        continuousStateSpace=False,
        continuousActionSpace=False,
        stochastic=True,
    )

    super(SeventeenAndFourEnvironment, self).__init__(
        useGUI=useGUI, *args, **kwargs)

    # State space definition
    countDimension = {"count": ("discrete", range(23))}
    self.stateSpace = StateSpace()
    self.stateSpace.addOldStyleSpace(countDimension, limitType="soft")

    # Action space definition
    choiceDimension = {"action": ("discrete", ["continue", "stop"])}
    self.actionSpace = ActionSpace()
    self.actionSpace.addOldStyleSpace(choiceDimension, limitType="hard")

    # The available cards: every card value occurs four times
    self.cards = [cardValue
                  for cardValue in (2, 3, 4, 7, 8, 9, 10, 11)
                  for _ in range(4)]

    # Initialize first game
    self.getInitialState()

    # Some observables
    observableTitle = '%s Points' % self.__class__.__name__
    self.pointsObservable = FloatStreamObservable(
        title=observableTitle,
        time_dimension_name='Episode',
        value_name='Points')

    if not useGUI:
        return

    from mmlf.gui.viewers import VIEWERS
    from mmlf.worlds.seventeen_and_four.environments.seventeen_and_four_viewers\
        import SeventeenAndFourValuefunctionViewer

    # Add a Q-value viewer for this world
    VIEWERS.addViewer(
        lambda: SeventeenAndFourValuefunctionViewer(self.stateSpace),
        'SeventeenAndFourValuefunctionViewer')
def addRemoveObservable(observable, action):
    """ React to an observable having been added or removed.

    Only the addition of a ModelObservable is handled. If model logging is
    active in ``spec``, an observer is attached that plots the model into a
    pdf file; afterwards a ModelViewer factory is registered with VIEWERS.
    Uses ``self`` and ``spec`` from the enclosing scope.

    observable -- the observable that was added or removed
    action -- string describing what happened; only 'added' is handled
    """
    # Ignore everything but newly added ModelObservables
    if action != 'added' or not isinstance(observable, ModelObservable):
        return

    from mmlf.gui.viewers import VIEWERS
    from mmlf.gui.viewers.model_viewer import ModelViewer

    if spec is not None and spec["active"]:
        # Register function that writes all updates into log file.
        # The first word of the title names the top directory, the
        # remaining words (without surrounding parentheses) the log directory.
        titleWords = observable.title.split(" ")
        topDirectory = titleWords[0]
        logDirectory = "".join(titleWords[1:]).strip("()")

        if not self.plotThisObservableFct(logDirectory):
            # Plotting this observable was not specified
            # (note that no model viewer is registered in this case either)
            return

        self.userDirObj.createPath([topDirectory],
                                   refName=topDirectory,
                                   baseRef='currentlogdir',
                                   force=True)
        self.userDirObj.createPath([logDirectory],
                                   refName=logDirectory,
                                   baseRef=topDirectory,
                                   force=True)

        def plotGraphicToFile(model):
            """ Plot *model* into this episode's pdf file if one is due. """
            episodeIsLogged = \
                (self.episodeCounter + 1) % spec['logFrequency'] == 0
            if not episodeIsLogged:
                return  # Not an episode in which we plot

            targetFile = self.userDirObj.getAbsolutePath(
                logDirectory, 'episode_%05d.pdf' % self.episodeCounter)
            if os.path.exists(targetFile):
                return  # we do not plot several times per episode

            figure = pylab.figure(0, figsize=(22, 11))
            figure.clear()

            # Let observable plot itself
            observable.plot(model, figure,
                            stateSpace=self.environment.stateSpace,
                            colouring=spec['colouring'],
                            plotSamples=spec['plotSamples'],
                            minExplorationValue=spec['minExplorationValue'],
                            plotStateDims=spec['stateDims'],
                            dimValues=spec['rasterPoints'])

            # Draw structure over state space
            for currentAxis in figure.axes:
                # Plot only into matplotlib.axes.AxesSubplot: axes whose
                # type is exactly matplotlib.axes.Axes are skipped
                if type(currentAxis) == __import__("matplotlib").axes.Axes:
                    continue
                self.environment.plotStateSpaceStructure(currentAxis)

            pylab.savefig(targetFile)

        observable.addObserver(plotGraphicToFile)

    def createModelViewer():
        """ Create a ModelViewer that is updated by the observable. """
        viewer = ModelViewer(observable, self.environment.stateSpace)
        observable.addObserver(lambda *_x: viewer.update(*_x))
        return viewer

    VIEWERS.addViewer(createModelViewer, 'ModelViewer')
def __init__(self, useGUI, *args, **kwargs):
    """ Set up the Double Pole Balancing environment.

    Converts the configured pole angles from degrees to radians, creates
    the dynamics object, defines state and action space, the state
    normalization and the initial state and, if *useGUI* is true,
    registers the trajectory viewer.

    useGUI -- if true, the viewer is added to VIEWERS
    args, kwargs -- passed through unchanged to the base class constructor
    """
    self.environmentInfo = EnvironmentInfo(
        versionNumber="0.3",
        environmentName="Double Pole Balancing",
        discreteActionSpace=False,
        episodic=True,
        continuousStateSpace=True,
        continuousActionSpace=True,
        stochastic=False,
    )

    super(DoublePoleBalancingEnvironment, self).__init__(
        useGUI=useGUI, *args, **kwargs)

    # Convert from degrees to radians (in place, in the config dict)
    degreesToRadians = pi / 180.0
    for angleKey in ("INITIALPOLEANGULARPOSITION1",
                     'MAXPOLEANGULARPOSITION1',
                     'MAXPOLEANGULARPOSITION2'):
        self.configDict[angleKey] *= degreesToRadians

    # The object which computes the dpb dynamics
    self.dpbDynamics = DoublePoleBalancingDynamics(self.configDict)

    # The state space of the Double Pole Balancing Simulation
    stateDimensions = {
        "cartPosition": ("continuous", [(-1.0, 1.0)]),
        "cartVelocity": ("continuous", [(-0.1, 0.1)]),
        "poleAngularPosition1": ("continuous", [(-1.0, 1.0)]),
        "poleAngularVelocity1": ("continuous", [(-0.5, 0.5)]),
        "poleAngularPosition2": ("continuous", [(-1.0, 1.0)]),
        "poleAngularVelocity2": ("continuous", [(-0.5, 0.5)])
    }
    self.stateSpace = StateSpace()
    self.stateSpace.addOldStyleSpace(stateDimensions, limitType="soft")

    # The action space of the Double Pole Balancing Simulation
    actionDimensions = {"force": ("continuous", [(-10, 10)])}
    self.actionSpace = ActionSpace()
    self.actionSpace.addOldStyleSpace(actionDimensions, limitType="soft")

    # The name of the state dimensions that are send to the agent.
    # NOTE: The ordering of the state dimensions is important!
    self.stateNameList = [
        "cartPosition",
        "cartVelocity",
        "poleAngularPosition1",
        "poleAngularVelocity1",
        "poleAngularPosition2",
        "poleAngularVelocity2",
    ]

    # The vector used for normalization of the state for the agent
    # (entries follow the ordering of stateNameList)
    config = self.configDict
    self.normalizationVector = array([
        1.0 / config['MAXCARTPOSITION'],
        0.1,
        1.0 / config['MAXPOLEANGULARPOSITION1'],
        0.2,
        1.0 / config['MAXPOLEANGULARPOSITION2'],
        0.1,
    ])

    # The state every episode starts from: everything is zero except
    # for the angle of the first pole
    self.initialState = array([
        0.0,
        0.0,
        config["INITIALPOLEANGULARPOSITION1"],
        0.0,
        0.0,
        0.0,
    ])

    # The current state is initially set to (a copy of) the initial state
    self.currentState = array(self.initialState)

    if not useGUI:
        return

    from mmlf.gui.viewers import VIEWERS
    from mmlf.gui.viewers.trajectory_viewer import TrajectoryViewer

    # Add general trajectory viewer
    VIEWERS.addViewer(lambda: TrajectoryViewer(self.stateSpace),
                      'TrajectoryViewer')
def __init__(self, useGUI, *args, **kwargs):
    """ Set up the Maze Cliff environment.

    Creates the maze from its built-in description, defines the discrete
    state space (column, row) and action space, the initial state, the
    step/episode counters and, if *useGUI* is true, registers the viewers.

    useGUI -- if true, the maze viewers are added to VIEWERS
    args, kwargs -- passed through unchanged to the base class constructor
    """
    self.environmentInfo = EnvironmentInfo(versionNumber="0.3",
                                           environmentName="Maze Cliff",
                                           discreteActionSpace=True,
                                           episodic=True,
                                           continuousStateSpace=False,
                                           continuousActionSpace=False,
                                           stochastic=False)

    super(MazeCliffEnvironment, self).__init__(useGUI=useGUI,
                                               *args, **kwargs)

    # A string which describes the structure of the maze
    # A * indicates a wall, an S the start position of the agent
    # and a G the goal. A blank indicates a free cell.
    # NOTE(review): in the reviewed copy of this file the whitespace of this
    # literal looks collapsed (the rows of the maze are not separated by
    # line breaks). It is kept byte-for-byte here; confirm the layout
    # against version control.
    mazeDescriptionString = """************** * * * * * * *S G* ************** """

    #The maze object is created from the description
    self.maze = Maze.createMazeFromString(
        mazeDescriptionString,
        cliffPenalty=self.configDict["cliffPenalty"],
        stochasticity=self.configDict["stochasticity"])

    #The state space of the Maze Cliff Simulation
    oldStyleStateSpace = {
        "column": ("discrete", range(self.maze.getColumns())),
        "row": ("discrete", range(self.maze.getRows())),
    }

    self.stateSpace = StateSpace()
    self.stateSpace.addOldStyleSpace(oldStyleStateSpace, limitType="soft")

    #The action space of the Maze Cliff Simulation
    oldStyleActionSpace = {
        "action": ("discrete", ["up", "down", "left", "right"])
    }

    self.actionSpace = ActionSpace()
    self.actionSpace.addOldStyleSpace(oldStyleActionSpace, limitType="soft")

    # self.configDict has to contain ALL configuration options which
    # uniquely determine the behavior of the environment. It is therefore
    # NOT reset to an empty dict here (as was done before): that discarded
    # the "cliffPenalty" and "stochasticity" options right after they had
    # been used to create the maze.

    #The current state of the simulation
    startPosition = self.maze.getStartPosition()
    self.initialState = {
        "row": startPosition[0],
        "column": startPosition[1],
    }
    #The current state is initially set to the initial state
    self.currentState = deepcopy(self.initialState)

    #A counter which stores the number of steps which have been performed in this episode
    self.stepCounter = 0
    self.episodeCounter = 0

    #The accumulated reward
    self.reward = 0.0

    if useGUI:
        from mmlf.gui.viewers import VIEWERS
        from mmlf.gui.viewers.trajectory_viewer import TrajectoryViewer
        from mmlf.worlds.maze2d.environments.maze2d_viewer import Maze2DDetailedViewer
        from mmlf.worlds.maze2d.environments.maze2d_function_viewer import Maze2DFunctionViewer

        # Create customized trajectory viewer
        class MazeCliffTrajectoryViewer(TrajectoryViewer):
            """ Trajectory viewer that also draws the maze structure. """
            def __init__(self, stateSpace, plotStateSpaceStructure):
                super(MazeCliffTrajectoryViewer, self).__init__(stateSpace)
                plotStateSpaceStructure(self.axisTrajectory)

        VIEWERS.addViewer(
            lambda: MazeCliffTrajectoryViewer(
                self.stateSpace,
                lambda ax: self.plotStateSpaceStructure(ax)),
            'MazeCliffTrajectoryViewer')

        # Add viewers for the maze world
        VIEWERS.addViewer(
            lambda: Maze2DDetailedViewer(self.maze, self.stateSpace,
                                         ["left", "right", "up", "down"]),
            'MazeCliffDetailedViewer')
        VIEWERS.addViewer(
            lambda: Maze2DFunctionViewer(self.maze, self.stateSpace),
            'MazeCliffFunctionViewer')
# NOTE(review): the next statement is the tail of a method whose beginning
# lies outside the reviewed excerpt; it is reproduced unchanged.
#Redraw
self.canvas.draw()


def _changeMWA(self):
    """ Take over the moving window size from the slider and replot.

    The window size is two to the power of the slider value.
    """
    self.mwaSize = 2**self.mwaSlider.value()
    self.mwaLabel.setText("Moving Window Average: %s" % self.mwaSize)
    # Replot
    self._plot()


def _linePlotChanged(self, linePlot):
    """ Switch to the line plot type selected by *linePlot* and replot.

    linePlot -- key/index into self.linePlotTypes
    """
    self.linePlot = self.linePlotTypes[linePlot]
    # Replot
    self._plot()


def _save(self):
    """ Let the user choose a pdf file and store the current figure in it.

    The file dialog starts in the root directory of the table model if it
    has one, otherwise in the MMLF RW path. Nothing is stored if the user
    does not choose a file.
    """
    if hasattr(self.tableModel, "rootDirectory"):
        rootDirectory = self.tableModel.rootDirectory
    else:
        rootDirectory = mmlf.getRWPath()

    graphicFileName = \
        str(QtGui.QFileDialog.getSaveFileName(self,
                                              "Select a file for the stored graphic",
                                              rootDirectory,
                                              "Plots (*.pdf)"))
    if not graphicFileName:
        # The dialog was cancelled: it yields an empty name, which must
        # not be passed on to savefig
        return

    self.fig.savefig(graphicFileName, dpi=400)


VIEWERS.addViewer(lambda : ExperimentViewer(), 'ExperimentViewer')
def __init__(self, useGUI, *args, **kwargs):
    """ Set up the Maze2D environment.

    Reads the maze description named by ``configDict['MAZE']`` from the
    "config/maze2d" directory below the MMLF RW path, defines the discrete
    state space (column, row) and action space, the initial state and, if
    *useGUI* is true, registers the maze viewers.

    useGUI -- if true, the maze viewers are added to VIEWERS
    args, kwargs -- passed through unchanged to the base class constructor
    """
    self.environmentInfo = EnvironmentInfo(versionNumber="0.3",
                                           environmentName="Maze2D",
                                           discreteActionSpace=True,
                                           episodic=True,
                                           continuousStateSpace=False,
                                           continuousActionSpace=False,
                                           stochastic=False)

    super(Maze2dEnvironment, self).__init__(useGUI=useGUI, *args, **kwargs)

    # Reading string which describes the structure of the maze
    mazeFileName = mmlf.getRWPath() + os.sep + "config" + os.sep \
        + "maze2d" + os.sep + self.configDict['MAZE']
    # Read inside a with-block so that the file handle is always closed
    # (the handle used to be left open until garbage collection).
    with open(mazeFileName) as mazeFile:
        mazeDescriptionString = mazeFile.read()

    # Remove comment lines and superfluous whitespace
    strippedLines = (line.strip()
                     for line in mazeDescriptionString.split("\n"))
    mazeDescriptionString = "\n".join(
        [line for line in strippedLines if not line.startswith("#")])

    #The maze object is created from the description
    self.maze = Maze.createMazeFromString(mazeDescriptionString)

    #The state space of the Maze2d Simulation
    oldStyleStateSpace = {
        "column": ("discrete", range(self.maze.getColumns())),
        "row": ("discrete", range(self.maze.getRows()))
    }

    self.stateSpace = StateSpace()
    self.stateSpace.addOldStyleSpace(oldStyleStateSpace, limitType="soft")

    #The action space of the Maze2d Simulation
    oldStyleActionSpace = {
        "action": ("discrete", ["left", "right", "up", "down"])
    }

    self.actionSpace = ActionSpace()
    self.actionSpace.addOldStyleSpace(oldStyleActionSpace, limitType="soft")

    #The current state of the simulation
    self.initialState = {
        "row": self.maze.getStartPosition()[0],
        "column": self.maze.getStartPosition()[1]
    }
    #The current state is initially set to the initial state
    self.currentState = deepcopy(self.initialState)

    if useGUI:
        from mmlf.gui.viewers import VIEWERS
        from mmlf.gui.viewers.trajectory_viewer import TrajectoryViewer
        from mmlf.worlds.maze2d.environments.maze2d_viewer import Maze2DDetailedViewer
        from mmlf.worlds.maze2d.environments.maze2d_function_viewer import Maze2DFunctionViewer

        # Create customized trajectory viewer
        class Maze2dTrajectoryViewer(TrajectoryViewer):
            """ Trajectory viewer that also draws the maze structure. """
            def __init__(self, stateSpace, plotStateSpaceStructure):
                super(Maze2dTrajectoryViewer, self).__init__(stateSpace)
                plotStateSpaceStructure(self.axisTrajectory)

        VIEWERS.addViewer(
            lambda: Maze2dTrajectoryViewer(
                self.stateSpace,
                lambda ax: self.plotStateSpaceStructure(ax)),
            'Maze2dTrajectoryViewer')

        # Add viewers for the maze world
        VIEWERS.addViewer(
            lambda: Maze2DDetailedViewer(self.maze, self.stateSpace,
                                         ["left", "right", "up", "down"]),
            'Maze2DDetailedViewer')
        VIEWERS.addViewer(
            lambda: Maze2DFunctionViewer(self.maze, self.stateSpace),
            'Maze2DFunctionViewer')
# NOTE(review): the statements up to the first "def" are the tail of a
# method whose beginning lies outside the reviewed excerpt; they are
# reproduced unchanged.
if self.observable is not None:
    # Remove old observable
    self.observable.removeObserver(self.observableCallback)

# Get new observable and add as listener
self.observable = self.floatStreamObservables[comboBoxIndex]
self.observable.addObserver(self.observableCallback)

# Remove old values
self.values = deque()
self.times = deque()


def _changeWindowSize(self):
    """ Take over the window size from the slider and trim the history.

    The window size is two to the power of the slider value; the oldest
    values (and their times) beyond that size are dropped before redrawing.
    """
    self.windowSize = 2**self.windowSizeSlider.value()

    # Drop the oldest entries until the values fit into the window
    surplus = len(self.values) - self.windowSize
    for _ in range(max(surplus, 0)):
        self.values.popleft()
        self.times.popleft()

    self._redraw()
    self.windowSizeLabel.setText("WindowSize: %s" % self.windowSize)


def _changeMWA(self):
    """ Take over the moving window average size from the slider. """
    self.mwaSize = self.mwaSlider.value()
    self._redraw()
    labelText = "Moving Window Average : %s" % self.mwaSize
    self.mwaLabel.setText(labelText)


VIEWERS.addViewer(lambda: FloatStreamViewer(), 'FloatStreamViewer')
def __init__(self, config, useGUI, *args, **kwargs):
    """ Set up the Mountain Car environment.

    Makes sure that the configuration contains a value for "N", defines
    the constants of the task, the continuous state space (position,
    velocity), the discrete "thrust" action space and, if *useGUI* is true,
    registers the viewers.

    config -- configuration; config["configDict"] gets the entry "N" added
              if it is missing
    useGUI -- if true, viewers are added to VIEWERS
    args, kwargs -- passed through unchanged to the base class constructor
    """
    self.environmentInfo = EnvironmentInfo(
        versionNumber="0.3",
        environmentName="Mountain Car",
        discreteActionSpace=True,
        episodic=True,
        continuousStateSpace=True,
        continuousActionSpace=False,
        stochastic=True,
    )

    # Add value for N to config dict (required for discretization
    # in optimal policy computation)
    environmentOptions = config["configDict"]
    if "N" not in environmentOptions:
        environmentOptions["N"] = "50"

    super(MountainCarEnvironment, self).__init__(
        config, useGUI=useGUI, *args, **kwargs)

    # configuration
    self.randomStarts = True

    # Some constants
    self.minPosition = -1.2  # Minimum car position
    self.maxPosition = 0.6  # Maximum car position (past goal)
    self.maxVelocity = 0.07  # Maximum velocity of car
    self.goalPosition = 0.5  # Goal position - how to tell we are done

    # If "maxGoalVelocity" is not set in configDict, set it to maximal
    # velocity
    if "maxGoalVelocity" not in self.configDict:
        self.configDict["maxGoalVelocity"] = self.maxVelocity

    # The current state of the system
    self.state = None

    # Some counters
    self.overallStepCounter = 0

    # State space definition
    positionRange = (self.minPosition, self.maxPosition)
    velocityRange = (-self.maxVelocity, self.maxVelocity)
    stateDimensions = {
        "position": ("continuous", [positionRange]),
        "velocity": ("continuous", [velocityRange])
    }
    self.stateSpace = StateSpace()
    self.stateSpace.addOldStyleSpace(stateDimensions, limitType="soft")

    # Action space definition
    self.actions = ["left", "right", "none"]
    actionDimensions = {"thrust": ("discrete", self.actions)}
    self.actionSpace = ActionSpace()
    self.actionSpace.addOldStyleSpace(actionDimensions, limitType="hard")

    if not useGUI:
        return

    from mmlf.gui.viewers import VIEWERS
    from mmlf.gui.viewers.trajectory_viewer import TrajectoryViewer
    from mmlf.worlds.mountain_car.environments.mcar_policy_viewer \
        import MountainCarPolicyViewer
    from mmlf.worlds.mountain_car.environments.mcar_valuefunction_viewer \
        import MountainCarValueFunctionViewer

    # General trajectory viewer first, then the mountain car viewers
    viewerFactories = (
        (lambda: TrajectoryViewer(self.stateSpace),
         'TrajectoryViewer'),
        (lambda: MountainCarPolicyViewer(self.stateSpace),
         'MountainCar PolicyViewer'),
        (lambda: MountainCarValueFunctionViewer(self.stateSpace),
         'MountainCar ValueFunctionViewer'),
    )
    for createViewer, viewerName in viewerFactories:
        VIEWERS.addViewer(createViewer, viewerName)