    def __init__(self, useGUI, *args, **kwargs):

        self.environmentInfo = EnvironmentInfo(versionNumber="0.3",
                                               environmentName="PinballMaze",
                                               discreteActionSpace=True,
                                               episodic=True,
                                               continuousStateSpace=True,
                                               continuousActionSpace=False,
                                               stochastic=False)

        super(PinballMazeEnvironment, self).__init__(useGUI=useGUI,
                                                     *args,
                                                     **kwargs)

        mazePath = (os.path.dirname(os.path.abspath(__file__)) + os.sep +
                    os.pardir + os.sep + self.configDict['MAZE'])
        with open(mazePath, 'r') as mazeFile:
            mazeString = mazeFile.read()

        #The maze object is created from the description
        self.maze = PinballMaze.createMazeFromString(mazeString)

        #The state space of the Pinball Maze simulation
        oldStyleStateSpace = {
            "x": ("continuous", [(0.0, 1.0)]),
            "y": ("continuous", [(0.0, 1.0)]),
            "xdot": ("continuous", [(-1.0, 1.0)]),
            "ydot": ("continuous", [(-1.0, 1.0)]),
        }

        self.stateSpace = StateSpace()
        self.stateSpace.addOldStyleSpace(oldStyleStateSpace, limitType="soft")

        #The action space of the Pinball Maze simulation
        oldStyleActionSpace = {
            "action": ("discrete", ["xinc", "xdec", "yinc", "ydec", "none"])
        }

        self.actionSpace = ActionSpace()
        self.actionSpace.addOldStyleSpace(oldStyleActionSpace,
                                          limitType="soft")

        #The current state is initially set to the initial state
        self.currentState = self.getInitialState()

        if useGUI:
            # Add viewer specific for the pinball world
            from mmlf.gui.viewers import VIEWERS
            from mmlf.worlds.pinball_maze.environments.pinball_maze_trajectory_viewer \
                        import PinballMazeTrajectoryViewer
            from mmlf.worlds.pinball_maze.environments.pinball_maze_function_viewer \
                        import PinballMazeFunctionViewer

            VIEWERS.addViewer(
                lambda: PinballMazeTrajectoryViewer(self, self.stateSpace),
                'PinballMaze TrajectoryViewer')
            VIEWERS.addViewer(
                lambda: PinballMazeFunctionViewer(self, self.stateSpace),
                'PinballMaze FunctionViewer')
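
All of these environment constructors follow the same pattern: the state and action spaces are described as old-style dictionaries and converted via addOldStyleSpace. A minimal standalone sketch of that pattern is given below; note that the import path mmlf.framework.spaces is an assumption and may differ between MMLF versions.

# Minimal sketch of the space-construction pattern used in these examples.
# NOTE: the import path below is an assumption, not taken from the snippets.
from mmlf.framework.spaces import StateSpace, ActionSpace

stateSpace = StateSpace()
stateSpace.addOldStyleSpace({"x": ("continuous", [(0.0, 1.0)]),
                             "y": ("continuous", [(0.0, 1.0)])},
                            limitType="soft")

actionSpace = ActionSpace()
actionSpace.addOldStyleSpace({"action": ("discrete", ["xinc", "xdec", "none"])},
                             limitType="soft")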
Example #2
    def __init__(self, useGUI, *args, **kwargs):
        
        self.environmentInfo = EnvironmentInfo(versionNumber="0.3",
                                               environmentName="Single Pole Balancing",
                                               discreteActionSpace=False,
                                               episodic=True,
                                               continuousStateSpace=True,
                                               continuousActionSpace=True,
                                               stochastic=False)

        super(SinglePoleBalancingEnvironment, self).__init__(useGUI=useGUI, *args, **kwargs)
        
        #The state space of the Single Pole Balancing Simulation
        oldStyleStateSpace = {"cartPosition": ("continuous", [(-3.125, 3.125)]),
                              "cartVelocity": ("continuous", [(-0.5, 0.5)]),
                              "poleAngularPosition": ("continuous", [(-1.13, 1.13)]),
                              "poleAngularVelocity": ("continuous", [(-0.80, 0.80)]),
                              }
        
        self.stateSpace = StateSpace()
        self.stateSpace.addOldStyleSpace(oldStyleStateSpace, limitType="soft")
        
        #The action space of the Single Pole Balancing Simulation
        oldStyleActionSpace =  {"force": ("continuous", [(-2, 2)])}
        
        self.actionSpace = ActionSpace()
        self.actionSpace.addOldStyleSpace(oldStyleActionSpace, limitType="soft")

        #The current state of the simulation
        #Note that the values of this dict can be accessed directly as
        #attributes of the class (see the __getattr__ and __setattr__ methods)
        self.initialState =  { 
                     "cartPosition": 0.0,
                     "poleAngularPosition": 0.1,
                     "cartVelocity": 0.0,
                     "poleAngularVelocity": 0.0,
                  }
        #The current state is initially set to the initial state
        self.currentState = deepcopy(self.initialState)
        
        if useGUI:
            from mmlf.gui.viewers import VIEWERS
            from mmlf.gui.viewers.trajectory_viewer import TrajectoryViewer
            from mmlf.worlds.single_pole_balancing.environments.spb_trajectory_viewer import SPBTrajectoryViewer
            
            # Add general trajectory viewer
            VIEWERS.addViewer(lambda : TrajectoryViewer(self.stateSpace), 
                              'TrajectoryViewer')
            
            VIEWERS.addViewer(lambda : SPBTrajectoryViewer(),
                              'SPB Cart Viewer')
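
The comment above notes that the entries of this state dict can be read as attributes via __getattr__. The actual implementation lives elsewhere in the class; an illustrative sketch of such forwarding (not the library's exact code) could look like this:

    # Illustrative sketch only: forward unknown attribute reads to currentState.
    def __getattr__(self, name):
        currentState = self.__dict__.get("currentState", {})
        if name in currentState:
            return currentState[name]
        raise AttributeError(name)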
Example #3
    def stop(self):
        """ Halt the execution of the current world. """
        if self.iServ is not None:
            # stop the IServer Loop
            self.iServ.stop()

        # Stop the environment
        self.environment.stop()

        # Remove world-specific viewers
        if mmlf.QtGui:
            from mmlf.gui.viewers import VIEWERS
            for viewerName, viewer in VIEWERS.allViewers.items():
                if viewerName == "FloatStreamViewer": continue
                VIEWERS.removeViewer(viewer, viewerName)
Example #4
    def __init__(self, useGUI, *args, **kwargs):

        self.environmentInfo = EnvironmentInfo(versionNumber="0.3",
                                               environmentName="17 and 4",
                                               discreteActionSpace=True,
                                               episodic=True,
                                               continuousStateSpace=False,
                                               continuousActionSpace=False,
                                               stochastic=True)

        super(SeventeenAndFourEnvironment, self).__init__(useGUI=useGUI,
                                                          *args,
                                                          **kwargs)

        # State and action space definition
        oldStyleStateSpace = {"count": ("discrete", range(23))}

        self.stateSpace = StateSpace()
        self.stateSpace.addOldStyleSpace(oldStyleStateSpace, limitType="soft")

        oldStyleActionSpace = {"action": ("discrete", ["continue", "stop"])}

        self.actionSpace = ActionSpace()
        self.actionSpace.addOldStyleSpace(oldStyleActionSpace,
                                          limitType="hard")

        # The available cards
        self.cards = [
            2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 7, 7, 7, 7, 8, 8, 8, 8, 9, 9,
            9, 9, 10, 10, 10, 10, 11, 11, 11, 11
        ]
        # Initialize first game
        self.getInitialState()

        # Some observables
        self.pointsObservable = \
                FloatStreamObservable(title='%s Points' % self.__class__.__name__,
                                      time_dimension_name='Episode',
                                      value_name='Points')

        if useGUI:
            from mmlf.gui.viewers import VIEWERS
            from mmlf.worlds.seventeen_and_four.environments.seventeen_and_four_viewers\
                        import SeventeenAndFourValuefunctionViewer
            # Add a Q-value viewer for this world
            VIEWERS.addViewer(
                lambda: SeventeenAndFourValuefunctionViewer(self.stateSpace),
                'SeventeenAndFourValuefunctionViewer')
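
The card list in this example encodes a 32-card deck with four copies of each of the values 2, 3, 4, 7, 8, 9, 10, and 11. A standalone sanity check:

# Standalone check of the deck composition used in the 17-and-4 example.
cards = [2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 7, 7, 7, 7,
         8, 8, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11]
assert len(cards) == 32
assert sum(cards) == 4 * (2 + 3 + 4 + 7 + 8 + 9 + 10 + 11)  # 216 points in total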
Example #5
        def addRemoveObservable(observable, action):
            # Check if new observable was added and if the observable is a
            # ModelObservable
            if action == 'added' and isinstance(observable, ModelObservable):
                from mmlf.gui.viewers import VIEWERS
                from mmlf.gui.viewers.model_viewer import ModelViewer
                if spec is not None and spec["active"]:
                    # Register function that writes all updates into log file
                    topDirectory = observable.title.split(" ")[0]
                    logDirectory = "".join(
                        observable.title.split(" ")[1:]).strip("()")
                    if not self.plotThisObservableFct(logDirectory):
                        # Plotting this observable was not specified
                        return
                    self.userDirObj.createPath([topDirectory],
                                               refName=topDirectory,
                                               baseRef='currentlogdir',
                                               force=True)
                    self.userDirObj.createPath([logDirectory],
                                               refName=logDirectory,
                                               baseRef=topDirectory,
                                               force=True)

                    def plotGraphicToFile(model):
                        if (self.episodeCounter + 1) % spec['logFrequency'] != 0:
                            return  # Not an episode in which we plot
                        logFile = \
                            self.userDirObj.getAbsolutePath(logDirectory,
                                                            'episode_%05d.pdf' % self.episodeCounter)
                        if os.path.exists(logFile):
                            return  # we do not plot several times per episode
                        fig = pylab.figure(0, figsize=(22, 11))
                        fig.clear()

                        # Let observable plot itself
                        observable.plot(
                            model,
                            fig,
                            stateSpace=self.environment.stateSpace,
                            colouring=spec['colouring'],
                            plotSamples=spec['plotSamples'],
                            minExplorationValue=spec['minExplorationValue'],
                            plotStateDims=spec['stateDims'],
                            dimValues=spec['rasterPoints'])

                        # Draw structure over the state space. Plot only into
                        # matplotlib.axes.AxesSubplot instances; skip plain Axes.
                        import matplotlib
                        for axis in fig.axes:
                            if type(axis) == matplotlib.axes.Axes:
                                continue
                            self.environment.plotStateSpaceStructure(axis)

                        pylab.savefig(logFile)

                    observable.addObserver(plotGraphicToFile)

                def createModelViewer():
                    modelViewer = ModelViewer(observable,
                                              self.environment.stateSpace)
                    observable.addObserver(lambda *_x: modelViewer.update(*_x))
                    return modelViewer

                VIEWERS.addViewer(createModelViewer, 'ModelViewer')
Example #6
    def __init__(self, useGUI, *args, **kwargs):

        self.environmentInfo = \
            EnvironmentInfo(versionNumber="0.3",
                            environmentName="Double Pole Balancing",
                            discreteActionSpace=False, episodic=True,
                            continuousStateSpace=True,
                            continuousActionSpace=True, stochastic=False)

        super(DoublePoleBalancingEnvironment, self).__init__(useGUI=useGUI,
                                                             *args,
                                                             **kwargs)

        # Convert from degrees to radians
        self.configDict["INITIALPOLEANGULARPOSITION1"] *= pi / 180.0
        self.configDict['MAXPOLEANGULARPOSITION1'] *= pi / 180.0
        self.configDict['MAXPOLEANGULARPOSITION2'] *= pi / 180.0

        # The object which computes the dpb dynamics
        self.dpbDynamics = DoublePoleBalancingDynamics(self.configDict)

        #The state space of the Double Pole Balancing Simulation
        oldStyleStateSpace = {
            "cartPosition": ("continuous", [(-1.0, 1.0)]),
            "cartVelocity": ("continuous", [(-0.1, 0.1)]),
            "poleAngularPosition1": ("continuous", [(-1.0, 1.0)]),
            "poleAngularVelocity1": ("continuous", [(-0.5, 0.5)]),
            "poleAngularPosition2": ("continuous", [(-1.0, 1.0)]),
            "poleAngularVelocity2": ("continuous", [(-0.5, 0.5)])
        }
        self.stateSpace = StateSpace()
        self.stateSpace.addOldStyleSpace(oldStyleStateSpace, limitType="soft")

        #The action space of the Double Pole Balancing Simulation
        oldStyleActionSpace = {"force": ("continuous", [(-10, 10)])}
        self.actionSpace = ActionSpace()
        self.actionSpace.addOldStyleSpace(oldStyleActionSpace,
                                          limitType="soft")

        # The names of the state dimensions that are sent to the agent.
        # NOTE: The ordering of the state dimensions is important!
        self.stateNameList = [
            "cartPosition", "cartVelocity", "poleAngularPosition1",
            "poleAngularVelocity1", "poleAngularPosition2",
            "poleAngularVelocity2"
        ]

        # The vector used for normalization of the state for the agent
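        # (positions are scaled by the inverse of their configured maxima,
        #  while the velocity components use fixed scaling factors)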
        self.normalizationVector = array([
            1.0 / self.configDict['MAXCARTPOSITION'], 0.1,
            1.0 / self.configDict['MAXPOLEANGULARPOSITION1'], 0.2,
            1.0 / self.configDict['MAXPOLEANGULARPOSITION2'], 0.1
        ])

        #The current state of the simulation
        self.initialState = array([
            0.0, 0.0, self.configDict["INITIALPOLEANGULARPOSITION1"], 0.0, 0.0,
            0.0
        ])
        #The current state is initially set to the initial state
        self.currentState = array(self.initialState)

        if useGUI:
            from mmlf.gui.viewers import VIEWERS
            from mmlf.gui.viewers.trajectory_viewer import TrajectoryViewer

            # Add general trajectory viewer
            VIEWERS.addViewer(lambda: TrajectoryViewer(self.stateSpace),
                              'TrajectoryViewer')
Example #7
    def __init__(self, useGUI, *args, **kwargs):
        self.environmentInfo = EnvironmentInfo(versionNumber="0.3",
                                               environmentName="Maze Cliff",
                                               discreteActionSpace=True,
                                               episodic=True,
                                               continuousStateSpace=False,
                                               continuousActionSpace=False,
                                               stochastic=False)

        super(MazeCliffEnvironment, self).__init__(useGUI=useGUI, *args, **kwargs)
        
        # A string which describes the structure of the maze
        # A * indicates a wall, an S the start position of the agent
        # and a G the goal. A blank indicates a free cell.
        mazeDescriptionString =  """**************
                                    *            *
                                    *            *
                                    *            *
                                    *S          G*
                                    **************
                                    """                            
                                    
        #The maze object is created from the description
        self.maze = Maze.createMazeFromString(mazeDescriptionString,
                                              cliffPenalty=self.configDict["cliffPenalty"],
                                              stochasticity=self.configDict["stochasticity"])
        
        #The state space of the Maze Cliff simulation
        oldStyleStateSpace =   {
                                "column": ("discrete", range(self.maze.getColumns())),
                                "row": ("discrete", range(self.maze.getRows())),
                            }
        
        self.stateSpace = StateSpace()
        self.stateSpace.addOldStyleSpace(oldStyleStateSpace, limitType="soft")
        
        #The action space of the Maze Cliff simulation
        oldStyleActionSpace =  {
                                "action": ("discrete", ["up", "down", "left", "right"])
                            }
        
        self.actionSpace = ActionSpace()
        self.actionSpace.addOldStyleSpace(oldStyleActionSpace, limitType="soft")
        
        
        # Dictionary which contains all configuration options specific to this
        # environment. It is VERY important to put ALL configuration options
        # which uniquely determine the behavior of the environment into this
        # dictionary.
        self.configDict =  {}
               
        #The current state of the simulation
        self.initialState =  { 
                     "row": self.maze.getStartPosition()[0],
                     "column": self.maze.getStartPosition()[1],
                  }
        #The current state is initially set to the initial state
        self.currentState = deepcopy(self.initialState)
        
        #A counter which stores the number of steps which have been performed in this episode
        self.stepCounter = 0
        self.episodeCounter = 0
        
        #The accumulated reward
        self.reward = 0.0
             
        if useGUI:
            from mmlf.gui.viewers import VIEWERS
            from mmlf.gui.viewers.trajectory_viewer import TrajectoryViewer
            from mmlf.worlds.maze2d.environments.maze2d_viewer import Maze2DDetailedViewer
            from mmlf.worlds.maze2d.environments.maze2d_function_viewer import Maze2DFunctionViewer
            
            # Create customized trajectory viewer
            class MazeCliffTrajectoryViewer(TrajectoryViewer):
                def __init__(self, stateSpace, plotStateSpaceStructure):
                    super(MazeCliffTrajectoryViewer, self).__init__(stateSpace)
                    plotStateSpaceStructure(self.axisTrajectory)
                
            
            VIEWERS.addViewer(lambda : \
                                MazeCliffTrajectoryViewer(self.stateSpace,
                                                          lambda ax : self.plotStateSpaceStructure(ax)), 
                              'MazeCliffTrajectoryViewer')
            
            # Add viewers for the maze world
            VIEWERS.addViewer(lambda : Maze2DDetailedViewer(self.maze,
                                                            self.stateSpace,
                                                            ["left", "right", "up", "down"]),
                              'MazeCliffDetailedViewer')
            VIEWERS.addViewer(lambda : Maze2DFunctionViewer(self.maze,
                                                            self.stateSpace),
                              'MazeCliffFunctionViewer')
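
Maze description strings like the one above are plain ASCII grids, so the grid dimensions can be derived directly from the text. A standalone sketch, independent of the Maze class itself:

# Standalone sketch: derive grid dimensions from an ASCII maze description.
mazeDescriptionString = """**************
*            *
*S          G*
**************"""
lines = [line.strip() for line in mazeDescriptionString.split("\n") if line.strip()]
rows, columns = len(lines), len(lines[0])
assert rows == 4 and columns == 14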
Example #8
        #Redraw
        self.canvas.draw()
        
    def _changeMWA(self):
        self.mwaSize = 2**self.mwaSlider.value()
        self.mwaLabel.setText("Moving Window Average: %s" % self.mwaSize)
        # Replot
        self._plot()
        
        
    def _linePlotChanged(self, linePlot):
        self.linePlot = self.linePlotTypes[linePlot]
        # Replot
        self._plot()
        
    def _save(self):
        rootDirectory = \
            self.tableModel.rootDirectory if hasattr(self.tableModel, 
                                                     "rootDirectory") \
                else mmlf.getRWPath()
        graphicFileName = \
            str(QtGui.QFileDialog.getSaveFileName(self,
                                                  "Select a file for the stored graphic",
                                                  rootDirectory,   
                                                  "Plots (*.pdf)"))
        self.fig.savefig(str(graphicFileName), dpi=400)
        
    

VIEWERS.addViewer(lambda : ExperimentViewer(), 'ExperimentViewer') 
Example #9
    def __init__(self, useGUI, *args, **kwargs):

        self.environmentInfo = EnvironmentInfo(versionNumber="0.3",
                                               environmentName="Maze2D",
                                               discreteActionSpace=True,
                                               episodic=True,
                                               continuousStateSpace=False,
                                               continuousActionSpace=False,
                                               stochastic=False)

        super(Maze2dEnvironment, self).__init__(useGUI=useGUI, *args, **kwargs)

        # Read the string which describes the structure of the maze
        mazePath = (mmlf.getRWPath() + os.sep + "config" + os.sep + "maze2d" +
                    os.sep + self.configDict['MAZE'])
        with open(mazePath, 'r') as mazeFile:
            mazeDescriptionString = mazeFile.read()
        # Remove comment lines and superfluous whitespace
        lines = map(lambda line: line.strip(),
                    mazeDescriptionString.split("\n"))
        lines = filter(lambda line: not line.startswith("#"), lines)
        mazeDescriptionString = "\n".join(lines)

        #The maze object is created from the description
        self.maze = Maze.createMazeFromString(mazeDescriptionString)

        #The state space of the Maze2d Simulation
        oldStyleStateSpace = {
            "column": ("discrete", range(self.maze.getColumns())),
            "row": ("discrete", range(self.maze.getRows()))
        }

        self.stateSpace = StateSpace()
        self.stateSpace.addOldStyleSpace(oldStyleStateSpace, limitType="soft")

        #The action space of the Maze2d Simulation
        oldStyleActionSpace = {
            "action": ("discrete", ["left", "right", "up", "down"])
        }

        self.actionSpace = ActionSpace()
        self.actionSpace.addOldStyleSpace(oldStyleActionSpace,
                                          limitType="soft")

        #The current state of the simulation
        self.initialState = {
            "row": self.maze.getStartPosition()[0],
            "column": self.maze.getStartPosition()[1]
        }
        #The current state is initially set to the initial state
        self.currentState = deepcopy(self.initialState)

        if useGUI:
            from mmlf.gui.viewers import VIEWERS
            from mmlf.gui.viewers.trajectory_viewer import TrajectoryViewer
            from mmlf.worlds.maze2d.environments.maze2d_viewer import Maze2DDetailedViewer
            from mmlf.worlds.maze2d.environments.maze2d_function_viewer import Maze2DFunctionViewer

            # Create customized trajectory viewer
            class Maze2dTrajectoryViewer(TrajectoryViewer):
                def __init__(self, stateSpace, plotStateSpaceStructure):
                    super(Maze2dTrajectoryViewer, self).__init__(stateSpace)
                    plotStateSpaceStructure(self.axisTrajectory)


            VIEWERS.addViewer(lambda : \
                                Maze2dTrajectoryViewer(self.stateSpace,
                                                       lambda ax : self.plotStateSpaceStructure(ax)),
                              'Maze2dTrajectoryViewer')

            # Add viewers for the maze world
            VIEWERS.addViewer(
                lambda: Maze2DDetailedViewer(self.maze, self.stateSpace,
                                             ["left", "right", "up", "down"]),
                'Maze2DDetailedViewer')
            VIEWERS.addViewer(
                lambda: Maze2DFunctionViewer(self.maze, self.stateSpace),
                'Maze2DFunctionViewer')
Example #10
        if self.observable is not None:
            # Remove old observable
            self.observable.removeObserver(self.observableCallback)
        # Get new observable and add as listener
        self.observable = self.floatStreamObservables[comboBoxIndex]
        self.observable.addObserver(self.observableCallback)
        # Remove old values
        self.values = deque()
        self.times = deque()

    def _changeWindowSize(self):
        self.windowSize = 2**self.windowSizeSlider.value()

        while len(self.values) > self.windowSize:
            self.values.popleft()
            self.times.popleft()

        self._redraw()

        self.windowSizeLabel.setText("WindowSize: %s" % self.windowSize)

    def _changeMWA(self):
        self.mwaSize = self.mwaSlider.value()

        self._redraw()

        self.mwaLabel.setText("Moving Window Average: %s" % self.mwaSize)


VIEWERS.addViewer(lambda: FloatStreamViewer(), 'FloatStreamViewer')
Example #11
    def __init__(self, config, useGUI, *args, **kwargs):

        self.environmentInfo = EnvironmentInfo(versionNumber="0.3",
                                               environmentName="Mountain Car",
                                               discreteActionSpace=True,
                                               episodic=True,
                                               continuousStateSpace=True,
                                               continuousActionSpace=False,
                                               stochastic=True)

        # Add value for N to config dict (required for discretization
        # in optimal policy computation)
        if "N" not in config["configDict"]:
            config["configDict"]["N"] = "50"

        super(MountainCarEnvironment, self).__init__(config,
                                                     useGUI=useGUI,
                                                     *args,
                                                     **kwargs)

        # configuration
        self.randomStarts = True

        # Some constants
        self.minPosition = -1.2  # Minimum car position
        self.maxPosition = 0.6  # Maximum car position (past goal)
        self.maxVelocity = 0.07  # Maximum velocity of car
        self.goalPosition = 0.5  # Goal position - how to tell we are done

        # If "maxGoalVelocity" is not set in configDict, set it to maximal
        # velocity
        if not "maxGoalVelocity" in self.configDict:
            self.configDict["maxGoalVelocity"] = self.maxVelocity

        # The current state of the system
        self.state = None

        # Some counters
        self.overallStepCounter = 0

        # State and action space definition
        oldStyleStateSpace = {
            "position": ("continuous", [(self.minPosition, self.maxPosition)]),
            "velocity": ("continuous", [(-self.maxVelocity, self.maxVelocity)])
        }

        self.stateSpace = StateSpace()
        self.stateSpace.addOldStyleSpace(oldStyleStateSpace, limitType="soft")

        self.actions = ["left", "right", "none"]
        oldStyleActionSpace = {"thrust": ("discrete", self.actions)}

        self.actionSpace = ActionSpace()
        self.actionSpace.addOldStyleSpace(oldStyleActionSpace,
                                          limitType="hard")

        if useGUI:
            from mmlf.gui.viewers import VIEWERS
            from mmlf.gui.viewers.trajectory_viewer import TrajectoryViewer
            from mmlf.worlds.mountain_car.environments.mcar_policy_viewer \
                    import MountainCarPolicyViewer
            from mmlf.worlds.mountain_car.environments.mcar_valuefunction_viewer \
                    import MountainCarValueFunctionViewer
            # Add general trajectory viewer
            VIEWERS.addViewer(lambda: TrajectoryViewer(self.stateSpace),
                              'TrajectoryViewer')
            VIEWERS.addViewer(lambda: MountainCarPolicyViewer(self.stateSpace),
                              'MountainCar PolicyViewer')
            VIEWERS.addViewer(
                lambda: MountainCarValueFunctionViewer(self.stateSpace),
                'MountainCar ValueFunctionViewer')
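
The position and velocity bounds above match the classic mountain car formulation. For reference, here is a standalone sketch of the standard dynamics update (illustrative only; not necessarily the exact update performed by MountainCarEnvironment):

import math

# Illustrative sketch of the classic mountain car dynamics (Sutton & Barto).
def mountainCarStep(position, velocity, thrust):
    force = {"left": -1.0, "right": 1.0, "none": 0.0}[thrust]
    velocity += 0.001 * force - 0.0025 * math.cos(3 * position)
    velocity = max(-0.07, min(0.07, velocity))
    position += velocity
    position = max(-1.2, min(0.6, position))
    if position == -1.2 and velocity < 0.0:
        velocity = 0.0  # inelastic collision with the left boundary
    return position, velocity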