Example no. 1
0
    def __init__(self, useGUI, *args, **kwargs):
        """Set up the partially observable double pole balancing environment.

        Only the positional dimensions (cart position and the two pole
        angles) are exposed to the agent via ``stateNameList``; the
        velocities are hidden, which makes the task partially observable.
        """
        # Meta information describing this environment's properties
        self.environmentInfo = EnvironmentInfo(
            versionNumber="0.3",
            environmentName="Partially Observable Double Pole Balancing",
            discreteActionSpace=False,
            episodic=True,
            continuousStateSpace=True,
            continuousActionSpace=True,
            stochastic=False)

        super(PODoublePoleBalancingEnvironment, self).__init__(useGUI=useGUI,
                                                               *args, **kwargs)

        # Build the state space from an old-style definition: each exposed
        # dimension is continuous and normalized to [-1, 1]
        oldStyleStateSpace = {}
        for dimensionName in ["cartPosition", "poleAngularPosition1",
                              "poleAngularPosition2"]:
            oldStyleStateSpace[dimensionName] = ("continuous", [(-1.0, 1.0)])

        self.stateSpace = StateSpace()
        self.stateSpace.addOldStyleSpace(oldStyleStateSpace, limitType="soft")

        # The state dimensions that are sent to the agent.
        # NOTE: the ordering of these dimensions matters!
        self.stateNameList = ["cartPosition",
                              "poleAngularPosition1",
                              "poleAngularPosition2"]
Example no. 2
0
    def __init__(self, useGUI, *args, **kwargs):
        """Create the pinball maze environment.

        Loads the maze description referenced by
        ``self.configDict['MAZE']`` (resolved relative to this module's
        parent directory), builds the continuous state space and discrete
        action space, and optionally registers the pinball GUI viewers.
        """
        # Meta information describing this environment's properties
        self.environmentInfo = EnvironmentInfo(versionNumber="0.3",
                                               environmentName="PinballMaze",
                                               discreteActionSpace=True,
                                               episodic=True,
                                               continuousStateSpace=True,
                                               continuousActionSpace=False,
                                               stochastic=False)

        super(PinballMazeEnvironment, self).__init__(useGUI=useGUI,
                                                     *args,
                                                     **kwargs)

        # Read the maze description; "with" guarantees the file handle is
        # closed even if read() raises (the original left it open).
        mazePath = (os.path.dirname(os.path.abspath(__file__)) + os.sep +
                    os.pardir + os.sep + self.configDict['MAZE'])
        with open(mazePath, 'r') as mazeFile:
            mazeString = mazeFile.read()

        # The maze object is created from the description
        self.maze = PinballMaze.createMazeFromString(mazeString)

        # The state space of the pinball maze simulation:
        # ball position (x, y) and velocity (xdot, ydot)
        oldStyleStateSpace = {
            "x": ("continuous", [(0.0, 1.0)]),
            "y": ("continuous", [(0.0, 1.0)]),
            "xdot": ("continuous", [(-1.0, 1.0)]),
            "ydot": ("continuous", [(-1.0, 1.0)]),
        }

        self.stateSpace = StateSpace()
        self.stateSpace.addOldStyleSpace(oldStyleStateSpace, limitType="soft")

        # The action space: accelerate/decelerate along x or y, or do nothing
        oldStyleActionSpace = {
            "action": ("discrete", ["xinc", "xdec", "yinc", "ydec", "none"])
        }

        self.actionSpace = ActionSpace()
        self.actionSpace.addOldStyleSpace(oldStyleActionSpace,
                                          limitType="soft")

        # The current state is initially set to the initial state
        self.currentState = self.getInitialState()

        if useGUI:
            # Add viewers specific to the pinball world
            from mmlf.gui.viewers import VIEWERS
            from mmlf.worlds.pinball_maze.environments.pinball_maze_trajectory_viewer \
                        import PinballMazeTrajectoryViewer
            from mmlf.worlds.pinball_maze.environments.pinball_maze_function_viewer \
                        import PinballMazeFunctionViewer

            VIEWERS.addViewer(
                lambda: PinballMazeTrajectoryViewer(self, self.stateSpace),
                'PinballMaze TrajectoryViewer')
            VIEWERS.addViewer(
                lambda: PinballMazeFunctionViewer(self, self.stateSpace),
                'PinballMaze FunctionViewer')
Example no. 3
0
    def __init__(self, useGUI, *args, **kwargs):
        """Create the single pole balancing environment.

        Defines the continuous state and action spaces, sets the initial
        state (pole tilted slightly at 0.1), and optionally registers the
        GUI viewers.
        """
        # Meta information describing this environment's properties
        self.environmentInfo = EnvironmentInfo(
            versionNumber="0.3",
            environmentName="Single Pole Balancing",
            discreteActionSpace=False,
            episodic=True,
            continuousStateSpace=True,
            continuousActionSpace=True,
            stochastic=False)

        super(SinglePoleBalancingEnvironment, self).__init__(useGUI=useGUI,
                                                             *args, **kwargs)

        # Continuous state space of the single pole balancing simulation
        oldStyleStateSpace = {
            "cartPosition": ("continuous", [(-3.125, 3.125)]),
            "cartVelocity": ("continuous", [(-0.5, 0.5)]),
            "poleAngularPosition": ("continuous", [(-1.13, 1.13)]),
            "poleAngularVelocity": ("continuous", [(-0.80, 0.80)]),
        }
        self.stateSpace = StateSpace()
        self.stateSpace.addOldStyleSpace(oldStyleStateSpace, limitType="soft")

        # Continuous action space: the force applied to the cart
        oldStyleActionSpace = {"force": ("continuous", [(-2, 2)])}
        self.actionSpace = ActionSpace()
        self.actionSpace.addOldStyleSpace(oldStyleActionSpace, limitType="soft")

        # The initial state of the simulation. Note that the values of this
        # dict can be accessed directly as attributes of the class (see the
        # __getattr__ and __setattr__ methods).
        self.initialState = {"cartPosition": 0.0,
                             "cartVelocity": 0.0,
                             "poleAngularPosition": 0.1,
                             "poleAngularVelocity": 0.0}
        # The current state starts out as a copy of the initial state
        self.currentState = deepcopy(self.initialState)

        if useGUI:
            from mmlf.gui.viewers import VIEWERS
            from mmlf.gui.viewers.trajectory_viewer import TrajectoryViewer
            from mmlf.worlds.single_pole_balancing.environments.spb_trajectory_viewer import SPBTrajectoryViewer

            # Add the general trajectory viewer
            VIEWERS.addViewer(lambda: TrajectoryViewer(self.stateSpace),
                              'TrajectoryViewer')
            # Add the cart-specific viewer
            VIEWERS.addViewer(lambda: SPBTrajectoryViewer(),
                              'SPB Cart Viewer')
Example no. 4
0
    def __init__(self, useGUI, *args, **kwargs):
        """Create the "17 and 4" card game environment.

        Sets up the discrete state space (the accumulated card count) and
        action space ("continue"/"stop"), prepares the card deck, deals
        the first game, and optionally registers a Q-value viewer.
        """
        # Meta information describing this environment's properties
        self.environmentInfo = EnvironmentInfo(versionNumber="0.3",
                                               environmentName="17 and 4",
                                               discreteActionSpace=True,
                                               episodic=True,
                                               continuousStateSpace=False,
                                               continuousActionSpace=False,
                                               stochastic=True)

        super(SeventeenAndFourEnvironment, self).__init__(useGUI=useGUI,
                                                          *args,
                                                          **kwargs)

        # State space: the accumulated card count (0 .. 22)
        self.stateSpace = StateSpace()
        self.stateSpace.addOldStyleSpace({"count": ("discrete", range(23))},
                                         limitType="soft")

        # Action space: draw another card or stop
        self.actionSpace = ActionSpace()
        self.actionSpace.addOldStyleSpace(
            {"action": ("discrete", ["continue", "stop"])}, limitType="hard")

        # The available cards: four copies of each card value
        self.cards = [cardValue
                      for cardValue in [2, 3, 4, 7, 8, 9, 10, 11]
                      for _ in range(4)]
        # Initialize first game
        self.getInitialState()

        # Observable reporting the points achieved per episode
        self.pointsObservable = \
                FloatStreamObservable(title='%s Points' % self.__class__.__name__,
                                      time_dimension_name='Episode',
                                      value_name='Points')

        if useGUI:
            from mmlf.gui.viewers import VIEWERS
            from mmlf.worlds.seventeen_and_four.environments.seventeen_and_four_viewers\
                        import SeventeenAndFourValuefunctionViewer
            # Add a Q-value viewer for this world
            VIEWERS.addViewer(
                lambda: SeventeenAndFourValuefunctionViewer(self.stateSpace),
                'SeventeenAndFourValuefunctionViewer')
    def __init__(self, useGUI, *args, **kwargs):
        """Create the linear Markov chain environment.

        The chain consists of ``self.configDict["length"]`` discrete
        fields; the agent starts in the middle field and can move "left"
        or "right".
        """
        # Create the environment info
        self.environmentInfo = \
            EnvironmentInfo(# Which communication protocol version can the
                            # environment handle?
                            versionNumber="0.3",
                            # Name of the environment (can be chosen arbitrarily)
                            environmentName="LinearMarkovChain",
                            # Is the action space of this environment discrete?
                            discreteActionSpace=True,
                            # Is the environment episodic?
                            episodic=True,
                            # Is the state space of environment continuous?
                            continuousStateSpace=False,
                            # Is the action space of environment continuous?
                            continuousActionSpace=False,
                            # Is the environment stochastic?
                            stochastic=False)

        # Calls constructor of base class
        # After this call, the environment has an attribute "self.configDict",
        # The values of this dict are evaluated, i.e. instead of '100' (string),
        # the key 'length' will have the same value 100 (int).
        super(LinearMarkovChainEnvironment, self).__init__(useGUI=useGUI,
                                                           *args,
                                                           **kwargs)

        # The state space of the linear markov chain
        oldStyleStateSpace = {
            "field": ("discrete", range(self.configDict["length"]))
        }

        self.stateSpace = StateSpace()
        self.stateSpace.addOldStyleSpace(oldStyleStateSpace, limitType="soft")

        # The action space of the linear markov chain
        oldStyleActionSpace = {"action": ("discrete", ["left", "right"])}

        self.actionSpace = ActionSpace()
        self.actionSpace.addOldStyleSpace(oldStyleActionSpace,
                                          limitType="soft")

        # The initial state is the middle field of the chain. Floor
        # division ("//") keeps "field" an int under Python 3 as well;
        # plain "/" would produce a float for this discrete dimension.
        self.initialState = {"field": self.configDict["length"] // 2}
        # The current state is initially set to the initial state
        self.currentState = deepcopy(self.initialState)
Example no. 6
0
    def __init__(self, useGUI, *args, **kwargs):
        """Create the double pole balancing environment.

        Converts the configured pole angles from degrees to radians,
        creates the dynamics object, defines the state and action spaces,
        the state normalization vector, and the initial state.
        """
        # Meta information describing this environment's properties
        self.environmentInfo = EnvironmentInfo(
            versionNumber="0.3",
            environmentName="Double Pole Balancing",
            discreteActionSpace=False,
            episodic=True,
            continuousStateSpace=True,
            continuousActionSpace=True,
            stochastic=False)

        super(DoublePoleBalancingEnvironment, self).__init__(useGUI=useGUI,
                                                             *args, **kwargs)

        # Convert the configured angles from degrees to radians
        degreesToRadians = pi / 180.0
        self.configDict["INITIALPOLEANGULARPOSITION1"] *= degreesToRadians
        self.configDict['MAXPOLEANGULARPOSITION1'] *= degreesToRadians
        self.configDict['MAXPOLEANGULARPOSITION2'] *= degreesToRadians

        # The object which computes the dpb dynamics
        self.dpbDynamics = DoublePoleBalancingDynamics(self.configDict)

        # Continuous state space of the double pole balancing simulation
        oldStyleStateSpace = {
            "cartPosition": ("continuous", [(-1.0, 1.0)]),
            "cartVelocity": ("continuous", [(-0.1, 0.1)]),
            "poleAngularPosition1": ("continuous", [(-1.0, 1.0)]),
            "poleAngularVelocity1": ("continuous", [(-0.5, 0.5)]),
            "poleAngularPosition2": ("continuous", [(-1.0, 1.0)]),
            "poleAngularVelocity2": ("continuous", [(-0.5, 0.5)])
        }
        self.stateSpace = StateSpace()
        self.stateSpace.addOldStyleSpace(oldStyleStateSpace, limitType="soft")

        # Continuous action space: the force applied to the cart
        oldStyleActionSpace = {"force": ("continuous", [(-10, 10)])}
        self.actionSpace = ActionSpace()
        self.actionSpace.addOldStyleSpace(oldStyleActionSpace,
                                          limitType="soft")

        # The names of the state dimensions that are sent to the agent.
        # NOTE: the ordering of these dimensions matters!
        self.stateNameList = ["cartPosition", "cartVelocity",
                              "poleAngularPosition1", "poleAngularVelocity1",
                              "poleAngularPosition2", "poleAngularVelocity2"]

        # The vector used for normalization of the state for the agent
        self.normalizationVector = array([
            1.0 / self.configDict['MAXCARTPOSITION'], 0.1,
            1.0 / self.configDict['MAXPOLEANGULARPOSITION1'], 0.2,
            1.0 / self.configDict['MAXPOLEANGULARPOSITION2'], 0.1
        ])

        # Initial state: everything at rest except the first pole, which
        # starts at the configured angle
        self.initialState = array([
            0.0, 0.0, self.configDict["INITIALPOLEANGULARPOSITION1"],
            0.0, 0.0, 0.0
        ])
        # The current state is initially a copy of the initial state
        self.currentState = array(self.initialState)

        if useGUI:
            from mmlf.gui.viewers import VIEWERS
            from mmlf.gui.viewers.trajectory_viewer import TrajectoryViewer

            # Add the general trajectory viewer
            VIEWERS.addViewer(lambda: TrajectoryViewer(self.stateSpace),
                              'TrajectoryViewer')
Example no. 7
0
    def __init__(self, useGUI, *args, **kwargs):
        """Create the maze cliff environment.

        Builds the cliff maze from an inline description string, defines
        the discrete state and action spaces, initializes step/episode
        counters and the accumulated reward, and optionally registers the
        GUI viewers.
        """
        # Meta information describing this environment's properties
        self.environmentInfo = EnvironmentInfo(versionNumber="0.3",
                                               environmentName="Maze Cliff",
                                               discreteActionSpace=True,
                                               episodic=True,
                                               continuousStateSpace=False,
                                               continuousActionSpace=False,
                                               stochastic=False)

        super(MazeCliffEnvironment, self).__init__(useGUI=useGUI, *args, **kwargs)

        # A string which describes the structure of the maze
        # A * indicates a wall, an S the start position of the agent
        # and a G the goal. A blank indicates a free cell.
        mazeDescriptionString = """**************
                                    *            *
                                    *            *
                                    *            *
                                    *S          G*
                                    **************
                                    """

        # The maze object is created from the description; cliff penalty
        # and stochasticity come from the evaluated configuration dict.
        self.maze = Maze.createMazeFromString(mazeDescriptionString,
                                              cliffPenalty=self.configDict["cliffPenalty"],
                                              stochasticity=self.configDict["stochasticity"])

        # The state space of the maze simulation
        oldStyleStateSpace = {
            "column": ("discrete", range(self.maze.getColumns())),
            "row": ("discrete", range(self.maze.getRows())),
        }

        self.stateSpace = StateSpace()
        self.stateSpace.addOldStyleSpace(oldStyleStateSpace, limitType="soft")

        # The action space of the maze simulation
        oldStyleActionSpace = {
            "action": ("discrete", ["up", "down", "left", "right"])
        }

        self.actionSpace = ActionSpace()
        self.actionSpace.addOldStyleSpace(oldStyleActionSpace, limitType="soft")

        # NOTE(fix): the original code reset self.configDict to an empty
        # dict here, discarding the "cliffPenalty" and "stochasticity"
        # options used above — even though all options that determine the
        # environment's behavior must be kept in this dict. The reset has
        # been removed.

        # The initial state of the simulation: the maze's start position
        self.initialState = {
            "row": self.maze.getStartPosition()[0],
            "column": self.maze.getStartPosition()[1],
        }
        # The current state is initially set to the initial state
        self.currentState = deepcopy(self.initialState)

        # Counters for the steps performed in this episode and the number
        # of completed episodes
        self.stepCounter = 0
        self.episodeCounter = 0

        # The accumulated reward
        self.reward = 0.0

        if useGUI:
            from mmlf.gui.viewers import VIEWERS
            from mmlf.gui.viewers.trajectory_viewer import TrajectoryViewer
            from mmlf.worlds.maze2d.environments.maze2d_viewer import Maze2DDetailedViewer
            from mmlf.worlds.maze2d.environments.maze2d_function_viewer import Maze2DFunctionViewer

            # Customized trajectory viewer that additionally plots the
            # maze structure into the trajectory axis
            class MazeCliffTrajectoryViewer(TrajectoryViewer):
                def __init__(self, stateSpace, plotStateSpaceStructure):
                    super(MazeCliffTrajectoryViewer, self).__init__(stateSpace)
                    plotStateSpaceStructure(self.axisTrajectory)

            VIEWERS.addViewer(lambda: \
                                MazeCliffTrajectoryViewer(self.stateSpace,
                                                          lambda ax: self.plotStateSpaceStructure(ax)),
                              'MazeCliffTrajectoryViewer')

            # Add viewers for the maze world
            VIEWERS.addViewer(lambda: Maze2DDetailedViewer(self.maze,
                                                           self.stateSpace,
                                                           ["left", "right", "up", "down"]),
                              'MazeCliffDetailedViewer')
            VIEWERS.addViewer(lambda: Maze2DFunctionViewer(self.maze,
                                                           self.stateSpace),
                              'MazeCliffFunctionViewer')
Example no. 8
0
    def __init__(self, useGUI, *args, **kwargs):
        """Create the 2D maze environment.

        Reads the maze layout referenced by ``self.configDict['MAZE']``
        from the MMLF read/write path, strips comment lines, builds the
        maze and the discrete state/action spaces, and optionally
        registers the GUI viewers.
        """
        # Meta information describing this environment's properties
        self.environmentInfo = EnvironmentInfo(versionNumber="0.3",
                                               environmentName="Maze2D",
                                               discreteActionSpace=True,
                                               episodic=True,
                                               continuousStateSpace=False,
                                               continuousActionSpace=False,
                                               stochastic=False)

        super(Maze2dEnvironment, self).__init__(useGUI=useGUI, *args, **kwargs)

        # Read the string which describes the structure of the maze.
        # "with" guarantees the file handle is closed (the original
        # leaked it).
        mazePath = (mmlf.getRWPath() + os.sep + "config" + os.sep +
                    "maze2d" + os.sep + self.configDict['MAZE'])
        with open(mazePath) as mazeFile:
            mazeDescriptionString = mazeFile.read()
        # Remove superfluous whitespace and comment lines; one
        # comprehension replaces the original map/filter-with-lambda pair.
        strippedLines = [line.strip()
                         for line in mazeDescriptionString.split("\n")]
        mazeDescriptionString = "\n".join(line for line in strippedLines
                                          if not line.startswith("#"))

        # The maze object is created from the description
        self.maze = Maze.createMazeFromString(mazeDescriptionString)

        # The state space of the Maze2d simulation
        oldStyleStateSpace = {
            "column": ("discrete", range(self.maze.getColumns())),
            "row": ("discrete", range(self.maze.getRows()))
        }

        self.stateSpace = StateSpace()
        self.stateSpace.addOldStyleSpace(oldStyleStateSpace, limitType="soft")

        # The action space of the Maze2d simulation
        oldStyleActionSpace = {
            "action": ("discrete", ["left", "right", "up", "down"])
        }

        self.actionSpace = ActionSpace()
        self.actionSpace.addOldStyleSpace(oldStyleActionSpace,
                                          limitType="soft")

        # The initial state of the simulation: the maze's start position
        self.initialState = {
            "row": self.maze.getStartPosition()[0],
            "column": self.maze.getStartPosition()[1]
        }
        # The current state is initially set to the initial state
        self.currentState = deepcopy(self.initialState)

        if useGUI:
            from mmlf.gui.viewers import VIEWERS
            from mmlf.gui.viewers.trajectory_viewer import TrajectoryViewer
            from mmlf.worlds.maze2d.environments.maze2d_viewer import Maze2DDetailedViewer
            from mmlf.worlds.maze2d.environments.maze2d_function_viewer import Maze2DFunctionViewer

            # Customized trajectory viewer that additionally plots the
            # maze structure into the trajectory axis
            class Maze2dTrajectoryViewer(TrajectoryViewer):
                def __init__(self, stateSpace, plotStateSpaceStructure):
                    super(Maze2dTrajectoryViewer, self).__init__(stateSpace)
                    plotStateSpaceStructure(self.axisTrajectory)

            VIEWERS.addViewer(lambda : \
                                Maze2dTrajectoryViewer(self.stateSpace,
                                                       lambda ax : self.plotStateSpaceStructure(ax)),
                              'Maze2dTrajectoryViewer')

            # Add viewers for the maze world
            VIEWERS.addViewer(
                lambda: Maze2DDetailedViewer(self.maze, self.stateSpace,
                                             ["left", "right", "up", "down"]),
                'Maze2DDetailedViewer')
            VIEWERS.addViewer(
                lambda: Maze2DFunctionViewer(self.maze, self.stateSpace),
                'Maze2DFunctionViewer')
Example no. 9
0
    def __init__(self, config, useGUI, *args, **kwargs):
        """Create the mountain car environment.

        Defines the physical constants of the car, the continuous state
        space (position, velocity) and the discrete thrust action space,
        and optionally registers the GUI viewers.
        """
        # Meta information describing this environment's properties
        self.environmentInfo = EnvironmentInfo(versionNumber="0.3",
                                               environmentName="Mountain Car",
                                               discreteActionSpace=True,
                                               episodic=True,
                                               continuousStateSpace=True,
                                               continuousActionSpace=False,
                                               stochastic=True)

        # Add value for N to config dict (required for discretization
        # in optimal policy computation)
        if "N" not in config["configDict"]:
            config["configDict"]["N"] = "50"

        super(MountainCarEnvironment, self).__init__(config,
                                                     useGUI=useGUI,
                                                     *args,
                                                     **kwargs)

        # configuration
        self.randomStarts = True

        # Some constants
        self.minPosition = -1.2  # Minimum car position
        self.maxPosition = 0.6  # Maximum car position (past goal)
        self.maxVelocity = 0.07  # Maximum velocity of car
        self.goalPosition = 0.5  # Goal position - how to tell we are done

        # If "maxGoalVelocity" is not set in configDict, set it to maximal
        # velocity ("x not in d" is the idiomatic membership test)
        if "maxGoalVelocity" not in self.configDict:
            self.configDict["maxGoalVelocity"] = self.maxVelocity

        # The current state of the system (set lazily later on)
        self.state = None

        # Some counters
        self.overallStepCounter = 0

        # State and action space definition
        oldStyleStateSpace = {
            "position": ("continuous", [(self.minPosition, self.maxPosition)]),
            "velocity": ("continuous", [(-self.maxVelocity, self.maxVelocity)])
        }

        self.stateSpace = StateSpace()
        self.stateSpace.addOldStyleSpace(oldStyleStateSpace, limitType="soft")

        self.actions = ["left", "right", "none"]
        oldStyleActionSpace = {"thrust": ("discrete", self.actions)}

        self.actionSpace = ActionSpace()
        self.actionSpace.addOldStyleSpace(oldStyleActionSpace,
                                          limitType="hard")

        if useGUI:
            from mmlf.gui.viewers import VIEWERS
            from mmlf.gui.viewers.trajectory_viewer import TrajectoryViewer
            from mmlf.worlds.mountain_car.environments.mcar_policy_viewer \
                    import MountainCarPolicyViewer
            from mmlf.worlds.mountain_car.environments.mcar_valuefunction_viewer \
                    import MountainCarValueFunctionViewer
            # Add general trajectory viewer
            VIEWERS.addViewer(lambda: TrajectoryViewer(self.stateSpace),
                              'TrajectoryViewer')
            VIEWERS.addViewer(lambda: MountainCarPolicyViewer(self.stateSpace),
                              'MountainCar PolicyViewer')
            VIEWERS.addViewer(
                lambda: MountainCarValueFunctionViewer(self.stateSpace),
                'MountainCar ValueFunctionViewer')