def __init__(self, width, height, agent, walls, passengers, goal_loc=None, slip_prob=0, gamma=0.99):
    self.height = height
    self.width = width

    agent_obj = OOMDPObject(attributes=agent, name="agent")
    wall_objs = self._make_oomdp_objs_from_list_of_dict(walls, "wall")
    pass_objs = self._make_oomdp_objs_from_list_of_dict(passengers, "passenger")
    init_state = self._create_state(agent_obj, wall_objs, pass_objs)

    # With no explicit goal location, this is the standard taxi task;
    # otherwise the task becomes navigation to goal_loc, with matching
    # terminal and reward functions.
    self.goal_location = goal_loc
    self.terminal_func = taxi_helpers.is_taxi_terminal_state if goal_loc is None else self._navigation_terminal_func
    rf = self._taxi_reward_func if goal_loc is None else self._navigation_reward_func

    OOMDP.__init__(self, TaxiOOMDP.ACTIONS, self._taxi_transition_func, rf,
                   init_state=init_state, gamma=gamma)
    self.slip_prob = slip_prob
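# A minimal usage sketch. The agent keys (x/y/has_passenger) match the
# agent_exit dictionaries used elsewhere in this file; the passenger keys
# (x/y/dest_x/dest_y/in_taxi) and the (x, y) form of goal_loc are assumptions
# here, chosen for illustration.
agent = {"x": 1, "y": 1, "has_passenger": 0}
passengers = [{"x": 4, "y": 3, "dest_x": 2, "dest_y": 2, "in_taxi": 0}]
taxi_mdp = TaxiOOMDP(width=5, height=5, agent=agent, walls=[], passengers=passengers)
# Passing goal_loc switches the same class into a pure navigation task.
nav_mdp = TaxiOOMDP(width=5, height=5, agent=agent, walls=[], passengers=passengers, goal_loc=(5, 5))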
def reset(self):
    '''
    Summary:
        Resets the OOMDP back to the initial configuration.
    '''
    self.time = self.init_time
    OOMDP.reset(self)
def __init__(self, panel, date_time, name_ext, timestep=30, panel_step=.1,
             reflective_index=0.65, latitude_deg=40.7, longitude_deg=142.17,
             img_dims=16, mode_dict=None):
    # Avoid a shared mutable default argument: build the mode dictionary per instance.
    if mode_dict is None:
        mode_dict = {'dual_axis': True, 'image_mode': False, 'cloud_mode': False}

    # Error check the lat/long.
    if abs(latitude_deg) > 90 or abs(longitude_deg) > 180:
        print("Error: latitude must be between [-90, 90], longitude between [-180, 180]. Lat:", latitude_deg, "Long:", longitude_deg)
        quit()

    if mode_dict['cloud_mode'] and not mode_dict['image_mode']:
        print("Warning (SolarOOMDP): Clouds were set to active but image mode is off. No cloud simulation supported for non-image-mode.")
        mode_dict['cloud_mode'] = False

    # Mode information.
    if not mode_dict['dual_axis']:
        # In single-axis tracking mode, change the action set accordingly.
        SolarOOMDP.ACTIONS = self.get_single_axis_actions()
        self.dual_axis = False
    else:
        self.dual_axis = True

    # Image stuff.
    self.img_dims = img_dims
    self.image_mode = mode_dict['image_mode']
    self.cloud_mode = mode_dict['cloud_mode']
    self.clouds = self._generate_clouds() if mode_dict['cloud_mode'] else []

    # Get panel information.
    self.panel = panel
    self.sqrt_num_panels = 1  # Assume only 1 panel for now.

    # Global information.
    self.latitude_deg = latitude_deg  # Positive in the northern hemisphere.
    self.longitude_deg = longitude_deg  # Negative reckoning west from the prime meridian in Greenwich, England.
    self.panel_step = panel_step
    self.timestep = timestep  # Timestep in minutes.
    self.reflective_index = reflective_index
    self.name_ext = name_ext

    # Time stuff.
    self.init_time = date_time
    self.time = date_time

    # Make state and call super.
    panels = self._get_default_panel_obj_list()
    init_state = self._create_state(panels, self.init_time)
    OOMDP.__init__(self, SolarOOMDP.ACTIONS, self._transition_func, self._reward_func, init_state=init_state)
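# A minimal usage sketch, assuming the constructor signature above. The panel
# argument is only stored on the instance in the code shown, so the dictionary
# below is a hypothetical panel description used purely for illustration.
import datetime
panel = {'x_dim': 1, 'y_dim': 1}  # Hypothetical panel attributes.
solar_mdp = SolarOOMDP(panel, datetime.datetime(2020, 6, 1, 12, 0), name_ext='_demo',
                       mode_dict={'dual_axis': True, 'image_mode': False, 'cloud_mode': False})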
def __init__(self, width, height, agent, walls, passengers, slip_prob=0, gamma=0.99):
    self.height = height
    self.width = width
    agent_obj = OOMDPObject(attributes=agent, name="agent")
    wall_objs = self._make_oomdp_objs_from_list_of_dict(walls, "wall")
    pass_objs = self._make_oomdp_objs_from_list_of_dict(passengers, "passenger")
    init_state = self._create_state(agent_obj, wall_objs, pass_objs)
    OOMDP.__init__(self, TaxiOOMDP.ACTIONS, self._taxi_transition_func, self._taxi_reward_func,
                   init_state=init_state, gamma=gamma)
    self.slip_prob = slip_prob
def __init__(self, width, height, agent, walls, blocks, exit, gamma=0.99):
    # _create_init_state populates self.objects, which is passed through to
    # the OOMDP constructor below.
    init_state = self._create_init_state(height, width, agent, walls, blocks, exit)
    OOMDP.__init__(self, BlockDudeOOMDP.ACTIONS, self.objects, self._block_dude_transition_func,
                   self._block_dude_reward_func, init_state=init_state, gamma=gamma)
    self.height = height
    self.width = width
def __init__(self, width, height, agent, walls, goals, crumbs, slip_prob=0, gamma=0.99,
             step_cost=0, weights=None, env_code=None, sample_rate=5):
    self.env_code = env_code
    self.height = height
    self.width = width
    if weights is not None:
        self.weights = weights
    else:
        # Default weighting over the reward variables.
        self.weights = np.array([[2, -0.4, -0.5]])

    # Objects that belong in the state (changing).
    agent_obj = OOMDPObject(attributes=agent, name="agent")
    agent_exit = {"x": 100, "y": 100}
    crumb_objs = self._make_oomdp_objs_from_list_of_dict(crumbs, "crumb")
    crumb_objs_exit = self._make_oomdp_objs_from_list_of_dict(crumbs, "crumb")

    # Objects that belong to the MDP (static).
    wall_objs = self._make_oomdp_objs_from_list_of_dict(walls, "wall")
    self.walls = wall_objs
    goal_objs = self._make_oomdp_objs_from_list_of_dict(goals, "goal")
    self.goals = goal_objs
    self.slip_prob = slip_prob

    init_state = self._create_state(agent_obj, crumb_objs)

    # Absorbing exit state: the agent is moved off-grid and the state is
    # marked terminal but not a goal.
    self.exit_state = self._create_state(OOMDPObject(attributes=agent_exit, name="agent_exit"), crumb_objs_exit)
    self.exit_state.set_terminal(True)
    self.exit_state.set_goal(False)

    OOMDP.__init__(self, CookieCrumbOOMDP.ACTIONS, self._cookie_crumb_transition_func,
                   self._cookie_crumb_reward_func, init_state=init_state, gamma=gamma,
                   step_cost=step_cost, sample_rate=sample_rate)
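# A minimal usage sketch. The x/y attribute keys mirror the agent_exit
# dictionary above; the specific coordinates are illustrative assumptions.
agent = {"x": 1, "y": 1}
goals = [{"x": 5, "y": 5}]
crumbs = [{"x": 2, "y": 3}, {"x": 3, "y": 4}]
cc_mdp = CookieCrumbOOMDP(width=5, height=5, agent=agent, walls=[], goals=goals, crumbs=crumbs)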
def __init__(self, width, height, agent, walls, goal, skateboard, slip_prob=0, gamma=0.99,
             step_cost=0, weights=None, env_code=None, sample_rate=5):
    self.env_code = env_code
    self.height = height
    self.width = width
    if weights is not None:
        self.weights = weights
    else:
        self.weights = np.array([[2, -0.4, -0.5]])

    # Objects that belong in the state (changing).
    agent_obj = OOMDPObject(attributes=agent, name="agent")
    agent_exit = {"x": 100, "y": 100}
    skateboard_objs = self._make_oomdp_objs_from_list_of_dict(skateboard, "skateboard")
    skateboard_objs_exit = self._make_oomdp_objs_from_list_of_dict(skateboard, "skateboard")

    # Objects that belong to the MDP (static).
    wall_objs = self._make_oomdp_objs_from_list_of_dict(walls, "wall")
    self.walls = wall_objs
    self.goal = goal
    self.slip_prob = slip_prob

    init_state = self._create_state(agent_obj, skateboard_objs)
    self.exit_state = self._create_state(OOMDPObject(attributes=agent_exit, name="agent_exit"), skateboard_objs_exit)
    self.exit_state.set_terminal(True)
    self.exit_state.set_goal(False)

    OOMDP.__init__(self, SkateboardOOMDP.ACTIONS, self._skateboard_transition_func,
                   self._skateboard_reward_func, init_state=init_state, gamma=gamma,
                   step_cost=step_cost, sample_rate=sample_rate)
def __init__(self, width, height, agent, walls, passengers, slip_prob=0, gamma=0.99, weights=None):
    self.height = height
    self.width = width
    if weights is not None:
        self.weights = weights
    else:
        # Use the true weighting over reward variables
        # (on the goal with the passenger, on a toll, on a traffic cell).
        self.weights = np.array([[10, 0, -1]])

    agent_obj = OOMDPObject(attributes=agent, name="agent")
    agent_exit = {"x": 100, "y": 100, "has_passenger": 0}
    wall_objs = self._make_oomdp_objs_from_list_of_dict(walls, "wall")
    pass_objs = self._make_oomdp_objs_from_list_of_dict(passengers, "passenger")
    pass_objs_exit = self._make_oomdp_objs_from_list_of_dict(passengers, "passenger")
    self.walls = wall_objs

    self.exit_state = self._create_state(OOMDPObject(attributes=agent_exit, name="agent_exit"), pass_objs_exit)
    self.exit_state.set_terminal(True)
    self.exit_state.set_goal(False)

    init_state = self._create_state(agent_obj, pass_objs)
    OOMDP.__init__(self, TaxiOOMDP.ACTIONS, self._taxi_transition_func, self._taxi_reward_func,
                   init_state=init_state, gamma=gamma, sample_rate=1)
    self.slip_prob = slip_prob
@classmethod
def create_goal_state(cls, dest_color):
    # Build the terminal state in which both the agent and the passenger
    # are at the destination color.
    goal_agent = {'current_color': dest_color, 'has_passenger': 0}
    goal_passengers = [{'current_color': dest_color, 'dest_color': dest_color}]
    goal_agent_obj = OOMDPObject(attributes=goal_agent, name='agent')
    goal_passenger_objs = OOMDP._make_oomdp_objs_from_list_of_dict(goal_passengers, 'passenger')
    return TaxiL1OOMDP._create_state(goal_agent_obj, goal_passenger_objs, is_terminal=True)
def __init__(self, agent_color='red', passenger_color='blue', passenger_dest_color='green'):
    agent = {'current_color': agent_color, 'has_passenger': 0}
    passengers = [{'current_color': passenger_color, 'dest_color': passenger_dest_color}]
    agent_obj = OOMDPObject(attributes=agent, name='agent')
    passenger_objs = self._make_oomdp_objs_from_list_of_dict(passengers, 'passenger')
    init_state = TaxiL1OOMDP._create_state(agent_obj, passenger_objs)
    self.goal_state = TaxiL1OOMDP.create_goal_state(passenger_dest_color)
    self.terminal_func = lambda state: state == self.goal_state
    OOMDP.__init__(self, TaxiL1OOMDP.ACTIONS, self._transition_func, self._reward_func, init_state)
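# Usage sketch: the abstract (L1) taxi is constructed from colors alone,
# since its state tracks colors rather than grid coordinates. Values below
# are just the defaults from the signature above.
l1_mdp = TaxiL1OOMDP(agent_color='red', passenger_color='blue', passenger_dest_color='green')
assert l1_mdp.terminal_func(l1_mdp.goal_state)  # The goal state is terminal by construction.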
def __init__(self,
             date_time,
             serial_loc='/dev/ttyACM0',
             baud=9600,
             use_img=True,
             panel_step=0.1,  # Radians; used to specify the command to the Arduino.
             timestep=30,
             latitude_deg=41.82,
             longitude_deg=-71.4128):
    self.image_dims = 16  # Used for downsampling the image from the camera.
    self.use_img = use_img

    # Global information.
    self.latitude_deg = latitude_deg  # Positive in the northern hemisphere.
    self.longitude_deg = longitude_deg  # Negative reckoning west from the prime meridian in Greenwich, England.
    self.timestep = timestep  # Timestep in minutes.
    self.panel_step = panel_step

    # Initialize communication with the Arduino.
    self.ser = serial.Serial(serial_loc, baud, timeout=120)
    self.ser.flushOutput()
    self.ser.flushInput()
    self.ser.write(b"INIT")
    print("establishing communication with panel")
    # The response should take the form RECV,<current angle>.
    result = self.ser.readline().decode()
    if result.startswith("RECV"):
        initial_angle_ns = float(result.split(",")[1])
        print("connection established, initial angle along ns axis is {}".format(initial_angle_ns))
    else:
        raise Exception("ERROR: invalid response from arduino: {}".format(result))

    if self.use_img:
        # Initialize the camera connection and take an initial image.
        self.cam = VideoCapture(0)
        t.sleep(0.2)  # Wait for the camera.
        start_pixels = self._capture_img()
    else:
        start_pixels = None

    # Time stuff.
    self.init_time = date_time
    self.time = date_time

    # Create the initial state.
    init_state = self._create_state(0, initial_angle_ns, start_pixels, self.init_time)
    OOMDP.__init__(self, ArduinoOOMDP.ACTIONS, self.objects, self._transition_func, self._reward_func, init_state=init_state)
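# A minimal usage sketch for the hardware-backed MDP. It assumes an Arduino
# on /dev/ttyACM0 that answers the INIT handshake above; use_img=False skips
# the camera so no webcam is required.
import datetime
arduino_mdp = ArduinoOOMDP(datetime.datetime.now(), serial_loc='/dev/ttyACM0', use_img=False)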
def __init__(self, width, height, agent, walls, passengers, tolls, traffic, fuel_stations,
             slip_prob=0, gamma=0.99, step_cost=0, weights=None, env_code=None, sample_rate=5):
    self.env_code = env_code
    self.height = height
    self.width = width
    if weights is not None:
        self.weights = weights
    else:
        # Use the true weighting over reward variables
        # (on the goal with the passenger, on a toll, on a traffic cell).
        self.weights = np.array([[2, -0.4, -0.5]])

    # Objects that belong in the state (changing).
    agent_obj = OOMDPObject(attributes=agent, name="agent")
    agent_exit = {"x": 100, "y": 100, "has_passenger": 0}
    pass_objs = self._make_oomdp_objs_from_list_of_dict(passengers, "passenger")
    pass_objs_exit = self._make_oomdp_objs_from_list_of_dict(passengers, "passenger")

    # Objects that belong to the MDP (static).
    wall_objs = self._make_oomdp_objs_from_list_of_dict(walls, "wall")
    toll_objs = self._make_oomdp_objs_from_list_of_dict(tolls, "toll")
    traffic_objs = self._make_oomdp_objs_from_list_of_dict(traffic, "traffic")
    fuel_station_objs = self._make_oomdp_objs_from_list_of_dict(fuel_stations, "fuel_station")
    self.tolls = toll_objs
    self.traffic_cells = traffic_objs
    self.walls = wall_objs
    self.fuel_stations = fuel_station_objs
    self.slip_prob = slip_prob

    init_state = self._create_state(agent_obj, pass_objs)

    # Absorbing exit state: the agent is moved off-grid and the state is
    # marked terminal but not a goal.
    self.exit_state = self._create_state(OOMDPObject(attributes=agent_exit, name="agent_exit"), pass_objs_exit)
    self.exit_state.set_terminal(True)
    self.exit_state.set_goal(False)

    # Use the fuel-augmented action set only when the initial state tracks fuel.
    actions = AugmentedTaxiOOMDP.AUGMENTED_ACTIONS if init_state.track_fuel() else AugmentedTaxiOOMDP.BASE_ACTIONS
    OOMDP.__init__(self, actions, self._taxi_transition_func, self._taxi_reward_func,
                   init_state=init_state, gamma=gamma, step_cost=step_cost, sample_rate=sample_rate)
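# A minimal usage sketch. Agent/passenger keys follow the taxi conventions
# above; the toll and traffic attribute dictionaries are assumptions made for
# illustration. With no fuel attribute on the agent, track_fuel() is expected
# to be false, so the base action set is used.
agent = {"x": 1, "y": 1, "has_passenger": 0}
passengers = [{"x": 4, "y": 3, "dest_x": 2, "dest_y": 2, "in_taxi": 0}]
tolls = [{"x": 3, "y": 1}]            # Hypothetical toll cell.
traffic = [{"x": 2, "y": 4, "prob": 0.5}]  # Hypothetical traffic cell.
aug_mdp = AugmentedTaxiOOMDP(width=5, height=5, agent=agent, walls=[], passengers=passengers,
                             tolls=tolls, traffic=traffic, fuel_stations=[])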