def __init__(self, width, height, agent, walls, passengers, goal_loc=None, slip_prob=0, gamma=0.99):
    self.height = height
    self.width = width

    agent_obj = OOMDPObject(attributes=agent, name="agent")
    wall_objs = self._make_oomdp_objs_from_list_of_dict(walls, "wall")
    pass_objs = self._make_oomdp_objs_from_list_of_dict(passengers, "passenger")
    init_state = self._create_state(agent_obj, wall_objs, pass_objs)

    # With no explicit goal location, this is the standard taxi task;
    # otherwise the task becomes navigation to goal_loc, with matching
    # terminal and reward functions.
    self.goal_location = goal_loc
    self.terminal_func = taxi_helpers.is_taxi_terminal_state if goal_loc is None else self._navigation_terminal_func
    rf = self._taxi_reward_func if goal_loc is None else self._navigation_reward_func

    OOMDP.__init__(self, TaxiOOMDP.ACTIONS, self._taxi_transition_func, rf,
                   init_state=init_state, gamma=gamma)
    self.slip_prob = slip_prob
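# A minimal usage sketch. The agent keys (x/y/has_passenger) match the
# agent_exit dictionaries used elsewhere in this file; the passenger keys
# (x/y/dest_x/dest_y/in_taxi) and the (x, y) form of goal_loc are assumptions
# here, chosen for illustration.
agent = {"x": 1, "y": 1, "has_passenger": 0}
passengers = [{"x": 4, "y": 3, "dest_x": 2, "dest_y": 2, "in_taxi": 0}]
taxi_mdp = TaxiOOMDP(width=5, height=5, agent=agent, walls=[], passengers=passengers)
# Passing goal_loc switches the same class into a pure navigation task.
nav_mdp = TaxiOOMDP(width=5, height=5, agent=agent, walls=[], passengers=passengers, goal_loc=(5, 5))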
def reset(self):
    '''
    Summary:
        Resets the OOMDP back to the initial configuration.
    '''
    self.time = self.init_time
    OOMDP.reset(self)
def __init__(self, panel, date_time, name_ext, timestep=30, panel_step=.1,
             reflective_index=0.65, latitude_deg=40.7, longitude_deg=142.17,
             img_dims=16, mode_dict=None):
    # Avoid a shared mutable default argument: build the mode dictionary per instance.
    if mode_dict is None:
        mode_dict = {'dual_axis': True, 'image_mode': False, 'cloud_mode': False}

    # Error check the lat/long.
    if abs(latitude_deg) > 90 or abs(longitude_deg) > 180:
        print("Error: latitude must be between [-90, 90], longitude between [-180, 180]. Lat:", latitude_deg, "Long:", longitude_deg)
        quit()

    if mode_dict['cloud_mode'] and not mode_dict['image_mode']:
        print("Warning (SolarOOMDP): Clouds were set to active but image mode is off. No cloud simulation supported for non-image-mode.")
        mode_dict['cloud_mode'] = False

    # Mode information.
    if not mode_dict['dual_axis']:
        # In single-axis tracking mode, change the action set accordingly.
        SolarOOMDP.ACTIONS = self.get_single_axis_actions()
        self.dual_axis = False
    else:
        self.dual_axis = True

    # Image stuff.
    self.img_dims = img_dims
    self.image_mode = mode_dict['image_mode']
    self.cloud_mode = mode_dict['cloud_mode']
    self.clouds = self._generate_clouds() if mode_dict['cloud_mode'] else []

    # Get panel information.
    self.panel = panel
    self.sqrt_num_panels = 1  # Assume only 1 panel for now.

    # Global information.
    self.latitude_deg = latitude_deg  # Positive in the northern hemisphere.
    self.longitude_deg = longitude_deg  # Negative reckoning west from the prime meridian in Greenwich, England.
    self.panel_step = panel_step
    self.timestep = timestep  # Timestep in minutes.
    self.reflective_index = reflective_index
    self.name_ext = name_ext

    # Time stuff.
    self.init_time = date_time
    self.time = date_time

    # Make state and call super.
    panels = self._get_default_panel_obj_list()
    init_state = self._create_state(panels, self.init_time)
    OOMDP.__init__(self, SolarOOMDP.ACTIONS, self._transition_func, self._reward_func, init_state=init_state)
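# A minimal usage sketch, assuming the constructor signature above. The panel
# argument is only stored on the instance in the code shown, so the dictionary
# below is a hypothetical panel description used purely for illustration.
import datetime
panel = {'x_dim': 1, 'y_dim': 1}  # Hypothetical panel attributes.
solar_mdp = SolarOOMDP(panel, datetime.datetime(2020, 6, 1, 12, 0), name_ext='_demo',
                       mode_dict={'dual_axis': True, 'image_mode': False, 'cloud_mode': False})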
def __init__(self, width, height, agent, walls, passengers, slip_prob=0, gamma=0.99):
    self.height = height
    self.width = width
    agent_obj = OOMDPObject(attributes=agent, name="agent")
    wall_objs = self._make_oomdp_objs_from_list_of_dict(walls, "wall")
    pass_objs = self._make_oomdp_objs_from_list_of_dict(passengers, "passenger")
    init_state = self._create_state(agent_obj, wall_objs, pass_objs)
    OOMDP.__init__(self, TaxiOOMDP.ACTIONS, self._taxi_transition_func, self._taxi_reward_func,
                   init_state=init_state, gamma=gamma)
    self.slip_prob = slip_prob
def __init__(self, width, height, agent, walls, blocks, exit, gamma=0.99):
    # _create_init_state populates self.objects, which is passed through to
    # the OOMDP constructor below.
    init_state = self._create_init_state(height, width, agent, walls, blocks, exit)
    OOMDP.__init__(self, BlockDudeOOMDP.ACTIONS, self.objects, self._block_dude_transition_func,
                   self._block_dude_reward_func, init_state=init_state, gamma=gamma)
    self.height = height
    self.width = width
def __init__(self, width, height, agent, walls, goals, crumbs, slip_prob=0, gamma=0.99,
             step_cost=0, weights=None, env_code=None, sample_rate=5):
    self.env_code = env_code
    self.height = height
    self.width = width
    if weights is not None:
        self.weights = weights
    else:
        # Default weighting over the reward variables.
        self.weights = np.array([[2, -0.4, -0.5]])

    # Objects that belong in the state (changing).
    agent_obj = OOMDPObject(attributes=agent, name="agent")
    agent_exit = {"x": 100, "y": 100}
    crumb_objs = self._make_oomdp_objs_from_list_of_dict(crumbs, "crumb")
    crumb_objs_exit = self._make_oomdp_objs_from_list_of_dict(crumbs, "crumb")

    # Objects that belong to the MDP (static).
    wall_objs = self._make_oomdp_objs_from_list_of_dict(walls, "wall")
    self.walls = wall_objs
    goal_objs = self._make_oomdp_objs_from_list_of_dict(goals, "goal")
    self.goals = goal_objs
    self.slip_prob = slip_prob

    init_state = self._create_state(agent_obj, crumb_objs)

    # Absorbing exit state: the agent is moved off-grid and the state is
    # marked terminal but not a goal.
    self.exit_state = self._create_state(OOMDPObject(attributes=agent_exit, name="agent_exit"), crumb_objs_exit)
    self.exit_state.set_terminal(True)
    self.exit_state.set_goal(False)

    OOMDP.__init__(self, CookieCrumbOOMDP.ACTIONS, self._cookie_crumb_transition_func,
                   self._cookie_crumb_reward_func, init_state=init_state, gamma=gamma,
                   step_cost=step_cost, sample_rate=sample_rate)
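# A minimal usage sketch. The x/y attribute keys mirror the agent_exit
# dictionary above; the specific coordinates are illustrative assumptions.
agent = {"x": 1, "y": 1}
goals = [{"x": 5, "y": 5}]
crumbs = [{"x": 2, "y": 3}, {"x": 3, "y": 4}]
cc_mdp = CookieCrumbOOMDP(width=5, height=5, agent=agent, walls=[], goals=goals, crumbs=crumbs)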
def __init__(self, width, height, agent, walls, goal, skateboard, slip_prob=0, gamma=0.99,
             step_cost=0, weights=None, env_code=None, sample_rate=5):
    self.env_code = env_code
    self.height = height
    self.width = width
    if weights is not None:
        self.weights = weights
    else:
        self.weights = np.array([[2, -0.4, -0.5]])

    # Objects that belong in the state (changing).
    agent_obj = OOMDPObject(attributes=agent, name="agent")
    agent_exit = {"x": 100, "y": 100}
    skateboard_objs = self._make_oomdp_objs_from_list_of_dict(skateboard, "skateboard")
    skateboard_objs_exit = self._make_oomdp_objs_from_list_of_dict(skateboard, "skateboard")

    # Objects that belong to the MDP (static).
    wall_objs = self._make_oomdp_objs_from_list_of_dict(walls, "wall")
    self.walls = wall_objs
    self.goal = goal
    self.slip_prob = slip_prob

    init_state = self._create_state(agent_obj, skateboard_objs)
    self.exit_state = self._create_state(OOMDPObject(attributes=agent_exit, name="agent_exit"), skateboard_objs_exit)
    self.exit_state.set_terminal(True)
    self.exit_state.set_goal(False)

    OOMDP.__init__(self, SkateboardOOMDP.ACTIONS, self._skateboard_transition_func,
                   self._skateboard_reward_func, init_state=init_state, gamma=gamma,
                   step_cost=step_cost, sample_rate=sample_rate)
def __init__(self, width, height, agent, walls, passengers, slip_prob=0, gamma=0.99, weights=None):
    self.height = height
    self.width = width
    if weights is not None:
        self.weights = weights
    else:
        # Use the true weighting over reward variables
        # (on the goal with the passenger, on a toll, on a traffic cell).
        self.weights = np.array([[10, 0, -1]])

    agent_obj = OOMDPObject(attributes=agent, name="agent")
    agent_exit = {"x": 100, "y": 100, "has_passenger": 0}
    wall_objs = self._make_oomdp_objs_from_list_of_dict(walls, "wall")
    pass_objs = self._make_oomdp_objs_from_list_of_dict(passengers, "passenger")
    pass_objs_exit = self._make_oomdp_objs_from_list_of_dict(passengers, "passenger")
    self.walls = wall_objs

    self.exit_state = self._create_state(OOMDPObject(attributes=agent_exit, name="agent_exit"), pass_objs_exit)
    self.exit_state.set_terminal(True)
    self.exit_state.set_goal(False)

    init_state = self._create_state(agent_obj, pass_objs)
    OOMDP.__init__(self, TaxiOOMDP.ACTIONS, self._taxi_transition_func, self._taxi_reward_func,
                   init_state=init_state, gamma=gamma, sample_rate=1)
    self.slip_prob = slip_prob
@classmethod
def create_goal_state(cls, dest_color):
    # Build the terminal state in which both the agent and the passenger
    # are at the destination color.
    goal_agent = {'current_color': dest_color, 'has_passenger': 0}
    goal_passengers = [{'current_color': dest_color, 'dest_color': dest_color}]
    goal_agent_obj = OOMDPObject(attributes=goal_agent, name='agent')
    goal_passenger_objs = OOMDP._make_oomdp_objs_from_list_of_dict(goal_passengers, 'passenger')
    return TaxiL1OOMDP._create_state(goal_agent_obj, goal_passenger_objs, is_terminal=True)
def __init__(self, agent_color='red', passenger_color='blue', passenger_dest_color='green'):
    agent = {'current_color': agent_color, 'has_passenger': 0}
    passengers = [{'current_color': passenger_color, 'dest_color': passenger_dest_color}]
    agent_obj = OOMDPObject(attributes=agent, name='agent')
    passenger_objs = self._make_oomdp_objs_from_list_of_dict(passengers, 'passenger')
    init_state = TaxiL1OOMDP._create_state(agent_obj, passenger_objs)
    self.goal_state = TaxiL1OOMDP.create_goal_state(passenger_dest_color)
    self.terminal_func = lambda state: state == self.goal_state
    OOMDP.__init__(self, TaxiL1OOMDP.ACTIONS, self._transition_func, self._reward_func, init_state)
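# Usage sketch: the abstract (L1) taxi is constructed from colors alone,
# since its state tracks colors rather than grid coordinates. Values below
# are just the defaults from the signature above.
l1_mdp = TaxiL1OOMDP(agent_color='red', passenger_color='blue', passenger_dest_color='green')
assert l1_mdp.terminal_func(l1_mdp.goal_state)  # The goal state is terminal by construction.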
def __init__(self,
             date_time,
             serial_loc='/dev/ttyACM0',
             baud=9600,
             use_img=True,
             panel_step=0.1,  # Radians; used to specify the command to the Arduino.
             timestep=30,
             latitude_deg=41.82,
             longitude_deg=-71.4128):
    self.image_dims = 16  # Used for downsampling the image from the camera.
    self.use_img = use_img

    # Global information.
    self.latitude_deg = latitude_deg  # Positive in the northern hemisphere.
    self.longitude_deg = longitude_deg  # Negative reckoning west from the prime meridian in Greenwich, England.
    self.timestep = timestep  # Timestep in minutes.
    self.panel_step = panel_step

    # Initialize communication with the Arduino.
    self.ser = serial.Serial(serial_loc, baud, timeout=120)
    self.ser.flushOutput()
    self.ser.flushInput()
    self.ser.write(b"INIT")
    print("establishing communication with panel")
    # The response should take the form RECV,<current angle>.
    result = self.ser.readline().decode()
    if result.startswith("RECV"):
        initial_angle_ns = float(result.split(",")[1])
        print("connection established, initial angle along ns axis is {}".format(initial_angle_ns))
    else:
        raise Exception("ERROR: invalid response from arduino: {}".format(result))

    if self.use_img:
        # Initialize the camera connection and take an initial image.
        self.cam = VideoCapture(0)
        t.sleep(0.2)  # Wait for the camera.
        start_pixels = self._capture_img()
    else:
        start_pixels = None

    # Time stuff.
    self.init_time = date_time
    self.time = date_time

    # Create the initial state.
    init_state = self._create_state(0, initial_angle_ns, start_pixels, self.init_time)
    OOMDP.__init__(self, ArduinoOOMDP.ACTIONS, self.objects, self._transition_func, self._reward_func, init_state=init_state)
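# A minimal usage sketch for the hardware-backed MDP. It assumes an Arduino
# on /dev/ttyACM0 that answers the INIT handshake above; use_img=False skips
# the camera so no webcam is required.
import datetime
arduino_mdp = ArduinoOOMDP(datetime.datetime.now(), serial_loc='/dev/ttyACM0', use_img=False)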
def __init__(self, width, height, agent, walls, passengers, tolls, traffic, fuel_stations,
             slip_prob=0, gamma=0.99, step_cost=0, weights=None, env_code=None, sample_rate=5):
    self.env_code = env_code
    self.height = height
    self.width = width
    if weights is not None:
        self.weights = weights
    else:
        # Use the true weighting over reward variables
        # (on the goal with the passenger, on a toll, on a traffic cell).
        self.weights = np.array([[2, -0.4, -0.5]])

    # Objects that belong in the state (changing).
    agent_obj = OOMDPObject(attributes=agent, name="agent")
    agent_exit = {"x": 100, "y": 100, "has_passenger": 0}
    pass_objs = self._make_oomdp_objs_from_list_of_dict(passengers, "passenger")
    pass_objs_exit = self._make_oomdp_objs_from_list_of_dict(passengers, "passenger")

    # Objects that belong to the MDP (static).
    wall_objs = self._make_oomdp_objs_from_list_of_dict(walls, "wall")
    toll_objs = self._make_oomdp_objs_from_list_of_dict(tolls, "toll")
    traffic_objs = self._make_oomdp_objs_from_list_of_dict(traffic, "traffic")
    fuel_station_objs = self._make_oomdp_objs_from_list_of_dict(fuel_stations, "fuel_station")
    self.tolls = toll_objs
    self.traffic_cells = traffic_objs
    self.walls = wall_objs
    self.fuel_stations = fuel_station_objs
    self.slip_prob = slip_prob

    init_state = self._create_state(agent_obj, pass_objs)

    # Absorbing exit state: the agent is moved off-grid and the state is
    # marked terminal but not a goal.
    self.exit_state = self._create_state(OOMDPObject(attributes=agent_exit, name="agent_exit"), pass_objs_exit)
    self.exit_state.set_terminal(True)
    self.exit_state.set_goal(False)

    # Use the fuel-augmented action set only when the initial state tracks fuel.
    actions = AugmentedTaxiOOMDP.AUGMENTED_ACTIONS if init_state.track_fuel() else AugmentedTaxiOOMDP.BASE_ACTIONS
    OOMDP.__init__(self, actions, self._taxi_transition_func, self._taxi_reward_func,
                   init_state=init_state, gamma=gamma, step_cost=step_cost, sample_rate=sample_rate)
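# A minimal usage sketch. Agent/passenger keys follow the taxi conventions
# above; the toll and traffic attribute dictionaries are assumptions made for
# illustration. With no fuel attribute on the agent, track_fuel() is expected
# to be false, so the base action set is used.
agent = {"x": 1, "y": 1, "has_passenger": 0}
passengers = [{"x": 4, "y": 3, "dest_x": 2, "dest_y": 2, "in_taxi": 0}]
tolls = [{"x": 3, "y": 1}]            # Hypothetical toll cell.
traffic = [{"x": 2, "y": 4, "prob": 0.5}]  # Hypothetical traffic cell.
aug_mdp = AugmentedTaxiOOMDP(width=5, height=5, agent=agent, walls=[], passengers=passengers,
                             tolls=tolls, traffic=traffic, fuel_stations=[])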