def reset_boats(): global BOATS, CONTROLLERS, WAYPOINT_QUEUE, WAYPOINTS_INDEX, WAYPOINTS_BEFORE_RESET, LAST_COMPLETED_WP_TIME, LAST_TIME, FIRST_TIME, TEXT_BOXES BOATS = {"pid": Boat.Boat(design=Designs.AirboatDesign()), "q": Boat.Boat(design=Designs.TankDriveDesign())} # generate all the random waypoints generate_random_waypoints_queue() waypoint = WAYPOINT_QUEUE[0] px, py = xy_location_to_pixel_location(waypoint[0], waypoint[1]) LAST_TIME = 0 FIRST_TIME = ptime.time() for k in BOATS: boat = BOATS[k] WAYPOINTS_INDEX[k] = 0 LAST_COMPLETED_WP_TIME[k] = 0 boat.state = np.zeros((6,)) boat.time = 0 boat.name = k + " boat" NAVIGATION_LINES[k].set_data(pos=np.array([(px, py), xy_location_to_pixel_location(boat.state[0], boat.state[1])], dtype=np.float32)) TEXT_BOXES["waypoint_symbol"][k].pos = (px, py) TEXT_BOXES["waypoint_text"][k].text = "[{:.0f}, {:.0f}]".format(px, py) TEXT_BOXES["waypoint_count"][k].text = "#{} of {}".format(WAYPOINTS_INDEX[k] + 1, WAYPOINTS_BEFORE_RESET) #boat.strategy = Strategies.DestinationOnly(boat, waypoint, controller_name=CONTROLLERS[k]) #boat.strategy = Strategies.LineFollower(boat, waypoint, controller_name=CONTROLLERS[k]) if (k == "pid"): boat.strategy = Strategies.PseudoRandomBalancedHeading(boat, fixed_thrust=0.2, angle_divisions=8) else: boat.strategy = Strategies.DoNothing(boat) boat.sourceLocation = boat.state[0:2] boat.destinationLocation = waypoint boat.calculateQState() # need to initialize the state for Q learning
def __init__(self, t=0.0, name="boat", design=Designs.TankDriveDesign()): self._t = t # current time [s] self._name = name self._state = np.zeros((6, )) self._sourceLocation = np.zeros((2, )) # where the boat started from self._destinationLocation = np.zeros( (2, )) # where the boat is going (typically the next waypoint) # state: [x y u w th thdot] self._thrustSurge = 0.0 # surge thrust [N] self._thrustSway = 0.0 # sway thrust (zero for tank drive) [N] self._moment = 0.0 # [Nm] self._thrustSurgeOLD = 0.0 # used for exponential curve toward new value self._thrustSwayOLD = 0.0 # used for exponential curve toward new value self._momentOLD = 0.0 # used for exponential curve toward new value self._decayConstant = 0.5 # used for exponential curve toward new value, lower means slower self._thrustFraction = 0.0 self._momentFraction = 0.0 self._strategy = Strategies.DoNothing(self) self._design = design self._plotData = None # [x, y] data used to display current actions self._controlHz = 10 # the number of times per second the boat is allowed to change its signal, check if strategy is finished, and create Q experiences self._lastControlTime = 0 self._Q = _Q_ self._Qstate = np.zeros( (8, )) # [u w alpha delta phi alphadot deltadot phidot] self._QlastState = np.zeros( (8, )) # the state "s" in the experience (s, a, r, s') self._QlastAction = np.zeros( (2, ) ) # the action "a" in the experience (s, a, r, s'), [m0_signal m1_signal] # alpha = progress along line between origin and waypoint, normalized by the length of the line # delta = distance to the line (projection of boat onto the line) # phi = angle of the line with respect to the surge direction in the body frame # mX_signal = raw signal value of actuator X self._QExperienceQueue = list( ) # a list containing the current set of experiences in the order they were created self._lastExperienceTime = 0