import math
import os
import random
import time
from math import pi

import gym
import numpy as np
from controller import Supervisor
from gym import spaces


class P9RLEnv(gym.Env):
    metadata = {'render.modes': ['human']}
    motors = []
    timeStep = 1
    restrictedGoals = True
    boxEnv = False
    isFixedGoal = False
    fixedGoal = [3.5, 0]
    logging = False
    maxTimestep = 7000
    robotResetLocation = [0, 0, 0]

    def __init__(self):
        # Initialize TurtleBot specifics
        self.maxAngSpeed = 1.5  # 2.84 max
        self.maxLinSpeed = 0.1  # 0.22 max

        # Initialize reward parameters
        self.moveTowardsParam = 1
        self.moveAwayParam = 1.1
        self.safetyLimit = 0.75
        self.obsProximityParam = 1
        self.EOEPunish = -300  # End-of-episode punishment
        self.EOEReward = 600  # End-of-episode reward
        self.EOERewardCounter = 0
        self.EpisodeCounter = 0

        # Total reward counters for each component
        self.moveTowardGoalTotalReward = 0
        self.obsProximityTotalPunish = 0
        self.faceObstacleTotalPunish = 0

        # Initialize RL-specific variables
        self.rewardInterval = 1  # How often rewards are granted
        self.epConc = ''  # Conclusion of episode [Collision, Success, TimeOut]
        self.reward = 0  # Reward gained in the timestep
        self.totalreward = 0  # Reward gained throughout the episode
        self.counter = 0  # Timestep counter
        self.needReset = True
        self.state = []  # State of the environment
        self.done = False  # Whether the episode is done
        self.action = [0, 0]  # Current action chosen
        self.prevAction = [0, 0]  # Previous action chosen
        self.goal = []  # Goal
        self.goals = [x / 100 for x in range(-450, 451, 150)]
        # Alternative goal list:
        # self.goals = [[0.72, 6.78], [5.03, 6.78], [12.7, 6.78],
        #               [10.9, -1.68], [3, 0], [-11.5, -2.5]]
        self.seeded = False
        self.dist = 0  # Distance to the goal
        self.prevDist = 0  # Previous distance to the goal

        # Logging variables
        self.currentEpisode = 0  # Current episode number
        self.start = 0  # Episode start time
        self.duration = 0  # Episode duration in seconds
        self.epInfo = []  # Logging info for the episode
        self.startPosition = []
        self.prevMax = 0  # Temporary variable

        # Initialize a supervisor
        self.supervisor = Supervisor()

        # Initialize robot
        self.robot = self.supervisor.getFromDef('TB')
        self.translationField = self.robot.getField('translation')
        self.orientationField = self.robot.getField('rotation')

        # Initialize lidar
        self.lidar = self.supervisor.getLidar('LDS-01')
        self.lidarDiscretization = 30
        self.lidar.enable(self.timeStep)
        self.lidarRanges = []

        # Initialize motors
        self.motors.append(self.supervisor.getMotor("right wheel motor"))
        self.motors.append(self.supervisor.getMotor("left wheel motor"))
        self.motors[0].setPosition(float("inf"))
        self.motors[1].setPosition(float("inf"))
        self.motors[0].setVelocity(0.0)
        self.motors[1].setVelocity(0.0)
        self.direction = []
        self.position = []
        self.orientation = []

        # Initialize goal object
        self.goalObject = self.supervisor.getFromDef('GOAL')
        self.goalTranslationField = self.goalObject.getField('translation')

        # Initialize action space and observation space
        # TODO: make positive linear speed a forward motion
        self.action_space = spaces.Box(
            low=np.array([-self.maxLinSpeed, -self.maxAngSpeed]),
            high=np.array([0, self.maxAngSpeed]),
            dtype=np.float32)
        self.observation_space = spaces.Box(low=-10,
                                            high=10,
                                            shape=(14, ),
                                            dtype=np.float16)
        self.counter = 0

    def reset(self):
        self._startEpisode()  # Reset everything that needs resetting
        self.EpisodeCounter = self.EpisodeCounter + 1
        print(self.EpisodeCounter)
        if not self.seeded:
            random.seed(self.currentEpisode)
            self.seeded = True
        self.supervisor.step(self.timeStep)  # Make it happen
        self.goal = self._setGoal()  # Set goal
        self._resetObject(self.goalObject, [self.goal[0], 0.01, self.goal[1]])
        self._getState()
        self.startPosition = self.position[:]
        self.state = [self.dist, self.direction] + self.lidarRanges  # Set state
        return np.asarray(self.state)
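    # Added note (not in the original source): the observation is the scalar
    # distance to the goal, the heading to the goal, and 12 lidar sector minima
    # (360 degrees at a discretization of 30), i.e. 2 + 12 = 14 values, which
    # matches the observation_space shape of (14,).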
    def step(self, action):
        self.action = action
        self._take_action(action)  # Take action
        self.counter += 1
        self._getState()  # Observe new state
        # Build the state (goal distance/direction + lidar) as a numpy array
        self.state = np.asarray([self.dist, self.direction] + self.lidarRanges)
        self.prevAction = self.action[:]  # Set previous action
        self.supervisor.step(32)
        self.reward = self._calculateReward()  # Get reward
        # Determine whether the episode is done, plus extra reward/punishment
        self.done, extraReward = self._isDone()
        self.reward += extraReward
        self.totalreward += self.reward
        return [self.state, self.reward, self.done, {}]

    def _trimLidarReadings(self, lidar):
        lidarReadings = []
        # Replace 0's with 3.5 (the maximum reading)
        new_lidars = [x if x != 0 else 3.5 for x in lidar]
        for x in range(0, 360, self.lidarDiscretization):
            end = x + self.lidarDiscretization
            # Take the minimum of each sector of lidar readings
            lidarReadings.append(min(new_lidars[x:end]))
        return lidarReadings

    def _resetObject(self, object, coordinates):
        object.getField('translation').setSFVec3f(coordinates)
        object.getField('rotation').setSFRotation([0, 1, 0, 0])
        object.resetPhysics()

    def _setGoal(self):
        if len(self.goals) == 6:  # Used with the alternative goal-pair list
            return random.choice(self.goals)
        if not self.isFixedGoal:
            while True:
                if self.restrictedGoals:
                    gs = random.sample(self.goals, 1)
                    gs.append(random.randrange(-45, 45) / 10)
                    g = gs[:] if np.random.randint(2) == 0 else [gs[1], gs[0]]
                elif self.boxEnv:
                    xGoal = random.randrange(-40, -25) / 10 \
                        if np.random.randint(2) == 0 \
                        else random.randrange(25, 40) / 10
                    yGoal = random.randrange(-40, 40) / 10
                    g = [yGoal, xGoal] if np.random.randint(2) == 0 \
                        else [xGoal, yGoal]
                else:
                    xGoal = random.randrange(-40, 40) / 10
                    yGoal = random.randrange(-40, 40) / 10
                    g = [xGoal, yGoal]
                pos1 = self.robot.getPosition()[0]
                pos2 = self.robot.getPosition()[2]
                distFromGoal = ((g[0] - pos1)**2 + (g[1] - pos2)**2)**0.5
                if distFromGoal >= 1:  # Keep the goal at least 1 m away
                    break
        else:
            g = self.fixedGoal
        print('Goal set with coordinates < {}, {} >'.format(g[0], g[1]))
        return g

    def _getDist(self):
        return ((self.goal[0] - self.position[0])**2 +
                (self.goal[1] - self.position[1])**2)**0.5

    def _take_action(self, action):
        if self.logging:
            print('\t\033[1mLinear velocity:\t{}\n'
                  '\tAngular velocity:\t{}\n\033[0m'.format(action[0], action[1]))
        convertedVelocity = self.setVelocities(action[0], action[1])
        self.motors[0].setVelocity(float(convertedVelocity[0]))
        self.motors[1].setVelocity(float(convertedVelocity[1]))

    def _isDone(self):
        if self.counter >= self.maxTimestep:  # Check maximum timestep
            self.needReset = True
            return True, self.EOEPunish
        # Check lidar for a collision
        minScan = min(list(filter(lambda a: a != 0, self.lidarRanges[:])))
        if minScan < 0.2:
            self.needReset = True
            return True, self.EOEPunish
        elif self.dist < 0.35:  # Check goal
            self.needReset = True if self.boxEnv else False
            self.EOERewardCounter = self.EOERewardCounter + 1
            print("SUCCESS COUNTER:", self.EOERewardCounter)
            return True, self.EOEReward
        else:
            return False, 0

    def render(self, mode='human', close=False):
        pass

    def setVelocities(self, linearV, angularV):
        R = 0.033  # Wheel radius
        L = 0.138  # Wheelbase length
        vr = (linearV + (L / 2) * angularV) / R
        vl = (linearV - (L / 2) * angularV) / R
        return [vl, vr]
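    # Added note (not in the original source): setVelocities applies standard
    # differential-drive inverse kinematics. With wheel radius R and wheelbase
    # L, vr = (v + (L/2)*w) / R and vl = (v - (L/2)*w) / R, so the body
    # velocities can be recovered as v = R*(vr + vl)/2 and w = R*(vr - vl)/L.
    # P7RLEnv's (2v +/- wL)/(2R) form below is algebraically identical.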
    def _getDirection(self):
        # Get the direction of the goal from the robot
        robgoaly = self.goal[1] - self.position[1]
        robgoalx = self.goal[0] - self.position[0]
        goal_angle = math.atan2(robgoalx, robgoaly)
        # Get the difference between the goal direction and the orientation
        heading = goal_angle - self.orientation
        if heading > pi:  # Wrap around pi
            heading -= 2 * pi
        elif heading < -pi:
            heading += 2 * pi
        return heading

    def _calculateReward(self):
        if self.counter % self.rewardInterval != 0:  # Check if it's reward time
            return 0
        distRate = self.prevDist - self.dist
        if self.prevDist == 0:  # First reward of the episode: just record
            self.prevDist = self.dist
            return 0
        moveTowardsGoalReward = self._rewardMoveTowardsGoal(distRate)
        self.moveTowardGoalTotalReward += moveTowardsGoalReward
        obsProximityPunish = self._rewardObstacleProximity()
        self.obsProximityTotalPunish += obsProximityPunish
        reward = moveTowardsGoalReward + obsProximityPunish * 3
        totalRewardDic = {
            "faceObstacleTotalPunish": self.faceObstacleTotalPunish,
            "obsProximityTotalPunish": self.obsProximityTotalPunish,
            "moveTowardGoalTotalReward": self.moveTowardGoalTotalReward
        }
        if self.logging:
            self._printRewards(totalRewardDic)
            print(reward)
        self.prevDist = self.dist
        return reward

    def _rewardMoveTowardsGoal(self, distRate):
        if distRate <= 0:
            return self.moveAwayParam * (distRate / 0.0044)
        else:
            return self.moveTowardsParam * (distRate / 0.0044)

    def _rewardObstacleProximity(self):
        closestObstacle = min(self.lidarRanges)
        if closestObstacle <= self.safetyLimit:
            return self.obsProximityParam * -(
                1 - (closestObstacle / self.safetyLimit))
        else:
            return 0

    def _startEpisode(self):
        if self.needReset:
            # Reset object(s) / counters / rewards
            print('Resetting Robot...')
            self._resetObject(self.robot, self.robotResetLocation)
            self.counter = 0
            self.totalreward = 0
            self.moveTowardGoalTotalReward = 0
            self.obsProximityTotalPunish = 0
            self.faceObstacleTotalPunish = 0
            self.prevDist = 0

    def _getState(self):
        self.lidarRanges = self._trimLidarReadings(
            self.lidar.getRangeImage())  # Get lidar info
        # Get the position (x, y) and orientation
        self.position = self.robot.getPosition()[::2]
        self.orientation = self.orientationField.getSFRotation()[3]
        self.dist = self._getDist()  # Get the Euclidean distance from the goal
        self.direction = self._getDirection()

    def _printRewards(self, r, interval=1):
        if self.counter % interval == 0:
            print("\n\033[1mEpisode {}, timestep {}\033[0m".format(
                self.currentEpisode, self.counter))
            for k, v in r.items():
                print("\t" + str(k) + ":\t" + str(v))

    def _printMax(self, reward):
        maxR = reward if self.prevMax < reward else self.prevMax
        self.prevMax = maxR
        print("max: {}".format(maxR))

    def _printLidarRanges(self):
        lidarFormatted = []
        for i in range(len(self.lidarRanges)):
            lidarFormatted.append(str(i) + ': ' + str(self.lidarRanges[i]))
        print(lidarFormatted)

    def _logEpisode(self):
        self.duration = time.time() - self.start
        self.epInfo.append([
            round(self.duration, 3),
            round(self.totalreward, 3), self.epConc, self.counter,
            "Goal:", self.goal, "Start position:", self.startPosition,
            "Rewards:", self.moveTowardGoalTotalReward,
            self.obsProximityTotalPunish, self.faceObstacleTotalPunish
        ])
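
# A minimal sanity-check sketch (an addition, not part of the original
# controllers): it verifies the differential-drive conversion used by both
# environments by recovering the commanded body velocities from the wheel
# speeds. The sample commands are arbitrary and purely illustrative.
def _check_diff_drive_conversion(R=0.033, L=0.138):
    for v, w in [(0.1, 0.0), (0.0, 1.5), (-0.1, -1.5), (0.05, 0.7)]:
        vr = (v + (L / 2) * w) / R  # Right wheel angular speed (rad/s)
        vl = (v - (L / 2) * w) / R  # Left wheel angular speed (rad/s)
        assert abs(R * (vr + vl) / 2 - v) < 1e-9  # Recovers linear velocity
        assert abs(R * (vr - vl) / L - w) < 1e-9  # Recovers angular velocity
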
class P7RLEnv(gym.Env):
    metadata = {'render.modes': ['human']}
    motors = []
    timeStep = 32
    restrictedGoals = True
    boxEnv = False
    isFixedGoal = False
    fixedGoal = [3.5, 0]
    logging = True
    maxTimestep = 20000
    robotResetLocation = [0, 0, 0]

    def __init__(self):
        # Initialize TurtleBot specifics
        self.maxAngSpeed = 1  # 2.84 max
        self.maxLinSpeed = 0.14  # 0.22 max

        # Initialize reward parameters
        self.moveTowardsParam = 1
        self.moveAwayParam = 1.1
        self.safetyLimit = 0.5
        self.obsProximityParam = 0.01
        self.faceObstacleParam = 10

        # Total reward counters for each component
        self.moveTowardGoalTotalReward = 0
        self.obsProximityTotalPunish = 0
        self.faceObstacleTotalPunish = 0

        # Initialize RL-specific variables
        self.rewardInterval = 1  # How often rewards are granted
        self.epConc = ''  # Conclusion of episode [Collision, Success, TimeOut]
        self.reward = 0  # Reward gained in the timestep
        self.totalreward = 0  # Reward gained throughout the episode
        self.counter = 0  # Timestep counter
        self.needReset = True
        self.state = []  # State of the environment
        self.done = False  # Whether the episode is done
        self.action = [0, 0]  # Current action chosen
        self.prevAction = [0, 0]  # Previous action chosen
        self.goal = []  # Goal
        self.goals = [x / 100 for x in range(-450, 451, 150)]
        self.dist = 0  # Distance to the goal
        self.prevDist = 0  # Previous distance to the goal

        # Logging variables
        self.logDir = ""
        self.path = ""
        self.currentEpisode = 0  # Current episode number
        self.start = 0  # Episode start time
        self.duration = 0  # Episode duration in seconds
        self.epInfo = []  # Logging info for the episode
        self.startPosition = []
        self.prevMax = 0  # Temporary variable

        # Initialize a supervisor
        self.supervisor = Supervisor()

        # Initialize robot
        self.robot = self.supervisor.getFromDef('TB')
        self.translationField = self.robot.getField('translation')
        self.orientationField = self.robot.getField('rotation')

        # Initialize lidar
        self.lidar = self.supervisor.getLidar('TurtleBotLidar')
        self.lidar.enable(self.timeStep)
        self.lidarRanges = []

        # Initialize motors
        self.motors.append(self.supervisor.getMotor("left wheel motor"))
        self.motors.append(self.supervisor.getMotor("right wheel motor"))
        self.motors[0].setPosition(float("inf"))
        self.motors[1].setPosition(float("inf"))
        self.motors[0].setVelocity(0.0)
        self.motors[1].setVelocity(0.0)
        self.direction = []
        self.position = []
        self.orientation = []

        # Initialize goal object
        self.goalObject = self.supervisor.getFromDef('GOAL')
        self.goalTranslationField = self.goalObject.getField('translation')

        # Initialize action space and observation space
        self.action_space = spaces.Box(
            low=np.array([-self.maxLinSpeed, -self.maxAngSpeed]),
            high=np.array([self.maxLinSpeed, self.maxAngSpeed]),
            dtype=np.float32)
        self.observation_space = spaces.Box(low=-10,
                                            high=10,
                                            shape=(24, ),
                                            dtype=np.float16)

    def reset(self):
        if not self.logDir:
            self._getLogDirectory()
        self._startEpisode()  # Reset everything that needs resetting
        self.supervisor.step(1)  # Make it happen
        self.goal = self._setGoal()  # Set goal
        self._resetObject(self.goalObject, [self.goal[0], 0.01, self.goal[1]])
        self._getState()
        self.startPosition = self.position[:]
        self.state = [
            self.prevAction[0], self.prevAction[1], self.dist, self.direction
        ] + self.lidarRanges  # Set state
        return np.asarray(self.state)
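    # Added note (not in the original source): the observation is the previous
    # action (2 values), the goal distance and heading, and 20 lidar sector
    # minima (360 degrees at a discretization of 18), i.e. 4 + 20 = 24 values,
    # matching the observation_space shape of (24,).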
    def step(self, action):
        self.prevAction = self.action[:]  # Set previous action
        self.action = action
        self._take_action(action)  # Take action
        self.supervisor.step(1)
        self.counter += 1
        self._getState()  # Observe new state
        # Build the state (previous action + goal distance/direction + lidar)
        self.state = np.asarray([
            self.prevAction[0], self.prevAction[1], self.dist, self.direction
        ] + self.lidarRanges)
        self.reward = self._calculateReward()  # Get reward
        # Determine whether the episode is done, plus extra reward/punishment
        self.done, extraReward = self._isDone()
        self.reward += extraReward
        self.totalreward += self.reward
        return [self.state, self.reward, self.done, {}]

    def _trimLidarReadings(self, lidar):
        lidarReadings = []
        # Replace 0's with 3.5 (the maximum reading)
        new_lidars = [x if x != 0 else 3.5 for x in lidar]
        for x in range(0, 360, 18):
            end = x + 18
            # Take the minimum of each sector of lidar readings
            lidarReadings.append(min(new_lidars[x:end]))
        return lidarReadings

    def _resetObject(self, object, coordinates):
        object.getField('translation').setSFVec3f(coordinates)
        object.getField('rotation').setSFRotation([0, 1, 0, 0])
        object.resetPhysics()

    def _setGoal(self):
        if not self.isFixedGoal:
            while True:
                if self.restrictedGoals:
                    gs = random.sample(self.goals, 1)
                    gs.append(random.randrange(-45, 45) / 10)
                    g = gs[:] if np.random.randint(2) == 0 else [gs[1], gs[0]]
                elif self.boxEnv:
                    xGoal = random.randrange(-40, -25) / 10 \
                        if np.random.randint(2) == 0 \
                        else random.randrange(25, 40) / 10
                    yGoal = random.randrange(-40, 40) / 10
                    g = [yGoal, xGoal] if np.random.randint(2) == 0 \
                        else [xGoal, yGoal]
                else:
                    xGoal = random.randrange(-40, 40) / 10
                    yGoal = random.randrange(-40, 40) / 10
                    g = [xGoal, yGoal]
                pos1 = self.robot.getPosition()[0]
                pos2 = self.robot.getPosition()[2]
                distFromGoal = ((g[0] - pos1)**2 + (g[1] - pos2)**2)**0.5
                if distFromGoal >= 1:  # Keep the goal at least 1 m away
                    break
        else:
            g = self.fixedGoal
        print('Goal set with coordinates < {}, {} >'.format(g[0], g[1]))
        return g

    def _getDist(self):
        return ((self.goal[0] - self.position[0])**2 +
                (self.goal[1] - self.position[1])**2)**0.5

    def _take_action(self, action):
        if self.logging:
            print('\t\033[1mLinear velocity:\t{}\n'
                  '\tAngular velocity:\t{}\n\033[0m'.format(action[0], action[1]))
        convertedVelocity = self.setVelocities(action[0], action[1])
        self.motors[0].setVelocity(float(convertedVelocity[0]))
        self.motors[1].setVelocity(float(convertedVelocity[1]))

    def _isDone(self):
        if self.counter >= self.maxTimestep:  # Check maximum timestep
            self.needReset = True
            self._endEpisode('timeout')
            return True, 0
        # Check lidar for a collision
        minScan = min(list(filter(lambda a: a != 0, self.lidarRanges[:])))
        if minScan < 0.25:
            self.needReset = True
            self._endEpisode('collision')
            return True, -500
        elif self.dist < 0.35:  # Check goal
            self.needReset = True if self.boxEnv else False
            self._endEpisode('success')
            return True, 500
        else:
            return False, 0

    def render(self, mode='human', close=False):
        pass

    def setVelocities(self, linearV, angularV):
        R = 0.033  # Wheel radius
        L = 0.138  # Wheelbase length
        vr = (2 * linearV + angularV * L) / (2 * R)
        vl = (2 * linearV - angularV * L) / (2 * R)
        return [vl, vr]

    def _getDirection(self):
        # Get the direction of the goal from the robot
        robgoaly = self.goal[1] - self.position[1]
        robgoalx = self.goal[0] - self.position[0]
        goal_angle = math.atan2(robgoalx, robgoaly)
        # Get the difference between the goal direction and the orientation
        heading = goal_angle - self.orientation
        if heading > pi:  # Wrap around pi
            heading -= 2 * pi
        elif heading < -pi:
            heading += 2 * pi
        return heading
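    # Added note (an assumption, not in the original source): the 0.0044
    # divisor in _rewardMoveTowardsGoal below appears to normalize distRate by
    # roughly the largest per-step distance change (maxLinSpeed of 0.14 m/s
    # over a 32 ms step is about 0.0045 m), keeping the shaping term near
    # [-1.1, 1].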
    def _calculateReward(self):
        if self.counter % self.rewardInterval != 0:  # Check if it's reward time
            return 0
        distRate = self.prevDist - self.dist
        if self.prevDist == 0:  # First reward of the episode: just record
            self.prevDist = self.dist
            return 0
        moveTowardsGoalReward = self._rewardMoveTowardsGoal(distRate)
        self.moveTowardGoalTotalReward += moveTowardsGoalReward
        obsProximityPunish = self._rewardObstacleProximity()
        self.obsProximityTotalPunish += obsProximityPunish
        faceObstaclePunish = -self._rewardFacingObstacle()
        self.faceObstacleTotalPunish += faceObstaclePunish
        reward = moveTowardsGoalReward + obsProximityPunish + faceObstaclePunish
        totalRewardDic = {
            "faceObstacleTotalPunish": self.faceObstacleTotalPunish,
            "obsProximityTotalPunish": self.obsProximityTotalPunish,
            "moveTowardGoalTotalReward": self.moveTowardGoalTotalReward
        }
        rewardDic = {
            "faceObstaclePunish": faceObstaclePunish,
            "obsProximityPunish": obsProximityPunish,
            "moveTowardsGoalReward": moveTowardsGoalReward,
            "Total": reward,
            "\033[1mTotalReward\033[0m": self.totalreward
        }
        if self.logging:
            self._printRewards(rewardDic)
            self._printRewards(totalRewardDic)
        self.prevDist = self.dist
        return reward

    def _rewardFacingObstacle(self):
        rewardFaceObs = 0
        # Check forward lidars if moving forward, backward lidars if moving
        # backward
        lidarRange = [-2, 2] if self.action[0] >= 0 else [7, 11]
        for i in range(lidarRange[0], lidarRange[1]):
            # Side readings get less weight
            scale = 0.15 if i in [-2, 1, 7, 10] else 0.35
            rewardFaceObs += scale * (
                1 - (self.lidarRanges[i] / self.safetyLimit)
            ) if self.lidarRanges[i] < self.safetyLimit else 0
        return self.faceObstacleParam * rewardFaceObs

    def _rewardMoveTowardsGoal(self, distRate):
        if distRate <= 0:
            return self.moveAwayParam * (distRate / 0.0044)
        else:
            return self.moveTowardsParam * (distRate / 0.0044)

    def _rewardObstacleProximity(self):
        closestObstacle = min(self.lidarRanges)
        if closestObstacle <= self.safetyLimit:
            return self.obsProximityParam * -(
                1 - (closestObstacle / self.safetyLimit))
        else:
            return 0

    def SaveAndQuit(self):
        self._write_file('eps.txt', self.currentEpisode)
        with open(self.path + 'P7_NoObstacles.txt', 'a+') as f:
            print('\nSave and Quit \n')
            f.write(repr(self.epInfo))
            f.write('\n')
        print('\tEpisodes Saved')
        self.supervisor.worldReload()

    def _write_file(self, filename, intval):
        with open(self.path + filename, 'w') as fp:
            print(f'Episodes saved: {intval}')
            fp.write(str(intval))

    def _read_file(self, filename):
        with open(self.path + filename) as fp:
            return int(fp.read())
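    # Added note (not in the original source): the episode counter is persisted
    # to eps.txt so that numbering continues across the world reload triggered
    # by SaveAndQuit(); _startEpisode below reads it back on the first episode
    # after a restart.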
    def _startEpisode(self):
        if self.start != 0:
            self._logEpisode()
        # Read the episode number
        if self.currentEpisode == 0 and os.path.exists(self.path + 'eps.txt'):
            self.currentEpisode = self._read_file('eps.txt')
        else:
            self.currentEpisode += 1
        print('\n\t\t\t\t EPISODE No. {} \n'.format(self.currentEpisode))
        self.start = time.time()
        if self.needReset:
            # Reset object(s) / counters / rewards
            print('Resetting Robot...')
            self._resetObject(self.robot, self.robotResetLocation)
            self.counter = 0
            self.totalreward = 0
            self.moveTowardGoalTotalReward = 0
            self.obsProximityTotalPunish = 0
            self.faceObstacleTotalPunish = 0
            self.prevDist = 0

    def _endEpisode(self, ending):
        self.epConc = ending
        prevMode = self.supervisor.simulationGetMode()
        self.supervisor.simulationSetMode(0)  # Pause while exporting the image
        self.supervisor.exportImage(
            self.logDir + "screenshots/" + ending + "/" +
            str(self.currentEpisode) + ".jpg", 100)
        self.supervisor.simulationSetMode(prevMode)
        print(ending)

    def _getState(self):
        self.lidarRanges = self._trimLidarReadings(
            self.lidar.getRangeImage())  # Get lidar info
        # Get the position (x, y) and orientation
        self.position = self.robot.getPosition()[::2]
        self.orientation = self.orientationField.getSFRotation()[3]
        self.dist = self._getDist()  # Get the Euclidean distance from the goal
        self.direction = self._getDirection()

    def _printRewards(self, r, interval=1):
        if self.counter % interval == 0:
            print("\n\033[1mEpisode {}, timestep {}\033[0m".format(
                self.currentEpisode, self.counter))
            for k, v in r.items():
                print("\t" + str(k) + ":\t" + str(v))

    def _printMax(self, reward):
        maxR = reward if self.prevMax < reward else self.prevMax
        self.prevMax = maxR
        print("max: {}".format(maxR))

    def _printLidarRanges(self):
        lidarFormatted = []
        for i in range(len(self.lidarRanges)):
            lidarFormatted.append(str(i) + ': ' + str(self.lidarRanges[i]))
        print(lidarFormatted)

    def _getLogDirectory(self):
        loggingDir = "Logging/"
        latestFile = ""
        latestMod = 0
        # Use the most recently modified entry under Logging/
        for file in os.listdir(loggingDir):
            fileModTime = os.path.getmtime(loggingDir + file)
            if fileModTime > latestMod:
                latestMod = fileModTime
                latestFile = file
        self.logDir = loggingDir + latestFile + "/"
        self.path = self.logDir + "log/"

    def _logEpisode(self):
        self.duration = time.time() - self.start
        self.epInfo.append([
            round(self.duration, 3),
            round(self.totalreward, 3), self.epConc, self.counter,
            "Goal:", self.goal, "Start position:", self.startPosition,
            "Rewards:", self.moveTowardGoalTotalReward,
            self.obsProximityTotalPunish, self.faceObstacleTotalPunish
        ])
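
# A minimal usage sketch (an addition, not part of the original controllers):
# both classes follow the classic gym API, so a Webots controller script could
# drive one of them with a random policy roughly as below. The episode count
# is arbitrary and chosen only for illustration.
if __name__ == '__main__':
    env = P7RLEnv()
    for _ in range(5):
        observation = env.reset()
        done = False
        while not done:
            action = env.action_space.sample()  # Random-policy placeholder
            observation, reward, done, info = env.step(action)
    env.SaveAndQuit()  # Persist the episode log and reload the world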