class Atlas(gym.Env):
    """
    Base class for Webots actors in a scene (the Y axis is the vertical axis).

    These environments create single-player scenes and behave like normal Gym
    environments, as long as you don't use multiplayer.
    """
    electricity_cost = -2.0      # cost for using motors; should be carefully tuned against the reward for making progress, the other values are less important
    stall_torque_cost = -0.1     # small cost for running electric current through a motor even at zero rotational speed
    foot_collision_cost = -1.0   # cost for touching another leg or other objects; makes the robot avoid smashing its feet into itself
    joints_at_limit_cost = -0.2  # discourage stuck joints
    episode_reward = 0
    frame = 0
    _max_episode_steps = 1000
    initial_y = None
    body_xyz = None
    joint_angles = None
    joint_exceed_limit = False
    ignore_frame = 1

    def __init__(self, action_dim, obs_dim):
        self.robot = Supervisor()
        solid_def_names = self.read_all_def()
        self.def_node_field_list = self.get_all_fields(solid_def_names)
        self.robot_node = self.robot.getFromDef('Atlas')
        self.boom_base = self.robot.getFromDef('BoomBase')
        self.boom_base_trans_field = self.boom_base.getField("translation")
        self.timeStep = int(self.robot.getBasicTimeStep() * self.ignore_frame)  # ms
        self.find_and_enable_devices()
        high = np.ones([action_dim])
        self.action_space = gym.spaces.Box(-high, high, dtype=np.float32)
        high = np.inf * np.ones([obs_dim])
        self.observation_space = gym.spaces.Box(-high, high, dtype=np.float32)

    def read_all_def(self, file_name='../../worlds/atlas_change_foot.wbt'):
        no_solid_str_list = ['HingeJoint', 'BallJoint', 'Hinge2Joint', 'Shape', 'Group', 'Physics']
        with open(file_name) as f:
            content = f.readlines()
        # Strip whitespace characters like `\n` at the end of each line.
        content = [x.strip() for x in content]
        def_str_list = []
        for x in content:
            if 'DEF' in x:
                def_str_list.append(x)
        for sub_str in no_solid_str_list:
            for i in range(len(def_str_list)):
                if sub_str in def_str_list[i]:
                    def_str_list[i] = 'remove_def'
        solid_def_names = []
        for def_str in def_str_list:
            if 'remove_def' != def_str:
                def_str_temp_list = def_str.split()
                solid_def_names.append(def_str_temp_list[def_str_temp_list.index('DEF') + 1])
        print(solid_def_names)
        print('There are duplicates: ', len(solid_def_names) != len(set(solid_def_names)))
        return solid_def_names

    def get_all_fields(self, solid_def_names):
        def_node_field_list = []
        for def_name in solid_def_names:
            def_node = self.robot.getFromDef(def_name)
            node_trans_field = def_node.getField("translation")
            node_rot_field = def_node.getField("rotation")
            # print(def_name)
            node_ini_trans = node_trans_field.getSFVec3f()
            node_ini_rot = node_rot_field.getSFRotation()
            def_node_field_list.append({'def_node': def_node,
                                        'node_trans_field': node_trans_field,
                                        'node_rot_field': node_rot_field,
                                        'node_ini_trans': node_ini_trans,
                                        'node_ini_rot': node_ini_rot})
        return def_node_field_list

    def reset_all_fields(self):
        for def_node_field in self.def_node_field_list:
            def_node_field['node_trans_field'].setSFVec3f(def_node_field['node_ini_trans'])
            def_node_field['node_rot_field'].setSFRotation(def_node_field['node_ini_rot'])

    def find_and_enable_devices(self):
        # inertial unit
        self.inertial_unit = self.robot.getInertialUnit("inertial unit")
        self.inertial_unit.enable(self.timeStep)

        # gps
        self.gps = self.robot.getGPS("gps")
        self.gps.enable(self.timeStep)

        # foot sensors
        self.fsr = [self.robot.getTouchSensor("RFsr"), self.robot.getTouchSensor("LFsr")]
        for i in range(len(self.fsr)):
            self.fsr[i].enable(self.timeStep)

        # all motors
        # motor_names = ['HeadPitch', 'HeadYaw',
        #                'LLegUay', 'LLegLax', 'LLegKny',
        #                'LLegLhy', 'LLegMhx', 'LLegUhz',
        #                'RLegUay', 'RLegLax', 'RLegKny',
        #                'RLegLhy', 'RLegMhx', 'RLegUhz']
        motor_names = [  # 'HeadPitch', 'HeadYaw',
            'BackLbz', 'BackMby', 'BackUbx', 'NeckAy',
            'LLegLax', 'LLegMhx', 'LLegUhz',
            'RLegLax', 'RLegMhx', 'RLegUhz',
        ]
        self.motors = []
        for i in range(len(motor_names)):
            self.motors.append(self.robot.getMotor(motor_names[i]))

        # leg pitch motors
        self.legPitchMotor = [self.robot.getMotor('RLegLhy'),
                              self.robot.getMotor('RLegKny'),
                              self.robot.getMotor('RLegUay'),
                              self.robot.getMotor('LLegLhy'),
                              self.robot.getMotor('LLegKny'),
                              self.robot.getMotor('LLegUay')]
        for i in range(len(self.legPitchMotor)):
            self.legPitchMotor[i].enableTorqueFeedback(self.timeStep)

        # leg pitch sensors
        self.legPitchSensor = [self.robot.getPositionSensor('RLegLhyS'),
                               self.robot.getPositionSensor('RLegKnyS'),
                               self.robot.getPositionSensor('RLegUayS'),
                               self.robot.getPositionSensor('LLegLhyS'),
                               self.robot.getPositionSensor('LLegKnyS'),
                               self.robot.getPositionSensor('LLegUayS')]
        for i in range(len(self.legPitchSensor)):
            self.legPitchSensor[i].enable(self.timeStep)

    def apply_action(self, a):
        assert np.isfinite(a).all()
        # Map each normalized action in [-1, +1] onto the joint's position range.
        for n, j in enumerate(self.legPitchMotor):
            max_joint_angle = j.getMaxPosition()
            min_joint_angle = j.getMinPosition()
            mean_angle = 0.5 * (max_joint_angle + min_joint_angle)
            half_range_angle = 0.5 * (max_joint_angle - min_joint_angle)
            j.setPosition(mean_angle + half_range_angle * float(np.clip(a[n], -1, +1)))
            # Torque-control alternative:
            # joint_angle = self.read_joint_angle(joint_idx=n)
            # torque = 0.5 * j.getMaxTorque() * float(np.clip(a[n], -1, +1))
            # if joint_angle > max_joint_angle:
            #     j.setPosition(max_joint_angle - 0.1)
            #     # j.setTorque(-1.0 * abs(torque))
            # elif joint_angle < min_joint_angle:
            #     j.setPosition(min_joint_angle + 0.1)
            #     # j.setTorque(abs(torque))
            # else:
            #     j.setTorque(torque)
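    # Worked example for apply_action (illustrative joint limits, not values
    # read from the model): for a knee with limits [0.0, 2.45] rad,
    # mean_angle = 1.225 and half_range_angle = 1.225, so a policy output of
    # a = -1 commands 0.0 rad, a = 0 commands 1.225 rad, and a = +1 commands
    # 2.45 rad. Clipping a to [-1, +1] keeps every command inside the limits.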
    def read_joint_angle(self, joint_idx):
        # Wrap the sensor reading into (-pi, pi].
        joint_angle = self.legPitchSensor[joint_idx].getValue() % (2.0 * np.pi)
        if joint_angle > np.pi:
            joint_angle -= 2.0 * np.pi
        max_joint_angle = self.legPitchMotor[joint_idx].getMaxPosition()
        min_joint_angle = self.legPitchMotor[joint_idx].getMinPosition()
        if joint_angle > max_joint_angle + 0.05 or joint_angle < min_joint_angle - 0.05:
            self.joint_exceed_limit = True
        return joint_angle

    def calc_state(self):
        joint_states = np.zeros(2 * len(self.legPitchMotor))
        # even elements [0::2]: positions, scaled to -1..+1 between limits
        for r in range(6):
            joint_angle = self.read_joint_angle(joint_idx=r)
            if r in [0, 3]:
                joint_states[2 * r] = (-joint_angle - np.deg2rad(35)) / np.deg2rad(80)
            elif r in [1, 4]:
                joint_states[2 * r] = 1 - joint_angle / np.deg2rad(75)
            elif r in [2, 5]:
                joint_states[2 * r] = -joint_angle / np.deg2rad(45)
        # odd elements [1::2]: angular speeds, scaled to roughly -1..+1
        for r in range(6):
            if self.joint_angles is None:
                joint_states[2 * r + 1] = 0.0
            else:
                joint_states[2 * r + 1] = 0.5 * (joint_states[2 * r] - self.joint_angles[r])
        self.joint_angles = np.copy(joint_states[0::2])
        self.joint_speeds = joint_states[1::2]
        self.joints_at_limit = np.count_nonzero(np.abs(joint_states[0::2]) > 0.99)

        self.joint_torques = np.zeros(len(self.legPitchMotor))
        for i in range(len(self.legPitchMotor)):
            self.joint_torques[i] = self.legPitchMotor[i].getTorqueFeedback() \
                / self.legPitchMotor[i].getAvailableTorque()

        if self.body_xyz is None:
            self.body_xyz = np.asarray(self.gps.getValues())
            self.body_speed = np.zeros(3)
        else:
            self.body_speed = (np.asarray(self.gps.getValues()) - self.body_xyz) / (self.timeStep * 1e-3)
            self.body_xyz = np.asarray(self.gps.getValues())
        body_local_speed = np.copy(self.body_speed)
        body_local_speed[0], body_local_speed[2] = self.calc_local_speed()
        # print('speed: ', np.linalg.norm(self.body_speed))
        y = self.body_xyz[1]
        if self.initial_y is None:
            self.initial_y = y
        self.body_rpy = self.inertial_unit.getRollPitchYaw()
        '''
        The roll angle indicates the unit's rotation angle about its x-axis, in
        the interval [-π,π]. The roll angle is zero when the InertialUnit is
        horizontal, i.e., when its y-axis has the opposite direction of the
        gravity (WorldInfo defines the gravity vector).

        The pitch angle indicates the unit's rotation angle about its z-axis,
        in the interval [-π/2,π/2]. The pitch angle is zero when the
        InertialUnit is horizontal, i.e., when its y-axis has the opposite
        direction of the gravity. If the InertialUnit is placed on the Robot
        with a standard orientation, then the pitch angle is negative when the
        Robot is going down, and positive when the Robot is going up.

        The yaw angle indicates the unit's orientation, in the interval
        [-π,π], with respect to WorldInfo.northDirection. The yaw angle is
        zero when the InertialUnit's x-axis is aligned with the north
        direction; it is π/2 when the unit is heading east, and -π/2 when the
        unit is oriented towards the west. The yaw angle can be used as a
        compass.
        '''
        more = np.array([y - self.initial_y,
                         0, 0,
                         0.3 * body_local_speed[0], 0.3 * body_local_speed[1], 0.3 * body_local_speed[2],
                         # 0.3 just scales typical speed into -1..+1; it has no physical meaning
                         self.body_rpy[0] / np.pi, self.body_rpy[1] / np.pi], dtype=np.float32)
        self.feet_contact = np.zeros(2)
        for j in range(len(self.fsr)):
            self.feet_contact[j] = self.fsr[j].getValue()
        return np.clip(np.concatenate([more] + [joint_states] + [self.feet_contact]), -5, +5)

    def calc_local_speed(self):
        '''
        Progress in the potential field is speed * dt; typical speed is about
        2-3 meters per second, so this potential changes by 2-3 per frame (not
        per second). All rewards have rew/frame units and are close to 1.0.
        '''
        direction_r = self.body_xyz - np.asarray(self.boom_base_trans_field.getSFVec3f())
        # print('robot_xyz: {}, boom_base_xyz: {}'.format(self.body_xyz,
        #       np.asarray(self.boom_base_trans_field.getSFVec3f())))
        direction_r = direction_r[[0, 2]] / np.linalg.norm(direction_r[[0, 2]])
        direction_t = np.dot(np.asarray([[0, 1], [-1, 0]]), direction_r)
        return np.dot(self.body_speed[[0, 2]], direction_t), np.dot(self.body_speed[[0, 2]], direction_r)

    def alive_bonus(self, y, pitch):
        return +1 if abs(y) > 0.5 and abs(pitch) < 1.0 else -1

    def render(self, mode='human'):
        file_name = 'img.jpg'
        self.robot.exportImage(file=file_name, quality=100)
        return cv2.imread(file_name, -1)
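    # Geometry note for calc_local_speed above: direction_r is the unit vector
    # from the boom base to the robot in the ground (x, z) plane, and
    # multiplying it by the rotation matrix [[0, 1], [-1, 0]] rotates it by
    # 90 degrees to give the tangential unit vector. The first returned value
    # (speed along the tangent, i.e. walking around the boom) is the "progress"
    # term rewarded in step(); the second is the radial speed toward or away
    # from the boom base.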
    def step(self, action):
        for i in range(self.ignore_frame):
            self.apply_action(action)
            simulation_state = self.robot.step(self.timeStep)
        state = self.calc_state()  # also calculates self.joints_at_limit

        # state[0] is body height above ground; body_rpy[1] is pitch
        alive = float(self.alive_bonus(state[0] + self.initial_y, self.body_rpy[1]))
        progress, _ = self.calc_local_speed()
        # print('progress: {}'.format(progress))
        feet_collision_cost = 0.0

        # Assume a DC motor with controller and reverse-current braking.
        electricity_cost = self.electricity_cost * float(np.abs(self.joint_torques * self.joint_speeds).mean())
        electricity_cost += self.stall_torque_cost * float(np.square(self.joint_torques).mean())
        joints_at_limit_cost = float(self.joints_at_limit_cost * self.joints_at_limit)

        rewards = [alive, progress, electricity_cost, joints_at_limit_cost, feet_collision_cost]
        self.episode_reward += progress
        self.frame += 1
        done = (-1 == simulation_state) or (self._max_episode_steps <= self.frame) \
            or (alive < 0) or (not np.isfinite(state).all())
        # print('frame: {}, alive: {}, done: {}, body_xyz: {}'.format(self.frame, alive, done, self.body_xyz))
        # print('state_{} \n action_{}, reward_{}'.format(state, action, sum(rewards)))
        return state, sum(rewards), done, {}

    def run(self):
        # Main loop.
        for i in range(self._max_episode_steps):
            action = np.random.uniform(-1, 1, 6)
            state, reward, done, _ = self.step(action)
            # print('state_{} \n action_{}, reward_{}'.format(state, action, reward))
            if done:
                break

    def reset(self, is_eval_only=False):
        self.initial_y = None
        self.body_xyz = None
        self.joint_angles = None
        self.frame = 0
        self.episode_reward = 0
        self.joint_exceed_limit = False
        # Drive all joints back to zero for a while before resetting physics.
        for i in range(100):
            for j in self.motors:
                j.setPosition(0)
            for k in range(len(self.legPitchMotor)):
                j = self.legPitchMotor[k]
                j.setPosition(0)
            self.robot.step(self.timeStep)
        self.robot.simulationResetPhysics()
        self.reset_all_fields()
        for i in range(10):
            self.robot_node.moveViewpoint()
            self.robot.step(self.timeStep)
        if is_eval_only:
            time.sleep(1)
        return self.calc_state()
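
# A minimal usage sketch (not part of the original controller), assuming this
# script is launched by Webots as the supervisor controller for the
# atlas_change_foot.wbt world. action_dim=6 matches the six leg pitch motors;
# obs_dim=22 matches calc_state() (8 body values + 12 joint values + 2 foot
# contacts). The random policy is just a placeholder for a trained one.
def run_atlas_random():
    env = Atlas(action_dim=6, obs_dim=22)
    state = env.reset()
    done = False
    while not done:
        action = env.action_space.sample()  # stand-in for a trained policy
        state, reward, done, _ = env.step(action)
    print('frames: {}, progress reward: {}'.format(env.frame, env.episode_reward))
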
class P7RLEnv(gym.Env):
    metadata = {'render.modes': ['human']}
    timeStep = 32
    restrictedGoals = True
    boxEnv = False
    isFixedGoal = False
    fixedGoal = [3.5, 0]
    logging = True
    maxTimestep = 20000
    robotResetLocation = [0, 0, 0]

    def __init__(self):
        # Initialize TurtleBot specifics
        self.maxAngSpeed = 1     # 2.84 max
        self.maxLinSpeed = 0.14  # 0.22 max

        # Initialize reward parameters
        self.moveTowardsParam = 1
        self.moveAwayParam = 1.1
        self.safetyLimit = 0.5
        self.obsProximityParam = 0.01
        self.faceObstacleParam = 10

        # Total reward counter for each component
        self.moveTowardGoalTotalReward = 0
        self.obsProximityTotalPunish = 0
        self.faceObstacleTotalPunish = 0

        # Initialize RL-specific variables
        self.rewardInterval = 1     # How often rewards are computed
        self.epConc = ''            # Conclusion of episode [Collision, Success, TimeOut]
        self.reward = 0             # Reward gained in the timestep
        self.totalreward = 0        # Reward gained throughout the episode
        self.counter = 0            # Timestep counter
        self.needReset = True
        # self.state = []           # State of the environment
        self.done = False           # Whether the episode is done
        self.action = [0, 0]        # Current action chosen
        self.prevAction = [0, 0]    # Past action chosen
        self.goal = []              # Goal
        self.goals = [x / 100 for x in range(-450, 451, 150)]
        self.dist = 0               # Distance to the goal
        self.prevDist = 0           # Previous distance to the goal

        # Logging variables
        self.logDir = ""
        self.path = ""
        self.currentEpisode = 0     # Current episode number
        self.start = 0              # Timer for starting
        self.duration = 0           # Episode duration in seconds
        self.epInfo = []            # Logging info for the episode
        self.startPosition = []
        self.prevMax = 0            # temporary variable

        # Initialize a supervisor
        self.supervisor = Supervisor()

        # Initialize robot
        self.robot = self.supervisor.getFromDef('TB')
        self.translationField = self.robot.getField('translation')
        self.orientationField = self.robot.getField('rotation')

        # Initialize lidar
        self.lidar = self.supervisor.getLidar('TurtleBotLidar')
        self.lidar.enable(self.timeStep)
        self.lidarRanges = []

        # Initialize motors (as an instance attribute, so the list is not
        # shared between instances through the class)
        self.motors = []
        self.motors.append(self.supervisor.getMotor("left wheel motor"))
        self.motors.append(self.supervisor.getMotor("right wheel motor"))
        self.motors[0].setPosition(float("inf"))
        self.motors[1].setPosition(float("inf"))
        self.motors[0].setVelocity(0.0)
        self.motors[1].setVelocity(0.0)

        self.direction = []
        self.position = []
        self.orientation = []

        # Initialize goal object
        self.goalObject = self.supervisor.getFromDef('GOAL')
        self.goalTranslationField = self.goalObject.getField('translation')

        # Initialize action space and observation space
        self.action_space = spaces.Box(
            low=np.array([-self.maxLinSpeed, -self.maxAngSpeed]),
            high=np.array([self.maxLinSpeed, self.maxAngSpeed]),
            dtype=np.float32)
        self.observation_space = spaces.Box(low=-10, high=10, shape=(24,), dtype=np.float16)

    def reset(self):
        if not self.logDir:
            self._getLogDirectory()
        self._startEpisode()     # Reset everything that needs resetting
        self.supervisor.step(1)  # Make it happen
        self.goal = self._setGoal()
        self._resetObject(self.goalObject, [self.goal[0], 0.01, self.goal[1]])
        self._getState()
        self.startPosition = self.position[:]
        self.state = [self.prevAction[0], self.prevAction[1],
                      self.dist, self.direction] + self.lidarRanges  # Set state
        return np.asarray(self.state)
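    # The observation built in reset() (and again in step() below) packs
    # [previous linear action, previous angular action, distance to goal,
    # heading to goal] followed by 20 lidar sectors -- 360 beams min-pooled
    # over 18-degree slices in _trimLidarReadings() -- which gives the (24,)
    # shape declared for observation_space.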
    def step(self, action):
        self.prevAction = self.action[:]  # Remember the previous action
        self.action = action
        self._take_action(action)         # Take action
        self.supervisor.step(1)
        self.counter += 1
        # Get state (distance/heading to goal + lidar) and convert to a numpy array
        self._getState()                  # Observe new state
        self.state = np.asarray([self.prevAction[0], self.prevAction[1],
                                 self.dist, self.direction] + self.lidarRanges)
        self.reward = self._calculateReward()    # Get reward
        self.done, extraReward = self._isDone()  # Determine if done, plus extra reward/punishment
        self.reward += extraReward
        self.totalreward += self.reward
        return [self.state, self.reward, self.done, {}]

    def _trimLidarReadings(self, lidar):
        lidarReadings = []
        new_lidars = [x if x != 0 else 3.5 for x in lidar]  # Replace 0's with 3.5 (max reading)
        for x in range(0, 360, 18):
            end = x + 18
            lidarReadings.append(min(new_lidars[x:end]))  # Take the minimum of each 18-beam sector
        return lidarReadings

    def _resetObject(self, node, coordinates):
        node.getField('translation').setSFVec3f(coordinates)
        node.getField('rotation').setSFRotation([0, 1, 0, 0])
        node.resetPhysics()

    def _setGoal(self):
        if not self.isFixedGoal:
            while True:
                if self.restrictedGoals:
                    gs = random.sample(self.goals, 1)
                    gs.append(random.randrange(-45, 45) / 10)
                    g = gs[:] if np.random.randint(2) == 0 else [gs[1], gs[0]]
                elif self.boxEnv:
                    xGoal = random.randrange(-40, -25) / 10 if np.random.randint(2) == 0 \
                        else random.randrange(25, 40) / 10
                    yGoal = random.randrange(-40, 40) / 10
                    g = [yGoal, xGoal] if np.random.randint(2) == 0 else [xGoal, yGoal]
                else:
                    xGoal = random.randrange(-40, 40) / 10
                    yGoal = random.randrange(-40, 40) / 10
                    g = [xGoal, yGoal]
                pos1 = self.robot.getPosition()[0]
                pos2 = self.robot.getPosition()[2]
                distFromGoal = ((g[0] - pos1) ** 2 + (g[1] - pos2) ** 2) ** 0.5
                if distFromGoal >= 1:
                    break
        else:
            g = self.fixedGoal
        print('Goal set with coordinates < {}, {} >'.format(g[0], g[1]))
        return g

    def _getDist(self):
        return ((self.goal[0] - self.position[0]) ** 2 +
                (self.goal[1] - self.position[1]) ** 2) ** 0.5

    def _take_action(self, action):
        if self.logging:
            print('\t\033[1mLinear velocity:\t{}\n\tAngular velocity:\t{}\n\033[0m'
                  .format(action[0], action[1]))
        convertedVelocity = self.setVelocities(action[0], action[1])
        self.motors[0].setVelocity(float(convertedVelocity[0]))
        self.motors[1].setVelocity(float(convertedVelocity[1]))

    def _isDone(self):
        if self.counter >= self.maxTimestep:  # Check maximum timestep
            self.needReset = True
            self._endEpisode('timeout')
            return True, 0
        minScan = min(list(filter(lambda a: a != 0, self.lidarRanges[:])))  # Check lidar
        if minScan < 0.25:
            self.needReset = True
            self._endEpisode('collision')
            return True, -500
        elif self.dist < 0.35:  # Check goal
            self.needReset = True if self.boxEnv else False
            self._endEpisode('success')
            return True, 500
        else:
            return False, 0

    def render(self, mode='human', close=False):
        pass

    def setVelocities(self, linearV, angularV):
        R = 0.033  # Wheel radius [m]
        L = 0.138  # Wheelbase length [m]
        vr = (2 * linearV + angularV * L) / (2 * R)
        vl = (2 * linearV - angularV * L) / (2 * R)
        # print('\nCommanded linear:\t{} angular:\t {}'.format(linearV, angularV))
        # print('Calculated right wheel:\t{}, left wheel:\t {}'.format(vr, vl))
        return [vl, vr]
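    # Worked example for setVelocities (a sanity check, not from the original
    # source): a pure in-place rotation with linearV = 0 and angularV = 1 rad/s
    # gives
    #   vr = (0 + 1 * 0.138) / (2 * 0.033) ~= +2.09 rad/s
    #   vl = (0 - 1 * 0.138) / (2 * 0.033) ~= -2.09 rad/s
    # i.e. the wheels spin in opposite directions and the robot turns on the spot.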
    def _getDirection(self):
        # Get direction of the goal from the robot
        robgoaly = self.goal[1] - self.position[1]
        robgoalx = self.goal[0] - self.position[0]
        goal_angle = math.atan2(robgoalx, robgoaly)
        # Get the difference between the goal direction and the orientation
        heading = goal_angle - self.orientation
        if heading > pi:  # Wrap around pi
            heading -= 2 * pi
        elif heading < -pi:
            heading += 2 * pi
        return heading

    def _calculateReward(self):
        if self.counter % self.rewardInterval != 0:  # Check if it's reward time
            return 0
        distRate = self.prevDist - self.dist
        if self.prevDist == 0:
            self.prevDist = self.dist
            return 0
        moveTowardsGoalReward = self._rewardMoveTowardsGoal(distRate)
        self.moveTowardGoalTotalReward += moveTowardsGoalReward
        obsProximityPunish = self._rewardObstacleProximity()
        self.obsProximityTotalPunish += obsProximityPunish
        faceObstaclePunish = -self._rewardFacingObstacle()
        self.faceObstacleTotalPunish += faceObstaclePunish
        reward = moveTowardsGoalReward + obsProximityPunish + faceObstaclePunish
        # self._printMax(moveTowardsGoalReward)
        totalRewardDic = {
            "faceObstacleTotalPunish": self.faceObstacleTotalPunish,
            "obsProximityTotalPunish": self.obsProximityTotalPunish,
            "moveTowardGoalTotalReward": self.moveTowardGoalTotalReward
        }
        rewardDic = {
            "faceObstaclePunish": faceObstaclePunish,
            "obsProximityPunish": obsProximityPunish,
            "moveTowardsGoalReward": moveTowardsGoalReward,
            "Total": reward,
            "\033[1mTotalReward\033[0m": self.totalreward
        }
        if self.logging:
            self._printRewards(rewardDic)
            self._printRewards(totalRewardDic)
        self.prevDist = self.dist
        return reward

    def _rewardFacingObstacle(self):
        rewardFaceObs = 0
        # Check forward lidars if moving forward, backward lidars if moving
        # backward; negative indices wrap around the 20-sector list.
        lidarRange = [-2, 2] if self.action[0] >= 0 else [7, 11]
        for i in range(lidarRange[0], lidarRange[1]):
            scale = 0.15 if i in [-2, 1, 7, 10] else 0.35  # Side readings get less weight
            rewardFaceObs += scale * (1 - (self.lidarRanges[i] / self.safetyLimit)) \
                if self.lidarRanges[i] < self.safetyLimit else 0
        return self.faceObstacleParam * rewardFaceObs

    def _rewardMoveTowardsGoal(self, distRate):
        if distRate <= 0:
            return self.moveAwayParam * (distRate / 0.0044)
        else:
            return self.moveTowardsParam * (distRate / 0.0044)

    def _rewardObstacleProximity(self):
        closestObstacle = min(self.lidarRanges)
        if closestObstacle <= self.safetyLimit:
            return self.obsProximityParam * -(1 - (closestObstacle / self.safetyLimit))
        else:
            return 0
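    # Note on the 0.0044 normalizer above (our reading; it is not documented in
    # the original source): assuming the 32 ms timeStep is the effective control
    # period, the largest distance change per step at the maximum linear speed
    # is 0.14 m/s * 0.032 s = 0.00448 m, so distRate / 0.0044 scales the
    # per-step progress roughly into [-1, +1].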
    def SaveAndQuit(self):
        self._write_file('eps.txt', self.currentEpisode)
        with open(self.path + 'P7_NoObstacles.txt', 'a+') as f:
            print('\nSave and Quit \n')
            f.write(repr(self.epInfo))
            f.write('\n')
        print('\tEpisodes Saved')
        self.supervisor.worldReload()

    def _write_file(self, filename, intval):
        with open(self.path + filename, 'w') as fp:
            print(f'Episodes saved: {intval}')
            fp.write(str(intval))

    def _read_file(self, filename):
        with open(self.path + filename) as fp:
            return int(fp.read())

    def _startEpisode(self):
        if self.start != 0:
            self._logEpisode()
        # Read the episode number
        if self.currentEpisode == 0 and os.path.exists(self.path + 'eps.txt'):
            self.currentEpisode = self._read_file('eps.txt')
        else:
            self.currentEpisode += 1
        print('\n\t\t\t\t EPISODE No. {} \n'.format(self.currentEpisode))
        self.start = time.time()
        if self.needReset:
            # Reset object(s) / counters / rewards
            print('Resetting Robot...')
            self._resetObject(self.robot, self.robotResetLocation)
            self.counter = 0
            self.totalreward = 0
            self.moveTowardGoalTotalReward = 0
            self.obsProximityTotalPunish = 0
            self.faceObstacleTotalPunish = 0
            self.prevDist = 0

    def _endEpisode(self, ending):
        self.epConc = ending
        prevMode = self.supervisor.simulationGetMode()
        self.supervisor.simulationSetMode(0)  # Pause while exporting the screenshot
        self.supervisor.exportImage(self.logDir + "screenshots/" + ending + "/" +
                                    str(self.currentEpisode) + ".jpg", 100)
        self.supervisor.simulationSetMode(prevMode)
        print(ending)

    def _getState(self):
        self.lidarRanges = self._trimLidarReadings(self.lidar.getRangeImage())  # Get lidar info
        # if self.logging: self._printLidarRanges()
        self.position = self.robot.getPosition()[::2]  # Get position (x, z)
        self.orientation = self.orientationField.getSFRotation()[3]  # ... and orientation
        self.dist = self._getDist()            # Get the Euclidean distance from the goal
        self.direction = self._getDirection()  # Get the heading to the goal

    def _printRewards(self, r, interval=1):
        if self.counter % interval == 0:
            print("\n\033[1mEpisode {}, timestep {}\033[0m".format(self.currentEpisode, self.counter))
            for k, v in r.items():
                print("\t" + str(k) + ":\t" + str(v))

    def _printMax(self, reward):
        maxR = reward if self.prevMax < reward else self.prevMax
        self.prevMax = maxR
        print("max: {}".format(maxR))

    def _printLidarRanges(self):
        lidarFormatted = []
        for i in range(len(self.lidarRanges)):
            lidarFormatted.append(str(i) + ': ' + str(self.lidarRanges[i]))
        print(lidarFormatted)

    def _getLogDirectory(self):
        # Use the most recently modified folder under Logging/ as the log root.
        loggingDir = "Logging/"
        latestFile = ""
        latestMod = 0
        for file in os.listdir(loggingDir):
            fileModTime = os.path.getmtime(loggingDir + file)
            if fileModTime > latestMod:
                latestMod = fileModTime
                latestFile = file
        self.logDir = loggingDir + latestFile + "/"
        self.path = self.logDir + "log/"

    def _logEpisode(self):
        self.duration = time.time() - self.start
        self.epInfo.append([
            round(self.duration, 3),
            round(self.totalreward, 3), self.epConc, self.counter,
            "Goal:", self.goal,
            "Start position:", self.startPosition,
            "Rewards:", self.moveTowardGoalTotalReward,
            self.obsProximityTotalPunish, self.faceObstacleTotalPunish
        ])
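
# A minimal usage sketch (not part of the original controller), assuming this
# script runs as the Webots supervisor controller for a world that defines the
# 'TB' TurtleBot and 'GOAL' nodes and contains a Logging/ directory for the
# episode logs. The random policy is just a placeholder for a trained one.
if __name__ == '__main__':
    env = P7RLEnv()
    for episode in range(3):  # a few random-policy episodes
        state = env.reset()
        done = False
        while not done:
            action = env.action_space.sample()  # random [linear, angular] command
            state, reward, done, _ = env.step(action)
    env.SaveAndQuit()  # persist the episode logs and reload the world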