def _setup(self):
    # Setup environment
    frame = startCarlaSims()
    self.modelName = settings.MODEL_NAME

    sql = Sql()
    self.sessionId = sql.INSERT_newSession(self.modelName)
    self.frameNumber = Value(c_uint64, frame)

    # self.env = SubprocVecEnv([lambda i=i: gym.make('CarlaGym-v0', name=self.modelName, carlaInstance=i) for i in range(settings.CARLA_SIMS_NO)])
    self.env = SubprocVecEnv([self.make_env(i) for i in range(settings.CARS_PER_SIM)])

    # Decide which RL module and policy
    self.rlModule = getattr(sys.modules[__name__], settings.MODEL_RL_MODULE)
    self.policy = getattr(sys.modules[__name__], settings.MODEL_POLICY)

    self.modelNum = settings.MODEL_NUMBER if settings.MODEL_NUMBER is not None else 0
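# A minimal sketch of the make_env factory that _setup relies on. SubprocVecEnv expects a
# list of zero-argument callables, one per worker process, each constructing a fresh
# environment when invoked in its subprocess. The gym id 'CarlaGym-v0' and the keyword
# arguments mirror the commented-out line above; the exact signature used in this
# repository may differ, and gym is assumed to be imported at module level.
def make_env(self, instance):
    def _init():
        return gym.make('CarlaGym-v0',
                        name=self.modelName,      # model/session name passed through to CarlaEnv
                        carlaInstance=instance)   # which simulated car this worker controls
    return _init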
# Exports per-episode reward data from the database to CSV files
# (one file per session, plus a separate "_eval" file for evaluation episodes)
def importFromDataBase():
    sql = Sql()

    for sessionList in sessions.values():
        for name, sessionId in sessionList.items():
            # eval == 0 exports training episodes, eval == 1 exports evaluation episodes
            for eval in range(2):
                fileName = name + '.csv' if eval == 0 else name + '_eval.csv'

                with open('../../data/' + fileName, mode='w', newline='') as csvFile:
                    dataWriter = csv.writer(csvFile, delimiter=',')

                    fieldnames = ['instance', 'episode', 'reward']
                    dataWriter.writerow(fieldnames)

                    data = sql.SELECT_trainingData(sessionId, eval)
                    for dataRow in data:
                        dataWriter.writerow(dataRow)
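# Hypothetical illustration of the module-level `sessions` mapping that importFromDataBase
# iterates over: groups of {model name: database session id}. The names and ids below are
# made up for the example; the real mapping is defined elsewhere in this module.
sessions = {
    "ppo_runs": {
        "ppo_baseline": 101,
        "ppo_transfer": 102,
    },
    "dqn_runs": {
        "dqn_baseline": 201,
    },
}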
class CarlaEnv(gym.Env):
    """Sets up CARLA simulation and declares necessary instance variables"""

    def __init__(self, carlaInstance=0, world_ticks=None, name="NoNameWasGiven", serverIndex=0):
        # Connect a client
        self.client = carla.Client(*settings.CARLA_SIMS[serverIndex][:2])
        self.client.set_timeout(2.0)
        self.modelName = name
        self.world_ticks = world_ticks

        # Set necessary instance variables related to client
        self.world = self.client.get_world()
        self.blueprintLibrary = self.world.get_blueprint_library()
        self.carlaInstance = carlaInstance

        # Sensors and helper lists
        self.actorList = []
        self.imgWidth = settings.CARLA_IMG_WIDTH
        self.imgHeight = settings.CARLA_IMG_HEIGHT
        self.episodeTicks = 0
        self.totalTicks = 0
        self.frameNumber = 0  # NOTE: presumably overwritten with a shared multiprocessing Value before synced ticking (see _retrieve_data, which reads self.frameNumber.value)

        # Video variables
        self.episodeNr = 0  # TODO WARNING: be careful using this as it also counts validation episodes
        self.sql = Sql()
        self.sessionId = self.sql.INSERT_newSession(self.modelName) if (self.carlaInstance == 0) else None

        # Early stopping variables
        self.grassLocation = None
        self.grassStuckTick = 0

        # Declare variables for later use
        self.vehicle = None
        self.segSensor = None
        self.grassSensor = None
        self.splineSensor = None
        self.imgFrame = None
        self.wheelsOnGrass = None
        self.episodeStartTime = 0
        self.episodeReward = None
        self.distanceOnSpline = None
        self.splineMaxDistance = None
        self.previousDistanceOnSpline = None
        self.queues = []  # List of tuples (queue, dataProcessingFunction)

        # Declare reward dependent values
        self.car_last_tick_pos = None
        self.car_last_tick_transform = None
        self.car_last_tick_wheels_on_road = None
        self.car_last_episode_time = None

        # Declare classes
        self.reward = Reward(self)
        self.mediaHandler = MediaHandler(self)

        # Defines image space as a box which can hold standard rgb images of size imgWidth by imgHeight
        imageSpace = Box(low=0, high=255, shape=(self.imgHeight, self.imgWidth, 3), dtype=np.uint8)

        # Defines observation and action spaces
        self.observation_space = imageSpace

        if settings.MODEL_ACTION_TYPE == ActionType.DISCRETE.value:
            self.action_space = Discrete(len(DISCRETE_ACTIONS))
        elif settings.MODEL_ACTION_TYPE == ActionType.MULTI_DISCRETE.value:
            # 1) Throttle: Discrete 4  - [0]:0.0, [1]:0.33, [2]:0.67, [3]:1.0
            # 2) Brake:    Discrete 3  - [0]:0.0, [1]:0.5, [2]:1.0
            # 3) Steer:    Discrete 21 - mapped linearly from [0]:-1.0 to [20]:1.0
            self.action_space = MultiDiscrete([4, 3, 21])
            self.throttleMapLen = float(self.action_space.nvec[0] - 1)
            self.brakeMapLen = float(self.action_space.nvec[1] - 1)
            self.steerMapLen = float(self.action_space.nvec[2] - 1) / 2
        elif settings.MODEL_ACTION_TYPE == ActionType.BOX.value:
            # [Throttle, Brake, Steer]
            self.action_space = Box(np.array([0, 0, -0.5]), np.array([+1, +1, +0.5]), dtype=np.float32)
        else:
            raise Exception("No such action type, change settings")

        if settings.AGENT_SYNCED:
            self.tick(30)

    def close(self):
        self._resetActorList()

    def reset(self):
        """:returns initial observation"""
        self.episodeNr += 1  # Count episodes TODO WARNING: be careful using this as it also counts validation episodes

        # Print episode and reward for that episode
        if self.carlaInstance == 0 and self.car_last_episode_time is not None:
            print(f"Episode: {self.episodeNr} - Reward: {self.episodeReward} \t - Time: {time.time() - self.car_last_episode_time}")

        # Frames are only added if it's a video episode, so if there are frames it means that the last
        # episode was a video episode and we should export it before we reset the frames list below
        if self.mediaHandler.episodeFrames:
            self.mediaHandler.exportAndUploadVideoToDB()

        # Reset actors, variables and rewards for next episode
        self._resetActorList()
        self._resetInstanceVariables()
        self.episodeReward = 0

        # Create new actors and add to actor list
        self._createActors()

        # Workaround to start episode as quickly as possible
        self._applyActionDiscrete(Action.BRAKE.value)

        # Wait for camera to send first image
        self._waitForWorldToBeReady()

        # Set last tick variables to equal starting pos information
        self.car_last_tick_pos = self.vehicle.get_location()
        self.car_last_tick_transform = self.vehicle.get_transform()
        self.car_last_tick_wheels_on_road = 4

        # Disengage brakes from earlier workaround
        self._applyActionDiscrete(Action.DO_NOTHING.value)

        return self.imgFrame  # Returns initial observation (first image)

    def step(self, action):
        """:returns (obs, reward, done, extra)"""
        self.episodeTicks += 1
        # self.totalTicks += 1

        # Do action
        if settings.MODEL_ACTION_TYPE == ActionType.DISCRETE.value:
            self._applyActionDiscrete(action)
        elif settings.MODEL_ACTION_TYPE == ActionType.MULTI_DISCRETE.value:
            self._applyActionMultiDiscrete(action)
        elif settings.MODEL_ACTION_TYPE == ActionType.BOX.value:
            self._applyActionBox(action)
        else:
            raise Exception("No such action type, change settings")

        if settings.AGENT_SYNCED:
            self.tick(60)

        is_done = self._isDone()  # Must be calculated before rewards

        # Update reward
        reward = self.reward.calcReward()
        self.episodeReward += reward

        # if is_done and self.carlaInstance == 0 and self.mediaHandler.episodeFrames:
        #     extra = {"episode": {"episodeNr": self.episodeNr, "frames": self.mediaHandler.episodeFrames}}
        # else:
        #     extra = {}

        return self.imgFrame, reward, is_done, {}  # extra

    def tick(self, timeout):
        self.world.tick()
        data = [self._retrieve_data(queueTuple, timeout) for queueTuple in self.queues]
        # assert all(x.frame == self.frameNumber.value for x in data)
        return data

    def tick_unsync(self, timeout):
        self.frameNumber.value = self.world.tick()
        data = [self._retrieve_data(queueTuple, timeout) for queueTuple in self.queues]
        # assert all(x.frame == self.frameNumber.value for x in data)
        return data

    def _makeQueue(self, registerEvent, processData):
        q = queue.Queue()
        registerEvent(q.put)
        self.queues.append((q, processData))

    def _retrieve_data(self, queueTuple, timeout):
        while True:
            data = queueTuple[0].get(timeout=timeout)
            dataProcessFunction = queueTuple[1]
            if data.frame == self.frameNumber.value:
                dataProcessFunction(data)  # Process data
                return data

    def _resetInstanceVariables(self):
        # Declare variables for later use
        self.vehicle = None
        self.segSensor = None
        self.grassSensor = None
        self.imgFrame = None
        self.wheelsOnGrass = None
        self.episodeTicks = 0
        self.episodeReward = None
        self.queues = []

        # Early stopping
        self.grassLocation = None
        self.grassStuckTick = 0

        # Declare reward dependent values
        self.car_last_tick_pos = None
        self.car_last_tick_transform = None
        self.car_last_tick_wheels_on_road = None
        self.car_last_episode_time = time.time()
        self.previousDistanceOnSpline = None

        # Video
        self.mediaHandler.episodeFrames = []

    def _createActors(self):
        # Spawn vehicle
        self.vehicle = self._createNewVehicle()
        # print(self.vehicle.attributes)
        # self.vehicle.attributes['color'] = '255,0,0'
        # print(self.vehicle.attributes)
        self.actorList.append(self.vehicle)  # Add to list of actors which makes it easy to clean up later

        # Make segmentation sensor blueprint
        self.segSensor = self._createSegmentationSensor()
        self.actorList.append(self.segSensor)

        # Create grass sensor
        self.grassSensor = self._createGrassSensor()
        self.actorList.append(self.grassSensor)

        # Create spline distance sensor
        self.splineSensor = self._createSplineSensor()
        self.actorList.append(self.splineSensor)

    # Destroy all previous actors, and clear actor list
    def _resetActorList(self):
        # Destroy all actors from previous episode
        for actor in self.actorList:
            actor.destroy()

        # Clear all actors from the list from previous episode
        self.actorList = []

    # Waits until the world is ready for training
    # NOTE: tick_lock is assumed to be assigned externally (a lock shared between instances); it is not initialised in __init__
    def _waitForWorldToBeReady(self):
        self.tick_lock.acquire()
        while self._isWorldNotReady():
            if settings.AGENT_SYNCED:
                self.tick_unsync(60)
        self.tick_lock.release()
        self.tick(30)

    # Returns true if the world is not yet ready for training
    def _isWorldNotReady(self):
        # print(self.wheelsOnGrass)
        return self.imgFrame is None or self.wheelsOnGrass != 0

    # Creates a new vehicle and spawns it into the world as an actor
    # Returns the vehicle
    def _createNewVehicle(self):
        vehicle_blueprint = self.blueprintLibrary.filter('test')[0]
        color = random.choice(vehicle_blueprint.get_attribute('color').recommended_values)
        vehicle_blueprint.set_attribute('color', '0,255,0')
        vehicle_spawn_transforms = self.world.get_map().get_spawn_points()

        if settings.USE_RANDOM_SPAWN_POINTS:
            vehicle_spawn_transform = random.choice(vehicle_spawn_transforms)  # Pick a random spawn point
        else:
            vehicle_spawn_transform = vehicle_spawn_transforms[0]  # Use the first spawn point

        return self.world.spawn_actor(vehicle_blueprint, vehicle_spawn_transform)  # Spawn vehicle

    # Creates a new segmentation sensor and spawns it into the world as an actor
    # Returns the sensor
    def _createSegmentationSensor(self):
        # Make segmentation sensor blueprint
        seg_sensor_blueprint = self.blueprintLibrary.find('sensor.camera.modified_semantic_segmentation')
        seg_sensor_blueprint.set_attribute('image_size_x', str(self.imgWidth))
        seg_sensor_blueprint.set_attribute('image_size_y', str(self.imgHeight))
        seg_sensor_blueprint.set_attribute('fov', '110')
        relative_transform_sensor = carla.Transform(carla.Location(x=3, z=3), carla.Rotation(pitch=-45))  # Place sensor on the front of car

        # Spawn semantic segmentation sensor, start listening for data and add to actorList
        seg_sensor = self.world.spawn_actor(seg_sensor_blueprint, relative_transform_sensor, attach_to=self.vehicle)
        self._makeQueue(seg_sensor.listen, processData=self.mediaHandler.processImage)
        # seg_sensor.listen(self.mediaHandler.processImage)

        return seg_sensor

    # Creates a new grass sensor and spawns it into the world as an actor
    # Returns the sensor
    def _createGrassSensor(self):
        # Sensor blueprint
        grass_blueprint = self.blueprintLibrary.find('sensor.other.safe_distance')
        grass_blueprint.set_attribute('safe_distance_z_height', '60')
        grass_blueprint.set_attribute('safe_distance_z_origin', '10')

        # Grass sensor actor
        grass_sensor = self.world.spawn_actor(grass_blueprint, carla.Transform(), attach_to=self.vehicle)
        self._makeQueue(grass_sensor.listen, processData=self._grass_data)
        # grass_sensor.listen(self._grass_data)

        # Return created actor
        return grass_sensor

    # Creates a new spline distance sensor and spawns it into the world as an actor
    # Returns the sensor
    def _createSplineSensor(self):
        # Sensor blueprint
        spline_blueprint = self.blueprintLibrary.find('sensor.other.spline_distance')

        # Spline sensor actor
        spline_sensor = self.world.spawn_actor(spline_blueprint, carla.Transform(), attach_to=self.vehicle)
        self._makeQueue(spline_sensor.listen, processData=self._spline_data)

        # Return created actor
        return spline_sensor

    # Applies a discrete action to the vehicle
    def _applyActionDiscrete(self, action):
        # If action does something, apply action
        self.vehicle.apply_control(
            carla.VehicleControl(throttle=DISCRETE_ACTIONS[Action(action)][0],
                                 brake=DISCRETE_ACTIONS[Action(action)][1],
                                 steer=DISCRETE_ACTIONS[Action(action)][2]))

    # Applies a multi-discrete action to the vehicle
    def _applyActionMultiDiscrete(self, action):
        # If action does something, apply action
        self.vehicle.apply_control(
            carla.VehicleControl(throttle=action[0] / self.throttleMapLen,
                                 brake=action[1] / self.brakeMapLen,
                                 steer=(action[2] / self.steerMapLen) - 1))

    # Applies a box action to the vehicle
    def _applyActionBox(self, action):
        self.vehicle.apply_control(
            carla.VehicleControl(
                throttle=float(action[0]),
                brake=float(action[1]),
                steer=float(action[2]),
            ))

    # Returns the distance in meters traveled since the last recorded tick position
    def metersTraveledSinceLastTick(self):
        # Calculate meters driven
        last = self.car_last_tick_pos
        current = self.vehicle.get_location()

        x_diff = current.x - last.x
        y_diff = current.y - last.y
        z_diff = current.z - last.z

        distance_traveled = math.sqrt(x_diff ** 2 + y_diff ** 2 + z_diff ** 2)

        # Return distance traveled in meters
        return distance_traveled

    # Returns the number of wheels on the road
    def wheelsOnRoad(self):
        return 4 - self.wheelsOnGrass

    # Returns the car's current velocity in km/h
    def getCarVelocity(self):
        if self.vehicle is None or not self.vehicle.is_alive:
            return 0

        vel_vec = self.vehicle.get_velocity()                              # The velocity vector
        mps = math.sqrt(vel_vec.x ** 2 + vel_vec.y ** 2 + vel_vec.z ** 2)  # Meters per second
        kph = mps * 3.6                                                    # Speed in km/h (from m/s)

        return kph

    def getDistanceMovedAlongSpline(self):
        distanceAlongSpline = self.distanceOnSpline - self.previousDistanceOnSpline if self.previousDistanceOnSpline is not None else 0

        if distanceAlongSpline < -0.8 * self.splineMaxDistance:
            # If the car has completed an entire loop, the raw difference is a negative number close to the max spline distance.
            # It should instead be the distance to the finish line plus the distance past the finish line
            distanceAlongSpline = self.splineMaxDistance - self.previousDistanceOnSpline + self.distanceOnSpline
        elif distanceAlongSpline > 0.8 * self.splineMaxDistance:
            # If the car somehow reverses across the finish line, the raw difference is a positive number close to the max spline distance.
            # It should instead be the negative distance that the vehicle moved backwards
            distanceAlongSpline = -(self.previousDistanceOnSpline + (self.splineMaxDistance - self.distanceOnSpline))
        elif abs(distanceAlongSpline) > 1000:
            distanceAlongSpline = 0

        return distanceAlongSpline / 100

    # Returns true if the current episode should be stopped
    def _isDone(self):
        # If episode length is exceeded it is done
        episode_expired = self._isEpisodeExpired()
        is_stuck_on_grass = self._isStuckOnGrass()
        car_on_grass = self._isCarOnGrass()
        max_negative_reward = self._isMaxNegativeRewardAccumulated()

        return episode_expired  # or is_stuck_on_grass or car_on_grass or max_negative_reward

    # Returns true if the current max episode time has elapsed
    def _isEpisodeExpired(self):
        if settings.CARLA_SECONDS_MODE_LINEAR:
            scale = min((self.episodeNr / settings.CARLA_SECONDS_PER_EPISODE_EPISODE_RANGE), 1)  # Calculate scale depending on episode nr
            range_diff = settings.CARLA_SECONDS_PER_EPISODE_LINEAR_MAX - settings.CARLA_SECONDS_PER_EPISODE_LINEAR_MIN  # Calculate min / max difference
            total_seconds = settings.CARLA_SECONDS_PER_EPISODE_LINEAR_MIN + (range_diff * scale)  # Calculate current total episode seconds
            # if self.carlaInstance == 0:
            #     print(str(total_seconds))
            total_max_episode_ticks = int(total_seconds * (1 / settings.AGENT_TIME_STEP_SIZE))  # Calculate new total_max_episode_ticks
        else:
            total_max_episode_ticks = settings.CARLA_TICKS_PER_EPISODE_STATIC

        return self.episodeTicks > total_max_episode_ticks

    # Returns true if all four wheels are off the road
    def _isCarOnGrass(self):
        return self.wheelsOnGrass == 4

    # Returns true if the maximum negative reward has been accumulated
    def _isMaxNegativeRewardAccumulated(self):
        return self.episodeReward < -500

    def _isStuckOnGrass(self):
        if self.wheelsOnGrass == 4 and self.metersTraveledSinceLastTick() < 0.5:
            self.grassStuckTick += 1
            return self.grassStuckTick > 5
        else:
            self.grassStuckTick = 0
            return False

    def render(self, mode='human'):
        """Each time step, the model predicts and steps an action, after which render is called"""
        pass

    def _grass_data(self, event):
        self.wheelsOnGrass = event[0] + event[1] + event[2] + event[3]
        # print(f"({event[0]},{event[1]},{event[2]},{event[3]})")

    def _spline_data(self, event):
        self.distanceOnSpline = event[0]
        self.splineMaxDistance = event[1]
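# A minimal standalone sketch of the MultiDiscrete([4, 3, 21]) action mapping used by
# _applyActionMultiDiscrete above, useful for sanity-checking an agent's raw actions outside
# the simulator. The map lengths mirror the values computed in CarlaEnv.__init__; this
# helper is illustrative only and not part of the environment.
def map_multi_discrete_action(action):
    throttle_map_len = 4 - 1        # throttle indices 0..3  -> 0.0 .. 1.0
    brake_map_len = 3 - 1           # brake indices 0..2     -> 0.0 .. 1.0
    steer_map_len = (21 - 1) / 2    # steer indices 0..20    -> -1.0 .. 1.0
    return (action[0] / throttle_map_len,
            action[1] / brake_map_len,
            (action[2] / steer_map_len) - 1)

# For example, [3, 0, 15] maps to full throttle, no brake and a half right turn:
assert map_multi_discrete_action([3, 0, 15]) == (1.0, 0.0, 0.5)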
class Callback:

    def __init__(self, runner):
        self.runner = runner
        self.nEpisodes = 0
        self.prev_episode = 0
        self.maxRewardAchieved = float('-inf')
        self.sql = Sql()

    def callback(self, runner_locals, _locals):
        self.nEpisodes += 1

        self._updateLearningRate(_locals)
        self._updateClipRange(_locals)
        self._updatePollingRate(_locals)
        self._storeTensorBoardData(_locals)
        self._exportBestModel(runner_locals, _locals)
        self._exportGpsData(_locals)
        self._printCallbackStats(_locals)
        self._exportRewardsToDB(self._getAllCarRewards())

        if self.nEpisodes % settings.CARLA_EVALUATION_RATE == 0:
            self._testVehicles(_locals, runner_locals)

        return True

    def _exportRewardsToDB(self, rewards, evaluation=False):
        sessionId = self.runner.sessionId
        for instance in range(len(rewards)):
            self.sql.INSERT_newEpisode(sessionId, instance, self._getEpisodeCount(),
                                       rewards[instance], evaluation=evaluation)

    def _getEpisodeCount(self):
        return len(self.runner.env.get_attr('episode_rewards', 0)[0])

    def _getAllCarRewards(self):
        all_rewards = self.runner.env.get_attr('episode_rewards', [i for i in range(settings.CARS_PER_SIM)])
        values = [array[-1] for array in all_rewards]
        return values

    def _getAllCarlaEnvironmentGpsDatas(self):
        return self.runner.env.env_method('get_location', indices=[i for i in range(settings.CARS_PER_SIM)])

    def _maxCarGpsData(self, gps_data):
        return gps_data[np.argmax(self._getAllCarRewards())]

    def _minCarGpsData(self, gps_data):
        return gps_data[np.argmin(self._getAllCarRewards())]

    def _medianCarGpsData(self, gps_data):
        rewards = self._getAllCarRewards()
        median_index = np.argsort(rewards)[len(rewards) // 2]
        return gps_data[median_index]

    def _exportGpsData(self, _locals):
        gps_data = self._getAllCarlaEnvironmentGpsDatas()
        self._exportGpsDataForEnvironment(self._maxCarGpsData(gps_data), "max")
        self._exportGpsDataForEnvironment(self._minCarGpsData(gps_data), "min")
        self._exportGpsDataForEnvironment(self._medianCarGpsData(gps_data), "median")

    def _exportGpsDataForEnvironment(self, gps_data, name_prefix=""):
        # Export the GPS coordinates to a text file
        image_dir = f"GpsData/{self.runner.modelName}"
        if not os.path.exists(image_dir):
            os.makedirs(image_dir)

        with open(f"{image_dir}/gps_{name_prefix}_{self._getEpisodeCount()}.txt", "w+") as file:
            for data in gps_data:
                file.write(f"{data[0]};{data[1]}\n")

    def getReferenceImagePath(self, name):
        return f"{self._getGpsReferenceBaseFolder(name)}/map.png"

    def getReferenceCoordinates(self, name):
        reference_points_path = f"{self._getGpsReferenceBaseFolder(name)}/ref.txt"

        ref_data = []
        with open(reference_points_path, "r") as file:
            for line in file:
                data_split = line.split(";")
                data = np.array([float(data_split[i]) for i in range(4)]).reshape((2, 2))
                ref_data.append(data)

        return tuple(ref_data)

    def _getGpsReferenceBaseFolder(self, name):
        return f"GpsData/A_referenceData/{name}"

    def _exportBestModel(self, runner_locals, _locals):
        # info = _locals["ep_infos"]
        # print(f"{self.nSteps}: {info}")
        mean = np.sum(np.asarray(runner_locals['mb_rewards']))
        if self.maxRewardAchieved < mean:
            self.maxRewardAchieved = mean
            if self.nEpisodes > 10:
                print(f"Saving best model: step {self.nEpisodes} reward: {mean}")
                _locals['self'].save(f"TrainingLogs/BaselineAgentLogs/{self.runner.modelName}_{self.runner.modelNum}_best")

    def _storeTensorBoardData(self, _locals):
        n_episodes = self._getEpisodeCount()

        if n_episodes > self.prev_episode:
            self.prev_episode = n_episodes

            values = self._getAllCarRewards()

            median = np.median(values)
            summary = tf.Summary(value=[tf.Summary.Value(tag='episodeRewardMedian', simple_value=median)])
            _locals['writer'].add_summary(summary, n_episodes)

            maximum = np.max(values)
            summary = tf.Summary(value=[tf.Summary.Value(tag='episodeRewardMax', simple_value=maximum)])
            _locals['writer'].add_summary(summary, n_episodes)

            mean = np.mean(values)
            summary = tf.Summary(value=[tf.Summary.Value(tag='episodeRewardMean', simple_value=mean)])
            _locals['writer'].add_summary(summary, n_episodes)

    def _updateLearningRate(self, _locals):
        new_learning_rate = self._calculateNewLearningRate_Exponential()
        _locals['self'].learning_rate = lambda frac: new_learning_rate

    def _updatePollingRate(self, _locals):
        new_polling_rate = self._calculateNewPollingRate_Linear()
        _locals['self'].polling_rate = lambda: new_polling_rate

    def _updateClipRange(self, _locals):
        new_clip_range = self._calculateNewClipRange_Linear()
        _locals['self'].cliprange = lambda frac: new_clip_range
        # _locals['self'].cliprange_vf = lambda frac: new_clip_range
        # _locals.update({'cliprange_vf': lambda frac: new_clip_range})

    def _calculateNewClipRange_Linear(self):
        scale = self._getEpisodeScaleTowardsZero()
        clip_diff = settings.MODEL_CLIP_RANGE - settings.MODEL_CLIP_RANGE_MIN
        newClip = max(settings.MODEL_CLIP_RANGE_MIN + (clip_diff * scale), 0)
        return newClip

    def _calculateNewPollingRate_Linear(self):
        scale = self._getEpisodeScaleTowardsZero()
        poll_diff = settings.TRANSFER_POLLING_RATE_START - settings.TRANSFER_POLLING_RATE_MIN
        newPoll = max(settings.TRANSFER_POLLING_RATE_MIN + (poll_diff * scale), 0)
        return newPoll

    def _calculateNewLearningRate_Exponential(self):
        n_episodes = self._getEpisodeCount()
        newLearningRate = settings.MODEL_LEARNING_RATE * (1 / (1 + (settings.MODEL_LEARNING_RATE * 20) * n_episodes))
        return max(newLearningRate, settings.MODEL_LEARNING_RATE_MIN)

    def _calculateNewLearningRate_Linear(self):
        scale = self._getEpisodeScaleTowardsZero()  # Calculate scale depending on max episode progress
        new_learning_rate = max(settings.MODEL_LEARNING_RATE * scale, 0)  # Calculate new learning rate
        return max(new_learning_rate, settings.MODEL_LEARNING_RATE_MIN)  # Return the learning rate, while respecting minimum learning rate

    def _getEpisodeScaleTowardsZero(self):
        n_episodes = self._getEpisodeCount()
        return max(1 - (n_episodes / settings.MODEL_MAX_EPISODES), 0)

    def _printCallbackStats(self, _locals):
        # Save a numbered model checkpoint every MODEL_EXPORT_RATE episodes
        if self.nEpisodes % settings.MODEL_EXPORT_RATE == 0:
            print(f"Saving new model: step {self.nEpisodes}")
            _locals['self'].save(f"TrainingLogs/BaselineAgentLogs/{self.runner.modelName}_{self.runner.modelNum}")
            self.runner.modelNum += 1

    def _testVehicles(self, _locals, runner_locals):
        print("Evaluating vehicles...")

        # Evaluate agent in environment
        # director = self.runner._getModelImitation()

        # TODO: If we want to evaluate on 'alt' maps, load them here!!!
        old_map = settings.CARLA_SIMS[0][2]
        load_new_map = old_map in settings.CARLA_EVALUATION_MAPS

        if load_new_map:
            self.runner.env.env_method('prepare_for_world_change', indices=[i for i in range(settings.CARS_PER_SIM)])
            changeMap(settings.CARLA_EVALUATION_MAPS[old_map])
            self.runner.env.env_method('reset_actors', indices=[i for i in range(settings.CARS_PER_SIM)])

        # obs = self.runner.env.reset()
        #
        # state = None
        # # When using VecEnv, done is a vector
        # done = [False for _ in range(self.runner.env.num_envs)]
        # rewards_accum = np.zeros(settings.CARS_PER_SIM)
        # for _ in range(settings.CARLA_TICKS_PER_EPISODE_STATIC):
        #     # We need to pass the previous state and a mask for recurrent policies
        #     # to reset lstm state when a new episode begins
        #     action, state = self.runner.model.predict(obs, state=state, mask=done, deterministic=False)
        #     obs, rewards, done, _ = self.runner.env.step(action)
        #     rewards_accum += rewards

        rewards_accum = np.zeros(settings.CARS_PER_SIM)
        state = np.zeros((21, 512))
        done = [False for _ in range(settings.CARS_PER_SIM)]

        observations = self.runner.env.reset()
        obs = observations[0]

        # Play some limited number of steps
        for i in range(settings.CARLA_TICKS_PER_EPISODE_STATIC):
            # action_prob = self.runner.model.proba_step(obs, state, done)
            # director_action_prob = director.proba_step(obs, state, done)

            action, value, state, neglog = self.runner.model.step(obs, state=state, mask=done, deterministic=False)

            # with open("evaluation_approx_kl", 'a+') as file:
            #     approx_kl = 0
            #     for i in range(len(action_prob)):
            #         np_director_action_prob = np.concatenate(director_action_prob[i]).ravel()
            #         np_action_prob = np.concatenate(action_prob[i]).ravel()
            #         approx_kl += np.mean(np.square(np.subtract(np_director_action_prob, np_action_prob)))
            #     approx_kl /= len(action_prob)
            #     file.write(f"{approx_kl}\n")

            observations, rewards, done, info = self.runner.env.step(action)
            obs = observations[0]
            rewards_accum += rewards

        if load_new_map:
            self.runner.env.env_method('prepare_for_world_change', indices=[i for i in range(settings.CARS_PER_SIM)])
            changeMap(old_map)
            self.runner.env.env_method('reset_actors', indices=[i for i in range(settings.CARS_PER_SIM)])

        self.runner.env.reset()
        self._exportRewardsToDB(rewards_accum, True)

        mean = rewards_accum.mean()
        print(f"Done evaluating: {mean}")

        summary = tf.Summary(value=[tf.Summary.Value(tag='EvaluationMean', simple_value=mean)])
        _locals['writer'].add_summary(summary, self.nEpisodes)
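# A small illustration of the exponential learning-rate decay implemented in
# _calculateNewLearningRate_Exponential above. The settings values below are assumed for
# the example only; the real values live in settings.py.
MODEL_LEARNING_RATE = 3e-4      # assumed starting learning rate
MODEL_LEARNING_RATE_MIN = 1e-5  # assumed floor

def decayed_learning_rate(n_episodes):
    new_lr = MODEL_LEARNING_RATE * (1 / (1 + (MODEL_LEARNING_RATE * 20) * n_episodes))
    return max(new_lr, MODEL_LEARNING_RATE_MIN)

# With these assumed constants the schedule decays smoothly towards the floor:
#   episode 0    -> 3.0e-4
#   episode 500  -> 3e-4 / (1 + 0.006 * 500)  = 7.5e-5
#   episode 5000 -> 3e-4 / (1 + 0.006 * 5000) = 9.7e-6, clamped to 1e-5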