def _update(self):
    """Prune inactive vessels and top the scene back up to ``MAX_VESSELS``.

    The main vessel is always kept. Every vessel in
    ``self.moving_distances`` that still has at least one entry in its
    ``reachable_vessels`` is kept as well. New vessels (each with its own
    ``ColavRewarder``) are then created around the main vessel's position
    until ``MAX_VESSELS`` vessels exist, and every vessel's ``obstacles``
    array is rebuilt from the static obstacles plus all *other* vessels.

    Returns
    -------
    list
        ``self.moving_obstacles`` sorted in place by vessel index.
    """
    # NOTE(review): the filter iterates `self.moving_distances`, not
    # `self.moving_obstacles` - confirm that attribute is intended.
    kept_vessels = [self.main_vessel]
    kept_vessels.extend(vessel for vessel in self.moving_distances
                        if len(vessel.reachable_vessels))
    self.moving_obstacles = kept_vessels

    while len(self.moving_obstacles) < MAX_VESSELS:
        new_vessel_count = self._vessel_count + 1
        vessel = Vessel(self.config,
                        width=self.config["vessel_width"],
                        index=new_vessel_count,
                        vessel_pos=self.main_vessel.position)
        self.rewarder_dict[vessel.index] = ColavRewarder(vessel)
        self.moving_obstacles.append(vessel)
        # The original message referenced an undefined name `i`, which
        # raised NameError as soon as a vessel was spawned.
        print(f'vessel {new_vessel_count} has been created')
        self._vessel_count += 1

    # Each vessel perceives the static obstacles plus every other vessel.
    for vessel in self.moving_obstacles:
        other_vessels = [
            x for x in self.moving_obstacles if x.index != vessel.index
        ]
        vessel.obstacles = np.hstack([self.static_obstacles, other_vessels])

    self.moving_obstacles.sort(key=lambda x: x.index)
    return self.moving_obstacles
def _generate(self):
    """Build a scripted scenario with ten moving vessel obstacles.

    The own vessel starts at arclength 0 of a straight path from
    (250, 100) to (250, 300). Five obstacle vessels follow circular
    trajectories with random phase, radius and angular speed; five more
    follow straight lines of constant x and decreasing y. All random
    draws come from ``self.rng``. When ``self.render_mode == '3d'`` a
    500x500 crop of the terrain file is loaded and handed to the 3D
    viewer.
    """
    waypoints = np.vstack([[250, 100], [250, 300]]).T
    self.path = Path(waypoints)

    init_state = self.path(0)
    init_angle = self.path.get_direction(0)
    self.vessel = Vessel(self.config,
                         np.hstack([init_state, init_angle]),
                         width=self.config["vessel_width"])
    prog = self.path.get_closest_arclength(self.vessel.position)
    self.path_prog_hist = np.array([prog])
    self.max_path_prog = prog

    self.obstacles = []
    self.vessel_obstacles = []

    # Vessels circling around centres spaced 70 units apart along y.
    # `self.rng.rand()` already returns a float; the original
    # `self.rng.rand()()` tried to call that float and raised TypeError.
    for vessel_idx in range(5):
        trajectory_shift = self.rng.rand() * 2 * np.pi
        trajectory_radius = self.rng.rand() * 40 + 30
        trajectory_speed = self.rng.rand() * 0.003 + 0.003
        other_vessel_trajectory = [
            (i,
             (250 + trajectory_radius *
              np.cos(trajectory_speed * i + trajectory_shift),
              150 + 70 * vessel_idx + trajectory_radius *
              np.sin(trajectory_speed * i + trajectory_shift)))
            for i in range(10000)
        ]
        other_vessel_obstacle = VesselObstacle(
            width=6, trajectory=other_vessel_trajectory)
        self.obstacles.append(other_vessel_obstacle)
        self.vessel_obstacles.append(other_vessel_obstacle)

    # Vessels moving in straight lines towards decreasing y.
    for vessel_idx in range(5):
        trajectory_start = self.rng.rand() * 200 + 150
        trajectory_speed = self.rng.rand() * 0.03 + 0.03
        trajectory_shift = 10 * self.rng.rand()
        other_vessel_trajectory = [
            (i, (245 + 2.5 * vessel_idx + trajectory_shift,
                 trajectory_start - 10 * trajectory_speed * i))
            for i in range(10000)
        ]
        other_vessel_obstacle = VesselObstacle(
            width=6, trajectory=other_vessel_trajectory)
        self.obstacles.append(other_vessel_obstacle)
        self.vessel_obstacles.append(other_vessel_obstacle)

    if self.render_mode == '3d':
        # Crop of the terrain data, scaled down by 7.5.
        self.all_terrain = np.load(TERRAIN_DATA_PATH)[1950:2450,
                                                      5320:5820] / 7.5
        self._viewer3d.create_world(self.all_terrain, 0, 0, 500, 500)
def _generate(self):
    """Create a single-encounter scenario with one scripted vessel.

    The own vessel starts on a straight path from (0, 0) to (0, 500).
    One obstacle vessel of width 30 starts 200 units away from the own
    vessel at a 70 degree angle and moves in a straight line at 0.5 units
    per time index along a -50 degree course for 5000 samples.
    ``self._update()`` is called once the scene is built.
    """
    self.path = Path(np.vstack([[0, 0], [0, 500]]).T)

    start_position = self.path(0)
    start_heading = self.path.get_direction(0)
    self.vessel = Vessel(self.config,
                         np.hstack([start_position, start_heading]))

    initial_progress = self.path.get_closest_arclength(self.vessel.position)
    self.path_prog_hist = np.array([initial_progress])
    self.max_path_prog = initial_progress

    own_position = self.vessel.position
    course = -50 * deg2rad
    spawn_distance = 200
    speed = 0.5
    spawn_bearing = 70 * deg2rad
    start_x = own_position[0] + spawn_distance * np.sin(spawn_bearing)
    start_y = own_position[1] + spawn_distance * np.cos(spawn_bearing)

    # The first sample is a list and the rest are tuples, exactly as the
    # scenario has always produced them.
    vessel_trajectory = [[0, (start_x, start_y)]]
    vessel_trajectory.extend(
        (t, (start_x + speed * np.sin(course) * t,
             start_y + speed * np.cos(course) * t))
        for t in range(1, 5000))

    self.obstacles = [VesselObstacle(width=30, trajectory=vessel_trajectory)]
    self._update()
def _generate(self):
    """Place the vessel on the existing path and collect all obstacles.

    ``self.all_obstacles`` receives one ``PolygonObstacle`` per perimeter
    in ``self.obstacle_perimeters`` that has more than three points and a
    valid boundary, plus one ``VesselObstacle`` per entry of
    ``self.other_vessels`` whose trajectory has more than two samples.
    ``self.obstacles`` starts empty; ``self._update()`` is called last.
    """
    start_position = self.path(0)
    start_heading = self.path.get_direction(0)
    self.vessel = Vessel(self.config,
                         np.hstack([start_position, start_heading]))

    initial_progress = self.path.get_closest_arclength(self.vessel.position)
    self.path_prog_hist = np.array([initial_progress])
    self.max_path_prog = initial_progress

    self.all_obstacles = []
    self.obstacles = []

    for perimeter in self.obstacle_perimeters:
        if len(perimeter) <= 3:
            continue
        polygon = PolygonObstacle(perimeter)
        if polygon.boundary.is_valid:
            self.all_obstacles.append(polygon)

    for vessel_width, vessel_trajectory, vessel_name in self.other_vessels:
        if len(vessel_trajectory) <= 2:
            continue
        self.all_obstacles.append(
            VesselObstacle(width=int(vessel_width),
                           trajectory=vessel_trajectory,
                           name=vessel_name))

    self._update()
def _generate(self):
    """Create the main vessel, the other agent vessels and static obstacles.

    One main vessel and ``MAX_VESSELS - 1`` further vessels (indices
    starting at 1, positioned relative to the main vessel) are created,
    each with its own ``ColavRewarder`` stored in ``self.rewarder_dict``.
    Eight circular static obstacles are then generated, and finally every
    vessel's ``obstacles`` array is set to the static obstacles plus all
    other vessels.
    """
    print('In GENERATE in MA')
    self.main_vessel = Vessel(self.config, width=self.config["vessel_width"])
    self.rewarder_dict[self.main_vessel.index] = ColavRewarder(
        self.main_vessel)
    self.rewarder = self.rewarder_dict[self.main_vessel.index]

    prog = 0
    self.path_prog_hist = np.array([prog])
    self.max_path_prog = prog
    print('Ownvessel created!')

    self.moving_obstacles = [self.main_vessel]
    self.static_obstacles = []
    self._vessel_count = 1

    # Adding moving obstacles (vessels)
    curr_vessel_count = self._vessel_count
    for i in range(curr_vessel_count, curr_vessel_count + MAX_VESSELS - 1):
        vessel = Vessel(self.config,
                        width=self.config["vessel_width"],
                        index=i,
                        vessel_pos=self.main_vessel.position)
        self.rewarder_dict[vessel.index] = ColavRewarder(vessel)
        self.moving_obstacles.append(vessel)
        print(f'vessel {i} has been created')
        self._vessel_count += 1

    # Adding static obstacles
    for _ in range(8):
        obstacle = CircularObstacle(*helpers.generate_obstacle(
            self.rng, self.path, self.vessel, displacement_dist_std=500))
        self.static_obstacles.append(obstacle)

    # Assign obstacles only after the static ones exist: np.hstack copies
    # its inputs, so doing this earlier (as the original did) left every
    # vessel with an obstacle array that contained no static obstacles.
    for vessel in self.moving_obstacles:
        other_vessels = [
            x for x in self.moving_obstacles if x.index != vessel.index
        ]
        vessel.obstacles = np.hstack([self.static_obstacles, other_vessels])

    print('Exiting GENERATE in MA')
def _generate(self):
    """Generate a random straight path over the terrain with one scripted
    vessel travelling the path in reverse.

    A start point and heading are sampled until the 500x500 terrain
    window around the start contains some positive terrain while the
    100x100 window around it contains none. The own vessel is placed at
    the path start with a ``ColregRewarder`` in test mode. A single
    ``VesselObstacle`` of width 10 starts at the path end and moves back
    along the path direction. Ends with ``self._update(force=True)``.
    """
    print('Generating')

    self.obstacle_perimeters = None
    self.all_terrain = np.load(TERRAIN_DATA_PATH) / 7.5

    path_length = 1.2 * (100 + self.rng.randint(400))

    # Rejection-sample a start point and heading.
    while True:
        x0 = self.rng.randint(1000, self.all_terrain.shape[0] - 1000)
        y0 = self.rng.randint(1000, self.all_terrain.shape[1] - 1000)
        heading = self.rng.rand() * 2 * np.pi

        near_window = self.all_terrain[x0 - 50:x0 + 50, y0 - 50:y0 + 50]
        wide_window = self.all_terrain[x0 - 250:x0 + 250,
                                       y0 - 250:y0 + 250]
        if wide_window.max() > 0 and near_window.max() == 0:
            break

    end_x = x0 + path_length * np.cos(heading)
    end_y = y0 + path_length * np.sin(heading)
    self.path = Path([[x0, end_x], [y0, end_y]])

    start_position = self.path(0)
    start_heading = self.path.get_direction(0)
    self.vessel = Vessel(self.config,
                         np.hstack([start_position, start_heading]))
    self.rewarder = ColregRewarder(self.vessel, test_mode=True)
    self._rewarder_class = ColregRewarder

    initial_progress = self.path.get_closest_arclength(self.vessel.position)
    self.path_prog_hist = np.array([initial_progress])
    self.max_path_prog = initial_progress

    self.obstacles, self.all_obstacles = [], []

    # One vessel starting at the path end and heading back along the path.
    trajectory_speed = 0.4 + 0.2 * self.rng.rand()
    vessel_trajectory = [[0, (end_x, end_y)]]
    vessel_trajectory.extend(
        (t, (end_x - trajectory_speed * np.cos(heading) * t,
             end_y - trajectory_speed * np.sin(heading) * t))
        for t in range(1, 10000))
    oncoming_vessel = VesselObstacle(width=10, trajectory=vessel_trajectory)
    self.obstacles.append(oncoming_vessel)
    self.all_obstacles.append(oncoming_vessel)

    print('Updating')
    self._update(force=True)
def _generate(self):
    """Generate a random curved path, the vessel and random obstacles.

    A ``RandomCurveThroughOrigin`` path with 2 to 5 waypoints is created.
    The vessel starts near the path start with a position jitter of up to
    +/-25 per axis and a random heading offset. ``self._n_moving_obst``
    vessel obstacles on straight constant-speed trajectories and
    ``self._n_static_obst`` circular obstacles are added, the rewarder is
    re-created, and ``self._update()`` is called.

    All random draws use ``self.rng``.
    """
    # Initializing path
    nwaypoints = int(np.floor(4 * self.rng.rand() + 2))
    self.path = RandomCurveThroughOrigin(self.rng, nwaypoints, length=800)

    # Initializing vessel
    init_state = self.path(0)
    init_angle = self.path.get_direction(0)
    init_state[0] += 50 * (self.rng.rand() - 0.5)
    init_state[1] += 50 * (self.rng.rand() - 0.5)
    init_angle = geom.princip(init_angle + 2 * np.pi *
                              (self.rng.rand() - 0.5))
    self.vessel = Vessel(self.config,
                         np.hstack([init_state, init_angle]),
                         width=self.config["vessel_width"])
    prog = 0
    self.path_prog_hist = np.array([prog])
    self.max_path_prog = prog

    self.obstacles = []

    # Adding moving obstacles
    for _ in range(self._n_moving_obst):
        obst_position, obst_radius = helpers.generate_obstacle(
            self.rng,
            self.path,
            self.vessel,
            obst_radius_mean=10,
            displacement_dist_std=500)
        obst_direction = self.rng.rand() * 2 * np.pi
        # Draw from the environment's own generator. The original used the
        # global `np.random.choice`, which ignores the seed of `self.rng`
        # used for every other draw in this method.
        obst_speed = self.rng.choice(vessel_speed_vals,
                                     p=vessel_speed_density)

        # Direction is constant per obstacle, so evaluate cos/sin once.
        cos_dir = np.cos(obst_direction)
        sin_dir = np.sin(obst_direction)
        other_vessel_trajectory = [
            (i, (obst_position[0] + i * obst_speed * cos_dir,
                 obst_position[1] + i * obst_speed * sin_dir))
            for i in range(10000)
        ]
        self.obstacles.append(
            VesselObstacle(width=obst_radius,
                           trajectory=other_vessel_trajectory))

    # Adding static obstacles
    for _ in range(self._n_static_obst):
        obstacle = CircularObstacle(*helpers.generate_obstacle(
            self.rng, self.path, self.vessel, displacement_dist_std=250))
        self.obstacles.append(obstacle)

    # Resetting rewarder instance
    self.rewarder = self._rewarder_class(self.vessel, self.test_mode)

    self._update()
def _generate(self):
    """Create the own ship, a ``MultiRewarder`` and five further ships.

    The five ships get indices starting at 1 and are positioned relative
    to the main vessel. There are no static obstacles, so each ship's
    ``obstacles`` array contains every other ship.
    """
    print('In GENERATE in MA')

    own_ship = Vessel(self.config, width=self.config["vessel_width"])
    self.main_vessel = own_ship

    initial_progress = 0
    self.path_prog_hist = np.array([initial_progress])
    self.max_path_prog = initial_progress
    self.rewarder = MultiRewarder(self.main_vessel)
    print(f'Ownship created!')

    self.moving_obstacles = [self.main_vessel]
    self.static_obstacles = []
    self.queued_vessels = []
    self._vessel_count = 1

    # Adding moving obstacles (ships)
    first_index = self._vessel_count
    for ship_index in range(first_index, first_index + 5):
        new_ship = Vessel(self.config,
                          width=self.config["vessel_width"],
                          index=ship_index,
                          vessel_pos=self.main_vessel.position)
        self.moving_obstacles.append(new_ship)
        print(f'Ship {ship_index} has been created')
        self._vessel_count += 1

    # Every ship sees the static obstacles and all ships except itself.
    for ship in self.moving_obstacles:
        others = [
            candidate for candidate in self.moving_obstacles
            if candidate.index != ship.index
        ]
        ship.obstacles = np.hstack([self.static_obstacles, others])

    print('Exiting GENERATE in MA')
def _generate(self):
    """Create an obstacle-free scenario on a short straight path.

    The path runs from (25, 10) to (25, 200) and the vessel starts at
    arclength 0. In '3d' render mode a flat 50x50 terrain is created.
    """
    self.path = Path(np.vstack([[25, 10], [25, 200]]).T)

    start_position = self.path(0)
    start_heading = self.path.get_direction(0)
    self.vessel = Vessel(self.config,
                         np.hstack([start_position, start_heading]),
                         width=self.config["vessel_width"])

    initial_progress = self.path.get_closest_arclength(self.vessel.position)
    self.path_prog_hist = np.array([initial_progress])
    self.max_path_prog = initial_progress

    if self.render_mode != '3d':
        return
    self.all_terrain = np.zeros((50, 50), dtype=float)
    self._viewer3d.create_world(self.all_terrain, 0, 0, 50, 50)
def _generate(self):
    """Place 20 circular obstacles of growing radius directly on the path.

    The path runs from (0, 0) to (1100, 1100). Obstacle ``k`` has radius
    ``10 + 10 * k**1.5`` and is centred on the path, with the arclength
    advanced by its diameter plus 30 before each placement. Obstacles are
    appended to the existing ``self.obstacles``.
    """
    self.path = Path([[0, 1100], [0, 1100]])

    start_position = self.path(0)
    start_heading = self.path.get_direction(0)
    self.vessel = Vessel(self.config,
                         np.hstack([start_position, start_heading]))

    initial_progress = self.path.get_closest_arclength(self.vessel.position)
    self.path_prog_hist = np.array([initial_progress])
    self.max_path_prog = initial_progress

    arclength = 30
    for k in range(20):
        radius = 10 + 10 * k**1.5
        arclength += radius * 2 + 30
        centre = self.path(arclength)
        self.obstacles.append(CircularObstacle(centre, radius))
def _generate(self):
    """Create a curved path lined on both sides by circular obstacles.

    The path has 500 waypoints ``(t * cos(t / 100), 2 * t)``. Starting at
    arclength 40 and stepping by one obstacle diameter (10) while still
    short of the path length, a pair of radius-5 obstacles is placed
    symmetrically about the path, offset perpendicular to the path
    direction by ``140 - 120 / (1 + exp(-0.005 * s))``. Obstacles are
    appended to the existing ``self.obstacles``.
    """
    waypoints = np.vstack(
        [[t * np.cos(t / 100), 2 * t] for t in range(500)]).T
    self.path = Path(waypoints)

    start_position = self.path(0)
    start_heading = self.path.get_direction(0)
    self.vessel = Vessel(self.config,
                         np.hstack([start_position, start_heading]))

    initial_progress = self.path.get_closest_arclength(self.vessel.position)
    self.path_prog_hist = np.array([initial_progress])
    self.max_path_prog = initial_progress

    radius = 5
    arclength = 30 + 2 * radius
    while arclength < self.path.length:
        offset_dist = 140 - 120 / (1 + np.exp(-0.005 * arclength))
        centre = self.path(arclength)
        # Perpendicular to the local path direction.
        offset_angle = self.path.get_direction(arclength) - np.pi / 2
        offset = offset_dist * np.array(
            [np.cos(offset_angle), np.sin(offset_angle)])

        for side_centre in (centre + offset, centre - offset):
            self.obstacles.append(CircularObstacle(side_centre, radius))

        arclength += 2 * radius
def _generate(self):
    """Create an arc of 21 circular obstacles across a straight path.

    The path runs from (0, 0) to (0, 500). Obstacles of radius 25 are
    centred on a circle of radius 100 around the origin at angles evenly
    spaced from pi/4 to 3*pi/4 inclusive. Obstacles are appended to the
    existing ``self.obstacles``.
    """
    self.path = Path(np.vstack([[0, 0], [0, 500]]).T)

    start_position = self.path(0)
    start_heading = self.path.get_direction(0)
    self.vessel = Vessel(self.config,
                         np.hstack([start_position, start_heading]))

    initial_progress = self.path.get_closest_arclength(self.vessel.position)
    self.path_prog_hist = np.array([initial_progress])
    self.max_path_prog = initial_progress

    n_intervals = 20
    arc_radius = 100
    obstacle_radius = 25
    for step in range(n_intervals + 1):
        theta = np.pi / 4 + step / n_intervals * np.pi / 2
        centre = np.array(
            [np.cos(theta) * arc_radius,
             np.sin(theta) * arc_radius])
        self.obstacles.append(CircularObstacle(centre, obstacle_radius))
def _generate(self):
    """Create two vessels on opposing straight paths.

    The main vessel follows a path from (0, 0) to (0, 500); a second
    vessel with index 1 follows a path from (0, 150) to (0, -400). Both
    have width 2 and their own ``ColavRewarder``; the main vessel's
    rewarder becomes ``self.rewarder``. Each vessel's ``obstacles`` array
    holds the other vessel.
    """
    # Main vessel
    main_path = Path(np.vstack([[0, 0], [0, 500]]).T)
    main_position = main_path(0)
    main_heading = main_path.get_direction(0)
    main_state = np.hstack([main_position, main_heading])
    self.main_vessel = Vessel(self.config,
                              init_state=main_state,
                              init_path=main_path,
                              width=2)
    self.main_vessel.path = main_path

    main_rewarder = ColavRewarder(self.main_vessel)
    self.rewarder_dict[self.main_vessel.index] = main_rewarder
    self.rewarder = self.rewarder_dict[self.main_vessel.index]

    initial_progress = 0
    self.path_prog_hist = np.array([initial_progress])
    self.max_path_prog = initial_progress
    self.moving_obstacles = [self.main_vessel]

    # Adding moving obstacle
    other_path = Path(np.vstack([[0, 150], [0, -400]]).T)
    other_position = other_path(0)
    other_heading = other_path.get_direction(0)
    other_state = np.hstack([other_position, other_heading])
    other_vessel = Vessel(self.config,
                          init_state=other_state,
                          init_path=other_path,
                          index=1,
                          width=2)
    self.rewarder_dict[other_vessel.index] = ColavRewarder(other_vessel)
    self.moving_obstacles.append(other_vessel)
    other_vessel.path = other_path

    for current in self.moving_obstacles:
        remaining = [
            candidate for candidate in self.moving_obstacles
            if candidate.index != current.index
        ]
        current.obstacles = np.hstack([remaining])

    print('Generated vessels!')
def _generate(self):
    """Create nine circular obstacles alternating sides of a bent path.

    The path passes through (0, 0), (0, 500), (50, 600) and (50, 1000).
    Nine radius-20 obstacles are placed every 210 units of arclength,
    starting at 260. Each is offset from the path point by one radius in
    y and by one radius in x with alternating sign. Obstacles are
    appended to the existing ``self.obstacles``.
    """
    self.path = Path([[0, 0, 50, 50], [0, 500, 600, 1000]])

    start_position = self.path(0)
    start_heading = self.path.get_direction(0)
    self.vessel = Vessel(self.config,
                         np.hstack([start_position, start_heading]))

    initial_progress = self.path.get_closest_arclength(self.vessel.position)
    self.path_prog_hist = np.array([initial_progress])
    self.max_path_prog = initial_progress

    radius = 20
    arclength = 50
    for k in range(9):
        arclength += radius * 2 + 170
        on_path = self.path(arclength)
        # The x offset flips sign every obstacle: -r, +r, -r, ...
        offset = np.array([radius * (-1)**(k + 1), radius])
        self.obstacles.append(CircularObstacle(on_path + offset, radius))
def _generate(self):
    """Build the scenario from recorded AIS traffic and obstacle perimeters.

    When ``self.vessel_data_path`` is set, the CSV is grouped by
    ``Vessel_Name`` and randomly chosen vessels are processed until
    ``self.n_vessels`` trajectories are found or no names remain. Per
    vessel, rows are walked in order, skipping rows with
    ``AIS_Length_Overall`` below 12. Consecutive rows less than 0.1 day
    apart extend the current track; a speed outside
    ``[VESSEL_SPEED_RANGE_LOWER, VESSEL_SPEED_RANGE_UPPER]`` discards it.
    A larger gap closes the track, which is stored from a random start
    index if it has more than one point and the row's length is a
    positive number.

    A random subset of the trajectories becomes ``self.other_vessels``.
    The vessel is placed at the path start, polygon obstacles are built
    from ``self.obstacle_perimeters`` (asserted valid), and one
    ``VesselObstacle`` is added per trajectory with more than two
    samples. Ends with ``self._update()``.
    """
    vessel_trajectories = []

    if self.vessel_data_path is not None:
        traffic = pd.read_csv(self.vessel_data_path)
        vessels = dict(tuple(traffic.groupby('Vessel_Name')))
        vessel_names = sorted(list(vessels.keys()))

        while len(vessel_trajectories) < self.n_vessels and len(
                vessel_names) != 0:
            pick = self.rng.randint(0, len(vessel_names))
            vessel_name = vessel_names.pop(pick)
            frame = vessels[vessel_name]

            # Convert timestamps to offsets from the vessel's first record.
            frame['AIS_Timestamp'] = pd.to_datetime(frame['AIS_Timestamp'])
            frame['AIS_Timestamp'] -= frame.iloc[0]['AIS_Timestamp']

            start_timestamp = None
            last_timestamp = pd.to_timedelta(0, unit='D')
            last_east = None
            last_north = None
            cutoff_dt = pd.to_timedelta(0.1, unit='D')
            path = []

            for _, row in frame.iterrows():
                east = row['AIS_East'] / 10.0
                north = row['AIS_North'] / 10.0
                if row['AIS_Length_Overall'] < 12:
                    continue
                if len(path) == 0:
                    start_timestamp = row['AIS_Timestamp']

                gap = row['AIS_Timestamp'] - last_timestamp
                if gap < cutoff_dt:
                    if last_east is not None:
                        dx = east - last_east
                        dy = north - last_north
                        distance = np.sqrt(dx**2 + dy**2)
                        seconds = gap.seconds
                        speed = distance / seconds
                        if (speed < VESSEL_SPEED_RANGE_LOWER
                                or speed > VESSEL_SPEED_RANGE_UPPER):
                            # Implausible speed: drop the track. The
                            # last_* values are deliberately not updated.
                            path = []
                            continue
                    elapsed = int((row['AIS_Timestamp'] -
                                   start_timestamp).total_seconds())
                    path.append(
                        (elapsed, (east - self.x0, north - self.y0)))
                else:
                    # Gap too large: close the current track.
                    if len(path) > 1 and not np.isnan(
                            row['AIS_Length_Overall']
                    ) and row['AIS_Length_Overall'] > 0:
                        start_index = self.rng.randint(0, len(path) - 1)
                        vessel_trajectories.append(
                            (row['AIS_Length_Overall'] / 10.0,
                             path[start_index:], vessel_name))
                    path = []

                last_timestamp = row['AIS_Timestamp']
                last_east = east
                last_north = north

    n_found = len(vessel_trajectories)
    other_vessel_indeces = self.rng.choice(list(range(n_found)),
                                           min(n_found, self.n_vessels),
                                           replace=False)
    self.other_vessels = [
        vessel_trajectories[idx] for idx in other_vessel_indeces
    ]

    start_position = self.path(0)
    start_heading = self.path.get_direction(0)
    self.vessel = Vessel(self.config,
                         np.hstack([start_position, start_heading]),
                         width=self.config["vessel_width"])
    initial_progress = self.path.get_closest_arclength(self.vessel.position)
    self.path_prog_hist = np.array([initial_progress])
    self.max_path_prog = initial_progress

    self.all_obstacles = []
    self.obstacles = []
    if self.obstacle_perimeters is not None:
        for perimeter in self.obstacle_perimeters:
            if len(perimeter) <= 3:
                continue
            obstacle = PolygonObstacle(perimeter)
            assert obstacle.boundary.is_valid, 'The added obstacle is invalid!'
            self.all_obstacles.append(obstacle)
            self.obstacles.append(obstacle)

    if self.verbose:
        print('Added {} obstacles'.format(len(self.obstacles)))
        print('Generating {} vessel trajectories'.format(
            len(self.other_vessels)))

    for vessel_width, vessel_trajectory, vessel_name in self.other_vessels:
        if len(vessel_trajectory) <= 2:
            continue
        vessel_obstacle = VesselObstacle(width=int(vessel_width),
                                         trajectory=vessel_trajectory,
                                         name=vessel_name)
        self.all_obstacles.append(vessel_obstacle)
        self.obstacles.append(vessel_obstacle)

    self._update()
class MultiAgent_DDPG(BaseEnvironment):
    """Multi-vessel environment in which the agent steers vessels in turn.

    Each call to :meth:`step` moves exactly one vessel: the main vessel
    when the queue is empty, otherwise the next queued vessel. The
    observation returned is that of the vessel that moves next.
    """

    metadata = {
        'render.modes': ['human', 'rgb_array', 'state_pixels'],
        'video.frames_per_second': render2d.FPS
    }

    # Number of ships spawned per batch, and the step interval between
    # batches.
    SPAWN_BATCH_SIZE = 5
    SPAWN_INTERVAL = 150

    def __init__(self,
                 env_config,
                 test_mode=False,
                 render_mode='2d',
                 verbose=False):
        """
        The __init__ method declares all class attributes and calls
        self.reset() to initialize them properly.

        Parameters
        ----------
            env_config : dict
                Configuration parameters for the environment.
                The default values are set in __init__.py
            test_mode : bool
                If test_mode is True, the environment will not be
                automatically reset due to too low cumulative reward or
                too large distance from the path.
            render_mode : {'2d', '3d', 'both'}
                Whether to use 2d or 3d rendering. 'both' is currently
                broken.
            verbose
                Whether to print debugging information.
        """
        self.test_mode = test_mode
        self.render_mode = render_mode
        self.verbose = verbose
        self.config = env_config

        # Setting dimension of observation vector
        self.n_observations = len(
            Vessel.NAVIGATION_FEATURES) + 4 * self.config["n_sectors"]

        self.episode = 0
        self.total_t_steps = 0
        self.t_step = 0
        self.history = []

        # Declaring attributes. `path`, `obstacles` and `vessel` are
        # read-only properties below, so they are not assigned here.
        self.main_vessel = None
        self.reached_goal = None
        self.collision = None
        self.progress = None
        self.cumulative_reward = None
        self.last_reward = None
        self.last_episode = None
        self.rng = None
        self._tmp_storage = None

        self._action_space = gym.spaces.Box(low=np.array([-1, -1]),
                                            high=np.array([1, 1]),
                                            dtype=np.float32)
        self._observation_space = gym.spaces.Box(
            low=np.array([-1] * self.n_observations),
            high=np.array([1] * self.n_observations),
            dtype=np.float32)

        # Initializing rendering. The other environments in this file use
        # the underscore-prefixed handles, so declare those here too; the
        # unprefixed names are kept for backward compatibility.
        # NOTE(review): presumably the init_env_viewer helpers assign
        # `_viewer2d` / `_viewer3d` - confirm against the render modules.
        self.viewer2d = None
        self.viewer3d = None
        self._viewer2d = None
        self._viewer3d = None
        if self.render_mode == '2d' or self.render_mode == 'both':
            render2d.init_env_viewer(self)
        if self.render_mode == '3d' or self.render_mode == 'both':
            render3d.init_env_viewer(self,
                                     autocamera=self.config["autocamera3d"])

        self.reset()

    @property
    def path(self):
        """Path of the main vessel."""
        return self.main_vessel.path

    @property
    def obstacles(self):
        """Obstacles as seen by the main vessel."""
        return self.main_vessel.obstacles

    @property
    def vessel(self):
        """Alias for the main vessel."""
        return self.main_vessel

    def _spawn_ships(self, n_ships=None):
        """Create ``n_ships`` new ships and add them to ``moving_obstacles``.

        Indices continue from ``self._vessel_count``, which is incremented
        once per created ship.
        """
        if n_ships is None:
            n_ships = self.SPAWN_BATCH_SIZE
        first_index = self._vessel_count
        for i in range(first_index, first_index + n_ships):
            ship = Vessel(self.config,
                          width=self.config["vessel_width"],
                          index=i,
                          vessel_pos=self.main_vessel.position)
            self.moving_obstacles.append(ship)
            print(f'Ship {i} has been created')
            self._vessel_count += 1

    def _refresh_ship_obstacles(self):
        """Rebuild every ship's obstacle array from the current scene."""
        for ship in self.moving_obstacles:
            other_ships = [
                x for x in self.moving_obstacles if x.index != ship.index
            ]
            ship.obstacles = np.hstack([self.static_obstacles, other_ships])

    def _generate(self):
        """Create the own ship, its rewarder and an initial batch of ships."""
        print('In GENERATE in MA')
        self.main_vessel = Vessel(self.config,
                                  width=self.config["vessel_width"])
        prog = 0
        self.path_prog_hist = np.array([prog])
        self.max_path_prog = prog
        self.rewarder = MultiRewarder(self.main_vessel)
        print('Ownship created!')

        self.moving_obstacles = [self.main_vessel]
        self.static_obstacles = []
        self.queued_vessels = []
        self._vessel_count = 1

        # Adding moving obstacles (ships)
        self._spawn_ships()
        self._refresh_ship_obstacles()
        print('Exiting GENERATE in MA')

    def step(self, action: list) -> (np.ndarray, float, bool, dict):
        """
        Steps the environment by one timestep. Returns observation,
        reward, done, info.

        Parameters
        ----------
        action : np.ndarray
            [thrust_input, torque_input]. The thrust component is rescaled
            in place from [-1, 1] to [0, 1].

        Returns
        -------
        obs : np.ndarray
            Observation of the vessel that will move on the next call.
        reward : double
            The reward for performing action at this timestep.
        done : bool
            If True the episode is ended, due to either a collision or
            having reached the goal position.
        info : dict
            Dictionary with data used for reporting or debugging
        """
        print('IN STEP')
        if len(self.queued_vessels) == 0:
            current_vessel = self.main_vessel
        else:
            current_vessel = self.queued_vessels.pop(0)

        current_index = current_vessel.index
        print(f'Current vessel is ship {current_index}')

        # Done to be compatible with RL algorithms that require symmetric
        # action spaces
        action[0] = (action[0] + 1) / 2
        current_vessel.step(action)

        reward = self.rewarder.calculate(current_vessel)
        # Keep `last_reward` current; it is declared in __init__ but was
        # never updated by this method.
        self.last_reward = reward
        self.cumulative_reward += reward

        vessel_data = self.main_vessel.req_latest_data()
        self.collision = vessel_data['collision']
        self.reached_goal = vessel_data['reached_goal']
        self.progress = vessel_data['progress']

        info = {}
        info['collision'] = self.collision
        info['reached_goal'] = self.reached_goal
        info['progress'] = self.progress

        # Testing criteria for ending the episode
        done = self._isdone()
        self._save_latest_step()

        # NOTE(review): this aliases the main vessel's `nearby_vessels`
        # list, so ships spawned below are appended to that list as well.
        self.moving_obstacles = self.main_vessel.nearby_vessels

        # Adding moving obstacles (ships) at a fixed step interval
        if self.t_step % self.SPAWN_INTERVAL == 0:
            self._spawn_ships()
            self._refresh_ship_obstacles()

        if len(self.queued_vessels) == 0:
            # Everyone has moved: queue all non-main vessels again and let
            # the main vessel go first.
            self.queued_vessels = [
                x for x in self.moving_obstacles if x.index != 0
            ]
            next_vessel = self.main_vessel
        else:
            next_vessel = self.queued_vessels[0]

        obs = next_vessel.observe()
        self.t_step += 1
        print('EXITIG STEP')

        return (obs, reward, done, info)

    def _update(self):
        """Drop collided ships and rebuild every ship's obstacle array."""
        valid_ships = [self.main_vessel]
        for ship in self.moving_obstacles:
            if (not ship.collision) and ship.index != 0:
                valid_ships.append(ship)

        print([x.index for x in valid_ships])
        self.moving_obstacles = valid_ships
        self._refresh_ship_obstacles()

    def observe(self):
        """Return the main vessel's observation vector.

        Concatenates the navigation states with the sector closenesses,
        sector velocities and sector moving-obstacle values returned by
        the main vessel's ``perceive``.
        """
        navigation_states = self.main_vessel.navigate(self.path)
        (sector_closenesses, sector_velocities,
         sector_moving_obstacles) = self.main_vessel.perceive(self.obstacles)

        obs = np.concatenate([
            navigation_states, sector_closenesses, sector_velocities,
            sector_moving_obstacles
        ])
        return (obs)
def step(self, action: list) -> (np.ndarray, float, bool, dict):
    """
    Advance the environment by moving exactly one vessel.

    The main vessel moves when the queue is empty; otherwise the first
    queued vessel is popped and moved. Every 150 steps five new ships are
    spawned and all obstacle arrays are rebuilt.

    Parameters
    ----------
    action : np.ndarray
        [thrust_input, torque_input]. The thrust component is rescaled in
        place from [-1, 1] to [0, 1].

    Returns
    -------
    obs : np.ndarray
        Observation of the vessel that will move on the next call.
    reward : double
        Reward computed by the rewarder for the vessel that just moved.
    done : bool
        Result of ``self._isdone()`` after this step.
    info : dict
        Collision, goal and progress flags of the main vessel.
    """
    print('IN STEP')
    if self.queued_vessels:
        acting_vessel = self.queued_vessels.pop(0)
    else:
        acting_vessel = self.main_vessel

    acting_index = acting_vessel.index
    print(f'Current vessel is ship {acting_index}')

    # Map thrust from the symmetric action space onto [0, 1].
    action[0] = (action[0] + 1) / 2
    acting_vessel.step(action)

    reward = self.rewarder.calculate(acting_vessel)
    self.cumulative_reward += reward

    latest = self.main_vessel.req_latest_data()
    self.collision = latest['collision']
    self.reached_goal = latest['reached_goal']
    self.progress = latest['progress']
    info = {
        'collision': self.collision,
        'reached_goal': self.reached_goal,
        'progress': self.progress,
    }

    done = self._isdone()
    self._save_latest_step()

    self.moving_obstacles = self.main_vessel.nearby_vessels

    # Adding moving obstacles (ships)
    if self.t_step % 150 == 0:
        first_index = self._vessel_count
        for ship_index in range(first_index, first_index + 5):
            new_ship = Vessel(self.config,
                              width=self.config["vessel_width"],
                              index=ship_index,
                              vessel_pos=self.main_vessel.position)
            self.moving_obstacles.append(new_ship)
            print(f'Ship {ship_index} has been created')
            self._vessel_count += 1

        for ship in self.moving_obstacles:
            others = [
                candidate for candidate in self.moving_obstacles
                if candidate.index != ship.index
            ]
            ship.obstacles = np.hstack([self.static_obstacles, others])

    if self.queued_vessels:
        upcoming_vessel = self.queued_vessels[0]
    else:
        # Queue exhausted: refill it with every non-main vessel.
        self.queued_vessels = [
            candidate for candidate in self.moving_obstacles
            if candidate.index != 0
        ]
        upcoming_vessel = self.main_vessel

    obs = upcoming_vessel.observe()
    self.t_step += 1
    print('EXITIG STEP')
    return (obs, reward, done, info)
class TwoVessel_HeadOn(BaseEnvironment):
    """Two vessels on opposing straight paths.

    The main vessel is driven by the external agent through :meth:`step`;
    the second vessel is driven by a pre-trained PPO2 agent loaded in
    ``__init__``.
    """

    # Location of the pre-trained agent that steers the opposing vessel.
    # NOTE(review): this is an absolute path on the original developer's
    # machine; pass `agent_path` to use a different model.
    DEFAULT_AGENT_PATH = (
        'C:/Users/amalih/OneDrive - NTNU/github/logs/agents/'
        'MultiAgentPPO-v0/1591171914ppo/79288.pkl')

    def __init__(self,
                 env_config,
                 test_mode=False,
                 render_mode='2d',
                 verbose=False,
                 agent_path=None):
        """
        The __init__ method declares all class attributes and calls
        self.reset() to initialize them properly.

        Parameters
        ----------
            env_config : dict
                Configuration parameters for the environment.
                The default values are set in __init__.py
            test_mode : bool
                If test_mode is True, the environment will not be
                automatically reset due to too low cumulative reward or
                too large distance from the path.
            render_mode : {'2d', '3d', 'both'}
                Whether to use 2d or 3d rendering. 'both' is currently
                broken.
            verbose
                Whether to print debugging information.
            agent_path : str, optional
                File to load the opposing vessel's PPO2 agent from.
                Defaults to ``DEFAULT_AGENT_PATH``.
        """
        self.test_mode = test_mode
        self.render_mode = render_mode
        self.verbose = verbose
        self.config = env_config

        # Setting dimension of observation vector
        self.n_observations = len(
            Vessel.NAVIGATION_FEATURES
        ) + 3 * self.config["n_sectors"] + ColavRewarder.N_INSIGHTS

        self.episode = 0
        self.total_t_steps = 0
        self.t_step = 0
        self.history = []

        # Declaring attributes. `path`, `obstacles` and `vessel` are
        # read-only properties below, so they are not assigned here.
        self.main_vessel = None
        self.reached_goal = None
        self.collision = None
        self.progress = None
        self.cumulative_reward = None
        self.last_reward = None
        self.last_episode = None
        self.rng = None
        self._tmp_storage = None

        self._action_space = gym.spaces.Box(low=np.array([-1, -1]),
                                            high=np.array([1, 1]),
                                            dtype=np.float32)
        self._observation_space = gym.spaces.Box(
            low=np.array([-1] * self.n_observations),
            high=np.array([1] * self.n_observations),
            dtype=np.float32)

        # Initializing rendering
        self._viewer2d = None
        self._viewer3d = None
        if self.render_mode == '2d' or self.render_mode == 'both':
            render2d.init_env_viewer(self)
        if self.render_mode == '3d' or self.render_mode == 'both':
            render3d.init_env_viewer(self,
                                     autocamera=self.config["autocamera3d"])

        if agent_path is None:
            agent_path = self.DEFAULT_AGENT_PATH
        self.agent = PPO2.load(agent_path)

        self.rewarder_dict = {}

        self.reset()
        print('Init done')

    def _generate(self):
        """Create the main vessel and the opposing vessel.

        The main vessel follows a path from (0, 0) to (0, 500); the
        opposing vessel (index 1) follows a path from (0, 150) to
        (0, -400). Each gets a ``ColavRewarder`` and sees the other as its
        only obstacle.
        """
        waypoints1 = np.vstack([[0, 0], [0, 500]]).T
        path1 = Path(waypoints1)

        init_pos1 = path1(0)
        init_angle1 = path1.get_direction(0)
        init_state1 = np.hstack([init_pos1, init_angle1])

        self.main_vessel = Vessel(self.config,
                                  init_state=init_state1,
                                  init_path=path1,
                                  width=2)
        self.main_vessel.path = path1
        self.rewarder_dict[self.main_vessel.index] = ColavRewarder(
            self.main_vessel)
        self.rewarder = self.rewarder_dict[self.main_vessel.index]

        prog = 0
        self.path_prog_hist = np.array([prog])
        self.max_path_prog = prog
        self.moving_obstacles = [self.main_vessel]

        # Adding moving obstacle
        waypoints2 = np.vstack([[0, 150], [0, -400]]).T
        path2 = Path(waypoints2)

        init_pos2 = path2(0)
        init_angle2 = path2.get_direction(0)
        init_state2 = np.hstack([init_pos2, init_angle2])

        vessel = Vessel(self.config,
                        init_state=init_state2,
                        init_path=path2,
                        index=1,
                        width=2)
        self.rewarder_dict[vessel.index] = ColavRewarder(vessel)
        self.moving_obstacles.append(vessel)
        vessel.path = path2

        for vessel in self.moving_obstacles:
            other_vessels = [
                x for x in self.moving_obstacles if x.index != vessel.index
            ]
            vessel.obstacles = np.hstack([other_vessels])

        print('Generated vessels!')

    @property
    def path(self):
        """Path of the main vessel."""
        return self.main_vessel.path

    @property
    def obstacles(self):
        """Obstacles as seen by the main vessel."""
        return self.main_vessel.obstacles

    @property
    def vessel(self):
        """Alias for the main vessel."""
        return self.main_vessel

    def step(self, action: list) -> (np.ndarray, float, bool, dict):
        """
        Steps the environment by one timestep. Returns observation,
        reward, done, info.

        The main vessel applies ``action``; every other vessel is then
        stepped with an action predicted by ``self.agent`` from its own
        observation prefixed with its rewarder's insight.

        Parameters
        ----------
        action : np.ndarray
            [thrust_input, torque_input].

        Returns
        -------
        obs : np.ndarray
            Observation of the environment after action is performed.
        reward : double
            The reward for performing action at this timestep.
        done : bool
            If True the episode is ended, due to either a collision or
            having reached the goal position.
        info : dict
            Dictionary with data used for reporting or debugging
        """
        # Done to be compatible with RL algorithms that require symmetric
        # action spaces
        action[0] = (action[0] + 1) / 2
        if np.isnan(action).any():
            action = np.zeros(action.shape)
        self.main_vessel.step(action)

        for vessel in self.moving_obstacles:
            if vessel.index == 0:
                continue
            vessel_rewarder = self.rewarder_dict[vessel.index]
            vessel_obs = vessel.observe()
            # Return value unused; the call is kept in case the rewarder
            # updates internal state read by insight().
            # NOTE(review): confirm against ColavRewarder.
            vessel_rewarder.calculate()
            insight = vessel_rewarder.insight()
            agent_obs = np.concatenate([insight, vessel_obs])
            agent_action, _states = self.agent.predict(agent_obs,
                                                       deterministic=True)
            agent_action[0] = (agent_action[0] + 1) / 2
            vessel.step(agent_action)

        # Getting observation vector
        obs = self.observe()
        vessel_data = self.main_vessel.req_latest_data()
        self.collision = vessel_data['collision']
        self.reached_goal = vessel_data['reached_goal']
        self.progress = vessel_data['progress']

        # Receiving agent's reward. The cumulative reward is intentionally
        # left untouched, as in the original implementation.
        reward = self.rewarder.calculate()
        self.last_reward = reward

        info = {}
        info['collision'] = self.collision
        info['reached_goal'] = self.reached_goal
        info['progress'] = self.progress

        # Testing criteria for ending the episode. This must run after the
        # collision/goal flags above are refreshed; the original evaluated
        # it first, so termination lagged one step behind the event.
        done = self._isdone()
        self._save_latest_step()

        self.t_step += 1

        return (obs, reward, done, info)