import numpy as np  # needed by the constructors below; `Space` is assumed to be defined elsewhere in this repo


def __init__(
        self,
        action_type='discrete',  # 'discrete' {0,1} or 'continuous' [0,1]
        debug=False,
        max_steps=30,
        action_prob=0.5,
        bridge_prob=-1):
    self.debug = debug

    self.n_states = len(adjacency)
    self.n_max_action = max(len(v) for v in adjacency.values())
    self.action_space = Space(size=self.n_max_action)
    self.observation_space = Space(size=self.n_states)

    self.max_steps = max_steps
    self.action_prob = action_prob
    self.bridge_prob = action_prob if bridge_prob < 0 else bridge_prob
    self.all_action_probs = self._get_probs()

    # Reward structure
    self.step_reward = -0.1
    self.self_loop_reward = -1
    self.goal_reward = 10

    # self.start_state = np.random.randint(self.n_states)
    self.goal_state = 0

    self.reset()
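
# Usage sketch (illustrative only): the enclosing class is not shown in this
# excerpt, so the name `GraphEnv` and the toy `adjacency` dict are assumptions.
#   adjacency = {0: [1], 1: [0, 2], 2: [1]}   # 3-state chain
#   env = GraphEnv(action_type='discrete', max_steps=30, action_prob=0.5)
#   env.n_states      -> 3
#   env.n_max_action  -> 2   (state 1 has two neighbours)
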
def __init__(
        self,
        action_type='discrete',  # 'discrete' {0,1} or 'continuous' [0,1]
        n_actions=4,
        oracle=-1,
        speed=4,
        debug=True,
        max_step_length=0.15,
        max_steps=2):
    self.debug = debug

    # NS (non-stationary) specific settings
    self.oracle = oracle
    self.speed = speed
    self.frequency = self.speed * 0.001
    self.episode = 0

    self.n_actions = n_actions
    self.action_space = Space(size=n_actions)
    self.observation_space = Space(low=np.zeros(2, dtype=np.float32),
                                   high=np.ones(2, dtype=np.float32),
                                   dtype=np.float32)
    self.disp_flag = False

    self.motions = self.get_action_motions(self.n_actions)

    self.wall_width = 0.05
    self.step_unit = self.wall_width - 0.005
    self.repeat = int(max_step_length / self.step_unit)
    self.max_horizon = int(max_steps / max_step_length)

    self.step_reward = -0.5
    self.collision_reward = 0  # -0.05
    self.movement_reward = 0  # 1
    self.randomness = 0.25

    # No lidars used
    self.n_lidar = 0
    # n_lidar evenly spaced directions; the first and last angles coincide,
    # so the last one is dropped.
    self.angles = np.linspace(0, 2 * np.pi, self.n_lidar + 1)[:-1]
    self.lidar_angles = list(zip(np.cos(self.angles), np.sin(self.angles)))
    self.static_obstacles = self.get_static_obstacles()

    if debug:
        self.heatmap_scale = 99
        self.heatmap = np.zeros(
            (self.heatmap_scale + 1, self.heatmap_scale + 1))

    self.reset()
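
# Worked defaults (derived from the constants above):
#   step_unit   = 0.05 - 0.005      = 0.045
#   repeat      = int(0.15 / 0.045) = 3 sub-steps per action
#   max_horizon = int(2 / 0.15)     = 13 decisions per episode
# Usage sketch (the class name `NSMaze` is assumed; not shown in this excerpt):
#   env = NSMaze(n_actions=4, speed=4, max_step_length=0.15, max_steps=2)
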
def __init__(
        self,
        action_type='discrete',  # 'discrete' {0,1} or 'continuous' [0,1]
        n_actions=8,
        debug=True,
        max_step_length=0.25,
        max_steps=20,
        difficulty=1,
        action_prob=0.8):
    print("difficulty", difficulty)
    self.debug = debug
    self.difficulty = difficulty
    self._n_episodes = 0
    self.action_prob = action_prob

    self.n_actions = n_actions
    self.action_space = Space(size=n_actions)
    self.observation_space = Space(low=np.zeros(2, dtype=np.float32),
                                   high=np.ones(2, dtype=np.float32),
                                   dtype=np.float32)
    self.disp_flag = False

    self.motions = self.get_action_motions(self.n_actions)

    self.wall_width = 0.05
    self.step_unit = self.wall_width - 0.005
    self.repeat = int(max_step_length / self.step_unit)
    self.max_steps = int(max_steps / max_step_length)

    self.step_reward = -0.05
    self.collision_reward = 0  # -0.05
    self.movement_reward = 0  # 1
    self.randomness = 0.2

    self.n_lidar = 0
    # n_lidar evenly spaced directions; the first and last angles coincide,
    # so the last one is dropped.
    self.angles = np.linspace(0, 2 * np.pi, self.n_lidar + 1)[:-1]
    self.lidar_angles = list(zip(np.cos(self.angles), np.sin(self.angles)))
    self.static_obstacles = self.get_static_obstacles()

    if debug:
        self.heatmap_scale = 99
        self.heatmap = np.zeros(
            (self.heatmap_scale + 1, self.heatmap_scale + 1))

    self.reset()
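
# Worked defaults (derived from the constants above):
#   repeat    = int(0.25 / 0.045) = 5 sub-steps per action
#   max_steps = int(20 / 0.25)    = 80 decisions per episode
# Usage sketch (the class name `Maze` is assumed; not shown in this excerpt):
#   env = Maze(n_actions=8, difficulty=1, action_prob=0.8)
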
def __init__(self, speed=2, oracle=-1, debug=True):
    self.debug = debug

    self.n_max_actions = 5
    self.state_dim = 1
    self.max_horizon = 1

    self.speed = speed
    self.oracle = oracle

    # The state and action space of the domain.
    self.action_space = Space(size=self.n_max_actions)
    self.observation_space = Space(low=np.array([0]),
                                   high=np.array([1]),
                                   dtype=np.float32)
    self.state = np.array([1])  # State is always 1

    # Time counter
    self.episode = 0

    # Reward associated with each arm is computed based on a
    # sinusoidal wave of varying amplitude and frequency.
    rng = np.random.RandomState(1)
    self.amplitude = rng.rand(self.n_max_actions)
    rng = np.random.RandomState(0)
    self.frequency = rng.rand(self.n_max_actions) * self.speed * 0.005

    # Add noise of a different variance to each arm.
    rng = np.random.RandomState(0)
    self.stds = rng.rand(self.n_max_actions) * 0.01

    if self.oracle >= 0:
        # Pin each arm's mean reward to its value at time `oracle`
        # and stop further drift.
        self.amplitude = self.amplitude * np.sin(self.oracle * self.frequency)
        self.speed = 0

    print("Reward Amplitudes: {} :: Avg {}".format(self.amplitude,
                                                   np.mean(self.amplitude)))

    self.reset()
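
# Hedged sketch of the per-arm reward implied by the comments above; the
# actual step() is not shown in this excerpt, so treat this as an
# illustration, not the environment's exact implementation.
def _arm_reward_sketch(env, action):
    # Sinusoid with arm-specific amplitude/frequency, driven by the episode
    # counter, plus arm-specific Gaussian noise.
    mean = env.amplitude[action] * np.sin(env.episode * env.frequency[action])
    return mean + np.random.normal(0, env.stds[action])
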
def __init__(self,
             action_type='continuous',  # 'discrete' {0,1} or 'continuous' [0,1]
             n_actions=2,
             debug=True,
             max_step_length=0.2,
             max_steps=30):
    self.debug = debug

    self.n_actions = n_actions
    self.action_type = action_type
    self.action_space = Space(low=-np.ones(self.n_actions) / 1.415,
                              high=np.ones(self.n_actions) / 1.415,
                              dtype=np.float32)  # max range is 1/sqrt(2)
    self.observation_space = Space(low=np.zeros(2, dtype=np.float32),
                                   high=np.ones(2, dtype=np.float32),
                                   dtype=np.float32)
    self.disp_flag = False

    self.wall_width = 0.05
    self.step_unit = self.wall_width - 0.005
    self.repeat = int(max_step_length / self.step_unit)
    self.max_steps = int(max_steps / max_step_length)

    self.step_reward = -0.05
    self.collision_reward = 0  # -0.05
    self.movement_reward = 0  # 1
    self.randomness = 0.1

    self.n_lidar = 0
    # n_lidar evenly spaced directions; the first and last angles coincide,
    # so the last one is dropped.
    self.angles = np.linspace(0, 2 * np.pi, self.n_lidar + 1)[:-1]
    # self.lidar_angles = np.array(list(zip(np.cos(self.angles), np.sin(self.angles))), dtype=np.float32)
    self.lidar_angles = list(zip(np.cos(self.angles), np.sin(self.angles)))
    self.static_obstacles = self.get_static_obstacles()

    if debug:
        self.heatmap_scale = 99
        self.heatmap = np.zeros(
            (self.heatmap_scale + 1, self.heatmap_scale + 1))

    self.reset()
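
# Why the 1/1.415 (~ 1/sqrt(2)) component bound above: with both components
# at their limit, the Euclidean length of a 2-D action stays just under 1:
#   np.linalg.norm(np.ones(2) / 1.415)  ->  ~0.9994
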
def __init__(self,
             n_actions=4,
             debug=True,
             max_step_length=0.2,
             max_steps=30,
             max_episodes=1e5,
             change_interval=-1,
             change_count=3):
    self.debug = debug
    self.ep_count = 0

    # General parameters for the environment
    self.n_actions = n_actions
    self.action_space = Space(size=2**n_actions)
    self.observation_space = Space(low=np.zeros(2, dtype=np.float32),
                                   high=np.ones(2, dtype=np.float32),
                                   dtype=np.float32)
    self.disp_flag = False

    self.movement = self.get_movements(self.n_actions)
    self.motions = self.get_action_motions(self.n_actions)

    self.wall_width = 0.05
    self.step_unit = self.wall_width - 0.005
    self.repeat = int(max_step_length / self.step_unit)
    self.max_steps = int(max_steps / max_step_length)

    self.step_reward = -0.05
    self.collision_reward = 0  # -0.05
    self.movement_reward = 0  # 1
    self.randomness = 0.1

    self.n_lidar = 0
    # n_lidar evenly spaced directions; the first and last angles coincide,
    # so the last one is dropped.
    self.angles = np.linspace(0, 2 * np.pi, self.n_lidar + 1)[:-1]
    # self.lidar_angles = np.array(list(zip(np.cos(self.angles), np.sin(self.angles))), dtype=np.float32)
    self.lidar_angles = list(zip(np.cos(self.angles), np.sin(self.angles)))
    self.static_obstacles = self.get_static_obstacles()

    # Continual learning parameters
    self.rng = np.random.RandomState(0)
    if change_interval > 0:
        self.change_interval = change_interval
    else:
        self.change_interval = max_episodes // change_count
    self.change_add_count = int(1.0 / change_count * 2**self.n_actions)

    # Mask indicating the currently active set of actions
    self.action_mask = np.zeros(2**n_actions)
    # Tracks all the actions that have been made available so far
    self.action_tracker = np.zeros(2**n_actions)

    if debug:
        self.heatmap_scale = 99
        self.heatmap = np.zeros(
            (self.heatmap_scale + 1, self.heatmap_scale + 1))

    self.reset()
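
# Worked defaults (derived from the parameters above):
#   |action set|     = 2**4        = 16 candidate actions
#   change_interval  = 1e5 // 3    = 33333 episodes between changes
#   change_add_count = int(16 / 3) = 5 actions unlocked per change
# Usage sketch (the class name `ContinualMaze` is assumed; not shown here):
#   env = ContinualMaze(n_actions=4, max_episodes=1e5, change_count=3)
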