Example #1
    def __init__(
            self,
            action_type='discrete',  # 'discrete' {0,1} or 'continuous' [0,1]
            debug=False,
            max_steps=30,
            action_prob=0.5,
            bridge_prob=-1):

        self.debug = debug

        self.n_states = len(adjacency.keys())
        self.n_max_action = max([len(v) for v in adjacency.values()])

        self.action_space = Space(size=self.n_max_action)
        self.observation_space = Space(size=self.n_states)

        self.max_steps = max_steps
        self.action_prob = action_prob
        self.bridge_prob = action_prob if bridge_prob < 0 else bridge_prob
        self.all_action_probs = self._get_probs()
        self.step_reward = -0.1
        self.self_loop_reward = -1
        self.goal_reward = 10

        # self.start_state = np.random.randint(self.n_states)
        self.goal_state = 0

        self.reset()
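These listings are `__init__` methods only: each assumes `numpy` is imported as `np`, that example #1 can see a module-level `adjacency` dict (keys are states, values are lists of reachable next states), and that a `Space` helper exists for both discrete and box spaces. That helper is not shown anywhere in the listings, so the following is only a minimal sketch inferred from how it is called (`size=` for discrete spaces, `low=`/`high=`/`dtype=` for continuous ones); the real class may expose more than this.

import numpy as np

class Space:
    """Minimal stand-in for the Space helper assumed by the examples."""

    def __init__(self, size=-1, low=None, high=None, dtype=np.float32):
        if size > 0:
            # Discrete space: valid values are 0 .. size-1.
            self.size = size
            self.dtype = np.int32
        else:
            # Continuous (box) space bounded element-wise by low and high.
            self.low = np.asarray(low, dtype=dtype)
            self.high = np.asarray(high, dtype=dtype)
            self.shape = self.low.shape
            self.dtype = dtype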
Example #2
    def __init__(
            self,
            action_type='discrete',  # 'discrete' {0,1} or 'continuous' [0,1]
            n_actions=4,
            oracle=-1,
            speed=4,
            debug=True,
            max_step_length=0.15,
            max_steps=2):

        self.debug = debug

        # NS Specific settings
        self.oracle = oracle
        self.speed = speed
        self.frequency = self.speed * 0.001
        self.episode = 0

        self.n_actions = n_actions
        self.action_space = Space(size=n_actions)
        self.observation_space = Space(low=np.zeros(2, dtype=np.float32),
                                       high=np.ones(2, dtype=np.float32),
                                       dtype=np.float32)
        self.disp_flag = False

        self.motions = self.get_action_motions(self.n_actions)

        self.wall_width = 0.05
        self.step_unit = self.wall_width - 0.005
        self.repeat = int(max_step_length / self.step_unit)

        self.max_horizon = int(max_steps / max_step_length)
        self.step_reward = -0.5
        self.collision_reward = 0  # -0.05
        self.movement_reward = 0  # 1
        self.randomness = 0.25

        # No lidars used
        self.n_lidar = 0
        # n_lidar evenly spaced lidar directions (the 2*pi endpoint duplicates 0 and is dropped)
        self.angles = np.linspace(0, 2 * np.pi, self.n_lidar + 1)[:-1]
        self.lidar_angles = list(zip(np.cos(self.angles), np.sin(self.angles)))
        self.static_obstacles = self.get_static_obstacles()

        if debug:
            self.heatmap_scale = 99
            self.heatmap = np.zeros(
                (self.heatmap_scale + 1, self.heatmap_scale + 1))

        self.reset()
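The maze-style examples (#2, #3, and #6) also call helpers that are not included here, in particular `get_action_motions`, `get_static_obstacles`, and `reset`. Purely as an illustration of what `get_action_motions` could look like, the sketch below maps each of the `n_actions` discrete actions to a fixed 2-D displacement spread evenly around a circle; this is a hypothetical reading, not the repository's implementation.

import numpy as np

def get_action_motions(n_actions):
    """Hypothetical helper: one unit 2-D direction per discrete action."""
    angles = np.linspace(0, 2 * np.pi, n_actions, endpoint=False)
    return np.stack([np.cos(angles), np.sin(angles)], axis=1)  # shape (n_actions, 2)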
Example #3
    def __init__(
            self,
            action_type='discrete',  # 'discrete' {0,1} or 'continuous' [0,1]
            n_actions=8,
            debug=True,
            max_step_length=0.25,
            max_steps=20,
            difficulty=1,
            action_prob=0.8):

        print("difficulty", difficulty)
        self.debug = debug
        self.difficulty = difficulty
        self._n_episodes = 0

        self.action_prob = action_prob
        self.n_actions = n_actions
        self.action_space = Space(size=n_actions)
        self.observation_space = Space(low=np.zeros(2, dtype=np.float32),
                                       high=np.ones(2, dtype=np.float32),
                                       dtype=np.float32)
        self.disp_flag = False

        self.motions = self.get_action_motions(self.n_actions)

        self.wall_width = 0.05
        self.step_unit = self.wall_width - 0.005
        self.repeat = int(max_step_length / self.step_unit)

        self.max_steps = int(max_steps / max_step_length)
        self.step_reward = -0.05
        self.collision_reward = 0  # -0.05
        self.movement_reward = 0  # 1
        self.randomness = 0.2

        self.n_lidar = 0
        # n_lidar evenly spaced lidar directions (the 2*pi endpoint duplicates 0 and is dropped)
        self.angles = np.linspace(0, 2 * np.pi, self.n_lidar + 1)[:-1]
        self.lidar_angles = list(zip(np.cos(self.angles), np.sin(self.angles)))
        self.static_obstacles = self.get_static_obstacles()

        if debug:
            self.heatmap_scale = 99
            self.heatmap = np.zeros(
                (self.heatmap_scale + 1, self.heatmap_scale + 1))

        self.reset()
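For the defaults in example #3 (wall_width=0.05, max_step_length=0.25, max_steps=20), the derived counters work out as below; this is just the arithmetic already implied by the constructor.

wall_width = 0.05
step_unit = wall_width - 0.005        # 0.045: one sub-step per wall width
repeat = int(0.25 / step_unit)        # int(5.55...) -> 5 sub-steps per action
max_steps = int(20 / 0.25)            # 80 environment steps per episode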
Example #4
    def __init__(self, speed=2, oracle=-1, debug=True):

        self.debug = debug
        self.n_max_actions = 5
        self.state_dim = 1
        self.max_horizon = 1
        self.speed = speed
        self.oracle = oracle

        # The state and action space of the domain.
        self.action_space = Space(size=self.n_max_actions)
        self.observation_space = Space(low=np.array([0]),
                                       high=np.array([1]),
                                       dtype=np.float32)
        self.state = np.array([1])  # State is always 1

        # Time counter
        self.episode = 0

        # Reward associated with each arm is computed based on
        # sinusoidal wave of varying amplitude and frequency
        rng = np.random.RandomState(1)
        self.amplitude = rng.rand(self.n_max_actions)

        rng = np.random.RandomState(0)
        self.frequency = rng.rand(self.n_max_actions) * self.speed * 0.005

        # Add noise of different variances to each arm
        rng = np.random.RandomState(0)
        self.stds = rng.rand(self.n_max_actions) * 0.01

        if self.oracle >= 0:
            self.amplitude = self.amplitude * np.sin(
                self.oracle * self.frequency)
            self.speed = 0

        print("Reward Amplitudes: {} :: Avg {} ".format(
            self.amplitude, np.mean(self.amplitude)))

        self.reset()
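The comments in example #4 state that each arm's reward follows a sinusoid with its own amplitude and frequency, plus arm-specific Gaussian noise, and that `episode` acts as the time index (the `oracle` branch above freezes that sinusoid at time `oracle`). The step function itself is not listed, so the following is only a sketch consistent with those comments, not the source's implementation.

import numpy as np

def arm_reward(env, action, rng=np.random):
    """Hypothetical per-pull reward: time-varying sinusoid plus per-arm noise."""
    mean = env.amplitude[action] * np.sin(env.episode * env.frequency[action])
    return mean + rng.normal(0.0, env.stds[action])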
Example #5
    def __init__(self,
                 action_type='continuous',  # 'discrete' {0,1} or 'continuous' [0,1]
                 n_actions=2,
                 debug=True,
                 max_step_length=0.2,
                 max_steps=30):

        self.debug = debug

        self.n_actions = 2
        self.action_type = action_type
        self.action_space = Space(low=-np.ones(self.n_actions) / 1.415,
                                  high=np.ones(self.n_actions) / 1.415,
                                  dtype=np.float32)  # max per-axis range is 1/sqrt(2)
        self.observation_space = Space(low=np.zeros(2, dtype=np.float32),
                                       high=np.ones(2, dtype=np.float32),
                                       dtype=np.float32)
        self.disp_flag = False

        self.wall_width = 0.05
        self.step_unit = self.wall_width - 0.005
        self.repeat = int(max_step_length / self.step_unit)

        self.max_steps = int(max_steps / max_step_length)
        self.step_reward = -0.05
        self.collision_reward = 0  # -0.05
        self.movement_reward = 0  # 1
        self.randomness = 0.1

        self.n_lidar = 0
        self.angles = np.linspace(0, 2*np.pi, self.n_lidar+1)[:-1]  # n_lidar evenly spaced lidar directions (2*pi endpoint duplicates 0 and is dropped)
        # self.lidar_angles = np.array(list(zip(np.cos(self.angles), np.sin(self.angles))), dtype=np.float32)
        self.lidar_angles = list(zip(np.cos(self.angles), np.sin(self.angles)))
        self.static_obstacles = self.get_static_obstacles()

        if debug:
            self.heatmap_scale = 99
            self.heatmap = np.zeros((self.heatmap_scale+1, self.heatmap_scale+1))

        self.reset()
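The continuous action bound `1/1.415` in the constructor above is a numeric stand-in for `1/sqrt(2) ≈ 0.707`, so that even a corner action `(±1/√2, ±1/√2)` has Euclidean length at most 1, i.e. one full step. A quick check of that arithmetic:

import numpy as np

bound = 1 / np.sqrt(2)          # ≈ 0.70711; the code hard-codes 1 / 1.415 ≈ 0.70671
corner = np.array([bound, bound])
print(np.linalg.norm(corner))   # ≈ 1.0, the largest admissible step length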
Example #6
    def __init__(self,
                 n_actions=4,
                 debug=True,
                 max_step_length=0.2,
                 max_steps=30,
                 max_episodes=1e5,
                 change_interval=-1,
                 change_count=3):

        self.debug = debug
        self.ep_count = 0

        # General parameters for the environment
        self.n_actions = n_actions
        self.action_space = Space(size=2**n_actions)
        self.observation_space = Space(low=np.zeros(2, dtype=np.float32),
                                       high=np.ones(2, dtype=np.float32),
                                       dtype=np.float32)
        self.disp_flag = False

        self.movement = self.get_movements(self.n_actions)
        self.motions = self.get_action_motions(self.n_actions)

        self.wall_width = 0.05
        self.step_unit = self.wall_width - 0.005
        self.repeat = int(max_step_length / self.step_unit)

        self.max_steps = int(max_steps / max_step_length)
        self.step_reward = -0.05
        self.collision_reward = 0  # -0.05
        self.movement_reward = 0  # 1
        self.randomness = 0.1

        self.n_lidar = 0
        # n_lidar evenly spaced lidar directions (the 2*pi endpoint duplicates 0 and is dropped)
        self.angles = np.linspace(0, 2 * np.pi, self.n_lidar + 1)[:-1]
        # self.lidar_angles = np.array(list(zip(np.cos(self.angles), np.sin(self.angles))), dtype=np.float32)
        self.lidar_angles = list(zip(np.cos(self.angles), np.sin(self.angles)))
        self.static_obstacles = self.get_static_obstacles()

        # Continual Learning parameters
        self.rng = np.random.RandomState(0)
        if change_interval > 0:
            self.change_interval = change_interval
        else:
            self.change_interval = max_episodes // change_count

        self.change_add_count = int(1.0 / change_count * 2**self.n_actions)
        # Mask indicating the currently active set of actions
        self.action_mask = np.zeros(2**n_actions)
        # Tracks all the actions that have been made available so far
        self.action_tracker = np.zeros(2**n_actions)

        if debug:
            self.heatmap_scale = 99
            self.heatmap = np.zeros(
                (self.heatmap_scale + 1, self.heatmap_scale + 1))

        self.reset()
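Example #6 ends by setting up a continual-learning schedule: every `change_interval` episodes, `change_add_count` of the `2**n_actions` composite actions are meant to become available, with `action_mask` marking the currently active set and `action_tracker` everything released so far. The update rule itself is not part of the listing; the sketch below shows one way such a schedule could be implemented with those attributes and is an assumption, not the source's logic.

import numpy as np

def maybe_grow_action_set(env):
    """Hypothetical schedule: periodically release actions never seen before."""
    if env.ep_count > 0 and env.ep_count % env.change_interval == 0:
        unused = np.where(env.action_tracker == 0)[0]
        if len(unused) == 0:
            return
        new = env.rng.choice(unused,
                             size=min(env.change_add_count, len(unused)),
                             replace=False)
        env.action_mask[new] = 1      # newly released actions become active
        env.action_tracker[new] = 1   # remember they have been made available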