Example #1
 def __init__(self):
     """ init env """
     gym.Env.__init__(self)
     Environment.__init__(self)
     self.width, self.height, self.channel = 16, 16, 1
     self.episode_length = 3
     self.batch_size = 3
     self.observation_shape = [self.height, self.width, self.channel]
     self.observation_space = self.height * self.width * self.channel
     self._step = 0
     #self.train_patterns = [(1, 3, 5, 7, 9, 11, 13, 15)]
     #self.train_patterns = [(0, 2, 4, 6, 8, 10), (1, 3, 5, 7, 9, 11), (2, 4, 6, 8, 10, 12)]
     self.train_patterns = [[0, 2, 4], [1, 3, 5], [2, 4, 6]]
     #self.train_patterns = [[1], [11], [4]]
     #self.train_patterns = [[1, 3, 5], [11, 13, 15], [5, 7, 9]]
     #self.train_patterns = [[1, 3, 5], [11, 13, 15], [10, 12, 14]]
     #self.train_patterns = [[1], [7], [14]]
     #self.train_patterns = [[1, 3, 5], [11, 13, 15], [10, 12, 14], [3, 5, 7], [4, 6, 8], [7, 9, 11]]
     #self.train_patterns = np.random.randint(low=0, high=15, size=(100, 3))
     #self.test_patterns = np.random.randint(low=0, high=15, size=(100, 3))
     #self.test_patterns = [[2], [5]]
     self.test_patterns = [[3, 5, 7], [4, 6, 8]]
     #self.test_patterns = [[2], [7], [9]]
     #self.test_patterns = [[3, 5, 7, 9, 11, 13], [4, 6, 8, 10, 12, 14]]
     self.train, self.test = self.__get_data()
     self.reset()
Example #2
 def __init__(self, model_path, epsilon, epsilon_min, epsilon_decay,
              max_steps):
     self.epsilon = epsilon
     self.epsilon_min = epsilon_min
     self.epsilon_decay = epsilon_decay
     self.model3D = load_model_from_path(model_path)
     self.sim = MjSim(self.model3D)
     self.q_network = None
     self.max_steps = max_steps
     Environment.__init__(self, self.sim)
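The constructor above keeps epsilon, epsilon_min, and epsilon_decay, the usual ingredients of an epsilon-greedy exploration schedule. A minimal, self-contained sketch of how those three fields are typically consumed (decay_epsilon, choose_action, and the numbers are illustrative, not part of this agent):

import random

def decay_epsilon(epsilon, epsilon_min, epsilon_decay):
    # Multiplicative decay, clipped at the minimum exploration rate.
    return max(epsilon_min, epsilon * epsilon_decay)

def choose_action(q_values, epsilon):
    # With probability epsilon pick a random action, otherwise the greedy one.
    if random.random() < epsilon:
        return random.randrange(len(q_values))
    return max(range(len(q_values)), key=lambda a: q_values[a])

epsilon = 1.0
for step in range(5):
    action = choose_action([0.1, 0.4, 0.2], epsilon)
    epsilon = decay_epsilon(epsilon, epsilon_min=0.05, epsilon_decay=0.9)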
Example #3
 def __init__(self,
              max_braid_index=5,
              max_braid_length=7,
              inaction_penalty=0.05,
              start_states_buffer=None,
              action_probabilities=[0.3, 0.5],
              seed_prob=0.5,
              uniform=False):
     assert start_states_buffer is not None, "A start_states_buffer must be passed into the SliceEnvironmentWrapper constructor"
     assert len(action_probabilities) == 2, "Length of action_probabilities must be 2"
     Environment.__init__(self, num_actions=13)
     self.max_braid_index = max_braid_index
     self.max_braid_length = max_braid_length
     self.start_states_buffer = start_states_buffer
     self.slice = self.start_states_buffer.sample_state()
     self.action_probabilities = action_probabilities
     self.seed_prob = seed_prob
     self.uniform = uniform
Example #4
 def __init__(self, color, type, speed):
     Environment.__init__(self, type, speed)
     self.color = color
Example #5
    def __init__(self, lake, slip, max_steps, seed=None):
        """
        lake: A matrix that represents the lake. For example:
         lake =  [['&', '.', '.', '.'],
                  ['.', '#', '.', '#'],
                  ['.', '.', '.', '#'],
                  ['#', '.', '.', '$']]
        slip: The probability that the agent will slip
        max_steps: The maximum number of time steps in an episode
        seed: A seed to control the random number generator (optional)
        """

        # start (&), frozen (.), hole (#), goal ($)
        self.lake = np.array(lake)
        self.lake_flat = self.lake.reshape(-1)

        self.slip = slip

        n_states = self.lake.size + 1
        n_actions = 4

        pi = np.zeros(n_states, dtype=float)
        pi[np.where(self.lake_flat == '&')[0]] = 1.0

        self.absorbing_state = n_states - 1

        # Call parent constructor
        Environment.__init__(self, n_states, n_actions, max_steps, pi, seed)

        # Up, left, down, right (corresponding to w, a, s, d)
        self.actions = [(-1, 0), (0, -1), (1, 0), (0, 1)]

        # Matrix containing rewards for TAKING AN ACTION at a state
        self.reward_map = np.zeros(self.lake.shape, dtype=float)
        # Set goal state to 1
        self.reward_map[np.where(self.lake == '$')] = 1

        # Matrix indicating where the absorbing states are (holes & goal states are 1, others are 0)
        self.abs_states = np.zeros(self.lake.shape, dtype=float)
        # Set goal state to 1
        self.abs_states[np.where(self.lake == '$')] = 1
        self.abs_states[np.where(self.lake == '#')] = 1

        # Helpers for conversions from indices to states (coordinates) and states (coordinates) to indices
        self.state_idx_to_coords = list(product(range(self.reward_map.shape[0]), range(self.reward_map.shape[1])))
        self.coords_to_state_idx = {s: i for (i, s) in enumerate(self.state_idx_to_coords)}

        # Precompute probabilities for transitions
        # self.probabilities = np.zeros((self.n_states, self.n_states, self.n_actions))
        self.probabilities = {state: {action: [] for action in range(n_actions)} for state in range(n_states)}

        def increment(row, col, action):
            """
            Helper function for generating transition probability matrix.
            Makes sure that our agents don't leave the playing field.
            :param row: Current row
            :param col: Current column
            :param action: Incoming action
            :return: New row and column coordinates
            """
            # Boundary checks to make sure that agents don't leave the field
            if action == 0:  # up
                row = max(row - 1, 0)
            elif action == 1:  # left
                col = max(col - 1, 0)
            elif action == 2:  # down
                row = min(row + 1, self.lake.shape[0] - 1)
            elif action == 3:  # right
                col = min(col + 1, self.lake.shape[1] - 1)
            return row, col

        def update_prob_matrix(row, col, action):
            """
            Helper function for generating transition probability matrix.
            Takes a current position (row, col) and an action and returns
            the new state, the reward for the state and whether the agent is done (won or fell in hole)
            :param row:
            :param col:
            :param action:
            :return:
            """
            # Get next field coordinates with boundary checks
            new_row, new_col = increment(row, col, action)
            # Convert coordinates to state ID
            new_state = self.coords_to_state_idx[(new_row, new_col)]
            # Get new coordinate type
            f_type = self.lake[row][col]
            # Check whether we reached the goal or fell in hole
            done = f_type == '$' or f_type == '#'
            # Reward = 1 if agent is at goal, 0 otherwise
            reward = float(f_type == '$')
            return new_state, reward, done

        # Generate transition probabilities
        # Adapted from the openai gym implementation
        # https://gym.openai.com/envs/FrozenLake-v0/
        # Go through all fields and
        for row in range(self.lake.shape[0]):
            for column in range(self.lake.shape[1]):
                # Get state index from coordinates
                state_idx = self.coords_to_state_idx[(row, column)]
                # Go through all actions (up, left, down, right)
                for action in range(n_actions):
                    # Get transition probabilities for current state and action
                    current_list = self.probabilities[state_idx][action]
                    # Check if field is goal or hole
                    field_type = self.lake[row][column]
                    # If goal or hole, make inescapable (probability 1.0)
                    if field_type == '$':  # goal => reward 1, done = True
                        current_list.append((1.0, state_idx, 1.0, True))
                    elif field_type == '#':  # hole => reward 0, done = True
                        current_list.append((1.0, state_idx, 0.0, True))
                    # Otherwise check where we can go from here:
                    else:
                        # Add probabilities for successful action and slips
                        for b in range(n_actions):
                            if b == action:
                                # Successful action
                                # The asterisk ('*') unpacks the return value of the update_prob_matrix() function
                                # The probability for a successful action is 1 minus the slip probability
                                # for each remaining move
                                # Note: For this to work the slip probability has to be in the range [0...1]
                                current_list.append(
                                    (1.0 - (n_actions - 1.0) * self.slip, *update_prob_matrix(row, column, b))
                                )
                            else:
                                # Slip :(
                                current_list.append((self.slip, *update_prob_matrix(row, column, b)))
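The probabilities table built above follows the {state: {action: [(probability, next_state, reward, done), ...]}} layout used by the Gym FrozenLake environments. A minimal sketch of how such a table can be consumed, here a single synchronous value-iteration sweep over a hypothetical two-state table (the toy numbers below are illustrative, not taken from the environment):

# Hypothetical transition table in the same
# {state: {action: [(prob, next_state, reward, done), ...]}} format.
probabilities = {
    0: {0: [(0.8, 1, 0.0, False), (0.2, 0, 0.0, False)],
        1: [(1.0, 0, 0.0, False)]},
    1: {0: [(1.0, 1, 1.0, True)],
        1: [(1.0, 0, 0.0, False)]},
}

def backup(values, state, action, gamma=0.9):
    # Expected one-step return of taking `action` in `state`.
    return sum(p * (r + gamma * values[s_next] * (not done))
               for p, s_next, r, done in probabilities[state][action])

values = [0.0, 0.0]
# One synchronous sweep: each state takes the value of its best action.
values = [max(backup(values, s, a) for a in probabilities[s]) for s in sorted(probabilities)]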
Example #6
 def __init__(self, worldMap, width, height, color):
     Environment.__init__(self, worldMap, 'food', width, height, color)
Example #7
 def __init__(self, worldMap, width, height, color):
     Environment.__init__(self, worldMap, 'obstacle', width, height, color)
     self.remainArea = self.width * self.height
Example #8
                                      FuzzySet.Triangles(-1.5, 0, 1.5),
                                      FuzzySet.Triangles(0, 1.5, 3.14159))
x4 = StateVariable.InputStateVariable(FuzzySet.Triangles(-3.14159, -1.5, 0),
                                      FuzzySet.Triangles(-1.5, 0, 1.5),
                                      FuzzySet.Triangles(0, 1.5, 3.14159))
fis = FIS.Build(x1, x2, x3, x4)

# Create Model
angle_list = []
model = FQL.Model(gamma=0.9,
                  alpha=0.1,
                  ee_rate=0.999,
                  q_initial_value='random',
                  action_set_length=21,
                  fis=fis)
env = Environment()
for iteration in range(0, 5000):
    if iteration % 100 == 0 or reward == -1:
        env.__init__()
        action = model.get_initial_action(env.state)
        reward, state_value = env.apply_action(action)
    action = model.run(state_value, reward)
    reward, state_value = env.apply_action(action)
    if reward != -1:
        angle_list.append(state_value[2])

plt.figure(figsize=(14, 3))
plt.plot(angle_list)
plt.ylabel('Pole Angle')
plt.show()