def __init__(self): """ init env """ gym.Env.__init__(self) Environment.__init__(self) self.width, self.height, self.channel = 16, 16, 1 self.episode_length = 3 self.batch_size = 3 self.observation_shape = [self.height, self.width, self.channel] self.observation_space = self.height * self.width * self.channel self._step = 0 #self.train_patterns = [(1, 3, 5, 7, 9, 11, 13, 15)] #self.train_patterns = [(0, 2, 4, 6, 8, 10), (1, 3, 5, 7, 9, 11), (2, #4, 6, 8, 10, 12)] self.train_patterns = [[0, 2, 4], [1, 3, 5], [2, 4, 6]] #self.train_patterns = [[1], [11], [4]] #self.train_patterns = [[1, 3, 5], [11, 13, 15], [5, 7, 9]] #self.train_patterns = [[1, 3, 5], [11, 13, 15], [10, 12, 14]] #self.train_patterns = [[1], [7], [14]] #self.train_patterns = [[1, 3, 5], [11, 13, 15], [10, 12, 14], [3, 5, #7], [4, 6, 8], [7, 9, 11]] #self.train_patterns = np.random.randint(low=0, high=15, size=(100, 3)) #self.test_patterns = np.random.randint(low=0, high=15, size=(100, 3)) #self.test_patterns = [[2], [5]] self.test_patterns = [[3, 5, 7], [4, 6, 8]] #self.test_patterns = [[2], [7], [9]] #self.test_patterns = [[3, 5, 7, 9, 11, 13], [4, 6, 8, 10, 12, 14]] self.train, self.test = self.__get_data() self.reset()
def __init__(self, model_path, epsilon, epsilon_min, epsilon_decay, max_steps):
    self.epsilon = epsilon
    self.epsilon_min = epsilon_min
    self.epsilon_decay = epsilon_decay
    self.model3D = load_model_from_path(model_path)
    self.sim = MjSim(self.model3D)
    self.q_network = None
    self.max_steps = max_steps
    Environment.__init__(self, self.sim)
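# Hedged sketch (assumption, not part of the original class): one common way
# the epsilon / epsilon_min / epsilon_decay fields above get consumed is an
# epsilon-greedy policy with multiplicative decay. `q_values` is a stand-in
# for whatever self.q_network produces for the current state.
import random

def epsilon_greedy_action(q_values, epsilon):
    """With probability epsilon explore; otherwise act greedily."""
    if random.random() < epsilon:
        return random.randrange(len(q_values))
    return max(range(len(q_values)), key=lambda a: q_values[a])

def decay_epsilon(epsilon, epsilon_min, epsilon_decay):
    """Multiplicative decay, floored at epsilon_min."""
    return max(epsilon_min, epsilon * epsilon_decay)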
def __init__(self, max_braid_index=5, max_braid_length=7, inaction_penalty=0.05,
             start_states_buffer=None, action_probabilities=(0.3, 0.5),
             seed_prob=0.5, uniform=False):
    # Bug fix: `type(x) is not None` is always true; check the argument itself.
    # The default for action_probabilities is a tuple to avoid a mutable default.
    assert start_states_buffer is not None, \
        "A start_states_buffer must be passed into the SliceEnvironmentWrapper constructor"
    assert len(action_probabilities) == 2, \
        "Length of action_probabilities must be 2"
    Environment.__init__(self, num_actions=13)
    self.max_braid_index = max_braid_index
    self.max_braid_length = max_braid_length
    self.start_states_buffer = start_states_buffer
    self.slice = self.start_states_buffer.sample_state()
    self.action_probabilities = action_probabilities
    self.seed_prob = seed_prob
    self.uniform = uniform
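# Hedged sketch (assumption, not from the source): one plausible reading of
# how seed_prob and the start-states buffer could interact on reset: with
# probability seed_prob start from a canonical seed state, otherwise replay
# a buffered state. `make_seed_state` is a hypothetical helper.
import random

def sample_start(buffer, seed_prob, make_seed_state, rng=random):
    if rng.random() < seed_prob:
        return make_seed_state()   # fresh, canonical start state
    return buffer.sample_state()   # previously visited state from the buffer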
def __init__(self, color, type, speed):
    Environment.__init__(self, type, speed)
    self.color = color
def __init__(self, lake, slip, max_steps, seed=None):
    """
    lake: A matrix that represents the lake. For example:
        lake = [['&', '.', '.', '.'],
                ['.', '#', '.', '#'],
                ['.', '.', '.', '#'],
                ['#', '.', '.', '$']]
    slip: The probability that the agent will slip
    max_steps: The maximum number of time steps in an episode
    seed: A seed to control the random number generator (optional)
    """
    # start (&), frozen (.), hole (#), goal ($)
    self.lake = np.array(lake)
    self.lake_flat = self.lake.reshape(-1)
    self.slip = slip

    n_states = self.lake.size + 1
    n_actions = 4

    pi = np.zeros(n_states, dtype=float)
    pi[np.where(self.lake_flat == '&')[0]] = 1.0

    self.absorbing_state = n_states - 1

    # Call parent constructor
    Environment.__init__(self, n_states, n_actions, max_steps, pi, seed)

    # Up, left, down, right (corresponding to w, a, s, d)
    self.actions = [(-1, 0), (0, -1), (1, 0), (0, 1)]

    # Matrix containing rewards for TAKING AN ACTION at a state
    # (np.float is deprecated in modern NumPy; use the builtin float)
    self.reward_map = np.zeros(self.lake.shape, dtype=float)
    # Set goal state to 1
    self.reward_map[np.where(self.lake == '$')] = 1

    # Matrix indicating where the absorbing states are (holes & goal states are 1, others are 0)
    self.abs_states = np.zeros(self.lake.shape, dtype=float)
    self.abs_states[np.where(self.lake == '$')] = 1
    self.abs_states[np.where(self.lake == '#')] = 1

    # Helpers for conversions from indices to states (coordinates) and back
    self.state_idx_to_coords = list(product(range(self.reward_map.shape[0]),
                                            range(self.reward_map.shape[1])))
    self.coords_to_state_idx = {s: i for (i, s) in enumerate(self.state_idx_to_coords)}

    # Precompute probabilities for transitions
    # self.probabilities = np.zeros((self.n_states, self.n_states, self.n_actions))
    self.probabilities = {state: {action: [] for action in range(n_actions)}
                          for state in range(n_states)}

    def increment(row, col, action):
        """
        Helper function for generating the transition probability matrix.
        Makes sure that our agents don't leave the playing field.

        :param row: Current row
        :param col: Current column
        :param action: Incoming action
        :return: New row and column coordinates
        """
        # Boundary checks to make sure that agents don't leave the field
        if action == 0:    # up
            row = max(row - 1, 0)
        elif action == 1:  # left
            col = max(col - 1, 0)
        elif action == 2:  # down
            row = min(row + 1, self.lake.shape[0] - 1)
        elif action == 3:  # right
            col = min(col + 1, self.lake.shape[1] - 1)
        return row, col

    def update_prob_matrix(row, col, action):
        """
        Helper function for generating the transition probability matrix.
        Takes a current position (row, col) and an action and returns the new
        state, the reward for the state, and whether the agent is done
        (won or fell in a hole).

        :param row: Current row
        :param col: Current column
        :param action: Incoming action
        :return: (new_state, reward, done)
        """
        # Get next field coordinates with boundary checks
        new_row, new_col = increment(row, col, action)
        # Convert coordinates to state ID
        new_state = self.coords_to_state_idx[(new_row, new_col)]
        # Get current field type (bug fix: the parameter is `col`, not `column`)
        f_type = self.lake[row][col]
        # Check whether we reached the goal or fell in a hole
        done = f_type == '$' or f_type == '#'
        # Reward = 1 if agent is at goal, 0 otherwise
        reward = float(f_type == '$')
        return new_state, reward, done

    # Generate transition probabilities
    # Adapted from the openai gym implementation
    # https://gym.openai.com/envs/FrozenLake-v0/
    # Go through all fields...
    for row in range(self.lake.shape[0]):
        for column in range(self.lake.shape[1]):
            # Get state index from coordinates
            state_idx = self.coords_to_state_idx[(row, column)]
            # ...and through all actions (up, left, down, right)
            for action in range(n_actions):
                # Get transition probabilities for current state and action
                current_list = self.probabilities[state_idx][action]
                # Check if field is goal or hole
                field_type = self.lake[row][column]
                # If goal or hole, make inescapable (probability 1.0)
                if field_type == '$':
                    # goal => reward 1, done = True
                    current_list.append((1.0, state_idx, 1.0, True))
                elif field_type == '#':
                    # hole => reward 0, done = True
                    current_list.append((1.0, state_idx, 0.0, True))
                # Otherwise check where we can go from here:
                else:
                    # Add probabilities for successful action and slips
                    for b in range(n_actions):
                        if b == action:
                            # Successful action.
                            # The asterisk ('*') unpacks the return value of update_prob_matrix().
                            # The probability of a successful action is 1 minus the slip
                            # probability for each remaining move.
                            # Note: for this to work the slip probability must be in [0, 1].
                            current_list.append(
                                (1.0 - (n_actions - 1.0) * self.slip,
                                 *update_prob_matrix(row, column, b))
                            )
                        else:
                            # Slip :(
                            current_list.append((self.slip, *update_prob_matrix(row, column, b)))
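# Hedged usage sketch (not part of the original class): the table built above
# maps state -> action -> list of (probability, next_state, reward, done)
# tuples, so a single environment step can be sampled from it like this.
# `probabilities` stands for the dict constructed in __init__.
import numpy as np

def sample_transition(probabilities, state, action, rng=None):
    """Draw one transition according to the precomputed probabilities."""
    rng = rng or np.random.default_rng()
    entries = probabilities[state][action]
    probs = [entry[0] for entry in entries]   # probabilities sum to 1 by construction
    idx = rng.choice(len(entries), p=probs)
    _, next_state, reward, done = entries[idx]
    return next_state, reward, done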
def __init__(self, worldMap, width, height, color):
    Environment.__init__(self, worldMap, 'food', width, height, color)
def __init__(self, worldMap, width, height, color):
    Environment.__init__(self, worldMap, 'obstacle', width, height, color)
    self.remainArea = self.width * self.height
x3 = StateVariable.InputStateVariable(FuzzySet.Triangles(-3.14159, -1.5, 0),
                                      FuzzySet.Triangles(-1.5, 0, 1.5),
                                      FuzzySet.Triangles(0, 1.5, 3.14159))
x4 = StateVariable.InputStateVariable(FuzzySet.Triangles(-3.14159, -1.5, 0),
                                      FuzzySet.Triangles(-1.5, 0, 1.5),
                                      FuzzySet.Triangles(0, 1.5, 3.14159))
fis = FIS.Build(x1, x2, x3, x4)

# Create model
angle_list = []
model = FQL.Model(gamma=0.9, alpha=0.1, ee_rate=0.999, q_initial_value='random',
                  action_set_length=21, fis=fis)
env = Environment()
for iteration in range(0, 5000):
    # Re-initialize the environment every 100 iterations or after a failure
    # (reward == -1 signals that the pole fell). At iteration 0 the first
    # condition short-circuits, so `reward` is not read before assignment.
    if iteration % 100 == 0 or reward == -1:
        env.__init__()
        action = model.get_initial_action(env.state)
        reward, state_value = env.apply_action(action)
    action = model.run(state_value, reward)
    reward, state_value = env.apply_action(action)
    if reward != -1:
        angle_list.append(state_value[2])

plt.figure(figsize=(14, 3))
plt.plot(angle_list)
plt.ylabel('Pole Angle')
plt.show()