def learn(self, episodes, visualize=False):
    """Learn best Q-values by running Q-learning episodes.

    :param episodes: Number of episodes to learn
    :param visualize: if True, render environment when learning
    """
    for ep in range(episodes):
        state = self.env.reset()
        t = 0
        while True:
            if visualize:
                self.env.render()
            # Pick an action for the current (discretized) observation.
            action = self.act(discretize(state), episode=ep)
            # 1) Run the environment with chosen action (legacy gym
            #    4-tuple return: observation, reward, done, info).
            new_state, reward, done, info = self.env.step(action)
            # 2) Update Q-values; both states must be discretized.
            self.updateQ(discretize(state), action, reward,
                         discretize(new_state))
            # 3) S = S'
            state = new_state
            t += 1
            if done:
                print("Episode {} finished after {} timesteps".format(
                    ep, t))
                break
def learn(self, episodes, visualize=False):
    """Train the agent's Q-table over the given number of episodes.

    :param episodes: Number of episodes to learn
    :param visualize: if True, render environment when learning
    """
    for episode_idx in range(episodes):
        current = self.env.reset()
        steps = 0
        done = False
        while not done:
            if visualize:
                self.env.render()
            # Select an action for the discretized current state.
            chosen = self.act(discretize(current), episode=episode_idx)
            successor, reward, done, info = self.env.step(chosen)
            # Q-update on discretized (S, A, R, S') transition.
            self.updateQ(discretize(current), chosen, reward,
                         discretize(successor))
            current = successor
            steps += 1
            if done:
                print("Episode {} finished after {} timesteps".format(
                    episode_idx, steps))
def edgeLink(M, Mag, Ori):
    """Link edges via double-threshold hysteresis.

    :param M: binary mask of candidate edge pixels (same shape as Mag)
    :param Mag: gradient magnitude map
    :param Ori: gradient orientation map (radians — TODO confirm range)
    :return: binary edge map (1 = edge, 0 = non-edge)
    """
    # Multiply magnitude with the binary mask to keep only candidates.
    binary_mag = np.multiply(M, Mag)
    row, column = binary_mag.shape
    # NOTE(review): np.empty leaves garbage values, but every cell is
    # assigned in the thresholding pass below, so this is safe.
    edge_map = np.empty([row, column])
    # Declare low and high threshold (relative to the max magnitude).
    threshold_low = 0.03 * np.amax(binary_mag)
    threshold_high = 2.5 * threshold_low
    # Set thresholds: 1 = strong edge, 0 = suppressed, 0.5 = weak edge.
    for j in range(0, column):
        for i in range(0, row):
            if binary_mag[i, j] > threshold_high:
                edge_map[i, j] = 1
            elif binary_mag[i, j] <= threshold_low:
                edge_map[i, j] = 0
            else:
                edge_map[i, j] = 0.5
    # Hysteresis: try to promote weak (0.5) pixels that touch a strong
    # neighbour along the edge direction. NOTE(review): the scan mutates
    # edge_map in place, so results depend on traversal order.
    for j in range(0, column):
        for i in range(0, row):
            # Start at weak edge (marked 0.5 above)
            if edge_map[i, j] == 0.5:
                pi = math.pi
                # Orientation of gradient
                angle = Ori[i, j]
                # Orientations of edge perpendicular to gradient direction
                # (presumably discretize snaps these to multiples of pi/4 —
                # TODO confirm against the project's discretize helper).
                angle_neg = discretize(angle - np.cos(angle))
                angle_pos = discretize(angle + np.cos(angle))
                neighbor_1 = 0.0
                neighbor_2 = 0.0
                row_1 = 0
                row_2 = 0
                column_1 = 0
                column_2 = 0
                # Horizontal edge: look at left/right neighbours.
                if angle_neg == 0 or angle_neg == pi or angle_pos == 0 or angle_pos == pi:
                    if j + 1 < column:
                        neighbor_1 = edge_map[i][j + 1]
                        row_1 = i
                        column_1 = j + 1
                    if j - 1 >= 0:
                        neighbor_2 = edge_map[i][j - 1]
                        row_2 = i
                        column_2 = j - 1
                # Vertical edge: look at up/down neighbours.
                elif angle_neg == pi / 2 or angle_neg == 3 * pi / 2 or angle_pos == pi / 2 or angle_pos == 3 * pi / 2:
                    if i + 1 < row:
                        neighbor_1 = edge_map[i + 1][j]
                        row_1 = i + 1
                        column_1 = j
                    if i - 1 >= 0:
                        neighbor_2 = edge_map[i - 1][j]
                        row_2 = i - 1
                        column_2 = j
                # Diagonal edge (pi/4): look at the two matching diagonals.
                elif angle_neg == pi / 4 or angle_neg == 5 * pi / 4 or angle_pos == pi / 4 or angle_pos == 5 * pi / 4:
                    if i + 1 < row and j + 1 < column:
                        neighbor_1 = edge_map[i + 1][j + 1]
                        row_1 = i + 1
                        column_1 = j + 1
                    if i - 1 >= 0 and j - 1 >= 0:
                        neighbor_2 = edge_map[i - 1][j - 1]
                        row_2 = i - 1
                        column_2 = j - 1
                # Anti-diagonal edge (3*pi/4).
                elif angle_neg == 3 * pi / 4 or angle_neg == 7 * pi / 4 or angle_pos == 3 * pi / 4 or angle_pos == 7 * pi / 4:
                    if i - 1 >= 0 and j + 1 < column:
                        neighbor_1 = edge_map[i - 1][j + 1]
                        row_1 = i - 1
                        column_1 = j + 1
                    if i + 1 < row and j - 1 >= 0:
                        neighbor_2 = edge_map[i + 1][j - 1]
                        row_2 = i + 1
                        column_2 = j - 1
                # Orientation agreement with each neighbour.
                # NOTE(review): when a neighbour is out of bounds the
                # defaults (row=0, col=0) make this compare against
                # Ori[0][0]; the corresponding neighbor_* stays 0.0 so
                # the promotion branch for it is not taken.
                ori_difference_1 = abs(Ori[i][j] - Ori[row_1][column_1])
                ori_difference_2 = abs(Ori[i][j] - Ori[row_2][column_2])
                # NOTE(review): the orientation differences are compared
                # against the *magnitude* thresholds — looks intentional
                # in this codebase, but verify the units.
                if neighbor_1 < 1 and neighbor_2 < 1:
                    edge_map[i][j] = 0
                elif neighbor_1 == 1 and neighbor_2 < 1:
                    if ori_difference_1 >= threshold_low and ori_difference_1 <= threshold_high:
                        edge_map[i][j] = 1
                    else:
                        edge_map[i][j] = 0
                elif neighbor_2 == 1 and neighbor_1 < 1:
                    if ori_difference_2 >= threshold_low and ori_difference_2 <= threshold_high:
                        edge_map[i][j] = 1
                    else:
                        edge_map[i][j] = 0
                elif neighbor_1 == 1 and neighbor_2 == 1:
                    if ori_difference_1 >= threshold_low and ori_difference_1 <= threshold_high or ori_difference_2 >= threshold_low and ori_difference_2 <= threshold_high:
                        edge_map[i][j] = 1
                    else:
                        edge_map[i][j] = 0
    # Final for loop to clear weak values that were never promoted.
    for j in range(0, column):
        for i in range(0, row):
            if edge_map[i][j] == 0.5:
                edge_map[i][j] = 0
    return edge_map
def preprocess_state(self, state):
    """Map a continuous state to its discretized representation."""
    grid_indices = discretize(state, self.state_grid)
    return tuple(grid_indices)
TRIALS = 10

######## CONSTANTS ########
env = gym.make('MountainCar-v0')
agent = QLAgent(env=env, n_states=N_STATES, n_actions=N_ACTIONS,
                lr=0.5, discount=0.95)

# Train agent
agent.learn(episodes=EPISODES)

# Evaluate: count trials solved in under 200 timesteps.
success = 0
for trial in range(TRIALS):
    state = env.reset()
    steps = 0
    done = False
    while not done:
        env.render()
        chosen_action = agent.act(discretize(state))
        state, reward, done, info = env.step(chosen_action)
        steps += 1
    print("Trial {} finished after {} timesteps".format(trial, steps))
    if steps < 200:
        success += 1
print("Success: %d/%d" % (success, TRIALS))
env.close()