コード例 #1
0
ファイル: QLearning.py プロジェクト: Omar1986-prog/demo1986
    def learn(self, episodes, visualize=False):
        '''
        Learn best Q-values by running the environment for a number of
        episodes and updating the Q estimates after every step.

        :param episodes: Number of episodes to learn
        :param visualize: if True, render environment when learning
        '''
        for ep in range(episodes):
            state = self.env.reset()
            t = 0
            while True:
                if visualize:
                    self.env.render()
                action = self.act(discretize(state), episode=ep)
                # 1) Run the environment with the chosen action.
                new_state, reward, done, info = self.env.step(action)
                # 2) Update the Q estimates; both states are discretized
                #    before being handed over.
                # NOTE(review): assumes the agent defines
                # updateQ(state, action, reward, new_state) -- confirm
                # against the rest of the class.
                self.updateQ(discretize(state), action, reward,
                             discretize(new_state))
                # 3) S = S'
                state = new_state
                t += 1
                if done:
                    print("Episode {} finished after {} timesteps".format(
                        ep, t))
                    break
コード例 #2
0
    def learn(self, episodes, visualize=False):
        '''
        Train the agent: play ``episodes`` episodes and update the Q
        estimates after every environment step.

        :param episodes: Number of episodes to learn
        :param visualize: if True, render environment when learning
        '''
        for ep in range(episodes):
            state = self.env.reset()
            steps = 0
            done = False
            while not done:
                if visualize:
                    self.env.render()
                # The action is chosen from the discretized current state.
                action = self.act(discretize(state), episode=ep)
                next_state, reward, done, _info = self.env.step(action)
                # Discretize both ends of the transition for the update.
                current_key = discretize(state)
                next_key = discretize(next_state)
                self.updateQ(current_key, action, reward, next_key)
                state = next_state
                steps += 1
            # Reached only once the environment has reported `done`.
            print("Episode {} finished after {} timesteps".format(ep, steps))
コード例 #3
0
def edgeLink(M, Mag, Ori):
    """Link edges with double thresholding followed by one hysteresis pass.

    The magnitude map is masked by ``M``; every pixel is then labelled
    strong (1), weak (0.5) or non-edge (0) using two thresholds derived from
    the largest masked magnitude. Each weak pixel is afterwards promoted to
    1 or dropped to 0 after looking at two opposite neighbours.

    :param M: array multiplied element-wise with ``Mag`` (presumably a
        binary non-maximum-suppression mask -- confirm with the caller)
    :param Mag: 2-D gradient magnitude array
    :param Ori: 2-D array of gradient orientations, indexed like ``Mag``
        (presumably radians, as the discretized values are compared with
        multiples of pi -- confirm)
    :return: 2-D float array containing only 0 and 1
    """
    masked_mag = np.multiply(M, Mag)
    n_rows, n_cols = masked_mag.shape

    # Both thresholds scale with the strongest response in the image.
    low = 0.03 * np.amax(masked_mag)
    high = 2.5 * low

    # Label every pixel. The strong mask is written last so that it wins
    # whenever a pixel would satisfy both tests.
    labels = np.full([n_rows, n_cols], 0.5)
    labels[masked_mag <= low] = 0
    labels[masked_mag > high] = 1

    pi = math.pi
    # (discretized angles, offset of neighbour 1, offset of neighbour 2);
    # offsets are (row, column) steps. The first matching entry is used.
    directions = (
        ((0, pi), (0, 1), (0, -1)),
        ((pi / 2, 3 * pi / 2), (1, 0), (-1, 0)),
        ((pi / 4, 5 * pi / 4), (1, 1), (-1, -1)),
        ((3 * pi / 4, 7 * pi / 4), (-1, 1), (1, -1)),
    )

    def in_band(diff):
        # NOTE(review): the magnitude thresholds are reused here to judge an
        # orientation difference -- confirm this is intended.
        return diff >= low and diff <= high

    # Hysteresis: a single column-major sweep that edits `labels` in place,
    # so pixels resolved earlier in the sweep influence later ones.
    for c in range(n_cols):
        for r in range(n_rows):
            # Only weak pixels need a decision.
            if labels[r, c] != 0.5:
                continue

            gradient = Ori[r, c]

            # NOTE(review): the offset applied to the gradient angle is
            # cos(angle), not a fixed quarter turn -- confirm.
            angle_neg = discretize(gradient - np.cos(gradient))
            angle_pos = discretize(gradient + np.cos(gradient))

            # Defaults used when no direction matches or a neighbour would
            # fall outside the image: value 0.0 located at (0, 0).
            value_1, r1, c1 = 0.0, 0, 0
            value_2, r2, c2 = 0.0, 0, 0

            for angles, step_1, step_2 in directions:
                if angle_neg in angles or angle_pos in angles:
                    cand_r, cand_c = r + step_1[0], c + step_1[1]
                    if 0 <= cand_r < n_rows and 0 <= cand_c < n_cols:
                        value_1 = labels[cand_r][cand_c]
                        r1, c1 = cand_r, cand_c
                    cand_r, cand_c = r + step_2[0], c + step_2[1]
                    if 0 <= cand_r < n_rows and 0 <= cand_c < n_cols:
                        value_2 = labels[cand_r][cand_c]
                        r2, c2 = cand_r, cand_c
                    break

            diff_1 = abs(Ori[r][c] - Ori[r1][c1])
            diff_2 = abs(Ori[r][c] - Ori[r2][c2])

            if value_1 < 1 and value_2 < 1:
                # No strong neighbour to attach to.
                keep = False
            elif value_1 == 1 and value_2 < 1:
                keep = in_band(diff_1)
            elif value_2 == 1 and value_1 < 1:
                keep = in_band(diff_2)
            elif value_1 == 1 and value_2 == 1:
                keep = in_band(diff_1) or in_band(diff_2)
            else:
                # Neither case applies: leave the pixel for the final sweep.
                continue

            labels[r][c] = 1 if keep else 0

    # Any pixel still marked weak is discarded.
    labels[labels == 0.5] = 0

    return labels
コード例 #4
0
 def preprocess_state(self, state):
     """Return the discretized form of `state` as a tuple.

     The state is passed to `discretize` together with `self.state_grid`
     and the result is converted to a tuple.
     """
     grid_indices = discretize(state, self.state_grid)
     return tuple(grid_indices)
コード例 #5
0
ファイル: main.py プロジェクト: Omar1986-prog/demo1986
TRIALS = 10
######## CONSTANTS ########

env = gym.make('MountainCar-v0')
# Everything that uses the environment runs inside try/finally so that
# env.close() is still called when training or evaluation raises.
try:
    agent = QLAgent(env=env,
                    n_states=N_STATES,
                    n_actions=N_ACTIONS,
                    lr=0.5,
                    discount=0.95)

    # Train agent
    agent.learn(episodes=EPISODES)

    # Evaluate: run TRIALS rendered episodes with the trained agent and
    # count how many finish in fewer than 200 steps.
    success = 0
    for tr in range(TRIALS):
        state = env.reset()
        t = 0
        while True:
            env.render()
            action = agent.act(discretize(state))
            state, reward, done, info = env.step(action)
            t += 1
            if done:
                print("Trial {} finished after {} timesteps".format(tr, t))
                # NOTE(review): presumably 200 is the environment's step
                # cap, so a shorter episode means the goal was reached --
                # confirm.
                if t < 200:
                    success += 1
                break
    print("Success: %d/%d" % (success, TRIALS))
finally:
    env.close()