Code Example #1
ax.legend(('1', '2', '3', '4'))
ax.set_ylabel('motor speeds, rad/s')
ax.grid(True, which='major')
ax.set_title('Commands')
M = control['cmd_moment']
ax = axes[1]
ax.plot(time, M[:, 0], 'r.', time, M[:, 1], 'g.', time, M[:, 2], 'b.')
ax.legend(('x', 'y', 'z'))
ax.set_ylabel('moment, N*m')
ax.grid(True, which='major')
T = control['cmd_thrust']
ax = axes[2]
ax.plot(time, T, 'k.')
ax.set_ylabel('thrust, N')
ax.set_xlabel('time, s')
ax.grid(True, which='major')

# 3D Paths
fig = plt.figure('3D Path')
ax = Axes3Ds(fig)
world.draw(ax)
ax.plot3D(state['x'][:, 0], state['x'][:, 1], state['x'][:, 2], 'b.')
ax.plot3D(flat['x'][:, 0], flat['x'][:, 1], flat['x'][:, 2], 'k')

# Animation (Slow)
# Instead of viewing the animation live, you may provide a .mp4 filename to save.
# as_dcm() was deprecated in SciPy 1.4 and later removed; as_matrix() is the equivalent call.
R = Rotation.from_quat(state['q']).as_matrix()
animate(time, state['x'], R, world=world, filename=None)

plt.show()
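
The snippet above starts mid-script: it assumes the figure and its three stacked axes already exist, along with the simulation outputs (time, state, control, flat) and the 3D helpers (Axes3Ds, world, animate). A minimal setup sketch for the first panel, using placeholder data; the array shapes and the motor-speed variable below are assumptions, not the project's actual interface:

import numpy as np
import matplotlib.pyplot as plt
from scipy.spatial.transform import Rotation

# Placeholder results standing in for the simulator's outputs (shapes assumed).
time = np.linspace(0, 5, 200)                                             # s
cmd_motor_speeds = np.random.default_rng(0).uniform(400, 600, (200, 4))   # rad/s

fig, axes = plt.subplots(nrows=3, ncols=1, sharex=True, num='Commands')
ax = axes[0]
ax.plot(time, cmd_motor_speeds, '.')  # one dot series per motor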
Code Example #2
        markeredgewidth=3,
        markerfacecolor='none')
world.draw_line(ax, flat['x'], color='black', linewidth=2)
world.draw_points(ax, state['x'], color='blue', markersize=4)
if collision_pts.size > 0:
    ax.plot(collision_pts[0, [0]],
            collision_pts[0, [1]],
            collision_pts[0, [2]],
            'rx',
            markersize=36,
            markeredgewidth=4)
ax.legend(handles=[
    Line2D([], [], color='black', linewidth=2, label='Trajectory'),
    Line2D([], [],
           color='blue',
           linestyle='',
           marker='.',
           markersize=4,
           label='Flight')
],
          loc='upper right')

# Animation (Slow)
#
# Instead of viewing the animation live, you may provide a .mp4 filename to save.

# as_dcm() was deprecated in SciPy 1.4 and later removed; as_matrix() is the equivalent call.
R = Rotation.from_quat(state['q']).as_matrix()
animate(sim_time, state['x'], R, world=world, filename=None, show_axes=True)

plt.show()
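
Like the first example, this snippet begins mid-script (its first two lines are the tail of an earlier marker-plot call). At minimum it relies on the imports below; the Line2D objects are proxy artists used only to build the legend:

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
from scipy.spatial.transform import Rotation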
Code Example #3
def Qlearning(args):
    """
    The main Q-learning function, utilizing the functions implemented above.
    Need to change to choose actions of discretized action space
    """
    reward_list = []
    position_list = []
    success_list = []
    success = 0  # success indicator for the current episode (reset each episode)

    success_array_5 = 0  # running success count over the current 5-episode window

    args.log_permit = False

    for i in tqdm(range(args.max_episodes), position=0):
        # Initialize parameters
        done = False  # indicates whether the episode is done
        terminal = False  # indicates whether the episode is done AND the car has reached the flag (>=0.5 position)
        tot_reward = 0  # sum of total reward over a single episode
        state = args.start
        num_steps = 0
        path_length = 0
        path_list = []
        print(f'\nExploration rate (epsilon): {args.epsilon}')

        while not done and num_steps <= args.max_steps:
            # Determine next action
            path_list.append(
                (args.occ_map.index_to_metric_center(state)).tolist())
            action, _ = choose_action(args, state, args.epsilon)
            next_state, reward, done = step(args, state, action)
            # Update terminal
            terminal = (done and
                        (np.linalg.norm(next_state - args.goal) <= args.tol))
            # Update Q
            Q = get_target_Q(args, state, next_state, action, reward, terminal,
                             done)
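            # Assuming get_target_Q implements the standard tabular rule:
            #   Q(s, a) <- Q(s, a) + alpha * (r + gamma * max_a' Q(s', a') - Q(s, a)),
            # with the bootstrap term dropped on terminal transitions.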
            # Update tot_reward, state, and success (if applicable)
            state_action_pair = get_pair(args, state, action)
            add_replace_element(args, state_action_pair, Q)

            tot_reward += reward
            path_length += np.linalg.norm(next_state - state)
            state = next_state
            if terminal: success += 1
            num_steps += 1

        # Evenly spaced timestamps: time[j] = j * max_time / len(path_list)
        time = np.linspace(0, args.max_time, len(path_list), endpoint=False)

        if args.animate_permit:
            position = np.asarray(path_list)
            rotation = np.full((len(time), 3, 3), np.identity(3))
            animate(args.st,
                    args.go,
                    time,
                    position,
                    rotation,
                    args.world,
                    filename='episode_' + str(i) + '.mp4',
                    show_axes=True)

        if terminal and path_length < args.best_path_length:
            args.best_path_length = path_length
            args.final_path = path_list

        args.dataloader = load_dataset(args.train_set,
                                       args.train_labels,
                                       batch_size=20)
        train(args)
        # Decay the exploration rate using update_epsilon()
        args.epsilon = update_epsilon(args.epsilon, args.decay_rate)

        # Track rewards
        reward_list.append(tot_reward)
        position_list.append(next_state.tolist())
        success_array_5 += success
        # Record the average success count over each 5-episode window
        # (the first window is also logged early, after episode 0).
        if i == 0 or i % 5 == 4:
            success_list.append(success_array_5 / 5)
            success_array_5 = 0
        success = 0
    return reward_list, position_list, success_list
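
The loop above leans on helpers defined elsewhere, notably choose_action and update_epsilon. For reference, a minimal sketch of both; the epsilon-greedy rule and multiplicative decay are standard choices assumed here, not necessarily this codebase's implementation:

import numpy as np

rng = np.random.default_rng()

def choose_action_sketch(actions, q_lookup, state, epsilon):
    """Epsilon-greedy selection over a discretized action set (assumed interface)."""
    if rng.random() < epsilon:
        idx = int(rng.integers(len(actions)))                         # explore
    else:
        idx = int(np.argmax([q_lookup(state, a) for a in actions]))   # exploit
    return actions[idx], idx

def update_epsilon_sketch(epsilon, decay_rate):
    """Multiplicative decay of the exploration rate."""
    return epsilon * decay_rate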