def policy_rollout(agent, path, t_interval=1, timesteps=200):
    for j in range(1):  # a single rollout; widen the range for repeated rollouts
        robot = SwimmingRobot(a1=0, a2=0, t_interval=t_interval)
        xs = [robot.x]
        ys = [robot.y]
        thetas = [robot.theta]
        a1s = [robot.a1]
        a2s = [robot.a2]
        steps = [0]
        # robot.randomize_state(enforce_opposite_angle_signs=True)
        robot_params = []
        robot_param = [
            robot.x, robot.y, robot.theta,
            float(robot.a1),
            float(robot.a2), robot.a1dot, robot.a2dot
        ]
        robot_params.append(robot_param)
        print(f'Beginning policy rollout {j + 1}')
        try:
            for i in range(timesteps):
                # rollout
                state = robot.state
                print(f'Iteration {i + 1}: initial state: {state}')
                old_x = robot.x
                action = agent.choose_action(state)
                print(f'Iteration {i + 1}: chosen action: {action}')
                robot.move(action=action)
                new_x = robot.x
                print(f'Iteration {i + 1}: the robot moved {new_x - old_x} in the x direction')

                # add values to lists
                xs.append(robot.x)
                ys.append(robot.y)
                thetas.append(robot.theta)
                a1s.append(robot.a1)
                a2s.append(robot.a2)
                steps.append(i + 1)
                robot_param = [
                    robot.x, robot.y, robot.theta,
                    float(robot.a1),
                    float(robot.a2), robot.a1dot, robot.a2dot
                ]
                robot_params.append(robot_param)

        except ZeroDivisionError as e:
            print(f'{e} occurred during policy rollout {j + 1}')

        # plotting
        make_rollout_graphs(xs, ys, thetas, a1s, a2s, steps, path=path)
        generate_csv(robot_params, path + "/policy_rollout.csv")
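A minimal usage sketch for the helper above, assuming a trained agent that exposes choose_action(state); the loader name and paths below are hypothetical, not part of the original code:

import os

agent = load_trained_agent("results/models/model_final.h5")  # hypothetical loader
out_dir = "results/policy_rollout_results"
os.makedirs(out_dir, exist_ok=True)  # the plots and CSV are written here

policy_rollout(agent, out_dir, t_interval=1, timesteps=200)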
Example #2
    def policy_rollout(self, timesteps=200, random_start=False):
        rollout_path = self.file_path + "/policy_rollout_results"
        if not os.path.exists(rollout_path):
            os.mkdir(rollout_path)
            os.chmod(rollout_path, 0o0777)

        reps = 5 if random_start else 1
        for rep in range(reps):
            rep_path = rollout_path + "/" + str(rep + 1)  # one directory per repetition
            if not os.path.exists(rep_path):
                os.mkdir(rep_path)
                os.chmod(rep_path, 0o0777)

            self.robot_in_action.set_state(*self._robot_original_state)
            self.robot_in_action.x = 0
            self.robot_in_action.y = 0
            assert self.robot_in_action.state == self._robot_original_state, 'robot state was not reset correctly'

            if self.is_physical_robot:
                encoders = [self.robot_in_action.encoder_val]
            else:
                xs = [self.robot_in_action.x]
                ys = [self.robot_in_action.y]
                thetas = [self.robot_in_action.theta]
            a1s = [self.robot_in_action.a1]
            a2s = [self.robot_in_action.a2]
            steps = [0]
            if random_start:
                self.robot_in_action.randomize_state(
                    enforce_opposite_angle_signs=True)
            robot_params = []

            if self.is_physical_robot:
                robot_param = [
                    float(self.robot_in_action.encoder_val),
                    float(self.robot_in_action.a1),
                    float(self.robot_in_action.a2), self.robot_in_action.a1dot,
                    self.robot_in_action.a2dot
                ]
            else:
                robot_param = [
                    self.robot_in_action.x, self.robot_in_action.y,
                    self.robot_in_action.theta,
                    float(self.robot_in_action.a1),
                    float(self.robot_in_action.a2), self.robot_in_action.a1dot,
                    self.robot_in_action.a2dot
                ]
            robot_params.append(robot_param)
            print('Beginning Policy Rollout')
            try:
                for i in range(timesteps):
                    # rollout
                    state = self.robot_in_action.state
                    print(f'Iteration {i + 1}: initial state: {state}')
                    if not self.is_physical_robot:
                        old_x = self.robot_in_action.x
                    action = self.choose_action(state)
                    print(f'Iteration {i + 1}: chosen action: {action}')
                    self.robot_in_action.move(action=action)

                    if self.is_physical_robot:
                        displacement = self.robot_in_action.encoder_val
                    else:
                        displacement = self.robot_in_action.x - old_x
                    print(f'Iteration {i + 1}: the robot moved {displacement} in the x direction')

                    # add values to lists
                    if self.is_physical_robot:
                        encoders.append(self.robot_in_action.encoder_val)
                    else:
                        xs.append(self.robot_in_action.x)
                        ys.append(self.robot_in_action.y)
                        thetas.append(self.robot_in_action.theta)
                    a1s.append(self.robot_in_action.a1)
                    a2s.append(self.robot_in_action.a2)
                    steps.append(i + 1)
                    if self.is_physical_robot:
                        robot_param = [
                            float(self.robot_in_action.encoder_val),
                            float(self.robot_in_action.a1),
                            float(self.robot_in_action.a2),
                            self.robot_in_action.a1dot,
                            self.robot_in_action.a2dot
                        ]
                    else:
                        robot_param = [
                            self.robot_in_action.x, self.robot_in_action.y,
                            self.robot_in_action.theta,
                            float(self.robot_in_action.a1),
                            float(self.robot_in_action.a2),
                            self.robot_in_action.a1dot,
                            self.robot_in_action.a2dot
                        ]
                    robot_params.append(robot_param)

            except ZeroDivisionError as e:
                print(f'{e} occurred during policy rollout')

            # self.robot_in_action = None

            # plotting
            if self.is_physical_robot:
                make_rollout_graphs_physical(encoders,
                                             a1s,
                                             a2s,
                                             steps,
                                             path=rep_path)
            else:
                make_rollout_graphs(xs,
                                    ys,
                                    thetas,
                                    a1s,
                                    a2s,
                                    steps,
                                    path=rep_path)
            generate_csv(robot_params, rep_path + "/policy_rollout.csv")
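The mkdir/chmod pairs repeated in this example can be collapsed with os.makedirs, which also creates missing parents and tolerates existing directories. A sketch of an equivalent helper (the name ensure_dir is an assumption, not part of the original code):

import os

def ensure_dir(path, mode=0o777):
    # create the directory and any missing parents; no error if it already exists
    os.makedirs(path, exist_ok=True)
    # chmod separately, because the mode passed to makedirs is masked by the umask
    os.chmod(path, mode)

Each "if not os.path.exists(...)" block above then reduces to a single ensure_dir(...) call.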
Example #3
    def perform_DQN(self):
        """
        :param agent: the RL agent
        :param batch_size: size of minibatch sampled from replay buffer
        :param C: network update frequency
        :return: agent, and other information about DQN
        """

        models_path = self.file_path + "/models"
        robot_data_path = self.file_path + "/robot_data"
        if not os.path.exists(models_path):
            os.mkdir(models_path)
            os.chmod(models_path, 0o0777)
        if not os.path.exists(robot_data_path):
            os.mkdir(robot_data_path)
            os.chmod(robot_data_path, 0o0777)

        avg_losses = []
        std_losses = []
        avg_rewards = []
        std_rewards = []
        avg_Qs = []
        std_Qs = []
        num_episodes = []

        try:
            # loop through each episode
            for e in range(1, self.episodes + 1):

                # save a model checkpoint ten times over the run
                if e % max(1, self.episodes // 10) == 0:
                    self.save_model(models_path, e)

                self.robot_in_action.set_state(*self._robot_original_state)
                self.robot_in_action.x = 0
                self.robot_in_action.y = 0
                assert self.robot_in_action.state == self._robot_original_state, 'robot state was not reset correctly'

                # theta = random.uniform(-pi/4, pi/4) if self.randomize_theta else 0
                # self.robot_in_action.theta = theta
                state = self.robot_in_action.state
                rewards = []
                losses = []
                Qs = []
                robot_params = []
                if self.is_physical_robot:
                    robot_param = [
                        float(self.robot_in_action.encoder_val),
                        float(self.robot_in_action.a1),
                        float(self.robot_in_action.a2),
                        self.robot_in_action.a1dot, self.robot_in_action.a2dot
                    ]
                else:
                    robot_param = [
                        self.robot_in_action.x, self.robot_in_action.y,
                        self.robot_in_action.theta,
                        float(self.robot_in_action.a1),
                        float(self.robot_in_action.a2),
                        self.robot_in_action.a1dot, self.robot_in_action.a2dot
                    ]
                robot_params.append(robot_param)

                # loop through each iteration
                for i in range(1, self.iterations + 1):
                    # print('In', e, 'th episode,', i, 'th iteration, the initial state is: ', state)
                    action = self.choose_action(state, epsilon_greedy=True)
                    print(f'In episode {e}, iteration {i}, the chosen action is: {action}')
                    reward, next_state = self.act(action=action)
                    if self.is_physical_robot:
                        robot_param = [
                            float(self.robot_in_action.encoder_val),
                            float(self.robot_in_action.a1),
                            float(self.robot_in_action.a2),
                            self.robot_in_action.a1dot,
                            self.robot_in_action.a2dot
                        ]
                    else:
                        robot_param = [
                            self.robot_in_action.x, self.robot_in_action.y,
                            self.robot_in_action.theta,
                            float(self.robot_in_action.a1),
                            float(self.robot_in_action.a2),
                            self.robot_in_action.a1dot,
                            self.robot_in_action.a2dot
                        ]
                    robot_params.append(robot_param)
                    print('The reward is: {}'.format(reward))
                    rewards.append(reward)
                    # print('In', e, 'th episode,', i, 'th iteration, the state after transition is: ', next_state)
                    self.remember(state, action, reward, next_state)
                    state = next_state
                    if len(self.memory) > self.memory_size / self.memory_buffer_coef:
                        loss, Q = self.replay()
                        losses.append(loss)
                        Qs.append(Q)
                        print('The average loss is: {}'.format(loss))
                        print('The average Q is: {}'.format(Q))

                    if i % self.network_update_freq == 0:
                        self.update_model()

                num_episodes.append(e)
                avg_rewards.append(np.mean(rewards))
                std_rewards.append(np.std(rewards))
                avg_losses.append(np.mean(losses))
                std_losses.append(np.std(losses))
                avg_Qs.append(np.mean(Qs))
                std_Qs.append(np.std(Qs))

                # self.robot_in_action = None

                generate_csv(robot_params,
                             robot_data_path + "/episode {}.csv".format(e))

        except Exception:
            traceback.print_exc()

        finally:

            # save learning data
            learning_path = self.file_path + "/learning_results"
            if not os.path.exists(learning_path):
                os.mkdir(learning_path)
                os.chmod(learning_path, 0o0777)

            save_learning_data(learning_path, num_episodes, avg_rewards,
                               std_rewards, avg_losses, std_losses, avg_Qs,
                               std_Qs)

        return num_episodes, avg_rewards, std_rewards, avg_losses, std_losses, avg_Qs, std_Qs
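perform_DQN relies on remember() and replay(), which this excerpt does not show; the sketch below is a conventional uniform experience-replay store of the kind those calls imply (the class and parameter names are assumptions):

import random
from collections import deque

class ReplayBuffer:
    """Uniform experience replay; a sketch, not the original class."""

    def __init__(self, memory_size=10000):
        # a bounded deque silently discards the oldest transitions when full
        self.memory = deque(maxlen=memory_size)

    def remember(self, state, action, reward, next_state):
        self.memory.append((state, action, reward, next_state))

    def sample(self, batch_size=32):
        # uniform random minibatch for the training step inside replay()
        return random.sample(self.memory, batch_size)

    def __len__(self):
        return len(self.memory)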
Example #4

            # rollout
            state = robot.state
            print(f'Iteration {i + 1}: initial state: {state}')
            old_x = robot.x
            action = agent.choose_action(state)
            print(f'Iteration {i + 1}: chosen action: {action}')
            robot.move(action=action)
            new_x = robot.x
            print(f'Iteration {i + 1}: the robot moved {new_x - old_x} in the x direction')

            # add values to lists
            xs.append(robot.x)
            a1s.append(robot.a1)
            a2s.append(robot.a2)
            steps.append(i + 1)
            robot_param = [
                robot.x, robot.y, robot.theta,
                float(robot.a1),
                float(robot.a2), robot.a1dot, robot.a2dot
            ]
            robot_params.append(robot_param)

    except ZeroDivisionError as e:
        print(f'{e} occurred during policy rollout {j + 1}')

    # plotting
    make_graphs(xs, a1s, a2s, steps, j + 1, trial_name=TRIAL_NAME)
    generate_csv(robot_params, PATH + "/" + str(j + 1) + " th rollout.csv")
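The fragment above references module-level TRIAL_NAME and PATH constants that lie outside the excerpt; a sketch of the setup it assumes (the values are placeholders):

import os

TRIAL_NAME = "swimmer_trial"                 # placeholder value
PATH = os.path.join("results", TRIAL_NAME)   # placeholder output root
os.makedirs(PATH, exist_ok=True)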
Example #5

    try:
        for i in range(TIMESTEPS):

            # rollout
            state = robot.state
            print(f'Iteration {i + 1}: initial state: {state}')
            old_x, old_y = robot.x, robot.y
            action = agent.choose_action(state)
            print(f'Iteration {i + 1}: chosen action: {action}')
            robot.move(action=action)
            new_x, new_y = robot.x, robot.y
            print(f'Iteration {i + 1}: the robot moved {sqrt((new_x - old_x) ** 2 + (new_y - old_y) ** 2)}')

            # add values to lists
            xs.append(robot.x)
            ys.append(robot.y)
            a1s.append(robot.a1)
            a2s.append(robot.a2)
            steps.append(i+1)
            robot_param = [robot.x, robot.y, robot.theta, float(robot.a1), float(robot.a2), robot.a1dot, robot.a2dot]
            robot_params.append(robot_param)

    except ZeroDivisionError as e:
        print(f'{e} occurred during policy rollout {j + 1}')

    # plotting
    make_graphs(xs, ys, a1s, a2s, steps, j+1, trial_name=TRIAL_NAME)
    generate_csv(robot_params, "Trial" + TRIAL_NAME + str(j+1) + " th rollout "
                  + str(datetime.datetime.now()).replace(' ', '').replace(':','') + ".csv")
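The filename above strips spaces and colons from str(datetime.datetime.now()) by hand; datetime.strftime yields a filesystem-safe timestamp directly. A sketch reusing the fragment's TRIAL_NAME and j:

import datetime

stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")  # e.g. 20240131-174502
generate_csv(robot_params, "Trial{}_{}th_rollout_{}.csv".format(TRIAL_NAME, j + 1, stamp))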

Example #6

        x_pos.append(robot.x)
        y_pos.append(robot.y)
        thetas.append(robot.theta)
        times.append(t + 1)
        a1.append(robot.a1)
        a2.append(robot.a2)
        robot_param = [robot.x,
                       robot.y,
                       robot.theta,
                       float(robot.a1),
                       float(robot.a2),
                       robot.a1dot,
                       robot.a2dot]
        robot_params.append(robot_param)

    generate_csv(robot_params, "results/RobotTestResults/SwimmerIdealFluid/" + "result.csv")

    # view results
    # print('x positions are: ' + str(x_pos))
    # print('y positions are: ' + str(y_pos))
    # print('thetas are: ' + str(thetas))

    plt.plot(x_pos, y_pos)
    plt.xlabel('x')
    plt.ylabel('y')
    plt.title('y vs x')
    plt.show()

    plt.plot(times, a1)
    plt.ylabel('a1 displacements')
    plt.show()
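plt.show() opens a blocking window and needs a display; for headless runs the same figures can be written to disk instead, as the next example does. A sketch under that assumption:

plt.plot(times, a1)
plt.ylabel('a1 displacements')
plt.xlabel('time')
plt.savefig('results/RobotTestResults/SwimmerIdealFluid/a1.png')  # path mirrors the CSV above
plt.close()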
Example #7
plt.plot(times, a2dots, plot_style, markersize=marker_size)
plt.ylabel('a2dot')
plt.xlabel('time')
plt.savefig(os.path.join(plots_dir, 'a2dot.png'))
# plt.show()
plt.close()

plt.plot(times, ks, plot_style, markersize=marker_size)
plt.ylabel('k')
plt.xlabel('time')
plt.savefig(os.path.join(plots_dir, 'k.png'))
# plt.show()
plt.close()

plt.plot(times, cs, plot_style, markersize=marker_size)
plt.ylabel('c')
plt.xlabel('time')
plt.savefig(os.path.join(plots_dir, 'c.png'))
# plt.show()
plt.close()

plt.plot(times, a1ddots, plot_style, markersize=marker_size)
plt.ylabel('a1ddot')
plt.xlabel('time')
plt.savefig(os.path.join(plots_dir, 'a1ddot.png'))
# plt.show()
plt.close()

csv_path = os.path.join(plots_dir, 'rollout.csv')
generate_csv(robot_params, csv_path)
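The four save-and-close blocks above repeat one pattern; a sketch of a helper that factors it out (the function name is an assumption):

import os
import matplotlib.pyplot as plt

def save_time_series(times, values, label, plots_dir, plot_style='o', marker_size=3):
    # plot one series against time, save it as <label>.png, and free the figure
    plt.plot(times, values, plot_style, markersize=marker_size)
    plt.ylabel(label)
    plt.xlabel('time')
    plt.savefig(os.path.join(plots_dir, label + '.png'))
    plt.close()

With it, the blocks above reduce to a loop:

for label, series in [('a2dot', a2dots), ('k', ks), ('c', cs), ('a1ddot', a1ddots)]:
    save_time_series(times, series, label, plots_dir)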