def evaluate_policy(env,
                    policy,
                    step,
                    L,
                    num_episodes,
                    num_eval_timesteps,
                    video_dir=None,
                    metric=None,
                    show=False):
    returns = []
    start = time.time()
    for i in range(num_episodes):
        # print(f"Eval episode: {i}...")
        # video = VideoRecorder(env, enabled=video_dir is not None and i == 0)
        s = 0
        states = []
        actions = []
        obs = env.reset()
        done = False
        total_reward = 0
        while (not done) and (s < num_eval_timesteps):
            with torch.no_grad():
                with eval_mode(policy):
                    action = policy.select_action(obs)

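            # scale the policy's [-1, 1] action to [0, action_space.high] element-wise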
            action_scale = env.action_space.high * (action + 1) / 2
            obs, reward, done, _ = env.step(action_scale)
            states.append(obs)
            actions.append(action_scale)
            if metric is not None:
                reward = metric(obs, action)
            # video.record()
            total_reward += reward
            s += 1
        returns.append(total_reward)  # cumulative (not averaged) episode return

        if show:
            plot_rollout(states, actions, pry=[1, 0, 2])
        else:
            plot_rollout(states,
                         actions,
                         pry=[1, 0, 2],
                         save=True,
                         loc=f"/{str(step)}")

    end = time.time()
    print(f"Rollout in {end - start:.2f} s, logged {len(states)} steps")
    L.info(f" - - Evaluated, mean reward {np.mean(returns)}, n={num_episodes}")
    return returns
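# A minimal usage sketch for evaluate_policy, assuming a Gym-style
# continuous-control environment and that the eval_mode / plot_rollout helpers
# used inside the function are importable. RandomPolicy, the environment name,
# and the logger below are illustrative assumptions, not part of the original code.
import logging

import gym
import numpy as np


class RandomPolicy:
    """Illustrative stand-in that emits actions in [-1, 1]."""

    def __init__(self, action_dim):
        self.action_dim = action_dim

    def select_action(self, obs):
        return np.random.uniform(-1.0, 1.0, self.action_dim)

    # eval_mode(policy) usually toggles train/eval flags; provide no-ops here.
    def train(self, mode=True):
        return self

    def eval(self):
        return self


log = logging.getLogger(__name__)
env = gym.make("Pendulum-v0")  # assumed environment name
policy = RandomPolicy(env.action_space.shape[0])

returns = evaluate_policy(env, policy, step=0, L=log,
                          num_episodes=2, num_eval_timesteps=100)
print(f"Mean return over 2 episodes: {np.mean(returns):.2f}")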
    def basic_rollout(self, s0, i_model, plot=False):
        # log.info(f"Running rollout from Euler angles Y:{s0[2]}, P:{s0[0]}, R:{s0[1]}, ")
        state_log = []
        action_log = []

        max_len = self.b_cfg.max_length
        cur_action, update = self.policy.get_action(s0)

        state_log.append(s0)
        action_log.append(cur_action)

        next_state, logvars = smart_model_step(i_model, s0, cur_action)
        state = push_history(next_state, s0)
        cost = 0
        for k in range(max_len):
            # print(f"Itr {k}")
            # print(f"Action {cur_action.tolist()}")
            # print(f"State {next_state.tolist()}")
            cur_action, update = self.policy.get_action(next_state)

            state_log.append(state)
            action_log.append(cur_action)

            next_state, logvars = smart_model_step(i_model, state, cur_action)
            state = push_history(next_state, state)
            # print(f"logvars {logvars}")
            # weight = 0 if k < 5 else 1
            if k == (max_len - 1):
                weight = self.t_c
            else:
                weight = self.l_c / max_len
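            # NOTE: `weight` is computed above but not applied; the weighted cost
            # update below is commented out, so every step contributes equally.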
            # cost += weight * get_reward_euler(next_state, cur_action)
            cost += get_reward_euler(next_state,
                                     cur_action,
                                     pry=self.cfg.pid.params.pry)

        if plot:
            plot_rollout(state_log,
                         np.stack(action_log).squeeze(),
                         pry=self.cfg.pid.params.pry)

        return cost / self.norm_cost, [state_log, action_log]
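# basic_rollout relies on push_history to keep a short window of past states
# stacked into the model input. Below is a minimal sketch of that pattern; the
# real helper in this codebase may use a different window length or ordering,
# so treat it purely as an illustration.
import numpy as np


def push_history_sketch(new_state, stacked, history=3):
    """Keep the `history` most recent raw states stacked newest-first."""
    new_state = np.asarray(new_state).ravel()
    stacked = np.asarray(stacked).ravel()
    dim = new_state.shape[0]
    combined = np.concatenate([new_state, stacked])
    return combined[:history * dim]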
Example #3
def mpc(cfg):
    log.info("============= Configuration =============")
    log.info(f"Config:\n{cfg.pretty()}")
    log.info("=========================================")

    env_name = cfg.env.params.name
    env = gym.make(env_name)
    env.reset()
    full_rewards = []

    if cfg.metric.name == 'Living':
        metric = living_reward
    elif cfg.metric.name == 'Rotation':
        metric = rotation_mat
    elif cfg.metric.name == 'Square':
        metric = squ_cost
    elif cfg.metric.name == 'Yaw':
        metric = yaw_r
    else:
        raise ValueError("Improper metric name passed")

    for s in range(cfg.experiment.seeds):
        log.info(f"Random Seed: {s}")
        total_costs = []
        data_rand = []
        total_steps = []
        r = 0
        while r < cfg.experiment.random:
            data_r = rollout(env, RandomController(env, cfg), cfg.experiment, metric=metric)
            plot_rollout(data_r[0], data_r[1], pry=cfg.pid.params.pry, save=cfg.save, loc=f"/R_{r}")
            rews = data_r[-2]
            sim_error = data_r[-1]
            if sim_error:
                print("Repeating strange simulation")
                continue
            # rand_costs.append(np.sum(rews) / len(rews))  # for minimization
            total_costs.append(np.sum(rews))  # for minimization
            # log.info(f" - Cost {np.sum(rews) / cfg.experiment.r_len}")
            r += 1

            # data_sample = subsample(data_r, cfg.policy.params.period)
            data_rand.append(data_r)
            total_steps.append(0)

        X, dX, U = to_XUdX(data_r)
        X, dX, U = combine_data(data_rand[:-1], (X, dX, U))
        msg = "Random rollouts completed: "
        msg += f"mean cumulative reward {np.mean(total_costs)}, "
        msg += f"mean flight length {cfg.policy.params.period * np.mean([np.shape(d[0])[0] for d in data_rand])}"
        log.info(msg)

        trial_log = dict(
            env_name=cfg.env.params.name,
            model=None,
            seed=cfg.random_seed,
            raw_data=data_rand,
            trial_num=-1,
            rewards=total_costs,
            steps=total_steps,
            nll=None,
        )
        save_log(cfg, -1, trial_log)

        model, train_log = train_model(X, U, dX, cfg.model)

        for i in range(cfg.experiment.num_roll - cfg.experiment.random):
            controller = MPController(env, model, cfg)

            r = 0
            cum_costs = []
            data_rs = []
            while r < cfg.experiment.repeat:
                data_r = rollout(env, controller, cfg.experiment, metric=metric)
                plot_rollout(data_r[0], data_r[1], pry=cfg.pid.params.pry, save=cfg.save, loc=f"/{str(i)}_{r}")
                rews = data_r[-2]
                sim_error = data_r[-1]

                if sim_error:
                    print("Repeating strange simulation")
                    continue
                # cum_costs.append(np.sum(rews) / len(rews))  # for minimization
                total_costs.append(np.sum(rews))  # for minimization
                # log.info(f" - Cost {np.sum(rews) / cfg.experiment.r_len}")
                r += 1

                # data_sample = subsample(data_r, cfg.policy.params.period)
                data_rs.append(data_r)
                total_steps.append(np.shape(X)[0])

            X, dX, U = combine_data(data_rs, (X, dX, U))
            msg = "Rollouts completed: "
            msg += f"mean cumulative reward {np.mean(total_costs)}, "
            msg += f"mean flight length {cfg.policy.params.period * np.mean([np.shape(d[0])[0] for d in data_rs])}"
            log.info(msg)

            trial_log = dict(
                env_name=cfg.env.params.name,
                model=model,
                seed=cfg.random_seed,
                raw_data=data_rs,
                trial_num=i,
                rewards=total_costs,
                steps=total_steps,
                nll=train_log,
            )
            save_log(cfg, i, trial_log)

            model, train_log = train_model(X, U, dX, cfg.model)

        fig = plot_rewards_over_trials(np.transpose(np.stack([total_costs])), env_name, save=True)
        fig.write_image(os.getcwd() + "/learning-curve.pdf")
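# The metric selection above is an if/elif chain on cfg.metric.name. An
# equivalent, slightly more compact pattern is a dictionary dispatch; this is
# only a sketch using the same metric functions referenced in the code above.
METRICS = {
    'Living': living_reward,
    'Rotation': rotation_mat,
    'Square': squ_cost,
    'Yaw': yaw_r,
}


def select_metric(name):
    try:
        return METRICS[name]
    except KeyError:
        raise ValueError("Improper metric name passed")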
def mpc(cfg):
    log.info("============= Configuration =============")
    log.info(f"Config:\n{cfg.pretty()}")
    log.info("=========================================")

    # plot_results_yaw(pts=cfg.data)
    # quit()

    env_name = cfg.env.params.name
    env = gym.make(env_name)
    env.reset()

    env.seed(cfg.random_seed, inertial=cfg.experiment.inertial)
    if cfg.experiment.inertial:
        log.info(
            f"Running experiment with inertial properties x:{env.Ixx}, y:{env.Iyy}")

    # Post-hoc yaw analysis (disabled): reload a saved trial, map its raw motor
    # commands onto the discrete yaw action set, and report/plot the yaw change
    # over the first 25 steps.

    if cfg.metric.name == 'Living':
        metric = living_reward
        log.info("Using metric: living reward")
    elif cfg.metric.name == 'Rotation':
        metric = rotation_mat
        log.info("Using metric: rotation matrix")
    elif cfg.metric.name == 'Square':
        metric = squ_cost
        log.info("Using metric: square cost")
    elif cfg.metric.name == 'Yaw':
        metric = yaw_r
        log.info("Using metric: yaw sliding mode")
    elif cfg.metric.name == 'Yaw2':
        metric = yaw_r2
        log.info("Using metric: yaw base")
    elif cfg.metric.name == 'Yaw3':
        metric = yaw_r3
        log.info("Using metric: yaw rate")
    else:
        raise ValueError("Improper metric name passed")

    for s in range(cfg.experiment.seeds):
        log.info(f"Random Seed: {s}")
        total_costs = []
        data_rand = []
        total_steps = []
        r = 0
        while r < cfg.experiment.random:
            data_r = rollout(env,
                             RandomController(env, cfg),
                             cfg.experiment,
                             metric=metric)
            if env_name != 'CartPoleContEnv-v0':
                plot_rollout(data_r[0],
                             data_r[1],
                             pry=cfg.pid.params.pry,
                             save=cfg.save,
                             loc=f"/R_{r}")
            rews = data_r[-2]
            sim_error = data_r[-1]
            if sim_error:
                print("Repeating strange simulation")
                continue
            # rand_costs.append(np.sum(rews) / len(rews))  # for minimization
            total_costs.append(np.sum(rews))  # for minimization
            # log.info(f" - Cost {np.sum(rews) / cfg.experiment.r_len}")
            r += 1

            # data_sample = subsample(data_r, cfg.policy.params.period)
            data_rand.append(data_r)
            total_steps.append(0)

        X, dX, U = to_XUdX(data_r)
        X, dX, U = combine_data(data_rand[:-1], (X, dX, U))
        msg = "Random rollouts completed: "
        msg += f"mean cumulative reward {np.mean(total_costs)}, "
        msg += f"mean length {np.mean([len(a[0]) for a in data_rand])}"
        log.info(msg)
        last_yaw = np.max(np.abs(np.stack(data_r[0])[:, 2]))  # max absolute yaw (currently unused)

        trial_log = dict(
            env_name=cfg.env.params.name,
            # model=model,
            seed=cfg.random_seed,
            raw_data=data_r,
            # yaw_num=last_yaw,
            trial_num=-1,
            rewards=total_costs,
            steps=total_steps,
            # nll=train_log,
        )
        save_log(cfg, -1, trial_log)

        model, train_log = train_model(X.squeeze(), U, dX.squeeze(), cfg.model)

        for i in range(cfg.experiment.num_roll - cfg.experiment.random):
            controller = MPController(env, model, cfg)

            r = 0
            # cum_costs = []
            data_rs = []
            while r < cfg.experiment.repeat:
                data_r = rollout(env,
                                 controller,
                                 cfg.experiment,
                                 metric=metric)
                plot_rollout(data_r[0],
                             data_r[1],
                             pry=cfg.pid.params.pry,
                             save=cfg.save,
                             loc=f"/{str(i)}_{r}")
                rews = data_r[-2]
                sim_error = data_r[-1]

                if sim_error:
                    print("Repeating strange simulation")
                    continue
                # cum_costs.append(np.sum(rews) / len(rews))  # for minimization
                total_costs.append(np.sum(rews))  # for minimization
                # log.info(f" - Cost {np.sum(rews) / cfg.experiment.r_len}")
                r += 1

                # data_sample = subsample(data_r, cfg.policy.params.period)
                data_rs.append(data_r)
                total_steps.append(np.shape(X)[0])

            X, dX, U = combine_data(data_rs, (X, dX, U))
            msg = "Rollouts completed: "
            msg += f"cumulative reward {total_costs[-1]}, "
            msg += f"length {len(data_r[0])}"
            # log.info(f"Final yaw {180*np.array(data_r[0][-1][2])/np.pi}")
            log.info(msg)
            last_yaw = np.max(np.abs(np.stack(data_r[0])[:, 2]))  # max absolute yaw (currently unused)

            trial_log = dict(
                env_name=cfg.env.params.name,
                # model=model,
                seed=cfg.random_seed,
                raw_data=data_r,
                # yaw_num=last_yaw,
                trial_num=i,
                rewards=total_costs,
                steps=total_steps,
                nll=train_log,
            )
            save_log(cfg, i, trial_log)

            model, train_log = train_model(X, U, dX, cfg.model)

        fig = plot_rewards_over_trials(np.transpose(np.stack([total_costs])),
                                       env_name,
                                       save=True)
        fig.write_image(os.getcwd() + "/learning-curve.pdf")
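# Both mpc(cfg) variants read their settings from a Hydra/OmegaConf config
# (cfg.pretty(), cfg.env.params.name, cfg.experiment.*, ...). A typical entry
# point for such a function is sketched below; the config path and file name
# are assumptions rather than values taken from this code, the decorator
# signature differs slightly across Hydra versions, and in the original repo
# mpc itself may carry the decorator directly.
import logging

import hydra

log = logging.getLogger(__name__)


@hydra.main(config_path="conf", config_name="mpc")  # assumed config location
def main(cfg):
    log.info(f"Config:\n{cfg.pretty()}")
    mpc(cfg)


if __name__ == "__main__":
    main()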