Example #1
    def __init__(self,
                 env,
                 num_init_random_rollouts=10,
                 max_rollout_length=500,
                 num_onpolicy_iters=10,
                 num_onpolicy_rollouts=10,
                 training_epochs=60,
                 training_batch_size=512,
                 render=False,
                 mpc_horizon=15,
                 num_random_action_selection=4096,
                 nn_layers=1):
        self._env = env
        self._max_rollout_length = max_rollout_length
        self._num_onpolicy_iters = num_onpolicy_iters
        self._num_onpolicy_rollouts = num_onpolicy_rollouts
        self._training_epochs = training_epochs
        self._training_batch_size = training_batch_size
        self._render = render

        logger.info('Gathering random dataset')
        self._random_dataset = self._gather_rollouts(utils.RandomPolicy(env),
                                                     num_init_random_rollouts)

        logger.info('Creating policy')
        self._policy = ModelBasedPolicy(
            env,
            self._random_dataset,
            horizon=mpc_horizon,
            num_random_action_selection=num_random_action_selection,
            nn_layers=nn_layers)

        timeit.reset()
        timeit.start('total')
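
The constructor above calls self._gather_rollouts, which is not shown in these examples. Below is a minimal sketch of what such a helper might look like, reusing the env and attribute names from the constructor; the utils.Dataset container, its add(state, action, next_state, reward, done) method, and the policy's get_action(state) method are all assumptions, not confirmed by the source.

    def _gather_rollouts(self, policy, num_rollouts):
        # Hypothetical sketch: run `policy` in the environment for
        # `num_rollouts` episodes and record every transition.
        # `utils.Dataset` and its `add` signature are assumed names.
        dataset = utils.Dataset()
        for _ in range(num_rollouts):
            state = self._env.reset()
            done = False
            t = 0
            while not done and t < self._max_rollout_length:
                if self._render:
                    self._env.render()
                action = policy.get_action(state)
                next_state, reward, done, _ = self._env.step(action)
                dataset.add(state, action, next_state, reward, done)
                state = next_state
                t += 1
        return dataset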
Example #2
    def run_q1(self):
        """
        Train on a dataset, and see how good the learned dynamics model's predictions are.

        Implementation details:
            (i) Train using self._random_dataset.
            (ii) For each rollout, use the initial state and all actions to predict the future states.
                 Store these predicted states in the pred_states list.
                 NOTE: you should *not* use any of the states in states[1:]; only use states[0].
            (iii) After predicting the future states, the provided plotting code plots the actual vs.
                  predicted states and saves the figures to the experiment's folder. You do not need to
                  modify that code.
        """
        logger.info('Training policy...')
        ### PROBLEM 1
        ### YOUR CODE HERE
        self._train_policy(self._random_dataset)

        logger.info('Evaluating predictions...')
        for r_num, (states, actions, _, _,
                    _) in enumerate(self._random_dataset.rollout_iterator()):
            pred_states = []

            ### PROBLEM 1
            ### YOUR CODE HERE
            # Open-loop prediction: start from the true initial state and
            # repeatedly feed the model's own prediction back in, using
            # only the recorded actions (never the recorded states[1:]).
            state_i = np.asarray(states[0])
            pred_states.append(state_i)
            for i in range(len(states) - 1):
                state_i = self._policy.predict(state_i, np.asarray(actions[i]))
                pred_states.append(state_i)

            states = np.asarray(states)
            pred_states = np.asarray(pred_states)

            state_dim = states.shape[1]
            # Round cols up so rows * cols >= state_dim; otherwise trailing
            # state dimensions are silently dropped by the zip below.
            # squeeze=False keeps `axes` 2-D so ravel() works for any grid.
            rows = int(np.sqrt(state_dim))
            cols = int(np.ceil(state_dim / float(rows)))
            f, axes = plt.subplots(rows, cols, figsize=(3 * cols, 3 * rows),
                                   squeeze=False)
            f.suptitle(
                'Model predictions (red) versus ground truth (black) for open-loop predictions'
            )
            for i, (ax, state_i, pred_state_i) in enumerate(
                    zip(axes.ravel(), states.T, pred_states.T)):
                ax.set_title('state {0}'.format(i))
                ax.plot(state_i, color='k')
                ax.plot(pred_state_i, color='r')
            plt.tight_layout()
            plt.subplots_adjust(top=0.90)
            f.savefig(os.path.join(logger.dir,
                                   'prediction_{0:03d}.png'.format(r_num)),
                      bbox_inches='tight')

        logger.info('All plots saved to folder')
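
run_q1 relies on self._train_policy, which is not part of this example. A plausible sketch follows, assuming the dataset exposes a random_iterator(batch_size) minibatch generator and the policy a train_step(states, actions, next_states) method that returns a loss; all three of those names are assumptions.

    def _train_policy(self, dataset):
        # Hypothetical sketch: fit the dynamics model by iterating over
        # random minibatches for a fixed number of epochs. The
        # `random_iterator` and `train_step` APIs are assumed.
        losses = []
        for _ in range(self._training_epochs):
            for states, actions, next_states, _, _ in dataset.random_iterator(
                    self._training_batch_size):
                losses.append(self._policy.train_step(states, actions, next_states))
        logger.info('Mean training loss: {0:.3f}'.format(np.mean(losses)))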
Example #3
    def run_bonus_q3(self):
        """
        Starting with the random dataset, train the policy on the dataset, gather rollouts with the policy,
        append the new rollouts to the existing dataset, and repeat
        """
        dataset = self._random_dataset

        # itr = -1 logs statistics of the initial random dataset before
        # any on-policy training has happened.
        itr = -1
        logger.info('Iteration {0}'.format(itr))
        logger.record_tabular('Itr', itr)
        self._log(dataset)

        for itr in range(self._num_onpolicy_iters + 1):
            logger.info('Iteration {0}'.format(itr))
            logger.record_tabular('Itr', itr)

            ### PROBLEM 3
            ### YOUR CODE HERE
            logger.info('Training policy...')
            self._train_policy(dataset)

            ### PROBLEM 3
            ### YOUR CODE HERE
            logger.info('Gathering rollouts...')
            new_dataset = self._gather_rollouts_cross_entropy(
                self._policy, self._num_onpolicy_rollouts)

            ### PROBLEM 3
            ### YOUR CODE HERE
            logger.info('Appending dataset...')
            dataset.append(new_dataset)

            self._log(new_dataset)
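
The _gather_rollouts_cross_entropy helper used above is referenced but never defined in these examples. The name suggests actions are selected with the cross-entropy method (CEM) over the MPC horizon rather than by uniform random shooting. A self-contained sketch of that action-selection step follows; every name in it, including the cost_fn callback, is hypothetical.

    def cem_action(cost_fn, action_dim, horizon=15, pop_size=400,
                   n_elite=40, n_iters=4):
        # Hypothetical CEM planner: sample candidate action sequences from
        # a Gaussian, keep the lowest-cost elites, refit the Gaussian to
        # them, and repeat. `cost_fn` maps an array of shape
        # (pop_size, horizon, action_dim) to one cost per sequence.
        mean = np.zeros((horizon, action_dim))
        std = np.ones((horizon, action_dim))
        for _ in range(n_iters):
            candidates = mean + std * np.random.randn(pop_size, horizon, action_dim)
            costs = cost_fn(candidates)
            elites = candidates[np.argsort(costs)[:n_elite]]
            mean, std = elites.mean(axis=0), elites.std(axis=0)
        # MPC-style: execute only the first action of the final mean sequence.
        return mean[0]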
Example #4
    def run_bonus_q2(self):
        """
        Train the model-based policy on a random dataset, and evaluate the performance of the resulting policy
        """
        logger.info('Random policy')
        self._log(self._random_dataset)

        logger.info('Training policy...')
        ### PROBLEM 2
        ### YOUR CODE HERE
        self._train_policy(self._random_dataset)

        logger.info('Evaluating policy...')
        ### PROBLEM 2
        ### YOUR CODE HERE
        eval_dataset = self._gather_rollouts_cross_entropy(
            self._policy, self._num_onpolicy_rollouts)

        logger.info('Trained policy')
        self._log(eval_dataset)
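
self._log appears throughout these examples without a definition. Since it is always called on a dataset of rollouts, a reasonable sketch is tabular logging of per-rollout return statistics; the rollout_iterator usage mirrors run_q1, while the tabular key names and logger.dump_tabular are assumptions.

    def _log(self, dataset):
        # Hypothetical sketch: summarize the total reward of each rollout
        # in `dataset`. Key names and `dump_tabular` are assumed.
        returns = [np.sum(rewards) for _, _, _, rewards, _
                   in dataset.rollout_iterator()]
        logger.record_tabular('ReturnAvg', np.mean(returns))
        logger.record_tabular('ReturnStd', np.std(returns))
        logger.record_tabular('ReturnMin', np.min(returns))
        logger.record_tabular('ReturnMax', np.max(returns))
        logger.dump_tabular()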
Example #5
    def run_test(self):
        logger.info('Training policy...')
        ### PROBLEM 1
        ### YOUR CODE HERE
        self._train_policy(self._random_dataset)

        logger.info('Evaluating predictions...')
        for r_num, (states, actions, _, _,
                    _) in enumerate(self._random_dataset.rollout_iterator()):
            pred_states = []

            ### PROBLEM 1
            ### YOUR CODE HERE
            # Open-loop prediction, as in run_q1: roll the learned model
            # forward from states[0] using only the recorded actions.
            state_i = np.asarray(states[0])
            pred_states.append(state_i)
            for i in range(len(states) - 1):
                state_i = self._policy.predict(state_i, np.asarray(actions[i]))
                pred_states.append(state_i)

            states = np.asarray(states)
            pred_states = np.asarray(pred_states)

            state_dim = states.shape[1]
            # Round cols up so rows * cols >= state_dim, and keep `axes`
            # 2-D with squeeze=False so ravel() works for any grid size.
            rows = int(np.sqrt(state_dim))
            cols = int(np.ceil(state_dim / float(rows)))
            f, axes = plt.subplots(rows, cols, figsize=(3 * cols, 3 * rows),
                                   squeeze=False)
            f.suptitle(
                'Model predictions (red) versus ground truth (black) for open-loop predictions'
            )
            for i, (ax, state_i, pred_state_i) in enumerate(
                    zip(axes.ravel(), states.T, pred_states.T)):
                ax.set_title('state {0}'.format(i))
                ax.plot(state_i, color='k')
                ax.plot(pred_state_i, color='r')
            plt.tight_layout()
            plt.subplots_adjust(top=0.90)
            f.savefig(os.path.join(logger.dir,
                                   'prediction_{0:03d}.png'.format(r_num)),
                      bbox_inches='tight')

        logger.info('All plots saved to folder')
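
Both prediction loops hinge on self._policy.predict, which is defined in ModelBasedPolicy and not shown here. Learned dynamics models of this kind typically predict a normalized state delta rather than the next state directly; below is a minimal sketch under that assumption, with every attribute name (the normalization statistics and the network) hypothetical.

    def predict(self, state, action):
        # Hypothetical sketch of one-step dynamics prediction: normalize
        # the inputs, predict a normalized state difference with the
        # learned network, then unnormalize it and add it to the current
        # state. All attributes (`_state_mean`, `_dynamics_net`, etc.)
        # are assumed names.
        s = (state - self._state_mean) / (self._state_std + 1e-8)
        a = (action - self._action_mean) / (self._action_std + 1e-8)
        delta_norm = self._dynamics_net(np.concatenate([s, a]))
        return state + delta_norm * self._delta_std + self._delta_mean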