def __init__(
    self,
    env,
    num_init_random_rollouts=10,
    max_rollout_length=500,
    num_onplicy_iters=10,
    num_onpolicy_rollouts=10,
    training_epochs=60,
    training_batch_size=512,
    render=False,
    mpc_horizon=15,
    num_random_action_selection=4096,
    nn_layers=1,
):
    """Store the run configuration, collect a random dataset, build the policy.

    Args:
        env: Environment object; stored on the instance and handed to both
            ``utils.RandomPolicy`` and ``ModelBasedPolicy``.
        num_init_random_rollouts: Forwarded as the second argument of
            ``self._gather_rollouts`` when collecting the initial dataset.
        max_rollout_length: Stored as ``self._max_rollout_length``.
        num_onplicy_iters: Stored as ``self._num_onpolicy_iters`` (the
            parameter name is misspelled, but it is part of the public
            signature and therefore kept as-is).
        num_onpolicy_rollouts: Stored as ``self._num_onpolicy_rollouts``.
        training_epochs: Stored as ``self._training_epochs``.
        training_batch_size: Stored as ``self._training_batch_size``.
        render: Stored as ``self._render``.
        mpc_horizon: Passed to ``ModelBasedPolicy`` as ``horizon``.
        num_random_action_selection: Passed through to ``ModelBasedPolicy``
            under the same keyword.
        nn_layers: Accepted but not used anywhere in this method.
            NOTE(review): presumably meant to be forwarded to the policy --
            confirm against ``ModelBasedPolicy``'s signature.
    """
    # Plain configuration first: these attributes are set before
    # _gather_rollouts is invoked below, so that ordering is preserved.
    self._env = env
    self._max_rollout_length = max_rollout_length
    self._num_onpolicy_iters = num_onplicy_iters
    self._num_onpolicy_rollouts = num_onpolicy_rollouts
    self._training_epochs = training_epochs
    self._training_batch_size = training_batch_size
    self._render = render

    # Initial dataset, gathered with a random policy.
    logger.info('Gathering random dataset')
    random_policy = utils.RandomPolicy(env)
    self._random_dataset = self._gather_rollouts(
        random_policy, num_init_random_rollouts
    )

    # The policy is constructed from the random dataset collected above.
    logger.info('Creating policy')
    policy_kwargs = {
        'horizon': mpc_horizon,
        'num_random_action_selection': num_random_action_selection,
    }
    self._policy = ModelBasedPolicy(env, self._random_dataset, **policy_kwargs)

    # Clear any timings recorded so far and start the 'total' timer.
    timeit.reset()
    timeit.start('total')
def _log(self, dataset):
    """Emit the dataset's log output and the timing report, then restart timing.

    Args:
        dataset: Object exposing a ``log()`` method, which is called once.
    """
    # Stop the 'total' timer before any logging work is done.
    timeit.stop('total')

    # Dataset logging, followed by the tabular dump at info level.
    dataset.log()
    logger.dump_tabular(print_func=logger.info)

    # Empty debug line, then the timer's string form one line at a time.
    logger.debug('')
    timing_report = str(timeit)
    for report_line in timing_report.split('\n'):
        logger.debug(report_line)

    # Fresh timing state for whatever runs next.
    timeit.reset()
    timeit.start('total')
def _log(self, dataset):
    """Flush logging for one step: dataset log, tabular dump, timing report.

    The 'total' timer is stopped on entry and reset and restarted on exit.

    Args:
        dataset: Object whose ``log()`` method is invoked to record its
            own logging information.
    """
    # NOTE(review): another `_log` definition with the same statements
    # appears in this file; if both live in the same scope the later one
    # wins -- confirm which copy should be kept.

    # Halt the 'total' timer.
    timeit.stop('total')

    # Report: dataset log first, then the table routed through logger.info.
    dataset.log()
    logger.dump_tabular(print_func=logger.info)

    # Timing breakdown at debug level, preceded by an empty line.
    logger.debug('')
    timing_lines = str(timeit).split('\n')
    for entry in timing_lines:
        logger.debug(entry)

    # Clear recorded timings and start the 'total' timer again.
    timeit.reset()
    timeit.start('total')