def dump_scatterplot(self, z, epoch):
    try:
        import matplotlib.pyplot as plt
    except ImportError:
        logger.log(__file__ + ": Unable to load matplotlib. Consider "
                   "setting do_scatterplot to False")
        return
    # Plot the two latent dimensions with the largest standard deviation.
    dim_and_stds = [(i, np.std(z[:, i])) for i in range(z.shape[1])]
    dim_and_stds = sorted(dim_and_stds, key=lambda x: x[1])
    dim1 = dim_and_stds[-1][0]
    dim2 = dim_and_stds[-2][0]
    plt.figure(figsize=(8, 8))
    plt.scatter(z[:, dim1], z[:, dim2], marker='o', edgecolor='none')
    if self.model.dist_mu is not None:
        x1 = self.model.dist_mu[dim1:dim1 + 1]
        y1 = self.model.dist_mu[dim2:dim2 + 1]
        x2 = (self.model.dist_mu[dim1:dim1 + 1] +
              self.model.dist_std[dim1:dim1 + 1])
        y2 = (self.model.dist_mu[dim2:dim2 + 1] +
              self.model.dist_std[dim2:dim2 + 1])
        # Draw the mean-to-(mean + std) segment only when distribution
        # stats exist; x1/x2/y1/y2 are undefined otherwise.
        plt.plot([x1, x2], [y1, y2], color='k', linestyle='-', linewidth=2)
    axes = plt.gca()
    axes.set_xlim([-6, 6])
    axes.set_ylim([-6, 6])
    axes.set_title('dim {} vs dim {}'.format(dim1, dim2))
    plt.grid(True)
    save_file = osp.join(self.log_dir, 'scatter%d.png' % epoch)
    plt.savefig(save_file)
Example #2
    def evaluate(self, epoch):
        """
        Perform evaluation for this algorithm.

        :param epoch: The epoch number.
        """
        statistics = OrderedDict()

        train_batch = self.get_batch()
        statistics.update(self._statistics_from_batch(train_batch, "Train"))

        logger.log("Collecting samples for evaluation")
        test_paths = self._sample_eval_paths()
        statistics.update(
            get_generic_path_information(
                test_paths,
                stat_prefix="Test",
            ))
        statistics.update(self._statistics_from_paths(test_paths, "Test"))
        average_returns = get_average_returns(test_paths)
        statistics['AverageReturn'] = average_returns

        statistics['Epoch'] = epoch

        for key, value in statistics.items():
            logger.record_tabular(key, value)

        self.env.log_diagnostics(test_paths)
        logger.dump_tabular(with_prefix=False, with_timestamp=False)
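The evaluate() methods in these examples share one tabular-logging cycle: collect scalar statistics in an OrderedDict, record each key with logger.record_tabular, then flush the row with logger.dump_tabular. Below is a minimal sketch of that cycle, not taken from the repository; the epoch loop and the placeholder statistic values are illustrative, and only logger calls that already appear in these snippets are used.

from collections import OrderedDict

from railrl.core import logger

for epoch in range(3):
    statistics = OrderedDict()
    statistics['Epoch'] = epoch
    statistics['AverageReturn'] = 0.0  # placeholder value for illustration
    for key, value in statistics.items():
        logger.record_tabular(key, value)
    # One row is written per call; the key set should stay identical across
    # epochs (see the table-key assertion in the _try_to_eval example below).
    logger.dump_tabular(with_prefix=False, with_timestamp=False)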
Example #3
def get_action(self, obs):
    if self.last_solution is None:
        self.last_solution = np.hstack((np.tile(obs,
                                                self.planning_horizon), ))
    self.constraints['args'] = (obs, )
    result = optimize.minimize(
        self.cost_function,
        self.last_solution,
        jac=self.cost_jacobian,
        constraints=self.constraints,
        method='SLSQP',
        options=self.solver_params,
        bounds=self.bounds,
    )
    next_goal_state = result.x[:self.observation_dim]
    action = self.get_np_action(obs, next_goal_state)
    if np.isnan(action).any():
        logger.log("WARNING: SLSQP returned nan. Adding noise to last "
                   "action")
        action = self.last_solution[:self.action_dim] + np.random.uniform(
            self.env.action_space.low,
            self.env.action_space.high,
        ) / 100
    else:
        self.last_solution = result.x
    return action, {}
Example #4
    def evaluate(self, epoch, eval_paths=None):
        statistics = OrderedDict()
        statistics.update(self.eval_statistics)

        logger.log("Collecting samples for evaluation")
        if eval_paths:
            test_paths = eval_paths
        else:
            test_paths = self.get_eval_paths()
        statistics.update(
            eval_util.get_generic_path_information(
                test_paths,
                stat_prefix="Test",
            ))
        # if len(self._exploration_paths) > 0:
        #     statistics.update(eval_util.get_generic_path_information(
        #         self._exploration_paths, stat_prefix="Exploration",
        #     ))
        if hasattr(self.env, "log_diagnostics"):
            self.env.log_diagnostics(test_paths, logger=logger)
        if hasattr(self.env, "get_diagnostics"):
            statistics.update(self.env.get_diagnostics(test_paths))

        average_returns = eval_util.get_average_returns(test_paths)
        statistics['AverageReturn'] = average_returns
        for key, value in statistics.items():
            logger.record_tabular(key, value)
        self.need_to_update_eval_statistics = True
Example #5
    def _try_to_eval(self, epoch, eval_paths=None):
        logger.save_extra_data(self.get_extra_data_to_save(epoch))

        params = self.get_epoch_snapshot(epoch)
        logger.save_itr_params(epoch, params)

        if self._can_evaluate():
            self.evaluate(epoch, eval_paths=eval_paths)

            # params = self.get_epoch_snapshot(epoch)
            # logger.save_itr_params(epoch, params)
            table_keys = logger.get_table_key_set()
            if self._old_table_keys is not None:
                assert table_keys == self._old_table_keys, (
                    "Table keys cannot change from iteration to iteration."
                )
            self._old_table_keys = table_keys

            logger.record_tabular(
                "Number of train steps total",
                self._n_train_steps_total,
            )
            logger.record_tabular(
                "Number of env steps total",
                self._n_env_steps_total,
            )
            logger.record_tabular(
                "Number of rollouts total",
                self._n_rollouts_total,
            )

            if self.collection_mode != 'online-parallel':
                times_itrs = gt.get_times().stamps.itrs
                train_time = times_itrs['train'][-1]
                sample_time = times_itrs['sample'][-1]
                if 'eval' in times_itrs:
                    eval_time = times_itrs['eval'][-1] if epoch > 0 else -1
                else:
                    eval_time = -1
                epoch_time = train_time + sample_time + eval_time
                total_time = gt.get_times().total

                logger.record_tabular('Train Time (s)', train_time)
                logger.record_tabular('(Previous) Eval Time (s)', eval_time)
                logger.record_tabular('Sample Time (s)', sample_time)
                logger.record_tabular('Epoch Time (s)', epoch_time)
                logger.record_tabular('Total Train Time (s)', total_time)
            else:
                logger.record_tabular('Epoch Time (s)',
                                      time.time() - self._epoch_start_time)
            logger.record_tabular("Epoch", epoch)
            logger.dump_tabular(with_prefix=False, with_timestamp=False)
        else:
            logger.log("Skipping eval for now.")
Example #6
def _try_to_offline_eval(self, epoch):
    start_time = time.time()
    logger.save_extra_data(self.get_extra_data_to_save(epoch))
    self.offline_evaluate(epoch)
    params = self.get_epoch_snapshot(epoch)
    logger.save_itr_params(epoch, params)
    table_keys = logger.get_table_key_set()
    if self._old_table_keys is not None:
        assert table_keys == self._old_table_keys, (
            "Table keys cannot change from iteration to iteration.")
    self._old_table_keys = table_keys
    logger.dump_tabular(with_prefix=False, with_timestamp=False)
    logger.log("Eval Time: {0}".format(time.time() - start_time))
Example #7
def train(self):
    self.fix_data_set()
    logger.log("Done creating dataset.")
    num_batches_total = 0
    for epoch in range(self.num_epochs):
        for _ in range(self.num_batches_per_epoch):
            self.qf.train(True)
            self._do_training()
            num_batches_total += 1
        logger.push_prefix('Iteration #%d | ' % epoch)
        self.qf.train(False)
        self.evaluate(epoch)
        params = self.get_epoch_snapshot(epoch)
        logger.save_itr_params(epoch, params)
        logger.log("Done evaluating")
        logger.pop_prefix()
Example #8
def render(self):
    logger.push_prefix("HighLow(sign={0})\t".format(self._sign))
    if self._last_action is None:
        logger.log("No action taken.")
    else:
        if self._last_t == 0:
            logger.log("--- New Episode ---")
        logger.push_prefix("t={0}\t".format(self._last_t))
        with np_print_options(precision=4, suppress=False):
            logger.log("Action: {0}".format(self._last_action, ))
        logger.log("Reward: {0}".format(self._last_reward, ))
        logger.pop_prefix()
    logger.pop_prefix()
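Several of these snippets (train and render above, _end_epoch in Example #16) wrap their output in logger.push_prefix/logger.pop_prefix so that nested context is prepended to every logged line. Here is a minimal sketch of that prefix stack, with illustrative prefix strings; only calls shown in these examples are used.

from railrl.core import logger

logger.push_prefix('Iteration #0 | ')
logger.log("outer message")   # prefixed with 'Iteration #0 | '
logger.push_prefix('t=3\t')
logger.log("inner message")   # prefixed with both strings
logger.pop_prefix()           # removes the 't=3' prefix
logger.pop_prefix()           # removes the iteration prefix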
Example #9
    def evaluate(self, epoch, eval_paths=None):
        statistics = OrderedDict()
        statistics.update(self.eval_statistics)

        logger.log("Collecting samples for evaluation")
        if eval_paths:
            test_paths = eval_paths
        else:
            test_paths = self.get_eval_paths()
        statistics.update(
            eval_util.get_generic_path_information(
                test_paths,
                stat_prefix="Test",
            ))
        if len(self._exploration_paths) > 0:
            statistics.update(
                eval_util.get_generic_path_information(
                    self._exploration_paths,
                    stat_prefix="Exploration",
                ))
        if hasattr(self.env, "log_diagnostics"):
            self.env.log_diagnostics(test_paths, logger=logger)
        if hasattr(self.env, "get_diagnostics"):
            statistics.update(self.env.get_diagnostics(test_paths))

        if hasattr(self.eval_policy, "log_diagnostics"):
            self.eval_policy.log_diagnostics(test_paths, logger=logger)
        if hasattr(self.eval_policy, "get_diagnostics"):
            statistics.update(self.eval_policy.get_diagnostics(test_paths))

        process = psutil.Process(os.getpid())
        statistics['RAM Usage (Mb)'] = int(process.memory_info().rss / 1000000)

        statistics['Exploration Policy Noise'] = self._exploration_policy_noise

        average_returns = eval_util.get_average_returns(test_paths)
        statistics['AverageReturn'] = average_returns
        for key, value in statistics.items():
            logger.record_tabular(key, value)
        self.need_to_update_eval_statistics = True
Example #10
    def train(self):
        for epoch in range(self.num_epochs):
            logger.push_prefix('Iteration #%d | ' % epoch)

            start_time = time.time()
            for _ in range(self.num_steps_per_epoch):
                batch = self.get_batch()
                train_dict = self.get_train_dict(batch)

                self.policy_optimizer.zero_grad()
                policy_loss = train_dict['Policy Loss']
                policy_loss.backward()
                self.policy_optimizer.step()
            logger.log("Train time: {}".format(time.time() - start_time))

            start_time = time.time()
            self.evaluate(epoch)
            logger.log("Eval time: {}".format(time.time() - start_time))

            params = self.get_epoch_snapshot(epoch)
            logger.save_itr_params(epoch, params)
            logger.pop_prefix()
Example #11
def run_task(variant):
    from railrl.core import logger
    print(variant)
    logger.log("Hello from script")
    logger.log("variant: " + str(variant))
    logger.record_tabular("value", 1)
    logger.dump_tabular()
    # railrl's logger.log expects a single message string, so build the
    # message explicitly instead of passing the directory as a second
    # positional argument.
    logger.log("snapshot_dir: " + logger.get_snapshot_dir())
Example #12
    def evaluate(self, epoch, exploration_paths):
        """
        Perform evaluation for this algorithm.

        :param epoch: The epoch number.
        :param exploration_paths: List of dicts, each representing a path.
        """
        logger.log("Collecting samples for evaluation")
        paths = self._sample_eval_paths(epoch)
        statistics = OrderedDict()

        statistics.update(self._statistics_from_paths(paths, "Test"))
        statistics.update(self._get_other_statistics())
        statistics.update(self._statistics_from_paths(exploration_paths,
                                                      "Exploration"))

        statistics['AverageReturn'] = get_average_returns(paths)
        statistics['Epoch'] = epoch

        for key, value in statistics.items():
            logger.record_tabular(key, value)

        self.log_diagnostics(paths)
Example #13
def train_amortized_goal_chooser(
    goal_chooser,
    goal_conditioned_model,
    argmax_q,
    discount,
    replay_buffer,
    learning_rate=1e-3,
    batch_size=32,
    num_updates=1000,
):
    def get_loss(training=False):
        buffer = replay_buffer.get_replay_buffer(training)
        batch = buffer.random_batch(batch_size)
        obs = ptu.np_to_var(batch['observations'], requires_grad=False)
        goals = ptu.np_to_var(batch['goal_states'], requires_grad=False)
        goal = goal_chooser(obs, goals)
        actions = argmax_q(obs, goal, discount)
        final_state_predicted = goal_conditioned_model(
            obs,
            actions,
            goal,
            discount,
        ) + obs
        rewards = goal_chooser.reward_function(final_state_predicted, goals)
        return -rewards.mean()

    discount = ptu.np_to_var(discount * np.ones((batch_size, 1)))
    optimizer = optim.Adam(goal_chooser.parameters(), learning_rate)
    for i in range(num_updates):
        optimizer.zero_grad()
        # Optimize on the training buffer; the default (training=False)
        # would compute the loss on the validation buffer instead.
        loss = get_loss(training=True)
        loss.backward()
        optimizer.step()
        if i % 100 == 0:
            logger.log("Number updates: {}".format(i))
            logger.log("Train loss: {}".format(float(ptu.get_numpy(loss))))
            logger.log("Validation loss: {}".format(
                float(ptu.get_numpy(get_loss(training=False)))))
Example #14
def example(num_seconds, launch_time):
    logger.log(torch.__version__)
    date_format = '%m/%d/%Y %H:%M:%S %Z'
    date = datetime.now(tz=pytz.utc)
    logger.log("start")
    logger.log('Saved launch time {}'.format(launch_time))
    logger.log('Current date & time is: {}'.format(date.strftime(date_format)))
    if torch.cuda.is_available():
        x = torch.randn(3)
        logger.log(str(x.to(ptu.device)))

    date = date.astimezone(timezone('US/Pacific'))
    logger.log('Local date & time is: {}'.format(date.strftime(date_format)))
    for i in range(num_seconds):
        logger.log("Tick, {}".format(i))
        time.sleep(1)
    logger.log("end")
    logger.log('Local date & time is: {}'.format(date.strftime(date_format)))

    logger.log("start mujoco")
    from gym.envs.mujoco import HalfCheetahEnv
    e = HalfCheetahEnv()
    img = e.sim.render(32, 32)
    logger.log(str(sum(img)))
    logger.log("end mujoco_py")
Example #15
if __name__ == "__main__":
    # noinspection PyTypeChecker
    date_format = '%m/%d/%Y %H:%M:%S %Z'
    date = datetime.now(tz=pytz.utc)
    logger.log("start")
    variant = dict(
        num_seconds=10,
        launch_time=str(date.strftime(date_format)),
        logger_config=dict(),
        seed=4,
    )
    run_experiment(
        example,
        exp_name='here-no-doodad-example',
        mode='here_no_doodad',
        variant=variant,
        use_gpu=False,
        num_exps_per_instance=2,
    )
Example #16
def _end_epoch(self):
    logger.log("Epoch Duration: {0}".format(
        time.time() - self._epoch_start_time
    ))
    logger.log("Started Training: {0}".format(self._can_train()))
    logger.pop_prefix()