Esempio n. 1
0
    def _trigger(self):
        """Log the player's running statistics to the trainer monitors, then reset them.

        Writes the mean/max of ``self._player_scores`` and
        ``self._player_distError`` under the ``expreplay/`` tags, followed by
        ``n_games``, ``n_success`` and ``n_success_ratio`` read from
        ``self.player``. A failure while logging the mean/max values is logged
        and does not prevent the counters from being reset.
        """
        # log player statistics in training
        scores = self._player_scores
        dist = self._player_distError
        try:
            mean_score, max_score = scores.average, scores.max
            self.trainer.monitors.put_scalar('expreplay/mean_score', mean_score)
            self.trainer.monitors.put_scalar('expreplay/max_score', max_score)
            mean_dist, max_dist = dist.average, dist.max
            self.trainer.monitors.put_scalar('expreplay/mean_dist', mean_dist)
            self.trainer.monitors.put_scalar('expreplay/max_dist', max_dist)
        except Exception:
            logger.exception("Cannot log training scores.")
        scores.reset()
        dist.reset()

        # monitor number of played games and successes of reaching the target
        # NOTE: ``np.asscalar(x)`` was only a wrapper around ``x.item()`` and
        # was removed in NumPy 1.23, so ``.item()`` is called directly.
        if self.player.num_games.count:
            self.trainer.monitors.put_scalar(
                'n_games', self.player.num_games.sum.item())
        else:
            self.trainer.monitors.put_scalar('n_games', 0)

        if self.player.num_success.count:
            self.trainer.monitors.put_scalar(
                'n_success', self.player.num_success.sum.item())
            self.trainer.monitors.put_scalar(
                'n_success_ratio',
                self.player.num_success.sum / self.player.num_games.sum)
        else:
            self.trainer.monitors.put_scalar('n_success', 0)
            self.trainer.monitors.put_scalar('n_success_ratio', 0)
        # reset stats
        self.player.reset_stat()
Esempio n. 2
0
    def run(self):
        """Thread body: keep running ``self.op`` on datapoints pulled from ``self._itr``.

        The loop waits whenever ``self._running`` is not set, which makes it
        pausable. ``tf.errors.CancelledError`` and
        ``tf.errors.OutOfRangeError`` end the loop silently, as does a
        ``RuntimeError`` whose message mentions a closed session; every other
        exception is logged. On the way out ``self.close_op`` is run on a
        best-effort basis and an exit message is logged.
        """
        with self.default_sess():
            try:
                self.reinitialize_dataflow()
                while True:
                    # pausable loop: block here until the flag is set again
                    if not self._running.is_set():
                        self._running.wait()

                    datapoint = next(self._itr)
                    feed_dict = _make_feeds(self.placehdrs, datapoint)
                    self.op.run(feed_dict=feed_dict)
            except (tf.errors.CancelledError, tf.errors.OutOfRangeError):
                # these two end the loop without being reported
                pass
            except Exception as err:
                session_closed = (isinstance(err, RuntimeError)
                                  and 'closed Session' in str(err))
                if not session_closed:
                    logger.exception("Exception in {}:".format(self.name))
            finally:
                # closing is best-effort; a failure here is ignored on purpose
                try:
                    self.close_op.run()
                except Exception:
                    pass
                logger.info("{} Exited.".format(self.name))
Esempio n. 3
0
 def _trigger(self):
     """Log mean and max of ``self._player_scores`` to the monitors, then reset it.

     A failure while reading or logging the values is logged and does not
     prevent the counter from being reset.
     """
     scores = self._player_scores
     try:
         mean_score, max_score = scores.average, scores.max
         self.trainer.monitors.put_scalar('expreplay/mean_score', mean_score)
         self.trainer.monitors.put_scalar('expreplay/max_score', max_score)
     except Exception:
         # Deliberately not a bare ``except:``: KeyboardInterrupt and
         # SystemExit must propagate instead of being logged and dropped.
         logger.exception("Cannot log training scores.")
     scores.reset()
Esempio n. 4
0
 def _trigger(self):
     """Report the average and maximum player score, then clear the counter.

     Any ``Exception`` raised while reading or reporting the values is
     logged; the counter is reset either way.
     """
     score_counter = self._player_scores
     try:
         avg_score = score_counter.average
         best_score = score_counter.max
         for tag, value in (('expreplay/mean_score', avg_score),
                            ('expreplay/max_score', best_score)):
             self.trainer.monitors.put_scalar(tag, value)
     except Exception:
         logger.exception("Cannot log training scores.")
     score_counter.reset()
Esempio n. 5
0
 def _trigger(self):
     """Report mean/max score for each of the ``NUM_LEVELS`` counters and reset them.

     The tags are suffixed with the level index. A failure for one level is
     logged and does not stop the remaining levels from being processed;
     every counter is reset regardless.
     """
     for level in range(NUM_LEVELS):
         counter = self._player_scores[level]
         try:
             avg_score = counter.average
             best_score = counter.max
             self.trainer.monitors.put_scalar(
                 'expreplay/mean_score_' + str(level), avg_score)
             self.trainer.monitors.put_scalar(
                 'expreplay/max_score_' + str(level), best_score)
         except Exception:
             logger.exception("Cannot log training scores.")
         counter.reset()
 def _trigger_epoch(self):
     """Log mean and max of every entry in ``self.player.stats``, then reset them.

     Each key ``k`` is reported as ``expreplay/mean_<k>`` and
     ``expreplay/max_<k>``. A failure for one key is logged and the
     remaining keys are still processed.
     """
     # log player statistics in training
     stats = self.player.stats
     for name, values in stats.items():
         try:
             mean_value, max_value = np.mean(values), np.max(values)
             self.trainer.monitors.put_scalar('expreplay/mean_' + name, mean_value)
             self.trainer.monitors.put_scalar('expreplay/max_' + name, max_value)
         except Exception:
             # Deliberately not a bare ``except:``: KeyboardInterrupt and
             # SystemExit must propagate instead of being logged and dropped.
             logger.exception("Cannot log training scores.")
     self.player.reset_stat()
Esempio n. 7
0
 def _trigger(self):
     """Report this agent's mean and max score, then reset the score counter.

     The mean is written to the logger and handed to ``mean_score_logger``
     before both values go to the trainer monitors. Any ``Exception`` along
     the way is logged with the agent name; the counter is reset either way.
     """
     from simulator.tools import mean_score_logger
     score_counter = self._player_scores
     try:
         avg_score = score_counter.average
         best_score = score_counter.max
         logger.info('{} mean_score: {}'.format(self.agent_name, avg_score))
         mean_score_logger(
             '{} mean_score: {}\n'.format(self.agent_name, avg_score))
         monitors = self.trainer.monitors
         monitors.put_scalar('expreplay/mean_score', avg_score)
         monitors.put_scalar('expreplay/max_score', best_score)
     except Exception:
         logger.exception(self.agent_name + " Cannot log training scores.")
     score_counter.reset()
Esempio n. 8
0
def eval_with_funcs(predictors, nr_eval, get_player_fn):
    """
    Play episodes on one worker thread per predictor and summarize the scores.

    Args:
        predictors ([PredictorBase]): one worker thread is started for each.
        nr_eval (int): number of episode scores to collect before the
            workers are asked to stop.
        get_player_fn: called as ``get_player_fn(train=False)`` inside each
            worker to build the player it evaluates with.

    Returns:
        tuple: ``(average, max)`` of the collected scores, or ``(0, 0)`` when
        no score was collected. Scores gathered before a failure are still
        reported.
    """
    class Worker(StoppableThread, ShareSessionThread):
        """Thread that keeps playing episodes and puts each score on a queue."""

        def __init__(self, func, queue):
            super(Worker, self).__init__()
            self._func = func
            self.q = queue

        def func(self, *args, **kwargs):
            # Raising here is how a running episode gets aborted once the
            # thread has been stopped; ``run`` catches the RuntimeError.
            if self.stopped():
                raise RuntimeError("stopped!")
            return self._func(*args, **kwargs)

        def run(self):
            with self.default_sess():
                player = get_player_fn(train=False)
                while not self.stopped():
                    try:
                        score = play_one_episode(player, self.func)
                        # print("Score, ", score)
                    except RuntimeError:
                        return
                    self.queue_put_stoppable(self.q, score)

    q = queue.Queue()
    threads = [Worker(f, q) for f in predictors]

    for k in threads:
        k.start()
        time.sleep(0.1)  # avoid simulator bugs
    stat = StatCounter()
    workers_stopped = False
    try:
        for _ in tqdm(range(nr_eval), **get_tqdm_kwargs()):
            stat.feed(q.get())
        logger.info("Waiting for all the workers to finish the last run...")
        for k in threads:
            k.stop()
        workers_stopped = True
        for k in threads:
            k.join()
        # collect the scores of episodes that finished while stopping
        while q.qsize():
            stat.feed(q.get())
    except Exception:
        # Not a bare ``except:``: KeyboardInterrupt / SystemExit must reach
        # the caller instead of being logged and turned into a result.
        logger.exception("Eval")
    finally:
        # On any failure the workers would otherwise keep playing forever.
        if not workers_stopped:
            for k in threads:
                k.stop()

    # Returning from inside ``finally`` would silently discard any in-flight
    # exception, so the result is produced after the try statement instead.
    if stat.count > 0:
        return (stat.average, stat.max)
    return (0, 0)
Esempio n. 9
0
    def reset_stats(self):
        """
        Gather the ``total_scores`` of every runner, clear them, and summarize.

        Returns:
            mean, max: the mean and maximum of the gathered scores, or
            ``(None, None)`` when computing them raises (the error is logged).
        """
        gathered = []
        for runner in self._runners:
            gathered.extend(runner.total_scores)
        # clear only after everything has been gathered
        for runner in self._runners:
            runner.total_scores.clear()

        try:
            mean_score = np.mean(gathered)
            max_score = np.max(gathered)
        except Exception:
            logger.exception("Cannot compute total scores in EnvRunner.")
            return None, None
        return mean_score, max_score
Esempio n. 10
0
    def _trigger(self):
        """Log per-agent running statistics to the trainer monitors, then reset them.

        For each of the ``self.agents`` agents the mean/max of its score and
        distance-error counters are written under ``expreplay/`` tags suffixed
        with the agent index. Afterwards ``n_games`` and, per agent,
        ``n_success_<i>`` / ``n_success_ratio_<i>`` are written from
        ``self.player``. A failure while logging one agent's mean/max values
        is logged and does not prevent its counters from being reset.
        """
        # log player statistics in training
        scores = self._player_scores
        dist = self._player_distError
        for i in range(0, self.agents):
            try:
                mean_score, max_score = scores[i].average, scores[i].max
                self.trainer.monitors.put_scalar(
                    "expreplay/mean_score_{}".format(i), mean_score)
                self.trainer.monitors.put_scalar(
                    "expreplay/max_score_{}".format(i), max_score)
                mean_dist, max_dist = dist[i].average, dist[i].max
                self.trainer.monitors.put_scalar(
                    "expreplay/mean_dist_{}".format(i), mean_dist)
                self.trainer.monitors.put_scalar(
                    "expreplay/max_dist_{}".format(i), max_dist)
            except Exception:
                logger.exception("Cannot log training scores.")
            scores[i].reset()
            dist[i].reset()

        # monitor number of played games and successes of reaching the target
        # NOTE: ``np.asscalar(x)`` was only a wrapper around ``x.item()`` and
        # was removed in NumPy 1.23, so ``.item()`` is called directly.
        if self.player.num_games.count:
            self.trainer.monitors.put_scalar(
                "n_games", self.player.num_games.sum.item())
        else:
            self.trainer.monitors.put_scalar("n_games", 0)

        for i in range(0, self.agents):

            if self.player.num_success[i].count:
                self.trainer.monitors.put_scalar(
                    "n_success_{}".format(i),
                    self.player.num_success[i].sum.item(),
                )
                self.trainer.monitors.put_scalar(
                    "n_success_ratio_{}".format(i),
                    self.player.num_success[i].sum / self.player.num_games.sum,
                )
            else:
                self.trainer.monitors.put_scalar("n_success_{}".format(i), 0)
                self.trainer.monitors.put_scalar(
                    "n_success_ratio_{}".format(i), 0)

        # reset stats
        self.player.reset_stat()
Esempio n. 11
0
    def _before_train(self):
        """Export the meta graph, save one checkpoint, and exit the interpreter.

        The meta graph is written to ``self.checkpoint_dir`` under a
        timestamped name. Disk-related errors raised while saving the
        checkpoint are logged and otherwise ignored.
        """
        # graph is finalized, OK to write it now.
        stamp = datetime.now().strftime('%m%d-%H%M%S')
        meta_path = os.path.join(self.checkpoint_dir,
                                 'graph-{}.meta'.format(stamp))
        self.saver.export_meta_graph(
            meta_path,
            collection_list=self.graph.get_all_collection_keys())

        # save
        disk_errors = (OSError, IOError, tf.errors.PermissionDeniedError,
                       tf.errors.ResourceExhaustedError)
        try:
            self.saver.save(tf.get_default_session(),
                            self.path,
                            global_step=tf.train.get_global_step(),
                            write_meta_graph=False)
            ckpt_state = tf.train.get_checkpoint_state(self.checkpoint_dir)
            logger.info("Model saved to %s." % ckpt_state.model_checkpoint_path)
        except disk_errors:  # disk error sometimes.. just ignore it
            logger.exception("Exception in ModelSaver!")
        # NOTE(review): this ends the process right after the first save --
        # confirm that stopping before any training step is intended.
        exit()
Esempio n. 12
0
    def _aggregate_batch(data_holder, use_list=False):
        """
        Combine a list of datapoints into one batch, component by component.

        Args:
            data_holder: sequence of datapoints. Component ``k`` of every
                datapoint is batched together; the number of components is
                taken from the first datapoint.
            use_list (bool): if True, every batched component is simply the
                list of per-datapoint values and nothing is converted.

        Returns:
            list: one entry per successfully batched component. When batching
            a component fails, the error is logged (and an IPython shell is
            opened if IPython can be imported) and that component is left out
            of the result.

        Raises:
            TypeError: if the first datapoint's component is not exactly an
                int, bool or float and has no ``dtype`` attribute.
        """
        error_msg = "batch must contain tensors, numbers, dicts or lists; found {}"

        size = len(data_holder[0])
        result = []
        for k in range(size):
            if use_list:
                result.append([x[k] for x in data_holder])
                continue

            dt = data_holder[0][k]
            batch = [x[k] for x in data_holder]

            # Validation only: anything that is not a plain number must expose
            # a ``dtype``; the dtype value itself is not used below.
            if type(dt) not in list(six.integer_types) + [bool, float]:
                try:
                    dt.dtype
                except AttributeError:
                    raise TypeError("Unsupported type to batch: {}".format(
                        type(dt)))
            try:
                if isinstance(dt, torch.Tensor):
                    out = None
                    if _use_shared_memory:
                        # If we're in a background process, concatenate directly into a
                        # shared memory tensor to avoid an extra copy
                        numel = sum(b.numel() for b in batch)
                        storage = dt.storage()._new_shared(numel)
                        out = dt.new(storage)
                    result.append(torch.stack(batch, 0, out=out))
                elif type(dt).__name__ == 'ndarray':
                    # array of string classes and object
                    if re.search('[SaUO]', dt.dtype.str) is not None:
                        raise TypeError(error_msg.format(dt.dtype))
                    result.append(
                        torch.stack([torch.from_numpy(b) for b in batch], 0))
                elif isinstance(dt, six.integer_types):
                    result.append(torch.LongTensor(batch))
                elif isinstance(dt, float):
                    result.append(torch.DoubleTensor(batch))
                elif isinstance(dt, six.string_types):
                    # NOTE(review): a plain ``str`` has no ``dtype`` and is
                    # rejected by the validation above, so only string types
                    # that expose ``dtype`` get here -- confirm that plain
                    # strings are meant to be unsupported.
                    result.append(batch)
                else:
                    raise TypeError((error_msg.format(type(dt))))
            except Exception as e:  # noqa
                # ``e.message`` does not exist on Python 3 exceptions; reading
                # it raised AttributeError here and hid the real error.
                logger.exception(str(e))
                logger.exception(
                    "Cannot batch data. Perhaps they are of inconsistent shape?"
                )
                if isinstance(dt, np.ndarray):
                    s = pprint.pformat([x[k].shape for x in data_holder])
                    logger.error("Shape of all arrays to be batched: " + s)
                try:
                    # open an ipython shell if possible
                    import IPython as IP
                    IP.embed()  # noqa
                except ImportError:
                    pass
        return result
Esempio n. 13
0
    def _trigger(self):
        """Log player statistics in training periodically.

        Writes, under ``expreplay/`` tags, the mean/max of the score, IoU,
        q-value and best-q-value counters (each only when it has samples),
        then the per-period counts kept on ``self.player``: games,
        backtracks, ``num_go_out``, successes and success ratio, average
        episode duration (only when it has samples) and the six per-action
        counters. All counters are reset afterwards.
        """
        logger.info("Logging stats... ")
        scores = self._player_scores
        qvals = self._player_qvals
        best_qs = self._player_best_qvals
        IoU = self._player_IOU

        def put(tag, value):
            # looked up on every call so a broken trainer/monitor surfaces at
            # the same statements it did before
            self.trainer.monitors.put_scalar(tag, value)

        def as_scalar(value):
            # ``np.asscalar`` (removed in NumPy 1.23) only called ``.item()``;
            # values without ``.item()`` are passed through unchanged.
            item = getattr(value, 'item', None)
            return item() if callable(item) else value

        def put_sum(tag, counter):
            # log the counter's sum, or 0 when it has no samples
            put(tag, as_scalar(counter.sum) if counter.count else 0)

        # (counter, first (tag, attribute), second (tag, attribute)); the
        # order of the pairs is the order in which the scalars are written
        tracked = [
            (scores,
             ('expreplay/mean_score', 'average'),
             ('expreplay/max_score', 'max')),
            (IoU,
             ('expreplay/mean_IoU', 'average'),
             ('expreplay/max_IoU', 'max')),
            (qvals,
             ('expreplay/max_qval', 'max'),
             ('expreplay/mean_qval', 'average')),
            (best_qs,
             ('expreplay/max_best_qval', 'max'),
             ('expreplay/mean_best_qval', 'average')),
        ]
        try:
            for counter, first, second in tracked:
                if counter.count:
                    for tag, attr in (first, second):
                        put(tag, getattr(counter, attr))
        except Exception:
            logger.exception("Cannot log training scores.")
        for counter, _, _ in tracked:
            counter.reset()

        player = self.player

        # monitor number of played games and successes of reaching the target
        put_sum('expreplay/n_games', player.num_games)
        # previously logged twice by a copy-pasted block; once is enough
        put_sum('expreplay/n_backtracked', player.num_backtracked)

        # count wall collisions
        put_sum('expreplay/num_go_out', player.num_go_out)

        if player.num_success.count:
            put('expreplay/n_success', as_scalar(player.num_success.sum))
            put('expreplay/n_success_ratio',
                player.num_success.sum / player.num_games.sum)
        else:
            put('expreplay/n_success', 0)
            put('expreplay/n_success_ratio', 0)

        # length of trials (nothing is logged when there are no samples)
        if player.episode_duration.count:
            put('expreplay/avg_episode_duration',
                as_scalar(player.episode_duration.average))

        # count different actions
        for action in range(6):
            put_sum('expreplay/num_act{}'.format(action),
                    getattr(player, 'num_act{}'.format(action)))

        # reset stats after logging to tensorboard
        player.reset_stat()