def _trigger(self):
    # log player statistics in training
    v = self._player_scores
    dist = self._player_distError
    try:
        mean, max = v.average, v.max
        self.trainer.monitors.put_scalar('expreplay/mean_score', mean)
        self.trainer.monitors.put_scalar('expreplay/max_score', max)
        mean, max = dist.average, dist.max
        self.trainer.monitors.put_scalar('expreplay/mean_dist', mean)
        self.trainer.monitors.put_scalar('expreplay/max_dist', max)
    except Exception:
        logger.exception("Cannot log training scores.")
    v.reset()
    dist.reset()

    # monitor number of played games and successes of reaching the target
    if self.player.num_games.count:
        self.trainer.monitors.put_scalar(
            'n_games', np.asscalar(self.player.num_games.sum))
    else:
        self.trainer.monitors.put_scalar('n_games', 0)

    if self.player.num_success.count:
        self.trainer.monitors.put_scalar(
            'n_success', np.asscalar(self.player.num_success.sum))
        self.trainer.monitors.put_scalar(
            'n_success_ratio',
            self.player.num_success.sum / self.player.num_games.sum)
    else:
        self.trainer.monitors.put_scalar('n_success', 0)
        self.trainer.monitors.put_scalar('n_success_ratio', 0)

    # reset stats
    self.player.reset_stat()
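# All the _trigger variants in this file lean on the same accumulator
# interface (feed/reset plus count/sum/average/max), matching tensorpack's
# tensorpack.utils.stats.StatCounter. A minimal sketch of that interface,
# for reference only (the real class offers more, e.g. a min property):
class _StatCounterSketch(object):
    """Accumulate scalar values and expose simple statistics."""

    def __init__(self):
        self._values = []

    def feed(self, value):
        self._values.append(value)

    def reset(self):
        self._values = []

    @property
    def count(self):
        return len(self._values)

    @property
    def sum(self):
        return sum(self._values)

    @property
    def average(self):
        assert self._values, "average of an empty counter"
        return sum(self._values) / len(self._values)

    @property
    def max(self):
        assert self._values, "max of an empty counter"
        return max(self._values)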
def run(self):
    with self.default_sess():
        try:
            self.reinitialize_dataflow()
            while True:
                # pausable loop
                if not self._running.is_set():
                    self._running.wait()
                dp = next(self._itr)
                feed = _make_feeds(self.placehdrs, dp)
                # _, sz = sess.run([self.op, self._sz], feed_dict=feed)
                self.op.run(feed_dict=feed)
        except (tf.errors.CancelledError, tf.errors.OutOfRangeError):
            pass
            # logger.exception("Exception in {}:".format(self.name))
        except Exception as e:
            if isinstance(e, RuntimeError) and 'closed Session' in str(e):
                pass
            else:
                logger.exception("Exception in {}:".format(self.name))
        finally:
            try:
                self.close_op.run()
            except Exception:
                pass
            logger.info("{} Exited.".format(self.name))
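# The "pausable loop" above checks a threading.Event owned by the thread. A
# self-contained sketch of that pattern (the pause/resume names are
# illustrative, not part of the original class):
import threading

class _PausableLoopSketch(threading.Thread):
    def __init__(self):
        super(_PausableLoopSketch, self).__init__(daemon=True)
        self._running = threading.Event()
        self._running.set()  # start in the running state

    def pause(self):
        self._running.clear()

    def resume(self):
        self._running.set()

    def run(self):
        while True:
            if not self._running.is_set():
                self._running.wait()  # block here until resume() is called
            pass  # one unit of work per iteration goes here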
def _trigger(self):
    v = self._player_scores
    try:
        mean, max = v.average, v.max
        self.trainer.monitors.put_scalar('expreplay/mean_score', mean)
        self.trainer.monitors.put_scalar('expreplay/max_score', max)
    except Exception:
        logger.exception("Cannot log training scores.")
    v.reset()
def _trigger(self):
    for level in range(NUM_LEVELS):
        v = self._player_scores[level]
        try:
            mean, max = v.average, v.max
            self.trainer.monitors.put_scalar(
                'expreplay/mean_score_' + str(level), mean)
            self.trainer.monitors.put_scalar(
                'expreplay/max_score_' + str(level), max)
        except Exception:
            logger.exception("Cannot log training scores.")
        v.reset()
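# The per-level indexing above implies one counter per level; presumably the
# counters were created along these lines (illustrative, not from this file):
#
#     self._player_scores = [StatCounter() for _ in range(NUM_LEVELS)]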
def _trigger_epoch(self):
    # log player statistics in training
    stats = self.player.stats
    for k, v in six.iteritems(stats):
        try:
            mean, max = np.mean(v), np.max(v)
            self.trainer.monitors.put_scalar('expreplay/mean_' + k, mean)
            self.trainer.monitors.put_scalar('expreplay/max_' + k, max)
        except Exception:
            logger.exception("Cannot log training scores.")
    self.player.reset_stat()
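# _trigger_epoch above assumes self.player.stats maps stat names to lists of
# per-episode values, which np.mean/np.max then reduce. One common way to
# build such a mapping (illustrative, not from this file):
#
#     self.stats = collections.defaultdict(list)
#     self.stats['score'].append(episode_score)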
def _trigger(self):
    from simulator.tools import mean_score_logger
    v = self._player_scores
    try:
        mean, max = v.average, v.max
        logger.info('{} mean_score: {}'.format(self.agent_name, mean))
        mean_score_logger('{} mean_score: {}\n'.format(self.agent_name, mean))
        self.trainer.monitors.put_scalar('expreplay/mean_score', mean)
        self.trainer.monitors.put_scalar('expreplay/max_score', max)
    except Exception:
        logger.exception(self.agent_name + " Cannot log training scores.")
    v.reset()
def eval_with_funcs(predictors, nr_eval, get_player_fn):
    """
    Args:
        predictors ([PredictorBase])
    """
    class Worker(StoppableThread, ShareSessionThread):
        def __init__(self, func, queue):
            super(Worker, self).__init__()
            self._func = func
            self.q = queue

        def func(self, *args, **kwargs):
            if self.stopped():
                raise RuntimeError("stopped!")
            return self._func(*args, **kwargs)

        def run(self):
            with self.default_sess():
                player = get_player_fn(train=False)
                while not self.stopped():
                    try:
                        score = play_one_episode(player, self.func)
                        # print("Score, ", score)
                    except RuntimeError:
                        return
                    self.queue_put_stoppable(self.q, score)

    q = queue.Queue()
    threads = [Worker(f, q) for f in predictors]

    for k in threads:
        k.start()
        time.sleep(0.1)  # avoid simulator bugs
    stat = StatCounter()
    try:
        for _ in tqdm(range(nr_eval), **get_tqdm_kwargs()):
            r = q.get()
            stat.feed(r)
        logger.info("Waiting for all the workers to finish the last run...")
        for k in threads:
            k.stop()
        for k in threads:
            k.join()
        while q.qsize():
            r = q.get()
            stat.feed(r)
    except Exception:
        logger.exception("Eval")
    finally:
        if stat.count > 0:
            return (stat.average, stat.max)
        return (0, 0)
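# A hedged usage sketch for eval_with_funcs: each Worker wraps one predictor,
# so a single predictor is typically replicated across threads. All names in
# the config below (Model, model_path, 'state', 'Qvalue', NUM_WORKERS) are
# placeholders, not taken from this file:
#
#     pred = OfflinePredictor(PredictConfig(
#         model=Model(),
#         session_init=SmartInit(model_path),
#         input_names=['state'],
#         output_names=['Qvalue']))
#     mean_score, max_score = eval_with_funcs(
#         [pred] * NUM_WORKERS, nr_eval=50, get_player_fn=get_player_fn)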
def reset_stats(self):
    """
    Returns:
        mean, max: two stats of the runners, to be added to backend
    """
    scores = list(itertools.chain.from_iterable(
        [v.total_scores for v in self._runners]))
    for v in self._runners:
        v.total_scores.clear()
    try:
        return np.mean(scores), np.max(scores)
    except Exception:
        # np.max raises ValueError when no runner has finished an episode yet
        logger.exception("Cannot compute total scores in EnvRunner.")
        return None, None
def _trigger(self):
    # log player statistics in training
    v = self._player_scores
    dist = self._player_distError
    for i in range(0, self.agents):
        try:
            mean, max = v[i].average, v[i].max
            self.trainer.monitors.put_scalar(
                "expreplay/mean_score_{}".format(i), mean)
            self.trainer.monitors.put_scalar(
                "expreplay/max_score_{}".format(i), max)
            mean, max = dist[i].average, dist[i].max
            self.trainer.monitors.put_scalar(
                "expreplay/mean_dist_{}".format(i), mean)
            self.trainer.monitors.put_scalar(
                "expreplay/max_dist_{}".format(i), max)
        except Exception:
            logger.exception("Cannot log training scores.")
        v[i].reset()
        dist[i].reset()

    # monitor number of played games and successes of reaching the target
    if self.player.num_games.count:
        self.trainer.monitors.put_scalar(
            "n_games", np.asscalar(self.player.num_games.sum))
    else:
        self.trainer.monitors.put_scalar("n_games", 0)

    for i in range(0, self.agents):
        if self.player.num_success[i].count:
            self.trainer.monitors.put_scalar(
                "n_success_{}".format(i),
                np.asscalar(self.player.num_success[i].sum),
            )
            self.trainer.monitors.put_scalar(
                "n_success_ratio_{}".format(i),
                self.player.num_success[i].sum / self.player.num_games.sum,
            )
        else:
            self.trainer.monitors.put_scalar("n_success_{}".format(i), 0)
            self.trainer.monitors.put_scalar(
                "n_success_ratio_{}".format(i), 0)

    # reset stats
    self.player.reset_stat()
def _before_train(self):
    # graph is finalized, OK to write it now.
    time = datetime.now().strftime('%m%d-%H%M%S')
    self.saver.export_meta_graph(
        os.path.join(self.checkpoint_dir, 'graph-{}.meta'.format(time)),
        collection_list=self.graph.get_all_collection_keys())

    # save
    try:
        self.saver.save(tf.get_default_session(),
                        self.path,
                        global_step=tf.train.get_global_step(),
                        write_meta_graph=False)
        logger.info("Model saved to %s." % tf.train.get_checkpoint_state(
            self.checkpoint_dir).model_checkpoint_path)
    except (OSError, IOError,
            tf.errors.PermissionDeniedError,
            tf.errors.ResourceExhaustedError):
        # disk error sometimes; log it and abort
        logger.exception("Exception in ModelSaver!")
        exit()
def _aggregate_batch(data_holder, use_list=False):
    error_msg = "batch must contain tensors, numbers, dicts or lists; found {}"
    size = len(data_holder[0])
    result = []
    for k in range(size):
        if use_list:
            result.append([x[k] for x in data_holder])
        else:
            dt = data_holder[0][k]
            batch = [x[k] for x in data_holder]
            if type(dt) in list(six.integer_types) + [bool]:
                tp = 'int32'
            elif type(dt) == float:
                tp = 'float32'
            else:
                try:
                    tp = dt.dtype
                except AttributeError:
                    raise TypeError(
                        "Unsupported type to batch: {}".format(type(dt)))
            try:
                if isinstance(dt, torch.Tensor):
                    out = None
                    if _use_shared_memory:
                        # If we're in a background process, concatenate directly
                        # into a shared memory tensor to avoid an extra copy
                        numel = sum([b.numel() for b in batch])
                        storage = data_holder[0][k].storage()._new_shared(numel)
                        out = data_holder[0][k].new(storage)
                    result.append(torch.stack(batch, 0, out=out))
                elif type(dt).__name__ == 'ndarray':
                    # array of string classes and object
                    if re.search('[SaUO]', dt.dtype.str) is not None:
                        raise TypeError(error_msg.format(dt.dtype))
                    result.append(
                        torch.stack([torch.from_numpy(b) for b in batch], 0))
                elif isinstance(dt, six.integer_types):
                    result.append(torch.LongTensor(batch))
                elif isinstance(dt, float):
                    result.append(torch.DoubleTensor(batch))
                elif isinstance(dt, six.string_types):
                    result.append(batch)
                else:
                    raise TypeError(error_msg.format(type(dt)))
            except Exception as e:  # noqa
                logger.exception(str(e))  # e.message is Python 2 only
                logger.exception(
                    "Cannot batch data. Perhaps they are of inconsistent shape?")
                if isinstance(dt, np.ndarray):
                    s = pprint.pformat([x[k].shape for x in data_holder])
                    logger.error("Shape of all arrays to be batched: " + s)
                try:
                    # open an ipython shell if possible
                    import IPython as IP
                    IP.embed()  # noqa
                except ImportError:
                    pass
    return result
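# What _aggregate_batch produces for a simple two-element batch (assuming the
# numpy/torch imports the function itself uses):
#
#     data_holder = [[np.zeros(3, dtype=np.float32), 1],
#                    [np.ones(3, dtype=np.float32), 2]]
#     out = _aggregate_batch(data_holder)
#     # out[0]: torch.FloatTensor of shape (2, 3)  (ndarrays stacked)
#     # out[1]: torch.LongTensor([1, 2])           (integers batched)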
def _trigger(self): """log player statistics in training periodically""" logger.info("Logging stats... ") scores = self._player_scores qvals = self._player_qvals best_qs = self._player_best_qvals IoU = self._player_IOU try: if scores.count: self.trainer.monitors.put_scalar('expreplay/mean_score', scores.average) self.trainer.monitors.put_scalar('expreplay/max_score', scores.max) if IoU.count: self.trainer.monitors.put_scalar('expreplay/mean_IoU', IoU.average) self.trainer.monitors.put_scalar('expreplay/max_IoU', IoU.max) if qvals.count: self.trainer.monitors.put_scalar('expreplay/max_qval', qvals.max) self.trainer.monitors.put_scalar('expreplay/mean_qval', qvals.average) if best_qs.count: self.trainer.monitors.put_scalar('expreplay/max_best_qval', best_qs.max) self.trainer.monitors.put_scalar('expreplay/mean_best_qval', best_qs.average) except Exception: logger.exception("Cannot log training scores.") scores.reset() IoU.reset() qvals.reset() best_qs.reset() # monitor number of played games and successes of reaching the target if self.player.num_games.count: self.trainer.monitors.put_scalar( 'expreplay/n_games', np.asscalar(self.player.num_games.sum)) else: self.trainer.monitors.put_scalar('expreplay/n_games', 0) if self.player.num_backtracked.count: self.trainer.monitors.put_scalar( 'expreplay/n_backtracked', np.asscalar(self.player.num_backtracked.sum)) else: self.trainer.monitors.put_scalar('expreplay/n_backtracked', 0) if self.player.num_backtracked.count: self.trainer.monitors.put_scalar( 'expreplay/n_backtracked', np.asscalar(self.player.num_backtracked.sum)) else: self.trainer.monitors.put_scalar('expreplay/n_backtracked', 0) # count wall collisions if self.player.num_go_out.count: self.trainer.monitors.put_scalar( 'expreplay/num_go_out', np.asscalar(self.player.num_go_out.sum)) else: self.trainer.monitors.put_scalar('expreplay/num_go_out', 0) if self.player.num_success.count: self.trainer.monitors.put_scalar( 'expreplay/n_success', np.asscalar(self.player.num_success.sum)) self.trainer.monitors.put_scalar( 'expreplay/n_success_ratio', self.player.num_success.sum / self.player.num_games.sum) else: self.trainer.monitors.put_scalar('expreplay/n_success', 0) self.trainer.monitors.put_scalar('expreplay/n_success_ratio', 0) # length of trials if self.player.episode_duration.count: try: self.trainer.monitors.put_scalar( 'expreplay/avg_episode_duration', np.asscalar(self.player.episode_duration.average)) except: self.trainer.monitors.put_scalar( 'expreplay/avg_episode_duration', self.player.episode_duration.average) # count different actions if self.player.num_act0.count: self.trainer.monitors.put_scalar( 'expreplay/num_act0', np.asscalar(self.player.num_act0.sum)) else: self.trainer.monitors.put_scalar('expreplay/num_act0', 0) if self.player.num_act1.count: self.trainer.monitors.put_scalar( 'expreplay/num_act1', np.asscalar(self.player.num_act1.sum)) else: self.trainer.monitors.put_scalar('expreplay/num_act1', 0) if self.player.num_act2.count: self.trainer.monitors.put_scalar( 'expreplay/num_act2', np.asscalar(self.player.num_act2.sum)) else: self.trainer.monitors.put_scalar('expreplay/num_act2', 0) if self.player.num_act3.count: self.trainer.monitors.put_scalar( 'expreplay/num_act3', np.asscalar(self.player.num_act3.sum)) else: self.trainer.monitors.put_scalar('expreplay/num_act3', 0) if self.player.num_act4.count: self.trainer.monitors.put_scalar( 'expreplay/num_act4', np.asscalar(self.player.num_act4.sum)) else: self.trainer.monitors.put_scalar('expreplay/num_act4', 0) if 
self.player.num_act5.count: self.trainer.monitors.put_scalar( 'expreplay/num_act5', np.asscalar(self.player.num_act5.sum)) else: self.trainer.monitors.put_scalar('expreplay/num_act5', 0) # reset stats after logging to tensorboard self.player.reset_stat()
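# Much of the logging above repeats "put counter.sum, or 0 when the counter is
# empty". A small helper (hypothetical, not in the original code) could fold
# that pattern; note that np.asscalar is deprecated in newer NumPy in favor of
# ndarray.item():
def _put_counter_sum(monitors, name, counter):
    """Log a StatCounter's sum under `name`, defaulting to 0 when empty."""
    monitors.put_scalar(name, np.asscalar(counter.sum) if counter.count else 0)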