def flush_to_stdout(self, epoch):
    stats = dict(self.base_stats)
    stats.update({"dts_h": util.dts(),
                  "epoch": epoch,
                  "n_egs_trained": self.n_egs_trained,
                  "elapsed_time": int(time.time()) - self.start_time,
                  "train_cost": util.mean_sd(self.train_costs),
                  "dev_cost": util.mean_sd(self.dev_costs),
                  "dev_acc": self.dev_accuracy})
    if self.norms:
        stats.update({"norms": self.norms})
    print "STATS\t%s" % json.dumps(stats)
    sys.stdout.flush()
    self.reset()
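# Since the stats helpers in this repo emit tab-separated "STATS\t...\t<json>"
# lines on stdout (sometimes with a timestamp field in between), a downstream
# plotting script can recover them with a sketch like this; parse_stats and its
# argument are illustrative, not part of the repo.
import json

def parse_stats(log_filename):
    for line in open(log_filename):
        if line.startswith("STATS\t"):
            # the json payload is always the last tab-separated field
            yield json.loads(line.rstrip("\n").split("\t")[-1])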
def log(s):
    print >>sys.stderr, util.dts(), s
# init our rl_agent
agent_cstr = eval("agents.%sAgent" % opts.agent)
agent = agent_cstr(opts)

# init event log (if logging events)
event_log = event_log.EventLog(opts.event_log_out) if opts.event_log_out else None

# hook up connection to trainer
if opts.trainer_port == 0:
    trainer = None
else:
    channel = grpc.insecure_channel("localhost:%d" % opts.trainer_port)
    trainer = model_pb2.ModelStub(channel)

for episode_idx in itertools.count(0):
    print util.dts(), "EPISODE", episode_idx, "eval", opts.eval
    # start new mission; explicitly wait for first observation
    # (not just world_state.has_mission_begun)
    mission_start = time.time()
    while True:
        try:
            # TODO: work out why this blocks and how to get it to time out somehow...
            # (client_pool construction is sketched after this snippet)
            malmo.startMission(mission, client_pool, mission_record, 0, "")
            break
        except RuntimeError as r:
            # have observed that getting stuck here doesn't recover, even if the
            # servers are restarted. try to recreate everything
            print >>sys.stderr, util.dts(), "failed to start mission", r
            print >>sys.stderr, util.dts(), "recreating malmo components..."
            time.sleep(1)
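# The client_pool passed to startMission() above isn't constructed in this
# snippet. A minimal sketch using the stock MalmoPython API (the address and
# port are illustrative; 10000 is Malmo's conventional default client port):
client_pool = MalmoPython.ClientPool()
client_pool.add(MalmoPython.ClientInfo("127.0.0.1", 10000))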
# (see the example spec fragment sketched after this snippet)
spec = spec.replace("__WIDTH__", str(opts.width))
spec = spec.replace("__HEIGHT__", str(opts.height))
spec = spec.replace("__EPISODE_TIME_MS__", str(opts.episode_time_ms))
mission = MalmoPython.MissionSpec(spec, True)
mission_record = MalmoPython.MissionRecordSpec()

# init our rl_agent
agent_cstr = eval("agents.%sAgent" % opts.agent)
agent = agent_cstr(opts)

event_log = event_log.EventLog(opts.event_log_out) if opts.event_log_out else None

for episode_idx in itertools.count(1):
    eval_episode = (episode_idx % opts.eval_freq == 0)
    print >>sys.stderr, "EPISODE", episode_idx, util.dts(), "eval =", eval_episode
    # start new mission; explicitly wait for first observation
    # (not just world_state.has_mission_begun)
    mission_start = time.time()
    while True:
        try:
            malmo.startMission(mission, mission_record)
            break
        except RuntimeError as r:
            print >>sys.stderr, "failed to start mission", r
            time.sleep(1)
    world_state = malmo.getWorldState()
    while len(world_state.observations) == 0:
        print >>sys.stderr, "started, but no obs?"
        time.sleep(0.1)
        world_state = malmo.getWorldState()  # repoll; otherwise this loops forever
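# For reference, a hypothetical fragment of the mission spec template that the
# __WIDTH__ / __HEIGHT__ / __EPISODE_TIME_MS__ substitutions above assume; the
# real spec file may differ, but Malmo's VideoProducer and ServerQuitFromTimeUp
# elements are where these values typically land.
EXAMPLE_SPEC_FRAGMENT = """
<VideoProducer>
  <Width>__WIDTH__</Width>
  <Height>__HEIGHT__</Height>
</VideoProducer>
<ServerQuitFromTimeUp timeLimitMs="__EPISODE_TIME_MS__"/>
"""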
def run_trainer(episodes, opts):
    # init replay memory
    render_shape = (opts.height, opts.width, 3)
    replay_memory = rm.ReplayMemory(opts=opts,
                                    state_shape=render_shape,
                                    action_dim=2,
                                    load_factor=1.1)
    if opts.event_log_in:
        replay_memory.reset_from_event_logs(opts.event_log_in,
                                            opts.event_log_in_num,
                                            opts.reset_smooth_reward_factor)

    # init network for training
    config = tf.ConfigProto()
    #config.gpu_options.allow_growth = True
    #config.log_device_placement = True
    config.gpu_options.per_process_gpu_memory_fraction = opts.gpu_mem_fraction
    sess = tf.Session(config=config)

    network = models.NafNetwork("naf", action_dim=2, opts=opts)

    with sess.as_default():
        # setup saver util and either load saved ckpt or init variables
        saver = ckpt_util.TrainerCkptSaver(sess, opts.ckpt_dir, opts.ckpt_save_freq)
        for v in tf.all_variables():
            if '/biases:' not in v.name:
                print >>sys.stderr, v.name, util.shape_and_product_of(v)
        network.setup_target_network()

        # loop forever processing episodes from run_agents
        print util.dts(), "waiting for episodes"
        while True:
            start_time = time.time()
            episode = episodes.get()
            wait_time = time.time() - start_time

            start_time = time.time()
            replay_memory.add_episode(episode,
                                      smooth_reward_factor=opts.smooth_reward_factor)
            losses = []
            if replay_memory.burnt_in():
                for _ in xrange(opts.batches_per_new_episode):
                    batch = replay_memory.batch(opts.batch_size)
                    batch_losses = network.train(batch).T[0]  # .T[0] => (B, 1) -> (B,)
                    replay_memory.update_priorities(batch.idxs, batch_losses)
                    network.target_value_net.update_target_weights()
                    losses.extend(batch_losses)
                saver.save_if_required()
            process_time = time.time() - start_time

            stats = {"wait_time": wait_time,
                     "process_time": process_time,
                     "pending": episodes.qsize(),
                     "replay_memory": replay_memory.stats}
            if losses:
                stats['loss'] = {"min": float(np.min(losses)),
                                 "median": float(np.median(losses)),
                                 "mean": float(np.mean(losses)),
                                 "max": float(np.max(losses))}
            print "STATS\t%s\t%s" % (util.dts(), json.dumps(stats))
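# A minimal, self-contained sketch of the loss-proportional prioritisation that
# replay_memory.update_priorities() above implies: transitions are resampled
# with probability proportional to their most recent training loss. The class
# and method names here are illustrative, not the real rm.ReplayMemory API.
import numpy as np

class ToyPrioritisedMemory(object):
    def __init__(self, capacity):
        self.capacity = capacity
        # start uniform so every slot is sampleable before its first update
        self.priorities = np.ones(capacity, dtype=np.float32)

    def sample_idxs(self, batch_size):
        # sample indices with probability proportional to priority
        p = self.priorities / self.priorities.sum()
        return np.random.choice(self.capacity, size=batch_size, p=p)

    def update_priorities(self, idxs, losses):
        # higher recent loss => more likely to be replayed; the epsilon keeps
        # zero-loss entries reachable
        self.priorities[idxs] = np.abs(losses) + 1e-6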