def train(config):
    """Pre-train one agent per sub-task, then train the final agent under
    every requested combination of training modes, saving all results.

    config fields used: eval, model, domain, tasks, hardware, data,
    independent, full_batch.  Results are pickled under results/<domain>/.
    """
    all_agents = [
        agents.catalog(
            DictTree(evaluation=config.eval,
                     model_dirname=config.model,
                     domain_name=config.domain,
                     task_name=task_name,
                     hardware_name=config.hardware,
                     rollable=False,
                     teacher=False))
        for task_name in config.tasks
    ]
    data_dirname = "{}/{}".format(config.data, config.domain)
    # Train each sub-task agent independently first; the last agent in the
    # list is the composite one and is handled below.
    for agent in all_agents[:-1]:
        client.delete(agent)
        _train(data_dirname, agent, [],
               DictTree(modes=['independent'],
                        batch_size=None,
                        validate=False,
                        model_dirname="model/{}/{}".format(
                            config.domain, agent.task_name)))
    results = DictTree()
    if config.independent:
        modes_list = [['independent']]
    else:
        # Cartesian product over optional modes; '' entries are filtered
        # out below, and the all-empty combination is skipped.
        modes_list = itertools.product(['validation', ''],
                                       ['training', ''],
                                       ['independent', ''])
    for modes in modes_list:
        modes = [mode for mode in modes if mode]
        if not modes:
            continue
        print("Training with modes: {}".format(', '.join(modes)))
        # Reset the composite agent so each mode combination trains fresh.
        client.delete(all_agents[-1])
        results['+'.join(modes)] = _train(
            data_dirname, all_agents[-1], all_agents[:-1],
            DictTree(modes=modes,
                     batch_size=(None if config.full_batch else 1),
                     validate=True,
                     model_dirname="model/{}/{}_{}".format(
                         config.domain, ".".join(config.tasks),
                         "+".join(modes))))
    # exist_ok=True replaces the old try/except OSError: pass idiom.
    os.makedirs("results/{}/{}".format(config.domain, ".".join(config.tasks)),
                exist_ok=True)
    time_stamp = time.strftime("%Y-%m-%d %H-%M-%S", time.gmtime())
    # Use a with-block so the file handle is closed promptly (the original
    # passed an open() result straight to pickle.dump and leaked it).
    with open("results/{}/{}/{}.{}.pkl".format(
            config.domain, ".".join(config.tasks),
            all_agents[-1].task_name, time_stamp), 'wb') as f:
        pickle.dump(results, f, protocol=2)
def main():
    """Entry point: parse per-component JSON configs from the command line,
    build the environment and a teacher agent, and run data generation for
    the training and test splits."""
    config_keys = ['env', 'agent', 'data']
    parser = argparse.ArgumentParser()
    for key in config_keys:
        parser.add_argument(f'--{key}', required=True)
    config = utils.json2dict(parser.parse_args(), config_keys)
    env = envs.catalog(config.env)
    agent = agents.catalog(env, DictTree(teacher=True) | config.agent)
    agent.to(config.data.get('device', 'cuda'))
    # Generate the training split with the env in training mode, then the
    # test split with it switched off.
    for training_flag, split_cfg in ((True, config.data.train),
                                     (False, config.data.test)):
        env.training = training_flag
        DataLoader(DEFAULT_CONFIG | split_cfg, env, agent)
def rollout(config):
    """Run a single rollout of a trained agent in its environment and pickle
    the resulting trace.

    config fields used: domain, hardware, task, model, eval, data.
    The trace is written to <data>/<domain>/<task>/trace_result_<ts>.pkl.
    """
    env = envs.catalog(
        DictTree(domain_name=config.domain, hardware_name=config.hardware))
    agent = agents.catalog(
        DictTree(domain_name=config.domain,
                 task_name=config.task,
                 rollable=True,
                 model_dirname=config.model,
                 hardware_name=config.hardware,
                 evaluation=config.eval))
    init_arg = env.reset()
    agent.reset(init_arg)
    trace = agent.rollout(env)
    time_stamp = time.strftime("%Y-%m-%d %H-%M-%S", time.gmtime())
    # with-block closes the output file (the original leaked the handle);
    # stale commented-out save path removed.
    out_path = (f"{config.data}/{config.domain}/{config.task}/"
                f"trace_result_{time_stamp}.pkl")
    with open(out_path, 'wb') as f:
        pickle.dump(trace, f, protocol=2)
def rollout(config):
    """Run one rollout of an agent (optionally the teacher) for a task and
    pickle the trace under <data>/<domain>/<task>.<timestamp>.pkl.

    NOTE(review): this shadows the other `rollout` definition in this file
    if both live in the same module — confirm which one is intended.
    """
    env = envs.catalog(config.domain)
    agent = agents.catalog(
        DictTree(domain_name=config.domain,
                 task_name=config.task,
                 teacher=config.teacher,
                 rollable=True,
                 model_dirname=config.model))
    init_arg = env.reset(config.task)
    agent.reset(init_arg)
    trace = agent.rollout(env)
    # exist_ok=True replaces the old try/except OSError: pass idiom.
    os.makedirs("{}/{}".format(config.data, config.domain), exist_ok=True)
    # Timestamp uses '-' separators like the other save paths in this file;
    # the original's ':' is also illegal in Windows filenames.
    time_stamp = time.strftime("%Y-%m-%d %H-%M-%S", time.gmtime())
    # with-block closes the output file (the original leaked the handle).
    with open("{}/{}/{}.{}.pkl".format(config.data, config.domain,
                                       config.task, time_stamp), 'wb') as f:
        pickle.dump(trace, f, protocol=2)
    print("=== trace saved ===")
    # raw_input() is Python 2 only; the rest of this file requires Python 3
    # (f-strings, dict |), so the original call would raise NameError.
    input("Press Enter to continue...")
def _train(config):
    """Supervised training loop: builds env/agent/data loaders, restores the
    latest checkpoint, trains until config.train.num_steps, periodically
    saving and evaluating, then restores the best checkpoint and evaluates
    on the test split.  TensorBoard stats are written to config.train.logdir.
    """
    torch.set_num_threads(config.train.num_threads)
    print(config.train.logdir)
    env = envs.catalog(config.env)
    agent = agents.catalog(env, DictTree(teacher=False) | config.agent)
    agent.to(config.train.device)
    train_data = data.DataLoader(config.train.data.train)
    # NOTE(review): the validation loader is derived from the *training*
    # split with num_unannotated overridden to 1000 — confirm intended.
    valid_data = data.DataLoader(config.train.data.train | DictTree(num_unannotated=1000))
    test_data = data.DataLoader(config.train.data.test)
    saver = torch_utils.Saver(agent, train_data=train_data, valid_data=valid_data, test_data=test_data, config=DictTree(
        save_dir=config.train.logdir,
        keep_save_freq=config.train.keep_save_freq,
    ))
    # step resumes from the restored checkpoint; restored_step marks where
    # we came in, so we never re-save an unchanged checkpoint.
    step, restored_step = saver.restore()
    best_valid_loss = math.inf
    best_step = None
    last_save_step = restored_step
    last_eval_step = -math.inf  # forces an evaluation on the first pass
    stats_writer = tf_utils.TensorBoardWriter(config.train.logdir)
    try:
        while True:
            for train_batch in train_data.batches(step):
                if step >= config.train.num_steps:
                    return
                # Save at most once per save_freq steps, and only if we have
                # actually trained past the restored checkpoint.
                if config.train.save_freq and step - last_save_step >= config.train.save_freq and step > restored_step:
                    saver.save(step)
                    last_save_step = step
                if config.train.eval_freq and step - last_eval_step >= config.train.eval_freq:
                    valid_loss = _evaluate(valid_data, agent, 'valid', stats_writer, step)
                    # Track the best-validation step for final restore below.
                    if valid_loss < best_valid_loss:
                        best_valid_loss = valid_loss
                        best_step = step
                    last_eval_step = step
                train_stats = _step(agent, train_batch)
                # step counts traces, not optimizer updates.
                step += len(train_batch)
                # TODO: more stats
                # TODO: summarize stats for entire training epoch?
                # Normalize per-trace stats by batch size and per-step stats
                # by the total number of environment steps in the batch.
                avg_stats = train_stats.get('per_trace', DictTree()) / len(train_batch)
                avg_stats |= train_stats.get('per_step', DictTree()) / sum(
                    trace.metadata.length for trace in train_batch)
                for k, v in avg_stats.allitems():
                    k = '/'.join(k)
                    # print(f"Step {step} training {k}: {v}")
                    stats_writer.add(step, f'train/{k}', v)
                stats_writer.flush()
    finally:
        try:
            # Final save (if any progress was made), then evaluate the best
            # checkpoint on the held-out test split.
            if step > restored_step:
                saver.save(step)
            saver.restore(step=best_step)
            _evaluate(test_data, agent, 'test', stats_writer, step)
        finally:
            stats_writer.close()