예제 #1
0
def train(config):
    """Train one agent per task, then train the final task's agent under
    several mode combinations and pickle the collected results.

    Every agent except the last is first trained alone in 'independent'
    mode.  The last agent is then retrained once per non-empty combination
    of the modes 'validation'/'training'/'independent' (or only
    ['independent'] when ``config.independent`` is set), with the earlier
    agents passed in as auxiliaries.  Results are pickled under
    ``results/{domain}/{tasks}/``.
    """
    all_agents = [
        agents.catalog(
            DictTree(evaluation=config.eval,
                     model_dirname=config.model,
                     domain_name=config.domain,
                     task_name=task_name,
                     hardware_name=config.hardware,
                     rollable=False,
                     teacher=False)) for task_name in config.tasks
    ]
    data_dirname = "{}/{}".format(config.data, config.domain)
    # Pre-train all but the last agent independently.
    for agent in all_agents[:-1]:
        client.delete(agent)
        _train(
            data_dirname, agent, [],
            DictTree(modes=['independent'],
                     batch_size=None,
                     validate=False,
                     model_dirname="model/{}/{}".format(
                         config.domain, agent.task_name)))
    results = DictTree()
    if config.independent:
        modes_list = [['independent']]
    else:
        # All subsets of the three modes; '' placeholders are filtered below.
        modes_list = itertools.product(['validation', ''], ['training', ''],
                                       ['independent', ''])
    for modes in modes_list:
        modes = [mode for mode in modes if mode]
        if not modes:
            continue  # skip the empty combination
        print("Training with modes: {}".format(', '.join(modes)))
        client.delete(all_agents[-1])
        results['+'.join(modes)] = _train(
            data_dirname, all_agents[-1], all_agents[:-1],
            DictTree(modes=modes,
                     batch_size=(None if config.full_batch else 1),
                     validate=True,
                     model_dirname="model/{}/{}_{}".format(
                         config.domain, ".".join(config.tasks),
                         "+".join(modes))))
    # exist_ok=True only ignores an already-existing directory; the old
    # blanket `except OSError: pass` also silenced real failures such as
    # permission errors.
    os.makedirs("results/{}/{}".format(config.domain, ".".join(config.tasks)),
                exist_ok=True)
    time_stamp = time.strftime("%Y-%m-%d %H-%M-%S", time.gmtime())
    # Context manager guarantees the file is closed even if dump raises;
    # the bare open() call leaked the handle on failure.
    with open(
            "results/{}/{}/{}.{}.pkl".format(config.domain,
                                             ".".join(config.tasks),
                                             all_agents[-1].task_name,
                                             time_stamp), 'wb') as f:
        pickle.dump(results, f, protocol=2)
예제 #2
0
파일: data.py 프로젝트: royf/hvil
def main():
    """Build the environment and a teacher agent from CLI-supplied configs,
    then run the train and test data loaders (toggling ``env.training``)."""
    config_keys = ['env', 'agent', 'data']
    arg_parser = argparse.ArgumentParser()
    for key in config_keys:
        arg_parser.add_argument(f'--{key}', required=True)
    config = utils.json2dict(arg_parser.parse_args(), config_keys)

    env = envs.catalog(config.env)
    agent = agents.catalog(env, DictTree(teacher=True) | config.agent)
    # Default to CUDA unless the data config names another device.
    agent.to(config.data.get('device', 'cuda'))
    # Train split runs with env.training on, test split with it off.
    for training, split_config in ((True, config.data.train),
                                   (False, config.data.test)):
        env.training = training
        DataLoader(DEFAULT_CONFIG | split_config, env, agent)
예제 #3
0
def rollout(config):
    """Roll out one episode of ``config.task`` in ``config.domain`` and
    pickle the resulting trace.

    The trace is written to
    ``{data}/{domain}/{task}/trace_result_{timestamp}.pkl``.
    """
    env = envs.catalog(
        DictTree(domain_name=config.domain, hardware_name=config.hardware))
    agent = agents.catalog(
        DictTree(domain_name=config.domain,
                 task_name=config.task,
                 rollable=True,
                 model_dirname=config.model,
                 hardware_name=config.hardware,
                 evaluation=config.eval))

    init_arg = env.reset()
    agent.reset(init_arg)
    trace = agent.rollout(env)
    time_stamp = time.strftime("%Y-%m-%d %H-%M-%S", time.gmtime())
    # Create the output directory first — the other entry points in this
    # file do this, and open() would otherwise fail on a fresh data dir.
    os.makedirs(f"{config.data}/{config.domain}/{config.task}", exist_ok=True)
    # Context manager closes the file even if pickle.dump raises.
    with open(
            f"{config.data}/{config.domain}/{config.task}/trace_result_{time_stamp}.pkl",
            'wb') as f:
        pickle.dump(trace, f, protocol=2)
예제 #4
0
def rollout(config):
    """Roll out one episode of ``config.task``, pickle the trace to
    ``{data}/{domain}/{task}.{timestamp}.pkl``, and wait for the user to
    press Enter before returning.
    """
    env = envs.catalog(config.domain)
    agent = agents.catalog(
        DictTree(domain_name=config.domain,
                 task_name=config.task,
                 teacher=config.teacher,
                 rollable=True,
                 model_dirname=config.model))
    init_arg = env.reset(config.task)
    agent.reset(init_arg)
    trace = agent.rollout(env)
    # exist_ok=True ignores only an already-existing directory; the old
    # blanket `except OSError: pass` also hid real failures.
    os.makedirs("{}/{}".format(config.data, config.domain), exist_ok=True)
    # Hyphens instead of colons: ':' is illegal in Windows filenames, and
    # every other entry point in this file uses the hyphenated format.
    time_stamp = time.strftime("%Y-%m-%d %H-%M-%S", time.gmtime())
    # Context manager closes the file even if pickle.dump raises.
    with open(
            "{}/{}/{}.{}.pkl".format(config.data, config.domain, config.task,
                                     time_stamp), 'wb') as f:
        pickle.dump(trace, f, protocol=2)
    print("=== trace saved ===")
    # raw_input() is Python-2-only and a NameError under the Python 3 this
    # file requires (it uses f-strings); input() is the Py3 equivalent.
    input("Press Enter to continue...")
예제 #5
0
def _train(config):
    """Run the main training loop for a single agent.

    Builds the env, agent, and train/valid/test data loaders from
    ``config``, restores the latest checkpoint, then trains until
    ``config.train.num_steps``: checkpoints every ``save_freq`` steps and
    evaluates on the validation split every ``eval_freq`` steps, tracking
    the best validation loss.  On exit — normal or via exception — the
    final state is saved, the best-validation checkpoint is restored, a
    final test-set evaluation runs, and the stats writer is closed.
    """
    torch.set_num_threads(config.train.num_threads)
    print(config.train.logdir)
    env = envs.catalog(config.env)
    agent = agents.catalog(env, DictTree(teacher=False) | config.agent)
    agent.to(config.train.device)
    train_data = data.DataLoader(config.train.data.train)
    # Validation reuses the training split config with 1000 unannotated
    # traces overlaid.
    valid_data = data.DataLoader(config.train.data.train
                                 | DictTree(num_unannotated=1000))
    test_data = data.DataLoader(config.train.data.test)
    saver = torch_utils.Saver(agent,
                              train_data=train_data,
                              valid_data=valid_data,
                              test_data=test_data,
                              config=DictTree(
                                  save_dir=config.train.logdir,
                                  keep_save_freq=config.train.keep_save_freq,
                              ))
    step, restored_step = saver.restore()
    best_valid_loss = math.inf
    best_step = None  # step at which the best validation loss was seen
    last_save_step = restored_step
    last_eval_step = -math.inf  # -inf forces an evaluation on the first step
    stats_writer = tf_utils.TensorBoardWriter(config.train.logdir)

    try:
        while True:  # loop over epochs; the num_steps check below exits
            for train_batch in train_data.batches(step):
                if step >= config.train.num_steps:
                    return
                # `step > restored_step` avoids re-saving the checkpoint we
                # just restored when a finished run is restarted.
                if config.train.save_freq and step - last_save_step >= config.train.save_freq and step > restored_step:
                    saver.save(step)
                    last_save_step = step
                if config.train.eval_freq and step - last_eval_step >= config.train.eval_freq:
                    valid_loss = _evaluate(valid_data, agent, 'valid',
                                           stats_writer, step)
                    if valid_loss < best_valid_loss:
                        best_valid_loss = valid_loss
                        best_step = step
                    last_eval_step = step
                train_stats = _step(agent, train_batch)
                # One "step" per trace in the batch.
                step += len(train_batch)
                # TODO: more stats
                # TODO: summarize stats for entire training epoch?
                # Average per-trace stats over the trace count and per-step
                # stats over the total number of env steps in the batch.
                avg_stats = train_stats.get('per_trace',
                                            DictTree()) / len(train_batch)
                avg_stats |= train_stats.get('per_step', DictTree()) / sum(
                    trace.metadata.length for trace in train_batch)
                for k, v in avg_stats.allitems():
                    k = '/'.join(k)
                    # print(f"Step {step} training {k}: {v}")
                    stats_writer.add(step, f'train/{k}', v)
                stats_writer.flush()
    finally:
        try:
            # Persist final progress, restore the best checkpoint, and run a
            # final test evaluation; the nested finally guarantees the
            # writer is closed even if that evaluation fails.
            if step > restored_step:
                saver.save(step)
            saver.restore(step=best_step)
            _evaluate(test_data, agent, 'test', stats_writer, step)
        finally:
            stats_writer.close()