Beispiel #1
0
def main(cfg: DictConfig):
    # print all configs
    print(cfg.pretty())

    # build env
    print("===INITIALIZING ENV===")
    env = build_env(cfg.experiment_info)
    print(env.reset())
    print("=================")

    # build model
    print("===INITIALIZING MODEL===")
    cfg.model.params.model_cfg.state_dim = env.observation_space.shape
    cfg.model.params.model_cfg.action_dim = env.action_space.n
    cfg.model.params.model_cfg.fc.output.params.output_size = env.action_space.n
    model = build_model(cfg.model)
    test_input = torch.FloatTensor(env.reset()).unsqueeze(0)
    print(model)
    print(model.forward(test_input))
    print("===================")

    # build action_selector
    print("===INITIALIZING ACTION SELECTOR===")
    action_selector = build_action_selector(cfg.experiment_info)
    print(action_selector)
    print("==============================")

    # build loss
    print("===INITIALIZING LOSS===")
    loss = build_loss(cfg.experiment_info)
    print(loss)
    print("==================")

    # build learner
    print("===INITIALIZING LEARNER===")
    learner = build_learner(**cfg)
    print(learner)
    print("=====================")

    # build agent
    print("===INITIALIZING AGENT===")
    agent = build_agent(**cfg)
    print(agent)
    print("=====================")
Beispiel #2
0
def main(cfg: DictConfig):
    agent = build_agent(**cfg)
    agent.train()