Beispiel #1
0
def main(args):
    """Set up logging, seeding, sampler, policy and baseline, then train.

    Arguments
    args: run configuration. This function reads ``log_name``, ``seed``,
        ``make_deterministic``, ``hidden_size``, ``num_layers``,
        ``num_context_params`` and ``device``; the whole object is also
        forwarded to the helpers called below.
    """
    # Create the log directory. exist_ok=True replaces the previous
    # "check, then create" sequence, which could fail if the directory
    # appeared between the check and the creation.
    os.makedirs("./log", exist_ok=True)

    # Set logging
    log = set_log(args)
    tb_writer = SummaryWriter('./log/tb_{0}'.format(args.log_name))

    # Set seed
    set_seed(args.seed, cudnn=args.make_deterministic)

    # Set sampler
    sampler = BatchSampler(args, log)

    # Flattened observation size, shared by the policy and the baseline
    obs_dim = int(np.prod(sampler.observation_space.shape))

    # Set policy
    policy = CaviaMLPPolicy(
        input_size=obs_dim,
        output_size=int(np.prod(sampler.action_space.shape)),
        hidden_sizes=(args.hidden_size, ) * args.num_layers,
        num_context_params=args.num_context_params,
        device=args.device)

    # Initialise baseline
    baseline = LinearFeatureBaseline(obs_dim)

    # Initialise meta-learner
    metalearner = MetaLearner(sampler, policy, baseline, args, tb_writer)

    # Begin train
    train(sampler, metalearner, args, log, tb_writer)
Beispiel #2
0
def main(args):
    """Create the environment and one policy per agent, then train.

    Arguments
    args: run configuration. This function reads ``central_train``,
        ``log_name``, ``seed`` and ``n_agent``; the whole object is also
        forwarded to the helpers called below.

    Raises
    NotImplementedError: if ``args.central_train`` is exactly ``True``.
    """
    if args.central_train is True:
        raise NotImplementedError("todo")

    # Create directories. exist_ok=True makes this safe when the directory
    # already exists or is created concurrently by another run.
    for directory in ("./logs", "./pytorch_models"):
        os.makedirs(directory, exist_ok=True)

    # Set logs
    tb_writer = SummaryWriter('./logs/tb_{0}'.format(args.log_name))
    log = set_log(args)

    # Create env
    env = make_env(args)

    # Set seeds
    env.seed(args.seed)
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)

    # Initialize one policy per agent
    agents = [
        set_policy(env, tb_writer, log, args, name="agent", i_agent=i_agent)
        for i_agent in range(args.n_agent)
    ]

    # Start train
    train(agents=agents, env=env, log=log, tb_writer=tb_writer, args=args)
Beispiel #3
0
def main(args):
    """Create the environment and the student policies, then train.

    Arguments
    args: run configuration. This function reads ``log_name``, ``seed``
        and ``n_student``; the whole object is also forwarded to the
        helpers called below.
    """
    # Create directories. exist_ok=True makes this safe when the directory
    # already exists or is created concurrently by another run.
    # NOTE(review): "./saved_model" used to be created only after train()
    # returned; it is now created up front with the others so it also
    # exists while training runs. Confirm nothing relied on the old timing.
    for directory in ("./logs", "./pytorch_models", "./saved_model"):
        os.makedirs(directory, exist_ok=True)

    # Set logs
    tb_writer = SummaryWriter('./logs/tb_{0}'.format(args.log_name))
    log = set_log(args)

    # Create env
    env = make_env(args)

    # Set seeds
    env.seed(args.seed)
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)

    # Initialize one policy per student
    student_n = [
        set_policy(env, log, args, name="student", i_agent=i_agent)
        for i_agent in range(args.n_student)
    ]

    # Start train
    train(student_n=student_n,
          env=env,
          log=log,
          tb_writer=tb_writer,
          args=args)
Beispiel #4
0
def main(args):
    """Create the environment and agent, then either test or train it.

    Arguments
    args: run configuration. This function reads ``log_name``, ``seed``,
        ``n_hidden``, ``mode``, ``test_model`` and ``num_samples``; the
        whole object is also forwarded to the helpers called below.
    """
    # Create directories. exist_ok=True makes this safe when the directory
    # already exists or is created concurrently by another run.
    for directory in ("./logs", "./pytorch_models"):
        os.makedirs(directory, exist_ok=True)

    # Set logging
    log = set_log(args)
    tb_writer = SummaryWriter('./logs/tb_{0}'.format(args.log_name))

    # Create env
    env = make_env(args)

    # Set seeds
    # NOTE(review): the original comment here read "0 seed is odd"; its
    # meaning is unclear -- confirm whether seed 0 should be avoided.
    env.seed(args.seed)
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    # Initialize policy
    agent = set_policy(env, args.n_hidden, tb_writer, log, args)

    if args.mode == "test":
        # Load the stored weights before evaluating
        agent.load_weight("pytorch_models/", args.test_model)
        test(agent=agent, env=env, log=log, tb_writer=tb_writer, args=args)
    else:
        # Any mode other than "test" trains the agent
        train(agent=agent,
              env=env,
              log=log,
              tb_writer=tb_writer,
              num_samples=args.num_samples,
              args=args)
Beispiel #5
0
def main(args):
    """Create the environment and two agents, then train them together.

    Arguments
    args: run configuration. This function reads ``log_name`` and
        ``seed``; the whole object is also forwarded to the helpers
        called below.
    """
    # Create directories. exist_ok=True makes this safe when the directory
    # already exists or is created concurrently by another run.
    for directory in ("./logs", "./pytorch_models"):
        os.makedirs(directory, exist_ok=True)

    # Set logs
    log = set_log(args)
    tb_writer = SummaryWriter('./logs/tb_{0}'.format(args.log_name))

    # Create env
    env = make_env(args)

    # Set seeds
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    env.seed(args.seed)

    # Initialize agents (agent indices start at 1 here, not 0)
    agent1 = Agent(env, log, tb_writer, args, name="agent1", i_agent=1)
    agent2 = Agent(env, log, tb_writer, args, name="agent2", i_agent=2)

    # Start train
    train(agent1, agent2, env, log, tb_writer, args)
Beispiel #6
0
def main(args):
    """Create the environment and agent, then dispatch to test or train.

    Arguments
    args: run configuration. This function reads ``log_name``, ``seed``,
        ``algorithm`` and ``test``; the whole object is also forwarded to
        the helpers called below.
    """
    # Create directories. exist_ok=True makes this safe when the directory
    # already exists or is created concurrently by another run.
    for directory in ("./logs", "./pytorch_models"):
        os.makedirs(directory, exist_ok=True)

    # Set logs
    tb_writer = SummaryWriter('./logs/tb_{0}'.format(args.log_name))
    log = set_log(args)

    # Create env
    env = make_env(log, args)

    # Set seeds
    env.seed(args.seed)
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)

    # Initialize policy; the policy is named after the selected algorithm
    agent = set_policy(env, tb_writer, log, args, name=args.algorithm)

    # Only the module needed for the selected mode is imported
    if args.test:
        from tester import test
        test(agent=agent, env=env, log=log, tb_writer=tb_writer, args=args)
    else:
        from trainer import train
        train(agent=agent, env=env, log=log, tb_writer=tb_writer, args=args)
Beispiel #7
0
def main(args):
    """Run meta-training in worker processes, or run meta-testing.

    When ``args.test_mode`` is falsy, ``args.n_process`` processes running
    ``meta_train`` and one process running ``meta_val`` are started and
    then joined. Otherwise ``meta_test`` is called in the current process.

    Arguments
    args: run configuration. This function reads ``log_name``, ``seed``,
        ``test_mode`` and ``n_process``; the whole object is also
        forwarded to the helpers called below.
    """
    # Create the log directory. exist_ok=True replaces the previous
    # "check, then create" sequence, which could fail if the directory
    # appeared between the check and the creation.
    os.makedirs("./log", exist_ok=True)

    # Set logging
    log = set_log(args)
    tb_writer = SummaryWriter('./log/tb_{0}'.format(args.log_name))

    # Set seeds
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    # `device` is not defined in this function; it is expected to be a
    # module-level name. The two identical checks of the original are
    # merged: on CUDA, seed the GPU generator and use the 'spawn' start
    # method for the worker processes.
    if device == torch.device("cuda"):
        torch.cuda.manual_seed(args.seed)
        torch.multiprocessing.set_start_method('spawn')

    # Initialize shared meta-agent
    shared_meta_agent = MetaAgent(log,
                                  tb_writer,
                                  args,
                                  name="meta-agent",
                                  i_agent=0)
    shared_meta_agent.share_memory()

    # Begin either meta-train or meta-test
    if not args.test_mode:
        # Start meta-train: one process per rank, sharing a manager dict
        processes, process_dict = [], mp.Manager().dict()
        for rank in range(args.n_process):
            p = mp.Process(target=meta_train,
                           args=(shared_meta_agent, process_dict, rank, log,
                                 args))
            p.start()
            processes.append(p)
            time.sleep(0.1)

        # The validation process is started with rank -1
        p = mp.Process(target=meta_val,
                       args=(shared_meta_agent, process_dict, -1, log, args))
        p.start()
        processes.append(p)
        time.sleep(0.1)

        # Wait for every process to finish
        for p in processes:
            time.sleep(0.1)
            p.join()
    else:
        # Start meta-test
        meta_test(shared_meta_agent, log, tb_writer, args)
Beispiel #8
0
def main(args):
    """Train the opponent, or load it and train the modeler against it.

    Arguments
    args: run configuration. This function reads ``log_name``, ``seed``
        and ``train_opponent``; the whole object is also forwarded to the
        helpers called below.
    """
    # Create directories. exist_ok=True makes this safe when the directory
    # already exists or is created concurrently by another run.
    for directory in ("./logs", "./pytorch_models"):
        os.makedirs(directory, exist_ok=True)

    # Set logs
    tb_writer = SummaryWriter('./logs/tb_{0}'.format(args.log_name))
    log = set_log(args)

    # Create env
    env = make_env(args)

    # Set seeds
    env.seed(args.seed)
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)

    # Initialize policies; the opponent list holds exactly one opponent
    opponent_n = [
        set_policy(env, tb_writer, log, args, name="opponent", i_agent=i_agent)
        for i_agent in range(1)
    ]

    modeler = set_policy(env, tb_writer, log, args, name="modeler", i_agent=0)

    # Start training
    if args.train_opponent:
        train_opponent(opponent_n=opponent_n,
                       env=env,
                       log=log,
                       tb_writer=tb_writer,
                       args=args)
    else:
        # Load trained opponent model (fixed checkpoint name)
        for opponent in opponent_n:
            opponent.load_model(filename="opponent0_500",
                                directory="./pytorch_models")

        train_modeler(modeler=modeler,
                      opponent_n=opponent_n,
                      env=env,
                      log=log,
                      tb_writer=tb_writer,
                      args=args)
Beispiel #9
0
def main(args):
    """
    Program entry point

    Creates the output directories and loggers, then starts training for
    the environment named by ``args.env``. For "GridEnv-v0" a GUI is
    created when ``args.render`` is set, and the program exits after the
    GUI process has been joined.

    Arguments
    args (argparse.Namespace) command-line arguments. ``start`` (when not
        None) and ``target`` are converted to tuples in place and
        ``exits`` is set from ``Consts.EXITS`` for "GridEnv-v0".

    Raises
    ValueError: if ``args.env`` is neither "GridEnv-v0" nor "Taxi-v4".
    """

    # Create directories. exist_ok=True makes this safe when the directory
    # already exists or is created concurrently by another run, matching
    # the pathlib call below.
    for directory in ("./logs", "./pytorch_models"):
        os.makedirs(directory, exist_ok=True)
    # Only the immediate parent of the binary file is created
    # (parents=False), so its own parent must already exist.
    pathlib.Path(args.binary_file).parents[0].mkdir(parents=False,
                                                    exist_ok=True)

    # Set logging
    log = set_log(args)
    tb_writer = SummaryWriter('./logs/tb_{}'.format(args.log_name))

    if args.env == "GridEnv-v0":
        if args.start is not None:
            args.start = tuple(args.start)

        args.target = tuple(args.target)
        args.exits = Consts.EXITS

        gui = None
        if args.render:
            # The GUI receives a manager-backed queue
            multiprocessing.set_start_method("spawn")
            manager = multiprocessing.Manager()
            queue = manager.Queue()
            gui = GUI(args.gui_width, args.gui_height, args.rows, args.cols,
                      args.x_rooms, args.y_rooms, args.target, args.exits,
                      queue)
        train(args, gui, log, tb_writer)

        if gui:
            # Wait for the GUI process to finish, then terminate
            gui.process.join()
            sys.exit()

    elif args.env == "Taxi-v4":
        # NOTE(review): this call passes neither log nor tb_writer, unlike
        # the GridEnv-v0 call above -- confirm train() has defaults for them.
        train(args, None)
    else:
        raise ValueError("Environment {} not recognized".format(args.env))
Beispiel #10
0
def main(args):
    """Create workers, managers, temp managers and a teacher, then train.

    Arguments
    args: run configuration. This function reads ``log_name``, ``seed``,
        ``n_worker`` and ``n_manager``; the whole object is also forwarded
        to the helpers called below.

    Raises
    AssertionError: if the number of workers differs from the number of
        managers.
    """
    # Create directories. exist_ok=True makes this safe when the directory
    # already exists or is created concurrently by another run.
    for directory in ("./logs", "./pytorch_models"):
        os.makedirs(directory, exist_ok=True)

    # Set logs
    tb_writer = SummaryWriter('./logs/tb_{0}'.format(args.log_name))
    log = set_log(args)

    # Create env
    env = make_env(args)

    # Set seeds
    env.seed(args.seed)
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)

    # Initialize policy
    # Note that only one teacher is considered in the one box push domain
    # to transfer knowledge from agent $i$ to agent $k$ (Section 6.1)
    workers = [
        set_policy(env, tb_writer, log, args, name="worker", i_agent=i_agent)
        for i_agent in range(args.n_worker)]
    managers = [
        set_policy(env, tb_writer, log, args, name="manager", i_agent=i_agent)
        for i_agent in range(args.n_manager)]
    temp_managers = [
        set_policy(
            env, tb_writer, log, args, name="temp_manager", i_agent=i_agent)
        for i_agent in range(args.n_manager)]
    teacher = set_policy(env, tb_writer, log, args, name="teacher", i_agent=0)

    # Workers, managers and temp managers must come in equal numbers
    assert len(workers) == len(managers), "The two number must be same"
    assert len(managers) == len(temp_managers), "The two number must be same"

    # Start train
    train(
        workers=workers, managers=managers,
        temp_managers=temp_managers, teacher=teacher,
        env=env, log=log, tb_writer=tb_writer, args=args)
Beispiel #11
0
def main(args):
    """Set up logging and seeding, then build and run the meta-learner.

    Arguments
    args: run configuration. This function reads ``log_name`` and
        ``seed``; the whole object is also forwarded to the helpers
        called below.
    """
    # Create directories. exist_ok=True makes this safe when the directory
    # already exists or is created concurrently by another run.
    for directory in ("./logs", "./pytorch_models"):
        os.makedirs(directory, exist_ok=True)

    # Set logs
    tb_writer = SummaryWriter('./logs/tb_{0}'.format(args.log_name))
    log = set_log(args)

    # Set seeds
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    # `device` is not defined in this function; it is expected to be a
    # module-level name. On CUDA, the current and all GPU generators are
    # seeded as well.
    if device == torch.device("cuda"):
        torch.cuda.manual_seed(args.seed)
        torch.cuda.manual_seed_all(args.seed)

    # Build the meta-learner and start training
    learner = MetaLearner(log, tb_writer, args)
    learner.train()
Beispiel #12
0
def main(args):
    """Create the environment plus predator and prey policies, then train.

    Arguments
    args: run configuration. This function reads ``log_name``, ``seed``,
        ``n_predator`` and ``n_prey``; the whole object is also forwarded
        to the helpers called below.
    """
    # Create directories. exist_ok=True makes this safe when the directory
    # already exists or is created concurrently by another run.
    for directory in ("./logs", "./pytorch_models"):
        os.makedirs(directory, exist_ok=True)

    # Set logs
    tb_writer = SummaryWriter('./logs/tb_{0}'.format(args.log_name))
    log = set_log(args)

    # Create env
    env = make_env(args)

    # Set seeds
    env.seed(args.seed)
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)

    # Initialize one policy per predator and per prey
    predator_agents = [
        set_policy(env, tb_writer, log, args, name="predator", i_agent=i_agent)
        for i_agent in range(args.n_predator)
    ]

    prey_agents = [
        set_policy(env, tb_writer, log, args, name="prey", i_agent=i_agent)
        for i_agent in range(args.n_prey)
    ]

    # Start training
    train(predator_agents=predator_agents,
          prey_agents=prey_agents,
          env=env,
          log=log,
          tb_writer=tb_writer,
          args=args)
Beispiel #13
0
def main(args):
    """Estimate a table with the selected trainer and save it as ``.npy``.

    The trainer is chosen by ``args.estimate_option`` ("montecarlo",
    "naive" or "ours"). Its result is written to ``./data/<name>.npy``,
    where the name is built from the option and
    ``args.decay_max_timesteps`` (plus ``args.future_max_timesteps`` for
    "ours").

    Arguments
    args: run configuration. This function reads ``n_agent``,
        ``log_name``, ``seed``, ``estimate_option``,
        ``decay_max_timesteps`` and ``future_max_timesteps``; the whole
        object is also forwarded to the helpers called below.

    Raises
    AssertionError: if ``args.n_agent`` is not 2.
    ValueError: if ``args.estimate_option`` is not a known option.
    """
    # Check arguments
    assert args.n_agent == 2, "Only two agents are supported"

    # Create directories. exist_ok=True makes this safe when the directory
    # already exists or is created concurrently by another run.
    for directory in ("./logs", "./data"):
        os.makedirs(directory, exist_ok=True)

    # Set logs
    tb_writer = SummaryWriter('./logs/tb_{0}'.format(args.log_name))
    log = set_log(args)

    # Create env
    env = make_env(args)

    # Set seeds
    env.seed(args.seed)
    np.random.seed(args.seed)

    # Set agents
    agents = [
        Agent(env=env,
              tb_writer=tb_writer,
              log=log,
              args=args,
              name="agent",
              i_agent=i_agent) for i_agent in range(args.n_agent)
    ]

    # Pick the trainer for the requested option. Only the import differs
    # between options, so the train() call itself is written once below.
    if args.estimate_option == "montecarlo":
        from trainer.montecarlo import train
    elif args.estimate_option == "naive":
        from trainer.naive import train
    elif args.estimate_option == "ours":
        # The multiprocessing variant is used; trainer.ours is the
        # alternative module named in the original source.
        from trainer.ours_mp import train
    else:
        raise ValueError("Invalid option")

    table = train(agents=agents,
                  env=env,
                  log=log,
                  tb_writer=tb_writer,
                  args=args)

    # Save result
    save_name = args.estimate_option + "_" + str(args.decay_max_timesteps)
    if args.estimate_option == "ours":
        save_name += "_" + str(args.future_max_timesteps)
    np.save("./data/" + save_name + ".npy", table)
Beispiel #14
0
def main(args):
    """Run meta-training forever, with periodic testing and checkpointing.

    Every iteration samples training tasks, collects episodes and takes
    one meta-learner step. Every 10th iteration runs few-shot adaptation
    on 5 test tasks; every 100th iteration saves the meta-learner. The
    loop has no exit condition.

    Arguments
    args: run configuration. This function reads ``log_name``,
        ``env_name``, ``fast_batch_size``, ``num_workers``,
        ``hidden_size``, ``num_layers``, ``gamma``, ``fast_lr``, ``tau``,
        ``device``, ``meta_batch_size`` and ``first_order``; the whole
        object is also forwarded to the helpers called below.
    """
    # Logging: tensorboard writer first, then the text logger
    tb_log_dir = './logs/tb_{}'.format(args.log_name)
    tb_writer = SummaryWriter(tb_log_dir)
    log = set_log(args)

    # Sampler used for both training and test tasks
    sampler = BatchSampler(env_name=args.env_name,
                           batch_size=args.fast_batch_size,
                           num_workers=args.num_workers,
                           args=args)

    # NOTE (from the original source): the observation space is a list
    # ordered [predator0, predator1, ..., prey], hence index 0 below.
    obs_dim = int(np.prod(sampler.envs.observation_space[0].shape))
    act_dim = int(np.prod(sampler.envs.action_space[0].shape))
    layer_sizes = (args.hidden_size, ) * args.num_layers

    policy = NormalMLPPolicy(input_size=obs_dim,
                             output_size=act_dim,
                             hidden_sizes=layer_sizes)
    baseline = LinearFeatureBaseline(input_size=obs_dim)

    # The learner and the tester are built from the same settings
    common_kwargs = dict(gamma=args.gamma,
                         fast_lr=args.fast_lr,
                         tau=args.tau,
                         device=args.device,
                         args=args,
                         log=log,
                         tb_writer=tb_writer)
    meta_learner = MetaLearner(sampler, policy, baseline, **common_kwargs)

    # NOTE: the original source kept a disabled checkpoint restore here:
    # meta_learner.load(filename="theta_200", directory="./pytorch_models")

    meta_tester = MetaTester(sampler, policy, baseline, **common_kwargs)

    prey = Prey(env=sampler._env,
                args=args,
                log=log,
                tb_writer=tb_writer,
                name="prey",
                i_agent=0)

    # Meta-train loop
    iteration = 0
    while True:
        # Sample training tasks and collect episodes for them
        train_tasks = sampler.sample_tasks(num_tasks=args.meta_batch_size,
                                           test=False)
        episodes = meta_learner.sample(train_tasks,
                                       prey,
                                       first_order=args.first_order,
                                       iteration=iteration)

        # One meta-policy update
        meta_learner.step(episodes=episodes, args=args)

        # Periodic few-shot test of the meta-policy
        if not iteration % 10:
            eval_tasks = sampler.sample_tasks(num_tasks=5, test=True)
            meta_tester.few_shot_adaptation(meta_policy=meta_learner.policy,
                                            tasks=eval_tasks,
                                            first_order=args.first_order,
                                            iteration=iteration,
                                            prey=prey)

        # Periodic checkpoint
        if not iteration % 100:
            meta_learner.save(iteration)

        iteration += 1