Esempio n. 1
0
    def from_resume(mode, args):
        """
        Rebuild the run configuration from the log directory of an earlier run.

        The args stored in that directory replace the ones passed in; only the
        resume path itself is carried over from the caller.

        :param mode: Script name
        :param args: Dict[str, Any], static args (only ``args.resume`` is read)
        :return: args, log_id_dir, initial_step_count
        """
        resume_path = args.resume
        helper = LogDirHelper(resume_path)

        with open(helper.args_file_path(), "r") as saved_args_file:
            saved_args = json.load(saved_args_file)
        args = DotDict(saved_args)
        args.resume = resume_path

        # Point the run at the most recent checkpoint reported by the helper.
        args.load_network = helper.latest_network_path()
        args.load_optim = helper.latest_optim_path()
        initial_step_count = helper.latest_epoch()

        # The log id is named after the agent when one is set, else the host actor.
        name = args.agent or args.actor_host
        log_id = Init.make_log_id(
            args.tag, mode, name, args.netbody, timestamp=helper.timestamp()
        )
        return (
            args,
            Init.log_id_dir(args.logdir, args.env, log_id),
            initial_step_count,
        )
Esempio n. 2
0
def parse_args():
    """
    Parse the command line described by the module docstring.

    :return: DotDict of options; ``epoch``, ``gpu_id`` and ``seed`` are ints
    """
    from docopt import docopt

    raw_options = docopt(__doc__)
    options = {}
    for flag, value in raw_options.items():
        # str.strip treats '--' as a character set, so every leading and
        # trailing dash is removed before inner dashes become underscores.
        options[flag.strip('--').replace('-', '_')] = value
    del options['h']
    del options['help']

    args = DotDict(options)
    # epoch goes through float first so values such as "1e3" are accepted
    args.epoch = int(float(args.epoch))
    args.gpu_id = int(args.gpu_id)
    args.seed = int(args.seed)
    return args
Esempio n. 3
0
def parse_args():
    """
    Turn the docopt result for this script into a typed DotDict.

    :return: DotDict with ``epoch``, ``gpu_id`` and ``seed`` converted to int
    """
    from docopt import docopt

    parsed = docopt(__doc__)
    # Normalise option names: drop surrounding dashes, then use underscores.
    cleaned = {
        option.strip("--").replace("-", "_"): value
        for option, value in parsed.items()
    }
    del cleaned["h"]
    del cleaned["help"]

    args = DotDict(cleaned)
    epoch_as_float = float(args.epoch)
    args.epoch = int(epoch_as_float)
    args.gpu_id = int(args.gpu_id)
    args.seed = int(args.seed)
    return args
Esempio n. 4
0
def main(local_args):
    """
    Run distributed training.

    Loads the saved run arguments from the log directory, joins the NCCL
    process group and runs a DistribHost when LOCAL_RANK is 0, otherwise a
    DistribWorker.

    :param local_args: Dict[str, Any]
    :return:
    """
    log_id_dir = local_args.log_id_dir
    initial_step_count = local_args.initial_step_count

    R.load_extern_classes(log_id_dir)
    logger = Init.setup_logger(log_id_dir, "train{}".format(GLOBAL_RANK))

    with open(LogDirHelper(log_id_dir).args_file_path(), "r") as args_file:
        saved_args = json.load(args_file)
    args = DotDict(saved_args)

    # When resuming, the values passed to this process override the saved ones.
    if local_args.resume:
        args = DotDict({**args, **vars(local_args)})

    dist.init_process_group(
        backend="nccl",
        init_method=args.init_method,
        world_size=WORLD_SIZE,
        rank=LOCAL_RANK,
    )
    logger.info("Rank {} initialized.".format(GLOBAL_RANK))

    # Host and worker containers take the same constructor arguments.
    container_cls = DistribHost if LOCAL_RANK == 0 else DistribWorker
    container = container_cls(
        args,
        logger,
        log_id_dir,
        initial_step_count,
        LOCAL_RANK,
        GLOBAL_RANK,
        WORLD_SIZE,
    )

    try:
        container.run()
    finally:
        container.close()
Esempio n. 5
0
def main(args):
    """
    Build a RenderContainer from the given options and run it.

    The container is always closed, even if running it raises.

    :param args: Dict[str, Any]
    :return:
    """
    args = DotDict(args)

    # logging setup
    Init.print_ascii_logo()
    logger = Init.setup_logger(args.logdir, "eval")
    Init.log_args(logger, args)
    R.load_extern_classes(args.logdir)

    # positional arguments, in the order RenderContainer is called with
    container_args = (
        args.actor,
        args.epoch,
        args.start,
        args.end,
        logger,
        args.logdir,
        args.gpu_id,
        args.seed,
        args.manager,
    )
    container = RenderContainer(*container_args)
    try:
        container.run()
    finally:
        container.close()
Esempio n. 6
0
    def from_prompt(args):
        """
        Gather arguments through the ``prompt`` hooks of the selected components.

        Agent (or host actor / worker actor / learner / exp), environment,
        reward normalizer and network are prompted in that order. Later groups
        override earlier ones and all of them override ``args`` on key clashes.

        :param args: DotDict of already-provided arguments
        :return: DotDict with the prompted values merged on top of ``args``
        """
        if not args.agent:
            # No single agent: combine the four separately registered parts.
            host_cls = R.lookup_actor(args.actor_host)
            worker_cls = R.lookup_actor(args.actor_worker)
            learner_cls = R.lookup_learner(args.learner)
            exp_cls = R.lookup_exp(args.exp)
            agent_args = {
                **host_cls.prompt(args),
                **worker_cls.prompt(args),
                **learner_cls.prompt(args),
                **exp_cls.prompt(args),
            }
        else:
            agent_args = R.lookup_agent(args.agent).prompt(provided=args)

        env_cls = R.lookup_env(args.env)
        rwdnorm_cls = R.lookup_reward_normalizer(args.rwd_norm)
        env_args = env_cls.prompt(provided=args)
        rwdnorm_args = rwdnorm_cls.prompt(provided=args)

        if not args.custom_network:
            net_args = R.prompt_modular_args(args)
        else:
            net_args = R.lookup_network(args.custom_network).prompt()

        merged = {
            **args,
            **agent_args,
            **env_args,
            **rwdnorm_args,
            **net_args,
        }
        return DotDict(merged)
Esempio n. 7
0
    def main(mode, args):
        """
        Resolve the full argument set for a run and prepare its log directory.

        Defaults are applied unless prompting was requested, then an optional
        config file, then interactive prompts. When ``args.resume`` is set, the
        args, log directory and initial step come from ``Init.from_resume``.

        :param mode: Script name
        :param args: Dict[str, Any]
        :return: args, log_id_dir, initial_step, logger
        """
        args = DotDict(args)

        # Each condition is checked against the current args, which the
        # previous stage may have replaced.
        if not args.prompt:
            args = Init.from_defaults(args)
        if args.config:
            args = Init.from_config(args)
        if args.prompt:
            args = Init.from_prompt(args)

        initial_step = 0
        name = args.agent or args.actor_host
        log_id = Init.make_log_id(args.tag, mode, name, args.netbody)
        log_id_dir = Init.log_id_dir(args.logdir, args.env, log_id)

        if args.resume:
            resumed = Init.from_resume(mode, args)
            args, log_id_dir, initial_step = resumed

        Init.print_ascii_logo()
        Init.make_log_dirs(log_id_dir)
        Init.write_args_file(log_id_dir, args)
        logger = Init.setup_logger(log_id_dir)
        Init.log_args(logger, args)
        return args, log_id_dir, initial_step, logger
Esempio n. 8
0
def main(args):
    """
    Build an EvalContainer from the given options and run it.

    The container is always closed, even if running it raises.

    :param args: Dict[str, Any]
    :return:
    """
    args = DotDict(args)

    Init.print_ascii_logo()
    logger = Init.setup_logger(args.logdir, 'eval')
    Init.log_args(logger, args)
    R.load_extern_classes(args.logdir)

    # positional arguments, in the order EvalContainer is called with
    container_args = (
        args.actor,
        args.epoch,
        logger,
        args.logdir,
        args.gpu_id,
        args.nb_episode,
        args.start,
        args.end,
        args.seed,
        args.manager,
    )
    eval_container = EvalContainer(*container_args)
    try:
        eval_container.run()
    finally:
        eval_container.close()
Esempio n. 9
0
    def from_defaults(args):
        """
        Fill in arguments from the ``args`` attribute of the selected components.

        Agent (or host actor / worker actor / learner / exp), environment,
        reward normalizer and network defaults are merged on top of ``args``;
        later groups win on key clashes.

        :param args: DotDict of already-provided arguments
        :return: DotDict with the component defaults merged on top of ``args``
        """
        if not args.agent:
            # No single agent: combine the four separately registered parts.
            host_cls = R.lookup_actor(args.actor_host)
            worker_cls = R.lookup_actor(args.actor_worker)
            learner_cls = R.lookup_learner(args.learner)
            exp_cls = R.lookup_exp(args.exp)
            agent_args = {
                **host_cls.args,
                **worker_cls.args,
                **learner_cls.args,
                **exp_cls.args,
            }
        else:
            agent_args = R.lookup_agent(args.agent).args

        env_cls = R.lookup_env(args.env)
        rwdnorm_cls = R.lookup_reward_normalizer(args.rwd_norm)
        env_args = env_cls.args
        rwdnorm_args = rwdnorm_cls.args

        if not args.custom_network:
            net_args = R.lookup_modular_args(args)
        else:
            net_args = R.lookup_network(args.custom_network).args

        merged = {
            **args,
            **agent_args,
            **env_args,
            **rwdnorm_args,
            **net_args,
        }
        return DotDict(merged)
Esempio n. 10
0
def main(args):
    """
    Generate SC2 replays.

    Loads the training args saved under ``args.log_id_dir``, builds a
    single-environment manager, network and agent from them, and runs a
    ReplayGenerator. The environment is closed when the generator stops.

    :param args: Dict[str, Any]
    :return:
    """

    print_ascii_logo()
    print('Saving replays... Press Ctrl+C to stop.')

    helper = LogDirHelper(args.log_id_dir)
    with open(helper.args_file_path(), 'r') as args_file:
        saved_args = json.load(args_file)
    train_args = DotDict(saved_args)

    # refuse to run for anything but the SC2 engine
    engine = env_registry.lookup_engine(train_args.env)
    assert engine == 'AdeptSC2Env', "replay_gen_sc2.py is only for SC2."

    # construct env
    env_kwargs = dict(
        seed=args.seed,
        nb_env=1,
        registry=env_registry,
        sc2_replay_dir=helper.epoch_path_at_epoch(args.epoch),
        sc2_render=args.render,
    )
    env = SubProcEnvManager.from_args(train_args, **env_kwargs)

    output_space = agent_registry.lookup_output_space(
        train_args.agent, env.action_space
    )

    # custom networks come from the registry, otherwise use the modular one
    network_cls = (
        net_registry.lookup_custom_net(train_args.custom_network)
        if args.custom_network
        else ModularNetwork
    )
    network = network_cls.from_args(
        train_args, env.observation_space, output_space, net_registry
    )

    # create an agent (add act_eval method)
    use_cuda = torch.cuda.is_available() and args.gpu_id >= 0
    device = torch.device(
        "cuda:{}".format(args.gpu_id) if use_cuda else "cpu"
    )
    torch.backends.cudnn.benchmark = True

    agent_cls = agent_registry.lookup_agent(train_args.agent)
    agent = agent_cls.from_args(
        train_args,
        network,
        device,
        env_registry.lookup_reward_normalizer(train_args.env),
        env.gpu_preprocessor,
        env_registry.lookup_policy(env.engine)(env.action_space),
        nb_env=1,
    )

    # create a rendering container
    # TODO: could terminate after a configurable number of replays instead of running indefinitely
    renderer = ReplayGenerator(agent, device, env)
    try:
        renderer.run()
    finally:
        env.close()
Esempio n. 11
0
    def __init__(
        self,
        actor,
        epoch_id,
        start,
        end,
        logger,
        log_id_dir,
        gpu_id,
        seed,
        manager,
        extra_args=None,
    ):
        """
        Set up the environment manager, actor and network for a saved run.

        :param actor: name used to look up the actor class in REGISTRY
        :param epoch_id: if truthy, the only epoch to use; otherwise the
            epochs found in the log directory are filtered by start/end
        :param start: lowest epoch id to keep (inclusive)
        :param end: highest epoch id to keep (inclusive); -1.0 disables the
            upper bound
        :param logger: logger stored on the instance
        :param log_id_dir: log directory of the training run
        :param gpu_id: passed to ``_device_from_gpu_id`` to pick the device
        :param seed: seed forwarded to the environment manager
        :param manager: name used to look up the manager class in REGISTRY
        :param extra_args: optional mapping merged on top of the saved
            training args; defaults to no overrides
        """
        # A None sentinel avoids sharing one dict object across all calls.
        if extra_args is None:
            extra_args = {}

        self.log_dir_helper = log_dir_helper = LogDirHelper(log_id_dir)
        train_args = log_dir_helper.load_args()
        self.train_args = DotDict({**train_args, **extra_args})
        self.device = device = self._device_from_gpu_id(gpu_id)
        self.logger = logger

        if epoch_id:
            epoch_ids = [epoch_id]
        else:
            epoch_ids = [
                eid
                for eid in self.log_dir_helper.epochs()
                if eid >= start and (end == -1.0 or eid <= end)
            ]
        self.epoch_ids = epoch_ids

        # NOTE(review): the lookups and network init below use the saved args
        # without extra_args; only the env manager receives the merged
        # self.train_args. Confirm whether that asymmetry is intended.
        engine = REGISTRY.lookup_engine(train_args.env)
        env_cls = REGISTRY.lookup_env(train_args.env)
        manager_cls = REGISTRY.lookup_manager(manager)
        self.env_mgr = manager_cls.from_args(
            self.train_args, engine, env_cls, seed=seed, nb_env=1
        )

        # The output space is keyed by the agent name, else by the host actor.
        if train_args.agent:
            agent = train_args.agent
        else:
            agent = train_args.actor_host
        output_space = REGISTRY.lookup_output_space(
            agent, self.env_mgr.action_space
        )

        actor_cls = REGISTRY.lookup_actor(actor)
        self.actor = actor_cls.from_args(
            actor_cls.prompt(), self.env_mgr.action_space
        )

        self.network = self._init_network(
            train_args,
            self.env_mgr.observation_space,
            self.env_mgr.gpu_preprocessor,
            output_space,
            REGISTRY,
        ).to(device)
Esempio n. 12
0
def parse_args():
    """
    Parse and type-convert the command line of the evaluation script.

    :return: DotDict of options; ``epoch`` is an int, or whatever
        ``parse_none`` returned when that value is falsy
    """
    from docopt import docopt

    raw_options = docopt(__doc__)
    options = {}
    for flag, value in raw_options.items():
        # drop surrounding dashes, then turn inner dashes into underscores
        options[flag.strip('--').replace('-', '_')] = value
    del options['h']
    del options['help']

    args = DotDict(options)
    args.logdir = parse_path(args.logdir)

    # TODO implement Option utility
    epoch_option = parse_none(args.epoch)
    args.epoch = int(float(epoch_option)) if epoch_option else epoch_option

    args.gpu_id = int(args.gpu_id)
    args.nb_episode = int(args.nb_episode)
    args.start = float(args.start)
    args.end = float(args.end)
    args.seed = int(args.seed)
    return args
Esempio n. 13
0
def parse_args():
    """
    Parse and type-convert the command line described by the module docstring.

    :return: DotDict of options; ``start`` and ``end`` are ints, ``epoch`` is
        an int or whatever ``parse_none`` returned when that value is falsy
    """
    from docopt import docopt

    parsed = docopt(__doc__)
    cleaned = {
        option.strip("--").replace("-", "_"): value
        for option, value in parsed.items()
    }
    del cleaned["h"]
    del cleaned["help"]
    args = DotDict(cleaned)

    args.logdir = parse_path(args.logdir)

    # TODO implement Option utility
    epoch_option = parse_none(args.epoch)
    if not epoch_option:
        args.epoch = epoch_option
    else:
        args.epoch = int(float(epoch_option))

    # start/end go through float first so values such as "1e3" are accepted
    start_as_float = float(args.start)
    args.start = int(start_as_float)
    end_as_float = float(args.end)
    args.end = int(end_as_float)
    args.gpu_id = int(args.gpu_id)
    args.seed = int(args.seed)
    return args
Esempio n. 14
0
def main(args):
    """
    Run distributed training.

    Spawns ``args.nb_proc`` child processes of ``adept.scripts._distrib`` on
    this node, waits for all of them, then optionally runs the evaluation
    script on the resulting log directory.

    :param args: Dict[str, Any]
    :return:
    """
    args = DotDict(args)

    dist_world_size = args.nb_proc * args.nb_node

    # Environment shared by every child; the rank entries are set per child.
    current_env = os.environ.copy()
    current_env.update(
        {
            "MASTER_ADDR": args.master_addr,
            "MASTER_PORT": str(args.master_port),
            "WORLD_SIZE": str(dist_world_size),
        }
    )

    args, log_id_dir, initial_step, logger = Init.main(MODE, args)
    R.save_extern_classes(log_id_dir)

    processes = []
    for local_rank in range(args.nb_proc):
        # each process's rank
        current_env["RANK"] = str(args.nb_proc * args.node_rank + local_rank)
        current_env["LOCAL_RANK"] = str(local_rank)

        # spawn the processes
        cmd = [
            sys.executable,
            "-u",
            "-m",
            "adept.scripts._distrib",
            "--log-id-dir={}".format(log_id_dir),
        ]
        if args.resume:
            # a resumed run also tells the child where to pick up from
            cmd.extend(
                [
                    "--resume={}".format(True),
                    "--load-network={}".format(args.load_network),
                    "--load-optim={}".format(args.load_optim),
                    "--initial-step-count={}".format(initial_step),
                    "--init-method={}".format(args.init_method),
                ]
            )
        if args.custom_network:
            cmd.extend(["--custom-network", args.custom_network])

        processes.append(subprocess.Popen(cmd, env=current_env))

    for child in processes:
        child.wait()

    if args.eval:
        from adept.scripts.evaluate import main as evaluate_main

        eval_args = {
            "log_id_dir": log_id_dir,
            "gpu_id": 0,
            "nb_episode": 30,
        }
        if args.custom_network:
            eval_args["custom_network"] = args.custom_network
        evaluate_main(eval_args)
Esempio n. 15
0
def parse_args():
    """
    Parse and type-convert the command line of the distributed launcher.

    The node/process options are always converted. When ``resume`` is set,
    only its path is resolved and the remaining options are returned as-is.

    :return: DotDict of options
    """
    from docopt import docopt

    parsed = docopt(__doc__)
    cleaned = {
        option.strip("--").replace("-", "_"): value
        for option, value in parsed.items()
    }
    del cleaned["h"]
    del cleaned["help"]

    args = DotDict(cleaned)

    def int_via_float(value):
        # accepts values such as "1e6"
        return int(float(value))

    for field in ("nb_node", "node_rank", "nb_proc", "master_port"):
        setattr(args, field, int(getattr(args, field)))

    if args.resume:
        args.resume = parse_path(args.resume)
        return args

    if args.config:
        args.config = parse_path(args.config)

    # (option, converter) pairs, applied in this order
    conversions = (
        ("logdir", parse_path),
        ("nb_env", int),
        ("seed", int),
        ("nb_step", int_via_float),
        ("tag", parse_none),
        ("nb_eval_env", int),
        ("summary_freq", int),
        ("lr", float),
        ("epoch_len", int_via_float),
        ("profile", bool),
    )
    for field, convert in conversions:
        setattr(args, field, convert(getattr(args, field)))
    return args
Esempio n. 16
0
def parse_args():
    """
    Parse and type-convert the command line described by the module docstring.

    When ``resume`` is set, only its path is resolved and the remaining
    options are returned as-is. Otherwise every option is converted and
    ``nb_learn_batch <= nb_workers`` is asserted.

    :return: DotDict of options
    """
    from docopt import docopt

    parsed = docopt(__doc__)
    cleaned = {}
    for option, value in parsed.items():
        cleaned[option.strip('--').replace('-', '_')] = value
    del cleaned['h']
    del cleaned['help']
    args = DotDict(cleaned)

    # Ignore other args if resuming
    if args.resume:
        args.resume = parse_path(args.resume)
        return args

    if args.config:
        args.config = parse_path(args.config)

    def int_via_float(value):
        # accepts values such as "1e6"
        return int(float(value))

    # (option, converter) pairs, applied in this order
    conversions = (
        ('logdir', parse_path),
        ('nb_env', int),
        ('seed', int),
        ('nb_step', int_via_float),
        ('tag', parse_none),
        ('summary_freq', int),
        ('lr', float),
        ('epoch_len', int_via_float),
        ('profile', bool),
        ('ray_addr', parse_none),
        ('nb_learners', int),
        ('nb_workers', int),
        ('learner_cpu_alloc', int),
        ('learner_gpu_alloc', float),
        ('worker_cpu_alloc', int),
        ('worker_gpu_alloc', float),
        ('nb_learn_batch', int),
        ('rollout_queue_size', int),
    )
    for field, convert in conversions:
        setattr(args, field, convert(getattr(args, field)))

    # arg checking (an assert, so it is skipped when Python runs with -O)
    assert args.nb_learn_batch <= args.nb_workers, (
        'WARNING: nb_learn_batch must be <= nb_workers. Got {} <= {}'.format(
            args.nb_learn_batch, args.nb_workers
        )
    )
    return args
Esempio n. 17
0
def parse_args():
    """
    Parse and type-convert the command line described by the module docstring.

    When ``resume`` is set, only its path is resolved and the remaining
    options are returned as-is.

    :return: DotDict of options
    """
    from docopt import docopt

    parsed = docopt(__doc__)
    cleaned = {}
    for option, value in parsed.items():
        cleaned[option.strip('--').replace('-', '_')] = value
    del cleaned['h']
    del cleaned['help']
    args = DotDict(cleaned)

    # Ignore other args if resuming
    if args.resume:
        args.resume = parse_path(args.resume)
        return args

    if args.config:
        args.config = parse_path(args.config)

    def int_via_float(value):
        # accepts values such as "1e6"
        return int(float(value))

    # (option, converter) pairs, applied in this order
    conversions = (
        ('logdir', parse_path),
        ('gpu_id', int),
        ('nb_env', int),
        ('seed', int),
        ('nb_step', int_via_float),
        ('tag', parse_none),
        ('nb_eval_env', int),
        ('summary_freq', int),
        ('lr', float),
        ('warmup', int_via_float),
        ('epoch_len', int_via_float),
        ('profile', bool),
    )
    for field, convert in conversions:
        setattr(args, field, convert(getattr(args, field)))
    return args
Esempio n. 18
0
 def from_config(args):
     """
     Overlay the JSON file at ``args.config`` on top of ``args``.

     :param args: DotDict of already-provided arguments
     :return: new DotDict; values from the file win on key clashes
     """
     with open(args.config, "r") as config_file:
         overrides = json.load(config_file)
     merged = {**args, **overrides}
     return DotDict(merged)
Esempio n. 19
0
 def load_args(self):
     """
     Read the JSON file at ``self.args_file_path()`` into a DotDict.

     :return: DotDict of the stored arguments
     """
     with open(self.args_file_path()) as stored_args_file:
         stored_args = json.load(stored_args_file)
     return DotDict(stored_args)