Example #1
0
def main(args):
    """
    Build a RenderContainer from the given arguments and run it.

    The container is always closed, even if running it raises.

    :param args: Dict[str, Any]
    :return:
    """
    opts = DotDict(args)

    # logging setup
    Init.print_ascii_logo()
    log = Init.setup_logger(opts.logdir, "eval")
    Init.log_args(log, opts)
    R.load_extern_classes(opts.logdir)

    # RenderContainer receives these positionally, in exactly this order
    ctor_args = (
        opts.actor,
        opts.epoch,
        opts.start,
        opts.end,
        log,
        opts.logdir,
        opts.gpu_id,
        opts.seed,
        opts.manager,
    )
    renderer = RenderContainer(*ctor_args)
    try:
        renderer.run()
    finally:
        renderer.close()
Example #2
0
def main(args):
    """
    Build an EvalContainer from the given arguments and run it.

    The container is always closed, even if running it raises.

    :param args: Dict[str, Any]
    :return:
    """
    opts = DotDict(args)

    Init.print_ascii_logo()
    log = Init.setup_logger(opts.logdir, 'eval')
    Init.log_args(log, opts)
    R.load_extern_classes(opts.logdir)

    # EvalContainer receives these positionally, in exactly this order
    ctor_args = (
        opts.actor,
        opts.epoch,
        log,
        opts.logdir,
        opts.gpu_id,
        opts.nb_episode,
        opts.start,
        opts.end,
        opts.seed,
        opts.manager,
    )
    evaluator = EvalContainer(*ctor_args)
    try:
        evaluator.run()
    finally:
        evaluator.close()
Example #3
0
    def __init__(
            self,
            eval_actor,
            epoch_id,
            logger,
            log_id_dir,
            gpu_id,
            nb_episode,
            start,
            end,
            seed,
            manager
    ):
        """
        Set up everything needed to evaluate saved epochs of a training run.

        :param eval_actor: name of the actor to look up in the registry
        :param epoch_id: if truthy, evaluate only this epoch; otherwise the
            epochs found in the log directory are filtered by start / end
        :param logger: logger stored on the container
        :param log_id_dir: log directory of the training run; the training
            args are loaded from it
        :param gpu_id: passed to self._device_from_gpu_id to pick the device
        :param nb_episode: passed to the environment manager as nb_env
        :param start: lowest epoch id to include (inclusive)
        :param end: highest epoch id to include (inclusive); -1 disables the
            upper bound
        :param seed: seed forwarded to the environment manager
        :param manager: name of the environment manager to look up in the
            registry
        """
        self.log_dir_helper = log_dir_helper = LogDirHelper(log_id_dir)
        self.train_args = train_args = log_dir_helper.load_args()
        self.device = device = self._device_from_gpu_id(gpu_id)
        self.logger = logger

        if epoch_id:
            epoch_ids = [epoch_id]
        else:
            # keep epochs in [start, end]; end == -1 means "no upper bound"
            epoch_ids = [
                eid for eid in log_dir_helper.epochs()
                if eid >= start and (end == -1. or eid <= end)
            ]
        self.epoch_ids = epoch_ids

        engine = REGISTRY.lookup_engine(train_args.env)
        env_cls = REGISTRY.lookup_env(train_args.env)
        # use the manager class that was requested instead of always
        # falling back to SubProcEnvManager
        mgr_cls = REGISTRY.lookup_manager(manager)
        self.env_mgr = env_mgr = mgr_cls.from_args(
            train_args,
            engine,
            env_cls,
            seed=seed,
            nb_env=nb_episode
        )

        # runs trained without an agent fall back to the host actor's name
        agent = train_args.agent if train_args.agent else train_args.actor_host
        output_space = REGISTRY.lookup_output_space(
            agent, env_mgr.action_space
        )
        actor_cls = REGISTRY.lookup_actor(eval_actor)
        self.actor = actor_cls.from_args(
            actor_cls.prompt(),
            env_mgr.action_space
        )

        self.network = self._init_network(
            train_args,
            env_mgr.observation_space,
            env_mgr.gpu_preprocessor,
            output_space,
            REGISTRY
        ).to(device)
Example #4
0
    def from_defaults(args):
        """
        Merge the given args with the default args of every selected component.

        Components are the agent (or host actor, worker actor, learner and
        exp when no agent is set), the environment, the reward normalizer and
        the network. On key collisions, later sources override earlier ones.

        :param args: mapping with attribute access (e.g. DotDict)
        :return: DotDict containing args plus all component defaults
        """
        if args.agent:
            agent_args = R.lookup_agent(args.agent).args
        else:
            components = (
                R.lookup_actor(args.actor_host),
                R.lookup_actor(args.actor_worker),
                R.lookup_learner(args.learner),
                R.lookup_exp(args.exp),
            )
            agent_args = {}
            for component in components:
                agent_args.update(component.args)

        env_cls = R.lookup_env(args.env)
        rwdnorm_cls = R.lookup_reward_normalizer(args.rwd_norm)
        env_args = env_cls.args
        rwdnorm_args = rwdnorm_cls.args

        net_args = (
            R.lookup_network(args.custom_network).args
            if args.custom_network
            else R.lookup_modular_args(args)
        )

        merged = {}
        for source in (args, agent_args, env_args, rwdnorm_args, net_args):
            merged.update(source)
        return DotDict(merged)
Example #5
0
    def from_prompt(args):
        """
        Merge the given args with args obtained from each component's prompt.

        Prompts are issued in a fixed order: agent (or host actor, worker
        actor, learner, exp when no agent is set), environment, reward
        normalizer, network. On key collisions, later sources override
        earlier ones.

        :param args: mapping with attribute access (e.g. DotDict)
        :return: DotDict containing args plus all prompted component args
        """
        if args.agent:
            agent_args = R.lookup_agent(args.agent).prompt(provided=args)
        else:
            host_cls = R.lookup_actor(args.actor_host)
            worker_cls = R.lookup_actor(args.actor_worker)
            learner_cls = R.lookup_learner(args.learner)
            exp_cls = R.lookup_exp(args.exp)
            agent_args = {}
            # prompt order matters: host, worker, learner, exp
            for component in (host_cls, worker_cls, learner_cls, exp_cls):
                agent_args.update(component.prompt(args))

        env_cls = R.lookup_env(args.env)
        rwdnorm_cls = R.lookup_reward_normalizer(args.rwd_norm)
        env_args = env_cls.prompt(provided=args)
        rwdnorm_args = rwdnorm_cls.prompt(provided=args)

        net_args = (
            R.lookup_network(args.custom_network).prompt()
            if args.custom_network
            else R.prompt_modular_args(args)
        )

        merged = {}
        for source in (args, agent_args, env_args, rwdnorm_args, net_args):
            merged.update(source)
        return DotDict(merged)
Example #6
0
def main(local_args):
    """
    Run one process of distributed training.

    Loads the saved training args from the log directory, joins the NCCL
    process group, then runs a DistribHost (local rank 0) or a
    DistribWorker (any other local rank).

    :param local_args: namespace-like object with log_id_dir,
        initial_step_count and resume attributes
    :return:
    """
    log_id_dir = local_args.log_id_dir
    initial_step_count = local_args.initial_step_count

    R.load_extern_classes(log_id_dir)
    logger = Init.setup_logger(log_id_dir, "train{}".format(GLOBAL_RANK))

    args_path = LogDirHelper(log_id_dir).args_file_path()
    with open(args_path, "r") as args_file:
        saved_args = json.load(args_file)
    args = DotDict(saved_args)

    # when resuming, command-line values take precedence over the saved ones
    if local_args.resume:
        args = DotDict({**args, **vars(local_args)})

    # NOTE(review): rank is LOCAL_RANK while world_size is the global
    # WORLD_SIZE -- confirm this is intended for multi-node runs.
    dist.init_process_group(
        backend="nccl",
        init_method=args.init_method,
        world_size=WORLD_SIZE,
        rank=LOCAL_RANK,
    )
    logger.info("Rank {} initialized.".format(GLOBAL_RANK))

    # host and worker share the same constructor signature
    container_cls = DistribHost if LOCAL_RANK == 0 else DistribWorker
    container = container_cls(
        args,
        logger,
        log_id_dir,
        initial_step_count,
        LOCAL_RANK,
        GLOBAL_RANK,
        WORLD_SIZE,
    )

    try:
        container.run()
    finally:
        container.close()
Example #7
0
    def __init__(self, args, log_id_dir, initial_step_count, rank):
        """
        Build a worker: environment manager, network, actor and experience
        cache, then reset the environments and create fresh internals.

        :param args: training args with attribute access
        :param log_id_dir: log directory from which saved registry classes
            are loaded
        :param initial_step_count: step count the worker's counters start at
        :param rank: worker rank; used to derive the environment seed
        """
        # rank 0 keeps the base seed, other ranks are offset by nb_env * rank
        if rank == 0:
            seed = args.seed
        else:
            seed = args.seed + args.nb_env * rank
        print('Worker {} using seed {}'.format(rank, seed))

        # load saved registry classes
        REGISTRY.load_extern_classes(log_id_dir)

        # ENV
        engine = REGISTRY.lookup_engine(args.env)
        env_cls = REGISTRY.lookup_env(args.env)
        env_mgr = REGISTRY.lookup_manager(args.manager).from_args(
            args, engine, env_cls, seed=seed
        )

        # NETWORK
        # the torch seed is the base seed, not the per-rank env seed
        torch.manual_seed(args.seed)
        device_name = "cuda" if (torch.cuda.is_available()) else "cpu"
        device = torch.device(device_name)
        output_space = REGISTRY.lookup_output_space(
            args.actor_worker, env_mgr.action_space
        )
        net_cls = (
            REGISTRY.lookup_network(args.custom_network)
            if args.custom_network
            else ModularNetwork
        )
        net = net_cls.from_args(
            args,
            env_mgr.observation_space,
            output_space,
            env_mgr.gpu_preprocessor,
            REGISTRY,
        )

        # ACTOR / EXP
        actor_cls = REGISTRY.lookup_actor(args.actor_worker)
        actor = actor_cls.from_args(args, env_mgr.action_space)
        spec_builder = actor_cls.exp_spec_builder(
            env_mgr.observation_space,
            env_mgr.action_space,
            net.internal_space(),
            env_mgr.nb_env,
        )
        exp = REGISTRY.lookup_exp(args.exp).from_args(args, spec_builder)

        self.actor = actor
        self.exp = exp.to(device)
        self.nb_step = args.nb_step
        self.env_mgr = env_mgr
        self.nb_env = args.nb_env
        self.network = net.to(device)
        self.device = device
        self.initial_step_count = initial_step_count

        # TODO: this should be set to eval after some number of training steps
        self.network.train()

        # SETUP state variables for run
        self.step_count = self.initial_step_count
        self.global_step_count = self.initial_step_count
        self.ep_rewards = torch.zeros(self.nb_env)
        self.rank = rank

        first_obs = self.env_mgr.reset()
        self.obs = dtensor_to_dev(first_obs, self.device)
        # one set of fresh internals per environment
        per_env_internals = [
            self.network.new_internals(self.device) for _ in range(self.nb_env)
        ]
        self.internals = listd_to_dlist(per_env_internals)
        self.start_time = time()
        self._weights_synced = False
Example #8
0
def main(args):
    """
    Run local training, optionally profiled, optionally followed by an
    evaluation run.

    :param args: Dict[str, Any]
    :return:
    :raises ImportError: if profiling is requested but pyinstrument is not
        installed
    """
    args, log_id_dir, initial_step, logger = Init.main(MODE, args)
    R.save_extern_classes(log_id_dir)

    container = Local(args, logger, log_id_dir, initial_step)

    profiler = None
    try:
        # profiler setup lives inside the try so the container is closed
        # even when pyinstrument is missing
        if args.profile:
            try:
                from pyinstrument import Profiler
            except ImportError as err:
                raise ImportError(
                    'You must install pyinstrument to use profiling.'
                ) from err
            # profiling runs use a shortened step budget
            container.nb_step = 10e3
            profiler = Profiler()
            profiler.start()

        container.run()
    finally:
        try:
            if profiler is not None:
                profiler.stop()
                print(profiler.output_text(unicode=True, color=True))
        finally:
            container.close()

    if args.eval:
        # aliased so the import does not shadow this function's own name
        from adept.scripts.evaluate import main as evaluate_main
        eval_args = {
            'log_id_dir': log_id_dir,
            'gpu_id': 0,
            'nb_episode': 30,
        }
        if args.custom_network:
            eval_args['custom_network'] = args.custom_network
        evaluate_main(eval_args)
Example #9
0
def register_learner(learner_cls):
    """
    Register a learner class with adept's REGISTRY.

    :param learner_cls: learner class to register
    :return:
    """
    # imported at call time rather than at module import
    # (presumably to avoid a circular import -- confirm)
    from adept.registry import REGISTRY as registry

    registry.register_learner(learner_cls)
Example #10
0
    def __init__(
            self,
            args,
            log_id_dir,
            initial_step_count,
            rank=0,
    ):
        """
        Build a learner host: network, optimizer, actor, learner and
        experience cache. Rank 0 additionally reloads saved weights (if
        requested) and creates the summary writer and model saver.

        :param args: training args with attribute access
        :param log_id_dir: log directory; saved registry classes are loaded
            from it and rank 0 writes summaries / checkpoints to it
        :param initial_step_count: step count training starts from
        :param rank: rank of this learner; 0 is the main learner
        """
        # ARGS TO STATE VARS
        self._args = args
        self.nb_learners = args.nb_learners
        self.nb_workers = args.nb_workers
        self.rank = rank
        self.nb_step = args.nb_step
        self.nb_env = args.nb_env
        self.initial_step_count = initial_step_count
        self.epoch_len = args.epoch_len
        self.summary_freq = args.summary_freq
        self.nb_learn_batch = args.nb_learn_batch
        self.rollout_queue_size = args.rollout_queue_size
        # can be none if rank != 0
        self.log_id_dir = log_id_dir

        # load saved registry classes
        REGISTRY.load_extern_classes(log_id_dir)

        # ENV (temporary): only needed to read the spaces and preprocessor,
        # which are captured in locals before the env is closed
        env_cls = REGISTRY.lookup_env(args.env)
        env = env_cls.from_args(args, 0)
        env_action_space, env_observation_space, env_gpu_preprocessor = \
            env.action_space, env.observation_space, env.gpu_preprocessor
        env.close()

        # NETWORK
        torch.manual_seed(args.seed)
        device = torch.device("cuda")  # ray handles gpus
        torch.backends.cudnn.benchmark = True
        output_space = REGISTRY.lookup_output_space(
            args.actor_worker, env_action_space)
        if args.custom_network:
            net_cls = REGISTRY.lookup_network(args.custom_network)
        else:
            net_cls = ModularNetwork
        net = net_cls.from_args(
            args,
            env_observation_space,
            output_space,
            env_gpu_preprocessor,
            REGISTRY
        )
        self.network = net.to(device)
        # TODO: this is a hack, remove once queuer puts rollouts on the correct device
        self.network.device = device
        self.device = device
        self.network.train()

        # OPTIMIZER
        def optim_fn(x):
            return torch.optim.RMSprop(x, lr=args.lr, eps=1e-5, alpha=0.99)
        if args.nb_learners > 1:
            self.optimizer = NCCLOptimizer(optim_fn, self.network, self.nb_learners)
        else:
            self.optimizer = optim_fn(self.network.parameters())

        # LEARNER / EXP
        rwd_norm = REGISTRY.lookup_reward_normalizer(
            args.rwd_norm).from_args(args)
        actor_cls = REGISTRY.lookup_actor(args.actor_host)
        # use the spaces saved above rather than touching the closed env;
        # the host batches nb_learn_batch worker rollouts of nb_env each
        builder = actor_cls.exp_spec_builder(
            env_observation_space,
            env_action_space,
            net.internal_space(),
            args.nb_env * args.nb_learn_batch
        )
        actor = actor_cls.from_args(args, env_action_space)
        learner = REGISTRY.lookup_learner(args.learner).from_args(args, rwd_norm)

        # look up the class once and construct the exp exactly once
        exp_cls = REGISTRY.lookup_exp(args.exp)

        self.actor = actor
        self.learner = learner
        self.exp = exp_cls.from_args(args, builder).to(device)

        # Rank 0 setup, load network/optimizer and create SummaryWriter/Saver
        if rank == 0:
            if args.load_network:
                self.network = self.load_network(self.network, args.load_network)
                print('Reloaded network from {}'.format(args.load_network))
            if args.load_optim:
                self.optimizer = self.load_optim(self.optimizer, args.load_optim)
                print('Reloaded optimizer from {}'.format(args.load_optim))

            print('Network parameters: ' + str(self.count_parameters(net)))
            self.summary_writer = SummaryWriter(log_id_dir)
            self.saver = SimpleModelSaver(log_id_dir)
Example #11
0
def register_actor(actor_cls):
    """
    Register an actor class with adept's REGISTRY.

    :param actor_cls: actor class to register
    :return:
    """
    # imported at call time rather than at module import
    # (presumably to avoid a circular import -- confirm)
    from adept.registry import REGISTRY as registry

    registry.register_actor(actor_cls)
Example #12
0
def register_manager(manager_cls):
    """
    Register an environment manager class with adept's REGISTRY.

    :param manager_cls: manager class to register
    :return:
    """
    # imported at call time rather than at module import
    # (presumably to avoid a circular import -- confirm)
    from adept.registry import REGISTRY as registry

    registry.register_manager(manager_cls)
Example #13
0
def register_submodule(submod_cls):
    """
    Register a network submodule class with adept's REGISTRY.

    :param submod_cls: submodule class to register
    :return:
    """
    # imported at call time rather than at module import
    # (presumably to avoid a circular import -- confirm)
    from adept.registry import REGISTRY as registry

    registry.register_submodule(submod_cls)
Example #14
0
def register_network(network_cls):
    """
    Register a network class with adept's REGISTRY.

    :param network_cls: network class to register
    :return:
    """
    # imported at call time rather than at module import
    # (presumably to avoid a circular import -- confirm)
    from adept.registry import REGISTRY as registry

    registry.register_network(network_cls)
Example #15
0
    def __init__(self, args, logger, log_id_dir, initial_step_count):
        """
        Build a single-process training container: environment manager,
        network, optimizer with warmup schedule, agent, summary writer,
        saver and updater.

        :param args: training args with attribute access
        :param logger: logger used for status messages
        :param log_id_dir: directory for summaries and checkpoints
        :param initial_step_count: step count training starts from
        :raises ValueError: if args.optim is not "RMSprop" or "Adam"
        """
        # ENV
        engine = REGISTRY.lookup_engine(args.env)
        env_cls = REGISTRY.lookup_env(args.env)
        mgr_cls = REGISTRY.lookup_manager(args.manager)
        env_mgr = mgr_cls.from_args(args, engine, env_cls)

        # NETWORK
        torch.manual_seed(args.seed)
        # a negative gpu_id selects the CPU even when CUDA is available
        if torch.cuda.is_available() and args.gpu_id >= 0:
            device = torch.device("cuda:{}".format(args.gpu_id))
            torch.backends.cudnn.benchmark = True
        else:
            device = torch.device("cpu")
        output_space = REGISTRY.lookup_output_space(args.agent,
                                                    env_mgr.action_space)
        if args.custom_network:
            net_cls = REGISTRY.lookup_network(args.custom_network)
        else:
            net_cls = ModularNetwork
        net = net_cls.from_args(
            args,
            env_mgr.gpu_preprocessor.observation_space,
            output_space,
            env_mgr.gpu_preprocessor,
            REGISTRY,
        )
        logger.info("Network parameters: " + str(self.count_parameters(net)))

        def optim_fn(x):
            if args.optim == "RMSprop":
                return torch.optim.RMSprop(x, lr=args.lr, eps=1e-5, alpha=0.99)
            elif args.optim == "Adam":
                return torch.optim.Adam(x, lr=args.lr, eps=1e-5)
            # fail loudly instead of returning None and crashing later
            raise ValueError(
                "Unsupported optimizer {!r}; expected 'RMSprop' or "
                "'Adam'.".format(args.optim)
            )

        def warmup_schedule(back_step):
            # linear ramp from 0 to 1 over the first args.warmup steps
            return back_step / args.warmup if back_step < args.warmup else 1.0

        # AGENT
        rwd_norm = REGISTRY.lookup_reward_normalizer(
            args.rwd_norm).from_args(args)
        agent_cls = REGISTRY.lookup_agent(args.agent)
        builder = agent_cls.exp_spec_builder(
            env_mgr.observation_space,
            env_mgr.action_space,
            net.internal_space(),
            env_mgr.nb_env,
        )
        agent = agent_cls.from_args(args, rwd_norm, env_mgr.action_space,
                                    builder)

        self.agent = agent.to(device)
        self.nb_step = args.nb_step
        self.env_mgr = env_mgr
        self.nb_env = args.nb_env
        self.network = net.to(device)
        self.optimizer = optim_fn(self.network.parameters())
        self.scheduler = LambdaLR(self.optimizer, warmup_schedule)
        self.device = device
        self.initial_step_count = initial_step_count
        self.log_id_dir = log_id_dir
        self.epoch_len = args.epoch_len
        self.summary_freq = args.summary_freq
        self.logger = logger
        self.summary_writer = SummaryWriter(log_id_dir)
        self.saver = SimpleModelSaver(log_id_dir)
        self.updater = LocalUpdater(self.optimizer, self.network,
                                    args.grad_norm_clip)

        # NOTE(review): scheduler and updater were built with the objects
        # created above; this assumes load_network / load_optim return the
        # same (updated) objects -- confirm.
        if args.load_network:
            self.network = self.load_network(self.network, args.load_network)
            logger.info("Reloaded network from {}".format(args.load_network))
        if args.load_optim:
            self.optimizer = self.load_optim(self.optimizer, args.load_optim)
            logger.info("Reloaded optimizer from {}".format(args.load_optim))

        self.network.train()
Example #16
0
    def __init__(
        self,
        args,
        logger,
        log_id_dir,
        initial_step_count,
        local_rank,
        global_rank,
        world_size,
    ):
        """
        Build a distributed host: environment manager, network, optimizer,
        agent, per-rank summary writer, saver and distributed updater.

        :param args: training args with attribute access
        :param logger: logger used for status messages
        :param log_id_dir: directory for summaries and checkpoints
        :param initial_step_count: step count training starts from
        :param local_rank: index of the CUDA device to use
        :param global_rank: rank used for seeding and the summary sub-directory
        :param world_size: forwarded to the DistribUpdater
        """
        # global rank 0 keeps the base seed, others are offset by
        # nb_env * global_rank
        if global_rank == 0:
            seed = args.seed
        else:
            seed = args.seed + args.nb_env * global_rank
        logger.info("Using {} for rank {} seed.".format(seed, global_rank))

        # ENV
        engine = REGISTRY.lookup_engine(args.env)
        env_cls = REGISTRY.lookup_env(args.env)
        env_mgr = REGISTRY.lookup_manager(args.manager).from_args(
            args, engine, env_cls, seed=seed
        )

        # NETWORK
        # the torch seed is the base seed, not the per-rank env seed
        torch.manual_seed(args.seed)
        device = torch.device("cuda:{}".format(local_rank))
        output_space = REGISTRY.lookup_output_space(
            args.agent, env_mgr.action_space
        )
        net_cls = (
            REGISTRY.lookup_network(args.custom_network)
            if args.custom_network
            else ModularNetwork
        )
        net = net_cls.from_args(
            args,
            env_mgr.observation_space,
            output_space,
            env_mgr.gpu_preprocessor,
            REGISTRY,
        )
        logger.info("Network parameters: " + str(self.count_parameters(net)))

        # AGENT
        rwd_norm_cls = REGISTRY.lookup_reward_normalizer(args.rwd_norm)
        rwd_norm = rwd_norm_cls.from_args(args)
        agent_cls = REGISTRY.lookup_agent(args.agent)
        spec_builder = agent_cls.exp_spec_builder(
            env_mgr.observation_space,
            env_mgr.action_space,
            net.internal_space(),
            env_mgr.nb_env,
        )
        agent = agent_cls.from_args(
            args, rwd_norm, env_mgr.action_space, spec_builder
        )

        self.agent = agent
        self.nb_step = args.nb_step
        self.env_mgr = env_mgr
        self.nb_env = args.nb_env
        self.network = net.to(device)
        self.optimizer = torch.optim.RMSprop(
            self.network.parameters(), lr=args.lr, eps=1e-5, alpha=0.99
        )
        self.device = device
        self.initial_step_count = initial_step_count
        self.log_id_dir = log_id_dir
        self.epoch_len = args.epoch_len
        self.summary_freq = args.summary_freq
        self.logger = logger
        # each rank writes its summaries to its own sub-directory
        summary_dir = os.path.join(log_id_dir, "rank{}".format(global_rank))
        self.summary_writer = SummaryWriter(summary_dir)
        self.saver = SimpleModelSaver(log_id_dir)
        self.local_rank = local_rank
        self.global_rank = global_rank
        self.world_size = world_size
        self.updater = DistribUpdater(
            self.optimizer,
            self.network,
            args.grad_norm_clip,
            world_size,
            not args.no_divide,
        )

        if args.load_network:
            self.network = self.load_network(self.network, args.load_network)
            logger.info("Reloaded network from {}".format(args.load_network))
        if args.load_optim:
            self.optimizer = self.load_optim(self.optimizer, args.load_optim)
            logger.info("Reloaded optimizer from {}".format(args.load_optim))

        self.network.train()
Example #17
0
def register_exp(exp_cls):
    """
    Register an exp class with adept's REGISTRY.

    :param exp_cls: exp class to register
    :return:
    """
    # imported at call time rather than at module import
    # (presumably to avoid a circular import -- confirm)
    from adept.registry import REGISTRY as registry

    registry.register_exp(exp_cls)
Example #18
0
def register_agent(agent_cls):
    """
    Register an agent class with adept's REGISTRY.

    :param agent_cls: agent class to register
    :return:
    """
    # imported at call time rather than at module import
    # (presumably to avoid a circular import -- confirm)
    from adept.registry import REGISTRY as registry

    registry.register_agent(agent_cls)
Example #19
0
    def __init__(
            self,
            args,
            logger,
            log_id_dir,
            initial_step_count,
            local_rank,
            global_rank,
            world_size
    ):
        """
        Build a distributed worker: environment manager, network, optimizer
        and agent.

        :param args: training args with attribute access
        :param logger: logger used for status messages
        :param log_id_dir: log directory, stored on the instance
        :param initial_step_count: step count training starts from
        :param local_rank: index of the CUDA device to use
        :param global_rank: rank used to derive the environment seed
        :param world_size: stored on the instance
        """
        # global rank 0 keeps the base seed, others are offset by
        # nb_env * global_rank
        if global_rank == 0:
            seed = args.seed
        else:
            seed = args.seed + args.nb_env * global_rank
        logger.info('Using {} for rank {} seed.'.format(seed, global_rank))

        # ENV
        engine = REGISTRY.lookup_engine(args.env)
        env_cls = REGISTRY.lookup_env(args.env)
        env_mgr = SubProcEnvManager.from_args(args, engine, env_cls, seed=seed)

        # NETWORK
        # the torch seed is the base seed, not the per-rank env seed
        torch.manual_seed(args.seed)
        device = torch.device("cuda:{}".format(local_rank))
        output_space = REGISTRY.lookup_output_space(
            args.agent, env_mgr.action_space)
        net_cls = (
            REGISTRY.lookup_network(args.custom_network)
            if args.custom_network
            else ModularNetwork
        )
        net = net_cls.from_args(
            args,
            env_mgr.observation_space,
            output_space,
            env_mgr.gpu_preprocessor,
            REGISTRY
        )

        # AGENT
        rwd_norm_cls = REGISTRY.lookup_reward_normalizer(args.rwd_norm)
        rwd_norm = rwd_norm_cls.from_args(args)
        agent_cls = REGISTRY.lookup_agent(args.agent)
        spec_builder = agent_cls.exp_spec_builder(
            env_mgr.observation_space,
            env_mgr.action_space,
            net.internal_space(),
            env_mgr.nb_env
        )
        agent = agent_cls.from_args(
            args, rwd_norm, env_mgr.action_space, spec_builder
        )

        self.agent = agent
        self.nb_step = args.nb_step
        self.env_mgr = env_mgr
        self.nb_env = args.nb_env
        self.network = net.to(device)
        self.optimizer = torch.optim.RMSprop(
            self.network.parameters(), lr=args.lr, eps=1e-5, alpha=0.99
        )
        self.device = device
        self.initial_step_count = initial_step_count
        self.log_id_dir = log_id_dir
        self.epoch_len = args.epoch_len
        self.summary_freq = args.summary_freq
        self.logger = logger
        self.local_rank = local_rank
        self.global_rank = global_rank
        self.world_size = world_size

        if args.load_network:
            self.network = self.load_network(self.network, args.load_network)
            logger.info('Reloaded network from {}'.format(args.load_network))
        if args.load_optim:
            self.optimizer = self.load_optim(self.optimizer, args.load_optim)
            logger.info('Reloaded optimizer from {}'.format(args.load_optim))

        self.network.train()
Example #20
0
def main(args):
    """
    Run actorlearner training.

    Starts Ray, creates the main learner, any peer learners and the workers
    as remote actors, runs the learners, closes them, and optionally
    launches an evaluation afterwards.

    :param args: Dict[str, Any]
    :return:
    """
    args, log_id_dir, initial_step, logger = Init.main(MODE, args)
    R.save_extern_classes(log_id_dir)

    # start ray
    if args.ray_addr is None:
        logger.info('Using Ray on a single machine.')
        ray.init()
    else:
        ray.init(address=args.ray_addr)
        logger.info('Using Ray on a cluster. Head node address: {}'.format(
            args.ray_addr))

    def spawn_learner(learner_rank):
        # every learner gets the same cpu / gpu allocation
        remote_cls = ActorLearnerHost.as_remote(
            num_cpus=args.learner_cpu_alloc,
            num_gpus=args.learner_gpu_alloc)
        return remote_cls.remote(
            args, log_id_dir, initial_step, rank=learner_rank)

    # the main learner (rank 0) logs summaries and saves weights
    main_learner = spawn_learner(0)

    peer_learners = []
    # if multiple learners setup nccl
    if args.nb_learners > 1:
        # peers take ranks 1 .. nb_learners - 1
        for peer_rank in range(1, args.nb_learners):
            peer_learners.append(spawn_learner(peer_rank))

        # figure out main learner node ip
        port_info = main_learner._rank0_nccl_port_init.remote()
        nccl_addr, nccl_ip, nccl_port = ray.get(port_info)

        # setup all nccls, main learner first
        nccl_inits = [
            learner._nccl_init.remote(nccl_addr, nccl_ip, nccl_port)
            for learner in [main_learner] + peer_learners
        ]
        # wait for all
        ray.get(nccl_inits)
        logger.info('NCCL initialized')

        # have all sync parameters: peers first, then the main learner
        for peer in peer_learners:
            peer._sync_peer_parameters.remote()
        main_learner._sync_peer_parameters.remote()

    # create workers
    workers = []
    for w_ind in range(args.nb_workers):
        worker_cls = ActorLearnerWorker.as_remote(
            num_cpus=args.worker_cpu_alloc,
            num_gpus=args.worker_gpu_alloc)
        workers.append(
            worker_cls.remote(args, log_id_dir, initial_step, w_ind))

    # synchronize worker variables
    sync_ref = main_learner.synchronize_worker_parameters.remote(
        workers, initial_step, blocking=True)
    ray.get(sync_ref)

    try:
        # startup the run method of all containers
        runs = [main_learner.run.remote(workers, args.profile)]
        for peer in peer_learners:
            runs.append(peer.run.remote(workers))
        ray.wait(runs)
    finally:
        closes = [main_learner.close.remote()]
        for peer in peer_learners:
            closes.append(peer.close.remote())
        ray.wait(closes)

    if args.eval:
        from adept.scripts.evaluate import main as evaluate_main
        eval_args = {
            'log_id_dir': log_id_dir,
            'gpu_id': 0,
            'nb_episode': 30,
        }
        if args.custom_network:
            eval_args['custom_network'] = args.custom_network
        evaluate_main(eval_args)
Example #21
0
def register_env(env_cls):
    """
    Register an environment class with adept's REGISTRY.

    :param env_cls: environment class to register
    :return:
    """
    # imported at call time rather than at module import
    # (presumably to avoid a circular import -- confirm)
    from adept.registry import REGISTRY as registry

    registry.register_env(env_cls)
Example #22
0
def main(args):
    """
    Launch distributed training.

    Spawns one `adept.scripts._distrib` subprocess per local rank with the
    MASTER_ADDR, MASTER_PORT, WORLD_SIZE, RANK and LOCAL_RANK environment
    variables set, waits for all of them, and optionally launches an
    evaluation afterwards.

    :param args: Dict[str, Any]
    :return:
    """
    args = DotDict(args)

    # environment shared by every spawned process
    child_env = os.environ.copy()
    child_env["MASTER_ADDR"] = args.master_addr
    child_env["MASTER_PORT"] = str(args.master_port)
    child_env["WORLD_SIZE"] = str(args.nb_proc * args.nb_node)

    args, log_id_dir, initial_step, _ = Init.main(MODE, args)
    R.save_extern_classes(log_id_dir)

    processes = []
    for local_rank in range(args.nb_proc):
        # each process's rank
        child_env["RANK"] = str(args.nb_proc * args.node_rank + local_rank)
        child_env["LOCAL_RANK"] = str(local_rank)

        # command common to fresh and resumed runs
        cmd = [
            sys.executable,
            "-u",
            "-m",
            "adept.scripts._distrib",
            "--log-id-dir={}".format(log_id_dir),
        ]
        # resumed runs additionally forward the reload settings
        if args.resume:
            cmd.extend([
                "--resume={}".format(True),
                "--load-network={}".format(args.load_network),
                "--load-optim={}".format(args.load_optim),
                "--initial-step-count={}".format(initial_step),
                "--init-method={}".format(args.init_method),
            ])
        if args.custom_network:
            cmd.extend(["--custom-network", args.custom_network])

        # spawn the process
        processes.append(subprocess.Popen(cmd, env=child_env))

    for process in processes:
        process.wait()

    if args.eval:
        from adept.scripts.evaluate import main as evaluate_main

        eval_args = {
            "log_id_dir": log_id_dir,
            "gpu_id": 0,
            "nb_episode": 30,
        }
        if args.custom_network:
            eval_args["custom_network"] = args.custom_network
        evaluate_main(eval_args)
Example #23
0
def register_reward_norm(rwd_norm_cls):
    """
    Register a reward normalizer class with adept's REGISTRY.

    :param rwd_norm_cls: reward normalizer class to register
    :return:
    """
    # imported at call time rather than at module import
    # (presumably to avoid a circular import -- confirm)
    from adept.registry import REGISTRY as registry

    registry.register_reward_normalizer(rwd_norm_cls)