Ejemplo n.º 1
0
def run_sweep_cmds(cmds):
    """Launch every command in ``cmds`` in parallel and relay their output.

    Each command is started through the shell with stderr merged into
    stdout. The combined output of every process is polled in a
    round-robin fashion and forwarded to ``logger`` until all processes
    have exited and their output has been consumed.

    Args:
        cmds: iterable of shell command strings.

    Raises:
        SystemExit: always. The ``finally`` clause terminates the child
            processes and calls ``sys.exit()``, so this function never
            returns normally and any in-flight exception is replaced by
            ``SystemExit``.
    """
    output_dir = Path.cwd().joinpath('sp_outputs')
    output_dir.mkdir(parents=True, exist_ok=True)
    processes = []
    nbsrs = []
    for idx, cmd in enumerate(cmds):
        logger.info(f'CMD_{idx}:{cmd}')
        # NOTE(review): the command string is handed to the shell as-is;
        # only pass trusted commands (shlex.split + shell=False is the
        # alternative if shell features are not needed).
        p = subprocess.Popen(cmd,
                             shell=True,
                             stderr=subprocess.STDOUT,
                             stdout=subprocess.PIPE)
        processes.append(p)
        # NBSR is presumably a non-blocking reader around the pipe; its
        # readline(timeout) is treated below as returning a falsy value
        # when no line is available -- TODO confirm.
        nbsrs.append(NBSR(p.stdout))
    try:
        all_done = [False] * len(processes)
        while True:
            for idx, p in enumerate(processes):
                # Sample the exit status *before* draining so that output
                # written right before the process exited is still read.
                exited = p.poll() is not None
                stime = time.time()
                proc_print = False
                drained = False
                while True:
                    lines = nbsrs[idx].readline(0.2)
                    if not lines:
                        drained = True
                        break
                    if not proc_print:
                        logger.info(
                            f'====================================')
                        logger.info(f'Process {idx}:')
                        proc_print = True
                    # Child output is not guaranteed to be valid UTF-8;
                    # never let a stray byte abort the whole sweep.
                    logger.info(lines.decode('utf-8', errors='replace'))
                    # Give each process at most ~10 s per pass so a chatty
                    # process cannot starve the others.
                    if time.time() - stime > 10:
                        break
                # Only mark the process finished once its output is fully
                # consumed; otherwise the remaining lines would be lost.
                if exited and drained:
                    all_done[idx] = True
            if all(all_done):
                break
            time.sleep(2)
        logger.info('All processes are completed.')
    except KeyboardInterrupt:
        logger.warning('Keyboard interruption.')
    finally:
        print('Exiting...')
        for p in processes:
            p.terminate()
        sys.exit()
Ejemplo n.º 2
0
def make_vec_env(env_id=None,
                 num_envs=1,
                 seed=1,
                 env_func=None,
                 no_timeout=False,
                 env_kwargs=None,
                 distributed=False,
                 extra_wrapper=None,
                 wrapper_kwargs=None):
    """Build a vectorized environment made of ``num_envs`` copies.

    Args:
        env_id: id passed to ``gym.make`` when ``env_func`` is not given.
        num_envs: number of environment copies to create.
        seed: base seed; copy ``i`` is seeded with ``seed + i``.
        env_func: optional callable used instead of ``gym.make``; it is
            called with ``**env_kwargs``.
        no_timeout: if true, wrap each environment in ``NoTimeOutEnv``.
        env_kwargs: keyword arguments for the environment constructor.
        distributed: if true, shift the base seed by
            ``hvd.rank() * 100000``.
        extra_wrapper: optional callable applied as
            ``extra_wrapper(env, **wrapper_kwargs)``.
        wrapper_kwargs: keyword arguments for ``extra_wrapper``.

    Returns:
        A ``ShmemVecEnv`` when ``num_envs > 1``, else a ``DummyVecEnv``.
    """
    logger.info(f'Creating {num_envs} environments.')
    env_kwargs = {} if env_kwargs is None else env_kwargs
    wrapper_kwargs = {} if wrapper_kwargs is None else wrapper_kwargs
    if distributed:
        import horovod.torch as hvd
        # Offset the base seed per Horovod rank.
        seed = seed + hvd.rank() * 100000

    def env_factory(rank):
        """Return a zero-argument constructor for the ``rank``-th copy."""
        def _init():
            if env_func is None:
                env = gym.make(env_id, **env_kwargs)
            else:
                env = env_func(**env_kwargs)
            if no_timeout:
                env = NoTimeOutEnv(env)
            if extra_wrapper is not None:
                env = extra_wrapper(env, **wrapper_kwargs)
            env.seed(seed + rank)
            return env

        return _init

    constructors = [env_factory(rank) for rank in range(num_envs)]
    if num_envs > 1:
        return ShmemVecEnv(constructors, context='spawn')
    return DummyVecEnv(constructors)
Ejemplo n.º 3
0
def check_if_run_distributed(cfg):
    """Decide whether to train with Horovod and configure the GPU.

    When Horovod is available it is initialised, and ``cfg.distributed``
    is forced on if more than one process is running. ``cfg.distributed``
    is then overwritten with the final decision. In distributed mode the
    CUDA device is selected from the local rank, ``cfg.gpu_shift`` and
    (if set) the ``cfg.gpus`` lookup list.

    Args:
        cfg: configuration object; ``distributed``, ``gpu_shift`` and
            ``gpus`` are read, ``distributed`` is written.

    Returns:
        The value of ``HOROVOD_AVAILABLE and cfg.distributed``.
    """
    from easyrl import HOROVOD_AVAILABLE
    if HOROVOD_AVAILABLE:
        import horovod.torch as hvd
        hvd.init()
        if hvd.size() > 1:
            cfg.distributed = True
    elif cfg.distributed:
        logger.error('Horovod is not installed! Will not run in distributed training')

    distributed = HOROVOD_AVAILABLE and cfg.distributed
    cfg.distributed = distributed
    if not distributed:
        return distributed

    local_rank = hvd.local_rank()
    gpu_id = local_rank + cfg.gpu_shift
    if cfg.gpus is not None:
        # cfg.gpus maps the shifted rank onto an actual device id.
        gpu_id = cfg.gpus[gpu_id]
    logger.info(f'Rank {local_rank} GPU ID: {gpu_id}')
    torch.cuda.set_device(gpu_id)
    logger.info(f'Using Horovod for distributed training, number of processes:{hvd.size()}')
    return distributed
Ejemplo n.º 4
0
 def save_model(self, is_best=False, step=None):
     """Checkpoint the networks, optimizers and replay buffer.

     Saves the environment via ``self.save_env``, writes the state dicts
     through the module-level ``save_model`` helper, and pickles
     ``self.memory`` to ``self.mem_file``.

     Args:
         is_best: forwarded to the module-level ``save_model``.
         step: stored under ``'step'`` and forwarded to ``save_model``.
     """
     self.save_env(cfg.alg.model_dir)

     # (checkpoint key, attribute name) pairs, in the stored order.
     stateful_parts = (
         ('actor_state_dict', 'actor'),
         ('q1_state_dict', 'q1'),
         ('q1_tgt_state_dict', 'q1_tgt'),
         ('q2_state_dict', 'q2'),
         ('q2_tgt_state_dict', 'q2_tgt'),
         ('pi_optim_state_dict', 'pi_optimizer'),
         ('q_optim_state_dict', 'q_optimizer'),
     )
     checkpoint = {'step': step}
     for key, attr in stateful_parts:
         checkpoint[key] = getattr(self, attr).state_dict()

     # With no fixed alpha configured, log_alpha and its optimizer are
     # stored too (presumably alpha is learned in that case -- confirm).
     if cfg.alg.alpha is None:
         checkpoint['log_alpha'] = self.log_alpha
         checkpoint['alpha_optim_state_dict'] = \
             self.alpha_optimizer.state_dict()

     save_model(checkpoint, cfg.alg, is_best=is_best, step=step)

     logger.info(f'Saving the replay buffer to: {self.mem_file}.')
     save_to_pickle(self.memory, self.mem_file)
     logger.info('The replay buffer is saved.')
Ejemplo n.º 5
0
def save_model(data, cfg, is_best=False, step=None):
    """Write ``data`` to checkpoint file(s) under ``cfg.model_dir``.

    A step checkpoint ``ckpt_<step>.pt`` is written when
    ``cfg.save_best_only`` is false and ``step`` is given; a
    ``model_best.pt`` file is written when ``is_best`` is true. Before
    writing, if ``cfg.save_best_only`` is false and the directory holds
    more than ``cfg.max_saved_models`` ``*.pt`` files, the first one in
    sorted order is deleted.

    Args:
        data: object handed to ``torch.save``.
        cfg: config providing ``model_dir``, ``save_best_only`` and
            ``max_saved_models``.
        is_best: whether to also write ``model_best.pt``.
        step: step number used in the checkpoint file name.
    """
    keep_history = not cfg.save_best_only

    targets = []
    if keep_history and step is not None:
        targets.append(
            cfg.model_dir.joinpath('ckpt_{:012d}.pt'.format(step)))
    if is_best:
        targets.append(cfg.model_dir.joinpath('model_best.pt'))

    if keep_history:
        # Zero-padded step numbers make lexical order match step order.
        existing = sorted(cfg.model_dir.glob('*.pt'))
        if len(existing) > cfg.max_saved_models:
            existing[0].unlink()

    logger.info(f'Exploration steps: {step}')
    for target in targets:
        logger.info(f'Saving checkpoint: {target}.')
        torch.save(data, target)
Ejemplo n.º 6
0
    def restore_cfg(self, skip_params=None, path=None):
        """Restore attributes of this object from a stored ``hp.json``.

        Every key in the JSON file that is already an attribute of
        ``self`` is overwritten with the stored value, except keys named
        in ``skip_params`` and a fixed set of run-specific options.

        Args:
            skip_params: optional iterable of extra attribute names that
                must not be restored. It is not modified.
            path: directory containing ``hp.json``; defaults to
                ``self.data_dir``.
        """
        if path is None:
            path = self.data_dir
        hp_file = path.joinpath('hp.json')
        with hp_file.open() as f:
            cfg_stored = json.load(f)

        # Options that describe the current run and are never restored.
        always_skipped = ('resume',
                          'resume_step',
                          'render',
                          'test',
                          'test_num',
                          'eval_num_envs',
                          'save_test_traj',
                          'save_dir',
                          'diff_cfg')
        # Work on a private copy so the caller's list is never mutated
        # (and does not grow on repeated calls).
        skipped = set(skip_params) if skip_params is not None else set()
        skipped.update(always_skipped)

        for key, val in cfg_stored.items():
            if hasattr(self, key) and key not in skipped:
                setattr(self, key, val)
                logger.info(f'Restoring {key} to {val}.')
Ejemplo n.º 7
0
 def print_param_grad_status(self):
     """Print ``requires_grad`` for every actor and critic parameter.

     Section headers go through ``logger``; the per-parameter lines are
     written with ``print``.
     """
     logger.info('Requires Grad?')
     sections = (
         ('================== Actor ================== ', 'actor'),
         ('================== Critic ================== ', 'critic'),
     )
     for banner, attr in sections:
         logger.info(banner)
         network = getattr(self, attr)
         for param_name, tensor in network.named_parameters():
             print(f'{param_name}: {tensor.requires_grad}')
Ejemplo n.º 8
0
def make_vec_env(env_id, num_envs, seed=1, no_timeout=False, env_kwargs=None):
    """Build a vectorized environment made of ``num_envs`` copies.

    Each copy is created with ``gym.make``; if that fails, a small set of
    Cheetah environments is constructed directly from its class.

    Args:
        env_id: environment id.
        num_envs: number of environment copies to create.
        seed: base seed; copy ``i`` is seeded with ``seed + i``.
        no_timeout: if true, wrap each environment in ``NoTimeOutEnv``.
        env_kwargs: keyword arguments for the environment constructor.

    Returns:
        A ``ShmemVecEnv`` when ``num_envs > 1``, else a ``DummyVecEnv``.

    Raises:
        Exception: (inside the environment constructor) when ``gym.make``
            fails and ``env_id`` is not one of the known fallbacks.
    """
    logger.info(f'Creating {num_envs} environments.')
    if env_kwargs is None:
        env_kwargs = {}

    def make_env(env_id, rank, seed, no_timeout, env_kwargs):
        def _thunk():
            try:
                env = gym.make(env_id, **env_kwargs)
            except Exception as err:
                # gym.make failed (e.g. the id is not registered): fall
                # back to constructing the known environments directly.
                # The if/elif chain keeps class lookup lazy, so only the
                # requested class has to be importable.
                if env_id == "CheetahMPCEnv-v0":
                    env = CheetahMPCEnv(**env_kwargs)
                elif env_id == "CheetahRSSPMTGEnv-v0":
                    env = CheetahRSSPMTGEnv(**env_kwargs)
                elif env_id == "CheetahRSSFlatEnv-v0":
                    env = CheetahRSSFlatEnv(**env_kwargs)
                else:
                    print(f"ENVIRONMENT {env_id} NOT REGISTERED")
                    raise Exception(
                        f"Environment {env_id} is not registered") from err
            if no_timeout:
                env = NoTimeOutEnv(env)
            env.seed(seed + rank)
            return env

        return _thunk

    envs = [
        make_env(env_id, idx, seed, no_timeout, env_kwargs)
        for idx in range(num_envs)
    ]
    if num_envs > 1:
        envs = ShmemVecEnv(envs, context='spawn')
    else:
        envs = DummyVecEnv(envs)
    return envs