Esempio n. 1
0
def run(config, seed, device, logdir):
    """Train and evaluate a VAE model for a fixed number of epochs.

    Args:
        config: mapping read for ``'nn.type'``, ``'lr'`` and
            ``'train.num_epoch'``; also forwarded to ``make_dataset``, the
            model constructor and ``Engine``.
        seed: value handed to ``set_global_seeds``.
        device: forwarded to the model constructor.
        logdir: path-like object supporting ``/``; passed to the engine and
            used as the destination of the pickled logs.

    Returns:
        None

    Raises:
        ValueError: if ``config['nn.type']`` is neither ``'VAE'`` nor
            ``'ConvVAE'``.
    """
    set_global_seeds(seed)

    train_loader, test_loader = make_dataset(config)
    nn_type = config['nn.type']
    if nn_type == 'VAE':
        model = VAE(config, device)
    elif nn_type == 'ConvVAE':
        model = ConvVAE(config, device)
    else:
        # Fail with a clear message instead of an UnboundLocalError on
        # `model` a few lines below.
        raise ValueError(
            f"Unsupported nn.type {nn_type!r}; expected 'VAE' or 'ConvVAE'")
    optimizer = optim.Adam(model.parameters(), lr=config['lr'])

    engine = Engine(config,
                    model=model,
                    optimizer=optimizer,
                    train_loader=train_loader,
                    test_loader=test_loader)

    train_logs = []
    eval_logs = []
    for epoch in range(config['train.num_epoch']):
        # One training pass followed by one evaluation pass per epoch.
        train_logger = engine.train(epoch, logdir=logdir)
        train_logs.append(train_logger.logs)
        eval_logger = engine.eval(epoch, logdir=logdir)
        eval_logs.append(eval_logger.logs)
    pickle_dump(obj=train_logs, f=logdir/'train_logs', ext='.pkl')
    pickle_dump(obj=eval_logs, f=logdir/'eval_logs', ext='.pkl')
    return None
Esempio n. 2
0
def run(config, seed, device, logdir):
    """Optimise the agent's parameter vector with CMA-ES.

    Each generation asks the strategy for candidate solutions, scores them
    through a process pool via ``fitness`` and tells the strategy the negated
    returns. One log entry is collected per generation and the whole list is
    pickled to ``logdir/'train_logs'`` at the end.

    Args:
        config: mapping read for the ``'train.*'``, ``'log.freq'`` and
            ``'checkpoint.num'`` entries used below.
        seed: passed to ``set_global_seeds``, ``make_env``, the CMA-ES
            options and the pool initializer.
        device: forwarded to ``Agent`` and to the pool initializer.
        logdir: path-like object supporting ``/``.

    Returns:
        None
    """
    set_global_seeds(seed)

    print('Initializing...')
    agent = Agent(config, make_env(config, seed), device)
    initial_mean = [config['train.mu0']]*agent.num_params
    es = CMAES(initial_mean,
               config['train.std0'],
               {'popsize': config['train.popsize'], 'seed': seed})
    train_logs = []
    checkpoints_done = 0
    pool = ProcessPoolExecutor(max_workers=config['train.popsize'],
                               initializer=initializer,
                               initargs=(config, seed, device))
    with pool as executor:
        print('Finish initialization. Training starts...')
        for generation in range(config['train.generations']):
            gen_number = generation + 1
            tic = time.perf_counter()
            candidates = es.ask()
            results = list(executor.map(fitness, candidates, chunksize=2))
            returns, horizons = zip(*results)
            # The strategy receives negated returns.
            es.tell(candidates, [-ret for ret in returns])

            logger = Logger()
            logger('generation', gen_number)
            logger('num_seconds', round(time.perf_counter() - tic, 1))
            logger('Returns', describe(returns, axis=-1, repr_indent=1, repr_prefix='\n'))
            logger('Horizons', describe(horizons, axis=-1, repr_indent=1, repr_prefix='\n'))
            logger('fbest', es.result.fbest)
            train_logs.append(logger.logs)

            if gen_number == 1 or gen_number % config['log.freq'] == 0:
                logger.dump(keys=None, index=0, indent=0, border='-'*50)

            # NOTE(review): the divisor is config['checkpoint.num'] - 1, so a
            # value of 1 raises ZeroDivisionError here.
            fraction = checkpoints_done/(config['checkpoint.num'] - 1)
            if gen_number >= int(config['train.generations']*fraction):
                agent.from_vec(tensorify(es.result.xbest, 'cpu'))
                agent.checkpoint(logdir, gen_number)
                checkpoints_done += 1
    pickle_dump(obj=train_logs, f=logdir/'train_logs', ext='.pkl')
    return None
Esempio n. 3
0
def run(config, seed, device, logdir):
    """Build the environments, agents and replay buffer, then train.

    A TD3 agent is used when ``config['agent.use_td3']`` is truthy, otherwise
    a DDPG agent. The train and eval logs returned by ``engine.train()`` are
    pickled under ``logdir``.

    Args:
        config: mapping read for ``'agent.use_td3'`` and
            ``'replay.capacity'``; also forwarded to the constructors below.
        seed: passed to ``set_global_seeds`` and ``make_env``.
        device: forwarded to the agents and the replay buffer.
        logdir: path-like object supporting ``/``; also given to ``Engine``.

    Returns:
        None
    """
    set_global_seeds(seed)

    train_env = make_env(config, seed, 'train')
    eval_env = make_env(config, seed, 'eval')
    random_agent = RandomAgent(config, train_env, device)
    agent_cls = TD3Agent if config['agent.use_td3'] else DDPGAgent
    agent = agent_cls(config, train_env, device)
    runner = EpisodeRunner()
    replay = ReplayBuffer(train_env, config['replay.capacity'], device)

    engine_kwargs = {
        'agent': agent,
        'random_agent': random_agent,
        'env': train_env,
        'eval_env': eval_env,
        'runner': runner,
        'replay': replay,
        'logdir': logdir,
    }
    engine = Engine(config, **engine_kwargs)

    train_logs, eval_logs = engine.train()
    for logs, stem in ((train_logs, 'train_logs'), (eval_logs, 'eval_logs')):
        pickle_dump(obj=logs, f=logdir / stem, ext='.pkl')
    return None
Esempio n. 4
0
def run(config, seed, device):
    """Train an agent until ``agent.total_timestep`` reaches the budget.

    The log directory is ``config['log.dir'] / config['ID'] / seed``. The
    environment is wrapped with ``VecMonitor`` and, depending on the config,
    with observation and reward standardisation.

    Args:
        config: mapping read for ``'log.dir'``, ``'ID'``,
            ``'env.standardize_obs'``, ``'env.standardize_reward'``,
            ``'agent.gamma'``, ``'train.timestep'``, ``'log.freq'`` and
            ``'checkpoint.freq'``.
        seed: passed to ``set_global_seeds`` and ``make_env``; also the last
            component of the log directory.
        device: forwarded to ``Agent``.

    Returns:
        None
    """
    set_global_seeds(seed)
    logdir = Path(config['log.dir']).joinpath(str(config['ID']), str(seed))

    env = VecMonitor(make_env(config, seed))
    if config['env.standardize_obs']:
        env = VecStandardizeObservation(env, clip=5.)
    if config['env.standardize_reward']:
        env = VecStandardizeReward(env, clip=10., gamma=config['agent.gamma'])

    agent = Agent(config, env, device)
    runner = EpisodeRunner(reset_on_call=False)
    engine = Engine(config, agent=agent, env=env, runner=runner)

    train_logs = []
    iteration = 0
    while agent.total_timestep < config['train.timestep']:
        iter_logger = engine.train(iteration)
        train_logs.append(iter_logger.logs)
        completed = iteration + 1
        if iteration == 0 or completed % config['log.freq'] == 0:
            iter_logger.dump(keys=None, index=0, indent=0, border='-' * 50)
        if iteration == 0 or completed % config['checkpoint.freq'] == 0:
            agent.checkpoint(logdir, completed)
        iteration += 1
    # Final checkpoint once the timestep budget is exhausted.
    # NOTE(review): `iteration` is the index at which the budget check
    # stopped the loop, so this label is one past the last label used inside
    # the loop - confirm this is intended.
    agent.checkpoint(logdir, iteration + 1)
    pickle_dump(obj=train_logs, f=logdir / 'train_logs', ext='.pkl')
    return None
Esempio n. 5
0
def run(config, seed, device, logdir):
    """Launch one learner, several actors and one evaluator as processes.

    A single agent is created, ``agent.share_memory()`` is called on it, and
    the agent is then handed to every process. Actors and the learner
    additionally receive a queue bounded to 100 items. The function blocks
    until the evaluator, then all actors, then the learner have been joined.

    Args:
        config: mapping read for ``'agent.num_actors'``; also forwarded to
            the constructors and the process targets.
        seed: passed to ``set_global_seeds``, ``make_env`` and the actor and
            evaluator targets.
        device: forwarded to ``Agent``.
        logdir: forwarded to the learner and evaluator targets.

    Returns:
        None
    """
    set_global_seeds(seed)

    queue = mp.Queue(maxsize=100)
    env = make_env(config, seed, 'train')
    agent = Agent(config, env, device)
    agent.share_memory()
    runner = EpisodeRunner(reset_on_call=False)
    engine = Engine(config, agent=agent, env=env, runner=runner)

    learner_process = mp.Process(target=learner,
                                 args=(config, logdir, agent, engine, queue))
    actor_processes = [
        mp.Process(target=actor,
                   args=(config, seed, make_env, agent, runner, queue))
        for _ in range(config['agent.num_actors'])
    ]
    evaluator_process = mp.Process(target=evaluator,
                                   args=(config, logdir, seed, make_env,
                                         agent))

    learner_process.start()
    print('Learner started !')
    # Plain loops: starting/joining is a side effect, so no throwaway list
    # should be built for it.
    for process in actor_processes:
        process.start()
    print('Actors started !')
    evaluator_process.start()
    print('Evaluator started !')

    evaluator_process.join()
    for process in actor_processes:
        process.join()
    learner_process.join()
    return None
Esempio n. 6
0
File: algo.py Progetto: vin136/lagom
    def __call__(self, config, seed, device):
        """Seed the run, then build and invoke an ``ESMaster``.

        The log directory handed to the master is
        ``config['log.dir'] / config['ID'] / seed``. ``device`` is accepted
        but not used in this method.

        Returns:
            None
        """
        set_global_seeds(seed)

        run_dir = Path(config['log.dir']).joinpath(str(config['ID']), str(seed))
        master = ESMaster(config, ESWorker, logdir=run_dir)
        master()

        return None
Esempio n. 7
0
def run(config, seed, device, logdir):
    """Optimise the agent's parameter vector with OpenAI-ES.

    Each generation asks the strategy for candidate solutions, scores them
    in a worker pool via ``fitness`` and tells the strategy the negated
    returns. One log entry is collected per generation and the list is
    pickled to ``logdir / 'train_logs'`` at the end.

    Args:
        config: mapping read for the ``'train.*'``, ``'log.freq'`` and
            ``'checkpoint.num'`` entries used below.
        seed: passed to ``set_global_seeds``, ``make_env``, the strategy
            options and every fitness task.
        device: forwarded to ``Agent`` and every fitness task.
        logdir: path-like object supporting ``/``.

    Returns:
        None
    """
    set_global_seeds(seed)
    torch.set_num_threads(1)  # VERY IMPORTANT TO AVOID GETTING STUCK

    print('Initializing...')
    agent = Agent(config, make_env(config, seed, 'eval'), device)
    initial_mean = [config['train.mu0']] * agent.num_params
    es = OpenAIES(initial_mean, config['train.std0'], {
        'popsize': config['train.popsize'],
        'seed': seed,
        'sigma_scheduler_args': config['train.sigma_scheduler_args'],
        'lr': config['train.lr'],
        'lr_decay': config['train.lr_decay'],
        'min_lr': config['train.min_lr'],
        'antithetic': config['train.antithetic'],
        'rank_transform': config['train.rank_transform']
    })
    train_logs = []
    checkpoints_done = 0
    num_workers = config['train.popsize'] // config['train.worker_chunksize']
    with Pool(processes=num_workers) as pool:
        print('Finish initialization. Training starts...')
        for generation in range(config['train.generations']):
            gen_number = generation + 1
            tic = time.perf_counter()
            candidates = es.ask()
            tasks = [(config, seed, device, candidate)
                     for candidate in candidates]
            results = pool.map(CloudpickleWrapper(fitness),
                               tasks,
                               chunksize=config['train.worker_chunksize'])
            returns, horizons = zip(*results)
            # The strategy receives negated returns.
            es.tell(candidates, [-ret for ret in returns])

            logger = Logger()
            logger('generation', gen_number)
            logger('num_seconds', round(time.perf_counter() - tic, 1))
            logger('Returns',
                   describe(returns, axis=-1, repr_indent=1, repr_prefix='\n'))
            logger('Horizons',
                   describe(horizons, axis=-1, repr_indent=1, repr_prefix='\n'))
            logger('fbest', es.result.fbest)
            train_logs.append(logger.logs)

            if gen_number == 1 or gen_number % config['log.freq'] == 0:
                logger.dump(keys=None, index=0, indent=0, border='-' * 50)

            # NOTE(review): the divisor is config['checkpoint.num'] - 1, so a
            # value of 1 raises ZeroDivisionError here.
            fraction = checkpoints_done / (config['checkpoint.num'] - 1)
            if gen_number >= int(config['train.generations'] * fraction):
                agent.from_vec(tensorify(es.result.xbest, 'cpu'))
                agent.checkpoint(logdir, gen_number)
                checkpoints_done += 1
    pickle_dump(obj=train_logs, f=logdir / 'train_logs', ext='.pkl')
    return None
Esempio n. 8
0
def run(config, seed, device, logdir):
    """Train an agent with a replay buffer and pickle the resulting logs.

    The training environment is wrapped with ``VecMonitor`` then
    ``VecStepInfo``; the evaluation environment only with ``VecMonitor``.

    Args:
        config: mapping read for ``'replay.capacity'``; also forwarded to
            ``make_env``, ``Agent`` and ``Engine``.
        seed: passed to ``set_global_seeds`` and both ``make_env`` calls.
        device: forwarded to ``Agent`` and ``ReplayBuffer``.
        logdir: path-like object supporting ``/``; also given to ``Engine``.

    Returns:
        None
    """
    set_global_seeds(seed)

    train_env = VecStepInfo(VecMonitor(make_env(config, seed)))
    eval_env = VecMonitor(make_env(config, seed))

    agent = Agent(config, train_env, device)
    replay = ReplayBuffer(train_env, config['replay.capacity'], device)
    engine = Engine(config,
                    agent=agent,
                    env=train_env,
                    eval_env=eval_env,
                    replay=replay,
                    logdir=logdir)

    train_logs, eval_logs = engine.train()
    for logs, stem in ((train_logs, 'train_logs'), (eval_logs, 'eval_logs')):
        pickle_dump(obj=logs, f=logdir/stem, ext='.pkl')
    return None
Esempio n. 9
0
File: algo.py Progetto: vin136/lagom
 def __call__(self, config, seed, device):
     """Train and evaluate a VAE for ``config['train.num_epoch']`` epochs.

     The data loaders and an Adam optimizer (learning rate 1e-3) are attached
     to the model as attributes before it is handed to ``Engine``. Each epoch
     runs one training pass and one evaluation pass, logging both under
     ``config['log.dir'] / config['ID'] / seed``.

     Returns:
         None
     """
     set_global_seeds(seed)
     run_dir = Path(config['log.dir']).joinpath(str(config['ID']), str(seed))

     loaders = self.make_dataset(config)
     train_loader, test_loader = loaders

     vae = VAE(config=config, device=device)
     # The loaders and the optimizer are attached to the model as attributes.
     vae.train_loader = train_loader
     vae.test_loader = test_loader
     vae.optimizer = optim.Adam(vae.parameters(), lr=1e-3)

     engine = Engine(agent=vae, runner=None, config=config)

     num_epochs = config['train.num_epoch']
     for epoch in range(num_epochs):
         train_result = engine.train(n=epoch)
         engine.log_train(train_result, logdir=run_dir, epoch=epoch)

         eval_result = engine.eval(n=epoch)
         engine.log_eval(eval_result, logdir=run_dir, epoch=epoch)

     return None
Esempio n. 10
0
def run(config, seed, device, logdir):
    """Train an agent until ``agent.total_timestep`` reaches the budget.

    Checkpoints are written whenever the agent's timestep count reaches the
    next threshold, computed from ``config['train.timestep']`` and
    ``config['checkpoint.num']``. Per-iteration logs are pickled to
    ``logdir / 'train_logs'``.

    Args:
        config: mapping read for ``'train.timestep'``, ``'log.freq'`` and
            ``'checkpoint.num'``; also forwarded to the constructors below.
        seed: passed to ``set_global_seeds`` and ``make_env``.
        device: forwarded to ``Agent``.
        logdir: path-like object supporting ``/``.

    Returns:
        None
    """
    set_global_seeds(seed)

    env = make_env(config, seed, 'train')
    agent = Agent(config, env, device)
    runner = StepRunner(reset_on_call=False)
    engine = Engine(config, agent=agent, env=env, runner=runner)

    train_logs = []
    checkpoints_done = 0
    iteration = 0
    while agent.total_timestep < config['train.timestep']:
        iter_logger = engine.train(iteration)
        train_logs.append(iter_logger.logs)
        completed = iteration + 1
        if iteration == 0 or completed % config['log.freq'] == 0:
            iter_logger.dump(keys=None, index=0, indent=0, border='-' * 50)

        # NOTE(review): the divisor is config['checkpoint.num'] - 1, so a
        # value of 1 raises ZeroDivisionError here.
        fraction = checkpoints_done / (config['checkpoint.num'] - 1)
        if agent.total_timestep >= int(config['train.timestep'] * fraction):
            agent.checkpoint(logdir, completed)
            checkpoints_done += 1
        iteration += 1
    pickle_dump(obj=train_logs, f=logdir / 'train_logs', ext='.pkl')
    return None
Esempio n. 11
0
    def __call__(self, config, seed, device):
        """Set up environments, agent, runner and engine, then train.

        Training stops when ``'train.iter'`` is in the config and the
        iteration index reaches it, or when ``'train.timestep'`` is in the
        config and ``agent.total_T`` reaches it. On the first iteration and
        every ``config['log.interval']`` iterations the train output is
        logged and, when ``config['eval.independent']`` is truthy, an
        evaluation is run and logged. Both log lists are pickled under
        ``config['log.dir'] / config['ID'] / seed``.

        Returns:
            None
        """
        set_global_seeds(seed)
        logdir = Path(config['log.dir']).joinpath(str(config['ID']), str(seed))

        # Optional extra wrapper for the training environment only.
        if config['env.time_aware_obs']:
            extra_kwargs = {'extra_wrapper': [TimeAwareObservation]}
        else:
            extra_kwargs = {}
        env = make_vec_env(SerialVecEnv,
                           make_gym_env,
                           config['env.id'],
                           config['train.N'],
                           seed,
                           monitor=True,
                           **extra_kwargs)
        if config['eval.independent']:
            eval_env = make_vec_env(SerialVecEnv, make_gym_env,
                                    config['env.id'], config['eval.N'], seed)
        if config['env.clip_action']:
            env = VecClipAction(env)
            if config['eval.independent']:
                eval_env = VecClipAction(eval_env)
        if config['env.standardize']:
            # Only the training env is wrapped here; use_obs is True and
            # use_reward is False (marked "A2C specific" by the author).
            env = VecStandardize(venv=env,
                                 use_obs=True,
                                 use_reward=False,
                                 clip_obs=10.,
                                 clip_reward=10.,
                                 gamma=0.99,
                                 eps=1e-8)
        env_spec = EnvSpec(env)

        agent = Agent(config, env_spec, device)
        runner = RollingSegmentRunner(config, agent, env)

        # The eval env is only handed to the engine when it was created.
        engine_kwargs = {}
        if config['eval.independent']:
            engine_kwargs['eval_env'] = eval_env
        engine = Engine(agent, runner, config, **engine_kwargs)

        def budget_exhausted(iteration):
            # Iteration budget is checked first, then the timestep budget.
            if 'train.iter' in config and iteration >= config['train.iter']:
                return True
            return ('train.timestep' in config
                    and agent.total_T >= config['train.timestep'])

        train_logs = []
        eval_logs = []
        for i in count():
            if budget_exhausted(i):
                break

            train_output = engine.train(i)

            # Skip logging unless this is the first iteration or a multiple
            # of the logging interval.
            if i != 0 and (i + 1) % config['log.interval'] != 0:
                continue

            train_logs.append(engine.log_train(train_output))

            if config['eval.independent']:
                with torch.no_grad():  # disable grad, save memory
                    eval_output = engine.eval(n=i)
                eval_logs.append(engine.log_eval(eval_output))

        pickle_dump(obj=train_logs, f=logdir / 'train_logs', ext='.pkl')
        pickle_dump(obj=eval_logs, f=logdir / 'eval_logs', ext='.pkl')

        return None