Example #1
 def __call__(self, config, seed, device_str):
     # Set random seeds
     set_global_seeds(seed)
     # Create device
     device = torch.device(device_str)
     # Use log dir for current job (run_experiment)
     logdir = Path(config['log.dir']) / str(config['ID']) / str(seed)
     
     # Create dataset for training and testing
     train_dataset = datasets.MNIST('data/', 
                                    train=True, 
                                    download=True, 
                                    transform=transforms.ToTensor())
     test_dataset = datasets.MNIST('data/', 
                                   train=False, 
                                   transform=transforms.ToTensor())
     # Define GPU-dependent keywords for DataLoader
     if config['cuda']:
         kwargs = {'num_workers': 1, 'pin_memory': True}
     else:
         kwargs = {}
     # Create data loader for training and testing
     train_loader = DataLoader(train_dataset, 
                               batch_size=config['train.batch_size'], 
                               shuffle=True, 
                               **kwargs)
     test_loader = DataLoader(test_dataset, 
                              batch_size=config['eval.batch_size'], 
                              shuffle=True, 
                              **kwargs)
     
     # Create the model
     if config['network.type'] == 'VAE':
         model = VAE(config=config)
     elif config['network.type'] == 'ConvVAE':
         model = ConvVAE(config=config)
     else:
         raise ValueError(f"unknown network type: {config['network.type']}")
     model = model.to(device)
     
     # Create optimizer
     optimizer = optim.Adam(model.parameters(), lr=1e-3)
     
     # Create engine
     engine = Engine(agent=model,
                     runner=None,
                     config=config,
                     device=device,
                     optimizer=optimizer, 
                     train_loader=train_loader, 
                     test_loader=test_loader)
     
     # Training and evaluation
     for epoch in range(config['train.num_epoch']):
         train_output = engine.train(n=epoch)
         engine.log_train(train_output, logdir=logdir, epoch=epoch)
         
         eval_output = engine.eval(n=epoch)
         engine.log_eval(eval_output, logdir=logdir, epoch=epoch)
 
     return None
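
This example trains a (Conv)VAE on MNIST under lagom's experiment runner and relies on set_global_seeds for reproducibility. A minimal sketch of what such a helper typically does, assuming it seeds Python's random module, NumPy, and PyTorch (lagom's actual implementation may differ):

import random

import numpy as np
import torch

def set_global_seeds(seed):
    # Seed every RNG the training loop can touch: Python, NumPy,
    # and PyTorch on both CPU and all visible GPUs.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)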
Example #2
File: algo.py  Project: cclauss/lagom
    def __call__(self, config):
        # Set random seeds: PyTorch, numpy.random, random
        set_global_seeds(seed=config['seed'])
        
        # Create environment and seed it
        env = make_env(seed=config['seed'], 
                       monitor=False, 
                       monitor_dir=None)
        # Create environment specification
        env_spec = EnvSpec(env)  # TODO: integrate within make_env globally
        
        # Create device
        device = torch.device('cuda' if config['cuda'] else 'cpu')
        
        # Create logger
        logger = Logger(name='logger')
        
        # Create policy
        network = MLP(config=config)
        policy = CategoricalPolicy(network=network, env_spec=env_spec)
        policy.network = policy.network.to(device)

        # Create optimizer
        optimizer = optim.Adam(policy.network.parameters(), lr=config['lr'])
        # Learning rate scheduler
        max_epoch = config['train_iter']  # maximum number of lr-decay steps; note where lr_scheduler.step() is called
        lambda_f = lambda epoch: 1 - epoch/max_epoch  # linearly decay the learning rate each training epoch
        lr_scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda_f)
        
        # Create agent
        agent_class = ActorCriticAgent  # alternatively: REINFORCEAgent
        agent = agent_class(policy=policy, 
                            optimizer=optimizer, 
                            config=config, 
                            lr_scheduler=lr_scheduler, 
                            device=device)
        
        # Create runner
        runner = Runner(agent=agent, 
                        env=env, 
                        gamma=config['gamma'])
        
        # Create engine
        engine = Engine(agent=agent, 
                        runner=runner, 
                        config=config, 
                        logger=logger)
        
        # Training
        train_output = engine.train()
        np.save('logs/returns_ActorCritic', train_output)
        
        return None
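
The scheduler above decays the learning rate linearly toward zero over train_iter epochs. A standalone sketch of the same schedule with plain PyTorch (the model and the hyperparameter values are placeholders):

from torch import nn, optim

model = nn.Linear(4, 2)  # placeholder network
optimizer = optim.Adam(model.parameters(), lr=1e-3)
max_epoch = 100
lr_scheduler = optim.lr_scheduler.LambdaLR(optimizer,
                                           lr_lambda=lambda epoch: 1 - epoch / max_epoch)

for epoch in range(3):
    optimizer.step()     # in real training: compute the loss and backprop first
    lr_scheduler.step()  # lr becomes 1e-3 * (1 - epoch / max_epoch)
    print(lr_scheduler.get_last_lr())  # [0.00099], [0.00098], [0.00097]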
Example #3
 def __call__(self, config, seed, device_str):
     # Set random seeds
     set_global_seeds(seed)
     # Use log dir for current job (run_experiment)
     logdir = Path(config['log.dir']) / str(config['ID']) / str(seed)
     
     # train
     es = ESMaster(num_iteration=config['train.num_iteration'], 
                   worker_class=ESWorker, 
                   init_seed=seed, 
                   daemonic_worker=None, 
                   config=config, 
                   logdir=logdir)
     es()
     
     return None
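
ESMaster and ESWorker distribute an evolution strategy across worker processes. A single-process sketch of the underlying ES update on a toy objective, using the standard score-function estimator (illustrative only; lagom's master/worker protocol and hyperparameters are not shown here):

import numpy as np

def fitness(theta):  # toy objective to maximize
    return -np.sum(theta ** 2)

theta = np.random.randn(10)
sigma, lr, popsize = 0.1, 0.01, 50
for iteration in range(100):
    eps = np.random.randn(popsize, theta.size)               # sampled perturbations
    returns = np.array([fitness(theta + sigma * e) for e in eps])
    A = (returns - returns.mean()) / (returns.std() + 1e-8)  # standardized returns
    theta += lr / (popsize * sigma) * eps.T @ A              # ES gradient estimate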
Example #4
    def __call__(self, config):
        # Set random seeds: PyTorch, numpy.random, random
        set_global_seeds(seed=config['seed'])

        # Make a list of make_env functions
        list_make_env = make_envs(make_env=make_gym_env,
                                  env_id=config['env:id'],
                                  num_env=config['train:N'],
                                  init_seed=config['seed'] * 2)
        # Create vectorized environment
        env = SerialVecEnv(list_make_env=list_make_env)
        # Create environment specification
        env_spec = EnvSpec(env)

        # Create device
        device = torch.device(
            f'cuda:{config["cuda_id"]}' if config['cuda'] else 'cpu')

        # Create policy
        network = MLP(config=config).to(device)
        policy = CategoricalPolicy(network=network, env_spec=env_spec)

        # Create optimizer
        optimizer = optim.Adam(policy.network.parameters(),
                               lr=config['algo:lr'])
        # Create learning rate scheduler
        if config['algo:use_lr_scheduler']:
            max_epoch = config['train:iter']  # maximum number of lr-decay steps; note where lr_scheduler.step() is called
            lambda_f = lambda epoch: 1 - epoch / max_epoch  # linearly decay the learning rate each training epoch
            lr_scheduler = optim.lr_scheduler.LambdaLR(optimizer,
                                                       lr_lambda=lambda_f)

        # Create agent
        kwargs = {'device': device}
        if config['algo:use_lr_scheduler']:
            kwargs['lr_scheduler'] = lr_scheduler
        agent = A2CAgent(policy=policy,
                         optimizer=optimizer,
                         config=config,
                         **kwargs)

        # Create runner
        runner = SegmentRunner(agent=agent,
                               env=env,
                               gamma=config['algo:gamma'])

        # Create engine
        engine = Engine(agent=agent, runner=runner, config=config, logger=None)

        # Training and evaluation
        train_logs = []
        eval_logs = []
        for i in range(config['train:iter']):
            train_output = engine.train(i)

            # Logging and evaluation
            if i == 0 or (i + 1) % config['log:interval'] == 0:
                # Log training and record the loggings
                train_logger = engine.log_train(train_output)
                train_logs.append(train_logger.logs)
                # Log evaluation and record the loggings
                eval_output = engine.eval(i)
                eval_logger = engine.log_eval(eval_output)
                eval_logs.append(eval_logger.logs)

        # Save the loggings
        np.save(Path(config['log:dir']) / str(config['ID']) / 'train', train_logs)
        np.save(Path(config['log:dir']) / str(config['ID']) / 'eval', eval_logs)

        return None
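
SerialVecEnv steps a batch of environments sequentially in a single process. A minimal sketch of that idea against the classic 4-tuple gym step API (hypothetical class; lagom's SerialVecEnv has a richer interface):

class TinySerialVecEnv:
    def __init__(self, list_make_env):
        self.envs = [make() for make in list_make_env]

    def reset(self):
        return [env.reset() for env in self.envs]

    def step(self, actions):
        observations, rewards, dones, infos = [], [], [], []
        for env, action in zip(self.envs, actions):
            obs, reward, done, info = env.step(action)
            if done:
                obs = env.reset()  # auto-reset, as vectorized envs commonly do
            observations.append(obs)
            rewards.append(reward)
            dones.append(done)
            infos.append(info)
        return observations, rewards, dones, infos

# e.g. venv = TinySerialVecEnv([lambda: gym.make('CartPole-v1')] * 4)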
Example #5
File: algo.py  Project: wolegechu/lagom
    def __call__(self, config, seed, device_str):
        # Set random seeds
        set_global_seeds(seed)
        # Create device
        device = torch.device(device_str)
        # Use log dir for current job (run_experiment)
        logdir = Path(config['log.dir']) / str(config['ID']) / str(seed)

        # Make environment (VecEnv) for training and evaluating
        env = make_vec_env(
            vec_env_class=SerialVecEnv,
            make_env=make_gym_env,
            env_id=config['env.id'],
            num_env=config['train.N'],  # batch size for multiple environments
            init_seed=seed)
        eval_env = make_vec_env(vec_env_class=SerialVecEnv,
                                make_env=make_gym_env,
                                env_id=config['env.id'],
                                num_env=1,
                                init_seed=seed)
        if config['env.standardize']:  # wrap with VecStandardize for running averages of observations and rewards
            env = VecStandardize(venv=env,
                                 use_obs=True,
                                 use_reward=True,
                                 clip_obs=10.,
                                 clip_reward=10.,
                                 gamma=0.99,
                                 eps=1e-8)
            eval_env = VecStandardize(venv=eval_env,  # remember to synchronize running averages during evaluation!
                                      use_obs=True,
                                      use_reward=False,  # do not process rewards, no training
                                      clip_obs=env.clip_obs,
                                      clip_reward=env.clip_reward,
                                      gamma=env.gamma,
                                      eps=env.eps,
                                      constant_obs_mean=env.obs_runningavg.mu,  # use current running average as constant
                                      constant_obs_std=env.obs_runningavg.sigma)
        env_spec = EnvSpec(env)

        # Create policy
        network = Network(config=config, env_spec=env_spec)
        if env_spec.control_type == 'Discrete':
            policy = CategoricalPolicy(config=config,
                                       network=network,
                                       env_spec=env_spec,
                                       learn_V=True)
        elif env_spec.control_type == 'Continuous':
            policy = GaussianPolicy(
                config=config,
                network=network,
                env_spec=env_spec,
                learn_V=True,
                min_std=config['agent.min_std'],
                std_style=config['agent.std_style'],
                constant_std=config['agent.constant_std'],
                std_state_dependent=config['agent.std_state_dependent'],
                init_std=config['agent.init_std'])
        network = network.to(device)

        # Create optimizer and learning rate scheduler
        optimizer = optim.Adam(policy.network.parameters(),
                               lr=config['algo.lr'])
        if config['algo.use_lr_scheduler']:
            if 'train.iter' in config:  # iteration-based training
                max_epoch = config['train.iter']
            elif 'train.timestep' in config:  # timestep-based training
                max_epoch = config['train.timestep'] + 1  # +1 to avoid 0.0 lr in the final iteration
            lambda_f = lambda epoch: 1 - epoch / max_epoch  # decay learning rate for each training epoch
            lr_scheduler = optim.lr_scheduler.LambdaLR(optimizer,
                                                       lr_lambda=lambda_f)

        # Create agent
        kwargs = {'device': device}
        if config['algo.use_lr_scheduler']:
            kwargs['lr_scheduler'] = lr_scheduler
        agent = A2CAgent(config=config,
                         policy=policy,
                         optimizer=optimizer,
                         **kwargs)

        # Create runner
        runner = SegmentRunner(agent=agent,
                               env=env,
                               gamma=config['algo.gamma'])
        eval_runner = TrajectoryRunner(agent=agent, env=eval_env, gamma=1.0)

        # Create engine
        engine = Engine(agent=agent,
                        runner=runner,
                        config=config,
                        eval_runner=eval_runner)

        # Training and evaluation
        train_logs = []
        eval_logs = []

        for i in count():  # incremental iteration
            if 'train.iter' in config and i >= config['train.iter']:  # enough iterations
                break
            elif 'train.timestep' in config and agent.total_T >= config['train.timestep']:  # enough timesteps
                break

            # train and evaluation
            train_output = engine.train(n=i)

            # logging
            if i == 0 or (i + 1) % config['log.record_interval'] == 0 or (i + 1) % config['log.print_interval'] == 0:
                train_log = engine.log_train(train_output)

                with torch.no_grad():  # disable grad, save memory
                    eval_output = engine.eval(n=i)
                eval_log = engine.log_eval(eval_output)

                if i == 0 or (i + 1) % config['log.record_interval'] == 0:  # record loggings
                    train_logs.append(train_log)
                    eval_logs.append(eval_log)

        # Save all loggings
        pickle_dump(obj=train_logs, f=logdir / 'train_logs', ext='.pkl')
        pickle_dump(obj=eval_logs, f=logdir / 'eval_logs', ext='.pkl')

        return None
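
VecStandardize maintains running averages (the obs_runningavg.mu and .sigma used above) and normalizes observations with them. A minimal sketch of that normalization, assuming a Welford-style batched update (lagom's exact estimator may differ):

import numpy as np

class RunningAvg:
    def __init__(self, shape, eps=1e-8):
        self.mu = np.zeros(shape)
        self.var = np.ones(shape)
        self.count = eps

    def update(self, x):  # x: batch of observations, shape (N, *shape)
        batch_mu, batch_var, n = x.mean(0), x.var(0), x.shape[0]
        delta = batch_mu - self.mu
        total = self.count + n
        self.mu = self.mu + delta * n / total
        self.var = (self.count * self.var + n * batch_var
                    + delta ** 2 * self.count * n / total) / total
        self.count = total

def standardize_obs(obs, avg, clip_obs=10., eps=1e-8):
    # Update the running statistics, then normalize and clip the batch.
    avg.update(obs)
    return np.clip((obs - avg.mu) / np.sqrt(avg.var + eps), -clip_obs, clip_obs)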
Example #6
    def __call__(self, config):
        # Set random seeds
        set_global_seeds(config['seed'])

        # Create device
        device = torch.device('cuda' if config['cuda'] else 'cpu')

        # Define GPU-dependent keywords for DataLoader
        if config['cuda']:
            kwargs = {'num_workers': 1, 'pin_memory': True}
        else:
            kwargs = {}

        # Create dataset for training and testing
        train_dataset = datasets.MNIST('data/',
                                       train=True,
                                       download=True,
                                       transform=transforms.ToTensor())
        test_dataset = datasets.MNIST('data/',
                                      train=False,
                                      transform=transforms.ToTensor())
        # Create data loader for training and testing
        train_loader = DataLoader(train_dataset,
                                  batch_size=config['batch_size'],
                                  shuffle=True,
                                  **kwargs)
        test_loader = DataLoader(test_dataset,
                                 batch_size=config['batch_size'],
                                 shuffle=True,
                                 **kwargs)

        # Create the model
        if config['use_ConvVAE']:
            model = ConvVAE(config=None)
        else:
            model = VAE(config=None)
        model = model.to(device)

        # Create optimizer
        optimizer = optim.Adam(model.parameters(), lr=1e-3)

        # Create logger
        logger = Logger(name='logger')

        # Create engine
        engine = Engine(model=model,
                        optimizer=optimizer,
                        train_loader=train_loader,
                        test_loader=test_loader,
                        config=config,
                        logger=logger,
                        device=device)

        # Training and testing
        for epoch in range(config['num_epochs']):
            print('#' * 20)
            print(f'# Epoch: {epoch+1}')
            print('#' * 20)
            engine.train()
            engine.eval()

            # Sample image from standard Gaussian noise as input to decoder
            with torch.no_grad():
                sample = torch.randn(64, 8).to(device)
                sample = model.decoder_forward(sample).cpu()

                save_image(sample.view(64, 1, 28, 28),
                           f'data/sample_{epoch}.png')

        # Save the logger
        # logger.save(name=f'{self.name}_ID_{config["ID"]}')

        return None
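
The sampling step draws z ~ N(0, I) and pushes it through the decoder alone, which is what makes a VAE generative. A minimal decoder sketch matching the shapes used above (latent size 8, 28x28 output); the real VAE/ConvVAE architectures are defined elsewhere in the project:

import torch
from torch import nn

decoder = nn.Sequential(nn.Linear(8, 400),
                        nn.ReLU(),
                        nn.Linear(400, 784),
                        nn.Sigmoid())  # pixel intensities in [0, 1]

with torch.no_grad():
    sample = torch.randn(64, 8)  # 64 draws from the standard Gaussian prior
    images = decoder(sample).view(64, 1, 28, 28)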
Example #7
 def __call__(self, config, seed, device_str):
     set_global_seeds(seed)
     device = torch.device(device_str)
     logdir = Path(config['log.dir']) / str(config['ID']) / str(seed)
     
     # Environment related
     env = make_vec_env(vec_env_class=SerialVecEnv, 
                        make_env=make_gym_env, 
                        env_id=config['env.id'], 
                        num_env=config['train.N'],  # batched environment
                        init_seed=seed, 
                        rolling=True)
     eval_env = make_vec_env(vec_env_class=SerialVecEnv, 
                             make_env=make_gym_env, 
                             env_id=config['env.id'], 
                             num_env=config['eval.N'], 
                             init_seed=seed, 
                             rolling=False)
     if config['env.standardize']:  # running averages of observation and reward
         env = VecStandardize(venv=env, 
                              use_obs=True, 
                              use_reward=False,  # A2C
                              clip_obs=10., 
                              clip_reward=10., 
                              gamma=0.99, 
                              eps=1e-8)
         eval_env = VecStandardize(venv=eval_env,  # remember to synchronize running averages during evaluation !!!
                                   use_obs=True, 
                                   use_reward=False,  # do not process rewards, no training
                                   clip_obs=env.clip_obs, 
                                   clip_reward=env.clip_reward, 
                                   gamma=env.gamma, 
                                   eps=env.eps, 
                                   constant_obs_mean=env.obs_runningavg.mu,  # use current running average as constant
                                   constant_obs_std=env.obs_runningavg.sigma)
     env_spec = EnvSpec(env)
     
     # Network and policy
     if config['network.recurrent']:
         network = LSTM(config=config, device=device, env_spec=env_spec)
     else:
         network = Network(config=config, device=device, env_spec=env_spec)
     if env_spec.control_type == 'Discrete':
         policy = CategoricalPolicy(config=config, 
                                    network=network, 
                                    env_spec=env_spec, 
                                    device=device,
                                    learn_V=True)
     elif env_spec.control_type == 'Continuous':
         policy = GaussianPolicy(config=config, 
                                 network=network, 
                                 env_spec=env_spec, 
                                 device=device,
                                 learn_V=True,
                                 min_std=config['agent.min_std'], 
                                 std_style=config['agent.std_style'], 
                                 constant_std=config['agent.constant_std'],
                                 std_state_dependent=config['agent.std_state_dependent'],
                                 init_std=config['agent.init_std'])
     
     # Optimizer and learning rate scheduler
     optimizer = optim.Adam(policy.network.parameters(), lr=config['algo.lr'])
     if config['algo.use_lr_scheduler']:
         if 'train.iter' in config:  # iteration-based
             max_epoch = config['train.iter']
         elif 'train.timestep' in config:  # timestep-based
             max_epoch = config['train.timestep'] + 1  # avoid zero lr in final iteration
         lambda_f = lambda epoch: 1 - epoch/max_epoch
         lr_scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda_f)
     
     # Agent
     kwargs = {'device': device}
     if config['algo.use_lr_scheduler']:
         kwargs['lr_scheduler'] = lr_scheduler
     agent = A2CAgent(config=config, 
                      policy=policy, 
                      optimizer=optimizer, 
                      **kwargs)
     
     # Runner
     runner = SegmentRunner(agent=agent, 
                            env=env, 
                            gamma=config['algo.gamma'])
     eval_runner = TrajectoryRunner(agent=agent, 
                                    env=eval_env, 
                                    gamma=1.0)
     
     # Engine
     engine = Engine(agent=agent, 
                     runner=runner, 
                     config=config, 
                     eval_runner=eval_runner)
     
     # Training and evaluation
     train_logs = []
     eval_logs = []
     
     if config['network.recurrent']:
         rnn_states_buffer = agent.policy.rnn_states  # for SegmentRunner
     
     for i in count():
         if 'train.iter' in config and i >= config['train.iter']:  # enough iterations
             break
         elif 'train.timestep' in config and agent.total_T >= config['train.timestep']:  # enough timesteps
             break
         
         if config['network.recurrent']:
             if isinstance(rnn_states_buffer, list):  # LSTM: [h, c]
                 rnn_states_buffer = [buf.detach() for buf in rnn_states_buffer]
             else:
                 rnn_states_buffer = rnn_states_buffer.detach()
             agent.policy.rnn_states = rnn_states_buffer
             
         train_output = engine.train(n=i)
         
         # Logging
         if i == 0 or (i+1) % config['log.record_interval'] == 0 or (i+1) % config['log.print_interval'] == 0:
             train_log = engine.log_train(train_output)
             
             if config['network.recurrent']:
                 rnn_states_buffer = agent.policy.rnn_states  # for SegmentRunner
                 
             with torch.no_grad():  # disable grad, save memory
                 eval_output = engine.eval(n=i)
             eval_log = engine.log_eval(eval_output)
             
             if i == 0 or (i+1) % config['log.record_interval'] == 0:
                 train_logs.append(train_log)
                 eval_logs.append(eval_log)
     
     # Save all loggings
     pickle_dump(obj=train_logs, f=logdir/'train_logs', ext='.pkl')
     pickle_dump(obj=eval_logs, f=logdir/'eval_logs', ext='.pkl')
     
     return None
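
Detaching rnn_states between segments is truncated backpropagation through time: the hidden state's value carries across segment boundaries, but gradients do not flow past them. A standalone sketch with a plain nn.LSTM (shapes are illustrative):

import torch
from torch import nn

lstm = nn.LSTM(input_size=4, hidden_size=8)
rnn_states = (torch.zeros(1, 1, 8), torch.zeros(1, 1, 8))  # [h, c], as in the example

for segment in range(5):
    # Cut the computation graph at the segment boundary, but keep the values.
    rnn_states = tuple(state.detach() for state in rnn_states)
    x = torch.randn(10, 1, 4)  # one segment of inputs (seq_len, batch, features)
    out, rnn_states = lstm(x, rnn_states)
    loss = out.pow(2).mean()   # placeholder loss
    loss.backward()            # gradients stay within this segment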