def __call__(self, config, seed, device_str):
    # Set random seeds
    set_global_seeds(seed)
    # Create device
    device = torch.device(device_str)
    # Use log dir for current job (run_experiment)
    logdir = Path(config['log.dir']) / str(config['ID']) / str(seed)

    # Create dataset for training and testing
    train_dataset = datasets.MNIST('data/', train=True, download=True, transform=transforms.ToTensor())
    test_dataset = datasets.MNIST('data/', train=False, transform=transforms.ToTensor())

    # Define GPU-dependent keywords for DataLoader
    if config['cuda']:
        kwargs = {'num_workers': 1, 'pin_memory': True}
    else:
        kwargs = {}

    # Create data loader for training and testing
    train_loader = DataLoader(train_dataset, batch_size=config['train.batch_size'], shuffle=True, **kwargs)
    test_loader = DataLoader(test_dataset, batch_size=config['eval.batch_size'], shuffle=True, **kwargs)

    # Create the model
    if config['network.type'] == 'VAE':
        model = VAE(config=config)
    elif config['network.type'] == 'ConvVAE':
        model = ConvVAE(config=config)
    model = model.to(device)

    # Create optimizer
    optimizer = optim.Adam(model.parameters(), lr=1e-3)

    # Create engine
    engine = Engine(agent=model,
                    runner=None,
                    config=config,
                    device=device,
                    optimizer=optimizer,
                    train_loader=train_loader,
                    test_loader=test_loader)

    # Training and evaluation
    for epoch in range(config['train.num_epoch']):
        train_output = engine.train(n=epoch)
        engine.log_train(train_output, logdir=logdir, epoch=epoch)

        eval_output = engine.eval(n=epoch)
        engine.log_eval(eval_output, logdir=logdir, epoch=epoch)

    return None
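# set_global_seeds is defined elsewhere in the codebase; based on the comment in the
# next snippet ("PyTorch, numpy.random, random"), here is a minimal sketch of what such
# a helper is assumed to look like (the real implementation may seed further sources):
import random

import numpy as np
import torch


def set_global_seeds_sketch(seed):
    # Seed the three RNGs the experiments rely on
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)  # on recent PyTorch versions this also seeds the CUDA RNGs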
def __call__(self, config):
    # Set random seeds: PyTorch, numpy.random, random
    set_global_seeds(seed=config['seed'])
    # Create environment and seed it
    env = make_env(seed=config['seed'], monitor=False, monitor_dir=None)
    # Create environment specification
    env_spec = EnvSpec(env)  # TODO: integrate within make_env globally
    # Create device
    device = torch.device('cuda' if config['cuda'] else 'cpu')
    # Create logger
    logger = Logger(name='logger')

    # Create policy
    network = MLP(config=config)
    policy = CategoricalPolicy(network=network, env_spec=env_spec)
    policy.network = policy.network.to(device)

    # Create optimizer
    optimizer = optim.Adam(policy.network.parameters(), lr=config['lr'])
    # Learning rate scheduler: decay the learning rate linearly over training iterations
    max_epoch = config['train_iter']  # maximum number of decay steps
    lambda_f = lambda epoch: 1 - epoch/max_epoch
    lr_scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda_f)

    # Create agent (swap in REINFORCEAgent for plain REINFORCE)
    agent_class = ActorCriticAgent
    agent = agent_class(policy=policy,
                        optimizer=optimizer,
                        config=config,
                        lr_scheduler=lr_scheduler,
                        device=device)

    # Create runner
    runner = Runner(agent=agent, env=env, gamma=config['gamma'])

    # Create engine
    engine = Engine(agent=agent, runner=runner, config=config, logger=logger)

    # Training
    train_output = engine.train()
    np.save('logs/returns_ActorCritic', train_output)

    return None
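# The scheduler above scales the base learning rate by (1 - epoch/max_epoch), i.e. a linear
# decay to zero over training. A self-contained sketch of that schedule; the model and
# max_epoch below are placeholders, not values from the experiment:
import torch.nn as nn
import torch.optim as optim

model = nn.Linear(4, 2)
optimizer = optim.Adam(model.parameters(), lr=1e-3)
max_epoch = 10
lr_scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda epoch: 1 - epoch/max_epoch)

for epoch in range(max_epoch):
    # ... one training epoch with the current learning rate ...
    lr_scheduler.step()
    print(epoch, optimizer.param_groups[0]['lr'])  # decays linearly, reaching 0.0 after the final step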
def __call__(self, config, seed, device_str):
    # Set random seeds
    set_global_seeds(seed)
    # Use log dir for current job (run_experiment)
    logdir = Path(config['log.dir']) / str(config['ID']) / str(seed)

    # Train
    es = ESMaster(num_iteration=config['train.num_iteration'],
                  worker_class=ESWorker,
                  init_seed=seed,
                  daemonic_worker=None,
                  config=config,
                  logdir=logdir)
    es()

    return None
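# ESMaster/ESWorker are classes from the surrounding codebase, so their internals are not
# shown here. As a rough, library-free sketch of the underlying idea, a basic evolution
# strategies update estimates a search gradient from the fitness of Gaussian perturbations
# of the parameters (all names below are illustrative):
import numpy as np


def es_step(theta, fitness_fn, popsize=16, sigma=0.1, lr=0.01, rng=None):
    """One ES update on parameter vector theta; fitness_fn maps a parameter vector to a scalar."""
    rng = rng or np.random.default_rng()
    noise = rng.standard_normal((popsize, theta.size))             # one perturbation per candidate
    rewards = np.array([fitness_fn(theta + sigma*eps) for eps in noise])
    rewards = (rewards - rewards.mean()) / (rewards.std() + 1e-8)  # normalize fitness scores
    grad = noise.T @ rewards / (popsize * sigma)                   # search-gradient estimate
    return theta + lr * grad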
def __call__(self, config):
    # Set random seeds: PyTorch, numpy.random, random
    set_global_seeds(seed=config['seed'])

    # Make a list of make_env functions
    list_make_env = make_envs(make_env=make_gym_env,
                              env_id=config['env:id'],
                              num_env=config['train:N'],
                              init_seed=config['seed'] * 2)
    # Create vectorized environment
    env = SerialVecEnv(list_make_env=list_make_env)
    # Create environment specification
    env_spec = EnvSpec(env)

    # Create device
    device = torch.device(f'cuda:{config["cuda_id"]}' if config['cuda'] else 'cpu')

    # Create policy
    network = MLP(config=config).to(device)
    policy = CategoricalPolicy(network=network, env_spec=env_spec)

    # Create optimizer
    optimizer = optim.Adam(policy.network.parameters(), lr=config['algo:lr'])
    # Create learning rate scheduler: decay the learning rate linearly over training iterations
    if config['algo:use_lr_scheduler']:
        max_epoch = config['train:iter']  # maximum number of decay steps
        lambda_f = lambda epoch: 1 - epoch/max_epoch
        lr_scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda_f)

    # Create agent
    kwargs = {'device': device}
    if config['algo:use_lr_scheduler']:
        kwargs['lr_scheduler'] = lr_scheduler
    agent = A2CAgent(policy=policy, optimizer=optimizer, config=config, **kwargs)

    # Create runner
    runner = SegmentRunner(agent=agent, env=env, gamma=config['algo:gamma'])

    # Create engine
    engine = Engine(agent=agent, runner=runner, config=config, logger=None)

    # Training and evaluation
    train_logs = []
    eval_logs = []
    for i in range(config['train:iter']):
        train_output = engine.train(i)

        # Logging and evaluation
        if i == 0 or (i + 1) % config['log:interval'] == 0:
            # Log training and record the loggings
            train_logger = engine.log_train(train_output)
            train_logs.append(train_logger.logs)
            # Log evaluation and record the loggings
            eval_output = engine.eval(i)
            eval_logger = engine.log_eval(eval_output)
            eval_logs.append(eval_logger.logs)

    # Save the loggings
    np.save(Path(config['log:dir']) / str(config['ID']) / 'train', train_logs)
    np.save(Path(config['log:dir']) / str(config['ID']) / 'eval', eval_logs)

    return None
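# make_envs / make_gym_env / SerialVecEnv above come from the surrounding codebase. The pattern
# they implement is a list of environment-constructor thunks, one seed per environment, which
# the vectorized wrapper calls lazily. A rough sketch of that pattern with plain (classic) gym;
# the function names here are illustrative, not the library's API:
import gym


def make_env_thunks(env_id, num_env, init_seed):
    def make_one(seed):
        def _thunk():
            env = gym.make(env_id)
            env.seed(seed)  # classic gym API; newer gymnasium passes seed to reset() instead
            return env
        return _thunk
    return [make_one(init_seed + i) for i in range(num_env)]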
def __call__(self, config, seed, device_str):
    # Set random seeds
    set_global_seeds(seed)
    # Create device
    device = torch.device(device_str)
    # Use log dir for current job (run_experiment)
    logdir = Path(config['log.dir']) / str(config['ID']) / str(seed)

    # Make environment (VecEnv) for training and evaluating
    env = make_vec_env(vec_env_class=SerialVecEnv,
                       make_env=make_gym_env,
                       env_id=config['env.id'],
                       num_env=config['train.N'],  # batch size for multiple environments
                       init_seed=seed)
    eval_env = make_vec_env(vec_env_class=SerialVecEnv,
                            make_env=make_gym_env,
                            env_id=config['env.id'],
                            num_env=1,
                            init_seed=seed)
    if config['env.standardize']:  # wrap with VecStandardize for running averages of observations and rewards
        env = VecStandardize(venv=env,
                             use_obs=True,
                             use_reward=True,
                             clip_obs=10.,
                             clip_reward=10.,
                             gamma=0.99,
                             eps=1e-8)
        eval_env = VecStandardize(venv=eval_env,  # remember to synchronize running averages during evaluation !!!
                                  use_obs=True,
                                  use_reward=False,  # do not process rewards, no training
                                  clip_obs=env.clip_obs,
                                  clip_reward=env.clip_reward,
                                  gamma=env.gamma,
                                  eps=env.eps,
                                  constant_obs_mean=env.obs_runningavg.mu,  # use current running average as constant
                                  constant_obs_std=env.obs_runningavg.sigma)
    env_spec = EnvSpec(env)

    # Create policy
    network = Network(config=config, env_spec=env_spec)
    if env_spec.control_type == 'Discrete':
        policy = CategoricalPolicy(config=config, network=network, env_spec=env_spec, learn_V=True)
    elif env_spec.control_type == 'Continuous':
        policy = GaussianPolicy(config=config,
                                network=network,
                                env_spec=env_spec,
                                learn_V=True,
                                min_std=config['agent.min_std'],
                                std_style=config['agent.std_style'],
                                constant_std=config['agent.constant_std'],
                                std_state_dependent=config['agent.std_state_dependent'],
                                init_std=config['agent.init_std'])
    network = network.to(device)

    # Create optimizer and learning rate scheduler
    optimizer = optim.Adam(policy.network.parameters(), lr=config['algo.lr'])
    if config['algo.use_lr_scheduler']:
        if 'train.iter' in config:  # iteration-based training
            max_epoch = config['train.iter']
        elif 'train.timestep' in config:  # timestep-based training
            max_epoch = config['train.timestep'] + 1  # +1 to avoid 0.0 lr in final iteration
        lambda_f = lambda epoch: 1 - epoch/max_epoch  # decay learning rate for each training epoch
        lr_scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda_f)

    # Create agent
    kwargs = {'device': device}
    if config['algo.use_lr_scheduler']:
        kwargs['lr_scheduler'] = lr_scheduler
    agent = A2CAgent(config=config, policy=policy, optimizer=optimizer, **kwargs)

    # Create runner
    runner = SegmentRunner(agent=agent, env=env, gamma=config['algo.gamma'])
    eval_runner = TrajectoryRunner(agent=agent, env=eval_env, gamma=1.0)

    # Create engine
    engine = Engine(agent=agent, runner=runner, config=config, eval_runner=eval_runner)

    # Training and evaluation
    train_logs = []
    eval_logs = []
    for i in count():  # incremental iteration
        if 'train.iter' in config and i >= config['train.iter']:  # enough iterations
            break
        elif 'train.timestep' in config and agent.total_T >= config['train.timestep']:  # enough timesteps
            break

        # Train and evaluate
        train_output = engine.train(n=i)

        # Logging
        if i == 0 or (i + 1) % config['log.record_interval'] == 0 or (i + 1) % config['log.print_interval'] == 0:
            train_log = engine.log_train(train_output)

            with torch.no_grad():  # disable grad, save memory
                eval_output = engine.eval(n=i)
                eval_log = engine.log_eval(eval_output)

            if i == 0 or (i + 1) % config['log.record_interval'] == 0:  # record loggings
                train_logs.append(train_log)
                eval_logs.append(eval_log)

    # Save all loggings
    pickle_dump(obj=train_logs, f=logdir / 'train_logs', ext='.pkl')
    pickle_dump(obj=eval_logs, f=logdir / 'eval_logs', ext='.pkl')

    return None
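# VecStandardize and its running averages (obs_runningavg.mu / .sigma) are part of the
# surrounding codebase. A rough sketch of the idea it is assumed to implement: maintain a
# running mean/variance of observations, normalize and clip with it during training, and
# freeze those statistics when evaluating:
import numpy as np


class RunningObsStandardizer:
    def __init__(self, eps=1e-8, clip=10.0):
        self.mean, self.var, self.count = 0.0, 1.0, eps
        self.eps, self.clip = eps, clip

    def update(self, x):
        # Parallel (Chan et al.) update of mean/variance with a batch of observations
        batch_mean, batch_var, n = np.mean(x, axis=0), np.var(x, axis=0), x.shape[0]
        delta, total = batch_mean - self.mean, self.count + n
        self.mean = self.mean + delta * n / total
        self.var = (self.var*self.count + batch_var*n + delta**2 * self.count*n/total) / total
        self.count = total

    def __call__(self, x, frozen=False):
        if not frozen:  # evaluation uses the training statistics as constants
            self.update(np.atleast_2d(x))
        return np.clip((x - self.mean) / np.sqrt(self.var + self.eps), -self.clip, self.clip)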
def __call__(self, config):
    # Set random seeds
    set_global_seeds(config['seed'])
    # Create device
    device = torch.device('cuda' if config['cuda'] else 'cpu')
    # Define GPU-dependent keywords for DataLoader
    if config['cuda']:
        kwargs = {'num_workers': 1, 'pin_memory': True}
    else:
        kwargs = {}

    # Create dataset for training and testing
    train_dataset = datasets.MNIST('data/', train=True, download=True, transform=transforms.ToTensor())
    test_dataset = datasets.MNIST('data/', train=False, transform=transforms.ToTensor())

    # Create data loader for training and testing
    train_loader = DataLoader(train_dataset, batch_size=config['batch_size'], shuffle=True, **kwargs)
    test_loader = DataLoader(test_dataset, batch_size=config['batch_size'], shuffle=True, **kwargs)

    # Create the model
    if config['use_ConvVAE']:
        model = ConvVAE(config=None)
    else:
        model = VAE(config=None)
    model = model.to(device)

    # Create optimizer
    optimizer = optim.Adam(model.parameters(), lr=1e-3)

    # Create logger
    logger = Logger(name='logger')

    # Create engine
    engine = Engine(model=model,
                    optimizer=optimizer,
                    train_loader=train_loader,
                    test_loader=test_loader,
                    config=config,
                    logger=logger,
                    device=device)

    # Training and testing
    for epoch in range(config['num_epochs']):
        print('#' * 20)
        print(f'# Epoch: {epoch+1}')
        print('#' * 20)

        engine.train()
        engine.eval()

        # Sample images from standard Gaussian noise as input to the decoder
        with torch.no_grad():
            sample = torch.randn(64, 8).to(device)
            sample = model.decoder_forward(sample).cpu()
            save_image(sample.view(64, 1, 28, 28), f'data/sample_{epoch}.png')

    # Save the logger
    # logger.save(name=f'{self.name}_ID_{config["ID"]}')

    return None
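# VAE/ConvVAE are defined elsewhere in the codebase. The sampling step above draws latents
# from N(0, I) and decodes them into images; during training the encoder typically outputs
# (mu, logvar) and samples the latent with the reparameterization trick. A minimal sketch
# of that trick (shapes are illustrative):
import torch


def reparameterize(mu, logvar):
    """Sample z ~ N(mu, sigma^2) differentiably: z = mu + sigma * eps with eps ~ N(0, I)."""
    std = torch.exp(0.5 * logvar)
    eps = torch.randn_like(std)
    return mu + eps * std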
def __call__(self, config, seed, device_str):
    set_global_seeds(seed)
    device = torch.device(device_str)
    logdir = Path(config['log.dir']) / str(config['ID']) / str(seed)

    # Environment related
    env = make_vec_env(vec_env_class=SerialVecEnv,
                       make_env=make_gym_env,
                       env_id=config['env.id'],
                       num_env=config['train.N'],  # batched environment
                       init_seed=seed,
                       rolling=True)
    eval_env = make_vec_env(vec_env_class=SerialVecEnv,
                            make_env=make_gym_env,
                            env_id=config['env.id'],
                            num_env=config['eval.N'],
                            init_seed=seed,
                            rolling=False)
    if config['env.standardize']:  # running averages of observation and reward
        env = VecStandardize(venv=env,
                             use_obs=True,
                             use_reward=False,  # A2C
                             clip_obs=10.,
                             clip_reward=10.,
                             gamma=0.99,
                             eps=1e-8)
        eval_env = VecStandardize(venv=eval_env,  # remember to synchronize running averages during evaluation !!!
                                  use_obs=True,
                                  use_reward=False,  # do not process rewards, no training
                                  clip_obs=env.clip_obs,
                                  clip_reward=env.clip_reward,
                                  gamma=env.gamma,
                                  eps=env.eps,
                                  constant_obs_mean=env.obs_runningavg.mu,  # use current running average as constant
                                  constant_obs_std=env.obs_runningavg.sigma)
    env_spec = EnvSpec(env)

    # Network and policy
    if config['network.recurrent']:
        network = LSTM(config=config, device=device, env_spec=env_spec)
    else:
        network = Network(config=config, device=device, env_spec=env_spec)
    if env_spec.control_type == 'Discrete':
        policy = CategoricalPolicy(config=config,
                                   network=network,
                                   env_spec=env_spec,
                                   device=device,
                                   learn_V=True)
    elif env_spec.control_type == 'Continuous':
        policy = GaussianPolicy(config=config,
                                network=network,
                                env_spec=env_spec,
                                device=device,
                                learn_V=True,
                                min_std=config['agent.min_std'],
                                std_style=config['agent.std_style'],
                                constant_std=config['agent.constant_std'],
                                std_state_dependent=config['agent.std_state_dependent'],
                                init_std=config['agent.init_std'])

    # Optimizer and learning rate scheduler
    optimizer = optim.Adam(policy.network.parameters(), lr=config['algo.lr'])
    if config['algo.use_lr_scheduler']:
        if 'train.iter' in config:  # iteration-based
            max_epoch = config['train.iter']
        elif 'train.timestep' in config:  # timestep-based
            max_epoch = config['train.timestep'] + 1  # avoid zero lr in final iteration
        lambda_f = lambda epoch: 1 - epoch/max_epoch
        lr_scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda_f)

    # Agent
    kwargs = {'device': device}
    if config['algo.use_lr_scheduler']:
        kwargs['lr_scheduler'] = lr_scheduler
    agent = A2CAgent(config=config, policy=policy, optimizer=optimizer, **kwargs)

    # Runner
    runner = SegmentRunner(agent=agent, env=env, gamma=config['algo.gamma'])
    eval_runner = TrajectoryRunner(agent=agent, env=eval_env, gamma=1.0)

    # Engine
    engine = Engine(agent=agent, runner=runner, config=config, eval_runner=eval_runner)

    # Training and evaluation
    train_logs = []
    eval_logs = []
    if config['network.recurrent']:
        rnn_states_buffer = agent.policy.rnn_states  # for SegmentRunner
    for i in count():
        if 'train.iter' in config and i >= config['train.iter']:  # enough iterations
            break
        elif 'train.timestep' in config and agent.total_T >= config['train.timestep']:  # enough timesteps
            break

        if config['network.recurrent']:
            if isinstance(rnn_states_buffer, list):  # LSTM: [h, c]
                rnn_states_buffer = [buf.detach() for buf in rnn_states_buffer]
            else:
                rnn_states_buffer = rnn_states_buffer.detach()
            agent.policy.rnn_states = rnn_states_buffer

        train_output = engine.train(n=i)

        # Logging
        if i == 0 or (i+1) % config['log.record_interval'] == 0 or (i+1) % config['log.print_interval'] == 0:
            train_log = engine.log_train(train_output)

            if config['network.recurrent']:
                rnn_states_buffer = agent.policy.rnn_states  # for SegmentRunner

            with torch.no_grad():  # disable grad, save memory
                eval_output = engine.eval(n=i)
                eval_log = engine.log_eval(eval_output)

            if i == 0 or (i+1) % config['log.record_interval'] == 0:
                train_logs.append(train_log)
                eval_logs.append(eval_log)

    # Save all loggings
    pickle_dump(obj=train_logs, f=logdir/'train_logs', ext='.pkl')
    pickle_dump(obj=eval_logs, f=logdir/'eval_logs', ext='.pkl')

    return None
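# Detaching agent.policy.rnn_states above carries the recurrent state across SegmentRunner
# segments while cutting the backprop graph between them (truncated BPTT). A minimal sketch
# of the same pattern with a plain nn.LSTM (sizes are placeholders):
import torch
import torch.nn as nn

lstm = nn.LSTM(input_size=4, hidden_size=8, batch_first=True)
h = torch.zeros(1, 2, 8)  # (num_layers, batch, hidden_size)
c = torch.zeros(1, 2, 8)

for segment in range(3):
    x = torch.randn(2, 5, 4)       # one segment of observations (batch, time, features)
    out, (h, c) = lstm(x, (h, c))
    loss = out.pow(2).mean()       # placeholder loss
    loss.backward()
    h, c = h.detach(), c.detach()  # keep the state values, drop the autograd graph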