class ESMaster(BaseESMaster):
    def _network_size(self):
        worker = ESWorker()
        tmp_agent = worker.init(seed=0, config=self.config)
        num_params = worker.network.num_params
        del worker, tmp_agent

        return num_params

    def make_es(self, config):
        if self.config['es.algo'] == 'CMAES':
            es = CMAES(mu0=[self.config['es.mu0']]*self._network_size(),
                       std0=self.config['es.std0'],
                       popsize=self.config['es.popsize'])
        elif self.config['es.algo'] == 'OpenAIES':
            es = OpenAIES(mu0=[self.config['es.mu0']]*self._network_size(),
                          std0=self.config['es.std0'],
                          popsize=self.config['es.popsize'],
                          std_decay=0.999,
                          min_std=0.01,
                          lr=5e-2,
                          lr_decay=0.99,
                          min_lr=1e-3,
                          antithetic=True,
                          rank_transform=True)

        self.logger = Logger()

        return es

    def _process_es_result(self, result):
        best_f_val = result['best_f_val']
        best_return = -best_f_val  # negate to get back reward

        # Logging
        self.logger.log('generation', self.generation)
        self.logger.log('best_return', best_return)

        if self.generation == 0 or (self.generation + 1) % self.config['log.interval'] == 0:
            print('-'*50)
            self.logger.dump(keys=None, index=-1, indent=0)
            print('-'*50)

        # Save the loggings and final parameters
        if (self.generation + 1) == self.num_iteration:
            pickle_dump(obj=self.logger.logs, f=self.logdir/'result', ext='.pkl')
            np.save(self.logdir/'trained_param', result['best_param'])
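# Illustrative only: a minimal config dict covering the keys that make_es and
# _process_es_result read above ('es.algo', 'es.mu0', 'es.std0', 'es.popsize',
# 'log.interval'). The concrete values are placeholders for a sketch, not
# settings taken from any experiment.
example_es_config = {
    'es.algo': 'OpenAIES',  # or 'CMAES'
    'es.mu0': 0.0,          # initial mean for every network parameter
    'es.std0': 0.5,         # initial standard deviation of the search distribution
    'es.popsize': 64,       # number of candidate solutions per generation
    'log.interval': 10,     # dump loggings every 10 generations
}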
def train(self, n=None):
    self.agent.train()  # set to training mode

    # Create a logger
    train_output = Logger()

    # Iterate over data batches for one epoch
    for i, (data, label) in enumerate(self.train_loader):
        # Put data to device
        data = data.to(self.device)
        # Zero-out gradient buffer
        self.optimizer.zero_grad()
        # Forward pass of data
        re_x, mu, logvar = self.agent(data)
        # Calculate loss
        out = self.agent.calculate_loss(re_x=re_x, x=data, mu=mu, logvar=logvar, loss_type='BCE')
        loss = out['loss']
        # Backward pass to calculate gradients
        loss.backward()
        # Take a gradient step
        self.optimizer.step()

        # Record train output
        train_output.log('epoch', n)
        train_output.log('iteration', i)
        train_output.log('train_loss', out['loss'].item())  # item() saves memory
        train_output.log('reconstruction_loss', out['re_loss'].item())
        train_output.log('KL_loss', out['KL_loss'].item())

        # Dump logging
        if i == 0 or (i + 1) % self.config['log.interval'] == 0:
            print('-' * 50)
            train_output.dump(keys=None, index=-1, indent=0)
            print('-' * 50)

    return train_output.logs
def eval(self, n=None):
    self.agent.eval()  # set to evaluation mode

    # Create a logger
    eval_output = Logger()

    # Iterate over test batches
    for i, (data, label) in enumerate(self.test_loader):
        # Put data to device
        data = data.to(self.device)

        with torch.no_grad():  # fast, disable grad
            # Forward pass of data
            re_x, mu, logvar = self.agent(data)
            # Calculate loss
            out = self.agent.calculate_loss(re_x=re_x, x=data, mu=mu, logvar=logvar, loss_type='BCE')

            # Record eval output
            eval_output.log('eval_loss', out['loss'].item())

    return eval_output.logs
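# Illustrative only: one way the train() and eval() methods above could be
# driven across epochs. `engine` and `num_epochs` are hypothetical names; both
# methods return the underlying Logger's .logs, as shown above.
num_epochs = 5
for epoch in range(num_epochs):
    train_logs = engine.train(n=epoch)  # one pass over train_loader with gradient steps
    eval_logs = engine.eval(n=epoch)    # one pass over test_loader, no gradients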
def log_train(self, train_output):
    # Create training logger
    logger = Logger(name='train_logger')

    # Unpack training output for logging
    D = train_output['D']
    out_agent = train_output['out_agent']
    n = train_output['n']

    # Loggings
    # Use item() for tensor to save memory
    logger.log(key='train_iteration', val=n + 1)  # iteration starts from 1
    if self.config['algo:use_lr_scheduler']:
        logger.log(key='current_lr', val=out_agent['current_lr'])
    logger.log(key='loss', val=out_agent['loss'].item())
    policy_loss = torch.stack(out_agent['batch_policy_loss']).mean().item()
    logger.log(key='policy_loss', val=policy_loss)
    entropy_loss = torch.stack(out_agent['batch_entropy_loss']).mean().item()
    logger.log(key='policy_entropy', val=-entropy_loss)  # negation of entropy loss
    value_loss = torch.stack(out_agent['batch_value_loss']).mean().item()
    logger.log(key='value_loss', val=value_loss)

    # Get some data from trajectory list
    batch_returns = [trajectory.all_returns[0] for trajectory in D]
    batch_discounted_returns = [trajectory.all_discounted_returns[0] for trajectory in D]
    num_timesteps = sum([trajectory.T for trajectory in D])

    # Log more information
    logger.log(key='num_trajectories', val=len(D))
    logger.log(key='num_timesteps', val=num_timesteps)
    logger.log(key='accumulated_trained_timesteps', val=self.accumulated_trained_timesteps)
    logger.log(key='average_return', val=np.mean(batch_returns))
    logger.log(key='average_discounted_return', val=np.mean(batch_discounted_returns))
    logger.log(key='std_return', val=np.std(batch_returns))
    logger.log(key='min_return', val=np.min(batch_returns))
    logger.log(key='max_return', val=np.max(batch_returns))

    # Dump the loggings
    print('-' * 50)
    logger.dump(keys=None, index=None, indent=0)
    print('-' * 50)

    return logger
def log_eval(self, eval_output):
    # Create evaluation logger
    logger = Logger(name='eval_logger')

    # Unpack evaluation for logging
    D = eval_output['D']
    n = eval_output['n']

    # Compute some metrics
    batch_returns = [sum(trajectory.all_r) for trajectory in D]
    batch_T = [trajectory.T for trajectory in D]

    # Loggings
    # Use item() for tensor to save memory
    logger.log(key='evaluation_iteration', val=n + 1)
    logger.log(key='num_trajectories', val=len(D))
    logger.log(key='max_allowed_horizon', val=self.config['eval:T'])
    logger.log(key='average_horizon', val=np.mean(batch_T))
    logger.log(key='num_timesteps', val=np.sum(batch_T))
    logger.log(key='accumulated_trained_timesteps', val=self.accumulated_trained_timesteps)
    logger.log(key='average_return', val=np.mean(batch_returns))
    logger.log(key='std_return', val=np.std(batch_returns))
    logger.log(key='min_return', val=np.min(batch_returns))
    logger.log(key='max_return', val=np.max(batch_returns))

    # Dump the loggings
    print('-' * 50)
    logger.dump(keys=None, index=None, indent=0)
    print('-' * 50)

    return logger
def log_eval(self, eval_output, **kwargs):
    # Create evaluation logger
    logger = Logger(name='eval_logger')

    # Unpack evaluation for logging
    D = eval_output['D']
    n = eval_output['n']
    T = eval_output['T']

    # Loggings: use item() to save memory
    # Log something about trajectories
    batch_returns = [sum(trajectory.all_r) for trajectory in D]
    batch_T = [trajectory.T for trajectory in D]

    logger.log('evaluation_iteration', n + 1)
    logger.log('num_trajectories', len(D))
    logger.log('max_allowed_horizon', T)
    logger.log('average_horizon', np.mean(batch_T))
    logger.log('num_timesteps', np.sum(batch_T))
    logger.log('accumulated_trained_timesteps', self.agent.total_T)
    logger.log('average_return', np.mean(batch_returns))
    logger.log('std_return', np.std(batch_returns))
    logger.log('min_return', np.min(batch_returns))
    logger.log('max_return', np.max(batch_returns))

    # Dump loggings
    if n == 0 or (n + 1) % self.config['log.print_interval'] == 0:
        print(color_str('+' * 50, 'yellow', 'bold'))
        logger.dump(keys=None, index=None, indent=0)
        print(color_str('+' * 50, 'yellow', 'bold'))

    return logger.logs
def log_train(self, train_output, **kwargs):
    # Create training logger
    logger = Logger(name='train_logger')

    # Unpack training output for logging
    D = train_output['D']
    out_agent = train_output['out_agent']
    n = train_output['n']

    # Loggings: use item() to save memory
    logger.log('train_iteration', n + 1)  # iteration starts from 1
    if self.config['algo.use_lr_scheduler']:
        logger.log('current_lr', out_agent['current_lr'])
    logger.log('loss', out_agent['loss'])
    logger.log('policy_loss', out_agent['policy_loss'])
    logger.log('policy_entropy', -out_agent['entropy_loss'])  # negated entropy loss is the entropy
    logger.log('value_loss', out_agent['value_loss'])

    # Log something about segments
    all_immediate_reward = [segment.all_r for segment in D]
    num_timesteps = sum([segment.T for segment in D])

    logger.log('num_segments', len(D))
    logger.log('num_subsegments', sum([len(segment.trajectories) for segment in D]))
    logger.log('num_timesteps', num_timesteps)
    logger.log('accumulated_trained_timesteps', self.agent.total_T)
    logger.log('average_immediate_reward', np.mean(all_immediate_reward))
    logger.log('std_immediate_reward', np.std(all_immediate_reward))
    logger.log('min_immediate_reward', np.min(all_immediate_reward))
    logger.log('max_immediate_reward', np.max(all_immediate_reward))

    # Dump loggings
    if n == 0 or (n + 1) % self.config['log.print_interval'] == 0:
        print('-' * 50)
        logger.dump(keys=None, index=None, indent=0)
        print('-' * 50)

    return logger.logs
def log_train(self, train_output):
    # Create training logger
    logger = Logger(name='train_logger')

    # Unpack training output for logging
    D = train_output['D']
    out_agent = train_output['out_agent']
    n = train_output['n']

    # Loggings
    # Use item() for tensor to save memory
    logger.log(key='train_iteration', val=n + 1)  # iteration starts from 1
    if self.config['algo:use_lr_scheduler']:
        logger.log(key='current_lr', val=out_agent['current_lr'])
    logger.log(key='loss', val=out_agent['loss'].item())
    policy_loss = torch.stack(out_agent['batch_policy_loss']).mean().item()
    logger.log(key='policy_loss', val=policy_loss)
    entropy_loss = torch.stack(out_agent['batch_entropy_loss']).mean().item()
    logger.log(key='policy_entropy', val=-entropy_loss)  # negation of entropy loss
    value_loss = torch.stack(out_agent['batch_value_loss']).mean().item()
    logger.log(key='value_loss', val=value_loss)

    # Get some data from segment list
    all_immediate_reward = [segment.all_r for segment in D]
    num_timesteps = sum([segment.T for segment in D])

    # Log more information
    logger.log(key='num_segments', val=sum([len(segment.split_transitions) for segment in D]))
    logger.log(key='num_timesteps', val=num_timesteps)
    logger.log(key='accumulated_trained_timesteps', val=self.accumulated_trained_timesteps)
    logger.log(key='average_immediate_reward', val=np.mean(all_immediate_reward))
    logger.log(key='std_immediate_reward', val=np.std(all_immediate_reward))
    logger.log(key='min_immediate_reward', val=np.min(all_immediate_reward))
    logger.log(key='max_immediate_reward', val=np.max(all_immediate_reward))

    # Dump the loggings
    print('-' * 50)
    logger.dump(keys=None, index=None, indent=0)
    print('-' * 50)

    return logger
def log_train(self, train_output, **kwargs):
    # Create training logger
    logger = Logger(name='train_logger')

    # Unpack training output for logging
    D = train_output['D']
    out_agent = train_output['out_agent']
    n = train_output['n']

    # Loggings: use item() to save memory
    logger.log('train_iteration', n + 1)  # iteration starts from 1
    if self.config['algo.use_lr_scheduler']:
        logger.log('current_lr', out_agent['current_lr'])
    logger.log('loss', out_agent['loss'])
    logger.log('policy_loss', out_agent['policy_loss'])
    logger.log('policy_entropy', -out_agent['entropy_loss'])  # negated entropy loss is the entropy

    # Log something about trajectories
    batch_returns = [sum(trajectory.all_r) for trajectory in D]
    batch_discounted_returns = [trajectory.all_discounted_returns[0] for trajectory in D]
    num_timesteps = sum([trajectory.T for trajectory in D])

    logger.log('num_trajectories', len(D))
    logger.log('num_timesteps', num_timesteps)
    logger.log('accumulated_trained_timesteps', self.agent.total_T)
    logger.log('average_return', np.mean(batch_returns))
    logger.log('average_discounted_return', np.mean(batch_discounted_returns))
    logger.log('std_return', np.std(batch_returns))
    logger.log('min_return', np.min(batch_returns))
    logger.log('max_return', np.max(batch_returns))

    # Dump loggings
    if n == 0 or (n + 1) % self.config['log.print_interval'] == 0:
        print('-' * 50)
        logger.dump(keys=None, index=None, indent=0)
        print('-' * 50)

    return logger.logs
def test_logger(self):
    logger = Logger(name='logger')

    logger.log('iteration', 1)
    logger.log('learning_rate', 1e-3)
    logger.log('training_loss', 0.12)
    logger.log('evaluation_loss', 0.14)

    logger.log('iteration', 2)
    logger.log('learning_rate', 5e-4)
    logger.log('training_loss', 0.11)
    logger.log('evaluation_loss', 0.13)

    logger.log('iteration', 3)
    logger.log('learning_rate', 1e-4)
    logger.log('training_loss', 0.09)
    logger.log('evaluation_loss', 0.10)

    # Test dump: dump prints to stdout, so there is nothing to assert on
    logger.dump()
    logger.dump(keys=None, index=None, indent=1)
    logger.dump(keys=None, index=None, indent=2)
    logger.dump(keys=['iteration', 'evaluation_loss'], index=None, indent=0)
    logger.dump(keys=None, index=0, indent=0)
    logger.dump(keys=None, index=2, indent=0)
    logger.dump(keys=None, index=[0, 2], indent=0)
    logger.dump(keys=['iteration', 'training_loss'], index=[0, 2], indent=0)

    # Test save function
    file = './test_logger_file'
    logger.save(file=file)
    assert os.path.exists(file)

    # Load file
    logging = Logger.load(file)

    assert len(logging) == 4
    assert 'iteration' in logging
    assert 'learning_rate' in logging
    assert 'training_loss' in logging
    assert 'evaluation_loss' in logging

    assert np.allclose(logging['iteration'], [1, 2, 3])
    assert np.allclose(logging['learning_rate'], [1e-3, 5e-4, 1e-4])
    assert np.allclose(logging['training_loss'], [0.12, 0.11, 0.09])
    assert np.allclose(logging['evaluation_loss'], [0.14, 0.13, 0.1])

    # Delete the temp logger file
    os.unlink(file)
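# Illustrative only: the minimal Logger workflow exercised by the test above.
# Each call to log() appends a value under its key, so the loaded result is a
# dict-like mapping from key to the list of logged values. File name and loop
# values here are placeholders.
logger = Logger(name='example_logger')
for it, lr in zip([1, 2, 3], [1e-3, 5e-4, 1e-4]):
    logger.log('iteration', it)
    logger.log('learning_rate', lr)
logger.dump(keys=['iteration'], index=-1, indent=0)  # print only the latest iteration
logger.save(file='./example_logger_file')
logs = Logger.load('./example_logger_file')  # e.g. logs['iteration'] == [1, 2, 3]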