Example #1
class ESMaster(BaseESMaster):
    def _network_size(self):
        worker = ESWorker()
        tmp_agent = worker.init(seed=0, config=self.config)
        num_params = worker.network.num_params
        
        del worker, tmp_agent
        
        return num_params
    
    def make_es(self, config):
        if self.config['es.algo'] == 'CMAES':
            es = CMAES(mu0=[self.config['es.mu0']]*self._network_size(),
                       std0=self.config['es.std0'], 
                       popsize=self.config['es.popsize'])
        elif self.config['es.algo'] == 'OpenAIES':
            es = OpenAIES(mu0=[self.config['es.mu0']]*self._network_size(), 
                          std0=self.config['es.std0'], 
                          popsize=self.config['es.popsize'], 
                          std_decay=0.999,
                          min_std=0.01, 
                          lr=5e-2, 
                          lr_decay=0.99, 
                          min_lr=1e-3, 
                          antithetic=True, 
                          rank_transform=True)
        
        self.logger = Logger()
        
        return es
        
    def _process_es_result(self, result):
        best_f_val = result['best_f_val']
        best_return = -best_f_val  # negate to recover the return
        
        # logging
        self.logger.log('generation', self.generation)
        self.logger.log('best_return', best_return)
        
        if self.generation == 0 or (self.generation+1) % self.config['log.interval'] == 0:
            print('-'*50)
            self.logger.dump(keys=None, index=-1, indent=0)
            print('-'*50)
            
        # Save the logs and the final parameters
        if (self.generation+1) == self.num_iteration:
            pickle_dump(obj=self.logger.logs, f=self.logdir/'result', ext='.pkl')
            np.save(self.logdir/'trained_param', result['best_param'])
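A minimal sketch of inspecting the artifacts that _process_es_result saves above. It assumes that pickle_dump(obj=..., ext='.pkl') writes an ordinary pickle file named result.pkl, that logger.logs is the same key-to-list mapping exercised in Example #10, and that the log directory path below is purely hypothetical.

import pickle
from pathlib import Path

import numpy as np

logdir = Path('logs/experiment_0')  # hypothetical log directory

# Load the pickled Logger.logs mapping (key -> list of logged values)
with open(logdir / 'result.pkl', 'rb') as f:
    logs = pickle.load(f)

best_returns = logs['best_return']  # one entry per logged generation
# np.save appends the usual '.npy' suffix to 'trained_param'
trained_param = np.load(logdir / 'trained_param.npy')

print(f'generations logged: {len(best_returns)}')
print(f'final best return:  {best_returns[-1]}')
print(f'parameter count:    {trained_param.size}')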
Example #2
    def train(self, n=None):
        self.agent.train()  # set to training mode

        # Create a logger
        train_output = Logger()

        # Iterate over data batches for one epoch
        for i, (data, label) in enumerate(self.train_loader):
            # Put data to device
            data = data.to(self.device)
            # Zero-out gradient buffer
            self.optimizer.zero_grad()
            # Forward pass of data
            re_x, mu, logvar = self.agent(data)
            # Calculate loss
            out = self.agent.calculate_loss(re_x=re_x,
                                            x=data,
                                            mu=mu,
                                            logvar=logvar,
                                            loss_type='BCE')
            loss = out['loss']
            # Backward pass to calculate gradients
            loss.backward()
            # Take a gradient step
            self.optimizer.step()

            # Record train output
            train_output.log('epoch', n)
            train_output.log('iteration', i)
            train_output.log('train_loss',
                             out['loss'].item())  # item() saves memory
            train_output.log('reconstruction_loss', out['re_loss'].item())
            train_output.log('KL_loss', out['KL_loss'].item())

            # Dump logging
            if i == 0 or (i + 1) % self.config['log.interval'] == 0:
                print('-' * 50)
                train_output.dump(keys=None, index=-1, indent=0)
                print('-' * 50)

        return train_output.logs
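The train() method above returns train_output.logs rather than printing an epoch summary, so a caller can aggregate it. Below is a minimal sketch under the assumption that Logger.logs maps each key to the list of values logged in order (the behaviour asserted in Example #10); the trainer argument is a hypothetical object exposing the train() method shown above.

import numpy as np

def run_training(trainer, num_epochs=10):
    for epoch in range(num_epochs):
        # Keys logged above: 'epoch', 'iteration', 'train_loss',
        # 'reconstruction_loss', 'KL_loss'
        logs = trainer.train(n=epoch)
        mean_loss = np.mean(logs['train_loss'])  # average over this epoch's batches
        mean_kl = np.mean(logs['KL_loss'])
        print(f'epoch {epoch}: train_loss={mean_loss:.4f}, KL_loss={mean_kl:.4f}')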
Example #3
    def eval(self, n=None):
        self.agent.eval()  # set to evaluation mode

        # Create a logger
        eval_output = Logger()

        # Iterate over test batches
        for i, (data, label) in enumerate(self.test_loader):
            # Put data to device
            data = data.to(self.device)
            with torch.no_grad():  # fast, disable grad
                # Forward pass of data
                re_x, mu, logvar = self.agent(data)
                # Calculate loss
                out = self.agent.calculate_loss(re_x=re_x,
                                                x=data,
                                                mu=mu,
                                                logvar=logvar,
                                                loss_type='BCE')

            # Record eval output
            eval_output.log('eval_loss', out['loss'].item())

        return eval_output.logs
Example #4
    def log_train(self, train_output):
        # Create training logger
        logger = Logger(name='train_logger')

        # Unpack training output for logging
        D = train_output['D']
        out_agent = train_output['out_agent']
        n = train_output['n']

        # Logging
        # Use item() on tensors to save memory
        logger.log(key='train_iteration', val=n + 1)  # iteration starts from 1
        if self.config['algo:use_lr_scheduler']:
            logger.log(key='current_lr', val=out_agent['current_lr'])

        logger.log(key='loss', val=out_agent['loss'].item())
        policy_loss = torch.stack(out_agent['batch_policy_loss']).mean().item()
        logger.log(key='policy_loss', val=policy_loss)
        entropy_loss = torch.stack(
            out_agent['batch_entropy_loss']).mean().item()
        logger.log(key='policy_entropy',
                   val=-entropy_loss)  # negation of entropy loss
        value_loss = torch.stack(out_agent['batch_value_loss']).mean().item()
        logger.log(key='value_loss', val=value_loss)

        # Get some data from trajectory list
        batch_returns = [trajectory.all_returns[0] for trajectory in D]
        batch_discounted_returns = [
            trajectory.all_discounted_returns[0] for trajectory in D
        ]
        num_timesteps = sum([trajectory.T for trajectory in D])

        # Log more information
        logger.log(key='num_trajectories', val=len(D))
        logger.log(key='num_timesteps', val=num_timesteps)
        logger.log(key='accumulated_trained_timesteps',
                   val=self.accumulated_trained_timesteps)
        logger.log(key='average_return', val=np.mean(batch_returns))
        logger.log(key='average_discounted_return',
                   val=np.mean(batch_discounted_returns))
        logger.log(key='std_return', val=np.std(batch_returns))
        logger.log(key='min_return', val=np.min(batch_returns))
        logger.log(key='max_return', val=np.max(batch_returns))

        # Dump the logs
        print('-' * 50)
        logger.dump(keys=None, index=None, indent=0)
        print('-' * 50)

        return logger
Example #5
    def log_eval(self, eval_output):
        # Create evaluation logger
        logger = Logger(name='eval_logger')

        # Unpack evaluation for logging
        D = eval_output['D']
        n = eval_output['n']

        # Compute some metrics
        batch_returns = [sum(trajectory.all_r) for trajectory in D]
        batch_T = [trajectory.T for trajectory in D]

        # Logging
        # Use item() on tensors to save memory
        logger.log(key='evaluation_iteration', val=n + 1)
        logger.log(key='num_trajectories', val=len(D))
        logger.log(key='max_allowed_horizon', val=self.config['eval:T'])
        logger.log(key='average_horizon', val=np.mean(batch_T))
        logger.log(key='num_timesteps', val=np.sum(batch_T))
        logger.log(key='accumulated_trained_timesteps',
                   val=self.accumulated_trained_timesteps)
        logger.log(key='average_return', val=np.mean(batch_returns))
        logger.log(key='std_return', val=np.std(batch_returns))
        logger.log(key='min_return', val=np.min(batch_returns))
        logger.log(key='max_return', val=np.max(batch_returns))

        # Dump the logs
        print('-' * 50)
        logger.dump(keys=None, index=None, indent=0)
        print('-' * 50)

        return logger
Example #6
    def log_eval(self, eval_output, **kwargs):
        # Create evaluation logger
        logger = Logger(name='eval_logger')

        # Unpack evaluation for logging
        D = eval_output['D']
        n = eval_output['n']
        T = eval_output['T']

        # Logging: use item() to save memory
        # Log something about trajectories
        batch_returns = [sum(trajectory.all_r) for trajectory in D]
        batch_T = [trajectory.T for trajectory in D]

        logger.log('evaluation_iteration', n + 1)
        logger.log('num_trajectories', len(D))
        logger.log('max_allowed_horizon', T)
        logger.log('average_horizon', np.mean(batch_T))
        logger.log('num_timesteps', np.sum(batch_T))
        logger.log('accumulated_trained_timesteps', self.agent.total_T)
        logger.log('average_return', np.mean(batch_returns))
        logger.log('std_return', np.std(batch_returns))
        logger.log('min_return', np.min(batch_returns))
        logger.log('max_return', np.max(batch_returns))

        # Dump the logs
        if n == 0 or (n + 1) % self.config['log.print_interval'] == 0:
            print(color_str('+' * 50, 'yellow', 'bold'))
            logger.dump(keys=None, index=None, indent=0)
            print(color_str('+' * 50, 'yellow', 'bold'))

        return logger.logs
Example #7
    def log_train(self, train_output, **kwargs):
        # Create training logger
        logger = Logger(name='train_logger')

        # Unpack training output for logging
        D = train_output['D']
        out_agent = train_output['out_agent']
        n = train_output['n']

        # Logging: use item() to save memory
        logger.log('train_iteration', n + 1)  # iteration starts from 1
        if self.config['algo.use_lr_scheduler']:
            logger.log('current_lr', out_agent['current_lr'])

        logger.log('loss', out_agent['loss'])
        logger.log('policy_loss', out_agent['policy_loss'])
        logger.log(
            'policy_entropy',
            -out_agent['entropy_loss'])  # negating the entropy loss gives the entropy
        logger.log('value_loss', out_agent['value_loss'])

        # Log something about segments
        all_immediate_reward = [segment.all_r for segment in D]
        num_timesteps = sum([segment.T for segment in D])

        logger.log('num_segments', len(D))
        logger.log('num_subsegments',
                   sum([len(segment.trajectories) for segment in D]))
        logger.log('num_timesteps', num_timesteps)
        logger.log('accumulated_trained_timesteps', self.agent.total_T)
        logger.log('average_immediate_reward', np.mean(all_immediate_reward))
        logger.log('std_immediate_reward', np.std(all_immediate_reward))
        logger.log('min_immediate_reward', np.min(all_immediate_reward))
        logger.log('max_immediate_reward', np.max(all_immediate_reward))

        # Dump the logs
        if n == 0 or (n + 1) % self.config['log.print_interval'] == 0:
            print('-' * 50)
            logger.dump(keys=None, index=None, indent=0)
            print('-' * 50)

        return logger.logs
Example #8
    def log_train(self, train_output):
        # Create training logger
        logger = Logger(name='train_logger')

        # Unpack training output for logging
        D = train_output['D']
        out_agent = train_output['out_agent']
        n = train_output['n']

        # Logging
        # Use item() on tensors to save memory
        logger.log(key='train_iteration', val=n + 1)  # iteration starts from 1
        if self.config['algo:use_lr_scheduler']:
            logger.log(key='current_lr', val=out_agent['current_lr'])

        logger.log(key='loss', val=out_agent['loss'].item())
        policy_loss = torch.stack(out_agent['batch_policy_loss']).mean().item()
        logger.log(key='policy_loss', val=policy_loss)
        entropy_loss = torch.stack(
            out_agent['batch_entropy_loss']).mean().item()
        logger.log(key='policy_entropy',
                   val=-entropy_loss)  # negation of entropy loss
        value_loss = torch.stack(out_agent['batch_value_loss']).mean().item()
        logger.log(key='value_loss', val=value_loss)

        # Get some data from segment list
        all_immediate_reward = [segment.all_r for segment in D]
        num_timesteps = sum([segment.T for segment in D])

        # Log more information
        logger.log(key='num_segments',
                   val=sum([len(segment.split_transitions) for segment in D]))
        logger.log(key='num_timesteps', val=num_timesteps)
        logger.log(key='accumulated_trained_timesteps',
                   val=self.accumulated_trained_timesteps)
        logger.log(key='average_immediate_reward',
                   val=np.mean(all_immediate_reward))
        logger.log(key='std_immediate_reward',
                   val=np.std(all_immediate_reward))
        logger.log(key='min_immediate_reward',
                   val=np.min(all_immediate_reward))
        logger.log(key='max_immediate_reward',
                   val=np.max(all_immediate_reward))

        # Dump the logs
        print('-' * 50)
        logger.dump(keys=None, index=None, indent=0)
        print('-' * 50)

        return logger
Example #9
    def log_train(self, train_output, **kwargs):
        # Create training logger
        logger = Logger(name='train_logger')

        # Unpack training output for logging
        D = train_output['D']
        out_agent = train_output['out_agent']
        n = train_output['n']

        # Logging: use item() to save memory
        logger.log('train_iteration', n + 1)  # iteration starts from 1
        if self.config['algo.use_lr_scheduler']:
            logger.log('current_lr', out_agent['current_lr'])

        logger.log('loss', out_agent['loss'])
        logger.log('policy_loss', out_agent['policy_loss'])
        logger.log(
            'policy_entropy',
            -out_agent['entropy_loss'])  # negating the entropy loss gives the entropy

        # Log something about trajectories
        batch_returns = [sum(trajectory.all_r) for trajectory in D]
        batch_discounted_returns = [
            trajectory.all_discounted_returns[0] for trajectory in D
        ]
        num_timesteps = sum([trajectory.T for trajectory in D])

        logger.log('num_trajectories', len(D))
        logger.log('num_timesteps', num_timesteps)
        logger.log('accumulated_trained_timesteps', self.agent.total_T)
        logger.log('average_return', np.mean(batch_returns))
        logger.log('average_discounted_return',
                   np.mean(batch_discounted_returns))
        logger.log('std_return', np.std(batch_returns))
        logger.log('min_return', np.min(batch_returns))
        logger.log('max_return', np.max(batch_returns))

        # Dump the logs
        if n == 0 or (n + 1) % self.config['log.print_interval'] == 0:
            print('-' * 50)
            logger.dump(keys=None, index=None, indent=0)
            print('-' * 50)

        return logger.logs
Example #10
    def test_logger(self):
        logger = Logger(name='logger')

        logger.log('iteration', 1)
        logger.log('learning_rate', 1e-3)
        logger.log('training_loss', 0.12)
        logger.log('evaluation_loss', 0.14)

        logger.log('iteration', 2)
        logger.log('learning_rate', 5e-4)
        logger.log('training_loss', 0.11)
        logger.log('evaluation_loss', 0.13)

        logger.log('iteration', 3)
        logger.log('learning_rate', 1e-4)
        logger.log('training_loss', 0.09)
        logger.log('evaluation_loss', 0.10)

        # Exercise dump; it only prints to stdout, so there is nothing to assert on
        logger.dump()
        logger.dump(keys=None, index=None, indent=1)
        logger.dump(keys=None, index=None, indent=2)
        logger.dump(keys=['iteration', 'evaluation_loss'],
                    index=None,
                    indent=0)
        logger.dump(keys=None, index=0, indent=0)
        logger.dump(keys=None, index=2, indent=0)
        logger.dump(keys=None, index=[0, 2], indent=0)
        logger.dump(keys=['iteration', 'training_loss'],
                    index=[0, 2],
                    indent=0)

        # Test save function
        file = './test_logger_file'
        logger.save(file=file)

        assert os.path.exists(file)

        # Load file
        logging = Logger.load(file)

        assert len(logging) == 4
        assert 'iteration' in logging
        assert 'learning_rate' in logging
        assert 'training_loss' in logging
        assert 'evaluation_loss' in logging

        assert np.allclose(logging['iteration'], [1, 2, 3])
        assert np.allclose(logging['learning_rate'], [1e-3, 5e-4, 1e-4])
        assert np.allclose(logging['training_loss'], [0.12, 0.11, 0.09])
        assert np.allclose(logging['evaluation_loss'], [0.14, 0.13, 0.1])

        # Delete the temp logger file
        os.unlink(file)
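A condensed sketch tying together the Logger surface exercised across these examples: per-step log calls, a periodic dump, and the save/load round trip from Example #10. The Logger class is passed in as logger_cls rather than imported, since none of the snippets above show its import path; everything else follows only the calls demonstrated above.

import numpy as np

def demo_logger_roundtrip(logger_cls, file='./demo_logger_file'):
    logger = logger_cls(name='demo_logger')

    for step in range(100):
        logger.log('iteration', step + 1)
        logger.log('loss', 1.0 / (step + 1))  # illustrative value

        # Dump periodically, as the training/evaluation snippets above do
        if step == 0 or (step + 1) % 20 == 0:
            logger.dump(keys=None, index=-1, indent=0)

    # Persist and reload, as exercised in Example #10
    logger.save(file=file)
    history = logger_cls.load(file)
    assert np.allclose(history['iteration'], np.arange(1, 101))
    return history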