Example #1
    def __init__(self, params, step_size=1, b1=0.9, b2=0.999, bp1=0, decay=0, power=1,
                 biased_g1=False):
        """Initializes the optimizer."""
        self.params = params
        self.step_size = step_size
        self.decay, self.power = decay, power

        self.i = 1
        self.xy = np.zeros(2, dtype=np.int32)
        self.g1 = EWMA.like(params, b1, correct_bias=not biased_g1)
        self.g2 = EWMA.like(params, b2)
        self.p1 = EWMA.like(params, bp1)
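
The b1/b2 defaults above (0.9 and 0.999) mirror Adam's first- and second-moment decay rates, and EWMA.like allocates an accumulator with the same shape and dtype as params, so g1 and g2 can track the gradient and the squared gradient elementwise while p1 smooths the parameters themselves. For illustration only, an update step for such an optimizer might look like the sketch below; the step method, its grad argument, the epsilon, and the decay schedule are assumptions, not the project's actual code.

    def step(self, grad):
        # Hypothetical Adam-style update built on the EWMA accumulators.
        m = self.g1.update(grad)         # running mean of gradients
        v = self.g2.update(grad * grad)  # running mean of squared gradients
        # Assumed inverse-time decay schedule driven by decay/power.
        lr = self.step_size / (1 + self.decay * self.i) ** self.power
        self.params -= lr * m / (np.sqrt(v) + 1e-8)
        self.p1.update(self.params)      # smoothed copy of the parameters
        self.i += 1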
Example #2
File: ewma.py Project: ml-edu/average
    def test_like(self):
        arr = np.float32(np.eye(4))
        avg = EWMA.like(arr)
        out = avg.update(np.eye(4))
        self.assertEqual(out.shape, arr.shape)
        self.assertEqual(out.dtype, arr.dtype)
Example #3
File: ewma.py Project: ml-edu/average
    def test_get_est(self):
        avg = EWMA(beta=0.5)
        avg.update(0)
        out = avg.get_est(1)
        self.assertEqual(out, 2 / 3)
Example #4
File: ewma.py Project: ml-edu/average
    def test_returns_array_nan(self):
        avg = EWMA((4, 4))
        out = avg.get()
        self.assertTrue(np.isnan(out).all())
Example #5
File: ewma.py Project: ml-edu/average
    def test_returns_scalar_nan(self):
        avg = EWMA()
        out = avg.get()
        # NaN is the only value that compares unequal to itself.
        self.assertNotEqual(out, out)
Example #6
File: ewma.py Project: ml-edu/average
    def test_raises_on_wrong_type(self):
        avg = EWMA((4, 4))
        with self.assertRaises(TypeError):
            avg.update('abc')
Example #7
File: ewma.py Project: ml-edu/average
    def test_raises_on_wrong_shape(self):
        avg = EWMA((4, 4))
        with self.assertRaises(ValueError):
            avg.update(np.eye(3))
Example #8
File: ewma.py Project: ml-edu/average
    def test_returns_float(self):
        avg = EWMA()
        avg.update(1)
        self.assertEqual(type(avg.get()), float)
Example #9
File: ewma.py Project: ml-edu/average
    def test_array_once(self):
        avg = EWMA((4, 4))
        avg.update(np.eye(4))
        self.assertTrue((avg.get() == np.eye(4)).all())
Example #10
File: ewma.py Project: ml-edu/average
    def test_scalar_twice(self):
        avg = EWMA(beta=0.5)
        avg.update(0)
        avg.update(1)
        self.assertEqual(avg.get(), 2 / 3)
Example #11
File: ewma.py Project: ml-edu/average
    def test_scalar_once(self):
        avg = EWMA()
        avg.update(1)
        self.assertEqual(avg.get(), 1)
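
Taken together, these tests pin down the EWMA contract: shape- and dtype-aware accumulation, NaN before the first update, bias-corrected averaging (two updates of 0 and 1 with beta=0.5 yield 0.5 / (1 - 0.5^2) = 2/3), and get_est as a one-step-lookahead estimate. The class below is a minimal sketch that satisfies the tests above; it is a reconstruction, not the actual ml-edu/average implementation, and the default beta in particular is a guess.

import numpy as np

class EWMA:
    # Bias-corrected exponentially weighted moving average (sketch).

    def __init__(self, shape=(), beta=0.9, dtype=np.float64, correct_bias=True):
        self.beta = beta
        self.correct_bias = correct_bias
        self.biased = np.zeros(shape, dtype)  # uncorrected running average
        self.t = 0                            # number of updates so far

    @classmethod
    def like(cls, arr, beta=0.9, correct_bias=True):
        # Build an averager matching the shape and dtype of an existing array.
        arr = np.asarray(arr)
        return cls(arr.shape, beta, arr.dtype, correct_bias)

    def update(self, value):
        value = np.asarray(value)
        if not np.issubdtype(value.dtype, np.number):
            raise TypeError('update value must be numeric')
        if value.shape != self.biased.shape:
            raise ValueError('update value has the wrong shape')
        self.t += 1
        self.biased = (self.beta * self.biased +
                       (1 - self.beta) * value.astype(self.biased.dtype))
        return self.get()

    def _correct(self, biased, t):
        # Divide out the bias toward the zero initialization.
        est = biased / (1 - self.beta ** t) if self.correct_bias else biased
        return float(est) if est.ndim == 0 else est

    def get(self):
        if self.t == 0:
            # No updates yet: return NaN of the right shape.
            nan = np.full(self.biased.shape, np.nan)
            return float(nan) if nan.ndim == 0 else nan
        return self._correct(self.biased, self.t)

    def get_est(self, value):
        # Estimate of the average as if `value` were the next update.
        biased = self.beta * self.biased + (1 - self.beta) * np.asarray(value)
        return self._correct(biased, self.t + 1)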
Example #12

import os

import torch
from colorama import Fore, Style
from ray.tune import Trainable

# Project-local helpers (inject_tuned_hyperparameters, setup_training,
# flatten_dict, get_optimizers, training_step, Evaluation, EWMA) are
# assumed to come from this project's own modules.
class TuneTrainable(Trainable):
    def _setup(self, config):
        inject_tuned_hyperparameters(config, config)
        os.chdir(os.path.dirname(os.path.realpath(__file__)))
        print('Trainable got the following config after injection', config)
        self.config = config
        self.device = self.config['device']
        self.exp, self.model, self.train_dataloader, self.eval_dataloader = setup_training(
            self.config)
        self.exp.set_name(config['experiment_name'] + self._experiment_id)
        self.exp_name = config['experiment_name'] + self._experiment_id
        self.exp.send_notification(title='Experiment ' +
                                   str(self._experiment_id) + ' ended')
        self.train_data_iter = iter(self.train_dataloader)
        self.model = self.model.to(self.device)
        self.model.train()
        n_params = sum(p.numel() for p in self.model.parameters()
                       if p.requires_grad)
        log_dict = flatten_dict(config)
        log_dict.update({'trainable_params': n_params})
        self.exp.log_parameters(log_dict)
        self.optimizers = get_optimizers(self.model, self.config)
        self.evaluator = Evaluation(self.eval_dataloader, self.config)
        self.num_examples = 0
        self.batch_idx = 0
        self.epoch = 1
        self.ewma = EWMA(beta=0.75)
        self.last_accu = -1.0
        self.max_accu = -1.0
        self.back_prop_every_n_batches = config['training'][
            'back_prop_every_n_batches']
        self.checkpoint_best = config['training']['checkpoint_best']

    def get_batch(self):
        try:
            batch = next(self.train_data_iter)
            return batch

        except StopIteration:
            self.train_data_iter = iter(self.train_dataloader)
            batch = next(self.train_data_iter)
            self.batch_idx = 0
            self.epoch += 1
            return batch

    def _train(self):
        total_log_step_loss = 0
        total_log_step_train_accu = 0
        total_log_step_n = 0

        for opt in self.optimizers:
            opt.zero_grad()
        while True:
            batch = self.get_batch()
            self.batch_idx += 1
            self.num_examples += len(batch[0])
            batch = (batch[0].to(self.device), batch[1].to(self.device))
            loss, train_accu = training_step(
                batch,
                self.model,
                self.optimizers,
                step=(self.batch_idx % self.back_prop_every_n_batches == 0))
            total_log_step_loss += loss.cpu().detach().numpy()
            total_log_step_train_accu += train_accu
            total_log_step_n += 1

            if self.batch_idx % self.config['training'][
                    'log_every_n_batches'] == 0:
                avg_loss = total_log_step_loss / total_log_step_n
                avg_accu = total_log_step_train_accu / total_log_step_n
                total_log_step_n = 0
                print(f'{Fore.YELLOW}Total number of seen examples:',
                      self.num_examples, 'Average loss of current log step:',
                      avg_loss, 'Average train accuracy of current log step:',
                      avg_accu, f"{Style.RESET_ALL}")
                self.exp.log_metric('train_loss',
                                    avg_loss,
                                    step=self.num_examples,
                                    epoch=self.epoch)
                self.exp.log_metric('train_accuracy',
                                    avg_accu,
                                    step=self.num_examples,
                                    epoch=self.epoch)
                total_log_step_loss = 0
                total_log_step_train_accu = 0

            if (self.batch_idx +
                    1) % self.config['training']['eval_every_n_batches'] == 0:
                results, assets, image_fns = self.evaluator.eval_model(
                    self.model)
                metric_name = self.config['tune']['discriminating_metric']
                accu = results[metric_name]
                print(metric_name, accu)
                self.exp.log_metrics(results,
                                     step=self.num_examples,
                                     epoch=self.epoch)
                for asset in assets:
                    self.exp.log_asset_data(asset, step=self.num_examples)
                for fn in image_fns:
                    self.exp.log_image(fn, step=self.num_examples)

                accu_diff_avg = abs(accu - self.ewma.get())
                accu_diff_cons = abs(accu - self.last_accu)

                # Flag a plateau: negligible movement against both the running
                # average and the previous evaluation, once enough examples
                # have been seen.
                no_change_in_accu = int(accu_diff_avg < 0.0005
                                        and accu_diff_cons < 0.002
                                        and self.num_examples > 70000)
                self.ewma.update(accu)
                self.last_accu = accu

                if self.max_accu < accu:
                    self.max_accu = accu
                    if self.checkpoint_best:
                        self.save_checkpoint('checkpoints',
                                             self.exp_name + '.pt')
                        print(
                            f'{Fore.GREEN}New best model saved.{Style.RESET_ALL}'
                        )

                self.exp.log_metric('max_accuracy',
                                    self.max_accu,
                                    step=self.num_examples,
                                    epoch=self.epoch)

                training_results = {
                    metric_name: self.max_accu,
                    'num_examples': self.num_examples,
                    'no_change_in_accu': no_change_in_accu
                }

                return training_results

    def _save(self, checkpoint_dir):
        return self.save_checkpoint(checkpoint_dir, 'checkpoint_file.pt')

    def save_checkpoint(self, checkpoint_dir, fname='checkpoint_file.pt'):
        print(f'{Fore.CYAN}Saving model ...{Style.RESET_ALL}')
        save_dict = {'model_state_dict': self.model.state_dict()}
        for i, optimizer in enumerate(self.optimizers):
            save_dict['op_' + str(i) + '_state_dict'] = optimizer.state_dict()
        torch.save(save_dict, os.path.join(checkpoint_dir, fname))
        return os.path.join(checkpoint_dir, fname)

    def _restore(self, checkpoint_path):
        checkpoint = torch.load(checkpoint_path)
        self.model.load_state_dict(checkpoint['model_state_dict'])

        for i, optimizer in enumerate(self.optimizers):
            optimizer.load_state_dict(checkpoint['op_' + str(i) +
                                                 '_state_dict'])

    def stop(self):
        results, assets, image_fns = self.evaluator.eval_model(
            self.model, finished_training=True)
        self.exp.log_metrics(results, step=self.num_examples, epoch=self.epoch)
        for asset in assets:
            self.exp.log_asset_data(asset, step=self.num_examples)
        for fn in image_fns:
            self.exp.log_image(fn, step=self.num_examples)

        return super().stop()
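
The underscore-prefixed hooks (_setup, _train, _save, _restore) follow the legacy Ray Tune Trainable API, in which _train runs one logical training iteration and its returned dict feeds the scheduler and stopping criteria. A launch could look like the sketch below; every config value shown is a placeholder, chosen only to satisfy the keys _setup and _train read, not the project's real configuration.

from ray import tune

# Hypothetical config; real values come from the project's config files.
config = {
    'device': 'cuda',
    'experiment_name': 'demo',
    'training': {
        'back_prop_every_n_batches': 1,
        'checkpoint_best': True,
        'log_every_n_batches': 100,
        'eval_every_n_batches': 500,
    },
    'tune': {'discriminating_metric': 'accuracy'},
}

# Stop a trial once _train reports a plateau (no_change_in_accu == 1).
tune.run(TuneTrainable, config=config, stop={'no_change_in_accu': 1})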