def run_training_loop(self):
    """### Run training loop"""
    offset = tracker.get_global_step()
    if offset > 100:
        # If resumed, sample several iterations first to reduce sampling bias
        for i in range(16):
            self.sample(False)
    for _ in monit.loop(self.c.updates - offset):
        update = tracker.get_global_step()
        progress = update / self.c.updates
        # sample with current policy
        samples = self.sample()
        # train the model
        self.train(samples)
        # write summary info to the writer, and log to the screen
        tracker.save()
        if (update + 1) % 2 == 0:
            self.set_optim(self.c.lr(), self.c.reg_l2())
            self.set_game_param(self.c.right_gain(), self.c.fix_prob(),
                                self.c.neg_mul(), self.c.step_reward())
            self.set_weight_param(self.c.entropy_weight(), self.c.prob_reg_weight(),
                                  self.c.target_prob_weight(), self.c.gamma(),
                                  self.c.lamda())
        if (update + 1) % 25 == 0:
            logger.log()
        if (update + 1) % 200 == 0:
            experiment.save_checkpoint()

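# A minimal usage sketch (my own demo, not code from this repo): `monit.loop`
# drives the labml progress display and `tracker.save()` flushes queued metrics,
# so a resumed run picks up from the stored global step instead of restarting.
from labml import experiment, monit, tracker

def demo_resumable_loop(total_updates: int = 1000):
    experiment.create(name='loop_demo')
    with experiment.start():
        offset = tracker.get_global_step()  # non-zero when the run was resumed
        for _ in monit.loop(total_updates - offset):
            tracker.add('loss', 0.0)  # placeholder metric
            tracker.add_global_step()
            tracker.save()
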
def _collect_value(self, key: str, value):
    from labml import tracker
    if self._density is None:
        self._values[key] = value
    else:
        steps = tracker.get_global_step() - self._last_add_step
        steps *= self._density
        if np.random.uniform() < 1 - 0.99 ** steps:
            self._values[key] = value
            self._last_add_step = tracker.get_global_step()

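# The thinning rule above keeps a value with probability 1 - 0.99 ** (elapsed * density),
# so the chance of recording grows with the gap since the last value kept. A
# self-contained sketch of the same rule (class and names are mine):
import numpy as np

class ThinnedRecorder:
    def __init__(self, density: float):
        self.density = density
        self.last_step = 0
        self.values = {}

    def collect(self, key: str, value, step: int):
        elapsed = (step - self.last_step) * self.density
        # keep with probability 1 - 0.99 ** elapsed; longer gaps are more likely kept
        if np.random.uniform() < 1 - 0.99 ** elapsed:
            self.values[key] = value
            self.last_step = step
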
def __call__(self):
    if not self._change_tracked or self._last_tracked_step != tracker.get_global_step():
        if self._key is None:
            warnings.warn('Register dynamic schedules with `experiment.configs` '
                          'to update them live from the app')
        else:
            tracker.add(f'hp.{self._key}', self._value)
        self._change_tracked = True
        self._last_tracked_step = tracker.get_global_step()
    return self._value

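# A hedged usage sketch: `FloatDynamicHyperParam` from `labml.configs` exposes its
# value through `__call__` as above; registering it via `experiment.configs` is what
# lets the app update it live (otherwise the warning fires). The range argument and
# demo names are assumptions.
from labml import experiment
from labml.configs import FloatDynamicHyperParam

lr = FloatDynamicHyperParam(2.5e-4, (0, 1e-3))
experiment.create(name='dynamic_hp_demo')
experiment.configs({'lr': lr})
current_lr = lr()  # reads the (possibly live-updated) value; tracked as `hp.lr`
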
def run_training_loop(self):
    """### Run training loop"""
    offset = tracker.get_global_step()
    for _ in monit.loop(self.c.updates - offset):
        update = tracker.get_global_step()
        progress = update / self.c.updates
        # sample with current policy
        samples = self.sample()
        # train the model
        self.train(samples)
        # write summary info to the writer, and log to the screen
        tracker.save()
        logger.log()
        if (update + 1) % 500 == 0:
            experiment.save_checkpoint()

def train_epoch(self, model: nn.Module, data_loader: DataLoader, name: str):
    """
    Train/Validate for an epoch
    """
    model.train(name == 'train')
    correct_predictions = 0
    total = 0
    total_loss = 0
    with torch.set_grad_enabled(name == 'train'):
        for i, data in monit.enum(name, data_loader):
            input_ids = data["input_ids"].to(self.device)
            attention_mask = data["attention_mask"].to(self.device)
            targets = data["targets"].to(self.device)
            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            _, preds = torch.max(outputs, dim=1)
            loss = self.loss_fn(outputs, targets)
            total_loss += loss.item() * len(preds)
            correct_predictions += torch.sum(preds == targets).item()
            total += len(preds)
            tracker.add('loss.', loss)
            if name == 'train':
                tracker.add_global_step(len(preds))
                loss.backward()
                nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
                self.optimizer.step()
                self.optimizer.zero_grad()
                if (i + 1) % 10 == 0:
                    tracker.save()
    tracker.save('accuracy.', correct_predictions / total)
    mlflow.log_metric(f"{name}_acc", float(correct_predictions / total),
                      step=tracker.get_global_step())
    mlflow.log_metric(f"{name}_loss", float(total_loss / total),
                      step=tracker.get_global_step())
    return correct_predictions / total, total_loss / total

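# A minimal sketch (helper name is mine) of the dual-logging pattern above: labml's
# global step advances by batch size, and the same step indexes the mlflow metrics,
# so both dashboards share an x-axis of "samples seen".
import mlflow
from labml import tracker

def log_both(name: str, value: float, batch_size: int):
    tracker.add_global_step(batch_size)  # x-axis = samples seen
    tracker.add(name, value)             # queued; flushed by tracker.save()
    mlflow.log_metric(name, value, step=tracker.get_global_step())
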
def __iter__(self):
    self.__loop = monit.loop(
        range(tracker.get_global_step(), self.__loop_count, self.__loop_step))
    iter(self.__loop)
    try:
        self.old_handler = signal.signal(signal.SIGINT, self.__handler)
    except ValueError:
        pass
    return self

def __iter__(self):
    self._iter = TrainingLoopIterator(tracker.get_global_step(),
                                      self.__loop_count, self.__loop_step)
    self.__loop = monit.loop(typing.cast(Collection, self._iter))
    iter(self.__loop)
    try:
        self.old_handler = signal.signal(signal.SIGINT, self.__handler)
    except ValueError:
        pass
    return self

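# Both `__iter__` variants above install a SIGINT handler so Ctrl+C can be caught
# and the loop shut down cleanly; the `except ValueError` covers being off the main
# thread, where handlers can't be installed. A standalone sketch of that pattern
# (handler behaviour assumed; the repo's `__handler` is not shown):
import signal

class GracefulInterrupt:
    def __init__(self):
        self.interrupted = False
        self.old_handler = None

    def __enter__(self):
        try:
            self.old_handler = signal.signal(signal.SIGINT, self._handler)
        except ValueError:
            pass  # not on the main thread; can't install a handler
        return self

    def _handler(self, signum, frame):
        self.interrupted = True  # let the loop finish the current iteration

    def __exit__(self, *exc):
        if self.old_handler is not None:
            signal.signal(signal.SIGINT, self.old_handler)
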
def is_interval(self, interval: int, global_step: Optional[int] = None):
    if global_step is None:
        global_step = tracker.get_global_step()
    if global_step - self.__loop_step < 0:
        return False
    if global_step // interval > (global_step - self.__loop_step) // interval:
        return True
    else:
        return False

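# The test above returns True exactly when a multiple of `interval` falls inside
# the half-open window (global_step - loop_step, global_step]. A free-function
# check (the refactored form is mine):
def is_interval(global_step: int, interval: int, loop_step: int) -> bool:
    if global_step - loop_step < 0:
        return False
    return global_step // interval > (global_step - loop_step) // interval

# with loop_step=4 and interval=10, the boundaries 10, 20, 30, 40 are reported
# at steps 12, 20, 32 and 40
assert [s for s in range(0, 41, 4) if is_interval(s, 10, 4)] == [12, 20, 32, 40]
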
def __next__(self):
    if self.step is not None:
        if self.i is None:
            self.i = self.start
        else:
            self.i += self.step
    else:
        if self.i is None:
            self.i = 0
        else:
            self.i += 1
    if self.i >= self.total:
        raise StopIteration()
    if self.step is None:
        return tracker.get_global_step()
    else:
        return self.i

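# Note the asymmetry above: with a fixed `step` the iterator yields its own counter,
# but with `step=None` it counts iterations internally while yielding the live
# global step, so a progress bar built on it follows a counter that other code
# advances by arbitrary amounts (e.g. per sample). A simplified standalone version
# (names are mine):
class LiveStepIterator:
    def __init__(self, total: int, get_global_step):
        self.total = total
        self.get_global_step = get_global_step
        self.i = None

    def __iter__(self):
        return self

    def __next__(self):
        self.i = 0 if self.i is None else self.i + 1
        if self.i >= self.total:
            raise StopIteration()
        return self.get_global_step()
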
def sample(self, train=True) -> Dict[str, torch.Tensor]:
    """### Sample data with current policy"""
    actions = torch.zeros((self.envs, self.c.worker_steps), dtype=torch.int32, device=device)
    obs = torch.zeros((self.envs, self.c.worker_steps, *kTensorDim), dtype=torch.float32, device=device)
    log_pis = torch.zeros((self.envs, self.c.worker_steps), dtype=torch.float32, device=device)
    values = torch.zeros((self.envs, self.c.worker_steps, 3), dtype=torch.float32, device=device)
    # sample `worker_steps` from each worker
    tot_lines = 0
    tot_score = 0
    for t in range(self.c.worker_steps):
        with torch.no_grad():
            # `self.obs` keeps track of the last observation from each worker,
            # which is the input for the model to sample the next action
            obs[:, t] = self.obs
            # sample actions from $\pi_{\theta_{OLD}}$
            pi, v = self.model(self.obs)
            values[:, t] = v
            a = pi.sample()
            actions[:, t] = a
            log_pis[:, t] = pi.log_prob(a)
            actions_cpu = a.cpu().numpy()
        # run sampled actions on each worker; workers place the results in
        # `self.obs_np`, `self.rewards` and `self.done`
        for w, worker in enumerate(self.workers):
            worker.child.send(('step', (t, actions_cpu[self.w_range(w)], tracker.get_global_step())))
        for worker in self.workers:
            info_arr = worker.child.recv()
            # collect episode info, which is available if an episode finished
            if train:
                self.total_games += len(info_arr)
                for info in info_arr:
                    tot_lines += info['lines']
                    tot_score += info['score']
                    tracker.add('reward', info['reward'])
                    tracker.add('scorek', info['score'] * 1e-3)
                    tracker.add('lines', info['lines'])
                    tracker.add('length', info['length'])
        self.obs = obs_to_torch(self.obs_np, device)
    # log reward statistics
    reward_max = self.rewards[:, :, 0].max()
    if train:
        tracker.add('maxk', reward_max / 1e-2)
        tracker.add('mil_games', self.total_games * 1e-6)
        tracker.add('perline', tot_score * 1e-3 / tot_lines)
    # calculate advantages
    advantages = self._calc_advantages(self.done, self.rewards, values)
    samples = {
        'obs': obs,
        'actions': actions,
        'values': values,
        'log_pis': log_pis,
        'advantages': advantages
    }
    # samples are currently in a [workers, time] table; flatten it
    for k in samples:
        samples[k] = samples[k].view(-1, *samples[k].shape[2:])
    return samples

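# `_calc_advantages` is not shown above; what follows is a generic GAE sketch under
# stated assumptions (a single scalar value per step and bootstrap values supplied
# by the caller), not the repo's exact routine, which carries three value heads.
import torch

def calc_advantages_gae(done, rewards, values, last_values,
                        gamma: float = 0.99, lamda: float = 0.95):
    # done, rewards, values: [workers, steps]; last_values: [workers] (bootstrap V(s_T))
    advantages = torch.zeros_like(rewards)
    last_advantage = torch.zeros_like(last_values)
    last_value = last_values
    for t in reversed(range(rewards.shape[1])):
        mask = 1.0 - done[:, t].float()  # zero out the bootstrap at episode ends
        delta = rewards[:, t] + gamma * mask * last_value - values[:, t]
        last_advantage = delta + gamma * lamda * mask * last_advantage
        advantages[:, t] = last_advantage
        last_value = values[:, t]
    return advantages
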
def step(self):
    rate = self.rate(tracker.get_global_step() / self.step_factor)
    for p in self.optimizer.param_groups:
        p['lr'] = rate
    self._rate = rate
    self.optimizer.step()

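# A hedged example of a `rate` function the wrapper above could be constructed with
# (the Noam-style warmup/decay here is an assumption; the actual schedule lives
# elsewhere in the repo):
def noam_rate(step: float, d_model: int = 512, warmup: int = 4000) -> float:
    step = max(step, 1.0)  # avoid 0 ** -0.5 on the first call
    return d_model ** -0.5 * min(step ** -0.5, step * warmup ** -1.5)
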
def main():
    parser = argparse.ArgumentParser(description="PyTorch BERT Example")
    parser.add_argument("--max_epochs", type=int, default=5, metavar="N",
                        help="number of epochs to train (default: 5)")
    parser.add_argument("--batch_size", type=int, default=16, metavar="N",
                        help="batch size (default: 16)")
    parser.add_argument("--max_len", type=int, default=160, metavar="N",
                        help="number of tokens per sample, rest is truncated (default: 160)")
    parser.add_argument("--num_samples", type=int, default=1_000, metavar="N",
                        help="number of samples used for training and evaluation "
                             "(default: 1000, maximum: 100000)")
    # note: argparse's `type=bool` treats any non-empty string as True
    parser.add_argument("--save_model", type=bool, default=True,
                        help="for saving the current model")
    parser.add_argument("--vocab_file",
                        default="https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt",
                        help="custom vocab file")
    parser.add_argument("--model_save_path", type=str, default="models",
                        help="path to save the mlflow model")

    experiment.create(name='bert_news')
    args = parser.parse_args()
    experiment.configs(args.__dict__)

    # The tracking URI is set as an environment variable; check the Makefile
    # mlflow.set_tracking_uri("http://localhost:5005")
    mlflow.start_run()
    mlflow.log_param("epochs", args.max_epochs)
    mlflow.log_param("samples", args.num_samples)

    with experiment.start():
        trainer = NewsClassifierTrainer(epochs=args.max_epochs,
                                        n_samples=args.num_samples,
                                        vocab_file_url=args.vocab_file,
                                        is_save_model=args.save_model,
                                        model_path=args.model_save_path,
                                        batch_size=args.batch_size,
                                        max_len=args.max_len)
        model = Model()
        model = model.to(trainer.device)
        trainer.prepare_data()
        trainer.set_optimizer(model)
        trainer.start_training(model)

        with tracker.namespace('test'):
            test_acc, test_loss = trainer.train_epoch(model, trainer.test_data_loader, 'test')

        y_review_texts, y_pred, y_pred_probs, y_test = trainer.get_predictions(
            model, trainer.test_data_loader)
        inspect(y_review_texts)
        inspect(torch.stack((y_pred, y_test), dim=1))

        mlflow.log_metric("test_acc", float(test_acc), step=tracker.get_global_step())
        mlflow.log_metric("test_loss", float(test_loss), step=tracker.get_global_step())

    mlflow.end_run()

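# Example invocation (hypothetical values; the flags match the parser above). The
# `__main__` guard is a conventional addition, not shown in the original:
#   python bert_news.py --max_epochs 3 --batch_size 32 --num_samples 5000
if __name__ == "__main__":
    main()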