def train(self) -> None:
    logger.info("===== Model =====")
    logger.info(self.model)
    print_model_parameters(self.model)
    logger.info("Starting training...")
    self.trainer.fit(self.model, self.data_module)
def train(self) -> None:
    logger.info("===== Model =====")
    logger.info(self.model)
    print_model_parameters(self.model)
    logger.info("Starting training...")
    self.trainer.fit(self.model, self.data_module)
    # TODO: Look for a better way to hook this
    self.data_module.teardown()
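# The variants above assume a `print_model_parameters` helper. A minimal
# sketch of such a helper, for illustration only (the project's real
# implementation may differ): it counts the total and trainable parameters
# of a torch.nn.Module and logs both.
import logging

import torch

logger = logging.getLogger(__name__)


def print_model_parameters(model: torch.nn.Module) -> None:
    # numel() returns the number of elements in each parameter tensor.
    total = sum(p.numel() for p in model.parameters())
    trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
    logger.info(f"Total Parameters: {total}. Trainable Parameters: {trainable}")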
def train(self):
    self.writer.write("===== Model =====")
    self.writer.write(self.model)
    print_model_parameters(self.model)

    if "train" not in self.run_type:
        self.inference()
        return

    should_break = False

    # Train for either a fixed number of epochs or a fixed number of
    # updates; the unused bound is lifted to infinity.
    if self.max_epochs is None:
        self.max_epochs = math.inf
    else:
        self.max_updates = math.inf

    self.model.train()
    self.train_timer = Timer()
    self.snapshot_timer = Timer()

    self.profile("Setup Time")

    # Surface NaN/inf gradients early; note this adds per-op overhead.
    torch.autograd.set_detect_anomaly(True)
    self.writer.write("Starting training...")

    while self.num_updates < self.max_updates and not should_break:
        self.current_epoch += 1
        registry.register("current_epoch", self.current_epoch)

        # Seed the sampler in case it is distributed
        self.dataset_loader.seed_sampler("train", self.current_epoch)

        if self.current_epoch > self.max_epochs:
            break

        for batch in self.train_loader:
            self.profile("Batch load time")
            self.current_iteration += 1
            self.writer.write(self.num_updates + 1, "debug")

            report = self._forward_pass(batch)
            loss = self._extract_loss(report)
            self._backward(loss)
            should_break = self._logistics(report)

            # >= (rather than >) stops exactly at the update budget instead
            # of overshooting by one update.
            if self.num_updates >= self.max_updates:
                should_break = True

            if should_break:
                break

        # In distributed, each worker will complete one epoch when we reach
        # this point, as each worker is an individual instance
        self.current_epoch += get_world_size() - 1

    self.finalize()
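# `seed_sampler("train", epoch)` above reseeds the training sampler each
# epoch so that distributed workers get a fresh shuffle. A hedged sketch of
# that standard PyTorch pattern (names here are illustrative, not the
# project's actual loader API):
from torch.utils.data import DataLoader
from torch.utils.data.distributed import DistributedSampler


def seed_sampler(loader: DataLoader, epoch: int) -> None:
    sampler = loader.sampler
    # Without set_epoch, DistributedSampler replays the same permutation
    # every epoch on every worker.
    if isinstance(sampler, DistributedSampler):
        sampler.set_epoch(epoch)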
def train(self):
    logger.info("===== Model =====")
    logger.info(self.model)
    print_model_parameters(self.model)

    if "train" in self.run_type:
        self.on_train_start()
        self.training_loop()
        self.on_train_end()

    self.inference()
    self.finalize()
def train(self):
    self.writer.write("===== Model =====")
    self.writer.write(self.model)
    print_model_parameters(self.model)

    if "train" not in self.run_type:
        self.inference()
        return

    self.on_train_start()
    self.training_loop()
    self.on_train_end()
    self.inference()
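# The two variants above replace the inline loop with on_train_start /
# training_loop / on_train_end hooks. A self-contained sketch of that shape,
# assuming a plain model, loader, and optimizer (all names illustrative):
import torch
from torch import nn


class HookedTrainerSketch:
    def __init__(self, model: nn.Module, train_loader, optimizer):
        self.model = model
        self.train_loader = train_loader
        self.optimizer = optimizer

    def on_train_start(self):
        # Put the model in train mode; timers/checkpoint state would go here.
        self.model.train()

    def training_loop(self):
        # Forward pass, loss extraction, and backward step per batch.
        for inputs, targets in self.train_loader:
            self.optimizer.zero_grad()
            loss = nn.functional.mse_loss(self.model(inputs), targets)
            loss.backward()
            self.optimizer.step()

    def on_train_end(self):
        # Mirror of on_train_start: flip back to eval mode before inference.
        self.model.eval()

    def train(self):
        self.on_train_start()
        self.training_loop()
        self.on_train_end()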
def train(self) -> None:
    logger.info("===== Model =====")
    logger.info(self.model)
    print_model_parameters(self.model)

    if "train" not in self.run_type:
        self.inference()
        return

    # Log only once we know a training run is actually starting.
    logger.info("Starting training...")
    self.trainer.fit(self.model, self.data_module)
    # TODO: Look for a better way to hook this
    self.data_module.teardown()
def train(self):
    logger.info("===== Model =====")
    logger.info(self.model)
    print_model_parameters(self.model)

    if "train" not in self.run_type:
        self.inference()
        # Release dataset resources on the inference-only path as well.
        self.dataset_loader.teardown()
        return

    self.on_train_start()
    self.training_loop()
    self.on_train_end()
    self.inference()
    self.dataset_loader.teardown()
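# `dataset_loader.teardown()` above releases dataset resources once both
# training and inference have finished. A sketch of what such a hook might
# manage, purely for illustration (the real loader owns its own resources):
class DatasetLoaderSketch:
    def __init__(self):
        # e.g. open feature files, LMDB readers, or worker pools.
        self._handles = []

    def teardown(self) -> None:
        # Close each handle exactly once, tolerating already-closed ones.
        for handle in self._handles:
            try:
                handle.close()
            except OSError:
                pass
        self._handles.clear()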