def on_epoch_end(self, epoch: int, logs: dict = None):
    """Keras callback hook: forward every epoch metric to dvclive.

    NumPy scalar floats are converted to native Python floats so that
    downstream serialization of the logged values does not trip over
    numpy types.
    """
    logs = logs or {}
    for metric, value in logs.items():
        # isinstance (not `type(...) ==`) is the idiomatic type check and
        # also covers np.float64 and other numpy float subtypes, not just
        # np.float32.
        if isinstance(value, np.floating):
            value = float(value)
        dvclive.log(metric, value)
    dvclive.next_step()
def main():
    """Train model and evaluate on test data."""
    torch.manual_seed(0)
    model = ConvNet()
    # Resume from a previous checkpoint when one exists.
    if os.path.exists("model.pt"):
        model.load_state_dict(torch.load("model.pt"))
    # Prepare the MNIST train/test splits.
    x_train, y_train = transform(
        torchvision.datasets.MNIST("data", download=True))
    x_test, y_test = transform(
        torchvision.datasets.MNIST("data", download=True, train=False))
    try:
        for _epoch in range(EPOCHS):
            # Train one epoch in shuffled mini-batches.
            loader = torch.utils.data.DataLoader(
                dataset=list(zip(x_train, y_train)),
                batch_size=512,
                shuffle=True,
            )
            for x_batch, y_batch in loader:
                train(model, x_batch, y_batch)
            torch.save(model.state_dict(), "model.pt")
            # Evaluate, then log every metric for this dvclive step.
            for metric, value in evaluate(model, x_test, y_test).items():
                dvclive.log(metric, value)
            dvclive.next_step()
    except KeyboardInterrupt:
        # Allow Ctrl-C to stop training early without a traceback; the
        # latest checkpoint has already been saved.
        pass
def after_iteration(self, model, epoch, evals_log):
    """XGBoost callback hook: log the newest value of every tracked metric,
    optionally checkpoint the booster, and advance the dvclive step."""
    history_by_metric = evals_log[self._metric_data]
    for name, history in history_by_metric.items():
        if history:
            dvclive.log(name, history[-1])
    if self.model_file:
        model.save_model(self.model_file)
    dvclive.next_step()
def __call__(self, env):
    """LightGBM callback: log each evaluation result for the current
    iteration, optionally checkpoint the model, and advance the step."""
    for result in env.evaluation_result_list:
        # result layout: (data_name, metric_name, value, ...)
        dvclive.log(result[1], result[2])
    if self.model_file:
        env.model.save_model(self.model_file)
    dvclive.next_step()
def after_epoch(self):
    """fastai callback hook: log every recorder metric for this epoch,
    optionally save the learner, and advance the dvclive step."""
    recorder = self.learn.recorder
    for name, value in zip(recorder.metric_names, recorder.log):
        # Underscores become slashes so metrics nest into subdirectories.
        dvclive.log(name.replace("_", "/"), float(value), self.learn.epoch)
    if self.model_file:
        self.learn.save(self.model_file)
    dvclive.next_step()
def on_epoch_end(self, epoch: int, logs: dict = None):
    """Keras callback hook: log epoch metrics, optionally checkpoint the
    model (weights-only or full), and advance the dvclive step."""
    for metric, value in (logs or {}).items():
        dvclive.log(metric, value)
    if self.model_file:
        if self.save_weights_only:
            self.model.save_weights(self.model_file)
        else:
            self.model.save(self.model_file)
    dvclive.next_step()
def test_get_step_control_flow(tmp_dir):
    """get_step() advances with next_step() and can drive a while loop."""
    dvclive.init("logs")
    while dvclive.get_step() < 10:
        dvclive.log("i", dvclive.get_step())
        dvclive.next_step()
    steps, values = read_history("logs", "i")
    expected = list(range(10))
    assert steps == expected
    assert values == [float(v) for v in expected]
def test_logging(tmp_dir, summary):
    """A logged metric creates its .tsv file immediately; the JSON summary
    appears only after next_step() and only when summary is enabled."""
    dvclive.init("logs", summary=summary)
    dvclive.log("m1", 1)
    logs_dir = tmp_dir / "logs"
    summary_file = tmp_dir / "logs.json"
    assert logs_dir.is_dir()
    assert (logs_dir / "m1.tsv").is_file()
    assert not summary_file.is_file()
    dvclive.next_step()
    assert summary_file.is_file() == summary
def test_html(tmp_dir, dvc_repo, html, signal_exists):
    """The DVC signal file is written only when running inside a DVC repo
    with html output enabled."""
    if dvc_repo:
        from dvc.repo import Repo
        Repo.init(no_scm=True)
    dvclive.init("logs", html=html)
    dvclive.log("m1", 1)
    dvclive.next_step()
    signal_file = tmp_dir / ".dvc" / "tmp" / SIGNAL_FILE
    assert signal_file.is_file() == signal_exists
def test_html(tmp_dir, dvc_repo, html, signal_exists, monkeypatch):
    """Like the init-based html test, but configures dvclive through the
    DVCLIVE_* environment variables instead of init() arguments."""
    if dvc_repo:
        from dvc.repo import Repo
        Repo.init(no_scm=True)
    monkeypatch.setenv(env.DVCLIVE_PATH, "logs")
    monkeypatch.setenv(env.DVCLIVE_HTML, str(int(html)))
    dvclive.log("m1", 1)
    dvclive.next_step()
    signal_file = tmp_dir / ".dvc" / "tmp" / SIGNAL_FILE
    assert signal_file.is_file() == signal_exists
def on_epoch_end(self, epoch: int, logs: dict = None):
    """Keras callback hook: log epoch metrics plus current process-wide
    memory usage to both dvclive and MLflow.

    NumPy scalar floats are converted to plain Python floats so the logged
    values serialize cleanly.
    """
    logs = logs or {}
    for metric, value in logs.items():
        # isinstance (not `type(...) ==`) is the idiomatic type check and
        # also covers np.float64 and other numpy float subtypes.
        float_value = float(value) if isinstance(value, np.floating) else value
        dvclive.log(metric, float_value)
    # NOTE(review): virtual_memory().used is already in bytes, so the
    # extra "/ 8" looks like a bits-vs-bytes mix-up that underreports by
    # 8x. Kept as-is so the logged series stays comparable with history —
    # confirm intent and drop the "/ 8" if it is indeed a bug.
    mem = psutil.virtual_memory().used / 8 / 1024 / 1024 / 1024
    dvclive.log('memory_use_GB', mem)
    mlflow.log_metric('memory_use_GB', mem)
    dvclive.next_step()
def test_get_step_resume(tmp_dir):
    """Re-initializing with resume=True keeps the step counter; with
    resume=False it resets to zero."""
    dvclive.init("logs")
    for value in (0.9, 0.8):
        dvclive.log("metric", value)
        dvclive.next_step()
    assert dvclive.get_step() == 2
    dvclive.init("logs", resume=True)
    assert dvclive.get_step() == 2
    dvclive.init("logs", resume=False)
    assert dvclive.get_step() == 0
def on_epoch_end(self, runner) -> None:
    """Catalyst callback hook: log every per-loader metric for this epoch,
    optionally write a packed checkpoint, and advance the dvclive step."""
    step = runner.stage_epoch_step
    for loader_key, loader_metrics in runner.epoch_metrics.items():
        for metric_key, metric_value in loader_metrics.items():
            # Flatten slashes inside the metric name so the only nesting
            # level comes from the loader prefix.
            flat_key = metric_key.replace("/", "_")
            dvclive.log(f"{loader_key}/{flat_key}", float(metric_value), step)
    if self.model_file:
        checkpoint = runner.engine.pack_checkpoint(
            model=runner.model,
            criterion=runner.criterion,
            optimizer=runner.optimizer,
            scheduler=runner.scheduler,
        )
        runner.engine.save_checkpoint(checkpoint, self.model_file)
    dvclive.next_step()
def test_nested_logging(tmp_dir):
    """Slash-separated metric names produce nested directories on disk and
    a correspondingly nested structure in the JSON summary."""
    dvclive.init("logs", summary=True)
    dvclive.log("train/m1", 1)
    dvclive.log("val/val_1/m1", 1)
    logs_dir = tmp_dir / "logs"
    assert logs_dir.is_dir()
    assert (logs_dir / "train").is_dir()
    assert (logs_dir / "val" / "val_1").is_dir()
    assert (logs_dir / "train" / "m1.tsv").is_file()
    assert (logs_dir / "val" / "val_1" / "m1.tsv").is_file()
    dvclive.next_step()
    _, summary = read_logs("logs")
    assert summary["train"]["m1"] == 1
    assert summary["val"]["val_1"]["m1"] == 1
def test_continue(tmp_dir, resume, steps, metrics):
    """After re-init, the metric history either extends (resume=True) or
    restarts (resume=False), as described by the parametrized expectations."""
    dvclive.init("logs")
    for value in (0.9, 0.8):
        dvclive.log("metric", value)
        dvclive.next_step()
    assert read_history("logs", "metric") == ([0, 1], [0.9, 0.8])
    assert read_latest("logs", "metric") == (1, 0.8)
    dvclive.init("logs", resume=resume)
    for value in (0.7, 0.6):
        dvclive.log("metric", value)
        dvclive.next_step()
    assert read_history("logs", "metric") == (steps, metrics)
    assert read_latest("logs", "metric") == (last(steps), last(metrics))
def test_cleanup(tmp_dir, summary, html):
    """Re-initializing dvclive removes its own outputs (.tsv, .json, .html)
    but leaves unrelated user files in the logs directory untouched."""
    dvclive.init("logs", summary=summary)
    dvclive.log("m1", 1)
    dvclive.next_step()
    if html:
        (tmp_dir / "logs.html").touch()
    user_file = tmp_dir / "logs" / "some_user_file.txt"
    user_file.touch()
    assert (tmp_dir / "logs" / "m1.tsv").is_file()
    assert (tmp_dir / "logs.json").is_file() == summary
    assert (tmp_dir / "logs.html").is_file() == html
    dvclive.init("logs")
    assert user_file.is_file()
    assert not (tmp_dir / "logs" / "m1.tsv").is_file()
    assert not (tmp_dir / "logs.json").is_file()
    assert not (tmp_dir / "logs.html").is_file()
def main():
    """Train model and evaluate on test data."""
    torch.manual_seed(0)
    model = ConvNet()
    # Resume from a previous checkpoint when one exists.
    if os.path.exists("model.pt"):
        model.load_state_dict(torch.load("model.pt"))
    # Prepare the MNIST train/test splits.
    x_train, y_train = transform(
        torchvision.datasets.MNIST("data", download=True))
    x_test, y_test = transform(
        torchvision.datasets.MNIST("data", download=True, train=False))
    for _epoch in range(EPOCHS):
        # Full-batch training pass, then checkpoint.
        train(model, x_train, y_train)
        torch.save(model.state_dict(), "model.pt")
        # Evaluate, then log every metric for this dvclive step.
        for metric, value in evaluate(model, x_test, y_test).items():
            dvclive.log(metric, value)
        dvclive.next_step()
def dvclive_next_step(self):
    """Advance dvclive to the next step (thin pass-through wrapper)."""
    dvclive.next_step()
def test_initialization_error(tmp_dir):
    """Calling next_step() before init() must raise InitializationError."""
    with pytest.raises(InitializationError):
        dvclive.next_step()
collate_fn=collator) valid_loader = DataLoader(valid_subset, batch_size=256, collate_fn=collator) model = SimpleConv(n_channels=1, n_classes=1) criterion = torch.nn.BCEWithLogitsLoss(reduction='sum') optimizer = torch.optim.Adam(model.parameters(), lr=lr) for n in range(nepochs): global_loss = 0.0 model.train() for batch in train_loader: optimizer.zero_grad() y = model.forward(batch['light_curve']) loss = criterion(y.squeeze(1), batch['label'].float()) loss.backward() optimizer.step() global_loss += loss.item() dvclive.log('train/loss', global_loss / len(train_subset)) global_loss = 0.0 model.eval() for batch in valid_loader: y = model.forward(batch['light_curve']) loss = criterion(y.squeeze(1), batch['label'].float()) global_loss += loss.item() dvclive.log('valid/loss', global_loss / len(valid_subset)) dvclive.next_step() torch.save(model, model_path)
def next_epoch(self):
    """Signal dvclive that the current epoch/step is complete."""
    dvclive.next_step()
def on_epoch_end(self, epoch: int, logs: dict = None):
    """Keras callback hook: forward every epoch metric to dvclive and
    advance the step."""
    for metric, value in (logs or {}).items():
        dvclive.log(metric, value)
    dvclive.next_step()
def on_epoch_end(self, trainer, pl_module):
    """PyTorch Lightning hook: advance the dvclive step at epoch end."""
    dvclive.next_step()