def test_epoch(self, epoch, test_loader, loss_fn, metrics, device, phase="val"):
    self.logger.debug("{}|{}|start".format(phase, epoch))
    self.model.eval()
    with torch.no_grad():
        # metrics may be passed either as a list or as a dict of named metrics
        if isinstance(metrics, list):
            for metric in metrics:
                metric.reset(device)
        else:
            for metric in metrics.values():
                metric.reset(device)
        epoch_start = time.time()
        start_data_loading = epoch_start
        data_loading_time = m.Sum(torch.device("cpu"))
        epoch_loss = m.Mean(device)
        auc = AUCMeter()
        for i, (features, label) in enumerate(test_loader):
            features = features.to(device)
            call_label = None
            if "call" in label:
                call_label = label["call"].to(
                    device, non_blocking=True, dtype=torch.int64
                )
            data_loading_time.update(
                torch.Tensor([(time.time() - start_data_loading)])
            )
            output = self.model(features)
            loss = loss_fn(output, call_label)
            epoch_loss.update(loss)
            prediction = None
            if call_label is not None:
                prediction = torch.argmax(output.data, dim=1)
                if isinstance(metrics, list):
                    for metric in metrics:
                        metric.update(call_label, prediction)
                else:
                    for metric in metrics.values():
                        metric.update(call_label, prediction)
                # probability of the positive ("call") class for the ROC curve
                score = nn.functional.softmax(output, dim=1)[:, 1]
                auc.add(score, call_label)
            if i == 0:
                self.write_summaries(
                    features=features,
                    labels=call_label,
                    prediction=prediction,
                    file_names=label["file_name"],
                    epoch=epoch,
                    phase=phase,
                )
            start_data_loading = time.time()
        self.write_scalar_summaries_logs(
            loss=epoch_loss.get(),
            metrics=metrics,
            epoch_time=time.time() - epoch_start,
            data_loading_time=data_loading_time.get(),
            epoch=epoch,
            phase=phase,
        )
        if call_label is not None:
            # auc.value() is expected to yield (auc, tpr, fpr)
            self.write_roc_curve_summary(*auc.value(), epoch, phase=phase)
    self.writer.flush()
    return epoch_loss.get()
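# --- Illustration only: the metric objects above come from the project's
# metric module (imported as "m"), whose source is not shown here. A minimal
# sketch of the interface this trainer assumes (a device-aware accumulator
# with reset()/update()/get()) could look as follows; the real Sum/Mean
# implementations in the repository may differ. Relies on the module-level
# torch import already used throughout this file.


class _SumSketch:
    """Hypothetical sketch: running sum of tensor values on a given device."""

    def __init__(self, device):
        self.device = device
        self.total = torch.zeros(1, device=device)

    def reset(self, device=None):
        if device is not None:
            self.device = device
        self.total = torch.zeros(1, device=self.device)

    def update(self, value):
        # detach so accumulated losses do not keep the autograd graph alive
        self.total += value.detach().to(self.device).sum()

    def get(self):
        return self.total.item()


class _MeanSketch(_SumSketch):
    """Hypothetical sketch: mean of all values passed to update()."""

    def __init__(self, device):
        super().__init__(device)
        self.count = 0

    def reset(self, device=None):
        super().reset(device)
        self.count = 0

    def update(self, value):
        super().update(value)
        self.count += 1

    def get(self):
        return self.total.item() / max(self.count, 1)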
def train_epoch(self, epoch, train_loader, loss_fn, optimizer, metrics, device):
    self.logger.debug("train|{}|start".format(epoch))
    if isinstance(metrics, list):
        for metric in metrics:
            metric.reset(device)
    else:
        for metric in metrics.values():
            metric.reset(device)
    self.model.train()
    epoch_start = time.time()
    start_data_loading = epoch_start
    data_loading_time = m.Sum(torch.device("cpu"))
    epoch_loss = m.Mean(device)
    for i, (features, label) in enumerate(train_loader):
        features = features.to(device)
        call_label = None
        if "call" in label:
            call_label = label["call"].to(
                device, non_blocking=True, dtype=torch.int64
            )
        data_loading_time.update(
            torch.Tensor([(time.time() - start_data_loading)])
        )
        optimizer.zero_grad()
        output = self.model(features)
        loss = loss_fn(output, call_label)
        loss.backward()
        optimizer.step()
        epoch_loss.update(loss)
        prediction = None
        if call_label is not None:
            prediction = torch.argmax(output.data, dim=1)
            if isinstance(metrics, list):
                for metric in metrics:
                    metric.update(call_label, prediction)
            else:
                for metric in metrics.values():
                    metric.update(call_label, prediction)
        if i == 0:
            self.write_summaries(
                features=features,
                labels=call_label,
                prediction=prediction,
                file_names=label["file_name"],
                epoch=epoch,
                phase="train",
            )
        start_data_loading = time.time()
    self.write_scalar_summaries_logs(
        loss=epoch_loss.get(),
        metrics=metrics,
        lr=optimizer.param_groups[0]["lr"],
        epoch_time=time.time() - epoch_start,
        data_loading_time=data_loading_time.get(),
        epoch=epoch,
        phase="train",
    )
    self.writer.flush()
    return epoch_loss.get()
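# --- Illustration only: a hedged sketch of how the classifier's train_epoch()
# and test_epoch() above might be driven from an outer fit loop. "trainer",
# "n_epochs", and the checkpoint path are assumptions for this example, not
# names defined in this file.


def _fit_classifier_sketch(trainer, n_epochs, train_loader, val_loader,
                           loss_fn, optimizer, metrics, device):
    best_val_loss = float("inf")
    for epoch in range(n_epochs):
        trainer.train_epoch(epoch, train_loader, loss_fn, optimizer,
                            metrics, device)
        val_loss = trainer.test_epoch(epoch, val_loader, loss_fn,
                                      metrics, device, phase="val")
        # keep the checkpoint with the lowest validation loss
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save(trainer.model.state_dict(), "best_model.pt")
    return best_val_loss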
def validate(self, epoch, val_dataloader, val_ds, loss_fn, device, phase="val"):
    self.logger.debug("{}|{}|start".format(phase, epoch))
    self.model.eval()
    val_running_loss = 0.0
    with torch.no_grad():
        epoch_start = time.time()
        start_data_loading = epoch_start
        data_loading_time = m.Sum(torch.device("cpu"))
        for i, (val_specs, label) in enumerate(val_dataloader):
            val_specs = val_specs.to(device)
            data_loading_time.update(
                torch.Tensor([(time.time() - start_data_loading)])
            )
            outputs = self.model(val_specs)
            # instead of converting the spectrograms to color images, log the
            # single-channel tensors produced by the network directly
            if i % 2 == 0:
                self.writer.add_images("Original", val_specs, epoch)
                self.writer.add_images("Reconstructed", outputs, epoch)
            # reconstruction loss against the input spectrograms; accumulate
            # the unaveraged total loss of the current batch
            loss = loss_fn(outputs, val_specs)
            val_running_loss += loss.item() * val_specs.size(0)
            if i % 2 == 0:
                self.write_summaries(
                    features=val_specs,  # original spectrograms
                    reconstructed=outputs,
                    file_names=label["file_name"],
                    epoch=epoch,
                    phase=phase,
                )
            start_data_loading = time.time()
        # average the accumulated per-sample loss over the whole validation set
        val_epoch_loss = val_running_loss / len(val_ds)
        self.write_scalar_summaries_logs(
            loss=val_epoch_loss,
            epoch_time=time.time() - epoch_start,
            data_loading_time=data_loading_time.get(),
            epoch=epoch,
            phase=phase,
        )
    self.writer.flush()
    return val_epoch_loss
def train(self, epoch, train_dataloader, train_ds, loss_fn, optimizer, device):
    """Train the autoencoder for one epoch on the training data with the
    given loss function and optimizer."""
    self.logger.debug("train|{}|start".format(epoch))
    self.model.train()
    epoch_start = time.time()
    start_data_loading = epoch_start
    data_loading_time = m.Sum(torch.device("cpu"))
    train_running_loss = 0.0
    for i, (train_specs, label) in enumerate(train_dataloader):
        train_specs = train_specs.to(device)
        call_label = None
        if "call" in label:
            # e.g. tensor([True, True, True, True, True, True])
            call_label = label["call"].to(
                device, non_blocking=True, dtype=torch.int64
            )
        if "ground_truth" in label:
            ground_truth = label["ground_truth"].to(device, non_blocking=True)
        data_loading_time.update(
            torch.Tensor([(time.time() - start_data_loading)])
        )
        optimizer.zero_grad()
        # compute reconstructions
        outputs = self.model(train_specs)
        # training reconstruction loss when augmentation is used:
        # loss = loss_fn(outputs, ground_truth)
        # training reconstruction loss when no augmentation is used:
        loss = loss_fn(outputs, train_specs)
        # compute accumulated gradients and update the parameters
        loss.backward()
        optimizer.step()
        # loss.item() is the cost averaged over the current batch; multiplying
        # by the batch size adds the unaveraged total loss of the batch
        train_running_loss += loss.item() * train_specs.size(0)
        if i % 2 == 0:
            self.write_summaries(
                features=train_specs,  # original spectrograms
                reconstructed=outputs,
                file_names=label["file_name"],
                epoch=epoch,
                phase="train",
            )
        start_data_loading = time.time()
    # average the accumulated per-sample loss over the whole training set
    train_epoch_loss = train_running_loss / len(train_ds)
    self.write_scalar_summaries_logs(
        loss=train_epoch_loss,
        lr=optimizer.param_groups[0]["lr"],
        epoch_time=time.time() - epoch_start,
        data_loading_time=data_loading_time.get(),
        epoch=epoch,
        phase="train",
    )
    self.writer.flush()
    return train_epoch_loss
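# --- Illustration only: a hedged sketch of how the autoencoder's train() and
# validate() above might be combined into one fit loop. "trainer" and
# "n_epochs" are assumptions for this example, not names defined in this file.


def _fit_autoencoder_sketch(trainer, n_epochs, train_loader, train_ds,
                            val_loader, val_ds, loss_fn, optimizer, device):
    losses = []
    for epoch in range(n_epochs):
        train_loss = trainer.train(epoch, train_loader, train_ds,
                                   loss_fn, optimizer, device)
        val_loss = trainer.validate(epoch, val_loader, val_ds,
                                    loss_fn, device, phase="val")
        losses.append((train_loss, val_loss))
    return losses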
def test_epoch(self, epoch, test_loader, loss_fn, device, phase="val"):
    self.logger.debug("{}|{}|start".format(phase, epoch))
    self.model.eval()
    with torch.no_grad():
        epoch_start = time.time()
        start_data_loading = epoch_start
        data_loading_time = m.Sum(torch.device("cpu"))
        epoch_loss = m.Mean(device)
        for i, (features, label) in enumerate(test_loader):
            features = features.to(device)
            ground_truth = label["ground_truth"].to(device, non_blocking=True)
            data_loading_time.update(
                torch.Tensor([(time.time() - start_data_loading)])
            )
            denoised_output = self.model(features)
            loss = loss_fn(denoised_output, ground_truth)
            epoch_loss.update(loss)
            # log every fifth batch: clean target, noisy input, and prediction
            if i % 5 == 0:
                self.writer.add_image(
                    tag=phase + "/ground_truth",
                    img_tensor=prepare_img(
                        ground_truth.transpose(0, 1).squeeze(dim=0),
                        num_images=self.n_summaries,
                        file_names=label["file_name"],
                    ),
                    global_step=epoch,
                )
                self.writer.add_image(
                    tag=phase + "/input",
                    img_tensor=prepare_img(
                        features.transpose(0, 1).squeeze(dim=0),
                        num_images=self.n_summaries,
                        file_names=label["file_name"],
                    ),
                    global_step=epoch,
                )
                self.writer.add_image(
                    tag=phase + "/masks_pred",
                    img_tensor=prepare_img(
                        denoised_output.transpose(0, 1).squeeze(dim=0),
                        num_images=self.n_summaries,
                        file_names=label["file_name"],
                    ),
                    global_step=epoch,
                )
            start_data_loading = time.time()
        self.write_scalar_summaries_logs(
            loss=epoch_loss.get(),
            epoch_time=time.time() - epoch_start,
            data_loading_time=data_loading_time.get(),
            epoch=epoch,
            phase=phase,
        )
    self.writer.flush()
    return epoch_loss.get()
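# --- Illustration only: prepare_img() is defined elsewhere in the repository.
# Given how it is called above, it receives a (N, H, W) batch of spectrograms
# and must return a single CHW image tensor for writer.add_image(). A plausible
# minimal sketch (first num_images spectrograms, per-image min-max scaling,
# tiled into one grid) is shown below; the real helper may differ, e.g. in how
# it renders file_names into the image.


def _prepare_img_sketch(images, num_images, file_names=None):
    from torchvision.utils import make_grid

    images = images[:num_images].unsqueeze(1)        # (N, H, W) -> (N, 1, H, W)
    mins = images.amin(dim=(2, 3), keepdim=True)
    maxs = images.amax(dim=(2, 3), keepdim=True)
    images = (images - mins) / (maxs - mins + 1e-8)  # scale each image to [0, 1]
    return make_grid(images, nrow=1)                 # one CHW grid tensor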
def train_epoch(self, epoch, train_loader, loss_fn, optimizer, device):
    self.logger.debug("train|{}|start".format(epoch))
    self.model.train()
    epoch_start = time.time()
    start_data_loading = epoch_start
    data_loading_time = m.Sum(torch.device("cpu"))
    epoch_loss = m.Mean(device)
    for i, (features, label) in enumerate(train_loader):
        features = features.to(device)
        ground_truth = label["ground_truth"].to(device, non_blocking=True)
        data_loading_time.update(
            torch.Tensor([(time.time() - start_data_loading)])
        )
        optimizer.zero_grad()
        denoised_output = self.model(features)
        loss = loss_fn(denoised_output, ground_truth)
        loss.backward()
        optimizer.step()
        epoch_loss.update(loss)
        start_data_loading = time.time()
        # log every fifth batch: clean target, noisy input, and prediction
        if i % 5 == 0:
            self.writer.add_image(
                tag="train" + "/ground_truth",
                img_tensor=prepare_img(
                    ground_truth.transpose(0, 1).squeeze(dim=0),
                    num_images=self.n_summaries,
                    file_names=label["file_name"],
                ),
                global_step=epoch,
            )
            self.writer.add_image(
                tag="train" + "/input",
                img_tensor=prepare_img(
                    features.transpose(0, 1).squeeze(dim=0),
                    num_images=self.n_summaries,
                    file_names=label["file_name"],
                ),
                global_step=epoch,
            )
            self.writer.add_image(
                tag="train" + "/masks_pred",
                img_tensor=prepare_img(
                    denoised_output.transpose(0, 1).squeeze(dim=0),
                    num_images=self.n_summaries,
                    file_names=label["file_name"],
                ),
                global_step=epoch,
            )
    self.write_scalar_summaries_logs(
        loss=epoch_loss.get(),
        lr=optimizer.param_groups[0]["lr"],
        epoch_time=time.time() - epoch_start,
        data_loading_time=data_loading_time.get(),
        epoch=epoch,
        phase="train",
    )
    self.writer.flush()
    return epoch_loss.get()
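# --- Illustration only: a hedged sketch of how the denoiser's train_epoch()
# and test_epoch() above might be driven. "trainer" and "n_epochs" are
# assumptions for this example, not names defined in this file.


def _fit_denoiser_sketch(trainer, n_epochs, train_loader, val_loader,
                         loss_fn, optimizer, device):
    history = []
    for epoch in range(n_epochs):
        train_loss = trainer.train_epoch(epoch, train_loader, loss_fn,
                                         optimizer, device)
        val_loss = trainer.test_epoch(epoch, val_loader, loss_fn,
                                      device, phase="val")
        history.append((train_loss, val_loss))
    return history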