def _valid_epoch(self, epoch):
    """
    Validate after training an epoch

    :return: A log that contains information about validation

    Note:
        The validation metrics in log must have the key 'val_metrics'.
    """
    self.model.eval()
    total_val_loss = 0
    total_val_metrics = np.zeros(len(self.metrics))
    all_t = []
    all_o = []
    with torch.no_grad():
        for batch_idx, (data, target) in enumerate(self.valid_data_loader):
            target = target.to(self.device)
            if len(target.shape) == 0:
                target = target.unsqueeze(dim=0)

            output = None
            if self.config["loss"] == "bce_loss":
                output, _ = self.model(data, device=self.device)
            elif self.config["loss"] == "bce_loss_with_logits":
                _, output = self.model(data, device=self.device)

            loss = self.loss(output, target.reshape(-1))

            all_o.append(output.detach().cpu().numpy())
            all_t.append(target.detach().cpu().numpy())

            self.writer.set_step(
                (epoch - 1) * len(self.valid_data_loader) + batch_idx, "valid")
            self.writer.add_scalar("loss", loss.item())
            total_val_loss += loss.item()
            total_val_metrics += self._eval_metrics(output, target)

            del output
            del target

    total_val_metrics = (total_val_metrics / len(self.valid_data_loader)).tolist()
    if self.prauc_flag:
        all_o = np.hstack(all_o)
        all_t = np.hstack(all_t)
        total_val_metrics[-2] = pr_auc_1(all_o, all_t)
        total_val_metrics[-1] = roc_auc_1(all_o, all_t)

    return {
        "val_loss": total_val_loss / len(self.valid_data_loader),
        "val_metrics": total_val_metrics,
    }
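# Hedged sketch: the branching on self.config["loss"] above assumes the model's
# forward pass returns a (probabilities, logits) pair and that the two loss names
# map onto PyTorch's BCE variants roughly as below. These wrapper definitions are
# illustrative assumptions, not the project's actual loss module.
import torch.nn.functional as F


def bce_loss(probs, target):
    # consumes sigmoid-activated probabilities (the first model output)
    return F.binary_cross_entropy(probs, target.float())


def bce_loss_with_logits(logits, target):
    # consumes raw pre-sigmoid scores (the second model output); numerically
    # more stable than sigmoid followed by binary_cross_entropy
    return F.binary_cross_entropy_with_logits(logits, target.float())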
def _valid_epoch(self, epoch):
    """
    Validate after training an epoch

    :return: A log that contains information about validation

    Note:
        The validation metrics in log must have the key 'val_metrics'.
    """
    self.model.eval()
    total_val_loss = 0
    total_val_metrics = np.zeros(len(self.metrics))
    all_t = []
    all_o = []
    with torch.no_grad():
        for batch_idx, (data, target) in enumerate(self.valid_data_loader):
            all_t.append(target.numpy())
            target = target.to(self.device)

            output, logits = self.model(data, self.device)
            loss = self.loss(output, target.reshape(-1))

            all_o.append(output.detach().cpu().numpy())

            self.writer.set_step(
                (epoch - 1) * len(self.valid_data_loader) + batch_idx, 'valid')
            self.writer.add_scalar('loss', loss.item())
            total_val_loss += loss.item()
            total_val_metrics += self._eval_metrics(output, target)
            # self.writer.add_image('input', make_grid(data.cpu(), nrow=8, normalize=True))

    total_val_metrics = (total_val_metrics / len(self.valid_data_loader)).tolist()
    if self.prauc_flag:
        all_o = np.hstack(all_o)
        all_t = np.hstack(all_t)
        total_val_metrics[-2] = pr_auc_1(all_o, all_t)
        total_val_metrics[-1] = roc_auc_1(all_o, all_t)

    return {
        'val_loss': total_val_loss / len(self.valid_data_loader),
        'val_metrics': total_val_metrics,
    }
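# Hedged sketch of the epoch-level AUC metrics used above. The real pr_auc_1 /
# roc_auc_1 live in the project's metric module and may differ; something along
# these lines, built on scikit-learn, is assumed here.
from sklearn.metrics import auc, precision_recall_curve, roc_auc_score


def pr_auc_1(output, target):
    # area under the precision-recall curve, computed over the whole epoch
    precision, recall, _ = precision_recall_curve(target, output)
    return auc(recall, precision)


def roc_auc_1(output, target):
    # area under the ROC curve, computed over the whole epoch
    return roc_auc_score(target, output)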
def main(test_config):
    # load model architecture
    model_path = test_config["trained_model_path"]
    model_config = torch.load(model_path)["config"]
    model = import_module("model", model_config)(**model_config["model"]["args"])
    model.summary()

    # setup data_loader instances
    data_loader = get_instance(module_data, "data_loader", test_config)
    weight = data_loader.dataset.get_pos_weight()
    print(weight)

    # get function handles of loss and metrics
    loss_fn = getattr(module_loss, model_config["loss"])
    metric_fns = [getattr(module_metric, met) for met in model_config["metrics"]]

    # load state dict
    checkpoint = torch.load(model_path)
    state_dict = checkpoint["state_dict"]
    if model_config["n_gpu"] > 1:
        model = torch.nn.DataParallel(model)
    model.load_state_dict(state_dict)

    # prepare model for testing
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    model.eval()

    total_loss = 0.0
    total_metrics = np.zeros(len(metric_fns))
    predictions = {"output": [], "target": []}

    with torch.no_grad():
        for data, target in data_loader:
            target = target.to(device)
            if len(target.shape) == 0:
                target = target.unsqueeze(dim=0)

            output = None
            if model_config["loss"] == "bce_loss":
                output, _ = model(data, device=device)
            elif model_config["loss"] == "bce_loss_with_logits":
                _, output = model(data, device=device)

            predictions["output"].append(output.cpu().numpy())
            predictions["target"].append(target.cpu().numpy())

            # computing loss, metrics on test set
            loss = loss_fn(output, target)
            batch_size = target.shape[0]
            total_loss += loss.item() * batch_size
            for i, metric in enumerate(metric_fns):
                total_metrics[i] += metric(output, target) * batch_size

            del output
            del target

    n_samples = len(data_loader.sampler)
    log = {"loss": total_loss / n_samples}
    log.update(
        {
            met.__name__: total_metrics[i].item() / n_samples
            for i, met in enumerate(metric_fns)
        }
    )

    predictions["output"] = np.hstack(predictions["output"])
    predictions["target"] = np.hstack(predictions["target"])
    print(len(data_loader.dataset), n_samples,
          predictions["output"].shape, predictions["target"].shape)

    total_metrics[-2] = pr_auc_1(predictions["output"], predictions["target"])
    total_metrics[-1] = roc_auc_1(predictions["output"], predictions["target"])
    log.update({metric_fns[-2].__name__: total_metrics[-2]})
    log.update({metric_fns[-1].__name__: total_metrics[-1]})
    print(log)

    save_dir = os.path.abspath(os.path.join(model_path, ".."))
    with open(os.path.join(save_dir, "predictions.pkl"), "wb") as handle:
        pickle.dump(predictions, handle, protocol=pickle.HIGHEST_PROTOCOL)
    with open(os.path.join(save_dir, "test-results.pkl"), "wb") as handle:
        pickle.dump(log, handle, protocol=pickle.HIGHEST_PROTOCOL)
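# Hedged usage sketch: one way main() might be invoked from the command line.
# The --config flag and the JSON layout of test_config (containing at least
# "trained_model_path" and "data_loader") are assumptions, not the project's
# actual CLI.
if __name__ == "__main__":
    import argparse
    import json

    parser = argparse.ArgumentParser(description="Evaluate a trained model on the test set")
    parser.add_argument("-c", "--config", required=True, help="path to the test config JSON")
    args = parser.parse_args()

    with open(args.config) as f:
        test_config = json.load(f)
    main(test_config)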
def _train_epoch(self, epoch):
    """
    Training logic for an epoch

    :param epoch: Current training epoch.
    :return: A log that contains all information you want to save.

    Note:
        If you have additional information to record, for example:
            > additional_log = {"x": x, "y": y}
        merge it with log before return. i.e.
            > log = {**log, **additional_log}
            > return log
        The metrics in log must have the key 'metrics'.
    """
    self.model.train()
    total_loss = 0
    total_metrics = np.zeros(len(self.metrics))
    all_t = []
    all_o = []
    for batch_idx, (data, target) in enumerate(self.data_loader):
        all_t.append(target.numpy())
        target = target.to(self.device)

        self.optimizer.zero_grad()
        output, logits = self.model(data, device=self.device)
        loss = self.loss(output, target)
        loss.backward()
        self.optimizer.step()

        self.writer.set_step((epoch - 1) * len(self.data_loader) + batch_idx)
        self.writer.add_scalar('loss', loss.item())
        total_loss += loss.item()
        total_metrics += self._eval_metrics(output, target)
        all_o.append(output.detach().cpu().numpy())

        if self.verbosity >= 2 and batch_idx % self.log_step == 0:
            self.logger.info('Train Epoch: {} [{}/{} ({:.0f}%)] {}: {:.6f}'.format(
                epoch,
                batch_idx * self.data_loader.batch_size,
                self.data_loader.n_samples,
                100.0 * batch_idx / len(self.data_loader),
                'loss',
                loss.item()))
        # self.writer.add_image('input', make_grid(data.cpu(), nrow=8, normalize=True))

    total_metrics = total_metrics / len(self.data_loader)
    if self.prauc_flag:
        all_o = np.hstack(all_o)
        all_t = np.hstack(all_t)
        total_metrics[-2] = pr_auc_1(all_o, all_t)
        total_metrics[-1] = roc_auc_1(all_o, all_t)

    log = {
        'loss': total_loss / len(self.data_loader),
        'metrics': total_metrics,
    }

    if self.do_validation:
        val_log = self._valid_epoch(epoch)
        log = {**log, **val_log}

    if self.lr_scheduler is not None:
        self.lr_scheduler.step()

    return log
def _train_epoch(self, epoch):
    """
    Training logic for an epoch

    :param epoch: Current training epoch.
    :return: A log that contains all information you want to save.

    Note:
        If you have additional information to record, for example:
            > additional_log = {"x": x, "y": y}
        merge it with log before return. i.e.
            > log = {**log, **additional_log}
            > return log
        The metrics in log must have the key 'metrics'.
    """
    self.model.train()
    total_loss = 0
    total_metrics = np.zeros(len(self.metrics))
    all_t = []
    all_o = []
    for batch_idx, (data, target) in enumerate(self.data_loader):
        target = target.to(self.device)
        if len(target.shape) == 0:
            target = target.unsqueeze(dim=0)

        output = None
        self.optimizer.zero_grad()
        if self.config["loss"] == "bce_loss":
            output, _ = self.model(data, device=self.device)
        elif self.config["loss"] == "bce_loss_with_logits":
            _, output = self.model(data, device=self.device)

        loss = self.loss(output, target)
        loss.backward()
        self.optimizer.step()

        self.writer.set_step((epoch - 1) * len(self.data_loader) + batch_idx)
        self.writer.add_scalar("loss", loss.item())
        total_loss += loss.item()
        total_metrics += self._eval_metrics(output, target)
        all_t.append(target.detach().cpu().numpy())
        all_o.append(output.detach().cpu().numpy())

        del target
        del output

        if self.verbosity >= 2 and batch_idx % self.log_step == 0:
            self.logger.info(
                "Train Epoch: {} [{}/{} ({:.0f}%)] {}: {:.6f}".format(
                    epoch,
                    batch_idx * self.data_loader.batch_size,
                    self.data_loader.n_samples,
                    100.0 * batch_idx / len(self.data_loader),
                    "loss",
                    loss.item(),
                ))

    total_metrics = total_metrics / len(self.data_loader)
    if self.prauc_flag:
        all_o = np.hstack(all_o)
        all_t = np.hstack(all_t)
        total_metrics[-2] = pr_auc_1(all_o, all_t)
        total_metrics[-1] = roc_auc_1(all_o, all_t)

    log = {
        "loss": total_loss / len(self.data_loader),
        "metrics": total_metrics,
    }

    if self.do_validation:
        val_log = self._valid_epoch(epoch)
        log = {**log, **val_log}

    if self.lr_scheduler is not None:
        self.lr_scheduler.step()

    return log
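# Hedged sketch of the _eval_metrics helper called from both the training and
# validation loops above: it evaluates every configured metric on the current
# batch and logs each value. The exact implementation in the project's base
# trainer may differ; np and self.writer are assumed available as in the
# surrounding trainer code.
def _eval_metrics(self, output, target):
    acc_metrics = np.zeros(len(self.metrics))
    for i, metric in enumerate(self.metrics):
        acc_metrics[i] += metric(output, target)
        self.writer.add_scalar(metric.__name__, acc_metrics[i])
    return acc_metrics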