def loop_through_data_for_eval(self, dataset: DataLoader, model: Model,
                               graph: Graph) -> Result:
    graph.to(self.device)
    model.to(device=self.device)
    model.eval()
    result = Result(entity_dict=self.entity_id_to_str_dict,
                    relation_dict=self.relation_id_to_str_dict)
    for idx, (paths, mask, _, triplet, num_paths) in enumerate(tqdm(dataset)):
        # The relation is the prediction target for each <subject, relation, object> triple.
        labels = triplet[:, 1]
        assert len(triplet) == len(labels)
        if num_paths.size() == torch.Size([1, 1]) and num_paths.item() == 0:
            # No reasoning paths for this triple: fall back to random scores
            # so the ranking metrics remain defined.
            score = torch.randn(1, self.num_relations)
        else:
            paths = paths.to(device=self.device)
            mask = mask.to(device=self.device)
            triplet = triplet.to(device=self.device)
            score = model(triplet,
                          graph,
                          paths=paths,
                          masks=mask,
                          num_paths=num_paths)
        result.append(score.cpu(), labels.cpu())
    return result
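
# Neither evaluation loop in this file disables gradient tracking. A minimal
# optional sketch (an assumption, not something the original code does):
# torch.no_grad can decorate a whole evaluation pass so no autograd graph is
# built for the forward calls. `evaluate` is a made-up name for illustration.
import torch
import torch.nn as nn


@torch.no_grad()  # disables autograd for everything executed inside `evaluate`
def evaluate(model: nn.Module, batch: torch.Tensor) -> torch.Tensor:
    model.eval()         # switch off dropout / use running batch-norm statistics
    return model(batch)  # forward pass only; no gradients are stored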
def loop_through_data_for_eval(
        self,
        dataset: np.ndarray,  # triples; assumed shape (num_triples, 3)
        model: Model,
        graph: Graph,
        batch_size: int) -> Result:
    graph.to(self.device)
    model.to(device=self.device)
    model.eval()
    result = Result()
    # Number of batches needed to cover the whole dataset.
    num_batches = ceil(len(dataset) / batch_size)
    for batch_idx in range(num_batches):
        start_idx, end_idx = batch_idx * batch_size, batch_idx * batch_size + batch_size
        batch = torch.from_numpy(
            dataset[start_idx:end_idx]).long().to(device=self.device)
        if self.config.link_predict:
            labels = batch[:, 1]  # the relations in <subject, relation, object>
        else:
            labels = batch[:, 2]  # the objects in <subject, relation, object>
        scores = model(batch, graph, link_predict=self.config.link_predict)
        result.append(scores.cpu(), labels.cpu())
    return result
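
# A quick standalone check of the batch arithmetic used above (illustration
# only; the array contents and the helper name are made up): 10 triples with
# batch_size = 4 need ceil(10 / 4) == 3 batches of sizes 4, 4 and 2.
from math import ceil

import numpy as np


def demo_batching() -> None:
    dataset = np.arange(30).reshape(10, 3)
    batch_size = 4
    num_batches = ceil(len(dataset) / batch_size)
    for batch_idx in range(num_batches):
        batch = dataset[batch_idx * batch_size:(batch_idx + 1) * batch_size]
        print(batch_idx, batch.shape)  # prints (4, 3), (4, 3), (2, 3)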
def pretty_print_results(self,
                         result: Result,
                         step: str,
                         name: str,
                         epoch: int = 0) -> None:
    """Prints the Result object in a human-friendly format and logs the
    results to the session's TensorBoard instance.

    Args:
        result: The lib.utils.result.Result object.
        step: e.g. "train", "validation", or "test".
        name: e.g. "1st fold".
        epoch: The nth training epoch.
    """
    overall_accuracy_pct = {"accuracy": result.calculate_accuracy_pct()}
    overall_accuracy_num = result.calculate_accuracy_num()
    class_accuracy_pct = result.calculate_accuracy_by_class_pct()
    class_accuracy_num = result.calculate_accuracy_by_class_num()
    loss = result.calculate_mean_loss()
    self.logger.info(f"Epoch {epoch + 1} {name} results:"
                     f"\n\t mean loss: {loss}"
                     f"\n\t overall accuracy pct: {overall_accuracy_pct}"
                     f"\n\t overall accuracy: {overall_accuracy_num}"
                     f"\n\t class accuracy pct: {class_accuracy_pct}"
                     f"\n\t class accuracy: {class_accuracy_num}")
    # Spaces are illegal in TensorBoard tags.
    name = "_".join(name.split(" "))
    if loss is not None:
        self.tensorboard.add_scalar(f"{step}/{name}/loss", loss, epoch)
    self.tensorboard.add_scalars(f"{step}/{name}/pct_accuracy", {
        **overall_accuracy_pct,
        **class_accuracy_pct
    }, epoch)
    self.tensorboard.add_scalars(f"{step}/{name}/num_accuracy", {
        **overall_accuracy_num,
        **class_accuracy_num
    }, epoch)
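
# Illustration only: SummaryWriter.add_scalars expects a flat {tag: value}
# dict, which is why the overall and per-class accuracy dicts are merged
# above. The tag, class names, and values below are made up for the example.
from torch.utils.tensorboard import SummaryWriter


def demo_add_scalars() -> None:
    writer = SummaryWriter()
    writer.add_scalars("validation/1st_fold/pct_accuracy",
                       {"accuracy": 0.91, "cat": 0.88, "dog": 0.94},
                       global_step=0)
    writer.close()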
def pretty_print_results(self,
                         result: Result,
                         split: str,
                         epoch: int = 0) -> None:
    mrr = result.calculate_mrr().item()
    top_1 = result.calculate_top_hits(hit=1).detach().item()
    top_3 = result.calculate_top_hits(hit=3).detach().item()
    top_10 = result.calculate_top_hits(hit=10).detach().item()
    self.logger.info(f"Epoch {epoch} {split} results:"
                     f"\n\t MRR: {round(mrr, 6)}"
                     f"\n\t TOP 1 HIT: {round(top_1, 6)}"
                     f"\n\t TOP 3 HIT: {round(top_3, 6)}"
                     f"\n\t TOP 10 HIT: {round(top_10, 6)}")
    self.tensorboard.add_scalar(f"{split}/mrr", mrr, epoch)
    self.tensorboard.add_scalars(f"{split}/top_hits", {
        "Top 1": top_1,
        "Top 3": top_3,
        "Top 10": top_10
    }, epoch)
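
# A minimal sketch of how MRR and top-k hit rates can be computed from a
# score matrix and a label vector. The Result methods used above are assumed
# to do something equivalent; this is not their actual implementation, and
# `mrr_and_hits` is a made-up name.
import torch


def mrr_and_hits(scores: torch.Tensor, labels: torch.Tensor, hits=(1, 3, 10)):
    # scores: (N, num_classes); labels: (N,) with the true class index per row.
    sorted_idx = scores.argsort(dim=1, descending=True)
    # 1-based rank of the true label within each row (higher score = better rank).
    ranks = (sorted_idx == labels.unsqueeze(1)).nonzero()[:, 1] + 1
    mrr = (1.0 / ranks.float()).mean()
    hit_rates = {k: (ranks <= k).float().mean() for k in hits}
    return mrr, hit_rates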
def train(self, num_epochs: int, ith_fold: int, train_mapping: Mapping,
          valid_mapping: Mapping) -> None:
    lowest_validation_loss = float("inf")
    for num_epoch in range(num_epochs):
        # ==============================================================
        # Training
        # ==============================================================
        self.logger.info(f"Starting training for epoch {num_epoch + 1}.")
        self.model.train()
        train_args = self.get_training_args(mapping=train_mapping)
        train_loop = super().loop_through_data_for_training(**train_args)
        train_result = Result(label_encoder=self.label_encoder)
        for iter_idx, (_, labels, loss, scores) in enumerate(train_loop):
            # Polynomial decay of the learning rate toward zero over roughly
            # 1000 steps, where the step count grows with the current epoch.
            for param_group in self.optimizer.param_groups:
                param_group['lr'] = 0.01 * (
                    1 - float(num_epoch * len(train_mapping) / 10 + iter_idx)
                    / 1000)**0.005
            train_result.append_loss(loss)
            train_result.append_scores(scores, labels)
        self.logger.info(
            f"Finished training epoch {num_epoch + 1} for the {ith_fold + 1}th fold."
        )
        self.pretty_print_results(train_result, "train",
                                  f"{ith_fold + 1}th fold", num_epoch)

        # ==============================================================
        # Validation
        # ==============================================================
        self.logger.info(f"Starting validation for epoch {num_epoch + 1}.")
        self.model.eval()
        valid_args = self.get_testing_args(mapping=valid_mapping)
        valid_result = super().loop_through_data_for_testing(**valid_args)
        self.logger.info(
            f"Finished validation epoch {num_epoch + 1} for the {ith_fold + 1}th fold."
        )
        self.pretty_print_results(valid_result, "validate",
                                  f"{ith_fold + 1}th fold", num_epoch)

        validation_loss = valid_result.calculate_mean_loss()
        if lowest_validation_loss > validation_loss and self.config.save_best_model:
            self.logger.info(
                f"{lowest_validation_loss} > {validation_loss}, saving model..."
            )
            self.save_current_model(file_name="lowest_loss.pt")
            lowest_validation_loss = validation_loss  # track the new best loss

        # Stop once the learning-rate schedule is exhausted.
        if num_epoch * len(train_mapping) / 10 > 1000:
            return valid_result
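
# A standalone restatement of the in-loop learning-rate schedule (a sketch for
# readability, not part of the original code): polynomial decay of the base
# rate 0.01 toward zero over roughly 1000 steps, where a "step" is
# num_epoch * len(train_mapping) / 10 + iter_idx. The early return above
# presumably exists to stop before the decay term's base turns negative.
def scheduled_lr(step: float, base_lr: float = 0.01,
                 max_steps: float = 1000.0, power: float = 0.005) -> float:
    return base_lr * (1.0 - step / max_steps) ** power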
def loop_through_data_for_testing(self, model: Union[Model, nn.DataParallel],
                                  mapping: Mapping,
                                  **loader_params) -> Result:
    """Loops through the provided data and returns a lib.utils.result.Result
    object, which can then be used to calculate scores.

    Args:
        model: The model used to make predictions.
        mapping: A lib.utils.mapping.Mapping object.
        **loader_params: Any keyword arguments applicable to the
            torch.utils.data.DataLoader class; see
            https://pytorch.org/docs/stable/data.html

    Returns:
        A lib.utils.result.Result object.
    """
    loader = Dataset.build_loader(
        mapping,
        label_encoder=self.label_encoder,
        image_transforms=self.provide_image_transforms(),
        **loader_params)
    result = Result(label_encoder=self.label_encoder)
    model.eval()
    model.to(device=self.device)
    for iter_idx, (images, labels) in enumerate(loader):
        images = images.float().to(device=self.device)
        labels = labels.squeeze().long().to(device=self.device)
        # nn.DataParallel only forwards `forward()`, so custom methods such as
        # `classification_loss` have to be reached through `.module`.
        if isinstance(model, nn.DataParallel):
            loss, scores = model.module.classification_loss(images, labels)
        else:
            loss, scores = model.classification_loss(images, labels)
        result.append_scores(scores, labels)
        result.append_loss(loss)
    return result
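
# Hypothetical helper (not in the original code) that captures the unwrapping
# logic used in the branch above; `unwrap` is a made-up name.
import torch.nn as nn


def unwrap(model: nn.Module) -> nn.Module:
    # Return the user-defined module whether or not it is wrapped in
    # nn.DataParallel, so callers can reach custom methods directly.
    return model.module if isinstance(model, nn.DataParallel) else model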