    def loop_through_data_for_eval(self,
                                   dataset: DataLoader,
                                   model: Model,
                                   graph: Graph) -> Result:
        graph.to(self.device)
        model.to(device=self.device)
        model.eval()

        result = Result(entity_dict=self.entity_id_to_str_dict, relation_dict=self.relation_id_to_str_dict)

        for paths, mask, _, triplet, num_paths in tqdm(dataset):
            labels = triplet[:, 1]  # the relation in <subject, relation, object>

            # A sample with no paths cannot be scored by the model; fall back to
            # a random score vector so the ranking metrics stay well-defined.
            if num_paths.size() == torch.Size([1, 1]) and num_paths.item() == 0:
                score = torch.randn(1, self.num_relations)
            else:
                paths = paths.to(device=self.device)
                mask = mask.to(device=self.device)
                triplet = triplet.to(device=self.device)

                score = model(triplet, graph, paths=paths, masks=mask, num_paths=num_paths)

            result.append(score.cpu(), labels.cpu())

        return result
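The snippets on this page only show Result being filled (append) and read back (calculate_mrr and calculate_top_hits in Example 4). For orientation, here is a minimal, hypothetical sketch of that interface; the real lib.utils.result.Result is not shown on this page, so treat the body as an illustration of standard rank-based metrics, not the actual implementation:

import torch

class Result:
    """Illustration only: a stand-in for lib.utils.result.Result."""

    def __init__(self):
        self.scores, self.labels = [], []

    def append(self, scores: torch.Tensor, labels: torch.Tensor) -> None:
        self.scores.append(scores)
        self.labels.append(labels)

    def _ranks(self) -> torch.Tensor:
        scores = torch.cat(self.scores)  # (N, num_classes)
        labels = torch.cat(self.labels)  # (N,)
        true_scores = scores.gather(1, labels.view(-1, 1))
        # rank = 1 + how many scores beat the true label's score
        return 1 + (scores > true_scores).sum(dim=1)

    def calculate_mrr(self) -> torch.Tensor:
        return (1.0 / self._ranks().float()).mean()

    def calculate_top_hits(self, hit: int = 1) -> torch.Tensor:
        return (self._ranks() <= hit).float().mean()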
Example 2
    def loop_through_data_for_eval(
            self,
            dataset: np.ndarray,  # an (N, 3) array of <subject, relation, object> triples
            model: Model,
            graph: Graph,
            batch_size: int) -> Result:
        graph.to(self.device)
        model.to(device=self.device)
        model.eval()

        result = Result()

        num_batches = ceil(len(dataset) / batch_size)
        for batch_idx in range(num_batches):
            start_idx = batch_idx * batch_size
            end_idx = start_idx + batch_size
            batch = torch.from_numpy(
                dataset[start_idx:end_idx]).long().to(device=self.device)

            if self.config.link_predict:
                labels = batch[:, 1]
            else:
                labels = batch[:, 2]  # the objects in <subject, relation, object>

            scores = model(batch, graph, link_predict=self.config.link_predict)
            result.append(scores.cpu(), labels.cpu())

        return result
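A side note on the batch arithmetic in Example 2: NumPy clamps an out-of-range slice end, so the last, possibly partial batch needs no special casing. A toy illustration (data made up for the example):

import numpy as np
from math import ceil

dataset = np.arange(10 * 3).reshape(10, 3)     # ten <subject, relation, object> triples
batch_size = 4
num_batches = ceil(len(dataset) / batch_size)  # ceil(10 / 4) = 3

for batch_idx in range(num_batches):
    start_idx = batch_idx * batch_size
    batch = dataset[start_idx:start_idx + batch_size]
    print(batch.shape)                          # (4, 3), (4, 3), then (2, 3)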
Example 3
    def pretty_print_results(self,
                             result: Result,
                             step: str,
                             name: str,
                             epoch: int = 0) -> None:
        """Prints the Result object in a human-friendly format as well as logging the results to the session tensorboard
        instance.

        Args:
            result: The lib.utils.result.Result object.
            step: e.g. "train", "validation", or "test"
            name: e.g. "1st fold", "
            epoch: The nth training epoch
        """
        overall_accuracy_pct = {"accuracy": result.calculate_accuracy_pct()}
        overall_accuracy_num = result.calculate_accuracy_num()
        class_accuracy_pct = result.calculate_accuracy_by_class_pct()
        class_accuracy_num = result.calculate_accuracy_by_class_num()
        loss = result.calculate_mean_loss()

        self.logger.info(f"Epoch {epoch + 1} {name} results:"
                         f"\n\t mean loss: {loss}"
                         f"\n\t overall accuracy pct: {overall_accuracy_pct}"
                         f"\n\t overall accuracy: {overall_accuracy_num}"
                         f"\n\t class accuracy pct: {class_accuracy_pct}"
                         f"\n\t class accuracy: {class_accuracy_num}")

        # TensorBoard tags cannot contain spaces
        name = name.replace(" ", "_")

        if loss is not None:
            self.tensorboard.add_scalar(f"{step}/{name}/loss", loss, epoch)

        self.tensorboard.add_scalars(f"{step}/{name}/pct_accuracy", {
            **overall_accuracy_pct,
            **class_accuracy_pct
        }, epoch)

        self.tensorboard.add_scalars(f"{step}/{name}/num_accuracy", {
            **overall_accuracy_num,
            **class_accuracy_num
        }, epoch)
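The dict-merging in the two add_scalars calls above puts the overall and per-class curves on a single chart. A self-contained demo with made-up numbers, mirroring the tag layout of the f-strings:

from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter(log_dir="runs/example")   # hypothetical log directory
overall_accuracy_pct = {"accuracy": 0.91}
class_accuracy_pct = {"cat": 0.88, "dog": 0.94}  # made-up per-class numbers

# One call draws all three curves on one "pct_accuracy" chart, keyed by tag.
writer.add_scalars("validation/1st_fold/pct_accuracy",
                   {**overall_accuracy_pct, **class_accuracy_pct},
                   global_step=0)
writer.close()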
Example 4
    def pretty_print_results(self,
                             result: Result,
                             split: str,
                             epoch: int = 0) -> None:
        mrr = result.calculate_mrr().item()
        top_1 = result.calculate_top_hits(hit=1).item()
        top_3 = result.calculate_top_hits(hit=3).item()
        top_10 = result.calculate_top_hits(hit=10).item()

        self.logger.info(f"Epoch {epoch} {split} results:"
                         f"\n\t MRR: {round(mrr, 6)}"
                         f"\n\t TOP 1 HIT: {round(top_1, 6)}"
                         f"\n\t TOP 3 HIT: {round(top_3, 6)}"
                         f"\n\t TOP 10 HIT: {round(top_10, 6)}")

        self.tensorboard.add_scalar(f"{split}/mrr", mrr, epoch)
        self.tensorboard.add_scalars(f"{split}/top_hits", {
            "Top 1": top_1,
            "Top 3": top_3,
            "Top 10": top_10
        }, epoch)
Example 5
    def train(self, num_epochs: int, ith_fold: int, train_mapping: Mapping,
              valid_mapping: Mapping) -> Optional[Result]:

        lowest_validation_loss = float("inf")

        for num_epoch in range(num_epochs):
            # ==========================================================================================================
            # Training
            # ==========================================================================================================
            self.logger.info(f"Starting training for epoch {num_epoch + 1}.")
            self.model.train()
            train_args = self.get_training_args(mapping=train_mapping)
            train_loop = super().loop_through_data_for_training(**train_args)
            train_result = Result(label_encoder=self.label_encoder)

            for iter_idx, (_, labels, loss, scores) in enumerate(train_loop):
                # Polynomial decay of the learning rate, driven by the total
                # number of iterations seen so far across epochs.
                for param_group in self.optimizer.param_groups:
                    param_group['lr'] = 0.01 * (
                        1 - (num_epoch * len(train_mapping) / 10 +
                             iter_idx) / 1000)**0.005
                train_result.append_loss(loss)
                train_result.append_scores(scores, labels)

            self.logger.info(
                f"Finished training epoch {num_epoch + 1} for fold {ith_fold + 1}."
            )
            self.pretty_print_results(train_result, "train",
                                      f"{ith_fold + 1}th fold", num_epoch)

            # ==========================================================================================================
            # Validation
            # ==========================================================================================================
            self.logger.info(f"Starting validation for epoch {num_epoch + 1}.")
            self.model.eval()
            valid_args = self.get_testing_args(mapping=valid_mapping)
            valid_result = super().loop_through_data_for_testing(**valid_args)

            self.logger.info(
                f"Finished validation epoch {num_epoch + 1} for fold {ith_fold + 1}."
            )
            self.pretty_print_results(valid_result, "validate",
                                      f"{ith_fold + 1}th fold", num_epoch)

            validation_loss = valid_result.calculate_mean_loss()

            if validation_loss < lowest_validation_loss and self.config.save_best_model:
                self.logger.info(
                    f"{lowest_validation_loss} > {validation_loss}, saving model..."
                )
                lowest_validation_loss = validation_loss
                self.save_current_model(file_name="lowest_loss.pt")

            # Stop early once the polynomial schedule above has fully decayed.
            if num_epoch * len(train_mapping) / 10 > 1000:
                return valid_result
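The inline learning-rate update in train is a polynomial decay over the total iterations seen so far. Here is the same formula pulled out into a standalone function (max_steps and power are names I am giving the literals 1000 and 0.005, not names from the repository):

def poly_decay_lr(base_lr: float, num_epoch: int, epoch_len: int,
                  iter_idx: int, max_steps: int = 1000,
                  power: float = 0.005) -> float:
    # Total progress so far, in the same units the guard clause above uses.
    step = num_epoch * epoch_len / 10 + iter_idx
    return base_lr * (1 - step / max_steps) ** power

print(poly_decay_lr(0.01, num_epoch=0, epoch_len=100, iter_idx=0))   # 0.01
print(poly_decay_lr(0.01, num_epoch=5, epoch_len=100, iter_idx=50))  # slightly lower

If per-iteration granularity were not needed, torch.optim.lr_scheduler.LambdaLR would be the idiomatic home for this logic rather than mutating param_groups by hand.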
Example 6
    def loop_through_data_for_testing(self, model: Union[Model,
                                                         nn.DataParallel],
                                      mapping: Mapping,
                                      **loader_params) -> Result:
        """A function that loops through the provided data and returns a lib.utils.result.Result object, which can then
        be used to calculate scores.

        Args:
            model: The model to make predictions.
            mapping: A lib.util.mapping.Mapping object.
            **loader_params: Any key-value arguments applicable to the torch.utils.data.DataLoader class, see a list
                here https://pytorch.org/docs/stable/data.html

        Returns: A lib.utils.result.Result object.
        """
        loader = Dataset.build_loader(
            mapping,
            label_encoder=self.label_encoder,
            image_transforms=self.provide_image_transforms(),
            **loader_params)

        result = Result(label_encoder=self.label_encoder)

        model.eval()
        model.to(device=self.device)

        for images, labels in loader:
            images = images.float().to(device=self.device)
            labels = labels.squeeze().long().to(device=self.device)  # drop singleton label dims

            # nn.DataParallel wraps the model; reach through .module for custom methods.
            if isinstance(model, nn.DataParallel):
                loss, scores = model.module.classification_loss(images, labels)
            else:
                loss, scores = model.classification_loss(images, labels)

            result.append_scores(scores, labels)
            result.append_loss(loss)

        return result
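Because **loader_params is forwarded straight to torch.utils.data.DataLoader, a call site can tune batching without touching this method. A hypothetical invocation (trainer, model, and valid_mapping stand in for objects the surrounding code provides):

result = trainer.loop_through_data_for_testing(
    model=model,
    mapping=valid_mapping,
    batch_size=64,     # forwarded to torch.utils.data.DataLoader
    num_workers=4,
    shuffle=False,     # keep evaluation order deterministic
)
print(result.calculate_mean_loss())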