    def __init__(self, threshold: Optional[float] = 0.5):
        """
        predicts hyponymy relation based on the entailment probability of entity pair.

        @param threshold: threshold of the entailment probability. larget than specified value is regarded as hyponymy relation.
        """
        self._auxiliary = HyponymyScoreLoss()
        self._threshold = threshold
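
    # Illustrative sketch only (not part of the original class): how the threshold
    # above would typically be applied. `calc_entailment_probability` is assumed to
    # come from the BasePredictor interface defined further below.
    #
    #   def predict_is_hyponymy_relation(self, mat_code_prob_x, mat_code_prob_y,
    #                                    threshold: Optional[float] = None) -> bool:
    #       threshold = self._threshold if threshold is None else threshold
    #       p_xy = self.calc_entailment_probability(mat_code_prob_x, mat_code_prob_y)
    #       return p_xy >= threshold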
class UnsupervisedTrainer(pl.LightningModule):
    def __init__(
        self,
        model: MaskedAutoEncoder,
        loss_reconst: _Loss,
        loss_mutual_info: Optional[_Loss] = None,
        dataloader_train: Optional[DataLoader] = None,
        dataloader_val: Optional[DataLoader] = None,
        dataloader_test: Optional[DataLoader] = None,
        learning_rate: Optional[float] = 0.001,
        model_parameter_schedulers: Optional[
            Dict[str, Callable[[float, int], float]]] = None,
        loss_parameter_schedulers: Optional[
            Dict[str, Dict[str, Callable[[float, int], float]]]] = None,
    ):

        super(UnsupervisedTrainer, self).__init__()

        self._scale_loss_reconst = loss_reconst.scale
        self._scale_loss_mi = loss_mutual_info.scale if loss_mutual_info is not None else 1.

        self._model = model
        self._encoder = model._encoder
        self._decoder = model._decoder
        self._loss_reconst = loss_reconst
        self._loss_mutual_info = loss_mutual_info
        self._learning_rate = learning_rate
        self._dataloaders = {
            "train": dataloader_train,
            "val": dataloader_val,
            "test": dataloader_test
        }
        # auxiliary loss layer, used solely for validation metrics
        self._auxiliary = HyponymyScoreLoss()

        # set model parameter scheduler
        if model_parameter_schedulers is None:
            self._model_parameter_schedulers = {}
        else:
            self._model_parameter_schedulers = model_parameter_schedulers

        if loss_parameter_schedulers is None:
            self._loss_parameter_schedulers = {}
        else:
            self._loss_parameter_schedulers = loss_parameter_schedulers
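
    # Illustrative usage sketch (the argument values below are assumptions, not
    # definitions from this module); both loss objects are expected to expose a
    # `scale` attribute, which __init__ reads above.
    #
    #   module = UnsupervisedTrainer(
    #       model=MaskedAutoEncoder(...),
    #       loss_reconst=reconstruction_loss,    # any _Loss with a `scale` attribute
    #       loss_mutual_info=mutual_info_loss,   # optional, also with `scale`
    #       dataloader_train=train_loader,
    #       dataloader_val=val_loader,
    #       learning_rate=1e-3,
    #   )
    #   pl.Trainer(max_nb_epochs=10).fit(module)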

    def _numpy_to_tensor(self, np_array: np.ndarray):
        return torch.from_numpy(np_array).to(self._device)

    def _get_model_device(self):
        return (next(self._model.parameters())).device

    def configure_optimizers(self):
        opt = Adam(self.parameters(), lr=self._learning_rate)
        return opt

    @pl.data_loader
    def tng_dataloader(self):
        return self._dataloaders["train"]

    @pl.data_loader
    def val_dataloader(self):
        return self._dataloaders["val"]

    @pl.data_loader
    def test_dataloader(self):
        return self._dataloaders["test"]

    def forward(self, x):
        return self._model.forward(x)

    def training_step(self, data_batch, batch_nb):

        current_step = self.trainer.global_step / (self.trainer.max_nb_epochs *
                                                   self.trainer.total_batches)
        self._update_model_parameters(current_step, verbose=False)
        self._update_loss_parameters(current_step, verbose=False)

        # forward computation
        t_x = data_batch["embedding"]
        t_latent_code, t_code_prob, t_x_dash = self._model.forward(t_x)

        # (required) reconstruction loss
        loss_reconst = self._loss_reconst.forward(t_x_dash, t_x)

        if self._loss_mutual_info is not None:
            loss_mi = self._loss_mutual_info(t_code_prob)
        else:
            loss_mi = torch.tensor(0.0,
                                   dtype=torch.float32,
                                   device=t_code_prob.device)

        loss = loss_reconst + loss_mi

        dict_losses = {
            "train_loss_reconst": loss_reconst / self._scale_loss_reconst,
            "train_loss_mutual_info": loss_mi / self._scale_loss_mi,
            "train_loss": loss
        }
        return {"loss": loss, "log": dict_losses}

    def _evaluate_code_stats(self, t_code_prob):

        _EPS = 1E-6
        n_ary = self._model.n_ary
        soft_code_length = self._auxiliary.calc_soft_code_length(t_code_prob)
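        # The quantity below is the per-digit KL divergence from the uniform
        # distribution over n_ary symbols, averaged over digits:
        # KL(p || U) = log(n_ary) - H(p) = log(n_ary) + sum_c p_c * log(p_c).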
        code_probability_divergence = torch.mean(
            np.log(n_ary) +
            torch.sum(t_code_prob * torch.log(t_code_prob + _EPS), dim=-1),
            dim=-1)

        metrics = {
            "val_soft_code_length_mean":
            torch.mean(soft_code_length),
            "val_soft_code_length_std":
            torch.std(soft_code_length),
            "val_code_probability_divergence":
            torch.mean(code_probability_divergence)
        }
        return metrics

    def validation_step(self, data_batch, batch_nb):

        # forward computation without back-propagation
        t_x = data_batch["embedding"]
        t_intermediate, t_code_prob, t_x_dash = self._model._predict(t_x)

        loss_reconst = self._loss_reconst.forward(t_x_dash, t_x)
        if self._loss_mutual_info is not None:
            loss_mi = self._loss_mutual_info(t_code_prob)
        else:
            loss_mi = torch.tensor(0.0,
                                   dtype=torch.float32,
                                   device=t_code_prob.device)

        loss = loss_reconst + loss_mi

        metrics = {
            "val_loss_reconst": loss_reconst / self._scale_loss_reconst,
            "val_mutual_info": loss_mi / self._scale_loss_mi,
            "val_loss": loss
        }
        # if self._loss_mutual_info is not None:
        metrics_repr = self._evaluate_code_stats(t_code_prob)
        metrics.update(metrics_repr)

        return {"val_loss": loss, "log": metrics}

    def validation_end(self, outputs):
        avg_loss = torch.stack([x['val_loss'] for x in outputs]).mean()

        avg_metrics = defaultdict(list)
        for output in outputs:
            for key, value in output["log"].items():
                avg_metrics[key].append(value)
        for key, values in avg_metrics.items():
            avg_metrics[key] = torch.stack(values).mean()
        return {'avg_val_loss': avg_loss, 'log': avg_metrics}

    def on_save_checkpoint(self, checkpoint):
        device = self._get_model_device()
        if device != torch.device("cpu"):
            # move the model to CPU; .to() modifies the self._model instance in place.
            _ = self._model.to(device=torch.device("cpu"))
        # save model dump
        checkpoint["model_dump"] = pickle.dumps(self._model)
        # then restore the original device if necessary.
        if device != torch.device("cpu"):
            # move back to the original device (typically CUDA).
            _ = self._model.to(device=device)

    @classmethod
    def load_model_from_checkpoint(cls,
                                   weights_path: str,
                                   on_gpu: bool,
                                   map_location=None):
        if on_gpu:
            if map_location is not None:
                checkpoint = torch.load(weights_path,
                                        map_location=map_location)
            else:
                checkpoint = torch.load(weights_path)
        else:
            checkpoint = torch.load(weights_path,
                                    map_location=lambda storage, loc: storage)

        model = pickle.loads(checkpoint["model_dump"])
        if on_gpu:
            model = model.cuda(device=map_location)
        state_dict = {
            key.replace("_model.", ""): param
            for key, param in checkpoint["state_dict"].items()
        }
        model.load_state_dict(state_dict)

        return model
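
    # Illustrative usage sketch (path and variable names are assumptions):
    #
    #   model = UnsupervisedTrainer.load_model_from_checkpoint(
    #       weights_path="path/to/checkpoint.ckpt", on_gpu=False)
    #   model.eval()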

    def _update_model_parameters(self,
                                 current_step: Optional[float] = None,
                                 verbose: bool = False):
        if current_step is None:
            current_step = self.current_epoch / self.trainer.max_nb_epochs

        for parameter_name, scheduler_function in self._model_parameter_schedulers.items(
        ):
            if scheduler_function is None:
                continue

            current_value = getattr(self._model, parameter_name, None)
            if current_value is not None:
                new_value = scheduler_function(current_step,
                                               self.current_epoch)
                setattr(self._model, parameter_name, new_value)

                if verbose:
                    print(
                        f"{parameter_name}: {current_value:.2f} -> {new_value:.2f}"
                    )

    def _update_loss_parameters(self,
                                current_step: Optional[float] = None,
                                verbose: bool = False):
        if current_step is None:
            current_step = self.current_epoch / self.trainer.max_nb_epochs

        for loss_name, dict_property_scheduler in self._loss_parameter_schedulers.items(
        ):
            # get loss layer
            if not loss_name.startswith("_"):
                loss_name = "_" + loss_name
            loss_layer = getattr(self, loss_name, None)
            if loss_layer is None:
                continue

            # get property name and apply scheduler function
            for property_name, scheduler_function in dict_property_scheduler.items(
            ):
                if scheduler_function is None:
                    continue

                # check if property exists
                if not hasattr(loss_layer, property_name):
                    continue

                current_value = getattr(loss_layer, property_name, None)
                new_value = scheduler_function(current_step,
                                               self.current_epoch)
                setattr(loss_layer, property_name, new_value)

                if verbose:
                    print(
                        f"{loss_name}.{property_name}: {current_value:.2f} -> {new_value:.2f}"
                    )
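
    # Illustrative sketch of the scheduler dictionaries (the attribute names
    # `temperature` and `scale` are assumptions, not guaranteed to exist on the
    # model / loss): each callable receives (current_step in [0, 1], current_epoch)
    # and returns the new parameter value.
    #
    #   model_parameter_schedulers = {
    #       "temperature": lambda step, epoch: max(0.1, 1.0 - 0.9 * step),
    #   }
    #   loss_parameter_schedulers = {
    #       "loss_mutual_info": {"scale": lambda step, epoch: min(1.0, 2.0 * step)},
    #   }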

    def on_epoch_start(self):
        pass
class BasePredictor(metaclass=ABCMeta):
    def __init__(self, threshold: Optional[float] = 0.0):
        # this is used to compute soft code length
        self._auxiliary = HyponymyScoreLoss()
        self._threshold = threshold

    def _tensor_to_numpy(self, object: Array_like) -> np.ndarray:
        if isinstance(object, torch.Tensor):
            return object.cpu().numpy()
        elif isinstance(object, np.ndarray):
            return object
        else:
            raise TypeError(f"unsupported type: {type(object)}")

    def _numpy_to_tensor(self, object: Array_like) -> torch.Tensor:
        if isinstance(object, torch.Tensor):
            return object
        elif isinstance(object, np.ndarray):
            return torch.from_numpy(object)
        else:
            raise TypeError(f"unsupported type: {type(object)}")

    def calc_optimal_threshold_fvalue(self,
                                      y_true,
                                      probas_pred,
                                      verbose: bool = True,
                                      **kwargs):
        def _f1_score_safe(prec, recall):
            if prec == recall == 0.0:
                return 0.0
            else:
                return 2 * prec * recall / (prec + recall)

        # compute the threshold that maximizes f-value.
        v_prec, v_recall, v_threshold = precision_recall_curve(
            y_true=y_true, probas_pred=probas_pred, **kwargs)
        v_f1_score = np.vectorize(_f1_score_safe)(v_prec, v_recall)
        idx = np.nanargmax(v_f1_score)
        threshold_opt = v_threshold[idx]

        if verbose:
            report = {
                "threshold_opt": threshold_opt,
                "precision": v_prec[idx],
                "recall": v_recall[idx],
                "f1-score": v_f1_score[idx]
            }
            pprint(report)

        return threshold_opt

    def calc_optimal_threshold_accuracy(self,
                                        y_true,
                                        probas_pred,
                                        verbose: bool = True,
                                        **kwargs):

        # compute the threshold that maximizes accuracy using the receiver operating characteristic (ROC) curve.
        v_fpr, v_tpr, v_threshold = roc_curve(y_true=y_true,
                                              y_score=probas_pred,
                                              **kwargs)
        n_sample = len(y_true)
        n_positive = np.sum(np.array(y_true) == True)
        n_negative = n_sample - n_positive
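        # accuracy = (TP + TN) / n_sample = (TPR * n_positive + (1 - FPR) * n_negative) / n_sample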
        v_accuracy = (v_tpr * n_positive + (1 - v_fpr) * n_negative) / n_sample

        idx = np.nanargmax(v_accuracy)
        threshold_opt = v_threshold[idx]

        if verbose:
            report = {
                "threshold_opt": threshold_opt,
                "tpr": v_tpr[idx],
                "fpr": v_fpr[idx],
                "accuracy": v_accuracy[idx]
            }
            pprint(report)

        return threshold_opt
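
    # Illustrative usage sketch (variable names are assumptions): both methods take
    # binary ground-truth labels and continuous prediction scores.
    #
    #   scores = [predictor.infer_score(x, y) for x, y in candidate_pairs]
    #   threshold = predictor.calc_optimal_threshold_fvalue(y_true=labels, probas_pred=scores)
    #   # or, to maximize accuracy instead of F1:
    #   threshold = predictor.calc_optimal_threshold_accuracy(y_true=labels, probas_pred=scores)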

    def calc_soft_hyponymy_score(self, mat_code_prob_x: Array_like,
                                 mat_code_prob_y: Array_like):
        t_code_prob_x = self._numpy_to_tensor(mat_code_prob_x)
        t_code_prob_y = self._numpy_to_tensor(mat_code_prob_y)
        s_xy = self._auxiliary.calc_soft_hyponymy_score(
            t_code_prob_x, t_code_prob_y).item()

        return s_xy

    def calc_hyponymy_propensity_score(self,
                                       mat_code_prob_x: Array_like,
                                       mat_code_prob_y: Array_like,
                                       directionality: bool = False):
        s_xy = self.calc_soft_hyponymy_score(mat_code_prob_x, mat_code_prob_y)
        s_yx = self.calc_soft_hyponymy_score(mat_code_prob_y, mat_code_prob_x)

        # calculate hyponymy propensity score
        score = utils.hypernymy_propensity_score(s_xy,
                                                 s_yx,
                                                 directionality=directionality)

        return score

    def calc_entailment_probability(self, mat_code_prob_x: Array_like,
                                    mat_code_prob_y: Array_like):
        t_code_prob_x = self._numpy_to_tensor(mat_code_prob_x)
        t_code_prob_y = self._numpy_to_tensor(mat_code_prob_y)
        p_xy = self._auxiliary.calc_ancestor_probability(
            t_code_prob_x, t_code_prob_y).item()
        return p_xy

    @abstractmethod
    def THRESHOLD(self):
        pass

    @abstractmethod
    def predict_directionality(self, mat_code_prob_x: Array_like,
                               mat_code_prob_y: Array_like) -> str:
        """
        Assuming (x, y) is in a hyponymy relation, predicts which of x and y is the hypernym.

        :param mat_code_prob_x: code probability of the entity x
        :param mat_code_prob_y: code probability of the entity y
        :return: "x" if x is the hypernym, "y" otherwise.
        """

        pass

    @abstractmethod
    def predict_is_hyponymy_relation(
            self,
            mat_code_prob_x: Array_like,
            mat_code_prob_y: Array_like,
            threshold: Optional[float] = None) -> bool:
        """
        Predicts whether the (x, y) pair is in a hyponymy relation or some other relation (e.g. co-hyponymy, reverse hyponymy, ...).
        This function is order-dependent: swapping the order of the arguments may change the response.

        :param mat_code_prob_x: code probability of the hypernym candidate
        :param mat_code_prob_y: code probability of the hyponym candidate
        """

        pass

    @abstractmethod
    def predict_hyponymy_relation(self,
                                  mat_code_prob_x: Array_like,
                                  mat_code_prob_y: Array_like,
                                  threshold: Optional[float] = None) -> str:
        """
        Predicts which relation holds for the (x, y) pair: hyponymy, reverse hyponymy, or other.
        This function is order-dependent only if the (x, y) pair is in a hyponymy or reverse-hyponymy relation.

        :param mat_code_prob_x: code probability of the entity x
        :param mat_code_prob_y: code probability of the other entity y
        :return: "hyponymy", "reverse-hyponymy", or "other"
        """

        pass

    @abstractmethod
    def infer_score(self, mat_code_prob_x: Array_like,
                    mat_code_prob_y: Array_like) -> float:
        """
        Returns the inferred score that is used for classification.
        Conceptually, it represents the degree of the hyponymy relation.

        @param mat_code_prob_x: code probability of the entity x
        @param mat_code_prob_y: code probability of the other entity y
        @return: scalar value
        """

        pass

    @property
    def CLASS_LABELS(self):
        return {
            "directionality": {"x", "y"},
            "is_hyponymy_relation": {True, False},
            "hyponymy_relation": {"hyponymy", "reverse-hyponymy", "other"}
        }
class HyponymyScoreLossLayer(unittest.TestCase):

    _EPS = 1E-5

    def setUp(self) -> None:

        n_ary = 6
        n_digits = 4
        tau = 4.0

        self._n_ary = n_ary
        self._n_digits = n_digits

        vec_p_x_zero = np.array([0.02,0.05,0.8,0.8])
        vec_x_repr = np.array([1,2,0,0])
        vec_p_y_zero = np.array([0.02,0.05,0.1,0.6])
        vec_y_repr = np.array([1,2,3,0])
        mat_p_x_1 = utils.generate_probability_matrix(vec_p_x_zero, vec_x_repr, n_digits, n_ary, tau)
        mat_p_y_1 = utils.generate_probability_matrix(vec_p_y_zero, vec_y_repr, n_digits, n_ary, tau)

        vec_p_x_zero = np.array([0.02,0.05,0.05,0.8])
        vec_x_repr = np.array([1,2,3,0])
        vec_p_y_zero = np.array([0.02,0.05,0.6,0.6])
        vec_y_repr = np.array([1,2,0,0])
        mat_p_x_2 = utils.generate_probability_matrix(vec_p_x_zero, vec_x_repr, n_digits, n_ary, tau)
        mat_p_y_2 = utils.generate_probability_matrix(vec_p_y_zero, vec_y_repr, n_digits, n_ary, tau)

        vec_p_x_zero = np.array([0.02,0.02,0.02,0.02])
        vec_x_repr = np.array([2,2,3,3])
        vec_p_y_zero = np.array([0.02,0.02,0.02,0.02])
        vec_y_repr = np.array([1,1,2,2])
        mat_p_x_3 = utils.generate_probability_matrix(vec_p_x_zero, vec_x_repr, n_digits, n_ary, tau)
        mat_p_y_3 = utils.generate_probability_matrix(vec_p_y_zero, vec_y_repr, n_digits, n_ary, tau)

        # x:hypernym, y:hyponym
        # mat_p_*: (n_dim, n_ary)
        self._mat_p_x = mat_p_x_1
        self._mat_p_y = mat_p_y_1
        # arry_p_*: (n_batch, n_dim, n_ary)
        self._arry_p_x = np.stack([mat_p_x_1, mat_p_x_2, mat_p_x_3])
        self._arry_p_y = np.stack([mat_p_y_1, mat_p_y_2, mat_p_y_3])
        self._arry_p_batch = np.stack([mat_p_x_1, mat_p_x_2, mat_p_x_3, mat_p_y_1, mat_p_y_2, mat_p_y_3])
        # train_signal: (hypernym_index, hyponym_index, hyponymy_score)
        self._hyponymy_tuples = [(0, 3, 1.0), (1, 4, -1.0), (2, 5, -4.0)] # [(x1, y1, 1.0), (x2, y2, -1.0), (x3, y3, -4.0)]

        self._t_mat_p_x = torch.from_numpy(self._mat_p_x)
        self._t_mat_p_y = torch.from_numpy(self._mat_p_y)
        self._t_arry_p_x = torch.from_numpy(self._arry_p_x)
        self._t_arry_p_y = torch.from_numpy(self._arry_p_y)
        self._t_arry_p_batch = torch.from_numpy(self._arry_p_batch)

        self._normalize_code_length = False
        self._normalize_coefficient_for_ground_truth = None
        self._loss_layer = HyponymyScoreLoss(normalize_hyponymy_score=self._normalize_code_length,
                                             normalize_coefficient_for_ground_truth=self._normalize_coefficient_for_ground_truth,
                                             distance_metric="mse")
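
    # Note on the fixture (a hedged summary inferred from the tests below): each
    # mat_p_* is a (n_digits, n_ary) row-stochastic matrix of per-digit code
    # probabilities, and column 0 (the probability of the zero symbol) is what the
    # reference utils._intensity_to_probability / utils.calc_soft_code_length
    # operate on in these tests.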

    def test_intensity_to_probability(self):

        t_test = self._t_mat_p_x[:,0]
        arry_test = t_test.data.numpy()

        expected = utils._intensity_to_probability(arry_test)
        actual = self._loss_layer._intensity_to_probability(t_test).data.numpy()

        self.assertTrue(np.allclose(expected, actual))

    def test_intensity_to_probability_two_dim(self):

        t_test = self._t_arry_p_x[:,:,0]
        arry_test = t_test.data.numpy()

        expected = np.stack(list(map(utils._intensity_to_probability, arry_test)))
        actual = self._loss_layer._intensity_to_probability(t_test).data.numpy()

        self.assertTrue(np.allclose(expected, actual))

    def test_soft_code_length(self):

        t_test = self._t_arry_p_x
        arry_test = t_test.data.numpy()

        expected = np.array([utils.calc_soft_code_length(mat_p[:, 0]) for mat_p in arry_test])
        actual = self._loss_layer.calc_soft_code_length(t_test).data.numpy()

        self.assertTrue(np.allclose(expected, actual))

    def test_break_intensity(self):

        t_test_x = self._t_mat_p_x
        t_test_y = self._t_mat_p_y
        arry_test_x = t_test_x.data.numpy()
        arry_test_y = t_test_y.data.numpy()

        expected = np.array([utils._calc_break_intensity(v_x, v_y) for v_x, v_y in zip(arry_test_x, arry_test_y)])
        actual = self._loss_layer._calc_break_intensity(t_test_x, t_test_y).data.numpy()

        self.assertTrue(np.allclose(expected, actual))

    def test_break_intensity_two_dim(self):

        def calc_break_intensity_(mat_x, mat_y):
            return np.array([utils._calc_break_intensity(v_x, v_y) for v_x, v_y in zip(mat_x, mat_y)])

        t_test_x = self._t_arry_p_x
        t_test_y = self._t_arry_p_y
        arry_test_x = t_test_x.data.numpy()
        arry_test_y = t_test_y.data.numpy()

        expected = np.stack([calc_break_intensity_(mat_x, mat_y) for mat_x, mat_y in zip(arry_test_x, arry_test_y)])
        actual = self._loss_layer._calc_break_intensity(t_test_x, t_test_y).data.numpy()

        self.assertTrue(np.allclose(expected, actual))

    def test_soft_lowest_common_ancestor_length(self):

        t_test_x = self._t_arry_p_x
        t_test_y = self._t_arry_p_y
        arry_test_x = t_test_x.data.numpy()
        arry_test_y = t_test_y.data.numpy()

        expected = np.array([utils.calc_soft_lowest_common_ancestor_length(mat_x, mat_y) for mat_x, mat_y in zip(arry_test_x, arry_test_y)])
        actual = self._loss_layer.calc_soft_lowest_common_ancestor_length(t_test_x, t_test_y).data.numpy()

        self.assertTrue(np.allclose(expected, actual))

    def test_soft_hyponymy_score(self):

        t_test_x = self._t_arry_p_x
        t_test_y = self._t_arry_p_y
        arry_test_x = t_test_x.data.numpy()
        arry_test_y = t_test_y.data.numpy()

        expected = np.array([utils.calc_soft_hyponymy_score(mat_x, mat_y) for mat_x, mat_y in zip(arry_test_x, arry_test_y)])
        actual = self._loss_layer.calc_soft_hyponymy_score(t_test_x, t_test_y).data.numpy()

        self.assertTrue(np.allclose(expected, actual))

    def test_loss_value(self):

        t_test = self._t_arry_p_batch
        lst_train = self._hyponymy_tuples
        arry_test = t_test.data.numpy()

        lst_idx_x = [tup[0] for tup in lst_train]
        lst_idx_y = [tup[1] for tup in lst_train]
        y_true = np.array([tup[2] for tup in lst_train])
        arry_test_x = arry_test[lst_idx_x]
        arry_test_y = arry_test[lst_idx_y]

        y_pred = np.array([utils.calc_soft_hyponymy_score(mat_x, mat_y) for mat_x, mat_y in zip(arry_test_x, arry_test_y)])

        if self._normalize_code_length:
            y_pred /= self._n_digits
            y_true *= self._normalize_coefficient_for_ground_truth
        print(y_pred)
        print(y_true)
        expected = np.mean((y_pred - y_true)**2) # mean squared error, matching distance_metric="mse"
        actual = self._loss_layer.forward(t_test, lst_train)

        self.assertTrue(np.allclose(expected, actual))