def setUp(self) -> None:
    """
    Build the fixture shared by the tests: three (x, y) pairs of code
    probability matrices, their stacked/batched variants, the torch views of
    those arrays, and the loss layer under test.
    """
    n_ary, n_digits, tau = 6, 4, 4.0
    self._n_ary = n_ary
    self._n_digits = n_digits

    def _make_matrix(p_zero, code_repr):
        # thin wrapper so that every matrix is generated with the same settings
        return utils.generate_probability_matrix(
            np.array(p_zero), np.array(code_repr), n_digits, n_ary, tau)

    # one row per pair: (p_zero of x, code of x, p_zero of y, code of y)
    pair_specs = (
        ([0.02, 0.05, 0.8, 0.8], [1, 2, 0, 0],
         [0.02, 0.05, 0.1, 0.6], [1, 2, 3, 0]),
        ([0.02, 0.05, 0.05, 0.8], [1, 2, 3, 0],
         [0.02, 0.05, 0.6, 0.6], [1, 2, 0, 0]),
        ([0.02, 0.02, 0.02, 0.02], [2, 2, 3, 3],
         [0.02, 0.02, 0.02, 0.02], [1, 1, 2, 2]),
    )
    lst_mat_p_x, lst_mat_p_y = [], []
    for p_x_zero, x_repr, p_y_zero, y_repr in pair_specs:
        # x is generated before y for each pair, pair by pair
        lst_mat_p_x.append(_make_matrix(p_x_zero, x_repr))
        lst_mat_p_y.append(_make_matrix(p_y_zero, y_repr))

    # x:hypernym, y:hyponym
    # mat_p_*: (n_dim, n_ary)
    self._mat_p_x = lst_mat_p_x[0]
    self._mat_p_y = lst_mat_p_y[0]
    # arry_p_*: (n_batch, n_dim, n_ary)
    self._arry_p_x = np.stack(lst_mat_p_x)
    self._arry_p_y = np.stack(lst_mat_p_y)
    # batch layout: all x matrices first, followed by all y matrices
    self._arry_p_batch = np.stack(lst_mat_p_x + lst_mat_p_y)
    # train_signal: (hypernym_index, hyponym_index, hyponymy_score)
    # i.e. [(x1, y1, 1.0), (x2, y2, -1.0), (x3, y3, -4.0)]
    self._hyponymy_tuples = [(0, 3, 1.0), (1, 4, -1.0), (2, 5, -4.0)]

    # torch tensors created from the numpy fixtures above
    self._t_mat_p_x = torch.from_numpy(self._mat_p_x)
    self._t_mat_p_y = torch.from_numpy(self._mat_p_y)
    self._t_arry_p_x = torch.from_numpy(self._arry_p_x)
    self._t_arry_p_y = torch.from_numpy(self._arry_p_y)
    self._t_arry_p_batch = torch.from_numpy(self._arry_p_batch)

    self._normalize_code_length = False
    self._normalize_coefficient_for_ground_truth = None
    self._loss_layer = HyponymyScoreLoss(
        normalize_hyponymy_score=self._normalize_code_length,
        normalize_coefficient_for_ground_truth=self._normalize_coefficient_for_ground_truth,
        distance_metric="mse")
def __init__(self, threshold: Optional[float] = 0.5):
    """
    Predicts the hyponymy relation of an entity pair based on its entailment probability.

    @param threshold: threshold of the entailment probability.
        a probability larger than the specified value is regarded as a hyponymy relation.
    """
    # loss layer instance kept as a helper object
    self._auxiliary = HyponymyScoreLoss()
    self._threshold = threshold
def __init__(
        self,
        model: MaskedAutoEncoder,
        loss_reconst: _Loss,
        loss_mutual_info: Optional[_Loss] = None,
        dataloader_train: Optional[DataLoader] = None,
        dataloader_val: Optional[DataLoader] = None,
        dataloader_test: Optional[DataLoader] = None,
        learning_rate: Optional[float] = 0.001,
        model_parameter_schedulers: Optional[Dict[str, Callable[[float], float]]] = None,
        loss_parameter_schedulers: Optional[Dict[str, Dict[str, Callable[[float], float]]]] = None,
):
    """
    Store the model, the loss layers, the data loaders and the parameter schedulers.

    :param model: auto-encoder to train; its `_encoder` and `_decoder` are also kept.
    :param loss_reconst: reconstruction loss; its `scale` attribute is stored.
    :param loss_mutual_info: optional mutual-information loss; when omitted the stored scale is 1.
    :param dataloader_train: data loader stored under the "train" key.
    :param dataloader_val: data loader stored under the "val" key.
    :param dataloader_test: data loader stored under the "test" key.
    :param learning_rate: learning rate stored for the optimizer.
    :param model_parameter_schedulers: {parameter name: scheduler}; empty dict when None.
    :param loss_parameter_schedulers: {loss name: {property name: scheduler}}; empty dict when None.
    """
    super(UnsupervisedTrainer, self).__init__()

    # scale factors of the loss layers
    self._scale_loss_reconst = loss_reconst.scale
    self._scale_loss_mi = 1. if loss_mutual_info is None else loss_mutual_info.scale

    # model and its sub-modules
    self._model = model
    self._encoder = model._encoder
    self._decoder = model._decoder

    # loss layers and optimizer setting
    self._loss_reconst = loss_reconst
    self._loss_mutual_info = loss_mutual_info
    self._learning_rate = learning_rate

    self._dataloaders = dict(train=dataloader_train, val=dataloader_val, test=dataloader_test)

    # auxiliary function that is solely used for validation
    self._auxiliary = HyponymyScoreLoss()

    # parameter schedulers; a missing argument falls back to an empty mapping
    self._model_parameter_schedulers = (
        {} if model_parameter_schedulers is None else model_parameter_schedulers)
    self._loss_parameter_schedulers = (
        {} if loss_parameter_schedulers is None else loss_parameter_schedulers)
class UnsupervisedTrainer(pl.LightningModule):
    """
    Lightning module that trains a MaskedAutoEncoder with a reconstruction loss
    and an optional mutual-information loss.

    Scheduler functions passed to the constructor are invoked as
    `scheduler(current_step, current_epoch)` on every training step.
    """

    def __init__(
            self,
            model: MaskedAutoEncoder,
            loss_reconst: _Loss,
            loss_mutual_info: Optional[_Loss] = None,
            dataloader_train: Optional[DataLoader] = None,
            dataloader_val: Optional[DataLoader] = None,
            dataloader_test: Optional[DataLoader] = None,
            learning_rate: Optional[float] = 0.001,
            model_parameter_schedulers: Optional[Dict[str, Callable[[float], float]]] = None,
            loss_parameter_schedulers: Optional[Dict[str, Dict[str, Callable[[float], float]]]] = None,
    ):
        """
        :param model: auto-encoder to train; its `_encoder` and `_decoder` are also kept.
        :param loss_reconst: reconstruction loss (required); its `scale` attribute is stored.
        :param loss_mutual_info: optional mutual-information loss; when omitted the stored scale is 1.
        :param dataloader_train: data loader returned by `tng_dataloader`.
        :param dataloader_val: data loader returned by `val_dataloader`.
        :param dataloader_test: data loader returned by `test_dataloader`.
        :param learning_rate: learning rate of the Adam optimizer.
        :param model_parameter_schedulers: {model attribute name: scheduler function}.
        :param loss_parameter_schedulers: {loss layer name: {property name: scheduler function}}.
        """
        super(UnsupervisedTrainer, self).__init__()

        self._scale_loss_reconst = loss_reconst.scale
        self._scale_loss_mi = loss_mutual_info.scale if loss_mutual_info is not None else 1.

        self._model = model
        self._encoder = model._encoder
        self._decoder = model._decoder
        self._loss_reconst = loss_reconst
        self._loss_mutual_info = loss_mutual_info
        self._learning_rate = learning_rate
        self._dataloaders = {
            "train": dataloader_train,
            "val": dataloader_val,
            "test": dataloader_test
        }
        # auxiliary function that is solely used for validation
        self._auxiliary = HyponymyScoreLoss()

        # set model parameter scheduler
        if model_parameter_schedulers is None:
            self._model_parameter_schedulers = {}
        else:
            self._model_parameter_schedulers = model_parameter_schedulers

        # set loss parameter scheduler
        if loss_parameter_schedulers is None:
            self._loss_parameter_schedulers = {}
        else:
            self._loss_parameter_schedulers = loss_parameter_schedulers

    def _numpy_to_tensor(self, np_array: np.ndarray):
        """Convert a numpy array into a tensor placed on the model's device."""
        # this class never assigns `self._device`; resolve the device from the model instead.
        return torch.from_numpy(np_array).to(self._get_model_device())

    def _get_model_device(self):
        """Return the device of the first parameter of the wrapped model."""
        return (next(self._model.parameters())).device

    def configure_optimizers(self):
        """Return an Adam optimizer over all parameters of this module."""
        opt = Adam(self.parameters(), lr=self._learning_rate)
        return opt

    @pl.data_loader
    def tng_dataloader(self):
        return self._dataloaders["train"]

    @pl.data_loader
    def val_dataloader(self):
        return self._dataloaders["val"]

    @pl.data_loader
    def test_dataloader(self):
        return self._dataloaders["test"]

    def forward(self, x):
        """Delegate to the wrapped model."""
        return self._model.forward(x)

    def training_step(self, data_batch, batch_nb):
        """
        Update the scheduled parameters, run the model on `data_batch["embedding"]`
        and return the summed loss together with the values to be logged.
        """
        # global step divided by (max_nb_epochs * total_batches)
        current_step = self.trainer.global_step / (self.trainer.max_nb_epochs * self.trainer.total_batches)
        self._update_model_parameters(current_step, verbose=False)
        self._update_loss_parameters(current_step, verbose=False)

        # forward computation
        t_x = data_batch["embedding"]
        t_latent_code, t_code_prob, t_x_dash = self._model.forward(t_x)

        # (required) reconstruction loss
        loss_reconst = self._loss_reconst.forward(t_x_dash, t_x)

        # (optional) mutual information loss; zero when it is not configured
        if self._loss_mutual_info is not None:
            loss_mi = self._loss_mutual_info(t_code_prob)
        else:
            loss_mi = torch.tensor(0.0, dtype=torch.float32, device=t_code_prob.device)

        loss = loss_reconst + loss_mi

        # individual losses are logged after dividing by their scale
        dict_losses = {
            "train_loss_reconst": loss_reconst / self._scale_loss_reconst,
            "train_loss_mutual_info": loss_mi / self._scale_loss_mi,
            "train_loss": loss
        }
        return {"loss": loss, "log": dict_losses}

    def _evaluate_code_stats(self, t_code_prob):
        """
        Compute validation statistics of the code probabilities: mean and std of
        the soft code length, and the mean of
        log(n_ary) + sum(p * log(p + eps)) taken over the last axis.
        """
        _EPS = 1E-6
        n_ary = self._model.n_ary
        soft_code_length = self._auxiliary.calc_soft_code_length(t_code_prob)
        code_probability_divergence = torch.mean(
            np.log(n_ary) + torch.sum(t_code_prob * torch.log(t_code_prob + _EPS), axis=-1),
            axis=-1)

        metrics = {
            "val_soft_code_length_mean": torch.mean(soft_code_length),
            "val_soft_code_length_std": torch.std(soft_code_length),
            "val_code_probability_divergence": torch.mean(code_probability_divergence)
        }
        return metrics

    def validation_step(self, data_batch, batch_nb):
        """Compute the validation losses and code statistics for one batch."""
        # forward computation without back-propagation
        t_x = data_batch["embedding"]
        t_intermediate, t_code_prob, t_x_dash = self._model._predict(t_x)

        loss_reconst = self._loss_reconst.forward(t_x_dash, t_x)
        if self._loss_mutual_info is not None:
            loss_mi = self._loss_mutual_info(t_code_prob)
        else:
            loss_mi = torch.tensor(0.0, dtype=torch.float32, device=t_code_prob.device)

        loss = loss_reconst + loss_mi

        metrics = {
            "val_loss_reconst": loss_reconst / self._scale_loss_reconst,
            "val_mutual_info": loss_mi / self._scale_loss_mi,
            "val_loss": loss
        }

        # code statistics are reported whether or not the mutual information loss is configured
        metrics_repr = self._evaluate_code_stats(t_code_prob)
        metrics.update(metrics_repr)

        return {"val_loss": loss, "log": metrics}

    def validation_end(self, outputs):
        """Average the loss and every logged metric over the validation step outputs."""
        avg_loss = torch.stack([x['val_loss'] for x in outputs]).mean()
        avg_metrics = defaultdict(list)
        for output in outputs:
            for key, value in output["log"].items():
                avg_metrics[key].append(value)
        # replace each list of per-batch values with its mean (the key set does not change)
        for key, values in avg_metrics.items():
            avg_metrics[key] = torch.stack(values).mean()
        return {'avg_val_loss': avg_loss, 'log': avg_metrics}

    def on_save_checkpoint(self, checkpoint):
        """
        Store a pickled copy of the model under `checkpoint["model_dump"]`.
        The model is moved to the cpu while it is pickled and moved back afterwards.
        """
        device = self._get_model_device()
        cpu_device = torch.device("cpu")
        needs_transfer = device != cpu_device
        if needs_transfer:
            # convert device to cpu. it changes self._model instance itself.
            _ = self._model.to(device=cpu_device)
        try:
            # save model dump
            checkpoint["model_dump"] = pickle.dumps(self._model)
        finally:
            # revert to the original device (probably cuda) even when pickling fails,
            # so that a failed dump does not leave the model on the cpu.
            if needs_transfer:
                _ = self._model.to(device=device)

    @classmethod
    def load_model_from_checkpoint(cls, weights_path: str, on_gpu, map_location=None):
        """
        Restore the model that `on_save_checkpoint` pickled into a checkpoint file.

        :param weights_path: path to the checkpoint file.
        :param on_gpu: when true the checkpoint is loaded with `map_location` (if given)
            and the model is moved to cuda; otherwise storages are mapped onto the cpu.
        :param map_location: forwarded to `torch.load` and `model.cuda` when `on_gpu` is true.
        :return: the unpickled model with the checkpoint's state dict loaded.
        """
        if on_gpu:
            if map_location is not None:
                checkpoint = torch.load(weights_path, map_location=map_location)
            else:
                checkpoint = torch.load(weights_path)
        else:
            checkpoint = torch.load(weights_path, map_location=lambda storage, loc: storage)

        # NOTE(security): unpickling executes arbitrary code. only load checkpoints from trusted sources.
        model = pickle.loads(checkpoint["model_dump"])
        if on_gpu:
            model = model.cuda(device=map_location)
        # strip the "_model." substring so that the keys match the bare model
        state_dict = {
            key.replace("_model.", ""): param
            for key, param in checkpoint["state_dict"].items()
        }
        model.load_state_dict(state_dict)

        return model

    def _update_model_parameters(self, current_step: Optional[float] = None, verbose: bool = False):
        """
        Apply every model parameter scheduler to the matching attribute of the model.

        :param current_step: value passed to the schedulers;
            defaults to current_epoch / max_nb_epochs.
        :param verbose: print the old and the new value of every updated attribute.
        """
        if current_step is None:
            current_step = self.current_epoch / self.trainer.max_nb_epochs

        for parameter_name, scheduler_function in self._model_parameter_schedulers.items():
            if scheduler_function is None:
                continue

            # attributes that are missing or currently None are left untouched
            current_value = getattr(self._model, parameter_name, None)
            if current_value is not None:
                new_value = scheduler_function(current_step, self.current_epoch)
                setattr(self._model, parameter_name, new_value)

                if verbose:
                    print(
                        f"{parameter_name}: {current_value:.2f} -> {new_value:.2f}"
                    )

    def _update_loss_parameters(self, current_step: Optional[float] = None, verbose: bool = False):
        """
        Apply every loss parameter scheduler to the matching property of the loss layer.

        :param current_step: value passed to the schedulers;
            defaults to current_epoch / max_nb_epochs.
        :param verbose: print the old and the new value of every updated property.
        """
        if current_step is None:
            current_step = self.current_epoch / self.trainer.max_nb_epochs

        for loss_name, dict_property_scheduler in self._loss_parameter_schedulers.items():
            # get loss layer; loss layers are stored on this object with a leading underscore
            if not loss_name.startswith("_"):
                loss_name = "_" + loss_name
            loss_layer = getattr(self, loss_name, None)
            if loss_layer is None:
                continue

            # get property name and apply scheduler function
            for property_name, scheduler_function in dict_property_scheduler.items():
                if scheduler_function is None:
                    continue

                # check if property exists
                if not hasattr(loss_layer, property_name):
                    continue

                current_value = getattr(loss_layer, property_name, None)
                new_value = scheduler_function(current_step, self.current_epoch)
                setattr(loss_layer, property_name, new_value)

                if verbose:
                    print(
                        f"{loss_name}.{property_name}: {current_value:.2f} -> {new_value:.2f}"
                    )

    def on_epoch_start(self):
        pass
def __init__(self, threshold: Optional[float] = 0.0):
    """
    Create the auxiliary loss layer and remember the decision threshold.

    :param threshold: threshold value stored as `self._threshold`.
    """
    # this is used to compute soft code length
    self._auxiliary = HyponymyScoreLoss()

    self._threshold = threshold
class BasePredictor(object, metaclass=ABCMeta):
    """
    Abstract base class of the hyponymy predictors.

    It provides array/tensor conversion, threshold tuning, and score
    computation helpers that delegate to a HyponymyScoreLoss instance.
    Subclasses implement the prediction methods declared abstract below.
    """

    def __init__(self, threshold: Optional[float] = 0.0):
        # this is used to compute soft code length
        self._auxiliary = HyponymyScoreLoss()
        self._threshold = threshold

    def _tensor_to_numpy(self, object: Array_like) -> np.ndarray:
        """
        Return `object` as a numpy array. Arrays are returned as-is.

        :raises TypeError: if `object` is neither a torch tensor nor a numpy array.
        """
        if isinstance(object, torch.Tensor):
            # detach first: numpy() refuses tensors that require grad
            return object.detach().cpu().numpy()
        elif isinstance(object, np.ndarray):
            return object
        else:
            raise TypeError(f"unsupported type: {type(object)}")

    def _numpy_to_tensor(self, object: Array_like) -> torch.Tensor:
        """
        Return `object` as a torch tensor. Tensors are returned as-is.

        :raises TypeError: if `object` is neither a torch tensor nor a numpy array.
        """
        if isinstance(object, torch.Tensor):
            return object
        elif isinstance(object, np.ndarray):
            return torch.from_numpy(object)
        else:
            raise TypeError(f"unsupported type: {type(object)}")

    def calc_optimal_threshold_fvalue(self, y_true, probas_pred, verbose: bool = True, **kwargs):
        """
        Compute the threshold that maximizes the f-value on the precision-recall curve.

        :param y_true: ground-truth labels.
        :param probas_pred: predicted scores.
        :param verbose: pretty-print the chosen threshold with its precision, recall and f1-score.
        :param kwargs: forwarded to `precision_recall_curve`.
        :return: the threshold at the best f1-score.
        """

        def _f1_score_safe(prec, recall):
            # avoid the zero division when both precision and recall are zero
            if prec == recall == 0.0:
                return 0.0
            else:
                return 2 * prec * recall / (prec + recall)

        # compute the threshold that maximizes f-value.
        # the scores are passed positionally because the keyword name of this
        # argument differs between scikit-learn versions (`probas_pred` / `y_score`).
        v_prec, v_recall, v_threshold = precision_recall_curve(y_true, probas_pred, **kwargs)
        v_f1_score = np.vectorize(_f1_score_safe)(v_prec, v_recall)
        idx = np.nanargmax(v_f1_score)
        threshold_opt = v_threshold[idx]

        if verbose:
            report = {
                "threshold_opt": threshold_opt,
                "precision": v_prec[idx],
                "recall": v_recall[idx],
                "f1-score": v_f1_score[idx]
            }
            pprint(report)

        return threshold_opt

    def calc_optimal_threshold_accuracy(self, y_true, probas_pred, verbose: bool = True, **kwargs):
        """
        Compute the threshold that maximizes accuracy using receiver operating curve.

        :param y_true: ground-truth labels; entries equal to True are counted as positives.
        :param probas_pred: predicted scores.
        :param verbose: pretty-print the chosen threshold with its tpr, fpr and accuracy.
        :param kwargs: forwarded to `roc_curve`.
        :return: the threshold at the best accuracy.
        """
        v_fpr, v_tpr, v_threshold = roc_curve(y_true=y_true, y_score=probas_pred, **kwargs)
        n_sample = len(y_true)
        n_positive = np.sum(np.array(y_true) == True)
        n_negative = n_sample - n_positive
        # accuracy = (true positives + true negatives) / number of samples
        v_accuracy = (v_tpr * n_positive + (1 - v_fpr) * n_negative) / n_sample
        idx = np.nanargmax(v_accuracy)
        threshold_opt = v_threshold[idx]

        if verbose:
            report = {
                "threshold_opt": threshold_opt,
                "tpr": v_tpr[idx],
                "fpr": v_fpr[idx],
                "accuracy": v_accuracy[idx]
            }
            pprint(report)

        return threshold_opt

    def calc_soft_hyponymy_score(self, mat_code_prob_x: Array_like, mat_code_prob_y: Array_like):
        """Return the soft hyponymy score of the (x, y) pair as a python scalar."""
        t_code_prob_x = self._numpy_to_tensor(mat_code_prob_x)
        t_code_prob_y = self._numpy_to_tensor(mat_code_prob_y)
        s_xy = self._auxiliary.calc_soft_hyponymy_score(t_code_prob_x, t_code_prob_y).item()
        return s_xy

    def calc_hyponymy_propensity_score(self, mat_code_prob_x: Array_like, mat_code_prob_y: Array_like,
                                       directionality: bool = False):
        """
        Compute the soft hyponymy scores in both directions and combine them
        with `utils.hypernymy_propensity_score`.
        """
        s_xy = self.calc_soft_hyponymy_score(mat_code_prob_x, mat_code_prob_y)
        s_yx = self.calc_soft_hyponymy_score(mat_code_prob_y, mat_code_prob_x)
        # calculate hyponymy propensity score
        score = utils.hypernymy_propensity_score(s_xy, s_yx, directionality=directionality)
        return score

    def calc_entailment_probability(self, mat_code_prob_x: Array_like, mat_code_prob_y: Array_like):
        """Return the ancestor probability of the (x, y) pair as a python scalar."""
        t_code_prob_x = self._numpy_to_tensor(mat_code_prob_x)
        t_code_prob_y = self._numpy_to_tensor(mat_code_prob_y)
        p_xy = self._auxiliary.calc_ancestor_probability(t_code_prob_x, t_code_prob_y).item()
        return p_xy

    @abstractmethod
    def THRESHOLD(self):
        pass

    @abstractmethod
    def predict_directionality(self, mat_code_prob_x: Array_like, mat_code_prob_y: Array_like) -> str:
        """
        assume (x,y) is hyponymy relation, it predicts which one, x or y, is hypernym

        :param mat_code_prob_x: code probability of the entity x
        :param mat_code_prob_y: code probability of the entity y
        :return: "x" if x is hypernym, "y" otherwise.
        """
        pass

    @abstractmethod
    def predict_is_hyponymy_relation(self, mat_code_prob_x: Array_like, mat_code_prob_y: Array_like,
                                     threshold: Optional[float] = None) -> bool:
        """
        it predicts whether (x,y) pair is hyponymy or other relations (c.f. co-hyponymy, reverse-hyponymy, ...).
        this function is order-dependent. when you swap the order of arguments, response may be different.

        :param mat_code_prob_x: code probability of the hypernym candidate
        :param mat_code_prob_y: code probability of the hyponym candidate
        """
        pass

    @abstractmethod
    def predict_hyponymy_relation(self, mat_code_prob_x: Array_like, mat_code_prob_y: Array_like,
                                  threshold: Optional[float] = None) -> str:
        """
        it predicts what relation of the (x,y) pair holds among hyponymy, reverse-hyponymy, and other relations.
        this function is order-dependent only if (x,y) pair is either hyponymy or reverse-hyponymy relation.

        :param mat_code_prob_x: code probability of the entity x
        :param mat_code_prob_y: code probability of the other entity y
        :return: "hyponymy", "reverse-hyponymy", or "other"
        """
        pass

    @abstractmethod
    def infer_score(self, mat_code_prob_x: Array_like, mat_code_prob_y: Array_like) -> float:
        """
        it returns the inferred score that is used for classification.
        conceptually, it represents a degree of hyponymy relation.

        @param mat_code_prob_x: code probability of the entity x
        @param mat_code_prob_y: code probability of the other entity y
        @return: scalar value
        """
        pass

    @property
    def CLASS_LABELS(self):
        """Set of the labels that each prediction task may return."""
        return {
            "directionality": {"x", "y"},
            "is_hyponymy_relation": {True, False},
            "hyponymy_relation": {"hyponymy", "reverse-hyponymy", "other"}
        }
class HyponymyScoreLossLayer(unittest.TestCase):
    """
    Checks that the torch implementation in HyponymyScoreLoss agrees with the
    numpy reference functions in `utils`.
    """

    _EPS = 1E-5

    def setUp(self) -> None:
        """Build three (x, y) pairs of code probability matrices and the loss layer under test."""
        n_ary, n_digits, tau = 6, 4, 4.0
        self._n_ary = n_ary
        self._n_digits = n_digits

        def _make_matrix(p_zero, code_repr):
            # every matrix is generated with the same n_digits, n_ary and tau
            return utils.generate_probability_matrix(
                np.array(p_zero), np.array(code_repr), n_digits, n_ary, tau)

        # one row per pair: (p_zero of x, code of x, p_zero of y, code of y)
        pair_specs = (
            ([0.02, 0.05, 0.8, 0.8], [1, 2, 0, 0],
             [0.02, 0.05, 0.1, 0.6], [1, 2, 3, 0]),
            ([0.02, 0.05, 0.05, 0.8], [1, 2, 3, 0],
             [0.02, 0.05, 0.6, 0.6], [1, 2, 0, 0]),
            ([0.02, 0.02, 0.02, 0.02], [2, 2, 3, 3],
             [0.02, 0.02, 0.02, 0.02], [1, 1, 2, 2]),
        )
        lst_mat_p_x, lst_mat_p_y = [], []
        for p_x_zero, x_repr, p_y_zero, y_repr in pair_specs:
            lst_mat_p_x.append(_make_matrix(p_x_zero, x_repr))
            lst_mat_p_y.append(_make_matrix(p_y_zero, y_repr))

        # x:hypernym, y:hyponym
        # mat_p_*: (n_dim, n_ary)
        self._mat_p_x = lst_mat_p_x[0]
        self._mat_p_y = lst_mat_p_y[0]
        # arry_p_*: (n_batch, n_dim, n_ary)
        self._arry_p_x = np.stack(lst_mat_p_x)
        self._arry_p_y = np.stack(lst_mat_p_y)
        # batch layout: all x matrices first, followed by all y matrices
        self._arry_p_batch = np.stack(lst_mat_p_x + lst_mat_p_y)
        # train_signal: (hypernym_index, hyponym_index, hyponymy_score)
        # i.e. [(x1, y1, 1.0), (x2, y2, -1.0), (x3, y3, -4.0)]
        self._hyponymy_tuples = [(0, 3, 1.0), (1, 4, -1.0), (2, 5, -4.0)]

        self._t_mat_p_x = torch.from_numpy(self._mat_p_x)
        self._t_mat_p_y = torch.from_numpy(self._mat_p_y)
        self._t_arry_p_x = torch.from_numpy(self._arry_p_x)
        self._t_arry_p_y = torch.from_numpy(self._arry_p_y)
        self._t_arry_p_batch = torch.from_numpy(self._arry_p_batch)

        self._normalize_code_length = False
        self._normalize_coefficient_for_ground_truth = None
        self._loss_layer = HyponymyScoreLoss(
            normalize_hyponymy_score=self._normalize_code_length,
            normalize_coefficient_for_ground_truth=self._normalize_coefficient_for_ground_truth,
            distance_metric="mse")

    def _check_allclose(self, expected, actual):
        # single place for the comparison that every test case performs
        self.assertTrue(np.allclose(expected, actual))

    def test_intensity_to_probability(self):
        t_input = self._t_mat_p_x[:, 0]
        reference = utils._intensity_to_probability(t_input.data.numpy())
        result = self._loss_layer._intensity_to_probability(t_input).data.numpy()
        self._check_allclose(reference, result)

    def test_intensity_to_probability_two_dim(self):
        t_input = self._t_arry_p_x[:, :, 0]
        # the reference implementation is applied row by row
        reference = np.stack([utils._intensity_to_probability(row) for row in t_input.data.numpy()])
        result = self._loss_layer._intensity_to_probability(t_input).data.numpy()
        self._check_allclose(reference, result)

    def test_soft_code_length(self):
        t_input = self._t_arry_p_x
        reference = np.array([
            utils.calc_soft_code_length(mat_p[:, 0]) for mat_p in t_input.data.numpy()
        ])
        result = self._loss_layer.calc_soft_code_length(t_input).data.numpy()
        self._check_allclose(reference, result)

    def test_break_intensity(self):
        t_x, t_y = self._t_mat_p_x, self._t_mat_p_y
        pairs = zip(t_x.data.numpy(), t_y.data.numpy())
        reference = np.array([utils._calc_break_intensity(v_x, v_y) for v_x, v_y in pairs])
        result = self._loss_layer._calc_break_intensity(t_x, t_y).data.numpy()
        self._check_allclose(reference, result)

    def test_break_intensity_two_dim(self):
        t_x, t_y = self._t_arry_p_x, self._t_arry_p_y
        # outer loop: samples in the batch, inner loop: rows of each matrix pair
        reference = np.stack([
            np.array([utils._calc_break_intensity(v_x, v_y) for v_x, v_y in zip(mat_x, mat_y)])
            for mat_x, mat_y in zip(t_x.data.numpy(), t_y.data.numpy())
        ])
        result = self._loss_layer._calc_break_intensity(t_x, t_y).data.numpy()
        self._check_allclose(reference, result)

    def test_soft_lowest_common_ancestor_length(self):
        t_x, t_y = self._t_arry_p_x, self._t_arry_p_y
        pairs = zip(t_x.data.numpy(), t_y.data.numpy())
        reference = np.array([
            utils.calc_soft_lowest_common_ancestor_length(mat_x, mat_y) for mat_x, mat_y in pairs
        ])
        result = self._loss_layer.calc_soft_lowest_common_ancestor_length(t_x, t_y).data.numpy()
        self._check_allclose(reference, result)

    def test_soft_hyponymy_score(self):
        t_x, t_y = self._t_arry_p_x, self._t_arry_p_y
        pairs = zip(t_x.data.numpy(), t_y.data.numpy())
        reference = np.array([utils.calc_soft_hyponymy_score(mat_x, mat_y) for mat_x, mat_y in pairs])
        result = self._loss_layer.calc_soft_hyponymy_score(t_x, t_y).data.numpy()
        self._check_allclose(reference, result)

    def test_loss_value(self):
        t_batch = self._t_arry_p_batch
        train_signal = self._hyponymy_tuples
        arry_batch = t_batch.data.numpy()

        # split the (hypernym_index, hyponym_index, score) tuples into columns
        idx_x, idx_y, scores = zip(*train_signal)
        y_true = np.array(scores)
        # index with lists so that rows of the batch are selected
        arry_x = arry_batch[list(idx_x)]
        arry_y = arry_batch[list(idx_y)]
        y_pred = np.array([
            utils.calc_soft_hyponymy_score(mat_x, mat_y) for mat_x, mat_y in zip(arry_x, arry_y)
        ])
        if self._normalize_code_length:
            y_pred /= self._n_digits
            y_true *= self._normalize_coefficient_for_ground_truth

        print(y_pred)
        print(y_true)

        reference = np.mean((y_pred - y_true) ** 2)  # L2 loss
        result = self._loss_layer.forward(t_batch, train_signal)
        self._check_allclose(reference, result)