def load_triples(self, triples_in: Union[np.ndarray, List[np.ndarray]]):
    """
    Create the label-to-id mappings from the given triples and return the mapped triples.

    The mappings are built from all supplied triples (a list of arrays is concatenated
    along axis 0 first) and stored on the instance as ``entity_label_to_id`` and
    ``relation_label_to_id``; ``num_entities`` and ``num_relations`` are set to their sizes.

    :param triples_in: Either a single array of triples or a list of such arrays
    :return: The mapped triples when a single array is given; otherwise a list holding the
        mapped triples of each input array, in input order
    """
    single_array = isinstance(triples_in, np.ndarray)
    combined = triples_in if single_array else np.concatenate(triples_in, axis=0)

    # The mappings always cover every triple that was passed in.
    self.entity_label_to_id, self.relation_label_to_id = create_mappings(
        triples=combined)

    if single_array:
        result, _, _ = create_mapped_triples(
            triples=combined,
            entity_label_to_id=self.entity_label_to_id,
            relation_label_to_id=self.relation_label_to_id)
    else:
        result = []
        for part in triples_in:
            mapped_part = create_mapped_triples(
                part,
                entity_label_to_id=self.entity_label_to_id,
                relation_label_to_id=self.relation_label_to_id)[0]
            result.append(mapped_part)

    self.num_entities = len(self.entity_label_to_id)
    self.num_relations = len(self.relation_label_to_id)
    return result
def _handle_train_and_test(self, train_pos, test_pos) -> Tuple[np.ndarray, np.ndarray]:
    """
    Create mappings from the training and test triples together and map both sets.

    The mappings are built from the concatenation (axis 0) of both inputs and stored on the
    instance as ``entity_label_to_id`` and ``relation_label_to_id``.

    :param train_pos: The training triples
    :param test_pos: The test triples
    :return: Tuple of the mapped training triples and the mapped test triples
    """
    self.entity_label_to_id, self.relation_label_to_id = create_mappings(
        triples=np.concatenate([train_pos, test_pos], axis=0),
    )

    def _to_ids(label_triples):
        # Map one set of triples with the mappings shared by both sets.
        ids, _, _ = create_mapped_triples(
            triples=label_triples,
            entity_label_to_id=self.entity_label_to_id,
            relation_label_to_id=self.relation_label_to_id,
        )
        return ids

    train_ids = _to_ids(train_pos)
    test_ids = _to_ids(test_pos)
    return train_ids, test_ids
def _get_train_triples(self):
    """
    Load the training triples from the configured path and return them mapped.

    The path is read from ``self.config[pkc.TRAINING_SET_PATH]``. The mappings created from
    the loaded triples are stored on the instance as ``entity_label_to_id`` and
    ``relation_label_to_id``.

    :return: The mapped training triples
    """
    training_path = self.config[pkc.TRAINING_SET_PATH]
    label_triples = load_data(training_path)

    mappings = create_mappings(triples=label_triples)
    self.entity_label_to_id, self.relation_label_to_id = mappings

    train_ids, _, _ = create_mapped_triples(
        triples=label_triples,
        entity_label_to_id=self.entity_label_to_id,
        relation_label_to_id=self.relation_label_to_id,
    )
    return train_ids
def map_triples(self, triples_in: Union[np.ndarray, List[np.ndarray]]):
    """
    Map triples using the mappings already stored on the instance.

    :param triples_in: Either a single array of triples or a list of such arrays
    :return: The mapped triples when a single array is given; otherwise a list holding the
        mapped triples of each input array, in input order
    """
    def _map_one(label_triples):
        # Reuses the instance mappings; no new mappings are created here.
        return create_mapped_triples(
            triples=label_triples,
            entity_label_to_id=self.entity_label_to_id,
            relation_label_to_id=self.relation_label_to_id)

    if not isinstance(triples_in, np.ndarray):
        return [_map_one(part)[0] for part in triples_in]

    mapped, _, _ = _map_one(triples_in)
    return mapped
def _get_train_triples(data_path: str):
    """
    Load triples from a file, create mappings from them and map the triples.

    :param data_path: Path handed to ``load_data``
    :return: Tuple of the mapped triples, the entity label-to-id mapping and the relation
        label-to-id mapping
    """
    label_triples = load_data(data_path)

    mappings = create_mappings(triples=label_triples)
    entity_label_to_id, relation_label_to_id = mappings

    train_ids, _, _ = create_mapped_triples(
        triples=label_triples,
        entity_label_to_id=entity_label_to_id,
        relation_label_to_id=relation_label_to_id,
    )
    return train_ids, entity_label_to_id, relation_label_to_id
def make_predictions(
        kge_model,
        entities,
        relations,
        entity_to_id,
        rel_to_id,
        device,
        blacklist_path=None,
):
    """
    Score every candidate triple built from the given entities and relations and rank them.

    Every ordered pair of ``entities`` (self-pairs included) is combined with each relation.
    Triples found in the optional blacklist are removed, the rest are mapped to ids, scored
    with ``kge_model.predict`` and ordered by ascending score. Triples whose subject equals
    their object are dropped from the returned ranking.

    :param kge_model: Model exposing ``predict``; it receives a long tensor of mapped triples
    :param entities: Iterable of entity labels used as subjects and objects
    :param relations: Array of relation labels (its ``size`` attribute is read)
    :param entity_to_id: Mapping from entity label to id
    :param rel_to_id: Mapping from relation label to id
    :param device: Torch device on which the tensor of mapped triples is created
    :param blacklist_path: Optional path, read with ``load_data``, of triples to exclude
    :return: Array with the columns subject, predicate, object and score, ordered by
        ascending score. Labels and scores share one array, so its dtype follows NumPy's
        type promotion.
    """
    all_entity_pairs = np.array(list(product(entities, entities)))

    if relations.size == 1:
        all_triples = create_triples(entity_pairs=all_entity_pairs, relation=relations)
    else:
        # Concatenate once instead of re-copying the growing array with np.append per relation.
        triples_per_relation = [
            create_triples(entity_pairs=all_entity_pairs, relation=relation)
            for relation in relations
        ]
        all_triples = np.concatenate(triples_per_relation, axis=0)

    if blacklist_path is not None:
        blacklisted_triples = pd.DataFrame(data=load_data(blacklist_path))
        candidate_triples = pd.DataFrame(data=all_triples)
        merged = candidate_triples.merge(blacklisted_triples, indicator=True, how='outer')
        # 'left_only' marks rows that occur among the candidates but not in the blacklist.
        kept = merged[merged['_merge'] == 'left_only'].values
        # Drop the trailing '_merge' indicator column. The builtin ``str`` is used because
        # the ``np.str`` alias was removed in NumPy 1.24.
        all_triples = np.array(kept[:, :-1], dtype=str)

    mapped_triples, _, _ = create_mapped_triples(
        all_triples,
        entity_label_to_id=entity_to_id,
        relation_label_to_id=rel_to_id)

    # Keep a host-side copy of the ids for the label lookup below: NumPy cannot read a
    # tensor that lives on a non-CPU device.
    # NOTE(review): presumably create_mapped_triples returns a NumPy array - confirm.
    mapped_ids = np.asarray(mapped_triples)
    mapped_triples = torch.tensor(mapped_triples, dtype=torch.long, device=device)

    id_to_entity = {value: key for key, value in entity_to_id.items()}
    id_to_relation = {value: key for key, value in rel_to_id.items()}

    # Translate the ids back to labels so the label rows line up with the mapped triples
    # (and therefore with the predicted scores).
    subject_column = np.vectorize(id_to_entity.get)(mapped_ids[:, 0:1])
    predicate_column = np.vectorize(id_to_relation.get)(mapped_ids[:, 1:2])
    object_column = np.vectorize(id_to_entity.get)(mapped_ids[:, 2:3])
    triples_ordered_by_ids = np.concatenate(
        [subject_column, predicate_column, object_column], axis=1)

    predicted_scores = kge_model.predict(mapped_triples)

    _, sorted_indices = torch.sort(
        torch.tensor(predicted_scores, dtype=torch.float), descending=False)
    sorted_indices = sorted_indices.cpu().numpy()

    ranked_triples = triples_ordered_by_ids[sorted_indices, :]

    # The shape is passed positionally: the ``newshape`` keyword is deprecated in NumPy 2.1.
    subs = np.reshape(ranked_triples[:, 0:1], -1)
    objs = np.reshape(ranked_triples[:, 2:3], -1)
    # Indices of the triples whose subject differs from their object.
    indices = np.where(subs != objs)[0]

    ranked_scores = np.reshape(predicted_scores[sorted_indices], (-1, 1))
    ranked_triples = np.concatenate([ranked_triples, ranked_scores], axis=1)

    return ranked_triples[indices, :]