def loss(self, data: Graph, split="train"):
    # Select the edge mask for the requested split.
    if split == "train":
        mask = data.train_mask
    elif split == "val":
        mask = data.val_mask
    else:
        mask = data.test_mask
    edge_index, edge_types = data.edge_index[:, mask], data.edge_attr[mask]

    # Cache the set of known triplets, then draw a batch of positive and
    # negative edges by uniform sampling.
    self.get_edge_set(edge_index, edge_types)
    batch_edges, batch_attr, samples, rels, labels = sampling_edge_uniform(
        edge_index,
        edge_types,
        self.edge_set,
        self.sampling_rate,
        self.num_rels,
        label_smoothing=self.lbl_smooth,
        num_entities=self.num_entities,
    )

    # Encode the sampled sub-graph; local_graph() restores the original
    # edge_index / edge_attr on exit, so `data` is not mutated.
    with data.local_graph():
        data.edge_index = batch_edges
        data.edge_attr = batch_attr
        node_embed, rel_embed = self.forward(data)

    # Re-index the raw node ids in `samples` into the compact range
    # [0, num_sampled) so they address rows of node_embed.
    sampled_nodes, reindexed_edges = torch.unique(samples, sorted=True, return_inverse=True)
    assert (self.cache_index == sampled_nodes).any()  # sanity check against the cached node index

    # Scoring loss on (head, relation, tail) triplets plus an embedding penalty.
    loss_n = self._loss(node_embed[reindexed_edges[0]], node_embed[reindexed_edges[1]], rel_embed[rels], labels)
    loss_r = self.penalty * self._regularization([self.emb(sampled_nodes), rel_embed])
    return loss_n + loss_r
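# Side note on the re-indexing trick above (a standalone illustration, not
# part of the model): torch.unique(..., return_inverse=True) flattens its
# input, returns the sorted unique values, and an inverse tensor shaped like
# the input that maps each raw id to its position among the unique values.
def _demo_unique_reindex():
    import torch

    ids = torch.tensor([[7, 3, 7], [3, 9, 9]])
    uniq, inv = torch.unique(ids, sorted=True, return_inverse=True)
    # uniq == tensor([3, 7, 9]); inv == tensor([[1, 0, 1], [0, 2, 2]]),
    # so embeddings indexed with inv[0] / inv[1] pick head/tail rows from a
    # compact table whose row order matches `uniq`.
    return uniq, inv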
def loss(self, data: Graph, scoring):
    # Variant that takes an explicit scoring function and uses all edges.
    row, col = data.edge_index
    edge_types = data.edge_attr
    edge_index = torch.stack([row, col])

    # Cache the set of known triplets, then draw a batch of positive and
    # negative edges by uniform sampling.
    self.get_edge_set(edge_index, edge_types)
    batch_edges, batch_attr, samples, rels, labels = sampling_edge_uniform(
        (row, col),
        edge_types,
        self.edge_set,
        self.sampling_rate,
        self.num_rels,
        label_smoothing=self.lbl_smooth,
        num_entities=self.num_entities,
    )

    # Encode the sampled sub-graph without mutating `data`.
    with data.local_graph():
        data.edge_index = batch_edges
        data.edge_attr = batch_attr
        node_embed, rel_embed = self.forward(data)

    # Re-index sampled node ids into the compact range [0, num_sampled).
    sampled_nodes, reindexed_edges = torch.unique(samples, sorted=True, return_inverse=True)
    assert (self.cache_index == sampled_nodes).any()  # sanity check against the cached node index

    # Same loss as above, but the scoring function is passed through.
    loss_n = self._loss(node_embed[reindexed_edges[0]], node_embed[reindexed_edges[1]], rel_embed[rels], labels, scoring)
    loss_r = self.penalty * self._regularization([self.emb(sampled_nodes), rel_embed])
    return loss_n + loss_r
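# A minimal training-step sketch for the loss() methods above (assumptions:
# `model` is an instance of a class defining loss(), `data` carries the split
# masks, and `optimizer` is any torch.optim optimizer; none of these names
# come from the original code).
def train_step(model, data, optimizer):
    model.train()
    optimizer.zero_grad()
    loss = model.loss(data, split="train")  # first variant above
    loss.backward()
    optimizer.step()
    return loss.item()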
def build_toy_data():
    # 100 nodes with 10-dimensional random features and 200 random edges.
    x = torch.randn(100, 10)
    edge_index = torch.randint(0, 100, (2, 200))
    g = Graph(x=x, edge_index=edge_index)
    # Attach random edge features sized to the graph's actual edge count.
    nedge = g.num_edges
    g.edge_attr = torch.randn(nedge, 10)
    return g
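# Quick smoke test for build_toy_data() (a sketch; it only restates what the
# constructor above guarantees, assuming Graph exposes num_edges as used
# there).
def _check_toy_data():
    g = build_toy_data()
    assert g.x.shape == (100, 10)
    assert g.edge_attr.shape == (g.num_edges, 10)
    return g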
def read_triplet_data(folder):
    filenames = ["train2id.txt", "valid2id.txt", "test2id.txt"]
    count = 0
    edge_index = []
    edge_attr = []
    count_list = []
    triples = []
    num_entities = 0
    num_relations = 0
    entity_dic = {}
    relation_dic = {}
    for filename in filenames:
        with open(osp.join(folder, filename), "r") as f:
            # The first line of each *2id.txt file holds the triplet count.
            _ = int(f.readline().strip())
            # Remember where each split starts in the flat triplet list.
            if "train" in filename:
                train_start_idx = len(triples)
            elif "valid" in filename:
                valid_start_idx = len(triples)
            elif "test" in filename:
                test_start_idx = len(triples)
            for line in f:
                # Each line is "head tail relation"; triples are stored as
                # (head, relation, tail).
                items = line.strip().split()
                edge_index.append([int(items[0]), int(items[1])])
                edge_attr.append(int(items[2]))
                triples.append((int(items[0]), int(items[2]), int(items[1])))
                # Assign contiguous ids to unseen entities and relations.
                if items[0] not in entity_dic:
                    entity_dic[items[0]] = num_entities
                    num_entities += 1
                if items[1] not in entity_dic:
                    entity_dic[items[1]] = num_entities
                    num_entities += 1
                if items[2] not in relation_dic:
                    relation_dic[items[2]] = num_relations
                    num_relations += 1
                count += 1
            count_list.append(count)

    edge_index = torch.LongTensor(edge_index).t()
    edge_attr = torch.LongTensor(edge_attr)
    data = Graph()
    data.edge_index = edge_index
    data.edge_attr = edge_attr

    def generate_mask(start, end):
        mask = torch.zeros(count, dtype=torch.bool)
        mask[start:end] = True
        return mask

    # Split boundaries follow the cumulative edge counts per file.
    data.train_mask = generate_mask(0, count_list[0])
    data.val_mask = generate_mask(count_list[0], count_list[1])
    data.test_mask = generate_mask(count_list[1], count_list[2])
    return data, triples, train_start_idx, valid_start_idx, test_start_idx, num_entities, num_relations
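# Example call for read_triplet_data() (the folder path is a placeholder; any
# directory containing train2id.txt / valid2id.txt / test2id.txt in the
# "head tail relation" format parsed above will do).
def _load_example(folder="./data/FB15k-237"):
    data, triples, train_start, valid_start, test_start, n_ent, n_rel = read_triplet_data(folder)
    print(f"{n_ent} entities, {n_rel} relations, {len(triples)} triplets")
    print(
        f"train/val/test edges: {int(data.train_mask.sum())}/"
        f"{int(data.val_mask.sum())}/{int(data.test_mask.sum())}"
    )
    return data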