Example 1
    def loss(self, data: Graph, split="train"):
        # Pick the edge mask for the requested split.
        if split == "train":
            mask = data.train_mask
        elif split == "val":
            mask = data.val_mask
        else:
            mask = data.test_mask
        edge_index, edge_types = data.edge_index[:, mask], data.edge_attr[mask]

        # Sample a batch of positive edges plus uniform negative samples.
        self.get_edge_set(edge_index, edge_types)
        batch_edges, batch_attr, samples, rels, labels = sampling_edge_uniform(
            edge_index,
            edge_types,
            self.edge_set,
            self.sampling_rate,
            self.num_rels,
            label_smoothing=self.lbl_smooth,
            num_entities=self.num_entities,
        )
        # Forward pass on the sampled subgraph; local_graph() restores the
        # original edge_index / edge_attr when the block exits.
        with data.local_graph():
            data.edge_index = batch_edges
            data.edge_attr = batch_attr
            node_embed, rel_embed = self.forward(data)

        # Map the sampled node ids onto a compact 0..N-1 range.
        sampled_nodes, reindexed_edges = torch.unique(samples, sorted=True, return_inverse=True)
        assert (self.cache_index == sampled_nodes).any()
        # Link-prediction loss on the sampled triples plus an embedding penalty.
        loss_n = self._loss(node_embed[reindexed_edges[0]], node_embed[reindexed_edges[1]], rel_embed[rels], labels)
        loss_r = self.penalty * self._regularization([self.emb(sampled_nodes), rel_embed])
        return loss_n + loss_r
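
A minimal sketch of how a loss method like this is typically driven. The names model, data, and the hyperparameters are illustrative assumptions, not part of the original code; they stand in for any link-prediction model exposing the method above and a graph loaded with split masks.

import torch

def train(model, data, epochs=100, lr=0.01):
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    for epoch in range(epochs):
        model.train()
        optimizer.zero_grad()
        loss = model.loss(data, split="train")  # negative sampling happens inside loss()
        loss.backward()
        optimizer.step()
        model.eval()
        with torch.no_grad():
            val_loss = model.loss(data, split="val")
        print(f"epoch {epoch}: train {loss.item():.4f}, val {val_loss.item():.4f}")
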
Example 2
    def loss(self, data: Graph, scoring):
        # Same procedure as Example 1, but the score function is passed in
        # and all edges are used rather than a per-split mask.
        row, col = data.edge_index
        edge_types = data.edge_attr
        edge_index = torch.stack([row, col])

        # Sample a batch of positive edges plus uniform negative samples.
        self.get_edge_set(edge_index, edge_types)
        batch_edges, batch_attr, samples, rels, labels = sampling_edge_uniform(
            (row, col),
            edge_types,
            self.edge_set,
            self.sampling_rate,
            self.num_rels,
            label_smoothing=self.lbl_smooth,
            num_entities=self.num_entities,
        )
        # Forward pass on the sampled subgraph; the original graph is
        # restored when the local_graph() block exits.
        with data.local_graph():
            data.edge_index = batch_edges
            data.edge_attr = batch_attr
            node_embed, rel_embed = self.forward(data)

        # Map the sampled node ids onto a compact 0..N-1 range.
        sampled_nodes, reindexed_edges = torch.unique(samples,
                                                      sorted=True,
                                                      return_inverse=True)
        assert (self.cache_index == sampled_nodes).any()
        loss_n = self._loss(node_embed[reindexed_edges[0]],
                            node_embed[reindexed_edges[1]], rel_embed[rels],
                            labels, scoring)
        loss_r = self.penalty * self._regularization(
            [self.emb(sampled_nodes), rel_embed])
        return loss_n + loss_r
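
The scoring argument is a callable that assigns a score to each (head, relation, tail) triple. A DistMult-style scorer is one plausible instance; the signature below is an assumption for illustration, since the actual contract is fixed by _loss.

def distmult_scoring(head_embed, rel_embed, tail_embed):
    # DistMult: element-wise product of head, relation, and tail embeddings,
    # summed over the embedding dimension to give one score per triple.
    return (head_embed * rel_embed * tail_embed).sum(dim=-1)
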
Example 3
import torch
from cogdl.data import Graph


def build_toy_data():
    # Build a random toy graph: 100 nodes with 10-d features and 200 random edges.
    x = torch.randn(100, 10)
    edge_index = torch.randint(0, 100, (2, 200))
    g = Graph(x=x, edge_index=edge_index)
    # Attach a 10-d feature vector to every edge.
    nedge = g.num_edges
    edge_attr = torch.randn(nedge, 10)
    g.edge_attr = edge_attr
    return g
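
A quick smoke test for the helper above; the expected values assume the sizes hard-coded in the function and cogdl's Graph counting one edge per column of edge_index.

g = build_toy_data()
print(g.num_edges)         # 200
print(g.edge_attr.shape)   # torch.Size([200, 10])
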
Example 4
import os.path as osp

import torch
from cogdl.data import Graph


def read_triplet_data(folder):
    # Read OpenKE-style triple files: the first line is the triple count,
    # every following line is "head tail relation" (whitespace-separated ids).
    filenames = ["train2id.txt", "valid2id.txt", "test2id.txt"]
    count = 0
    edge_index = []
    edge_attr = []
    count_list = []
    triples = []
    num_entities = 0
    num_relations = 0
    entity_dic = {}
    relation_dic = {}
    for filename in filenames:
        with open(osp.join(folder, filename), "r") as f:
            _ = int(f.readline().strip())  # header: number of triples, unused
            # Remember where each split starts in the flat triple list.
            if "train" in filename:
                train_start_idx = len(triples)
            elif "valid" in filename:
                valid_start_idx = len(triples)
            elif "test" in filename:
                test_start_idx = len(triples)
            for line in f:
                items = line.strip().split()
                edge_index.append([int(items[0]), int(items[1])])
                edge_attr.append(int(items[2]))
                triples.append((int(items[0]), int(items[2]), int(items[1])))
                # Count distinct entities and relations as they first appear.
                if items[0] not in entity_dic:
                    entity_dic[items[0]] = num_entities
                    num_entities += 1
                if items[1] not in entity_dic:
                    entity_dic[items[1]] = num_entities
                    num_entities += 1
                if items[2] not in relation_dic:
                    relation_dic[items[2]] = num_relations
                    num_relations += 1
                count += 1
            count_list.append(count)

    edge_index = torch.LongTensor(edge_index).t()
    edge_attr = torch.LongTensor(edge_attr)
    data = Graph()
    data.edge_index = edge_index
    data.edge_attr = edge_attr

    def generate_mask(start, end):
        # Boolean mask over all edges, True only inside [start, end).
        mask = torch.zeros(count, dtype=torch.bool)
        mask[start:end] = True
        return mask

    data.train_mask = generate_mask(0, count_list[0])
    data.val_mask = generate_mask(count_list[0], count_list[1])
    data.test_mask = generate_mask(count_list[1], count_list[2])
    return data, triples, train_start_idx, valid_start_idx, test_start_idx, num_entities, num_relations
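
A sketch of how this loader might be called. The folder path is a placeholder; it must contain train2id.txt, valid2id.txt, and test2id.txt in the format described above.

data, triples, train_start, valid_start, test_start, n_ent, n_rel = read_triplet_data(
    "data/FB15k-237"  # placeholder path
)
print(n_ent, "entities,", n_rel, "relations")
print(int(data.train_mask.sum()), "training triples")
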