Ejemplo n.º 1
0
def get_big_graph(data, num_rels, add_reverse=True):
    """Build a DGL graph from an array of (subject, relation, object) triples.

    The node ids found in ``data`` are compacted with ``np.unique`` so that
    the graph uses contiguous node indices. The original ids are stored in
    ``g.ndata['id']`` and in the ``g.ids`` dictionary.

    Args:
        data: array whose transpose unpacks into ``(src, rel, dst)``;
            presumably of shape ``(num_triples, 3)`` -- confirm with callers.
        num_rels: offset added to a relation id to obtain the id used for
            the reversed edge.
        add_reverse: when true (the default, which matches the previously
            hard-coded behaviour) every edge is also added in the opposite
            direction and ``g.edata['type_o']`` is populated. When false
            only the given edges are added and ``type_o`` is not set.

    Returns:
        The ``dgl.DGLGraph`` with ``ndata['id']``, ``ndata['norm']``,
        ``edata['type_s']`` (plus ``edata['type_o']`` when reverse edges are
        added) and an ``ids`` attribute.
    """
    src, rel, dst = data.transpose()  # original node ids
    # uniq_v: sorted original node ids; position in uniq_v is the in-graph
    # node index. edges: in-graph indices such that uniq_v[edges] gives back
    # the stacked (src, dst) ids.
    uniq_v, edges = np.unique((src, dst), return_inverse=True)
    src, dst = np.reshape(edges, (2, -1))

    g = dgl.DGLGraph()
    g.add_nodes(len(uniq_v))

    if add_reverse:
        # First half: original edges; second half: the same edges reversed.
        src, dst = np.concatenate((src, dst)), np.concatenate((dst, src))
        rel_s = np.concatenate((rel, rel + num_rels))
        rel_o = np.concatenate((rel + num_rels, rel))
    else:
        rel_s, rel_o = rel, None

    g.add_edges(src, dst)
    norm = comp_deg_norm(g)
    g.ndata.update({
        'id': torch.from_numpy(uniq_v).long().view(-1, 1),
        'norm': torch.from_numpy(norm).view(-1, 1)
    })
    g.edata['type_s'] = torch.LongTensor(rel_s)
    if rel_o is not None:
        g.edata['type_o'] = torch.LongTensor(rel_o)

    # g.ids: in-graph node index -> node id in the entire node set.
    g.ids = dict(enumerate(uniq_v))
    return g
Ejemplo n.º 2
0
    def get_per_graph_ent_embeds(self, t_list, graph_train_list, val=False):
        """Run the entity encoder over a batch of graphs and split the result.

        Each node's input feature is its static entity embedding concatenated
        with a sinusoidal time embedding ``sin(t * w + b)`` computed per
        graph from ``self.w_ent_embeds`` and ``self.b_ent_embeds``.

        Args:
            t_list: one time value per graph, paired positionally with
                ``graph_train_list``.
            graph_train_list: graphs carrying ``ndata['id']`` and
                ``edata['type_s']``.
            val: if true the graphs are encoded unchanged; otherwise each
                graph is replaced by a randomly sampled edge subgraph.

        Returns:
            The encoder's ``ndata['h']`` split into one chunk per graph,
            using the node counts of ``graph_train_list``.
        """
        if val:
            sampled_graph_list = graph_train_list
        else:
            sampled_graph_list = []
            for full_graph in graph_train_list:
                edge_types = full_graph.edata['type_s']
                # Edge i and edge i + half are sampled together; presumably
                # the second half holds the reverse edges -- confirm against
                # the graph construction code.
                half_num_edges = int(full_graph.edges()[0].shape[0] / 2)
                forward_ids = np.random.choice(
                    np.arange(half_num_edges),
                    size=int(0.5 * half_num_edges),
                    replace=False)
                kept_edge_ids = np.concatenate(
                    (forward_ids, forward_ids + half_num_edges))

                subgraph = full_graph.edge_subgraph(kept_edge_ids,
                                                    preserve_nodes=True)
                deg_norm = torch.from_numpy(comp_deg_norm(subgraph))
                subgraph.ndata.update({
                    'id': full_graph.ndata['id'],
                    'norm': deg_norm.view(-1, 1),
                })
                subgraph.edata['type_s'] = edge_types[kept_edge_ids]
                subgraph.ids = full_graph.ids
                sampled_graph_list.append(subgraph)

        # Time features are built from the full graphs, not the sampled ones.
        dim = self.embed_size
        per_graph_time = [
            torch.sin(t * self.w_ent_embeds[g.ndata['id']].view(-1, dim) +
                      self.b_ent_embeds[g.ndata['id']].view(-1, dim))
            for t, g in zip(t_list, graph_train_list)
        ]
        batched_graph = dgl.batch(sampled_graph_list)

        time_embeds = torch.cat(per_graph_time, dim=0)
        static_embeds = self.ent_embeds[batched_graph.ndata['id']].view(-1, dim)
        batched_graph.ndata['h'] = torch.cat([static_embeds, time_embeds],
                                             dim=-1)
        if self.use_cuda:
            move_dgl_to_cuda(batched_graph)

        node_sizes = [len(g.nodes()) for g in graph_train_list]
        encoded_graph = self.ent_encoder(batched_graph, reverse=False)
        return encoded_graph.ndata['h'].split(node_sizes)
Ejemplo n.º 3
0
    def get_per_graph_ent_embeds(self, t_list, graph_train_list, val=False):
        """Encode a batch of graphs with time-modulated entity embeddings.

        The input feature of a node is its static embedding multiplied
        element-wise by ``cat(ones, sin(t * w + b))``: the first
        ``self.static_embed_size`` columns are left unchanged and the
        remaining ``self.temporal_embed_size`` columns are scaled by the
        sinusoidal term.

        Args:
            t_list: one time value per graph, paired positionally with
                ``graph_train_list``.
            graph_train_list: graphs carrying ``ndata['id']`` and
                ``edata['type_s']``.
            val: if true the graphs are encoded unchanged; otherwise half of
                each graph's edges are sampled at random.

        Returns:
            The encoder's ``ndata['h']`` split into one chunk per graph,
            using the node counts of ``graph_train_list``.
        """
        if val:
            sampled_graph_list = graph_train_list
        else:
            sampled_graph_list = []
            for full_graph in graph_train_list:
                edge_types = full_graph.edata['type_s']
                num_edges = full_graph.edges()[0].shape[0]
                kept_edge_ids = np.random.choice(np.arange(num_edges),
                                                 size=int(0.5 * num_edges),
                                                 replace=False)
                subgraph = full_graph.edge_subgraph(kept_edge_ids,
                                                    preserve_nodes=True)
                deg_norm = comp_deg_norm(subgraph)
                subgraph.ndata.update({
                    'id': full_graph.ndata['id'],
                    'norm': torch.from_numpy(deg_norm).view(-1, 1),
                })
                subgraph.edata['norm'] = node_norm_to_edge_norm(
                    subgraph, torch.from_numpy(deg_norm).view(-1, 1))
                subgraph.edata['type_s'] = edge_types[kept_edge_ids]
                subgraph.ids = full_graph.ids
                sampled_graph_list.append(subgraph)

        # Features are computed from the full graphs, not the sampled ones.
        ent_embeds = []
        for t, g in zip(t_list, graph_train_list):
            node_ids = g.ndata['id']
            static_part = self.ent_embeds[node_ids].view(-1, self.embed_size)
            weight = self.w_temp_ent_embeds[node_ids].view(
                -1, self.temporal_embed_size)
            bias = self.b_temp_ent_embeds[node_ids].view(
                -1, self.temporal_embed_size)
            unit_cols = static_part.new_ones(static_part.shape[0],
                                             self.static_embed_size)
            scale = torch.cat((unit_cols, torch.sin(t * weight + bias)),
                              dim=-1)
            ent_embeds.append(static_part * scale)

        batched_graph = dgl.batch(sampled_graph_list)
        batched_graph.ndata['h'] = torch.cat(ent_embeds, dim=0)
        if self.use_cuda:
            move_dgl_to_cuda(batched_graph)

        node_sizes = [len(g.nodes()) for g in graph_train_list]
        encoded_graph = self.ent_encoder(batched_graph)
        return encoded_graph.ndata['h'].split(node_sizes)
Ejemplo n.º 4
0
    def get_per_graph_ent_embeds(self,
                                 g_batched_list_t,
                                 node_sizes,
                                 first_prev_graph_embeds,
                                 second_prev_graph_embeds,
                                 val=False):
        """Encode a batch of graphs and return both encoder layers per graph.

        Args:
            g_batched_list_t: graphs carrying ``ndata['id']`` and
                ``edata['type_s']``.
            node_sizes: chunk sizes used to split the encoder outputs back
                into per-graph pieces.
            first_prev_graph_embeds: passed through to ``self.ent_encoder``.
            second_prev_graph_embeds: passed through to ``self.ent_encoder``.
            val: if true the graphs are encoded unchanged; otherwise each
                graph is replaced by a randomly sampled edge subgraph.

        Returns:
            A pair ``(first_layer_chunks, second_layer_chunks)``, each the
            corresponding layer's ``ndata['h']`` split by ``node_sizes``.
        """
        if val:
            sampled_graph_list = g_batched_list_t
        else:
            sampled_graph_list = []
            for full_graph in g_batched_list_t:
                edge_types = full_graph.edata['type_s']
                # Edge i and edge i + half are sampled together; presumably
                # the second half holds the reverse edges -- confirm against
                # the graph construction code.
                half_num_edges = int(full_graph.edges()[0].shape[0] / 2)
                forward_ids = np.random.choice(
                    np.arange(half_num_edges),
                    size=int(0.5 * half_num_edges),
                    replace=False)
                kept_edge_ids = np.concatenate(
                    (forward_ids, forward_ids + half_num_edges))

                subgraph = full_graph.edge_subgraph(kept_edge_ids,
                                                    preserve_nodes=True)
                deg_norm = comp_deg_norm(subgraph)
                subgraph.ndata.update({
                    'id': full_graph.ndata['id'],
                    'norm': torch.from_numpy(deg_norm).view(-1, 1),
                })
                subgraph.edata['norm'] = node_norm_to_edge_norm(
                    subgraph, torch.from_numpy(deg_norm).view(-1, 1))
                subgraph.edata['type_s'] = edge_types[kept_edge_ids]
                subgraph.ids = full_graph.ids
                sampled_graph_list.append(subgraph)

        batched_graph = dgl.batch(sampled_graph_list)
        node_ids = batched_graph.ndata['id']
        batched_graph.ndata['h'] = self.ent_embeds[node_ids].view(
            -1, self.embed_size)

        if self.use_cuda:
            move_dgl_to_cuda(batched_graph)
        first_layer_graph, second_layer_graph = self.ent_encoder(
            batched_graph, first_prev_graph_embeds, second_prev_graph_embeds)

        first_chunks = first_layer_graph.ndata['h'].split(node_sizes)
        second_chunks = second_layer_graph.ndata['h'].split(node_sizes)
        return first_chunks, second_chunks
Ejemplo n.º 5
0
    def get_per_graph_ent_embeds(self,
                                 g_batched_list_t,
                                 cur_h,
                                 node_sizes,
                                 val=False):
        """Encode a batch of graphs, conditioning every node on ``cur_h``.

        The input feature of each node is its static entity embedding
        concatenated with the row of ``cur_h`` that belongs to the node's
        graph.

        Args:
            g_batched_list_t: graphs carrying ``ndata['id']`` and
                ``edata['type_s']``.
            cur_h: indexable by graph position; ``cur_h[i]`` is broadcast to
                every node of graph ``i`` (expanded to ``self.embed_size``
                columns).
            node_sizes: number of nodes per graph, used both for the
                broadcast and for splitting the encoder output.
            val: if true the graphs are encoded unchanged; otherwise each
                graph is replaced by a randomly sampled edge subgraph.

        Returns:
            The encoder's ``ndata['h']`` split by ``node_sizes``.
        """
        if val:
            sampled_graph_list = g_batched_list_t
        else:
            sampled_graph_list = []
            for full_graph in g_batched_list_t:
                edge_types = full_graph.edata['type_s']
                # Edge i and edge i + half are sampled together; presumably
                # the second half holds the reverse edges -- confirm against
                # the graph construction code.
                half_num_edges = int(full_graph.edges()[0].shape[0] / 2)
                forward_ids = np.random.choice(
                    np.arange(half_num_edges),
                    size=int(0.5 * half_num_edges),
                    replace=False)
                kept_edge_ids = np.concatenate(
                    (forward_ids, forward_ids + half_num_edges))

                subgraph = full_graph.edge_subgraph(kept_edge_ids,
                                                    preserve_nodes=True)
                deg_norm = torch.from_numpy(comp_deg_norm(subgraph))
                subgraph.ndata.update({
                    'id': full_graph.ndata['id'],
                    'norm': deg_norm.view(-1, 1),
                })
                subgraph.edata['type_s'] = edge_types[kept_edge_ids]
                subgraph.ids = full_graph.ids
                sampled_graph_list.append(subgraph)

        batched_graph = dgl.batch(sampled_graph_list)

        # Repeat each graph's hidden row once per node of that graph.
        broadcast_rows = []
        for graph_pos, num_nodes in enumerate(node_sizes):
            row = cur_h[graph_pos].unsqueeze(0)
            broadcast_rows.append(row.expand(num_nodes, self.embed_size))
        expanded_h = torch.cat(broadcast_rows, dim=0)

        node_ids = batched_graph.ndata['id']
        static_embeds = self.ent_embeds[node_ids].view(-1, self.embed_size)

        batched_graph.ndata['h'] = torch.cat([static_embeds, expanded_h],
                                             dim=-1)
        if self.use_cuda:
            move_dgl_to_cuda(batched_graph)
        encoded_graph = self.ent_encoder(batched_graph, reverse=False)
        return encoded_graph.ndata['h'].split(node_sizes)
Ejemplo n.º 6
0
    def get_batch_graph_embeds(self, g_batched_list_t, full, rate):
        """Batch the given graphs and attach static entity embeddings.

        Args:
            g_batched_list_t: graphs carrying ``ndata['id']`` and
                ``edata['type_s']``.
            full: if true the graphs are batched unchanged; otherwise each
                graph is replaced by a randomly sampled edge subgraph.
            rate: fraction of each graph's edges kept when sampling.

        Returns:
            The batched graph with ``ndata['h']`` set to the rows of
            ``self.ent_embeds`` selected by ``ndata['id']``.
        """
        if full:
            sampled_graph_list = g_batched_list_t
        else:
            sampled_graph_list = []
            for full_graph in g_batched_list_t:
                edge_types = full_graph.edata['type_s']
                num_edges = full_graph.edges()[0].shape[0]
                kept_edge_ids = np.random.choice(np.arange(num_edges),
                                                 size=int(rate * num_edges),
                                                 replace=False)
                subgraph = full_graph.edge_subgraph(kept_edge_ids,
                                                    preserve_nodes=True)
                deg_norm = comp_deg_norm(subgraph)
                subgraph.ndata.update({
                    'id': full_graph.ndata['id'],
                    'norm': torch.from_numpy(deg_norm).view(-1, 1),
                })
                subgraph.edata['norm'] = node_norm_to_edge_norm(
                    subgraph, torch.from_numpy(deg_norm).view(-1, 1))
                subgraph.edata['type_s'] = edge_types[kept_edge_ids]
                subgraph.ids = full_graph.ids
                sampled_graph_list.append(subgraph)

        batched_graph = dgl.batch(sampled_graph_list)
        node_ids = batched_graph.ndata['id']
        batched_graph.ndata['h'] = self.ent_embeds[node_ids].view(
            -1, self.embed_size)
        return batched_graph
Ejemplo n.º 7
0
 def get_per_graph_ent_embeds(self, t_list, graph_train_list, val=False):
     """Encode a batch of graphs with a time-aware entity encoder.

     Node inputs are the static entity embeddings; the time values and the
     per-graph node counts are handed to ``self.ent_encoder`` alongside the
     batched graph.

     Args:
         t_list: time values, passed through to ``self.ent_encoder``.
         graph_train_list: graphs carrying ``ndata['id']`` and
             ``edata['type_s']``.
         val: if true the graphs are encoded unchanged; otherwise half of
             each graph's edges are sampled at random.

     Returns:
         The encoder's ``ndata['h']`` split into one chunk per graph, using
         the node counts of ``graph_train_list``.
     """
     if val:
         sampled_graph_list = graph_train_list
     else:
         sampled_graph_list = []
         for full_graph in graph_train_list:
             edge_types = full_graph.edata['type_s']
             num_edges = full_graph.edges()[0].shape[0]
             kept_edge_ids = np.random.choice(np.arange(num_edges),
                                              size=int(0.5 * num_edges),
                                              replace=False)
             subgraph = full_graph.edge_subgraph(kept_edge_ids,
                                                 preserve_nodes=True)
             deg_norm = comp_deg_norm(subgraph)
             subgraph.ndata.update({
                 'id': full_graph.ndata['id'],
                 'norm': torch.from_numpy(deg_norm).view(-1, 1),
             })
             subgraph.edata['norm'] = node_norm_to_edge_norm(
                 subgraph, torch.from_numpy(deg_norm).view(-1, 1))
             subgraph.edata['type_s'] = edge_types[kept_edge_ids]
             subgraph.ids = full_graph.ids
             sampled_graph_list.append(subgraph)

     batched_graph = dgl.batch(sampled_graph_list)
     node_ids = batched_graph.ndata['id']
     batched_graph.ndata['h'] = self.ent_embeds[node_ids].view(
         -1, self.embed_size)
     if self.use_cuda:
         move_dgl_to_cuda(batched_graph)

     node_sizes = [len(g.nodes()) for g in graph_train_list]
     encoded_graph = self.ent_encoder(batched_graph, t_list, node_sizes)
     return encoded_graph.ndata['h'].split(node_sizes)
Ejemplo n.º 8
0
 def sample_subgraph(self, cur_time, target_time):
     """Sample an edge subgraph of the training graph stored for ``cur_time``.

     Each edge is kept by an independent Bernoulli draw with probability
     ``1 - drop_rate``, where the drop rates come from
     ``self.drop_rate_cache[target_time][cur_time]``.

     Args:
         cur_time: key into ``self.graph_dict_train`` selecting the graph.
         target_time: first key into ``self.drop_rate_cache``.

     Returns:
         The sampled subgraph (all nodes preserved) with ``ndata['id']``,
         ``ndata['norm']``, ``edata['norm']``, ``edata['type_s']`` and
         ``ids`` populated.
     """
     full_graph = self.graph_dict_train[cur_time]
     num_edges = full_graph.edges()[0].shape[0]
     edge_types = full_graph.edata['type_s']

     drop_rates = self.drop_rate_cache[target_time][cur_time]
     keep_prob = 1 - torch.tensor(drop_rates)
     keep_mask = torch.bernoulli(keep_prob) == 1
     sampled_idx = torch.arange(num_edges)[keep_mask]

     subgraph = full_graph.edge_subgraph(sampled_idx, preserve_nodes=True)
     deg_norm = comp_deg_norm(subgraph)
     subgraph.ndata.update({
         'id': full_graph.ndata['id'],
         'norm': torch.from_numpy(deg_norm).view(-1, 1),
     })
     subgraph.edata['norm'] = node_norm_to_edge_norm(
         subgraph, torch.from_numpy(deg_norm).view(-1, 1))
     subgraph.edata['type_s'] = edge_types[sampled_idx]
     subgraph.ids = full_graph.ids
     return subgraph
Ejemplo n.º 9
0
    def get_per_graph_ent_embeds(self,
                                 g_batched_list_t,
                                 time_batched_list_t,
                                 node_sizes,
                                 time_diff_tensor,
                                 first_prev_graph_embeds,
                                 second_prev_graph_embeds,
                                 val=False):
        """Encode graphs with time-modulated inputs; return both layers.

        The input feature of a node is its static embedding multiplied
        element-wise by ``cat(ones, sin(t * w + b))``: the first
        ``self.static_embed_size`` columns are left unchanged and the
        remaining ``self.temporal_embed_size`` columns are scaled by the
        sinusoidal term.

        Args:
            g_batched_list_t: graphs carrying ``ndata['id']`` and
                ``edata['type_s']``.
            time_batched_list_t: one time value per graph, paired
                positionally with ``g_batched_list_t``.
            node_sizes: chunk sizes used to split the encoder outputs.
            time_diff_tensor: passed through to ``self.ent_encoder``.
            first_prev_graph_embeds: passed through to ``self.ent_encoder``.
            second_prev_graph_embeds: passed through to ``self.ent_encoder``.
            val: if true the graphs are encoded unchanged; otherwise half of
                each graph's edges are sampled at random.

        Returns:
            A pair ``(first_layer_chunks, second_layer_chunks)``, each the
            corresponding layer's ``ndata['h']`` split by ``node_sizes``.
        """
        if val:
            sampled_graph_list = g_batched_list_t
        else:
            sampled_graph_list = []
            for full_graph in g_batched_list_t:
                edge_types = full_graph.edata['type_s']
                num_edges = full_graph.edges()[0].shape[0]
                kept_edge_ids = np.random.choice(np.arange(num_edges),
                                                 size=int(0.5 * num_edges),
                                                 replace=False)
                subgraph = full_graph.edge_subgraph(kept_edge_ids,
                                                    preserve_nodes=True)
                deg_norm = comp_deg_norm(subgraph)
                subgraph.ndata.update({
                    'id': full_graph.ndata['id'],
                    'norm': torch.from_numpy(deg_norm).view(-1, 1),
                })
                subgraph.edata['norm'] = node_norm_to_edge_norm(
                    subgraph, torch.from_numpy(deg_norm).view(-1, 1))
                subgraph.edata['type_s'] = edge_types[kept_edge_ids]
                subgraph.ids = full_graph.ids
                sampled_graph_list.append(subgraph)

        # Features are computed from the full graphs, not the sampled ones.
        ent_embeds = []
        for t, g in zip(time_batched_list_t, g_batched_list_t):
            node_ids = g.ndata['id']
            static_part = self.ent_embeds[node_ids].view(-1, self.embed_size)
            weight = self.w_temp_ent_embeds[node_ids].view(
                -1, self.temporal_embed_size)
            bias = self.b_temp_ent_embeds[node_ids].view(
                -1, self.temporal_embed_size)
            unit_cols = static_part.new_ones(static_part.shape[0],
                                             self.static_embed_size)
            scale = torch.cat((unit_cols, torch.sin(t * weight + bias)),
                              dim=-1)
            ent_embeds.append(static_part * scale)

        batched_graph = dgl.batch(sampled_graph_list)
        batched_graph.ndata['h'] = torch.cat(ent_embeds, dim=0)

        if self.use_cuda:
            move_dgl_to_cuda(batched_graph)
        first_layer_graph, second_layer_graph = self.ent_encoder(
            batched_graph, first_prev_graph_embeds, second_prev_graph_embeds,
            time_diff_tensor)

        first_chunks = first_layer_graph.ndata['h'].split(node_sizes)
        second_chunks = second_layer_graph.ndata['h'].split(node_sizes)
        return first_chunks, second_chunks
Ejemplo n.º 10
0
def get_train_val_test_graph_at_t(triples, num_rels):
    """Build train/validation/test DGL graphs sharing one node index space.

    The triples of all three splits are pooled to compute a common compact
    node indexing with ``np.unique``. Each split then gets its own graph
    over the full node set, containing only that split's edges.

    Args:
        triples: mapping with keys ``'train'``, ``'valid'`` and ``'test'``,
            each convertible with ``np.array`` to rows of
            ``(subject, relation, object)``. Any split may be empty.
        num_rels: offset added to a relation id to obtain the id of the
            reversed edge; only used by the (currently disabled)
            reverse-edge branch.

    Returns:
        ``(g_train, g_val, g_test)``.

    Raises:
        ValueError: if all three splits are empty.
    """
    train_triples, val_triples, test_triples = (
        np.array(triples[split]) for split in ('train', 'valid', 'test'))

    # An empty split (e.g. an empty list) becomes an array of shape (0,),
    # which cannot be concatenated with 2-D arrays, so only the non-empty
    # splits are joined. Their relative order (train, valid, test) is kept,
    # which the slicing below relies on.
    non_empty = [
        part for part in (train_triples, val_triples, test_triples)
        if part.shape[0] > 0
    ]
    if not non_empty:
        raise ValueError(
            'no triples in any of the train/valid/test splits')
    total_triples = np.concatenate(non_empty, axis=0)

    src_total, rel_total, dst_total = total_triples.transpose()  # node ids
    # uniq_v: sorted original node ids; position in uniq_v is the in-graph
    # node index. edges: in-graph indices such that uniq_v[edges] gives back
    # the stacked (src_total, dst_total) ids.
    uniq_v, edges = np.unique((src_total, dst_total), return_inverse=True)
    src, dst = np.reshape(edges, (2, -1))

    g_train = dgl.DGLGraph()
    g_val = dgl.DGLGraph()
    g_test = dgl.DGLGraph()

    n_train, n_val = len(train_triples), len(val_triples)
    train_span = slice(0, n_train)
    val_span = slice(n_train, n_train + n_val)
    test_span = slice(n_train + n_val, None)

    src_train, rel_train, dst_train = \
        src[train_span], rel_total[train_span], dst[train_span]
    src_val, rel_val, dst_val = \
        src[val_span], rel_total[val_span], dst[val_span]
    src_test, rel_test, dst_test = \
        src[test_span], rel_total[test_span], dst[test_span]

    add_reverse = False
    if add_reverse:
        # NOTE(review): this branch is disabled. Unlike the loop below it
        # does not set edata['norm'], and g_train.ids maps node id -> index
        # (the loop stores index -> node id). Confirm before enabling.
        src_train, dst_train = np.concatenate(
            (src_train, dst_train)), np.concatenate((dst_train, src_train))
        g_train.add_nodes(len(uniq_v))
        g_train.add_edges(src_train, dst_train)
        norm = comp_deg_norm(g_train)

        rel_o = np.concatenate((rel_train + num_rels, rel_train))
        rel_s = np.concatenate((rel_train, rel_train + num_rels))

        g_train.ndata.update({
            'id': torch.from_numpy(uniq_v).long().view(-1, 1),
            # Converted like every other call site of comp_deg_norm.
            'norm': torch.from_numpy(norm).view(-1, 1)
        })
        g_train.edata['type_s'] = torch.LongTensor(rel_s)
        g_train.edata['type_o'] = torch.LongTensor(rel_o)
        g_train.ids = {
            node_id: node_idx for node_idx, node_id in enumerate(uniq_v)
        }

        g_list, src_list, rel_list, dst_list = [g_test, g_val], [
            src_test, src_val
        ], [rel_test, rel_val], [dst_test, dst_val]
    else:
        g_list = [g_train, g_test, g_val]
        src_list = [src_train, src_test, src_val]
        rel_list = [rel_train, rel_test, rel_val]
        dst_list = [dst_train, dst_test, dst_val]

    for graph, cur_src, cur_rel, cur_dst in zip(g_list, src_list, rel_list,
                                                dst_list):
        graph.add_nodes(len(uniq_v))
        graph.add_edges(cur_src, cur_dst)
        node_norm = comp_deg_norm(graph)
        graph.ndata.update({
            'id': torch.from_numpy(uniq_v).long().view(-1, 1),
            'norm': torch.from_numpy(node_norm).view(-1, 1)
        })
        graph.edata['norm'] = node_norm_to_edge_norm(
            graph,
            torch.from_numpy(node_norm).view(-1, 1))
        graph.edata['type_s'] = torch.LongTensor(cur_rel)
        # graph.ids: in-graph node index -> node id in the entire node set.
        graph.ids = dict(enumerate(uniq_v))
    return g_train, g_val, g_test