Ejemplo n.º 1
0
def test_pinsage_sampling():
    """Exercise PinSAGESampler / RandomWalkNeighborSampler on several small graphs.

    Every sampler is called with seed nodes 0 and 2.  The result must be a
    graph with a single node type and must contain an edge into node 0 coming
    from node 0 or 1, and an edge into node 2 coming from node 2 or 3.
    """
    def _test_sampler(g, sampler, ntype):
        seeds = F.tensor([0, 2], dtype=F.int64)
        sampled = sampler(seeds)
        assert sampled.ntypes == [ntype]
        src, dst = sampled.all_edges(form='uv', order='eid')
        pairs = list(zip(F.asnumpy(src).tolist(), F.asnumpy(dst).tolist()))
        # Seed 0 must have picked up a neighbor among nodes {0, 1}.
        assert any(pair in pairs for pair in ((1, 0), (0, 0)))
        # Seed 2 must have picked up a neighbor among nodes {2, 3}.
        assert any(pair in pairs for pair in ((2, 2), (3, 2)))

    # Nodes {0, 1} and {2, 3} form two separate fully connected groups.
    block_edges = [(0, 0), (0, 1), (1, 0), (1, 1), (2, 2), (2, 3), (3, 2),
                   (3, 3)]
    flipped_edges = [(dst, src) for src, dst in block_edges]

    # Bipartite item/user graph, walked via item -> user -> item.
    g = dgl.heterograph({
        ('item', 'bought-by', 'user'): list(block_edges),
        ('user', 'bought', 'item'): flipped_edges,
    })
    sampler = dgl.sampling.PinSAGESampler(g, 'item', 'user', 4, 0.5, 3, 2)
    _test_sampler(g, sampler, 'item')

    # Same walk, with the metapath given by edge type names ...
    name_metapath = ['bought-by', 'bought']
    sampler = dgl.sampling.RandomWalkNeighborSampler(g, 4, 0.5, 3, 2,
                                                     name_metapath)
    _test_sampler(g, sampler, 'item')

    # ... and by canonical edge types.
    canonical_metapath = [('item', 'bought-by', 'user'),
                          ('user', 'bought', 'item')]
    sampler = dgl.sampling.RandomWalkNeighborSampler(g, 4, 0.5, 3, 2,
                                                     canonical_metapath)
    _test_sampler(g, sampler, 'item')

    # Homogeneous graph with the same edges; no metapath is passed.
    g = dgl.graph(list(block_edges))
    sampler = dgl.sampling.RandomWalkNeighborSampler(g, 4, 0.5, 3, 2)
    _test_sampler(g, sampler, g.ntypes[0])

    # Three node types connected in a cycle A -> B -> C -> A.
    g = dgl.heterograph({
        ('A', 'AB', 'B'): [(0, 1), (2, 3)],
        ('B', 'BC', 'C'): [(1, 2), (3, 1)],
        ('C', 'CA', 'A'): [(2, 0), (1, 2)],
    })
    cycle_metapath = ['AB', 'BC', 'CA']
    sampler = dgl.sampling.RandomWalkNeighborSampler(g, 4, 0.5, 3, 2,
                                                     cycle_metapath)
    _test_sampler(g, sampler, 'A')
Ejemplo n.º 2
0
def test_sage_conv_bi_empty(idtype, aggre_type, out_dim):
    """Run SAGEConv on a bipartite graph that has nodes but no edges.

    The graph has 5 '_U' nodes and 3 '_V' nodes.  For each aggregator the
    output must have one row per '_V' node and ``out_dim`` columns.

    Note: the ``aggre_type`` argument is never read; the aggregator types
    exercised here are hard-coded ('gcn', then 'mean', 'pool', 'lstm').
    """
    num_src, num_dst = 5, 3
    empty = dgl.heterograph({('_U', '_E', '_V'): ([], [])},
                            {'_U': num_src, '_V': num_dst}).to(F.ctx())
    g = empty.astype(idtype).to(F.ctx())

    # (aggregator, destination feature size); 'gcn' is run with equal
    # source/destination feature sizes, the others with a size-1 destination.
    cases = [('gcn', 3)] + [(agg, 1) for agg in ('mean', 'pool', 'lstm')]
    for aggregator, dst_dim in cases:
        conv = nn.SAGEConv((3, dst_dim), out_dim, aggregator)
        inputs = (F.randn((num_src, 3)), F.randn((num_dst, dst_dim)))
        out = conv(g, inputs)
        assert out.shape[-1] == out_dim
        assert out.shape[0] == num_dst
Ejemplo n.º 3
0
 def _generate_dec_graph(self, rating_pairs):
     """Build a user -> movie heterograph with one 'rate' edge per rating pair.

     ``rating_pairs`` is used as the (row, col) index pair of a SciPy COO
     matrix of shape ``(self.num_user, self.num_movie)``.  The edges read
     back from that matrix are wrapped in a heterograph whose node counts
     are set explicitly from ``self.num_user`` / ``self.num_movie``.
     """
     n_users, n_movies = self.num_user, self.num_movie

     # A unit entry for every (user, movie) pair.
     indicator = np.ones_like(rating_pairs[0])
     adjacency = sp.coo_matrix((indicator, rating_pairs),
                               shape=(n_users, n_movies),
                               dtype=np.float32)

     # Go through a temporary bipartite graph to obtain the edge endpoints.
     bipartite = dgl.bipartite_from_scipy(adjacency,
                                          utype='_U',
                                          etype='_E',
                                          vtype='_V')
     endpoints = bipartite.edges()

     node_counts = {'user': n_users, 'movie': n_movies}
     return dgl.heterograph({('user', 'rate', 'movie'): endpoints},
                            num_nodes_dict=node_counts)
Ejemplo n.º 4
0
    def __init__(self, datadir, batch_size=128):
        """Load train/test edge lists from ``datadir`` and build the training graph.

        Reads ``train.txt`` and ``test.txt`` through ``self.get_edge_list``,
        records edge/node counts, and stores the heterograph built from the
        training edges in ``self.G``.
        """
        ui_relation = ('user', 'ui', 'item')

        train_path = os.path.join(datadir, 'train.txt')
        self.train_items, self.train_edge_dict = self.get_edge_list(train_path)
        test_path = os.path.join(datadir, 'test.txt')
        self.test_items, self.test_edge_dict = self.get_edge_list(test_path)

        # Number of user-item interactions in each split.
        self.n_train = len(self.train_edge_dict[ui_relation])
        self.n_test = len(self.test_edge_dict[ui_relation])

        # Only the training edges go into the graph.
        self.G = dgl.heterograph(self.train_edge_dict)

        self.n_items = self.G.number_of_nodes('item')
        self.n_users = self.G.number_of_nodes('user')
        user_ids = self.G.nodes('user')
        self.users = user_ids.detach().cpu().numpy().tolist()

        self.batch_size = batch_size
 def get_mol_complete_graph(self, idx, e_start, e_end, pairwise_start,
                            n_atoms):
     """Return the graph with 'bond' and 'complete' relations for one molecule.

     When ``self.prefetch_graphs`` is truthy the graph stored at
     ``self.mol_complete_graphs[idx]`` is returned.  Otherwise the graph is
     built from the column slice ``e_start:e_end`` of ``self.edge_indices``
     and from ``n_atoms * (n_atoms - 1)`` columns of
     ``self.dist_dict['pairwise_indices']`` starting at ``pairwise_start``.
     """
     if self.prefetch_graphs:
         return self.mol_complete_graphs[idx]

     n_pairs = n_atoms * (n_atoms - 1)
     bonds = self.edge_indices[:, e_start:e_end]
     all_pairs = self.dist_dict['pairwise_indices']
     pairs = all_pairs[:, pairwise_start:pairwise_start + n_pairs]

     relations = {
         ('atom', 'bond', 'atom'): (bonds[0], bonds[1]),
         ('atom', 'complete', 'atom'): (pairs[0], pairs[1]),
     }
     return dgl.heterograph(relations)
Ejemplo n.º 6
0
def create_random_hetero():
    """Create a sparse random heterograph with three node and three edge types.

    Each relation is drawn with ``scipy.sparse.random`` at density 0.001 and a
    fixed ``random_state`` of 100.  Node type 'n1' and edge type 'r1' get a
    'feat' column holding their own IDs.
    """
    num_nodes = {'n1': 10000, 'n2': 10010, 'n3': 10020}
    relations = (('n1', 'r1', 'n2'),
                 ('n1', 'r2', 'n3'),
                 ('n2', 'r3', 'n3'))

    def _random_edges(n_src, n_dst):
        # Endpoints of the non-zero entries of a random sparse matrix.
        mat = spsp.random(n_src, n_dst, density=0.001, format='coo',
                          random_state=100)
        return mat.row, mat.col

    edges = {rel: _random_edges(num_nodes[rel[0]], num_nodes[rel[2]])
             for rel in relations}
    g = dgl.heterograph(edges, num_nodes)

    # Features are simply the ID ranges, shaped as column vectors.
    n1_ids = F.arange(0, g.number_of_nodes('n1'))
    g.nodes['n1'].data['feat'] = F.unsqueeze(n1_ids, 1)
    r1_ids = F.arange(0, g.number_of_edges('r1'))
    g.edges['r1'].data['feat'] = F.unsqueeze(r1_ids, 1)
    return g
Ejemplo n.º 7
0
def _create_heterogeneous():
    """Build a random heterograph together with edge-exclusion fixtures.

    Returns:
        A 4-tuple ``(g, reverse_etypes, always_exclude, seed_edges)``:
        the graph (relations 'AA' and 'AB' plus their 'rev-' counterparts,
        1000 random edges each over 200 nodes per type), the mapping between
        each edge type and its reverse, 50 random edge IDs per forward type,
        and the full edge ID range per forward type.
    """
    device = F.ctx()
    n_nodes, n_edges = 200, 1000
    forward_rels = (('A', 'AA', 'A'), ('A', 'AB', 'B'))

    edges = {}
    for src_type, rel, dst_type in forward_rels:
        src = torch.randint(0, n_nodes, (n_edges,), device=device)
        dst = torch.randint(0, n_nodes, (n_edges,), device=device)
        edges[src_type, rel, dst_type] = (src, dst)
        # The reverse relation swaps the endpoints of every edge.
        edges[dst_type, 'rev-' + rel, src_type] = (dst, src)
    g = dgl.heterograph(edges, num_nodes_dict={'A': n_nodes, 'B': n_nodes})

    forward_to_reverse = {'AA': 'rev-AA', 'AB': 'rev-AB'}
    reverse_etypes = dict(forward_to_reverse)
    reverse_etypes.update(
        {rev: fwd for fwd, rev in forward_to_reverse.items()})

    always_exclude = {
        rel: torch.randint(0, n_edges, (50,), device=device)
        for rel in ('AA', 'AB')}
    seed_edges = {
        rel: torch.arange(0, n_edges, device=device)
        for rel in ('AA', 'AB')}
    return g, reverse_etypes, always_exclude, seed_edges
Ejemplo n.º 8
0
def create_test_heterograph2():
    """Build a heterograph whose relations come from four different input kinds.

    'follows' is given as an edge list, 'plays' as a SciPy COO matrix,
    'wishes' as a NetworkX bipartite DiGraph and 'develops' as an existing
    DGL bipartite graph.
    """
    # user -> game adjacency in COO form.
    rows, cols = [0, 1, 2, 1], [0, 0, 1, 1]
    plays = ssp.coo_matrix(([1, 1, 1, 1], (rows, cols)))

    # user -> game wishes as a NetworkX graph; edges carry explicit ids.
    wishes = nx.DiGraph()
    wishes.add_nodes_from(['u0', 'u1', 'u2'], bipartite=0)
    wishes.add_nodes_from(['g0', 'g1'], bipartite=1)
    for edge_id, (user, game) in enumerate([('u0', 'g1'), ('u2', 'g0')]):
        wishes.add_edge(user, game, id=edge_id)

    develops = dgl.bipartite([(0, 0), (1, 1)], 'developer', 'develops', 'game')

    relations = {
        ('user', 'follows', 'user'): [(0, 1), (1, 2)],
        ('user', 'plays', 'game'): plays,
        ('user', 'wishes', 'game'): wishes,
        ('developer', 'develops', 'game'): develops,
    }
    return dgl.heterograph(relations)
Ejemplo n.º 9
0
def heterograph0():
    """Create a small user/developer/game heterograph with random 'h' features.

    Every node type and edge type receives a random feature matrix named 'h'
    that is copied to the CPU context.
    """
    g = dgl.heterograph({
        ('user', 'plays', 'game'): ([0, 1, 1, 2], [0, 0, 1, 1]),
        ('developer', 'develops', 'game'): ([0, 1], [0, 1]),
    })

    # (type name, feature width), in the order the features are generated.
    node_feature_dims = (('user', 3), ('game', 2), ('developer', 3))
    edge_feature_dims = (('plays', 1), ('develops', 5))

    for ntype, width in node_feature_dims:
        values = F.randn((g.number_of_nodes(ntype), width))
        g.nodes[ntype].data['h'] = F.copy_to(values, F.cpu())
    for etype, width in edge_feature_dims:
        values = F.randn((g.number_of_edges(etype), width))
        g.edges[etype].data['h'] = F.copy_to(values, F.cpu())
    return g
Ejemplo n.º 10
0
def create_test_heterograph_large(idtype):
    """Create a heterograph with 2500 random edges per relation.

    All four relations share the same randomly drawn (src, dst) arrays with
    IDs in [0, 50).  The graph is created with the requested ``idtype`` on
    the ``F.ctx()`` device, and both properties are asserted before returning.
    """
    n_edges = 2500
    src = np.random.randint(0, 50, n_edges)
    dst = np.random.randint(0, 50, n_edges)

    relations = [
        ('user', 'follows', 'user'),
        ('user', 'plays', 'game'),
        ('user', 'wishes', 'game'),
        ('developer', 'develops', 'game'),
    ]
    edge_data = {relation: (src, dst) for relation in relations}
    g = dgl.heterograph(edge_data, idtype=idtype, device=F.ctx())

    assert g.idtype == idtype
    assert g.device == F.ctx()
    return g
Ejemplo n.º 11
0
    def construct_negative_graph(self,):
        """Build a heterograph of negative edges for every canonical edge type.

        Calls ``self.negative_sampler(self.hg, self.train_eid_dict)``.  A
        non-mapping result is accepted only when the graph has exactly one
        edge type and is then keyed by that canonical edge type.  Edge types
        the sampler did not return get an empty edge list.

        Returns:
            A heterograph with the same node types and node counts as
            ``self.hg`` containing the sampled negative edges.

        Raises:
            AssertionError: if the sampler returns a non-mapping while the
                graph does not have exactly one edge type.
        """
        neg_srcdst = self.negative_sampler(self.hg, self.train_eid_dict)
        if not isinstance(neg_srcdst, Mapping):
            assert len(self.hg.etypes) == 1, \
                'graph has multiple or no edge types; '\
                'please return a dict in negative sampler.'
            neg_srcdst = {self.hg.canonical_etypes[0]: neg_srcdst}

        # Placeholder endpoints for edge types without negative samples.
        # ``th.tensor([])`` would be a float32 CPU tensor, which is not a
        # valid node-ID tensor and can disagree with the sampled tensors, so
        # take dtype and device from a sampled tensor when one is available.
        sample = next(iter(neg_srcdst.values()), None)
        if sample is not None and th.is_tensor(sample[0]):
            empty = sample[0].new_empty((0,))
        else:
            empty = th.tensor([], dtype=th.int64)

        neg_edges = {
            etype: neg_srcdst.get(etype, (empty, empty))
            for etype in self.hg.canonical_etypes}
        num_nodes = {
            ntype: self.hg.number_of_nodes(ntype) for ntype in self.hg.ntypes}
        neg_pair_graph = dgl.heterograph(neg_edges, num_nodes)
        return neg_pair_graph
Ejemplo n.º 12
0
def create_random_hetero():
    """Create a sparse random heterograph and attach all-ones features to 'n1'.

    Each of the three relations is drawn with ``scipy.sparse.random`` at
    density 0.001 and ``random_state=100``.  Node type 'n1' receives a
    10-column float32 'feat' matrix of ones created on the CPU context.
    """
    num_nodes = {'n1': 1010, 'n2': 1000, 'n3': 1020}
    edges = {}
    for src_ntype, rel, dst_ntype in (('n1', 'r1', 'n2'),
                                      ('n1', 'r2', 'n3'),
                                      ('n2', 'r3', 'n3')):
        sparse = spsp.random(num_nodes[src_ntype],
                             num_nodes[dst_ntype],
                             density=0.001,
                             format='coo',
                             random_state=100)
        # Non-zero coordinates become the edge endpoints.
        edges[(src_ntype, rel, dst_ntype)] = (sparse.row, sparse.col)

    g = dgl.heterograph(edges, num_nodes)
    feat_shape = (g.number_of_nodes('n1'), 10)
    g.nodes['n1'].data['feat'] = F.ones(feat_shape, F.float32, F.cpu())
    return g
Ejemplo n.º 13
0
def retHeterographProdCat(df):
    """Build the product-category heterograph from a sequence of tables.

    ``df[0]`` .. ``df[5]`` are passed to ``retIDsOneColumnATaTime`` with
    column argument 0 and ``df[6]`` with column argument 2; the resulting ID
    maps are merged into one dictionary.  ``df[0]`` .. ``df[5]`` provide the
    category-to-category relations, and ``df[6]`` provides the
    category-to-product relations.

    Returns:
        The ``dgl.heterograph`` built from all relations.
    """
    dictIDsMASTER = {}

    # Collect all IDs into one dictionary.
    for x in range(0, 6):
        dictIDsMASTER.update(retIDsOneColumnATaTime(df[x], 0))
    dictIDsMASTER.update(retIDsOneColumnATaTime(df[6], 2))  # Also IDs

    # Known problem: when paired columns are parsed from the CSV, one column
    # breaks whenever the next one has no value.
    # TODO: parse the category/sub-category columns separately and only then
    # the last column -> item.

    # NOTE: the entry for df[6] is computed but not used below; the call is
    # kept so any effects of the helper stay unchanged.
    mappedRels = [retMappedRelationCategories(df[x], dictIDsMASTER)
                  for x in range(0, 7)]

    # Map the relations prod_catX -> product ID.  Rows are grouped by the
    # first 12 characters of the category value in a single pass; the groups
    # and the pairs within each group keep the row order of df[6].
    productRels = {}
    for _, row in df[6].iterrows():
        catValue = row[0]
        relation = (catValue[:12], 'has_product', 'prod_id')
        pair = (dictIDsMASTER[catValue], dictIDsMASTER[row[2]])
        productRels.setdefault(relation, []).append(pair)

    dataDict = {
        ('<prod_cat_1>', 'has_category12', '<prod_cat_2>'): mappedRels[0],
        ('<prod_cat_2>', 'has_category23', '<prod_cat_3>'): mappedRels[1],
        ('<prod_cat_3>', 'has_category34', '<prod_cat_4>'): mappedRels[2],
        ('<prod_cat_4>', 'has_category45', '<prod_cat_5>'): mappedRels[3],
        ('<prod_cat_5>', 'has_category56', '<prod_cat_6>'): mappedRels[4],
        ('<prod_cat_6>', 'has_category67', '<prod_cat_7>'): mappedRels[5]
    }
    dataDict.update(productRels)

    g = dgl.heterograph(dataDict)
    return g
Ejemplo n.º 14
0
def read_csv_heterograph_dgl(raw_dir, add_inverse_edge = False, additional_node_files = None, additional_edge_files = None):
    """Read raw CSV heterographs and convert each one into a DGL heterograph.

    Args:
        raw_dir: passed through to ``read_csv_heterograph_raw``.
        add_inverse_edge: passed through to ``read_csv_heterograph_raw``.
        additional_node_files: names of extra per-node-type features; a
            name that does not contain 'node_' is prefixed with it.  ``None``
            (the default) means no extra files.
        additional_edge_files: names of extra per-edge-type features; a
            name that does not contain 'edge_' is prefixed with it.  ``None``
            (the default) means no extra files.

    Returns:
        A list of DGL heterographs with 'feat' and the additional features
        attached as node / edge data.
    """
    # Fresh lists per call instead of shared mutable default arguments.
    additional_node_files = [] if additional_node_files is None else additional_node_files
    additional_edge_files = [] if additional_edge_files is None else additional_edge_files

    def _prefixed(key, prefix):
        # Feature names are stored with the 'node_' / 'edge_' prefix.
        return key if prefix in key else prefix + key

    graph_list = read_csv_heterograph_raw(raw_dir, add_inverse_edge, additional_node_files = additional_node_files, additional_edge_files = additional_edge_files)
    dgl_graph_list = []

    print('Converting graphs into DGL objects...')

    for graph in tqdm(graph_list):
        # add edge connectivity: one (src, dst) pair list per relation
        g_dict = {
            triplet: list(zip(edge_index[0], edge_index[1]))
            for triplet, edge_index in graph["edge_index_dict"].items()
        }

        dgl_hetero_graph = dgl.heterograph(g_dict)

        if graph["edge_feat_dict"] is not None:
            for triplet, feat in graph["edge_feat_dict"].items():
                dgl_hetero_graph.edges[triplet].data["feat"] = torch.from_numpy(feat)

        if graph["node_feat_dict"] is not None:
            for nodetype, feat in graph["node_feat_dict"].items():
                dgl_hetero_graph.nodes[nodetype].data["feat"] = torch.from_numpy(feat)

        for key in additional_node_files:
            feat_name = _prefixed(key, 'node_')
            for nodetype, feat in graph[feat_name].items():
                dgl_hetero_graph.nodes[nodetype].data[feat_name] = torch.from_numpy(feat)

        for key in additional_edge_files:
            feat_name = _prefixed(key, 'edge_')
            for triplet, feat in graph[feat_name].items():
                dgl_hetero_graph.edges[triplet].data[feat_name] = torch.from_numpy(feat)

        dgl_graph_list.append(dgl_hetero_graph)

    return dgl_graph_list
Ejemplo n.º 15
0
def test_node_dataloader(sampler_name, pin_graph):
    """Iterate NodeDataLoader over a homogeneous and a heterogeneous graph.

    ``sampler_name`` selects one of 'full', 'neighbor' or 'neighbor2'.  Every
    batch element (input nodes, output nodes, blocks) is passed to
    ``_check_device``.  When ``pin_graph`` is truthy and the context is not
    the CPU, the homogeneous graph is pinned first and unpinned at the end.
    """
    def _pick_sampler(neighbor_fanouts):
        # Only the 'neighbor' entry differs between the two graphs.
        available = {
            'full': dgl.dataloading.MultiLayerFullNeighborSampler(2),
            'neighbor': dgl.dataloading.MultiLayerNeighborSampler(neighbor_fanouts),
            'neighbor2': dgl.dataloading.MultiLayerNeighborSampler([3, 3])}
        return available[sampler_name]

    def _check_batches(loader):
        for input_nodes, output_nodes, blocks in loader:
            _check_device(input_nodes)
            _check_device(output_nodes)
            _check_device(blocks)

    # --- homogeneous graph ---
    g1 = dgl.graph(([0, 0, 0, 1, 1], [1, 2, 3, 3, 4]))
    if pin_graph and F.ctx() != F.cpu():
        g1.create_formats_()
        g1.pin_memory_()
    g1.ndata['feat'] = F.copy_to(F.randn((5, 8)), F.cpu())
    g1.ndata['label'] = F.copy_to(F.randn((g1.num_nodes(),)), F.cpu())

    for num_workers in (0, 1, 2):
        sampler = _pick_sampler([3, 3])
        dataloader = dgl.dataloading.NodeDataLoader(
            g1, g1.nodes(), sampler, device=F.ctx(),
            batch_size=g1.num_nodes(),
            num_workers=num_workers)
        _check_batches(dataloader)

    # --- heterogeneous graph ---
    g2 = dgl.heterograph({
         ('user', 'follow', 'user'): ([0, 0, 0, 1, 1, 1, 2], [1, 2, 3, 0, 2, 3, 0]),
         ('user', 'followed-by', 'user'): ([1, 2, 3, 0, 2, 3, 0], [0, 0, 0, 1, 1, 1, 2]),
         ('user', 'play', 'game'): ([0, 1, 1, 3, 5], [0, 1, 2, 0, 2]),
         ('game', 'played-by', 'user'): ([0, 1, 2, 0, 2], [0, 1, 1, 3, 5])
    })
    for ntype in g2.ntypes:
        feat = F.randn((g2.num_nodes(ntype), 8))
        g2.nodes[ntype].data['feat'] = F.copy_to(feat, F.cpu())
    # One batch is large enough to hold every node of the largest node type.
    batch_size = max(g2.num_nodes(nty) for nty in g2.ntypes)
    per_etype_fanouts = [{etype: 3 for etype in g2.etypes}] * 2
    sampler = _pick_sampler(per_etype_fanouts)

    seed_nodes = {nty: g2.nodes(nty) for nty in g2.ntypes}
    dataloader = dgl.dataloading.NodeDataLoader(
        g2, seed_nodes,
        sampler, device=F.ctx(), batch_size=batch_size)
    assert isinstance(iter(dataloader), Iterator)
    _check_batches(dataloader)

    if g1.is_pinned():
        g1.unpin_memory_()
Ejemplo n.º 16
0
def load_ogb(dataset):
    """Load the 'ogbn-mag' dataset as a heterograph with reverse relations.

    For every canonical edge type of the original graph a reverse relation
    named ``'rev-' + etype`` is added.  The 'paper' node type receives the
    original 'feat' data, boolean 'train_mask' / 'val_mask' / 'test_mask'
    columns built from the dataset split, and a 'labels' column.

    Args:
        dataset: dataset name; only ``'ogbn-mag'`` is supported.

    Returns:
        The constructed DGL heterograph.

    Raises:
        ValueError: if ``dataset`` is not ``'ogbn-mag'``.
    """
    if dataset != 'ogbn-mag':
        # The original raised a plain string, which is itself a TypeError.
        raise ValueError("Do not support other ogbn datasets.")

    data = DglNodePropPredDataset(name=dataset)
    split_idx = data.get_idx_split()
    train_idx = split_idx["train"]['paper']
    val_idx = split_idx["valid"]['paper']
    test_idx = split_idx["test"]['paper']
    hg_orig, labels = data[0]

    # Rebuild the graph with an explicit reverse relation for every edge type.
    subgs = {}
    for etype in hg_orig.canonical_etypes:
        u, v = hg_orig.all_edges(etype=etype)
        subgs[etype] = (u, v)
        subgs[(etype[2], 'rev-' + etype[1], etype[0])] = (v, u)
    hg = dgl.heterograph(subgs)
    hg.nodes['paper'].data['feat'] = hg_orig.nodes['paper'].data['feat']
    paper_labels = labels['paper'].squeeze()

    num_rels = len(hg.canonical_etypes)
    num_classes = data.num_classes
    print('Number of relations: {}'.format(num_rels))
    print('Number of class: {}'.format(num_classes))
    print('Number of train: {}'.format(len(train_idx)))
    print('Number of valid: {}'.format(len(val_idx)))
    print('Number of test: {}'.format(len(test_idx)))

    # One boolean mask per split, indexed by paper node ID.
    num_papers = hg.number_of_nodes('paper')
    for mask_name, idx in (('train_mask', train_idx),
                           ('val_mask', val_idx),
                           ('test_mask', test_idx)):
        mask = th.zeros((num_papers, ), dtype=th.bool)
        mask[idx] = True
        hg.nodes['paper'].data[mask_name] = mask

    hg.nodes['paper'].data['labels'] = paper_labels
    return hg
Ejemplo n.º 17
0
def test_remove_edges():
    """Check ``dgl.remove_edges`` on homogeneous and heterogeneous graphs.

    For every result graph, each remaining (src, dst, original edge ID)
    triple must exist in the source graph, and none of the removed edge IDs
    may appear among the remaining original edge IDs.
    """
    def check(g1, etype, g, edges_removed):
        orig_src, orig_dst, orig_eid = g.edges(etype=etype, form='all')
        kept_src, kept_dst = g1.edges(etype=etype, order='eid')
        # dgl.EID holds, for each remaining edge, its ID in the source graph.
        kept_frame = g1.edata if etype is None else g1.edges[etype].data
        kept_eid = F.asnumpy(kept_frame[dgl.EID])
        kept_src = F.asnumpy(kept_src)
        kept_dst = F.asnumpy(kept_dst)
        original_triples = set(zip(F.asnumpy(orig_src),
                                   F.asnumpy(orig_dst),
                                   F.asnumpy(orig_eid)))

        for triple in zip(kept_src, kept_dst, kept_eid):
            assert triple in original_triples
        assert not np.isin(edges_removed, kept_eid).any()

    removal_cases = ([2], [2, 2], [3, 2], [1, 3, 1, 2])
    for fmt in ('coo', 'csr', 'csc'):
        for edges_to_remove in removal_cases:
            # Graph given as an edge list.
            g = dgl.graph([(0, 1), (2, 3), (1, 2), (3, 4)],
                          restrict_format=fmt)
            pruned = dgl.remove_edges(g, F.tensor(edges_to_remove))
            check(pruned, None, g, edges_to_remove)

            # Same edges given as a SciPy CSR matrix.
            adjacency = spsp.csr_matrix(
                ([1, 1, 1, 1], ([0, 2, 1, 3], [1, 3, 2, 4])), shape=(5, 5))
            g = dgl.graph(adjacency, restrict_format=fmt)
            pruned = dgl.remove_edges(g, F.tensor(edges_to_remove))
            check(pruned, None, g, edges_to_remove)

    # Heterogeneous graph: remove one edge from every relation.
    g = dgl.heterograph({
        ('A', 'AA', 'A'): [(0, 1), (2, 3), (1, 2), (3, 4)],
        ('A', 'AB', 'B'): [(0, 1), (1, 3), (3, 5), (1, 6)],
        ('B', 'BA', 'A'): [(2, 3), (3, 2)]
    })
    removed_per_etype = (('AA', [2]), ('AB', [3]), ('BA', [1]))
    g2 = dgl.remove_edges(
        g, {etype: F.tensor(eids) for etype, eids in removed_per_etype})
    for etype, eids in removed_per_etype:
        check(g2, etype, g, eids)
Ejemplo n.º 18
0
    def process(self):
        """Build the paper/author/conf heterograph from the raw tables.

        Reads the tables via ``self._read_raw_data()``, creates the graph
        with the 'pa'/'ap' and 'pc'/'cp' relation pairs, attaches the
        'label' column of authors and confs as node data, and keeps the
        author names, paper titles and conf names as lists.
        """
        authors, papers, confs, paper_author, paper_conf = self._read_raw_data()

        pa_papers = paper_author['pid'].to_list()
        pa_authors = paper_author['aid'].to_list()
        pc_papers = paper_conf['pid'].to_list()
        pc_confs = paper_conf['cid'].to_list()

        # Every relation is stored together with its reverse direction.
        relations = {
            ('paper', 'pa', 'author'): (pa_papers, pa_authors),
            ('author', 'ap', 'paper'): (pa_authors, pa_papers),
            ('paper', 'pc', 'conf'): (pc_papers, pc_confs),
            ('conf', 'cp', 'paper'): (pc_confs, pc_papers),
        }
        self.g = dgl.heterograph(relations)

        for ntype, table in (('author', authors), ('conf', confs)):
            label_array = table['label'].to_numpy()
            self.g.nodes[ntype].data['label'] = torch.from_numpy(label_array)

        self.author_names = authors['name'].tolist()
        self.paper_titles = papers['title'].tolist()
        self.conf_names = confs['name'].tolist()
Ejemplo n.º 19
0
def add_reverse_edges(g):
    """Add a reverse edge type for every edge type of a heterogeneous graph.

    For each canonical edge type ``(src, etype, dst)`` the new graph also
    contains ``(dst, etype + '_rev', src)`` with swapped endpoints.  Node
    counts are preserved and node data is carried over to the new graph.

    :param g: DGLGraph, the heterogeneous graph
    :return: DGLGraph, a new heterogeneous graph with the reverse edges added
    """
    edge_dict = {}
    for src_type, rel, dst_type in g.canonical_etypes:
        src_ids, dst_ids = g.edges(etype=(src_type, rel, dst_type))
        edge_dict[(src_type, rel, dst_type)] = (src_ids, dst_ids)
        edge_dict[(dst_type, rel + '_rev', src_type)] = (dst_ids, src_ids)

    node_counts = {ntype: g.num_nodes(ntype) for ntype in g.ntypes}
    new_g = dgl.heterograph(edge_dict, node_counts)

    # Copy the node frames of the input graph over to the new graph.
    set_new_frames(new_g, node_frames=extract_node_subframes(g, None))
    return new_g
Ejemplo n.º 20
0
 def _generate_dec_graph(self, rating_pairs):
     """Build a user -> item heterograph with one 'rate' edge per rating pair.

     ``rating_pairs`` is used as the (row, col) index pair of a SciPy COO
     matrix of shape ``(self.num_user, self.num_item)``.  The edges read
     back from that matrix are wrapped in a heterograph whose node counts
     are set explicitly from ``self.num_user`` / ``self.num_item``.
     """
     n_users, n_items = self.num_user, self.num_item

     # One indicator value for every (user, item) pair.
     indicator = np.ones_like(rating_pairs[0])
     # COO matrix used as the intermediate form for graph conversion.
     adjacency = sp.coo_matrix((indicator, rating_pairs),
                               shape=(n_users, n_items),
                               dtype=np.float32)

     bipartite = dgl.bipartite_from_scipy(adjacency,
                                          utype='_U',
                                          etype='_E',
                                          vtype='_V')
     endpoints = bipartite.edges()

     node_counts = {'user': n_users, 'item': n_items}
     return dgl.heterograph({('user', 'rate', 'item'): endpoints},
                            num_nodes_dict=node_counts)
Ejemplo n.º 21
0
def get_hetero_graph(glass_tc_pos_path: str,
                     control_tc_pos_path: str,
                     t2t_threshold: float = 850.0,
                     u2t_threshold: float = np.inf,
                     weight: float = (1.0, 1.0, 1.0)):
    """Build a heterograph of 'tc' and 'control' nodes from two position CSVs.

    Both files must provide the columns 'Position_x', 'Position_y' and
    'Position_z'.  The 'tc' nodes are the glass rows followed by the control
    rows; the 'control' nodes are the control rows.  Node pairs are connected
    when their weighted Euclidean distance does not exceed the threshold.

    Args:
        glass_tc_pos_path: CSV file with the glass positions.
        control_tc_pos_path: CSV file with the control positions.
        t2t_threshold: distance limit for 'tc' to 'tc' edges.
        u2t_threshold: distance limit for 'control' to 'tc' edges.
        weight: per-axis multipliers applied to the squared differences
            (the default is a 3-tuple, despite the ``float`` annotation).

    Returns:
        The heterograph with min-max scaled 'position' data on both node
        types and an 'is-glass-tc' column on the 'tc' nodes.
    """
    position_columns = ['Position_x', 'Position_y', 'Position_z']

    def weighted_distance(u, v):
        return np.sqrt(((u - v)**2 * weight).sum())

    def edges_within(sources, targets, threshold):
        # Endpoint index tensors of all pairs whose distance is <= threshold.
        dist_mat = cdist(sources, targets, weighted_distance)
        close = torch.tensor(dist_mat <= threshold).bool()
        src, dst = torch.nonzero(close, as_tuple=True)
        return (src, dst)

    g_tc_pos = pd.read_csv(glass_tc_pos_path)[position_columns].to_numpy()
    c_tc_pos = pd.read_csv(control_tc_pos_path)[position_columns].to_numpy()
    tc_pos = np.concatenate([g_tc_pos, c_tc_pos], axis=0)
    n_tc = tc_pos.shape[0]

    # NOTE(review): `t2t` and `u2t` are not defined inside this function;
    # presumably they are module-level edge-type keys - confirm.
    graph_data = dict()
    # construct 'tc' to 'tc' edges
    graph_data[t2t] = edges_within(tc_pos, tc_pos, t2t_threshold)
    # construct 'control' to 'tc' edges
    graph_data[u2t] = edges_within(c_tc_pos, tc_pos, u2t_threshold)

    g = dgl.heterograph(graph_data)

    # Scale the positions; 'tc' rows come first, 'control' rows after them.
    stacked = np.concatenate([tc_pos, c_tc_pos], axis=0)
    scaled = MinMaxScaler().fit_transform(stacked)
    g.nodes['tc'].data['position'] = torch.from_numpy(scaled[:n_tc, :]).float()
    g.nodes['control'].data['position'] = torch.from_numpy(
        scaled[n_tc:, :]).float()

    # Binary indicator column on the 'tc' nodes.
    # NOTE(review): the glass rows are set to 0 and the remaining rows stay 1,
    # which looks inverted for a flag named 'is-glass-tc' - confirm intent.
    is_glass_tc = torch.ones(n_tc, 1)
    is_glass_tc[:g_tc_pos.shape[0], :] = 0
    g.nodes['tc'].data['is-glass-tc'] = is_glass_tc
    return g
Ejemplo n.º 22
0
def build_graph(relations_list, relations_data_list):
    """Create a heterograph from parallel lists of relations and edge data.

    The i-th entry of ``relations_list`` is paired with the i-th entry of
    ``relations_data_list``.  A summary of node types, edge types and their
    sizes is printed before the graph is returned.
    """
    relations_data_dic = {
        relation: relations_data_list[position]
        for position, relation in enumerate(relations_list)
    }
    graph = dgl.heterograph(relations_data_dic)

    print('Node types:', graph.ntypes)
    print('Edge types:', graph.etypes)
    print('Canonical edge types:', graph.canonical_etypes)
    for canonical_etype in graph.canonical_etypes:
        edge_count = graph.number_of_edges(canonical_etype)
        print('graph number edges--' + str(canonical_etype) + ':', edge_count)
    for ntype in graph.ntypes:
        node_count = graph.number_of_nodes(ntype)
        print('graph number nodes--' + str(ntype) + ':', node_count)
    return graph
Ejemplo n.º 23
0
def create_test_heterograph_2(idtype):
    """Create a random five-relation heterograph with differently sized edge sets.

    'becomes', 'follows' and 'plays' share one set of 25 random edges,
    'wishes' has 10 and 'develops' has 1000.  The graph is created with the
    requested ``idtype`` on the ``F.ctx()`` device; both are asserted.
    """
    # Draw order matters for reproducibility under a fixed NumPy seed.
    shared_src = np.random.randint(0, 50, 25)
    shared_dst = np.random.randint(0, 50, 25)
    wishes_src = np.random.randint(0, 25, 10)
    wishes_dst = np.random.randint(0, 25, 10)
    develops_src = np.random.randint(0, 100, 1000)
    develops_dst = np.random.randint(0, 100, 1000)

    shared = (shared_src, shared_dst)
    edge_data = {
        ('user', 'becomes', 'player'): shared,
        ('user', 'follows', 'user'): shared,
        ('user', 'plays', 'game'): shared,
        ('user', 'wishes', 'game'): (wishes_src, wishes_dst),
        ('developer', 'develops', 'game'): (develops_src, develops_dst),
    }
    g = dgl.heterograph(edge_data, idtype=idtype, device=F.ctx())

    assert g.idtype == idtype
    assert g.device == F.ctx()
    return g
Ejemplo n.º 24
0
    def giveGraphs(self, batch_size, voxel_pos):
        """Load edge lists from ``data/*.npy`` and build the batched graphs.

        Returns:
            ``(G, G_vox)``: ``G`` batches ``batch_size`` references to the
            homogeneous PMT/voxel graph (with self loops); ``G_vox`` is the
            voxel-only graph (with self loops).

        ``voxel_pos`` is not used by this method.
        """
        def _load_flat(path):
            # Each file stores a list of edge lists; concatenate them.
            groups = np.load(path, allow_pickle=True).tolist()
            return [edge for group in groups for edge in group]

        p2v = _load_flat("data/p2v_spec.npy")
        # p2p and v2v_6 are loaded but not used further in this method.
        p2p = _load_flat("data/p2p.npy")
        v2v = _load_flat("data/v2v.npy")
        v2v_6 = _load_flat("data/v2v_6.npy")

        G_vox = dgl.add_self_loop(dgl.graph(v2v))

        relations = {('PMT', 'p2v', 'vox'): p2v, ('vox', 'v2v', 'vox'): v2v}
        hetero = dgl.heterograph(relations)
        homogeneous = dgl.add_self_loop(dgl.to_homogeneous(hetero))
        G = dgl.batch([homogeneous] * batch_size)
        return G, G_vox
Ejemplo n.º 25
0
 def _build_graph(self):
     """Build the movie/actor/director heterograph from ``self.data``.

     Each row index of ``self.data`` is used as the movie ID.  Directors
     and actors are identified by their position in ``self.directors`` /
     ``self.actors``; actor names not present in ``self.actors`` are
     skipped.  Duplicate pairs are collapsed, and every relation is stored
     together with its reverse.
     """
     actor_columns = ('actor_1_name', 'actor_2_name', 'actor_3_name')
     movie_actor, movie_director = set(), set()
     for movie_id, row in self.data.iterrows():
         director_id = self.directors.index(row['director_name'])
         movie_director.add((movie_id, director_id))
         for column in actor_columns:
             name = row[column]
             if name not in self.actors:
                 continue
             movie_actor.add((movie_id, self.actors.index(name)))

     movie_actor, movie_director = list(movie_actor), list(movie_director)
     ma_movies = [pair[0] for pair in movie_actor]
     ma_actors = [pair[1] for pair in movie_actor]
     md_movies = [pair[0] for pair in movie_director]
     md_directors = [pair[1] for pair in movie_director]

     relations = {
         ('movie', 'ma', 'actor'): (ma_movies, ma_actors),
         ('actor', 'am', 'movie'): (ma_actors, ma_movies),
         ('movie', 'md', 'director'): (md_movies, md_directors),
         ('director', 'dm', 'movie'): (md_directors, md_movies),
     }
     return dgl.heterograph(relations)
Ejemplo n.º 26
0
def create_test_heterograph(idtype):
    """Create the small user/game/developer heterograph used by the tests.

    Relations:
        ('user', 'follows', 'user'),
        ('user', 'plays', 'game'),
        ('user', 'wishes', 'game'),
        ('developer', 'develops', 'game')

    The graph is created with the requested ``idtype`` on the ``F.ctx()``
    device, and both properties are asserted before returning.
    """
    follows = ([0, 1, 2, 1], [0, 0, 1, 1])
    plays = ([0, 1, 2, 1], [0, 0, 1, 1])
    wishes = ([0, 1, 1], [0, 0, 1])
    develops = ([0, 1, 0], [0, 1, 1])

    edge_data = {
        ('user', 'follows', 'user'): follows,
        ('user', 'plays', 'game'): plays,
        ('user', 'wishes', 'game'): wishes,
        ('developer', 'develops', 'game'): develops,
    }
    g = dgl.heterograph(edge_data, idtype=idtype, device=F.ctx())

    assert g.idtype == idtype
    assert g.device == F.ctx()
    return g
Ejemplo n.º 27
0
def test_sage_conv_bi2(idtype, aggre_type):
    """Run SAGEConv on a bipartite graph that has nodes but no edges.

    The graph has 5 '_U' nodes and 3 '_V' nodes.  For each aggregator the
    output must have 3 rows and 2 columns.

    Note: the ``aggre_type`` argument is never read; the aggregator types
    exercised here are hard-coded ('gcn', then 'mean' and 'pool').
    """
    num_src, num_dst, out_dim = 5, 3, 2
    empty = dgl.heterograph({('_U', '_E', '_V'): ([], [])},
                            {'_U': num_src, '_V': num_dst})
    g = empty.astype(idtype).to(F.ctx())
    ctx = F.ctx()

    # (aggregator, destination feature size)
    cases = [('gcn', 3)] + [(agg, 1) for agg in ('mean', 'pool')]
    for aggregator, dst_dim in cases:
        conv = nn.SAGEConv((3, dst_dim), out_dim, aggregator)
        inputs = (F.randn((num_src, 3)), F.randn((num_dst, dst_dim)))
        conv.initialize(ctx=ctx)
        out = conv(g, inputs)
        assert out.shape[-1] == out_dim
        assert out.shape[0] == num_dst
Ejemplo n.º 28
0
def test_sort_with_tag_bipartite(idtype):
    """Check ``sort_csr_by_tag`` / ``sort_csc_by_tag`` on a bipartite graph.

    After sorting, ``check_sort`` must accept the new adjacency together
    with the '_TAG_OFFSET' node data, while the unsorted adjacency must be
    rejected.
    """
    num_nodes, num_adj, num_tags = 200, [20, 50], 5
    base = create_test_heterograph(num_nodes, num_adj, idtype=idtype)
    g = dgl.heterograph({('_U', '_E', '_V'): base.edges()})
    # Random tag per node; utag is drawn before vtag.
    utag = F.tensor(np.random.choice(num_tags, g.number_of_nodes('_U')))
    vtag = F.tensor(np.random.choice(num_tags, g.number_of_nodes('_V')))

    # CSR direction: sorted with the '_V' tags, offsets stored on '_U'.
    csr_sorted = dgl.sort_csr_by_tag(g, vtag)
    before = g.adjacency_matrix(scipy_fmt='csr')
    after = csr_sorted.adjacency_matrix(scipy_fmt='csr')
    u_offsets = csr_sorted.nodes['_U'].data['_TAG_OFFSET']
    assert check_sort(after, vtag, u_offsets)
    assert not check_sort(before, vtag)

    # CSC direction: sorted with the '_U' tags, offsets stored on '_V'.
    csc_sorted = dgl.sort_csc_by_tag(g, utag)
    before = g.adjacency_matrix(transpose=True, scipy_fmt='csr')
    after = csc_sorted.adjacency_matrix(transpose=True, scipy_fmt='csr')
    v_offsets = csc_sorted.nodes['_V'].data['_TAG_OFFSET']
    assert check_sort(after, utag, v_offsets)
    assert not check_sort(before, utag)
def construct_graph(edges, nodes, target_node_type):
    """Build a heterograph from edge-list files, renaming one type to 'target'.

    Every entry of ``edges`` is parsed with ``parse_edgelist``.  The node
    type equal to ``target_node_type`` is renamed to 'target'.  Each parsed
    relation is stored in both directions under the names ``src<>dst`` and
    ``dst<>src``; target-to-target edge lists are skipped, and a
    'self_relation' loop is added for every target node instead.

    Returns:
        ``(g, features, target_id_to_node, id_to_node)`` where
        ``id_to_node`` has the target mapping re-keyed as 'target'.
    """
    print("Getting relation graphs from the following edge lists : {} ".format(edges))
    edgelists, id_to_node = {}, {}
    for edge in edges:
        edgelist, rev_edgelist, id_to_node, src, dst = parse_edgelist(edge, id_to_node, header=True)
        src = 'target' if src == target_node_type else src
        dst = 'target' if dst == target_node_type else dst

        if src == 'target' and dst == 'target':
            print("Will add self loop for target later......")
            continue

        relation_name = src + '<>' + dst
        forward_key = (src, relation_name, dst)
        reverse_key = (dst, dst + '<>' + src, src)
        if forward_key in edgelists:
            # Relation seen before: extend both directions with new lists.
            edgelists[forward_key] = edgelists[forward_key] + edgelist
            edgelists[reverse_key] = edgelists[reverse_key] + rev_edgelist
            print("Append edges for {} from edgelist: {}".format(relation_name, edge))
        else:
            edgelists[forward_key] = edgelist
            edgelists[reverse_key] = rev_edgelist
            print("Read edges for {} from edgelist: {}".format(relation_name, edge))

    # get features for target nodes
    target_id_to_node = id_to_node[target_node_type]
    features, new_nodes = get_features(target_id_to_node, nodes)
    print("Read in features for target nodes")

    # add self relation
    self_loops = [(t, t) for t in id_to_node[target_node_type].values()]
    edgelists[('target', 'self_relation', 'target')] = self_loops

    g = dgl.heterograph(edgelists)
    print(
        "Constructed heterograph with the following metagraph structure: Node types {}, Edge types{}".format(
            g.ntypes, g.canonical_etypes))
    print("Number of nodes of type target : {}".format(g.number_of_nodes('target')))

    g.nodes['target'].data['features'] = th.from_numpy(features)

    # Re-key the target mapping under the renamed node type.
    target_id_to_node = id_to_node[target_node_type]
    id_to_node['target'] = target_id_to_node

    del id_to_node[target_node_type]

    return g, features, target_id_to_node, id_to_node
Ejemplo n.º 30
0
    def __init__(self,
                 all_click_train,
                 all_click_test,
                 click_qtime,
                 user_mapped_id,
                 item_mapped_id,
                 batch_size=128):
        """Map raw click tables to remapped IDs and build the training graph.

        The train/test click tables are remapped with ``self.map_id_df``;
        ``click_qtime`` is joined with ``user_mapped_id`` on
        ``user_id == org_id`` to obtain the remapped query users.  The
        heterograph ``self.G`` is built from the training edges only.
        """
        ui_relation = ('user', 'ui', 'item')

        self.mapped_train_df = self.map_id_df(all_click_train, user_mapped_id,
                                              item_mapped_id)
        self.mapped_test_df = self.map_id_df(all_click_test, user_mapped_id,
                                             item_mapped_id)

        # Attach the remapped user id to every query-time row.
        qtime = pd.merge(left=click_qtime,
                         right=user_mapped_id,
                         left_on='user_id',
                         right_on='org_id')
        qtime = qtime.drop(['org_id'], axis=1)
        qtime = qtime.rename(columns={'remap_id': 'mapped_user_id'})
        self.mapped_qtime_df = qtime
        self.qtime_users = self.mapped_qtime_df['mapped_user_id'].tolist()

        self.train_items = self.get_users_items_dict(self.mapped_train_df)
        self.test_items = self.get_users_items_dict(self.mapped_test_df)

        self.train_edge_dict = self.get_edge_list(self.mapped_train_df)
        self.test_edge_dict = self.get_edge_list(self.mapped_test_df)

        # Number of user-item interactions in each split.
        self.n_train = len(self.train_edge_dict[ui_relation])
        self.n_test = len(self.test_edge_dict[ui_relation])

        # Lookups from remapped ids back to the original ids.
        self.mapped_userId_to_org_userId = dict(
            zip(user_mapped_id.remap_id, user_mapped_id.org_id))
        self.mapped_itemId_to_org_itemId = dict(
            zip(item_mapped_id.remap_id, item_mapped_id.org_id))

        self.G = dgl.heterograph(self.train_edge_dict)

        self.n_items = self.G.number_of_nodes('item')
        self.n_users = self.G.number_of_nodes('user')
        user_ids = self.G.nodes('user')
        self.users = user_ids.detach().cpu().numpy().tolist()
        item_ids = self.G.nodes('item')
        self.items = item_ids.detach().cpu().numpy().tolist()

        self.batch_size = batch_size