Beispiel #1
0
def subgraph_gen(hg, label_idx, neighbours=(10, 10)):
    """Sample a two-hop neighbourhood around all drug nodes and re-index it.

    Every node whose type is ``'drug'`` is used as a seed.  A first
    ``graphsage_sampling`` call follows ``'dti'`` edges from the seeds and a
    second one follows ``'ppi'`` edges from the first-hop result.  All
    collected nodes are renumbered ``0..len(sub_nodes)-1`` and a new
    ``pgl.HeterGraph`` is built from the re-indexed edges.

    Args:
        hg: Source heterogeneous graph.  It must expose ``node_types`` and be
            indexable by edge type; ``hg['dds']`` must provide ``edges`` and
            ``node_feat['features']``.
        label_idx: Mapping from a ``(src, dst)`` tuple of original node ids to
            the label of that ``'dds'`` edge.  Every ``'dds'`` edge of ``hg``
            must be present as a key.
        neighbours: Two-item sequence; item 0 is the sample size passed to the
            ``'dti'`` hop and item 1 the one passed to the ``'ppi'`` hop.

    Returns:
        ``{'sub_graph': (sub_graph, num_sub_nodes, sub_eids, sub_nodes_feat,
        label_mat)}`` where ``label_mat`` is a float32 square matrix holding
        the ``'dds'`` labels at the re-indexed positions.
    """
    nt = hg.node_types
    drugs_idx = np.where(nt == 'drug')[0]

    # graphsage_sampling is defined outside this block; it is used here as
    # returning (list of neighbour node ids, iterable of (src, dst) pairs).
    layer1_neighs, layer1_eids = graphsage_sampling(
        hg, drugs_idx, num_neighbours=neighbours[0], etype='dti')
    layer2_neighs, layer2_eids = graphsage_sampling(
        hg, layer1_neighs, num_neighbours=neighbours[1], etype='ppi')

    sub_nodes = drugs_idx.tolist() + layer1_neighs + layer2_neighs
    # NOTE(review): if a node id occurs more than once in sub_nodes, the
    # mapping keeps its last position while len(sub_nodes) still counts every
    # occurrence -- confirm the sampler never returns repeated ids.
    sub_nodes_reidx = {node: pos for pos, node in enumerate(sub_nodes)}

    # Materialise the 'dds' edge list once; it feeds both the label matrix
    # and the re-indexed edge list below.
    dds_edges = hg['dds'].edges.tolist()

    label_mat = np.zeros((len(sub_nodes), len(sub_nodes)), dtype='float32')
    for p in dds_edges:
        label_mat[sub_nodes_reidx[p[0]], sub_nodes_reidx[p[1]]] = label_idx[tuple(p)]

    sub_eids = {}
    sub_eids['dds'] = [(sub_nodes_reidx[src], sub_nodes_reidx[dst])
                       for (src, dst) in dds_edges]

    # Sampled 'dti' and 'ppi' edges are stored in both directions.
    sub_eids['dti'] = [(sub_nodes_reidx[src], sub_nodes_reidx[dst])
                       for (src, dst) in layer1_eids]
    sub_eids['dti'] += [(sub_nodes_reidx[dst], sub_nodes_reidx[src])
                        for (src, dst) in layer1_eids]

    sub_eids['ppi'] = [(sub_nodes_reidx[src], sub_nodes_reidx[dst])
                       for (src, dst) in layer2_eids]
    sub_eids['ppi'] += [(sub_nodes_reidx[dst], sub_nodes_reidx[src])
                        for (src, dst) in layer2_eids]

    sub_nodes_feat = hg['dds'].node_feat['features'][sub_nodes, :]
    sub_graph = pgl.HeterGraph(edges=sub_eids,
                               num_nodes=len(sub_nodes),
                               node_feat={'features': sub_nodes_feat})

    return {'sub_graph': (sub_graph, len(sub_nodes), sub_eids, sub_nodes_feat, label_mat)}
Beispiel #2
0
    def test_tensor(self):
        """Check numpy <-> paddle conversion of a HeterGraph, copying and in place.

        Builds a 15-node graph with node types 'c', 'p' and 'a', then verifies
        that ``tensor``/``numpy`` with ``inplace=False`` leave the receiver
        untouched and that ``inplace=True`` converts the receiver itself.
        """
        np.random.seed(1)
        feat_dim = 4
        node_count = 15

        # 'c' nodes only point to 'p' nodes, so some nodes have no successor
        # for a given edge type.
        c2p_pairs = [(1, 4), (0, 5), (1, 9), (1, 8), (2, 8), (2, 5), (3, 6),
                     (3, 7), (3, 4), (3, 8)]
        p2a_pairs = [(4, 10), (4, 11), (4, 12), (4, 14), (4, 13), (6, 12),
                     (6, 11), (6, 14), (7, 12), (7, 11), (8, 14), (9, 10)]
        edges = {
            'c2p': c2p_pairs,
            'p2c': [(dst, src) for src, dst in c2p_pairs],
            'p2a': p2a_pairs,
            'a2p': [(dst, src) for src, dst in p2a_pairs],
        }

        type_labels = ['c'] * 4 + ['p'] * 6 + ['a'] * 5
        node_types = list(enumerate(type_labels))

        # Node features are drawn first, then one edge-feature matrix per
        # edge type in insertion order, so the seeded stream is consumed in a
        # fixed sequence.
        nfeat = {'nfeat': np.random.randn(node_count, feat_dim)}
        efeat = {
            etype: {'efeat': np.random.randn(len(pairs), feat_dim)}
            for etype, pairs in edges.items()
        }

        hg = pgl.HeterGraph(edges=edges,
                            node_types=node_types,
                            node_feat=nfeat,
                            edge_feat=efeat)

        # tensor(inplace=False): the original keeps its non-tensor features ...
        new_hg = hg.tensor(inplace=False)
        self.assertNotIsInstance(hg.node_feat['nfeat'], paddle.Tensor)
        self.assertNotIsInstance(hg.edge_feat['a2p']['efeat'], paddle.Tensor)

        # ... while the returned graph holds paddle tensors.
        self.assertIsInstance(new_hg.node_feat['nfeat'], paddle.Tensor)
        self.assertIsInstance(new_hg.edge_feat['a2p']['efeat'], paddle.Tensor)
        self.assertIsInstance(new_hg.num_nodes, paddle.Tensor)

        # tensor(inplace=True): the receiver itself is converted.
        hg.tensor(inplace=True)
        self.assertIsInstance(hg.node_feat['nfeat'], paddle.Tensor)
        self.assertIsInstance(hg.edge_feat['a2p']['efeat'], paddle.Tensor)

        # numpy(inplace=False): the copy is numpy, the receiver stays tensor.
        new_hg = hg.numpy(inplace=False)
        self.assertIsInstance(new_hg.node_feat['nfeat'], np.ndarray)
        self.assertIsInstance(new_hg.edge_feat['a2p']['efeat'], np.ndarray)
        self.assertIsInstance(hg.node_feat['nfeat'], paddle.Tensor)
        self.assertIsInstance(hg.edge_feat['a2p']['efeat'], paddle.Tensor)

        # numpy(inplace=True): the receiver is back to numpy arrays.
        hg.numpy(inplace=True)
        self.assertIsInstance(hg.node_feat['nfeat'], np.ndarray)
        self.assertIsInstance(hg.edge_feat['a2p']['efeat'], np.ndarray)
Beispiel #3
0
def build_heter_graph(data_path, num_nodes):
    """Build a single-node-type HeterGraph from the edge files in a directory.

    Every entry directly under ``data_path`` is read as a header-less,
    tab-separated table and its rows become the edges of one edge type.  Edge
    types are named ``'etype0'``, ``'etype1'``, ... in the order the files are
    processed.  Entries that cannot be read are logged and skipped, and do
    not consume an index.

    Args:
        data_path: Directory containing the edge files.
        num_nodes: Number of nodes; nodes ``0..num_nodes-1`` all get type
            ``"n"``.

    Returns:
        The ``pgl.HeterGraph`` built from the edges that were read.
    """
    edges = {}
    # glob yields matches in arbitrary order; sorting makes the mapping from
    # file to 'etypeN' name reproducible across runs and machines.
    for filename in sorted(glob.glob(os.path.join(data_path, '*'))):
        try:
            pairs = pd.read_csv(filename, header=None, sep="\t").values
        except Exception as err:
            # Best-effort loading: an unreadable entry is reported and skipped.
            log.info(err)
            continue
        # len(edges) is the number of files loaded so far, i.e. the next index.
        edges['etype%s' % len(edges)] = pairs

    node_types = [(i, "n") for i in range(num_nodes)]

    hg = pgl.HeterGraph(edges=edges, node_types=node_types)

    return hg
Beispiel #4
0
    def test_build_hetergraph(self):
        """Build a numpy-backed HeterGraph and check degree/neighbour queries.

        The graph has 15 nodes of types 'c', 'p' and 'a' connected by the
        edge types 'c2p', 'p2c', 'p2a' and 'a2p'.
        """
        np.random.seed(1)
        feat_dim = 4
        node_count = 15

        # Node 4 is a 'p' node, so it has no outgoing 'c2p' edge; this is the
        # "no successor" case exercised below.
        c2p_pairs = [(1, 4), (0, 5), (1, 9), (1, 8), (2, 8), (2, 5), (3, 6),
                     (3, 7), (3, 4), (3, 8)]
        p2a_pairs = [(4, 10), (4, 11), (4, 12), (4, 14), (4, 13), (6, 12),
                     (6, 11), (6, 14), (7, 12), (7, 11), (8, 14), (9, 10)]
        edges = {
            'c2p': c2p_pairs,
            'p2c': [(dst, src) for src, dst in c2p_pairs],
            'p2a': p2a_pairs,
            'a2p': [(dst, src) for src, dst in p2a_pairs],
        }

        type_labels = ['c'] * 4 + ['p'] * 6 + ['a'] * 5
        node_types = list(enumerate(type_labels))

        # Same draw order as the edge dict: node features, then one matrix
        # per edge type.
        nfeat = {'nfeat': np.random.randn(node_count, feat_dim)}
        efeat = {
            etype: {'efeat': np.random.randn(len(pairs), feat_dim)}
            for etype, pairs in edges.items()
        }

        hg = pgl.HeterGraph(edges=edges,
                            node_types=node_types,
                            node_feat=nfeat,
                            edge_feat=efeat)

        self.assertFalse(hg.is_tensor())

        # Degrees without an edge type count edges of every type.
        self.assertEqual(hg.indegree(5), [2])
        self.assertEqual(hg.outdegree(4), [7])
        self.assertEqual(hg.outdegree(4, 'c2p'), [0])

        no_successors = hg.successor('c2p', [4]).tolist()
        self.assertEqual(no_successors, [[]])

        a2p_predecessors = hg.predecessor('a2p', [4]).tolist()
        self.assertEqual(a2p_predecessors, [[10, 11, 12, 14, 13]])
        print()

        # Only check that the batch iterator can produce a first batch.
        for _first_batch in hg.node_batch_iter(3):
            break
Beispiel #5
0
    def test_build_tensor_hetergraph(self):
        """Build a HeterGraph from paddle-tensor edges and check degree queries.

        Uses the same 15-node 'c'/'p'/'a' layout as the numpy-based tests, but
        every edge list is passed as a ``paddle.Tensor``.
        """
        np.random.seed(1)
        dim = 4
        num_nodes = paddle.to_tensor(15)

        def as_edge_tensor(pairs):
            # list of (src, dst) tuples -> paddle tensor via a numpy array
            return paddle.to_tensor(np.array(pairs))

        # Node 4 is a 'p' node and therefore has no 'c2p' successor.
        c2p_pairs = [(1, 4), (0, 5), (1, 9), (1, 8), (2, 8), (2, 5), (3, 6),
                     (3, 7), (3, 4), (3, 8)]
        p2a_pairs = [(4, 10), (4, 11), (4, 12), (4, 14), (4, 13), (6, 12),
                     (6, 11), (6, 14), (7, 12), (7, 11), (8, 14), (9, 10)]

        edges = {
            'c2p': as_edge_tensor(c2p_pairs),
            'p2c': as_edge_tensor([(dst, src) for src, dst in c2p_pairs]),
            'p2a': as_edge_tensor(p2a_pairs),
            'a2p': as_edge_tensor([(dst, src) for src, dst in p2a_pairs]),
        }

        type_labels = ['c'] * 4 + ['p'] * 6 + ['a'] * 5
        node_types = list(enumerate(type_labels))

        hg = pgl.HeterGraph(edges=edges, node_types=node_types)

        self.assertTrue(hg.is_tensor())
        print()

        # In-degree of node 5, over all edge types and for 'c2p' only.
        indeg_all = hg.indegree(paddle.to_tensor(5)).numpy()
        self.assertEqual(indeg_all, np.array([2]))
        indeg_c2p = hg.indegree(paddle.to_tensor(5), 'c2p').numpy()
        self.assertEqual(indeg_c2p, np.array([2]))

        # Out-degree of node 4, over all edge types and for 'p2a' only.
        outdeg_all = hg.outdegree(paddle.to_tensor(4)).numpy()
        self.assertEqual(outdeg_all, np.array([7]))
        outdeg_p2a = hg.outdegree(paddle.to_tensor(4), 'p2a').numpy()
        self.assertEqual(outdeg_p2a, np.array([5]))

        # Only check that a first batch of 'c' nodes can be produced.
        for _first_batch in hg.node_batch_iter(3, n_type='c'):
            break
Beispiel #6
0
    def test_dump_and_load(self):
        """Dump a HeterGraph to disk, load it back and compare node counts.

        The graph is written below a freshly created temporary directory
        instead of a fixed ``./tmp`` in the working directory, and that
        directory is removed even when dumping, loading or the assertion
        fails.
        """
        import os
        import tempfile

        np.random.seed(1)
        dim = 4
        num_nodes = 15

        edges = {}
        # Node 4 is a 'p' node, so it has no 'c2p' successor.
        edges['c2p'] = [(1, 4), (0, 5), (1, 9), (1, 8), (2, 8), (2, 5), (3, 6),
                        (3, 7), (3, 4), (3, 8)]
        edges['p2c'] = [(v, u) for u, v in edges['c2p']]
        edges['p2a'] = [(4, 10), (4, 11), (4, 12), (4, 14), (4, 13), (6, 12),
                        (6, 11), (6, 14), (7, 12), (7, 11), (8, 14), (9, 10)]
        edges['a2p'] = [(v, u) for u, v in edges['p2a']]

        node_types = ['c' for _ in range(4)] + \
                     ['p' for _ in range(6)] + \
                     ['a' for _ in range(5)]
        node_types = [(i, t) for i, t in enumerate(node_types)]

        nfeat = {'nfeat': np.random.randn(num_nodes, dim)}
        efeat = {}
        for etype, _edges in edges.items():
            efeat[etype] = {'efeat': np.random.randn(len(_edges), dim)}

        hg = pgl.HeterGraph(edges=edges,
                            node_types=node_types,
                            node_feat=nfeat,
                            edge_feat=efeat)

        tmp_root = tempfile.mkdtemp()
        try:
            # Dump into a not-yet-existing sub-directory, as the original
            # './tmp' target did not exist before the dump either.
            path = os.path.join(tmp_root, "hetergraph")
            hg.dump(path, indegree=True)

            hg2 = pgl.HeterGraph.load(path)

            self.assertEqual(hg.num_nodes, hg2.num_nodes)

            # Drop the graphs before the directory is removed.
            del hg
            del hg2
        finally:
            # ignore_errors so a cleanup problem never hides a test failure.
            shutil.rmtree(tmp_root, ignore_errors=True)
Beispiel #7
0
    def collate_fn(self, ddi_data, dti_data, ppi_data, features):
        """Aggregate all needed nodes into a heterogeneous graph.

        Args:
            ddi_data: Iterable of records with a ``'pair'`` entry (two node
                ids) and a ``'label'`` entry.  A pair is kept only when both
                ids have a feature row.
            dti_data: Iterable of records with a ``'pair'`` entry.  A pair is
                kept only when its first id is a drug with a feature row.
            ppi_data: Iterable of records with a ``'pair'`` entry; every pair
                is kept.
            features: Passed to ``pd.read_csv`` with the first column as the
                index; rows are drug features.

        Returns:
            ``{'rt': (hg, nodes_dict, label, label_idx)}`` where ``hg`` is the
            ``pgl.HeterGraph``, ``nodes_dict`` maps original node ids to graph
            indices, ``label`` maps original-id pairs to labels and
            ``label_idx`` maps graph-index pairs to the same labels.

        Notes:
            Node ids must be strings: ids starting with ``'CID'`` get type
            ``'drug'``, all others ``'protein'``.  Graph indices are assigned
            in sorted id order so the mapping is the same on every run.
        """
        drug_feat = pd.read_csv(features, index_col=0)
        drug_feat = drug_feat[~drug_feat.index.duplicated()]
        drug_feat = drug_feat.fillna(0)
        drug_feat = drug_feat.replace([np.inf, -np.inf], 0)

        nm = StandardScaler()
        scaled_feat = pd.DataFrame(nm.fit_transform(drug_feat),
                                   index=drug_feat.index).fillna(0)

        edges = {'dds': [], 'dti': [], 'ppi': []}
        # num_nodes_stat is defined outside this block; only its second
        # return value (the node ids seen in ddi_data) is used here.
        _, ddi_nodes = num_nodes_stat(ddi_data)
        selected_drugs_feat = scaled_feat[scaled_feat.index.isin(ddi_nodes)]
        ddi_nodes = set(selected_drugs_feat.index)
        total_nodes = set()
        label = {}

        # Every kept pair is stored in both directions.
        for d in ddi_data:
            src, dst = d['pair'][0], d['pair'][1]
            if src in ddi_nodes and dst in ddi_nodes:
                edges['dds'].append((src, dst))
                edges['dds'].append((dst, src))
                total_nodes.add(src)
                total_nodes.add(dst)
                label[src, dst] = d['label']
                label[dst, src] = d['label']

        for d in dti_data:
            src, dst = d['pair'][0], d['pair'][1]
            if src in ddi_nodes:
                edges['dti'].append((src, dst))
                edges['dti'].append((dst, src))
                total_nodes.add(src)
                total_nodes.add(dst)

        for d in ppi_data:
            src, dst = d['pair'][0], d['pair'][1]
            edges['ppi'].append((src, dst))
            edges['ppi'].append((dst, src))
            total_nodes.add(src)
            total_nodes.add(dst)

        num_nodes = len(total_nodes)
        # Set iteration order is not stable across interpreter runs; sorting
        # gives every node the same index each time.
        nodes_dict = {node: idx for idx, node in enumerate(sorted(total_nodes))}

        # Feature width follows the feature table instead of a fixed 2325.
        node_feat = np.zeros((num_nodes, selected_drugs_feat.shape[1]),
                             dtype='float32')

        # A drug can have features yet end up without any kept edge (all of
        # its partners lack features and it has no dti pair).  Such a drug is
        # not a graph node, so drop it before looking up its index.
        selected_drugs_feat = selected_drugs_feat[
            selected_drugs_feat.index.isin(total_nodes)]
        feat_rows = np.asarray(
            [nodes_dict[x] for x in selected_drugs_feat.index], dtype=np.int64)
        # One vectorised copy; rows of non-drug nodes stay zero.
        node_feat[feat_rows, :] = selected_drugs_feat.values.astype('float32')
        node_feats = {'features': node_feat}

        # Translate every edge from original ids to graph indices.
        ek = {
            edge_type: [(nodes_dict[p[0]], nodes_dict[p[1]]) for p in pairs]
            for edge_type, pairs in edges.items()
        }

        node_types = [
            (idx, 'drug' if m.startswith('CID') else 'protein')
            for m, idx in nodes_dict.items()
        ]

        hg = pgl.HeterGraph(num_nodes=num_nodes,
                            edges=ek,
                            node_types=node_types,
                            node_feat=node_feats)
        label_idx = {
            (nodes_dict[key[0]], nodes_dict[key[1]]): value
            for key, value in label.items()
        }

        return {'rt': (hg, nodes_dict, label, label_idx)}