Beispiel #1
0
    def test_graph_norm(self):
        """Check ``F.graph_norm`` and ``nn.GraphNorm`` on two joined graphs.

        The expected values divide the first three feature rows by
        ``sqrt(3)`` and the remaining four rows by ``sqrt(4)``, matching the
        node counts of the two graphs that are joined below.
        """
        # (edges, num_nodes) of each component graph.
        specs = [
            ([(0, 1), (1, 2)], 3),
            ([(0, 2), (0, 3), (1, 2)], 4),
        ]
        graph_list = [
            pgl.Graph(edges=edge_list, num_nodes=node_count)
            for edge_list, node_count in specs
        ]

        multi_graph = pgl.Graph.disjoint(graph_list)
        multi_graph.tensor()

        # Row i holds the value i repeated three times (7 rows in total).
        feat = np.repeat(np.arange(0, 7).reshape(-1, 1), 3,
                         axis=1).astype("float32")
        # The input tensor is created before ``feat`` is rescaled below.
        tensor_feat = paddle.to_tensor(feat, dtype="float32")

        feat[0:3] = feat[0:3] / np.sqrt(3)
        feat[3:] = feat[3:] / np.sqrt(4)
        expected = feat.tolist()

        functional_out = F.graph_norm(multi_graph, tensor_feat)
        self.assertEqual(expected, functional_out.numpy().tolist())

        layer_out = nn.GraphNorm()(multi_graph, tensor_feat)
        self.assertEqual(expected, layer_out.numpy().tolist())
Beispiel #2
0
    def __call__(self, data_list):
        """
        Collate a list of samples into a batched atom-bond graph and a
        batched bond-angle graph, plus labels when not in inference mode.

        Args:
            data_list: iterable of samples indexed by name. Each sample is
                read at the names in ``self.atom_names``, ``self.bond_names``,
                ``self.bond_float_names`` and ``self.bond_angle_float_names``,
                at ``'edges'`` and ``'BondAngleGraph_edges'``, and at
                ``'label'`` when ``self.is_inference`` is false.
        Returns:
            * inference: ``(atom_bond_graph, bond_angle_graph)``
            * ``task_type == 'class'``: the list
              ``[atom_bond_graph, bond_angle_graph, valids, labels]`` where
              labels are mapped with ``(label + 1) / 2`` and ``valids`` marks
              the entries that are not ``0.5``
            * otherwise: ``(atom_bond_graph, bond_angle_graph, labels)`` with
              float32 labels
        """

        def as_column(sample, name):
            # Every feature is stored as an (n, 1) column on the graph.
            return sample[name].reshape([-1, 1])

        ab_graphs, ba_graphs, raw_labels = [], [], []
        for sample in data_list:
            atom_count = len(sample[self.atom_names[0]])
            bond_edges = sample['edges']
            atom_feats = {n: as_column(sample, n) for n in self.atom_names}
            bond_feats = {
                n: as_column(sample, n)
                for n in self.bond_names + self.bond_float_names
            }
            ab_graphs.append(
                pgl.Graph(num_nodes=atom_count,
                          edges=bond_edges,
                          node_feat=atom_feats,
                          edge_feat=bond_feats))

            # The bond-angle graph has one node per entry of sample['edges'].
            angle_feats = {
                n: as_column(sample, n)
                for n in self.bond_angle_float_names
            }
            ba_graphs.append(
                pgl.Graph(num_nodes=len(sample['edges']),
                          edges=sample['BondAngleGraph_edges'],
                          node_feat={},
                          edge_feat=angle_feats))

            if not self.is_inference:
                raw_labels.append(sample['label'])

        atom_bond_graph = pgl.Graph.batch(ab_graphs)
        bond_angle_graph = pgl.Graph.batch(ba_graphs)
        # TODO: reshape due to pgl limitations on the shape
        for batched in (atom_bond_graph, bond_angle_graph):
            self._flat_shapes(batched.node_feat)
            self._flat_shapes(batched.edge_feat)

        if self.is_inference:
            return atom_bond_graph, bond_angle_graph

        if self.task_type != 'class':
            labels = np.array(raw_labels, 'float32')
            return atom_bond_graph, bond_angle_graph, labels

        # label: -1 -> 0, 1 -> 1
        labels = (np.array(raw_labels) + 1.0) / 2
        valids = labels != 0.5
        return [atom_bond_graph, bond_angle_graph, valids, labels]
Beispiel #3
0
    def prepareGraphData(self):
        """Build both edge sets and return ``(info_graph, soc_graph)``.

        ``info_graph`` has ``num_users + num_items`` nodes and uses
        ``self.user_item_edges``; ``soc_graph`` has ``num_users`` nodes and
        uses ``self.user_user_edges``.
        """
        self.buildUserItemEdges()
        self.buildUserUserEdges()

        total_nodes = self.conf['num_users'] + self.conf['num_items']
        info_graph = pgl.Graph(num_nodes=total_nodes,
                               edges=self.user_item_edges)

        user_nodes = self.conf['num_users']
        soc_graph = pgl.Graph(num_nodes=user_nodes,
                              edges=self.user_user_edges)

        return info_graph, soc_graph
Beispiel #4
0
    def __init__(self, args, dataset):
        """Set up embeddings, the propagation layer and the training graph.

        Args:
            args: configuration object; ``recdim`` and ``n_layers`` are read.
            dataset: object providing ``n_users``, ``m_items`` and
                ``trainEdge``.
        """
        super(LightGCN, self).__init__()
        self.args = args
        self.dataset = dataset
        self.num_users = dataset.n_users
        self.num_items = dataset.m_items
        total_nodes = dataset.n_users + dataset.m_items
        self.latent_dim = args.recdim
        self.n_layers = args.n_layers
        self.lightgcn = LightGCN_Layer(self.n_layers)

        self.embedding_user = nn.Embedding(
            num_embeddings=self.num_users, embedding_dim=self.latent_dim)
        self.embedding_item = nn.Embedding(
            num_embeddings=self.num_items, embedding_dim=self.latent_dim)

        def gaussian_like(embedding):
            # N(0, 0.1) samples with the same shape as the embedding table.
            table_shape = embedding.weight.numpy().shape
            return np.random.normal(0, 0.1, table_shape).astype(np.float32)

        # Item weights are drawn before user weights.
        item_init = gaussian_like(self.embedding_item)
        user_init = gaussian_like(self.embedding_user)
        self.embedding_item.weight.set_value(item_init)
        self.embedding_user.weight.set_value(user_init)

        self.f = nn.Sigmoid()
        train_edges = paddle.to_tensor(self.dataset.trainEdge, dtype='int64')
        self.Graph = pgl.Graph(num_nodes=total_nodes, edges=train_edges)
        self.lightgcn.train()
Beispiel #5
0
    def test_dump_tensor_load_numpy(self):
        """Dump a graph after ``tensor()``, reload it, and compare indegrees.

        The indegree is computed before the graph is converted with
        ``tensor()``; after ``dump``/``load`` the reloaded graph must report
        the same values. The scratch directory is removed even when the
        round trip or an assertion fails.
        """
        import shutil

        path = './tmp'
        dim = 4
        num_nodes = 10
        edges = np.random.randint(low=1,
                                  high=num_nodes,
                                  size=[np.random.randint(low=2, high=10), 2])
        nfeat = np.random.randn(num_nodes, dim)
        efeat = np.random.randn(len(edges), dim)

        g = pgl.Graph(edges=edges,
                      num_nodes=num_nodes,
                      node_feat={'nfeat': nfeat},
                      edge_feat={'efeat': efeat})

        in_before = g.indegree()
        g.outdegree()
        g.tensor()

        g2 = in_after = None
        try:
            # Dump to disk and load back
            g.dump(path)
            g2 = pgl.Graph.load(path)
            in_after = g2.indegree()
            for a, b in zip(in_before, in_after):
                self.assertEqual(a, b)
        finally:
            # Drop the loaded objects before deleting the directory, as the
            # original test did (presumably they may reference files under
            # ``path`` -- TODO confirm).
            del g2
            del in_after
            shutil.rmtree(path, ignore_errors=True)
Beispiel #6
0
    def test_disjoint_graph(self):
        """Join five random graphs and verify the per-node/edge graph ids.

        The expected id of every node (edge) is the position of its source
        graph in the list; this is checked both before and after the joined
        graph is converted with ``tensor()``.
        """
        feat_dim = 4
        graphs = []
        for _ in range(5):
            node_count = np.random.randint(low=2, high=10)
            edge_count = np.random.randint(low=1, high=10)
            edge_array = np.random.randint(low=1,
                                           high=node_count,
                                           size=[edge_count, 2])
            node_values = np.random.randn(node_count, feat_dim)
            edge_values = np.random.randn(len(edge_array), feat_dim)
            graphs.append(
                pgl.Graph(edges=edge_array,
                          num_nodes=node_count,
                          node_feat={'nfeat': node_values},
                          edge_feat={'efeat': edge_values}))

        # Merge Graph
        multi_graph = pgl.Graph.disjoint(graphs)

        # Check Graph Index
        want_node_id = np.concatenate(
            [np.ones(g.num_nodes) * pos for pos, g in enumerate(graphs)])
        want_edge_id = np.concatenate(
            [np.ones(g.num_edges) * pos for pos, g in enumerate(graphs)])
        self.assertTrue(np.all(want_node_id == multi_graph.graph_node_id))
        self.assertTrue(np.all(want_edge_id == multi_graph.graph_edge_id))

        # Same check once the graph holds tensors.
        multi_graph.tensor()
        got_node_id = multi_graph.graph_node_id.numpy()
        self.assertTrue(np.all(want_node_id == got_node_id))
        got_edge_id = multi_graph.graph_edge_id.numpy()
        self.assertTrue(np.all(want_edge_id == got_edge_id))
    def lod_prot_chain(self, prot_chain_name):
        """Load one protein chain as ``(graph, padded_features, labels)``.

        Args:
            prot_chain_name: key into ``self.labels`` and stem of the
                ``<name>.npz`` file under ``self.prot_chain_data_dir``.

        Returns:
            A tuple ``(p_graph, padded_features, labels)``:
            ``p_graph`` is a ``pgl.Graph`` over the residues with the
            sequence stored as node feature ``"seq"``; ``padded_features`` is
            a float32 array of shape ``(self.padded_len, self.n_feats)``
            whose leading rows are ``self.one_hot[seq]``; ``labels`` is a
            multi-hot vector of length ``self.n_labels``.
            The tuple is served from ``self.cache`` when present and stored
            there when ``self.use_cache`` is true.
        """
        if prot_chain_name in self.cache:
            return self.cache[prot_chain_name]
        label_idx = self.labels[prot_chain_name].astype("int64")
        labels = np.zeros(self.n_labels)
        labels[label_idx] = 1.0

        npz_path = os.path.join(self.prot_chain_data_dir,
                                f"{prot_chain_name}.npz")
        # NOTE(review): allow_pickle=True can execute arbitrary code while
        # unpickling; only load archives from a trusted source.
        # The context manager closes the archive's file handle once the two
        # arrays have been read.
        with np.load(npz_path, allow_pickle=True) as prot_chain:
            seq = prot_chain["seq"]
            edges = prot_chain["n2n_edges"]
        num_nodes = len(seq)

        # Self loops are appended only when the edge list has none at all.
        n_self_loops = np.sum(edges[:, 0] == edges[:, 1])
        if n_self_loops == 0:
            node_id = np.arange(num_nodes, dtype="int64")
            self_loop = np.array([node_id, node_id]).T
            edges = np.concatenate([edges, self_loop])

        p_graph = pgl.Graph(
            edges,
            num_nodes=num_nodes,
            node_feat={"seq": paddle.to_tensor(seq, dtype="int64")},
        )
        padded_features = np.zeros((self.padded_len, self.n_feats)).astype("float32")
        seq_one_hot = self.one_hot[seq]
        padded_features[: seq.shape[0]] = seq_one_hot
        out = p_graph, padded_features, labels

        if self.use_cache:
            self.cache[prot_chain_name] = out
        return out
Beispiel #8
0
    def __init_weight(self):
        """Create embeddings, the propagation layer and the training graph.

        When ``self.config['pretrain'] == 0`` both embedding tables are
        initialised from N(0, 0.1). Otherwise the item table is loaded from
        ``item_embedding.npy`` and the user table from ``user_embedding.npy``
        (the original code loaded the item file for both tables).
        """
        self.num_users = self.dataset.n_users
        self.num_items = self.dataset.m_items
        self.latent_dim = self.config['latent_dim_rec']
        self.n_layers = self.config['lightGCN_n_layers']
        self.lgn = LightGCNonv(self.n_layers)
        self.embedding_user = nn.Embedding(num_embeddings=self.num_users,
                                           embedding_dim=self.latent_dim)
        self.embedding_item = nn.Embedding(num_embeddings=self.num_items,
                                           embedding_dim=self.latent_dim)
        if self.config['pretrain'] == 0:
            emb_item_weight = np.random.normal(
                0, 0.1,
                self.embedding_item.weight.numpy().shape).astype(np.float32)
            emb_user_weight = np.random.normal(
                0, 0.1,
                self.embedding_user.weight.numpy().shape).astype(np.float32)
        else:
            emb_item_weight = np.load('item_embedding.npy').astype(np.float32)
            # Fixed copy-paste bug: the user table must come from its own
            # file, not from the item embeddings.
            emb_user_weight = np.load('user_embedding.npy').astype(np.float32)
        self.embedding_item.weight.set_value(emb_item_weight)
        self.embedding_user.weight.set_value(emb_user_weight)

        self.f = nn.Sigmoid()
        num_nodes = self.dataset.n_users + self.dataset.m_items
        edges = paddle.to_tensor(self.dataset.trainEdge, dtype='int64')

        self.Graph = pgl.Graph(num_nodes=num_nodes, edges=edges)
        print(f"lgn is already to go(dropout:{self.config['dropout']})")
        self.lgn.train()
Beispiel #9
0
    def fp_collatefn(self, batch_data):
        """Batch graphs together with their ``mgf`` and ``maccs`` features.

        Args:
            batch_data: iterable of samples read at the keys
                ``'edge_index'``, ``'num_nodes'``, ``'node_feat'``,
                ``'edge_feat'``, ``'label'``, ``'mgf'`` and ``'maccs'``.

        Returns:
            ``({'graph': g, 'mgf': mgf_feat, 'maccs': maccs_feat}, labels)``
            where ``g`` is the batched ``pgl.Graph`` and the three arrays are
            float32.
        """
        graph_list = []
        labels = []
        mgf_list = []
        maccs_list = []
        for gdata in batch_data:
            # 'edge_index' is transposed before being handed to pgl.Graph.
            g = pgl.Graph(edges=gdata['edge_index'].T,
                          num_nodes=gdata['num_nodes'],
                          node_feat={'feat': gdata['node_feat']},
                          edge_feat={'feat': gdata['edge_feat']})
            graph_list.append(g)
            labels.append(gdata['label'])
            mgf_list.append(gdata['mgf'])
            maccs_list.append(gdata['maccs'])

        labels = np.array(labels, dtype="float32")
        g = pgl.Graph.batch(graph_list)
        mgf_feat = np.array(mgf_list, dtype="float32")
        maccs_feat = np.array(maccs_list, dtype="float32")

        return {'graph': g, 'mgf': mgf_feat, 'maccs': maccs_feat}, labels
Beispiel #10
0
def smile_to_graph(smile, max_atoms=100):
    """Convert a SMILES string into a fixed-size graph and an atom mask.

    Args:
        smile: SMILES string of the molecule.
        max_atoms: number of node slots in the returned graph (default 100,
            the value that used to be hard-coded).

    Returns:
        ``(g, mask)`` where ``g`` is a ``pgl.Graph`` with ``max_atoms`` nodes
        and a ``'node_feat'`` array of shape ``(max_atoms, 78)``, and
        ``mask`` is a list of length ``max_atoms`` holding 1 for slots filled
        by a real atom and 0 otherwise. Returns ``None`` when the SMILES
        cannot be parsed or when an atom has atomic number 0.
    """
    mol = Chem.MolFromSmiles(smile)
    if mol is None:
        # RDKit returns None for SMILES it cannot parse.
        return None

    mask = [0] * max_atoms
    features = np.zeros([max_atoms, 78])
    for i, atom in enumerate(mol.GetAtoms()):
        # GetAtomicNum must be *called*; comparing the bound method itself
        # to 0 (as the original did) is always False.
        if atom.GetAtomicNum() == 0:
            return None

        feature = atom_features(atom)
        # Each atom's feature vector is normalised to sum to 1.
        features[i, :] = feature / sum(feature)
        mask[i] = 1

    # Every bond is stored in both directions.
    edges = []
    for bond in mol.GetBonds():
        i = bond.GetBeginAtomIdx()
        j = bond.GetEndAtomIdx()
        edges.append((i, j))
        edges.append((j, i))

    g = pgl.Graph(num_nodes=max_atoms,
                  edges=edges,
                  node_feat={'node_feat': features})

    return g, mask
Beispiel #11
0
    def test_neighbors(self):
        """Check ``predecessor`` and ``successor`` on a small directed graph."""
        node_count = 5
        edge_list = [(0, 1), (0, 2), (1, 2), (3, 4)]
        g1 = pgl.Graph(edges=edge_list, num_nodes=node_count)

        # Expected neighbour sets, indexed by node id.
        want_pred = [set(), {0}, {0, 1}, set(), {3}]
        want_succ = [{1, 2}, {2}, set(), {4}, set()]

        pred, pred_eid = g1.predecessor(return_eids=True)
        self.assertEqual(len(pred), node_count)
        self.assertEqual(len(pred_eid), node_count)
        for node, want in enumerate(want_pred):
            self.assertEqual(set(pred[node]), want)

        succ, succ_eid = g1.successor(return_eids=True)
        self.assertEqual(len(succ), node_count)
        self.assertEqual(len(succ_eid), node_count)
        for node, want in enumerate(want_succ):
            self.assertEqual(set(succ[node]), want)
Beispiel #12
0
    def coord3_junc_collatefn(self, batch_data):
        """Batch molecule graphs and their junction-tree graphs.

        Each sample contributes one molecule graph (node features ``'feat'``
        and ``'3d'``, edge feature ``'feat'``). Samples whose junction tree
        has at least one node also contribute a junction-tree graph and a
        ``'mol2juct'`` mapping, which is shifted by the number of molecule
        nodes and junction-tree nodes already placed in the batch.

        Returns:
            ``({'graph': g, 'junc_graph': junc_g, 'mol2junc': mol2junc},
            labels)`` with float32 ``labels``.
        """
        mol_graphs, tree_graphs = [], []
        label_values, mapping_chunks = [], []

        # Running node counts of the graphs already added to each batch.
        mol_node_offset = 0
        tree_node_offset = 0
        for sample in batch_data:
            mol_info = sample['mol_graph']
            mol_graph = pgl.Graph(
                edges=mol_info['edge_index'].T,
                num_nodes=mol_info['num_nodes'],
                node_feat={
                    'feat': mol_info['node_feat'],
                    '3d': sample['mol_coord'],
                },
                edge_feat={'feat': mol_info['edge_feat']})

            tree_info = sample['junction_tree']
            tree_size = tree_info['num_nodes']
            if tree_size > 0:
                tree_feat = np.array(tree_info['junc_dict'],
                                     dtype="int64").reshape(-1, 1)
                tree_graph = pgl.Graph(edges=tree_info['edge_index'].T,
                                       num_nodes=tree_size,
                                       node_feat={'feat': tree_feat})

                # Added to every row of the mapping: first entry shifts by
                # molecule nodes, second by junction-tree nodes.
                shift = np.array([mol_node_offset, tree_node_offset],
                                 dtype="int64")
                shifted_mapping = sample['mol2juct'] + shift
                tree_node_offset += tree_graph.num_nodes

                tree_graphs.append(tree_graph)
                mapping_chunks.append(shifted_mapping)

            mol_graphs.append(mol_graph)
            label_values.append(sample['label'])
            mol_node_offset += mol_graph.num_nodes

        mol2junc = np.concatenate(mapping_chunks, axis=0)

        labels = np.array(label_values, dtype="float32")
        g = pgl.Graph.batch(mol_graphs)
        junc_g = pgl.Graph.batch(tree_graphs)

        return {'graph': g, 'junc_graph': junc_g, 'mol2junc': mol2junc}, labels
Beispiel #13
0
 def get_subgraph_by_masked(self, graph, mask):
     """Return a graph holding only the edges selected by ``mask``.

     The new graph keeps ``graph.num_nodes`` nodes. ``None`` is returned
     when ``L.where(mask)`` selects no entries.
     """
     selected = L.where(mask)
     if selected.shape[0] <= 0:
         return None

     kept_edges = paddle.gather(graph.edges, selected, axis=0)
     return pgl.Graph(kept_edges, num_nodes=graph.num_nodes)
Beispiel #14
0
def load_from_file(path):
    """Load a graph from a tab-separated edge-list file.

    Each non-blank line must contain exactly two integers separated by a
    tab: the source and target node of one edge. Blank lines (for example a
    trailing empty line) are skipped instead of raising on unpacking.

    Args:
        path: path of the edge-list file.

    Returns:
        A ``pgl.Graph`` built from the parsed edges.

    Raises:
        ValueError: if a non-blank line does not hold two tab-separated
            integers.
    """
    edges = []
    with open(path) as inf:
        for line in inf:
            if not line.strip():
                continue
            u, t = line.strip("\n").split("\t")
            edges.append((int(u), int(t)))
    edges = np.array(edges)
    graph = pgl.Graph(edges)
    return graph
Beispiel #15
0
    def test_build_graph(self):
        """Smoke test: a graph with node and edge features can be built."""
        node_count, feat_dim = 5, 4
        edge_list = [(0, 1), (1, 2), (3, 4)]
        node_values = np.random.randn(node_count, feat_dim)
        edge_values = np.random.randn(len(edge_list), feat_dim)

        # Construction itself is the check; the graph is not inspected.
        pgl.Graph(edges=edge_list,
                  num_nodes=node_count,
                  node_feat={'nfeat': node_values},
                  edge_feat={'efeat': edge_values})
Beispiel #16
0
    def test_num_nodes_valid(self):
        """Building a graph whose edges exceed ``num_nodes`` must raise.

        The edge ``(3, 4)`` refers to node ids that do not exist when
        ``num_nodes`` is 3, so ``pgl.Graph`` is expected to raise
        ``ValueError``.
        """
        node_count, feat_dim = 3, 4
        edge_list = [(0, 1), (1, 2), (3, 4)]
        node_values = np.random.randn(node_count, feat_dim)
        edge_values = np.random.randn(len(edge_list), feat_dim)

        with self.assertRaises(ValueError):
            pgl.Graph(edges=edge_list,
                      num_nodes=node_count,
                      node_feat={'nfeat': node_values},
                      edge_feat={'efeat': edge_values})
Beispiel #17
0
def build_net(input_size, num_class, hidden_size, num_layers):
    """Declare the input placeholders and build the GraphSage loss/accuracy.

    Args:
        input_size: width of the ``"feature"`` input.
        num_class: number of output classes of the model.
        hidden_size: hidden width passed to ``GraphSage``.
        num_layers: number of layers passed to ``GraphSage``.

    Returns:
        ``(loss, acc)``: cross-entropy loss and top-1 accuracy of the
        predictions gathered at ``index`` against ``label``.
    """
    num_nodes = F.data("num_nodes", shape=[1], dtype="int32")
    edges = F.data("edges", shape=[None, 2], dtype="int32")
    # Declared but not consumed below; kept so the set of declared inputs
    # is unchanged for callers that feed it.
    sample_index = F.data("sample_index", shape=[None], dtype="int32")
    index = F.data("index", shape=[None], dtype="int32")
    label = F.data("label", shape=[None], dtype="int64")
    label = paddle.reshape(label, [-1, 1])
    feat = F.data("feature", shape=[None, input_size], dtype="float32")

    model = GraphSage(
        input_size=input_size,
        num_class=num_class,
        hidden_size=hidden_size,
        num_layers=num_layers)

    # The graph is built once; the original also built an identical,
    # never-used copy earlier in the function.
    g = pgl.Graph(num_nodes=num_nodes, edges=edges)
    pred = model(g, feat)
    pred = paddle.gather(pred, index)
    loss = paddle.nn.functional.cross_entropy(pred, label)
    acc = paddle.metric.accuracy(input=pred, label=label, k=1)
    return loss, acc
Beispiel #18
0
    def test_random_walk(self):
        """Smoke test: ``random_walk`` runs from nodes 0 and 1 with depth 2."""
        node_count, feat_dim = 5, 4
        # Every edge is present in both directions.
        edge_list = [(0, 1), (1, 2), (3, 4), (1, 0), (2, 1), (4, 3)]
        node_values = np.random.randn(node_count, feat_dim)
        edge_values = np.random.randn(len(edge_list), feat_dim)

        g1 = pgl.Graph(edges=edge_list,
                       num_nodes=node_count,
                       node_feat={'nfeat': node_values},
                       edge_feat={'efeat': edge_values})

        # Only successful execution is checked; the paths are not inspected.
        random_walk(g1, [0, 1], 2)
Beispiel #19
0
    def test_send_and_recv(self):
        """Check ``Graph.send``/``Graph.recv`` with two message functions.

        ``send_func1`` forwards the whole source-feature dict, while
        ``send_func2`` builds a new dict from its ``'h'`` entry. Both must
        yield one message per edge equal to the source node's feature
        (``ground``), and summing the messages per destination node must
        give ``recv_ground``.
        """
        np.random.seed(0)
        num_nodes = 5
        edges = [(0, 1), (1, 2), (3, 4), (4, 1), (1, 0)]

        nfeat = np.array([[1, 2, 3, 4], [2, 3, 4, 5], [3, 4, 5, 6],
                          [4, 5, 6, 7], [5, 6, 7, 8]],
                         dtype="float32")

        # Source-node feature of every edge, in edge order.
        ground = np.array([[1, 2, 3, 4], [2, 3, 4, 5], [4, 5, 6, 7],
                           [5, 6, 7, 8], [2, 3, 4, 5]],
                          dtype="float32")

        # Sum of incoming messages per node (node 3 has no incoming edge).
        recv_ground = np.array(
            [[2., 3., 4., 5.], [6., 8., 10., 12.], [2., 3., 4., 5.],
             [0., 0., 0., 0.], [4., 5., 6., 7.]],
            dtype="float32")

        g = pgl.Graph(edges=edges,
                      num_nodes=num_nodes,
                      node_feat={'nfeat': nfeat})

        g.tensor()

        def send_func1(src_feat, dst_feat, edge_feat):
            return src_feat

        def send_func2(src_feat, dst_feat, edge_feat):
            return {'h': src_feat['h']}

        def reduce_func(msg):
            return msg.reduce_sum(msg['h'])

        # test send_func1
        msg1 = g.send(send_func1, src_feat={'h': g.node_feat['nfeat']})
        _msg = msg1['h'].numpy()
        self.assertTrue((ground == _msg).all())

        output = g.recv(reduce_func, msg1)
        output = output.numpy()
        self.assertTrue((recv_ground == output).all())

        # test send_func2 (the original called send_func1 again here, so
        # send_func2 was never exercised)
        msg2 = g.send(send_func2, src_feat={'h': g.node_feat['nfeat']})
        _msg = msg2['h'].numpy()
        self.assertTrue((ground == _msg).all())

        output = g.recv(reduce_func, msg2)
        output = output.numpy()
        self.assertTrue((recv_ground == output).all())
Beispiel #20
0
 def __getitem__(self, idx):
     """Return sample ``idx`` as ``(graph, label, smiles, pretrain_info)``.

     The graph is cut out of the batched ``self.graph`` using its node and
     edge index arrays, with edge endpoints shifted so node ids start at 0.
     ``pretrain_info`` is a dict with the keys ``edge_index``, ``tid``,
     ``bond_angle_index``, ``bond_angle``, ``bond_angle_mask``,
     ``edge_attr_float`` and ``edge_attr_float_mask`` taken from
     ``self.pretrain_info_list[idx]``.

     Raises:
         ValueError: if the number of nodes differs from the length of the
             sample's ``"twohop_context"`` (the original printed the
             message and called ``exit(0)``, terminating the process with
             a success status).
     """
     node_start = self.graph._graph_node_index[idx]
     node_stop = self.graph._graph_node_index[idx + 1]
     edge_start = self.graph._graph_edge_index[idx]
     edge_stop = self.graph._graph_edge_index[idx + 1]

     num_nodes = node_stop - node_start
     # Subtract the first node id of this graph to get local node ids.
     edges = self.graph.edges[edge_start:edge_stop] - node_start
     edge_feat = {
         key: value[edge_start:edge_stop]
         for key, value in self.graph.edge_feat.items()
     }
     node_feat = {
         key: value[node_start:node_stop]
         for key, value in self.graph.node_feat.items()
     }

     # pretrain information
     info = self.pretrain_info_list[idx]
     tid = info["twohop_context"]
     if num_nodes != len(tid):
         raise ValueError(
             f"idx {idx} num_nodes is : {num_nodes} and len of tid is : {len(tid)}, they are not equal"
         )
     dft_success = info["dft_success"]
     # ``x * 0 + dft_success`` broadcasts the flag to the shape of x, so
     # each mask is all-True or all-False for the sample.
     pretrain_info = {
         "edge_index": np.array(info["edge_index"]),
         "tid": np.array(tid, dtype=int),
         "bond_angle_index": info["bond_angle_index"],
         "bond_angle": info["bond_angle"],
         "bond_angle_mask": np.array(
             info["bond_angle"] * 0 + dft_success, dtype=bool),
         "edge_attr_float": np.array(info["edge_feat_float"]),
         "edge_attr_float_mask": np.array(
             info["edge_feat_float"].reshape(-1) * 0 + dft_success,
             dtype=bool),
     }

     # Only the SMILES is taken from the raw dataset; the returned label
     # comes from ``self.label``.
     smiles, _ = self.raw_dataset[idx]
     return (pgl.Graph(num_nodes=num_nodes,
                       edges=edges,
                       node_feat=node_feat,
                       edge_feat=edge_feat), self.label[idx], smiles,
             pretrain_info)
Beispiel #21
0
    def __call__(self, batch_data):
        """Batch samples into one ``pgl.Graph`` and a float32 label array.

        Each sample is read at the keys ``'edges'``, ``'num_nodes'``,
        ``'node_feat'``, ``'edge_feat'`` and ``'label'``.

        Returns:
            ``(g, labels)``: the batched graph and the labels.
        """
        per_sample_graphs = []
        label_values = []
        for sample in batch_data:
            per_sample_graphs.append(
                pgl.Graph(edges=sample['edges'],
                          num_nodes=sample['num_nodes'],
                          node_feat={'feat': sample['node_feat']},
                          edge_feat={'feat': sample['edge_feat']}))
            label_values.append(sample['label'])

        labels = np.array(label_values, dtype="float32")
        batched = pgl.Graph.batch(per_sample_graphs)
        return batched, labels
Beispiel #22
0
    def __init__(self, args, dataset):
        """Set up the NGCF layer, the training graph and the weights.

        Args:
            args: configuration object; ``recdim`` and ``n_layers`` are read.
            dataset: object providing ``n_users``, ``m_items`` and
                ``trainEdge``.
        """
        super(NGCF, self).__init__()
        self.args = args
        self.dataset = dataset
        self.num_users = dataset.n_users
        self.num_items = dataset.m_items
        total_nodes = dataset.n_users + dataset.m_items

        self.latent_dim = args.recdim
        self.n_layers = args.n_layers
        # Input and output width of the layer are both ``latent_dim``.
        self.ngcf = NGCF_Layer(self.latent_dim, self.latent_dim, self.n_layers)

        train_edges = paddle.to_tensor(self.dataset.trainEdge, dtype='int64')
        self.Graph = pgl.Graph(num_nodes=total_nodes, edges=train_edges)
        self.f = nn.Sigmoid()
        self.__init_weight()
Beispiel #23
0
    def quality_graph_collatefn(self, batch_data):
        """Batch the ``'mol_graph'`` entry of every sample.

        Returns:
            ``({'graph': g}, labels)`` with the batched graph and float32
            labels.
        """
        per_sample_graphs = []
        label_values = []
        for sample in batch_data:
            mol_info = sample['mol_graph']
            # 'edge_index' is transposed before being handed to pgl.Graph.
            per_sample_graphs.append(
                pgl.Graph(edges=mol_info['edge_index'].T,
                          num_nodes=mol_info['num_nodes'],
                          node_feat={'feat': mol_info['node_feat']},
                          edge_feat={'feat': mol_info['edge_feat']}))
            label_values.append(sample['label'])

        labels = np.array(label_values, dtype="float32")
        batched = pgl.Graph.batch(per_sample_graphs)
        return {'graph': batched}, labels
Beispiel #24
0
def create_random_graph():
    """Return a random ``pgl.Graph`` with 8-dimensional random features.

    The node count is drawn from ``[8, 16)`` and the edge count from
    ``[3 * num_nodes, 4 * num_nodes)``; edge endpoints are drawn uniformly
    from the node ids.
    """
    feat_dim = 8
    node_count = np.random.randint(low=8, high=16)
    edge_count = np.random.randint(low=node_count * 3, high=node_count * 4)
    edge_array = np.random.randint(low=0,
                                   high=node_count,
                                   size=[edge_count, 2])
    node_values = np.random.randn(node_count, feat_dim)
    edge_values = np.random.randn(len(edge_array), feat_dim)

    return pgl.Graph(edges=edge_array,
                     num_nodes=node_count,
                     node_feat={'nfeat': node_values},
                     edge_feat={'efeat': edge_values})
Beispiel #25
0
    def __call__(self, data_list):
        """
        Batch the samples into one graph and mask a random subset of nodes.

        Args:
            data_list: iterable of samples indexed by the names in
                ``self.atom_names`` and ``self.bond_names`` and by
                ``'edges'``.

        Returns:
            ``(join_graph, masked_node_indice, masked_node_labels)``:
            the batched graph with every feature flattened to 1-D and all
            node features set to 0 at the masked positions, the indices of
            the masked nodes, and the ``'atomic_num'`` values those nodes
            had before masking.
        """

        def as_column(sample, name):
            return sample[name].reshape([-1, 1])

        per_sample_graphs = []
        for sample in data_list:
            per_sample_graphs.append(
                pgl.Graph(
                    num_nodes=len(sample[self.atom_names[0]]),
                    edges=sample['edges'],
                    node_feat={
                        n: as_column(sample, n)
                        for n in self.atom_names
                    },
                    edge_feat={
                        n: as_column(sample, n)
                        for n in self.bond_names
                    }))

        join_graph = pgl.Graph.batch(per_sample_graphs)
        # Flatten every feature column to a 1-D array.
        for name in join_graph.node_feat:
            flat = join_graph.node_feat[name].reshape([-1])
            join_graph.node_feat[name] = flat
        for name in join_graph.edge_feat:
            flat = join_graph.edge_feat[name].reshape([-1])
            join_graph.edge_feat[name] = flat

        ### mask atom
        total_nodes = join_graph.num_nodes
        masked_size = int(total_nodes * self.mask_ratio)
        masked_node_indice = np.random.choice(range(total_nodes),
                                              size=masked_size,
                                              replace=False)
        # Labels are read before the features are overwritten below.
        atomic_num = join_graph.node_feat['atomic_num']
        masked_node_labels = atomic_num[masked_node_indice]
        for name in join_graph.node_feat:
            join_graph.node_feat[name][masked_node_indice] = 0  # 0: OOV

        return join_graph, masked_node_indice, masked_node_labels
Beispiel #26
0
def gen_drug_graph(drug_feature, data_id):
    """
    Build one graph per sample from precomputed drug features.

    :param drug_feature: container indexed by the second element of each
        sample in ``data_id``; the first item of an entry is used as the
        node features (cast to float32) and the last item as the edges
    :param data_id: iterable of samples whose element ``[1]`` selects the
        drug
    :return: a list of pgl.Graph, each with ``Max_atoms`` nodes
    """

    def to_graph(sample):
        drug_key = sample[1]
        return pgl.Graph(
            edges=drug_feature[drug_key][-1],
            num_nodes=Max_atoms,
            node_feat={'nfeat': drug_feature[drug_key][0].astype('float32')},
        )

    return [to_graph(sample) for sample in data_id]
Beispiel #27
0
def create_feeds(name_data_pair):
    """Rebuild a batched graph from per-sample arrays and store its tensors.

    One ``pgl.Graph`` is built per leading index of the inputs; the graphs
    are batched, converted with ``tensor()``, and the batched edges, node
    features, edge features and per-node graph ids are written back under
    ``"edges"``, ``"node_feat"``, ``"edge_feat"`` and ``"segment_ids"``.
    The same mapping object is returned.

    NOTE(review): the inputs are read from the keys ``"node_feat"``,
    ``"edge_feat"`` and ``"segment_ids"`` but used as edges, node features
    and edge features respectively. The keys look shifted by one slot --
    confirm against the producer of ``name_data_pair`` before changing.
    """
    edge_batch = name_data_pair["node_feat"].numpy()
    node_attr_batch = name_data_pair["edge_feat"].numpy()
    edge_attr_batch = name_data_pair["segment_ids"].numpy()

    per_sample_graphs = [
        pgl.Graph(
            edges=edge_batch[k],
            node_feat={"node_attr": node_attr_batch[k]},
            edge_feat={"edge_attr": edge_attr_batch[k]})
        for k in range(edge_batch.shape[0])
    ]
    batched = pgl.Graph.batch(per_sample_graphs).tensor()

    name_data_pair["edges"] = batched.edges
    name_data_pair["node_feat"] = batched.node_feat["node_attr"]
    name_data_pair["edge_feat"] = batched.edge_feat["edge_attr"]
    name_data_pair["segment_ids"] = batched.graph_node_id
    return name_data_pair
Beispiel #28
0
        def construct(tensors):
            """Split a flat tensor list into ``(graph_list, datas)``.

            The leading entries are consumed in ``(num_nodes, edges)`` pairs,
            one pair per graph (currently a single graph); every remaining
            entry is returned unchanged in ``datas``.
            """
            graph_num = 1
            # Each graph occupies two consecutive slots of ``tensors``.
            graph_list = [
                pgl.Graph(num_nodes=tensors[2 * k],
                          edges=tensors[2 * k + 1])
                for k in range(graph_num)
            ]
            start_len = 2 * graph_num

            datas = [tensors[i] for i in range(start_len, len(tensors))]
            return graph_list, datas
Beispiel #29
0
    def __call__(self, batch_data_list):
        """
        Collate a batch of compound samples into a list of outputs.

        Args:
            batch_data_list: a batch of samples indexed by the names in
                ``self.atom_names`` and ``self.bond_names``, by ``'edges'``
                and, when ``self.with_graph_label`` is set, by ``'label'``.

        Returns:
            A list starting with the batched graph. When
            ``self.with_graph_label`` is set, ``batch_label`` and
            ``batch_valid`` (both float32) follow; when
            ``self.with_pos_neg_mask`` is set, ``pos_mask`` and ``neg_mask``
            follow.

        NOTE(review): when ``task_type`` is neither ``'cls'`` nor ``'reg'``
        no ``batch_label`` is assigned and the label transform below fails.
        The ``(label + 1) / 2`` transform is also applied in the ``'reg'``
        case -- confirm this is intended.
        """

        def as_column(sample, name):
            return sample[name].reshape([-1, 1])

        g_list = []
        label_list = []
        for sample in batch_data_list:
            g_list.append(
                pgl.Graph(
                    num_nodes=len(sample[self.atom_names[0]]),
                    edges=sample['edges'],
                    node_feat={n: as_column(sample, n)
                               for n in self.atom_names},
                    edge_feat={n: as_column(sample, n)
                               for n in self.bond_names}))
            if self.with_graph_label:
                label_list.append(sample['label'])

        output = [pgl.Graph.batch(g_list)]

        if self.with_graph_label:
            if self.task_type == 'cls':
                batch_label = np.array(label_list).reshape(
                    -1, self.num_cls_tasks)
            elif self.task_type == 'reg':
                # Keep only the selected regression target of each label.
                selected = [label[self.reg_target_id]
                            for label in label_list]
                batch_label = np.array(selected).reshape(-1, 1)

            # label: -1 -> 0, 1 -> 1
            batch_label = ((batch_label + 1.0) / 2).astype('float32')
            batch_valid = (batch_label != 0.5).astype("float32")
            output += [batch_label, batch_valid]

        if self.with_pos_neg_mask:
            pos_mask, neg_mask = MoleculeCollateFunc.get_pos_neg_mask(g_list)
            output += [pos_mask, neg_mask]

        return output
Beispiel #30
0
    def new_graph_collatefn(self, batch_data):
        """Batch samples after dropping the last edge-feature column.

        Written for graph_data_additional_features_0424.pkl with
        graph_transform in mol_features_extract.py (per the original note).

        Returns:
            ``({'graph': g}, labels)`` with the batched graph and float32
            labels.
        """
        per_sample_graphs = []
        label_values = []
        for sample in batch_data:
            # remove 3d dist (the last column of the edge features)
            trimmed_edge_feat = np.delete(sample['edge_feat'], -1, axis=1)
            per_sample_graphs.append(
                pgl.Graph(edges=sample['edge_index'].T,
                          num_nodes=sample['num_nodes'],
                          node_feat={'feat': sample['node_feat']},
                          edge_feat={'feat': trimmed_edge_feat}))
            label_values.append(sample['label'])

        labels = np.array(label_values, dtype="float32")
        batched = pgl.Graph.batch(per_sample_graphs)
        return {'graph': batched}, labels