예제 #1
0
파일: dataset.py 프로젝트: WenjinW/PGL
    def __init__(self, config, mode='train', transform=None):
        """Build the list of molecule graphs, using an on-disk pickle cache.

        The raw SMILES dataset is opened from ``config.base_data_path``. If
        ``<raw_dataset.folder>/pgl_processed/graph_data.pkl`` exists, the
        graph list is loaded from it; otherwise every SMILES string is
        converted with ``smiles2graph`` and the result is written to that
        file for later runs.

        Args:
            config: Object exposing ``base_data_path``.
            mode: Stored on the instance as ``self.mode``.
            transform: Stored on the instance as ``self.transform``.
        """
        self.config = config
        self.mode = mode
        self.transform = transform
        self.raw_dataset = PCQM4MDataset(config.base_data_path,
                                         only_smiles=True)

        log.info("preprocess graph data in %s" % self.__class__.__name__)
        processed_path = os.path.join(self.raw_dataset.folder, "pgl_processed")
        # exist_ok=True avoids the check-then-create race when several
        # workers construct the dataset at the same time.
        os.makedirs(processed_path, exist_ok=True)
        data_file = os.path.join(processed_path, "graph_data.pkl")

        if os.path.exists(data_file):
            log.info("loading graph data from pkl file")
            # NOTE(review): unpickling executes arbitrary code; the cache
            # file must only ever come from a trusted location.
            with open(data_file, "rb") as cache_file:
                self.graph_list = pkl.load(cache_file)
        else:
            log.info("loading graph data from smiles data")
            self.graph_list = []
            for i in tqdm.tqdm(range(len(self.raw_dataset))):
                smiles, label = self.raw_dataset[i]
                graph = smiles2graph(smiles)
                self.graph_list.append({
                    # edge_index is transposed so that edges are stored
                    # along the first axis.
                    "edges": graph["edge_index"].T,
                    "num_nodes": graph["num_nodes"],
                    "node_feat": graph["node_feat"],
                    "edge_feat": graph["edge_feat"],
                    "label": label,
                })

            # Close the handle deterministically so the cache is fully
            # flushed to disk before anyone else tries to read it.
            with open(data_file, 'wb') as cache_file:
                pkl.dump(self.graph_list, cache_file)
예제 #2
0
def _get_pcq_graph_generator(indices, smiles, labels, conformers):
    """Yield one ``(graph, conformer_positions)`` pair per input molecule.

    The four inputs are iterated in lockstep. Each SMILES string is turned
    into a graph via ``utils.smiles2graph`` and
    ``_convert_ogb_graph_to_graphs_tuple``; the graph's ``globals`` are then
    replaced by a dict holding ``"target"`` (float32 array of the label) and
    ``"graph_index"`` (int32 array of the index), followed by any entries of
    the previous ``globals`` when that was already a dict.
    """
    records = zip(indices, smiles, conformers, labels)
    for graph_index, smiles_string, positions, target in records:
        graphs_tuple = _convert_ogb_graph_to_graphs_tuple(
            utils.smiles2graph(smiles_string))

        merged_globals = {
            "target": np.array([target], dtype=np.float32),
            "graph_index": np.array([graph_index], dtype=np.int32),
        }
        previous_globals = graphs_tuple.globals
        if isinstance(previous_globals, dict):
            # Pre-existing entries take precedence over the two set above.
            merged_globals.update(previous_globals)

        yield graphs_tuple._replace(globals=merged_globals), positions
예제 #3
0
    def get(self, idx):
        """Return the ``Data`` object for the molecule at position ``idx``.

        The SMILES string at ``self.smiles_list[idx]`` is converted with
        ``smiles2graph``; node features, edge indices and edge features become
        int64 tensors, and ``self.homolumogap_list[idx]`` becomes the
        one-element target tensor ``y``.
        """
        smiles = self.smiles_list[idx]
        gap = self.homolumogap_list[idx]
        mol = smiles2graph(smiles)

        # Sanity checks: one feature row per edge and per node.
        assert len(mol['edge_feat']) == mol['edge_index'].shape[1]
        assert len(mol['node_feat']) == mol['num_nodes']

        def as_int64(array):
            return torch.from_numpy(array).to(torch.int64)

        x = as_int64(mol['node_feat'])
        edge_index = as_int64(mol['edge_index'])
        edge_attr = as_int64(mol['edge_feat'])
        y = torch.Tensor([gap])
        return Data(x, edge_index, edge_attr, y,
                    num_nodes=int(mol['num_nodes']))
예제 #4
0
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np

import paddle
import pgl

# Switch Paddle to static-graph mode. This call is deliberately placed before
# the remaining paddle/pgl imports; keep the ordering as-is.
paddle.enable_static()

import paddle.nn as nn
import pgl.nn as gnn
from ogb.utils import smiles2graph
import paddle.static as static

# Example molecule, given as a SMILES string, converted to a graph object
# with OGB's smiles2graph helper.
graph_obj = smiles2graph('O=C1C=CC(O1)C(c1ccccc1C)O')


class GNNModel(nn.Layer):
    def __init__(self, input_size, output_size, num_layers=3):
        """Create a stack of GCN layers plus a sum graph-pooling layer.

        Args:
            input_size: Input feature size of the first ``GCNConv``.
            output_size: Output feature size of every ``GCNConv``; also the
                input size of every layer after the first.
            num_layers: Total number of ``GCNConv`` layers (at least one is
                always created).
        """
        super(GNNModel, self).__init__()
        # The first layer maps input_size -> output_size; every following
        # layer maps output_size -> output_size.
        layer_input_sizes = [input_size] + [output_size] * (num_layers - 1)
        self.conv_fn = nn.LayerList()
        for in_size in layer_input_sizes:
            self.conv_fn.append(gnn.GCNConv(in_size, output_size))
        self.pool_fn = gnn.GraphPool("sum")

    def forward(self, num_nodes, edges, feature):
        graph = pgl.Graph(num_nodes=num_nodes, edges=edges)
        for fn in self.conv_fn:
            feature = fn(graph, feature)