def __init__(self, config, mode='train', transform=None):
    """Load the PCQM4M graphs, building an on-disk pickle cache on first use.

    The raw dataset is opened in SMILES-only mode. If
    ``<raw_dataset.folder>/pgl_processed/graph_data.pkl`` exists it is
    unpickled into ``self.graph_list``; otherwise every SMILES string is
    converted with ``smiles2graph``, collected into ``self.graph_list`` and
    the list is pickled to that path for later runs.

    Args:
        config: Configuration object; ``config.base_data_path`` is passed to
            ``PCQM4MDataset`` as the dataset root.
        mode: Stored on ``self.mode``; not otherwise used here.
        transform: Stored on ``self.transform``; not otherwise used here.
    """
    self.config = config
    self.mode = mode
    self.transform = transform
    self.raw_dataset = PCQM4MDataset(config.base_data_path, only_smiles=True)

    log.info("preprocess graph data in %s" % self.__class__.__name__)
    processed_path = os.path.join(self.raw_dataset.folder, "pgl_processed")
    # exist_ok avoids the check-then-create race when several workers
    # construct the dataset at the same time.
    os.makedirs(processed_path, exist_ok=True)
    data_file = os.path.join(processed_path, "graph_data.pkl")

    if os.path.exists(data_file):
        log.info("loading graph data from pkl file")
        # NOTE(review): pickle executes arbitrary code on load; this is only
        # safe because the cache file is produced locally by the branch below.
        with open(data_file, "rb") as cache_in:
            self.graph_list = pkl.load(cache_in)
    else:
        log.info("loading graph data from smiles data")
        self.graph_list = []
        for i in tqdm.tqdm(range(len(self.raw_dataset))):
            # num_nodes, edge_index, node_feat, edge_feat, label
            smiles, label = self.raw_dataset[i]
            graph = smiles2graph(smiles)
            new_graph = {
                # Stored transposed relative to what smiles2graph returns.
                "edges": graph["edge_index"].T,
                "num_nodes": graph["num_nodes"],
                "node_feat": graph["node_feat"],
                "edge_feat": graph["edge_feat"],
                "label": label,
            }
            self.graph_list.append(new_graph)

        # Context manager guarantees the cache is flushed and closed.
        with open(data_file, 'wb') as cache_out:
            pkl.dump(self.graph_list, cache_out)
def _get_pcq_graph_generator(indices, smiles, labels, conformers): """Returns a generator to yield graph.""" for idx, smile, conformer_positions, label in zip(indices, smiles, conformers, labels): graph = utils.smiles2graph(smile) graph = _convert_ogb_graph_to_graphs_tuple(graph) graph = graph._replace( globals={ "target": np.array([label], dtype=np.float32), "graph_index": np.array([idx], dtype=np.int32), **(graph.globals if isinstance(graph.globals, dict) else {}) }) yield graph, conformer_positions
def get(self, idx):
    """Build the ``Data`` object for the molecule stored at position ``idx``.

    Looks up the SMILES string and its HOMO-LUMO gap value, converts the
    SMILES with ``smiles2graph``, sanity-checks that the feature arrays agree
    with the reported edge/node counts, and packs everything into ``Data``
    with int64 node features, edge index and edge attributes and the gap as
    the target tensor.
    """
    smiles = self.smiles_list[idx]
    homolumogap = self.homolumogap_list[idx]
    mol_graph = smiles2graph(smiles)

    # One edge-feature row per edge column, one node-feature row per node.
    assert len(mol_graph['edge_feat']) == mol_graph['edge_index'].shape[1]
    assert len(mol_graph['node_feat']) == mol_graph['num_nodes']

    def _as_int64(array):
        return torch.from_numpy(array).to(torch.int64)

    node_features = _as_int64(mol_graph['node_feat'])
    connectivity = _as_int64(mol_graph['edge_index'])
    edge_features = _as_int64(mol_graph['edge_feat'])
    target = torch.Tensor([homolumogap])
    node_count = int(mol_graph['num_nodes'])

    return Data(node_features, connectivity, edge_features, target,
                num_nodes=node_count)
# See the License for the specific language governing permissions and # limitations under the License. import numpy as np import paddle import pgl paddle.enable_static() import paddle.nn as nn import pgl.nn as gnn from ogb.utils import smiles2graph import paddle.static as static graph_obj = smiles2graph('O=C1C=CC(O1)C(c1ccccc1C)O') class GNNModel(nn.Layer): def __init__(self, input_size, output_size, num_layers=3): super(GNNModel, self).__init__() self.conv_fn = nn.LayerList() self.conv_fn.append(gnn.GCNConv(input_size, output_size)) for i in range(num_layers - 1): self.conv_fn.append(gnn.GCNConv(output_size, output_size)) self.pool_fn = gnn.GraphPool("sum") def forward(self, num_nodes, edges, feature): graph = pgl.Graph(num_nodes=num_nodes, edges=edges) for fn in self.conv_fn: feature = fn(graph, feature)