def test_graph_norm(self):
    """Check graph normalisation on a disjoint batch of a 3-node and a 4-node graph.

    The expected output divides the rows of the first graph by ``sqrt(3)``
    and the rows of the second graph by ``sqrt(4)``. Both the functional
    ``F.graph_norm`` and the ``nn.GraphNorm`` layer must reproduce it.
    """
    small = pgl.Graph(edges=[(0, 1), (1, 2)], num_nodes=3)
    large = pgl.Graph(edges=[(0, 2), (0, 3), (1, 2)], num_nodes=4)
    multi_graph = pgl.Graph.disjoint([small, large])
    multi_graph.tensor()

    # Seven rows in total; row i holds the value i in each of its 3 columns.
    column = np.arange(0, 7).reshape(-1, 1)
    feat = np.repeat(column, 3, axis=1).astype("float32")
    # Snapshot the raw features as a tensor before the reference is scaled.
    tensor_feat = paddle.to_tensor(feat, dtype="float32")

    feat[0:3] = feat[0:3] / np.sqrt(3)
    feat[3:] = feat[3:] / np.sqrt(4)
    expected = feat.tolist()

    functional_out = F.graph_norm(multi_graph, tensor_feat)
    self.assertEqual(expected, functional_out.numpy().tolist())

    layer_out = nn.GraphNorm()(multi_graph, tensor_feat)
    self.assertEqual(expected, layer_out.numpy().tolist())
def __call__(self, data_list):
    """
    Collate a list of per-sample records into two batched graphs.

    For every record an atom-bond graph and a bond-angle graph are built;
    the bond-angle graph has one node per entry of ``data['edges']``.
    Each named feature array is reshaped to a single column before the
    graphs are batched, and ``self._flat_shapes`` is then applied to the
    batched feature dicts.

    Args:
        data_list: iterable of mapping-like records holding ``'edges'``,
            ``'BondAngleGraph_edges'``, the configured feature names and,
            unless ``self.is_inference`` is set, ``'label'``.

    Returns:
        ``(atom_bond_graph, bond_angle_graph)`` in inference mode;
        ``[atom_bond_graph, bond_angle_graph, valids, labels]`` when
        ``self.task_type == 'class'``; otherwise
        ``(atom_bond_graph, bond_angle_graph, labels)`` with float32 labels.
    """

    def _as_columns(record, names):
        # Every named array becomes a 2-D array with exactly one column.
        return {name: record[name].reshape([-1, 1]) for name in names}

    atom_bond_graph_list = []
    bond_angle_graph_list = []
    label_list = []
    for data in data_list:
        atom_bond_graph_list.append(
            pgl.Graph(
                num_nodes=len(data[self.atom_names[0]]),
                edges=data['edges'],
                node_feat=_as_columns(data, self.atom_names),
                edge_feat=_as_columns(
                    data, self.bond_names + self.bond_float_names)))
        bond_angle_graph_list.append(
            pgl.Graph(
                num_nodes=len(data['edges']),
                edges=data['BondAngleGraph_edges'],
                node_feat={},
                edge_feat=_as_columns(data, self.bond_angle_float_names)))
        if not self.is_inference:
            label_list.append(data['label'])

    atom_bond_graph = pgl.Graph.batch(atom_bond_graph_list)
    bond_angle_graph = pgl.Graph.batch(bond_angle_graph_list)
    # TODO: reshape due to pgl limitations on the shape
    for batched in (atom_bond_graph, bond_angle_graph):
        self._flat_shapes(batched.node_feat)
        self._flat_shapes(batched.edge_feat)

    if self.is_inference:
        return atom_bond_graph, bond_angle_graph

    if self.task_type != 'class':
        labels = np.array(label_list, 'float32')
        return atom_bond_graph, bond_angle_graph, labels

    # label: -1 -> 0, 1 -> 1; anything that maps to 0.5 is flagged invalid.
    labels = (np.array(label_list) + 1.0) / 2
    valids = labels != 0.5
    return [atom_bond_graph, bond_angle_graph, valids, labels]
def prepareGraphData(self):
    """Build the edge lists, then return ``(info_graph, soc_graph)``.

    ``info_graph`` spans ``num_users + num_items`` nodes and uses
    ``self.user_item_edges``; ``soc_graph`` spans ``num_users`` nodes and
    uses ``self.user_user_edges``.
    """
    # Edge builders run first; the edge attributes are read only afterwards.
    self.buildUserItemEdges()
    self.buildUserUserEdges()

    user_count = self.conf['num_users']
    item_count = self.conf['num_items']
    info_graph = pgl.Graph(
        num_nodes=user_count + item_count, edges=self.user_item_edges)
    soc_graph = pgl.Graph(num_nodes=user_count, edges=self.user_user_edges)
    return info_graph, soc_graph
def __init__(self, args, dataset):
    """Set up the embedding tables, the propagation layer and the training graph.

    Args:
        args: configuration object; ``recdim`` and ``n_layers`` are read.
        dataset: provides ``n_users``, ``m_items`` and ``trainEdge``.
    """
    super(LightGCN, self).__init__()
    self.args = args
    self.dataset = dataset
    self.num_users = dataset.n_users
    self.num_items = dataset.m_items
    self.latent_dim = args.recdim
    self.n_layers = args.n_layers

    self.lightgcn = LightGCN_Layer(self.n_layers)
    self.embedding_user = nn.Embedding(
        num_embeddings=self.num_users, embedding_dim=self.latent_dim)
    self.embedding_item = nn.Embedding(
        num_embeddings=self.num_items, embedding_dim=self.latent_dim)

    def _normal_like(weight):
        # Sample N(0, 0.1) values with the same shape as ``weight``.
        shape = weight.numpy().shape
        return np.random.normal(0, 0.1, shape).astype(np.float32)

    # Item weights are sampled before user weights (keeps the RNG sequence).
    item_init = _normal_like(self.embedding_item.weight)
    user_init = _normal_like(self.embedding_user.weight)
    self.embedding_item.weight.set_value(item_init)
    self.embedding_user.weight.set_value(user_init)

    self.f = nn.Sigmoid()
    train_edges = paddle.to_tensor(dataset.trainEdge, dtype='int64')
    self.Graph = pgl.Graph(
        num_nodes=dataset.n_users + dataset.m_items, edges=train_edges)
    self.lightgcn.train()
def test_dump_tensor_load_numpy(self):
    """Dump a graph after ``tensor()`` and check the reloaded in-degrees.

    The in-degrees are taken before the graph is converted with
    ``tensor()``, the graph is dumped to ``./tmp`` and loaded back, and the
    reloaded in-degrees are compared element by element.
    """
    import shutil

    path = './tmp'
    dim = 4
    num_nodes = 10
    edges = np.random.randint(
        low=1, high=num_nodes, size=[np.random.randint(
            low=2, high=10), 2])
    nfeat = np.random.randn(num_nodes, dim)
    efeat = np.random.randn(len(edges), dim)

    g = pgl.Graph(
        edges=edges,
        num_nodes=num_nodes,
        node_feat={'nfeat': nfeat},
        edge_feat={'efeat': efeat})
    in_before = g.indegree()
    g.outdegree()
    g.tensor()

    try:
        g.dump(path)
        g2 = pgl.Graph.load(path)
        in_after = g2.indegree()
        for a, b in zip(in_before, in_after):
            self.assertEqual(a, b)
        # Drop the loaded objects before the directory is deleted
        # (presumably they may keep the dumped files open -- TODO confirm).
        del g2
        del in_after
    finally:
        # Always clean up, even when an assertion above fails; otherwise a
        # stale ./tmp directory leaks into subsequent test runs.
        shutil.rmtree(path, ignore_errors=True)
def test_disjoint_graph(self):
    """Merge five random graphs and verify the per-node / per-edge graph ids.

    The ids are checked twice: on the merged graph as built, and again
    after ``tensor()`` has been called on it.
    """
    dim = 4
    glist = []
    for _ in range(5):
        num_nodes = np.random.randint(low=2, high=10)
        edge_count = np.random.randint(low=1, high=10)
        edges = np.random.randint(
            low=1, high=num_nodes, size=[edge_count, 2])
        nfeat = np.random.randn(num_nodes, dim)
        efeat = np.random.randn(len(edges), dim)
        glist.append(
            pgl.Graph(
                edges=edges,
                num_nodes=num_nodes,
                node_feat={'nfeat': nfeat},
                edge_feat={'efeat': efeat}))

    # Merge Graph
    multi_graph = pgl.Graph.disjoint(glist)

    # Expected ids: entry k repeats the position of the graph that owns
    # node/edge k in ``glist``.
    node_index = np.concatenate(
        [np.ones(g.num_nodes) * pos for pos, g in enumerate(glist)])
    edge_index = np.concatenate(
        [np.ones(g.num_edges) * pos for pos, g in enumerate(glist)])

    self.assertTrue(np.all(node_index == multi_graph.graph_node_id))
    self.assertTrue(np.all(edge_index == multi_graph.graph_edge_id))

    multi_graph.tensor()
    self.assertTrue(
        np.all(node_index == multi_graph.graph_node_id.numpy()))
    self.assertTrue(
        np.all(edge_index == multi_graph.graph_edge_id.numpy()))
def lod_prot_chain(self, prot_chain_name):
    """Load one protein chain as ``(graph, padded_features, labels)``.

    A cached result is returned when ``prot_chain_name`` is already in
    ``self.cache``. Otherwise the chain is read from
    ``<self.prot_chain_data_dir>/<prot_chain_name>.npz``, self loops are
    appended for every node when the edge list contains none, and the
    result is stored in the cache if ``self.use_cache`` is set.

    Args:
        prot_chain_name: key into ``self.labels`` and stem of the ``.npz``
            file name.

    Returns:
        Tuple of the ``pgl.Graph``, a float32 array of shape
        ``(self.padded_len, self.n_feats)`` whose leading rows hold
        ``self.one_hot[seq]``, and a length-``self.n_labels`` array with
        1.0 at the chain's label indices.
    """
    if prot_chain_name in self.cache:
        return self.cache[prot_chain_name]

    label_idx = self.labels[prot_chain_name].astype("int64")
    labels = np.zeros(self.n_labels)
    labels[label_idx] = 1.0

    npz_path = os.path.join(self.prot_chain_data_dir,
                            f"{prot_chain_name}.npz")
    # NOTE(review): allow_pickle=True executes pickled payloads; only load
    # archives from trusted sources.
    # The context manager closes the archive's file handle once the two
    # arrays have been read (it was previously left open).
    with np.load(npz_path, allow_pickle=True) as prot_chain:
        seq = prot_chain["seq"]
        edges = prot_chain["n2n_edges"]

    num_nodes = len(seq)
    n_self_loops = np.sum(edges[:, 0] == edges[:, 1])
    if n_self_loops == 0:
        # No self loop present at all: add one (i, i) edge per node.
        node_id = np.arange(num_nodes, dtype="int64")
        self_loop = np.array([node_id, node_id]).T
        edges = np.concatenate([edges, self_loop])

    p_graph = pgl.Graph(
        edges,
        num_nodes=num_nodes,
        node_feat={"seq": paddle.to_tensor(seq, dtype="int64")}, )

    padded_features = np.zeros(
        (self.padded_len, self.n_feats)).astype("float32")
    padded_features[:seq.shape[0]] = self.one_hot[seq]

    out = p_graph, padded_features, labels
    if self.use_cache:
        self.cache[prot_chain_name] = out
    return out
def __init_weight(self):
    """Create the embedding tables, the propagation layer and the training graph.

    When ``self.config['pretrain'] == 0`` both embedding tables are filled
    with N(0, 0.1) samples; otherwise they are loaded from
    ``item_embedding.npy`` and ``user_embedding.npy`` in the working
    directory.
    """
    self.num_users = self.dataset.n_users
    self.num_items = self.dataset.m_items
    self.latent_dim = self.config['latent_dim_rec']
    self.n_layers = self.config['lightGCN_n_layers']
    self.lgn = LightGCNonv(self.n_layers)
    self.embedding_user = nn.Embedding(
        num_embeddings=self.num_users, embedding_dim=self.latent_dim)
    self.embedding_item = nn.Embedding(
        num_embeddings=self.num_items, embedding_dim=self.latent_dim)

    if self.config['pretrain'] == 0:
        emb_item_weight = np.random.normal(
            0, 0.1,
            self.embedding_item.weight.numpy().shape).astype(np.float32)
        emb_user_weight = np.random.normal(
            0, 0.1,
            self.embedding_user.weight.numpy().shape).astype(np.float32)
    else:
        emb_item_weight = np.load('item_embedding.npy').astype(np.float32)
        # Fix: the user table used to be loaded from 'item_embedding.npy'
        # as well, i.e. the item weights were copied into the user table.
        # NOTE(review): confirm 'user_embedding.npy' is the file name the
        # pretraining step actually writes.
        emb_user_weight = np.load('user_embedding.npy').astype(np.float32)

    self.embedding_item.weight.set_value(emb_item_weight)
    self.embedding_user.weight.set_value(emb_user_weight)
    self.f = nn.Sigmoid()

    num_nodes = self.dataset.n_users + self.dataset.m_items
    edges = paddle.to_tensor(self.dataset.trainEdge, dtype='int64')
    self.Graph = pgl.Graph(num_nodes=num_nodes, edges=edges)
    print(f"lgn is already to go(dropout:{self.config['dropout']})")
    self.lgn.train()
def fp_collatefn(self, batch_data):
    """Collate samples into a batched graph plus 'mgf' / 'maccs' arrays.

    Args:
        batch_data: iterable of mapping-like samples with the keys
            ``'edge_index'``, ``'num_nodes'``, ``'node_feat'``,
            ``'edge_feat'``, ``'label'``, ``'mgf'`` and ``'maccs'``.

    Returns:
        ``({'graph': g, 'mgf': mgf_feat, 'maccs': maccs_feat}, labels)``
        where the two feature arrays and ``labels`` are float32 arrays.
    """
    graph_list = []
    labels = []
    mgf_list = []
    maccs_list = []
    for gdata in batch_data:
        # 'edge_index' is transposed before being handed to pgl.Graph.
        g = pgl.Graph(
            edges=gdata['edge_index'].T,
            num_nodes=gdata['num_nodes'],
            node_feat={'feat': gdata['node_feat']},
            edge_feat={'feat': gdata['edge_feat']})
        graph_list.append(g)
        labels.append(gdata['label'])
        mgf_list.append(gdata['mgf'])
        maccs_list.append(gdata['maccs'])

    labels = np.array(labels, dtype="float32")
    g = pgl.Graph.batch(graph_list)
    mgf_feat = np.array(mgf_list, dtype="float32")
    maccs_feat = np.array(maccs_list, dtype="float32")
    # The unused intermediate ``others`` dict was dropped; the returned
    # structure is unchanged.
    return {'graph': g, 'mgf': mgf_feat, 'maccs': maccs_feat}, labels
def smile_to_graph(smile):
    """Convert a SMILES string into a fixed-size (100-node) graph and atom mask.

    Args:
        smile: SMILES string of the molecule.

    Returns:
        ``(g, mask)`` where ``g`` is a ``pgl.Graph`` with 100 nodes and a
        ``'node_feat'`` array of shape ``(100, 78)`` (rows beyond the real
        atoms stay zero) and ``mask`` is a list of 100 ints that is 1 for
        real atoms. ``None`` is returned when the SMILES cannot be parsed
        or any atom has atomic number 0.

    Raises:
        IndexError: if the molecule has more than 100 atoms.
    """
    max_atoms = 100
    mol = Chem.MolFromSmiles(smile)
    if mol is None:
        # Unparseable SMILES: report it like any other rejected molecule
        # instead of failing with AttributeError further down.
        return None

    mask = [0] * max_atoms
    features = np.zeros([max_atoms, 78])
    for i, atom in enumerate(mol.GetAtoms()):
        # Fix: GetAtomicNum must be *called*; comparing the bound method
        # itself to 0 was always False, so this check never fired.
        if atom.GetAtomicNum() == 0:
            return None
        feature = atom_features(atom)
        features[i, :] = feature / sum(feature)
        mask[i] = 1

    # Each bond contributes one edge per direction.
    edges = []
    for bond in mol.GetBonds():
        i = bond.GetBeginAtomIdx()
        j = bond.GetEndAtomIdx()
        edges.append((i, j))
        edges.append((j, i))

    g = pgl.Graph(
        num_nodes=max_atoms, edges=edges, node_feat={'node_feat': features})
    return g, mask
def test_neighbors(self):
    """Check ``predecessor`` and ``successor`` on a small 5-node graph."""
    num_nodes = 5
    g1 = pgl.Graph(
        edges=[(0, 1), (0, 2), (1, 2), (3, 4)], num_nodes=num_nodes)

    # Position i lists the expected predecessors of node i.
    expected_pred = [set(), {0}, {0, 1}, set(), {3}]
    pred, pred_eid = g1.predecessor(return_eids=True)
    self.assertEqual(len(pred), num_nodes)
    self.assertEqual(len(pred_eid), num_nodes)
    for node, want in enumerate(expected_pred):
        self.assertEqual(set(pred[node]), want)

    # Position i lists the expected successors of node i.
    expected_succ = [{1, 2}, {2}, set(), {4}, set()]
    succ, succ_eid = g1.successor(return_eids=True)
    self.assertEqual(len(succ), num_nodes)
    self.assertEqual(len(succ_eid), num_nodes)
    for node, want in enumerate(expected_succ):
        self.assertEqual(set(succ[node]), want)
def coord3_junc_collatefn(self, batch_data):
    """Collate molecule graphs together with their junction-tree graphs.

    A junction graph is only built for samples whose junction tree has at
    least one node. For those samples ``gdata['mol2juct']`` is shifted by
    the running ``[molecule node, junction node]`` offsets of the batch.

    Returns:
        ``({'graph': g, 'junc_graph': junc_g, 'mol2junc': mol2junc},
        labels)`` with ``labels`` as a float32 array.
    """
    mol_graphs = []
    junc_graphs = []
    mol2junc_parts = []
    labels = []
    mol_offset = 0
    junc_offset = 0

    for gdata in batch_data:
        mol = gdata['mol_graph']
        mol_g = pgl.Graph(
            edges=mol['edge_index'].T,
            num_nodes=mol['num_nodes'],
            node_feat={'feat': mol['node_feat'],
                       '3d': gdata['mol_coord']},
            edge_feat={'feat': mol['edge_feat']})

        tree = gdata['junction_tree']
        tree_nodes = tree['num_nodes']
        if tree_nodes > 0:
            tree_feat = np.array(
                tree['junc_dict'], dtype="int64").reshape(-1, 1)
            tree_g = pgl.Graph(
                edges=tree['edge_index'].T,
                num_nodes=tree_nodes,
                node_feat={'feat': tree_feat})
            # Shift by the offsets accumulated *before* this sample.
            shift = np.array([mol_offset, junc_offset], dtype="int64")
            mol2junc_parts.append(gdata['mol2juct'] + shift)
            junc_offset += tree_g.num_nodes
            junc_graphs.append(tree_g)

        mol_graphs.append(mol_g)
        labels.append(gdata['label'])
        mol_offset += mol_g.num_nodes

    mol2junc = np.concatenate(mol2junc_parts, axis=0)
    labels = np.array(labels, dtype="float32")
    g = pgl.Graph.batch(mol_graphs)
    junc_g = pgl.Graph.batch(junc_graphs)
    return {'graph': g, 'junc_graph': junc_g, 'mol2junc': mol2junc}, labels
def get_subgraph_by_masked(self, graph, mask):
    """Return a graph holding only the edges selected by ``mask``.

    The node count of ``graph`` is kept. ``None`` is returned when
    ``L.where(mask)`` yields no index.
    """
    index = L.where(mask)
    if index.shape[0] <= 0:
        return None
    # Pick the selected rows of the edge list.
    kept_edges = paddle.gather(graph.edges, index, axis=0)
    return pgl.Graph(kept_edges, num_nodes=graph.num_nodes)
def load_from_file(path):
    """Read a tab-separated edge list and build a ``pgl.Graph`` from it.

    Every non-blank line must hold exactly two integers separated by a tab
    (``"<u>\\t<t>"``). Blank lines are skipped.

    Args:
        path: path of the edge-list file.

    Returns:
        The ``pgl.Graph`` built from the parsed edge array.

    Raises:
        ValueError: if a non-blank line does not contain exactly two
            tab-separated integers.
    """
    edges = []
    with open(path, encoding="utf-8") as inf:
        for line in inf:
            line = line.strip("\n")
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing empty line) instead
                # of failing on the tuple unpacking below.
                continue
            u, t = line.split("\t")
            edges.append((int(u), int(t)))
    edges = np.array(edges)
    graph = pgl.Graph(edges)
    return graph
def test_build_graph(self):
    """Smoke test: a graph with node and edge features can be constructed."""
    num_nodes, dim = 5, 4
    edges = [(0, 1), (1, 2), (3, 4)]
    node_values = np.random.randn(num_nodes, dim)
    edge_values = np.random.randn(len(edges), dim)
    # Construction alone is the check; the resulting graph is not inspected.
    g1 = pgl.Graph(
        edges=edges,
        num_nodes=num_nodes,
        node_feat={'nfeat': node_values},
        edge_feat={'efeat': edge_values})
def test_num_nodes_valid(self):
    """Building a graph whose edges reference ids >= ``num_nodes`` must raise ``ValueError``."""
    num_nodes, dim = 3, 4
    # Edge (3, 4) points at node ids outside the declared 3 nodes.
    edges = [(0, 1), (1, 2), (3, 4)]
    node_values = np.random.randn(num_nodes, dim)
    edge_values = np.random.randn(len(edges), dim)
    graph_kwargs = dict(
        edges=edges,
        num_nodes=num_nodes,
        node_feat={'nfeat': node_values},
        edge_feat={'efeat': edge_values})
    with self.assertRaises(ValueError):
        pgl.Graph(**graph_kwargs)
def build_net(input_size, num_class, hidden_size, num_layers):
    """Declare the input placeholders and build the GraphSage loss / accuracy.

    Args:
        input_size: width of the ``"feature"`` input.
        num_class: forwarded to ``GraphSage``.
        hidden_size: forwarded to ``GraphSage``.
        num_layers: forwarded to ``GraphSage``.

    Returns:
        ``(loss, acc)``: cross-entropy loss and top-1 accuracy computed on
        the rows of the model output selected by the ``"index"`` input.
    """
    # Inputs are declared in the same order as before; the declaration
    # order is kept deliberately.
    num_nodes = F.data("num_nodes", shape=[1], dtype="int32")
    edges = F.data("edges", shape=[None, 2], dtype="int32")
    sample_index = F.data("sample_index", shape=[None], dtype="int32")
    index = F.data("index", shape=[None], dtype="int32")
    label = paddle.reshape(
        F.data(
            "label", shape=[None], dtype="int64"), [-1, 1])

    # NOTE(review): this first graph object is never used afterwards; it is
    # kept only so that the construction sequence stays unchanged.
    graph = pgl.Graph(num_nodes=num_nodes, edges=edges)
    feat = F.data("feature", shape=[None, input_size], dtype="float32")

    model = GraphSage(
        input_size=input_size,
        num_class=num_class,
        hidden_size=hidden_size,
        num_layers=num_layers)
    g = pgl.Graph(num_nodes=num_nodes, edges=edges)

    # Only the rows listed in ``index`` take part in loss and accuracy.
    selected = paddle.gather(model(g, feat), index)
    loss = paddle.nn.functional.cross_entropy(selected, label)
    acc = paddle.metric.accuracy(input=selected, label=label, k=1)
    return loss, acc
def test_random_walk(self):
    """Smoke test: ``random_walk`` runs on a small graph with edges in both directions."""
    num_nodes, dim = 5, 4
    # Every edge is listed together with its reverse.
    edges = [(0, 1), (1, 2), (3, 4), (1, 0), (2, 1), (4, 3)]
    node_values = np.random.randn(num_nodes, dim)
    edge_values = np.random.randn(len(edges), dim)
    g1 = pgl.Graph(
        edges=edges,
        num_nodes=num_nodes,
        node_feat={'nfeat': node_values},
        edge_feat={'efeat': edge_values})
    # The result is not inspected; the call only has to succeed.
    walk_paths = random_walk(g1, [0, 1], 2)
def test_send_and_recv(self):
    """Check ``Graph.send`` / ``Graph.recv`` with two equivalent send functions.

    ``ground`` holds the source-node feature of every edge and
    ``recv_ground`` the per-destination sum of those messages. Both send
    functions forward the source feature under key ``'h'``, so both must
    reproduce the same expectations.
    """
    np.random.seed(0)
    num_nodes = 5
    edges = [(0, 1), (1, 2), (3, 4), (4, 1), (1, 0)]
    nfeat = np.array(
        [[1, 2, 3, 4], [2, 3, 4, 5], [3, 4, 5, 6], [4, 5, 6, 7],
         [5, 6, 7, 8]],
        dtype="float32")

    # One row per edge: the feature of that edge's source node.
    ground = np.array(
        [[1, 2, 3, 4], [2, 3, 4, 5], [4, 5, 6, 7], [5, 6, 7, 8],
         [2, 3, 4, 5]],
        dtype="float32")
    # One row per node: the sum of the messages arriving at that node.
    recv_ground = np.array(
        [[2., 3., 4., 5.], [6., 8., 10., 12.], [2., 3., 4., 5.],
         [0., 0., 0., 0.], [4., 5., 6., 7.]],
        dtype="float32")

    g = pgl.Graph(
        edges=edges, num_nodes=num_nodes, node_feat={'nfeat': nfeat})
    g.tensor()

    def send_func1(src_feat, dst_feat, edge_feat):
        return src_feat

    def send_func2(src_feat, dst_feat, edge_feat):
        return {'h': src_feat['h']}

    def reduce_func(msg):
        return msg.reduce_sum(msg['h'])

    # test send_func1
    msg1 = g.send(send_func1, src_feat={'h': g.node_feat['nfeat']})
    _msg = msg1['h'].numpy()
    self.assertTrue((ground == _msg).all())
    output = g.recv(reduce_func, msg1)
    output = output.numpy()
    self.assertTrue((recv_ground == output).all())

    # test send_func2
    # Fix: this section used to call send_func1 again, so send_func2 was
    # never exercised.
    msg2 = g.send(send_func2, src_feat={'h': g.node_feat['nfeat']})
    _msg = msg2['h'].numpy()
    self.assertTrue((ground == _msg).all())
    output = g.recv(reduce_func, msg2)
    output = output.numpy()
    self.assertTrue((recv_ground == output).all())
def __getitem__(self, idx):
    """Return sample ``idx`` as ``(graph, label, smiles, pretrain_info)``.

    The sample's nodes, edges and features are sliced out of the batched
    ``self.graph`` using its ``_graph_node_index`` / ``_graph_edge_index``
    boundaries, and the edge endpoints are shifted back so that node ids
    start at 0.

    Args:
        idx: position of the sample.

    Returns:
        Tuple of the per-sample ``pgl.Graph``, ``self.label[idx]``, the
        SMILES entry of ``self.raw_dataset[idx]`` and a dict with the keys
        ``edge_index``, ``tid``, ``bond_angle_index``, ``bond_angle``,
        ``bond_angle_mask``, ``edge_attr_float`` and
        ``edge_attr_float_mask``.

    Raises:
        ValueError: if the length of the sample's ``twohop_context`` differs
            from its node count.
    """
    node_start = self.graph._graph_node_index[idx]
    node_end = self.graph._graph_node_index[idx + 1]
    edge_start = self.graph._graph_edge_index[idx]
    edge_end = self.graph._graph_edge_index[idx + 1]

    num_nodes = node_end - node_start
    # Re-base the endpoints so that the sample's first node gets id 0.
    edges = self.graph.edges[edge_start:edge_end] - node_start

    edge_feat = {
        key: value[edge_start:edge_end]
        for key, value in self.graph.edge_feat.items()
    }
    node_feat = {
        key: value[node_start:node_end]
        for key, value in self.graph.node_feat.items()
    }

    # pretrain information
    info = self.pretrain_info_list[idx]
    edge_index = info["edge_index"]
    tid = info["twohop_context"]
    if num_nodes != len(tid):
        # Fix: this used to print the message and call exit(0), which ends
        # the process with a *success* status on corrupt data. Raise so the
        # failure is visible to the caller.
        raise ValueError(
            f"idx {idx} num_nodes is : {num_nodes} and len of tid is : {len(tid)}, they are not equal"
        )

    dft_success = info["dft_success"]
    bond_angle = info["bond_angle"]
    # ``x * 0 + dft_success`` spreads the single success flag over an array
    # shaped like ``x``.
    bond_angle_mask = np.array(bond_angle * 0 + dft_success, dtype=bool)
    edge_attr_float = np.array(info["edge_feat_float"])
    edge_attr_float_mask = np.array(
        info["edge_feat_float"].reshape(-1) * 0 + dft_success, dtype=bool)

    pretrain_info = {
        "edge_index": np.array(edge_index),
        "tid": np.array(
            tid, dtype=int),
        "bond_angle_index": info["bond_angle_index"],
        "bond_angle": bond_angle,
        "bond_angle_mask": bond_angle_mask,
        "edge_attr_float": edge_attr_float,
        "edge_attr_float_mask": edge_attr_float_mask,
    }

    # Only the SMILES string of the raw entry is used; the returned label
    # comes from ``self.label``.
    smiles, _ = self.raw_dataset[idx]
    graph = pgl.Graph(
        num_nodes=num_nodes,
        edges=edges,
        node_feat=node_feat,
        edge_feat=edge_feat)
    return (graph, self.label[idx], smiles, pretrain_info)
def __call__(self, batch_data):
    """Collate samples into ``(batched_graph, labels)``.

    Each sample supplies ``'edges'``, ``'num_nodes'``, ``'node_feat'``,
    ``'edge_feat'`` and ``'label'``; ``labels`` is returned as a float32
    array.
    """

    def _to_graph(sample):
        # One pgl.Graph per sample, features stored under the key 'feat'.
        return pgl.Graph(
            edges=sample['edges'],
            num_nodes=sample['num_nodes'],
            node_feat={'feat': sample['node_feat']},
            edge_feat={'feat': sample['edge_feat']})

    per_sample_graphs = []
    raw_labels = []
    for sample in batch_data:
        per_sample_graphs.append(_to_graph(sample))
        raw_labels.append(sample['label'])

    label_array = np.array(raw_labels, dtype="float32")
    return pgl.Graph.batch(per_sample_graphs), label_array
def __init__(self, args, dataset):
    """Set up the NGCF layer, the training graph and the weights.

    Args:
        args: configuration object; ``recdim`` and ``n_layers`` are read.
        dataset: provides ``n_users``, ``m_items`` and ``trainEdge``.
    """
    super(NGCF, self).__init__()
    self.args = args
    self.dataset = dataset
    self.num_users = dataset.n_users
    self.num_items = dataset.m_items
    self.latent_dim = args.recdim
    self.n_layers = args.n_layers

    # Input and output width of the layer are both the latent dimension.
    self.ngcf = NGCF_Layer(self.latent_dim, self.latent_dim, self.n_layers)

    train_edges = paddle.to_tensor(dataset.trainEdge, dtype='int64')
    total_nodes = dataset.n_users + dataset.m_items
    self.Graph = pgl.Graph(num_nodes=total_nodes, edges=train_edges)
    self.f = nn.Sigmoid()
    self.__init_weight()
def quality_graph_collatefn(self, batch_data):
    """Collate the ``'mol_graph'`` entries of a batch.

    Returns:
        ``({'graph': batched_graph}, labels)`` with ``labels`` as a
        float32 array.
    """
    mol_graphs = []
    raw_labels = []
    for sample in batch_data:
        mol = sample['mol_graph']
        # 'edge_index' is transposed before being handed to pgl.Graph.
        mol_graphs.append(
            pgl.Graph(
                edges=mol['edge_index'].T,
                num_nodes=mol['num_nodes'],
                node_feat={'feat': mol['node_feat']},
                edge_feat={'feat': mol['edge_feat']}))
        raw_labels.append(sample['label'])

    label_array = np.array(raw_labels, dtype="float32")
    batched = pgl.Graph.batch(mol_graphs)
    return {'graph': batched}, label_array
def create_random_graph():
    """Build a random graph with 8-15 nodes and 8-dimensional random features.

    The edge count is drawn from ``[3 * num_nodes, 4 * num_nodes)`` and
    both endpoints of every edge are drawn uniformly from the node ids.
    """
    dim = 8
    num_nodes = np.random.randint(low=8, high=16)
    edge_count = np.random.randint(low=num_nodes * 3, high=num_nodes * 4)
    edges = np.random.randint(low=0, high=num_nodes, size=[edge_count, 2])
    node_values = np.random.randn(num_nodes, dim)
    edge_values = np.random.randn(len(edges), dim)
    return pgl.Graph(
        edges=edges,
        num_nodes=num_nodes,
        node_feat={'nfeat': node_values},
        edge_feat={'efeat': edge_values})
def __call__(self, data_list):
    """
    Collate records into one batched graph and mask a random subset of its nodes.

    Every record becomes a ``pgl.Graph`` whose features are single-column
    arrays; after batching all features are flattened to 1-D. A fraction
    ``self.mask_ratio`` of the nodes is then drawn without replacement,
    their ``'atomic_num'`` values are recorded as labels, and every node
    feature of those nodes is overwritten with 0.

    Args:
        data_list: iterable of mapping-like records holding ``'edges'``
            and the configured atom / bond feature names.

    Returns:
        ``(join_graph, masked_node_indice, masked_node_labels)``.
    """

    def _as_columns(record, names):
        return {name: record[name].reshape([-1, 1]) for name in names}

    g_list = []
    for data in data_list:
        g_list.append(
            pgl.Graph(
                num_nodes=len(data[self.atom_names[0]]),
                edges=data['edges'],
                node_feat=_as_columns(data, self.atom_names),
                edge_feat=_as_columns(data, self.bond_names)))
    join_graph = pgl.Graph.batch(g_list)

    # Flatten every batched feature back to one dimension.
    for feats in (join_graph.node_feat, join_graph.edge_feat):
        for name in feats:
            feats[name] = feats[name].reshape([-1])

    ### mask atom
    total_nodes = join_graph.num_nodes
    masked_size = int(total_nodes * self.mask_ratio)
    masked_node_indice = np.random.choice(
        range(total_nodes), size=masked_size, replace=False)
    # Labels are read before the features are zeroed below.
    masked_node_labels = join_graph.node_feat['atomic_num'][
        masked_node_indice]
    for name in join_graph.node_feat:
        join_graph.node_feat[name][masked_node_indice] = 0  # 0: OOV

    return join_graph, masked_node_indice, masked_node_labels
def gen_drug_graph(drug_feature, data_id):
    """
    Build one ``pgl.Graph`` per sample from the drug features.

    For each entry of ``data_id`` its element at position 1 selects a
    record of ``drug_feature``; the last item of that record supplies the
    edges and the first item (cast to float32) the ``'nfeat'`` node
    feature. Every graph is created with ``Max_atoms`` nodes.

    :param drug_feature: current drug features generated from preceding steps
    :param data_id: sample index
    :return: a list of pgl.graph
    """

    def _one_graph(sample):
        record = drug_feature[sample[1]]
        return pgl.Graph(
            edges=record[-1],
            num_nodes=Max_atoms,
            node_feat={'nfeat': record[0].astype('float32')})

    return [_one_graph(sample) for sample in data_id]
def create_feeds(name_data_pair):
    """Replace the graph entries of ``name_data_pair`` with one batched graph.

    One ``pgl.Graph`` is built per leading index of the inputs, the graphs
    are batched and converted with ``tensor()``, and the entries
    ``"edges"``, ``"node_feat"``, ``"edge_feat"`` and ``"segment_ids"`` are
    overwritten from the result. The same mapping is returned.

    NOTE(review): the inputs are read under keys that do not match their
    use -- ``"node_feat"`` supplies the edges, ``"edge_feat"`` the node
    features and ``"segment_ids"`` the edge features. This may be how the
    upstream feed is laid out or a key mix-up; confirm against the caller
    before changing it.
    """
    raw_edges = name_data_pair["node_feat"].numpy()
    raw_node_feat = name_data_pair["edge_feat"].numpy()
    raw_edge_feat = name_data_pair["segment_ids"].numpy()

    per_sample = [
        pgl.Graph(
            edges=raw_edges[i],
            node_feat={"node_attr": raw_node_feat[i]},
            edge_feat={"edge_attr": raw_edge_feat[i]})
        for i in range(raw_edges.shape[0])
    ]
    batched = pgl.Graph.batch(per_sample).tensor()

    name_data_pair["edges"] = batched.edges
    name_data_pair["node_feat"] = batched.node_feat["node_attr"]
    name_data_pair["edge_feat"] = batched.edge_feat["edge_attr"]
    name_data_pair["segment_ids"] = batched.graph_node_id
    return name_data_pair
def construct(tensors):
    """
    Split a flat tensor list into ``(graph_list, datas)``.

    The first two entries are used as ``num_nodes`` and ``edges`` of a
    single ``pgl.Graph``; every remaining entry is passed through
    unchanged, in order.
    """
    graph_num = 1
    # Graph k consumes the entries at positions 2k (num_nodes) and 2k + 1
    # (edges).
    graph_list = [
        pgl.Graph(
            num_nodes=tensors[2 * k], edges=tensors[2 * k + 1])
        for k in range(graph_num)
    ]
    first_data = 2 * graph_num
    datas = [tensors[pos] for pos in range(first_data, len(tensors))]
    return graph_list, datas
def __call__(self, batch_data_list):
    """
    Collate a batch of compound records into a list of model inputs.

    Args:
        batch_data_list: a batch of the compound graph data.

    Returns:
        A list that starts with the batched graph. When
        ``self.with_graph_label`` is set, ``batch_label`` and
        ``batch_valid`` (both float32) follow; when
        ``self.with_pos_neg_mask`` is set, ``pos_mask`` and ``neg_mask``
        are appended last.
    """
    g_list = []
    label_list = []
    for data in batch_data_list:
        atom_feats = {
            name: data[name].reshape([-1, 1])
            for name in self.atom_names
        }
        bond_feats = {
            name: data[name].reshape([-1, 1])
            for name in self.bond_names
        }
        g_list.append(
            pgl.Graph(
                num_nodes=len(data[self.atom_names[0]]),
                edges=data['edges'],
                node_feat=atom_feats,
                edge_feat=bond_feats))
        if self.with_graph_label:
            label_list.append(data['label'])

    output = [pgl.Graph.batch(g_list)]

    if self.with_graph_label:
        if self.task_type == 'cls':
            batch_label = np.array(label_list).reshape(
                -1, self.num_cls_tasks)
        elif self.task_type == 'reg':
            # Keep only the configured regression target of every label.
            picked = [label[self.reg_target_id] for label in label_list]
            batch_label = np.array(picked).reshape(-1, 1)

        # label: -1 -> 0, 1 -> 1; entries that map to 0.5 are marked invalid.
        batch_label = ((batch_label + 1.0) / 2).astype('float32')
        batch_valid = (batch_label != 0.5).astype("float32")
        output.extend([batch_label, batch_valid])

    if self.with_pos_neg_mask:
        pos_mask, neg_mask = MoleculeCollateFunc.get_pos_neg_mask(g_list)
        output.extend([pos_mask, neg_mask])

    return output
def new_graph_collatefn(self, batch_data):
    """Collate samples into ``({'graph': batched_graph}, labels)``.

    The last column of every sample's ``'edge_feat'`` is dropped before
    the graph is built; ``labels`` is returned as a float32 array.
    """
    # for graph_data_additional_features_0424.pkl
    # with graph_transform in mol_features_extract.py
    per_sample_graphs = []
    raw_labels = []
    for sample in batch_data:
        # remove 3d dist (the last edge-feature column)
        trimmed_efeat = np.delete(sample['edge_feat'], -1, axis=1)
        per_sample_graphs.append(
            pgl.Graph(
                edges=sample['edge_index'].T,
                num_nodes=sample['num_nodes'],
                node_feat={'feat': sample['node_feat']},
                edge_feat={'feat': trimmed_efeat}))
        raw_labels.append(sample['label'])

    label_array = np.array(raw_labels, dtype="float32")
    batched = pgl.Graph.batch(per_sample_graphs)
    return {'graph': batched}, label_array