def test_get_atom_hydrogen_bonding_one_hot(self):
    """Check the hydrogen-bonding one-hot encoding of selected atoms.

    Atom 0 of ``self.mol`` is expected to be a carbon encoded as
    ``[0.0, 0.0]`` and atom 3 an oxygen encoded as ``[0.0, 1.0]``.
    """
    h_bond_info = construct_hydrogen_bonding_info(self.mol)
    atom_seq = self.mol.GetAtoms()

    # (atom index, expected element symbol, expected one-hot vector)
    expectations = (
        (0, "C", [0.0, 0.0]),
        (3, "O", [0.0, 1.0]),
    )
    for atom_idx, symbol, expected_vector in expectations:
        atom = atom_seq[atom_idx]
        # Confirm the fixture atom is the element we think it is first.
        assert atom.GetSymbol() == symbol
        encoding = get_atom_hydrogen_bonding_one_hot(atom, h_bond_info)
        assert encoding == expected_vector
def _featurize(self, mol: RDKitMol) -> GraphData:
    """Calculate molecule graph features from RDKit mol object.

    Parameters
    ----------
    mol: rdkit.Chem.rdchem.Mol
        RDKit mol object.

    Returns
    -------
    graph: GraphData
        A molecule graph with some features. ``edge_features`` is ``None``
        unless ``self.use_edges`` is set.

    Raises
    ------
    ImportError
        If partial charges have to be computed but RDKit cannot be imported.
    """
    if self.use_partial_charge:
        try:
            mol.GetAtomWithIdx(0).GetProp('_GasteigerCharge')
        except Exception:
            # If partial charges were not computed.
            # `Exception` (not a bare `except`) so that KeyboardInterrupt and
            # SystemExit are never swallowed by this probe.
            try:
                from rdkit.Chem import AllChem
                AllChem.ComputeGasteigerCharges(mol)
            except ModuleNotFoundError as err:
                raise ImportError(
                    "This class requires RDKit to be installed.") from err

    # construct atom (node) feature
    h_bond_infos = construct_hydrogen_bonding_info(mol)
    atom_features = np.asarray(
        [
            _construct_atom_feature(atom, h_bond_infos, self.use_chirality,
                                    self.use_partial_charge)
            for atom in mol.GetAtoms()
        ],
        dtype=float,
    )

    # construct edge (bond) index and, optionally, edge (bond) feature in a
    # single pass over the bonds
    use_edges = self.use_edges
    src, dest, features = [], [], []
    for bond in mol.GetBonds():
        # add edge list considering a directed graph: every bond yields the
        # two edges (start -> end) and (end -> start)
        start, end = bond.GetBeginAtomIdx(), bond.GetEndAtomIdx()
        src += [start, end]
        dest += [end, start]
        if use_edges:
            # the same feature vector is used for both directions
            features += 2 * [_construct_bond_feature(bond)]

    bond_features = None  # default None
    if use_edges:
        bond_features = np.asarray(features, dtype=float)

    return GraphData(node_features=atom_features,
                     edge_index=np.asarray([src, dest], dtype=int),
                     edge_features=bond_features)
def _featurize(self, mol: RDKitMol) -> GraphData:
    """Calculate molecule graph features from RDKit mol object.

    Parameters
    ----------
    mol: rdkit.Chem.rdchem.Mol
        RDKit mol object.

    Returns
    -------
    graph: GraphData
        A molecule graph with some features. When ``self.add_self_edges`` is
        set, one self edge per atom with an all-zero feature vector is
        appended after the bond edges.
    """
    from rdkit import Chem
    from rdkit.Chem import AllChem

    # construct atom and bond features
    try:
        mol.GetAtomWithIdx(0).GetProp('_GasteigerCharge')
    except Exception:
        # If partial charges were not computed.
        # `Exception` (not a bare `except`) so that KeyboardInterrupt and
        # SystemExit are never swallowed by this probe.
        AllChem.ComputeGasteigerCharges(mol)

    h_bond_infos = construct_hydrogen_bonding_info(mol)
    sssr = Chem.GetSymmSSSR(mol)

    # construct atom (node) feature
    # NOTE: the builtin `float`/`int` are used as dtypes below because the
    # `np.float`/`np.int` aliases were removed from NumPy (1.24).
    atom_features = np.array(
        [
            _construct_atom_feature(atom, h_bond_infos, sssr)
            for atom in mol.GetAtoms()
        ],
        dtype=float,
    )

    # construct edge (bond) information
    src, dest, bond_features = [], [], []
    for bond in mol.GetBonds():
        # add edge list considering a directed graph: every bond yields the
        # two edges (start -> end) and (end -> start) sharing one feature
        start, end = bond.GetBeginAtomIdx(), bond.GetEndAtomIdx()
        src += [start, end]
        dest += [end, start]
        bond_features += 2 * [_construct_bond_feature(bond)]

    if self.add_self_edges:
        num_atoms = mol.GetNumAtoms()
        self_loops = list(range(num_atoms))
        src += self_loops
        dest += self_loops
        # add dummy edge features
        # NOTE(review): the dummy width is taken from the first real bond
        # feature, so a molecule without bonds raises IndexError here.
        bond_fea_length = len(bond_features[0])
        bond_features += num_atoms * [[0] * bond_fea_length]

    return GraphData(node_features=atom_features,
                     edge_index=np.array([src, dest], dtype=int),
                     edge_features=np.array(bond_features, dtype=float))
def test_construct_hydrogen_bonding_info(self):
    """Check the container types and first entry of the hydrogen bonding info.

    The result for ``self.mol`` must be a list whose first element is the
    tuple ``(3, "Acceptor")``.
    """
    h_bond_info = construct_hydrogen_bonding_info(self.mol)
    assert isinstance(h_bond_info, list)

    first_entry = h_bond_info[0]
    assert isinstance(first_entry, tuple)

    # Generally, =O behaves as an electron acceptor
    expected_entry = (3, "Acceptor")
    assert first_entry == expected_entry
def _featurize(self, datapoint: RDKitMol, **kwargs) -> GraphData:
    """Calculate molecule graph features from RDKit mol object.

    Parameters
    ----------
    datapoint: rdkit.Chem.rdchem.Mol
        RDKit mol object.
    **kwargs
        Only the deprecated ``mol`` keyword is read. When present it
        replaces ``datapoint`` and a ``DeprecationWarning`` is issued.

    Returns
    -------
    graph: GraphData
        A molecule graph with some features. ``edge_features`` is ``None``
        unless ``self.use_edges`` is set.

    Raises
    ------
    AssertionError
        If the molecule does not contain more than one atom.
    ImportError
        If partial charges have to be computed but RDKit cannot be imported.
    """
    import warnings

    if 'mol' in kwargs:
        datapoint = kwargs.get("mol")
        # Warn instead of raising: raising made the assignment above dead
        # code and turned the deprecated keyword into a hard failure.
        warnings.warn(
            'Mol is being phased out as a parameter, please pass "datapoint" instead.',
            DeprecationWarning,
            stacklevel=2,
        )

    # Validate the molecule that will actually be featurized, i.e. after the
    # deprecated `mol` keyword has been resolved.
    assert datapoint.GetNumAtoms(
    ) > 1, "More than one atom should be present in the molecule for this featurizer to work."

    if self.use_partial_charge:
        try:
            datapoint.GetAtomWithIdx(0).GetProp('_GasteigerCharge')
        except Exception:
            # If partial charges were not computed.
            # `Exception` (not a bare `except`) so that KeyboardInterrupt and
            # SystemExit are never swallowed by this probe.
            try:
                from rdkit.Chem import AllChem
                AllChem.ComputeGasteigerCharges(datapoint)
            except ModuleNotFoundError as err:
                raise ImportError(
                    "This class requires RDKit to be installed.") from err

    # construct atom (node) feature
    h_bond_infos = construct_hydrogen_bonding_info(datapoint)
    atom_features = np.asarray(
        [
            _construct_atom_feature(atom, h_bond_infos, self.use_chirality,
                                    self.use_partial_charge)
            for atom in datapoint.GetAtoms()
        ],
        dtype=float,
    )

    # construct edge (bond) index and, optionally, edge (bond) feature in a
    # single pass over the bonds
    use_edges = self.use_edges
    src, dest, features = [], [], []
    for bond in datapoint.GetBonds():
        # add edge list considering a directed graph: every bond yields the
        # two edges (start -> end) and (end -> start)
        start, end = bond.GetBeginAtomIdx(), bond.GetEndAtomIdx()
        src += [start, end]
        dest += [end, start]
        if use_edges:
            # the same feature vector is used for both directions
            features += 2 * [_construct_bond_feature(bond)]

    bond_features = None  # default None
    if use_edges:
        bond_features = np.asarray(features, dtype=float)

    return GraphData(node_features=atom_features,
                     edge_index=np.asarray([src, dest], dtype=int),
                     edge_features=bond_features)