def rdmol_to_data(mol: Mol):
    """Convert a single-conformer RDKit molecule into a graph ``Data`` object.

    Params:
    -------
    mol: Mol
        Molecule to convert. It must hold exactly one conformer; its
        coordinates become the node positions.

    Returns:
    --------
    data: Data
        Object built with the following fields:
        * ``node_type``: long tensor with the atomic number of every atom.
        * ``pos``: float tensor with the conformer coordinates.
        * ``edge_index``: long tensor with two rows (source, target). Every
          bond appears twice, once per direction, and the columns are sorted
          by ``source * num_atoms + target``.
        * ``edge_type``: tensor with the ``BOND_TYPES`` entry of each edge,
          in the same order as ``edge_index``.
        * ``rdmol``: deep copy of the input molecule.
        * ``smiles``: SMILES string produced by ``Chem.MolToSmiles``.
        * ``nx``: undirected networkx view returned by ``to_networkx``.

    Raises:
    -------
    AssertionError
        If the molecule does not hold exactly one conformer.
    KeyError
        If a bond type has no entry in ``BOND_TYPES`` (assuming it is a
        plain mapping; it is defined outside this block).
    """
    assert mol.GetNumConformers() == 1, "expected a molecule with exactly one conformer"
    N = mol.GetNumAtoms()

    pos = torch.tensor(mol.GetConformer(0).GetPositions(), dtype=torch.float)
    z = torch.tensor([atom.GetAtomicNum() for atom in mol.GetAtoms()],
                     dtype=torch.long)

    # Store every bond in both directions so the edge list is symmetric.
    row, col, edge_type = [], [], []
    for bond in mol.GetBonds():
        start, end = bond.GetBeginAtomIdx(), bond.GetEndAtomIdx()
        row += [start, end]
        col += [end, start]
        edge_type += 2 * [BOND_TYPES[bond.GetBondType()]]

    edge_index = torch.tensor([row, col], dtype=torch.long)
    edge_type = torch.tensor(edge_type)

    # Sort edges by (source, target) so the layout does not depend on the
    # order in which RDKit enumerates the bonds.
    perm = (edge_index[0] * N + edge_index[1]).argsort()
    edge_index = edge_index[:, perm]
    edge_type = edge_type[perm]

    smiles = Chem.MolToSmiles(mol)

    data = Data(node_type=z, pos=pos, edge_index=edge_index,
                edge_type=edge_type, rdmol=copy.deepcopy(mol), smiles=smiles)
    data.nx = to_networkx(data, to_undirected=True)

    return data
def construct_pos_matrix(mol: rdchem.Mol, out_size: Optional[int] = -1) -> np.ndarray:
    """Construct the relative positions between every pair of atoms in a molecule.

    If the molecule has no conformer, 3D coordinates are first generated on a
    hydrogen-added copy with ETKDG and the hydrogens are removed again. The
    molecule passed in by the caller is not modified.

    Params:
    -------
    mol: rdkit.Chem.rdchem.Mol
        Molecule of interest.

    out_size: int, optional, default=-1
        The size of the returned array. If this option is negative (or
        ``None``), it does not take any effect. Otherwise, it must be larger
        than or equal to the number of atoms in the input molecule. If so,
        the end of the array is padded with zeros.

    Returns:
    --------
    pos_matrix: np.ndarray, shape=(n,n,3)
        Relative position (XYZ) coordinates from one atom to the others in
        the mol; ``pos_matrix[i, j]`` is ``coords[i] - coords[j]``.

    Raises:
    -------
    ValueError
        If `out_size` is non-negative but smaller than the number of atoms.

    Examples:
    ---------
    ```python
    >>> from rdkit import Chem
    >>> from rdkit.Chem import AllChem
    >>> smiles = ('N[C@@]([H])([C@]([H])(O2)C)C(=O)N[C@@]([H])(CC(=O)N)C(=O)N[C@@]([H])([C@]([H])'
    ...           '(O)C)C(=O)N[C@@]([H])(Cc1ccc(O)cc1)C(=O)2')
    >>> mol = Chem.MolFromSmiles(smiles)
    >>> mol = Chem.AddHs(mol, addCoords=True)
    >>> _ = AllChem.EmbedMolecule(mol, AllChem.ETKDG())
    >>> mol = Chem.RemoveHs(mol)
    >>> pos_matrix = construct_pos_matrix(mol, out_size=-1)
    >>> pos_matrix.shape
    (34, 34, 3)
    >>> pos_matrix = construct_pos_matrix(mol, out_size=49)
    >>> pos_matrix.shape
    (49, 49, 3)
    ```
    """
    # Obtain initial distance geometry between atoms, if unavailable.
    if mol.GetNumConformers() == 0:
        mol = rdmolops.AddHs(mol, addCoords=True)
        rdDistGeom.EmbedMolecule(mol, rdDistGeom.ETKDG())
        mol = rdmolops.RemoveHs(mol)

    coords = np.asarray(mol.GetConformer().GetPositions(),
                        dtype=np.float64)  # shape=(N,3)
    N = mol.GetNumAtoms()

    # Determine appropriate output size so that feature matrices of all mols
    # can share the same shape.
    if out_size is None or out_size < 0:
        size = N
    elif out_size >= N:
        size = out_size
    else:
        raise ValueError(
            '`out_size` (N={}) is smaller than number of atoms in mol (N={})'.
            format(out_size, N))

    # `np.float` was removed from NumPy; float64 is the type it aliased.
    pos_matrix = np.zeros(shape=(size, size, 3), dtype=np.float64)

    # Broadcasting (N,1,3) - (1,N,3) yields every pairwise difference at
    # once; entries outside the leading N x N block stay zero as padding.
    pos_matrix[:N, :N] = coords[:, np.newaxis, :] - coords[np.newaxis, :, :]

    return pos_matrix
def _get_conformer(mol: rdchem.Mol, conformer: str = "min", algo: str = "MMFF") -> rdchem.Mol:
    """Reduce a molecule to a single conformer chosen by the given rule.

    The molecule is modified in place: all of its conformers are removed and
    only the selected one is added back. The same object is returned.

    Params:
    -------
    mol: rdkit.Chem.rdchem.Mol
        Molecule of interest, ideally with multiple conformers.

    conformer: str, optional, default="min"
        Which conformer to select for 3D coordinates. If "min" (or "max"),
        then the conformer with the min (or max) energy is selected. If
        "first" or "last", then the first or last conformer is selected. If
        "avg", then the atom positions are averaged over all conformers and
        written into the first conformer, which is then kept.

    algo: str, optional, default="MMFF"
        Which force field ("MMFF" or "UFF") to evaluate conformer energies
        with. Only used when `conformer` is "min" or "max". Read rdkit
        description to determine which one is best suited for your
        application.

    Returns:
    --------
    mol: rdkit.Chem.rdchem.Mol
        The input molecule, now holding only the conformer of interest.

    Raises:
    -------
    ValueError
        If `conformer` is not one of the supported options.
    KeyError
        If `conformer` is "min" or "max" and `algo` is not "MMFF" or "UFF".
    """
    forcefields = {
        "MMFF": rdForceFieldHelpers.MMFFOptimizeMoleculeConfs,
        "UFF": rdForceFieldHelpers.UFFOptimizeMoleculeConfs
    }
    energy_pickers = {"min": np.argmin, "max": np.argmax}

    if conformer in energy_pickers:
        # With maxIters=0 no optimisation steps are requested; the call is
        # used only for its per-conformer results. Per the RDKit docs each
        # entry is a (not_converged, energy) pair, so after reducing over
        # axis 0, index 1 is the position of the extreme energy.
        results = forcefields[algo](mol, maxIters=0)
        idx = int(energy_pickers[conformer](results, axis=0)[1])
        conf = mol.GetConformers()[idx]
    elif conformer == "first":
        conf = mol.GetConformer(0)
    elif conformer == "last":
        conf = mol.GetConformer(mol.GetNumConformers() - 1)
    elif conformer == "avg":
        allpos = [c.GetPositions() for c in mol.GetConformers()]
        avgpos = np.average(allpos, axis=0)

        # Set avg position as new position for all atoms
        conf = mol.GetConformer(0)
        for atom_idx in range(conf.GetNumAtoms()):
            x, y, z = avgpos[atom_idx]
            conf.SetAtomPosition(atom_idx, Point3D(x, y, z))
    else:
        available_confs = ["min", "max", "first", "last", "avg"]
        raise ValueError(
            f"Cannot get `{conformer}` conformer. Choose from the "
            f"following {available_confs} conformer(s).")

    # `conf` may be a reference into the molecule's own conformer storage,
    # so take an independent copy before that storage is cleared; otherwise
    # the reference could be invalid by the time it is added back.
    selected = rdchem.Conformer(conf)

    # Save conformer, with the position specified
    mol.RemoveAllConformers()
    mol.AddConformer(selected)
    return mol