def order_atoms(ref_mol, mol):
    """
    Reorder ``mol.atoms`` in place so that it follows the atom order of ``ref_mol``.

    Both molecules are deep-copied and passed through
    ``update_molecule(..., to_single_bonds=True)`` before being compared, so
    the inputs themselves are only touched when ``mol.atoms`` is reassigned
    at the very end.

    Args:
        ref_mol: The molecule whose atom order is the reference.
        mol: The molecule whose ``atoms`` list is reordered.

    Returns:
        None. If either argument is ``None`` the function does nothing.

    Raises:
        SanitizationError: If the single-bond copies are not isomorphic, or
            if they are isomorphic but no atom mapping is found.
    """
    if ref_mol is None or mol is None:
        # Nothing to order against; leave `mol` untouched.
        return

    # Work on independent copies: one pair for the isomorphism check and a
    # second pair for the mapping search.
    # NOTE(review): the check and the search deliberately use separate
    # copies, as in the original implementation - presumably because the
    # check may alter its operands; confirm before merging them.
    ref_for_check = ref_mol.copy(deep=True)
    mol_for_check = mol.copy(deep=True)
    ref_for_search = ref_mol.copy(deep=True)
    mol_for_search = mol.copy(deep=True)

    ref_for_check = update_molecule(ref_for_check, to_single_bonds=True)
    mol_for_check = update_molecule(mol_for_check, to_single_bonds=True)
    ref_for_search = update_molecule(ref_for_search, to_single_bonds=True)
    mol_for_search = update_molecule(mol_for_search, to_single_bonds=True)

    if not mol_for_check.isIsomorphic(ref_for_check, saveOrder=True):
        raise SanitizationError('Could not map non isomorphic molecules {0}, {1}:\n\n{2}\n\n{3}'.format(
            ref_mol.toSMILES(), mol.toSMILES(), ref_mol.toAdjacencyList(), mol.toAdjacencyList()))

    mapping = mol_for_search.findIsomorphism(ref_for_search, saveOrder=True)
    if len(mapping) == 0:
        raise SanitizationError('Could not map molecules {0}, {1}:\n\n{2}\n\n{3}'.format(
            ref_mol.toSMILES(), mol.toSMILES(), ref_mol.toAdjacencyList(), mol.toAdjacencyList()))

    # The search may hand back a list of candidate mappings; use the first.
    if isinstance(mapping, list):
        mapping = mapping[0]

    # Translate the atom -> atom mapping into
    # {position in the reference copy: position in the mol copy}.
    ref_to_mol_index = {}
    for mol_atom, ref_atom in mapping.items():
        ref_position = ref_for_search.atoms.index(ref_atom)
        mol_position = mol_for_search.atoms.index(mol_atom)
        ref_to_mol_index[ref_position] = mol_position

    # Slot i of the new list receives the atom of `mol` that corresponds to
    # atom i of the reference.
    current_atoms = mol.atoms
    mol.atoms = [current_atoms[ref_to_mol_index[position]] for position in range(len(current_atoms))]
def perceive_smiles(self, atommap=True):
    """
    Using the geometry, perceive the corresponding SMILES with bond
    orders using Open Babel and RDKit. In order to create a sensible
    SMILES, first infer the connectivity from the 3D coordinates
    using Open Babel, then convert to InChI to saturate unphysical
    multi-radical structures, then convert to RDKit and match the
    atoms to the ones in self in order to return a SMILES with atom
    mapping corresponding to the order given by the values of
    atom.idx for all atoms in self.

    This method requires Open Babel version >=2.4.1

    Args:
        atommap: If True (default), atom map numbers taken from
            ``self.to_rdkit_mol()`` are copied onto the sanitized
            molecule before the SMILES is written. If False, the SMILES
            is returned right after the atom-count check, without atom
            map numbers.

    Returns:
        The SMILES string produced by ``Chem.MolToSmiles``.

    Raises:
        SanitizationError: If RDKit cannot build a molecule from the
            InChI, if the atom counts per element differ from those in
            self after hydrogens are added, or if the sanitized molecule
            cannot be matched atom-for-atom onto the molecule from
            ``self.to_rdkit_mol()``.
    """
    # Get dict of atomic numbers for later comparison.
    atoms_in_mol_true = {}
    for atom in self:
        anum = atom.get_atomicnum()
        atoms_in_mol_true[anum] = atoms_in_mol_true.get(anum, 0) + 1

    # There seems to be no particularly simple way in RDKit to read
    # in 3D structures, so use Open Babel for this part. RMG doesn't
    # recognize some single bonds, so we can't use that.
    # We've probably called to_pybel_mol at some previous time to set
    # connections, but it shouldn't be too expensive to do it again.
    pybel_mol = self.to_pybel_mol()

    # Open Babel will often make single bonds and generate Smiles
    # that have multiple radicals, which would probably correspond
    # to double bonds. To get around this, convert to InChI (which
    # does not consider bond orders) and then convert to Smiles.
    inchi = pybel_mol.write('inchi', opt={'F': None}).strip()  # Add fixed H layer

    # Use RDKit to convert back to Smiles
    mol_sanitized = Chem.MolFromInchi(inchi)

    # RDKit doesn't like some hypervalent atoms
    if mol_sanitized is None:
        raise SanitizationError(
            'Could not convert \n{}\nto Smiles. Unsanitized Smiles: {}'.
            format(self.to_xyz(), pybel_mol.write('smi').strip()))

    # RDKit adds unnecessary hydrogens in some cases. If
    # this happens, give up and return an error.
    mol_sanitized = Chem.AddHs(mol_sanitized)
    atoms_in_mol_sani = {}
    for atom in mol_sanitized.GetAtoms():
        anum = atom.GetAtomicNum()
        atoms_in_mol_sani[anum] = atoms_in_mol_sani.get(anum, 0) + 1
    if atoms_in_mol_sani != atoms_in_mol_true:
        raise SanitizationError(
            'Could not convert \n{}\nto Smiles. Wrong Smiles: {}'.format(
                self.to_xyz(), Chem.MolToSmiles(mol_sanitized)))

    if not atommap:
        return Chem.MolToSmiles(mol_sanitized)

    # Because we went through InChI, we lost atom mapping
    # information. Restore it by matching the original molecule.
    # There should only be one unique map.
    mol_with_map = self.to_rdkit_mol()  # This only has single bonds
    mol_sani_sb = Chem.Mol(mol_sanitized)  # Make copy with single bonds only
    for bond in mol_sani_sb.GetBonds():
        bond.SetBondType(Chem.rdchem.BondType.SINGLE)
    match = mol_sani_sb.GetSubstructMatch(mol_with_map)  # Isomorphism mapping

    # Make sure we match all atoms. This is a data-dependent failure, so
    # raise explicitly instead of using `assert`: assertions are removed
    # under `python -O`, and an incomplete match would then fail with an
    # unrelated IndexError in the loop below.
    if mol_with_map.GetNumAtoms() != len(match):
        raise SanitizationError(
            'Could not convert \n{}\nto Smiles. '
            'Could not match atoms to restore atom mapping: {}'.format(
                self.to_xyz(), Chem.MolToSmiles(mol_sanitized)))

    # match[i] is the index in the sanitized molecule of the atom that
    # corresponds to atom i of mol_with_map.
    for atom in mol_with_map.GetAtoms():
        idx = match[atom.GetIdx()]
        map_num = atom.GetAtomMapNum()
        mol_sanitized.GetAtomWithIdx(idx).SetAtomMapNum(map_num)

    # If everything succeeded up to here, we hopefully have a
    # sensible Smiles string with atom mappings for all atoms.
    return Chem.MolToSmiles(mol_sanitized)