Example #1
0
def order_atoms(ref_mol, mol):
    """
    Reorder the atoms of `mol` in place so that they follow the atom order of `ref_mol`.

    Nothing is done (and None is returned) if either argument is None.
    The comparison is carried out on deep copies of both molecules that were
    passed through `update_molecule` with ``to_single_bonds=True``; once a
    mapping is found, `mol.atoms` is reassigned to the reordered list.

    Raises:
        SanitizationError: If the converted copies are not isomorphic, or if
            the isomorphism search returns an empty mapping.
    """
    if ref_mol is None or mol is None:
        return

    # Independent copies are kept for the isomorphism check and for the
    # mapping search. All copies are taken first, then converted, in the
    # order: ref (check), mol (check), ref (search), mol (search).
    fresh_copies = [
        ref_mol.copy(deep=True),
        mol.copy(deep=True),
        ref_mol.copy(deep=True),
        mol.copy(deep=True),
    ]
    ref_check, mol_check, ref_search, mol_search = [
        update_molecule(duplicate, to_single_bonds=True) for duplicate in fresh_copies
    ]

    def _error_details():
        # SMILES and adjacency lists of the *original* molecules for error messages.
        return ref_mol.toSMILES(), mol.toSMILES(), ref_mol.toAdjacencyList(), mol.toAdjacencyList()

    if not mol_check.isIsomorphic(ref_check, saveOrder=True):
        raise SanitizationError(
            'Could not map non isomorphic molecules {0}, {1}:\n\n{2}\n\n{3}'.format(*_error_details()))

    mapping = mol_search.findIsomorphism(ref_search, saveOrder=True)
    if len(mapping) == 0:
        raise SanitizationError(
            'Could not map molecules {0}, {1}:\n\n{2}\n\n{3}'.format(*_error_details()))

    # A list of candidate mappings may be returned; the first one is used.
    if isinstance(mapping, list):
        mapping = mapping[0]

    # Translate the atom-object mapping into {index in ref copy: index in mol copy}.
    index_map = {}
    for mol_atom, ref_atom in mapping.items():
        ref_position = ref_search.atoms.index(ref_atom)
        mol_position = mol_search.atoms.index(mol_atom)
        index_map[ref_position] = mol_position

    reordered_atoms = [mol.atoms[index_map[position]] for position, _ in enumerate(mol.atoms)]
    mol.atoms = reordered_atoms
Example #2
0
    def perceive_smiles(self, atommap=True):
        """
        Using the geometry, perceive the corresponding SMILES with bond
        orders using Open Babel and RDKit. In order to create a sensible
        SMILES, first infer the connectivity from the 3D coordinates
        using Open Babel, then convert to InChI to saturate unphysical
        multi-radical structures, then convert to RDKit and match the
        atoms to the ones in self in order to return a SMILES with atom
        mapping corresponding to the order given by the values of
        atom.idx for all atoms in self.

        This method requires Open Babel version >=2.4.1

        Args:
            atommap: If true (the default), atom map numbers are copied
                from the molecule returned by `self.to_rdkit_mol()` onto
                the sanitized molecule before the SMILES is written. If
                false, the SMILES is returned without that matching step.

        Returns:
            The SMILES string written by RDKit for the sanitized molecule.

        Raises:
            SanitizationError: If RDKit cannot build a molecule from the
                InChI, if the per-element atom counts after adding
                hydrogens differ from those of self, or if the
                single-bond skeleton cannot be matched atom for atom to
                the molecule from `self.to_rdkit_mol()`.
        """

        # Get dict of atomic numbers for later comparison.
        atoms_in_mol_true = {}
        for atom in self:
            anum = atom.get_atomicnum()
            atoms_in_mol_true[anum] = atoms_in_mol_true.get(anum, 0) + 1

        # There seems to be no particularly simple way in RDKit to read
        # in 3D structures, so use Open Babel for this part. RMG doesn't
        # recognize some single bonds, so we can't use that.
        # We've probably called to_pybel_mol at some previous time to set
        # connections, but it shouldn't be too expensive to do it again.
        pybel_mol = self.to_pybel_mol()

        # Open Babel will often make single bonds and generate Smiles
        # that have multiple radicals, which would probably correspond
        # to double bonds. To get around this, convert to InChI (which
        # does not consider bond orders) and then convert to Smiles.
        inchi = pybel_mol.write('inchi', opt={
            'F': None
        }).strip()  # Add fixed H layer

        # Use RDKit to convert back to Smiles
        mol_sanitized = Chem.MolFromInchi(inchi)

        # RDKit doesn't like some hypervalent atoms
        if mol_sanitized is None:
            raise SanitizationError(
                'Could not convert \n{}\nto Smiles. Unsanitized Smiles: {}'.
                format(self.to_xyz(),
                       pybel_mol.write('smi').strip()))

        # RDKit adds unnecessary hydrogens in some cases. If
        # this happens, give up and return an error.
        mol_sanitized = Chem.AddHs(mol_sanitized)
        atoms_in_mol_sani = {}
        for atom in mol_sanitized.GetAtoms():
            anum = atom.GetAtomicNum()
            atoms_in_mol_sani[anum] = atoms_in_mol_sani.get(anum, 0) + 1
        if atoms_in_mol_sani != atoms_in_mol_true:
            raise SanitizationError(
                'Could not convert \n{}\nto Smiles. Wrong Smiles: {}'.format(
                    self.to_xyz(), Chem.MolToSmiles(mol_sanitized)))

        if not atommap:
            return Chem.MolToSmiles(mol_sanitized)

        # Because we went through InChI, we lost atom mapping
        # information. Restore it by matching the original molecule.
        # There should only be one unique map.
        mol_with_map = self.to_rdkit_mol()  # This only has single bonds
        mol_sani_sb = Chem.Mol(
            mol_sanitized)  # Make copy with single bonds only
        for bond in mol_sani_sb.GetBonds():
            bond.SetBondType(Chem.rdchem.BondType.SINGLE)
        match = mol_sani_sb.GetSubstructMatch(
            mol_with_map)  # Isomorphism mapping

        # Make sure we match all atoms. This is an explicit check rather
        # than an assert so that it is still enforced under `python -O`;
        # otherwise an incomplete match would only show up as an
        # IndexError when `match` is indexed below.
        if len(match) != mol_with_map.GetNumAtoms():
            raise SanitizationError(
                'Could not match atoms of \n{}\nto Smiles: {}'.format(
                    self.to_xyz(), Chem.MolToSmiles(mol_sanitized)))

        # match[i] is the index in the sanitized molecule of the atom
        # with index i in mol_with_map.
        for atom in mol_with_map.GetAtoms():
            idx = match[atom.GetIdx()]
            map_num = atom.GetAtomMapNum()
            mol_sanitized.GetAtomWithIdx(idx).SetAtomMapNum(map_num)

        # If everything succeeded up to here, we hopefully have a
        # sensible Smiles string with atom mappings for all atoms.
        return Chem.MolToSmiles(mol_sanitized)