예제 #1
0
    def copy_origins(cls, annotated: Chem.Mol, target: Chem.Mol):
        """
        Copy the ``_Origin`` atom annotations from ``annotated`` onto ``target``.

        Fragmenstein leaves a note of what it did: the atom prop ``_Origin`` is a json of a list of
        mol ``_Name`` dot ``AtomIdx``. The atom order seems to be maintained, but it is not trusted here
        (also dummy atoms are stripped), so the atoms are paired via a maximum common substructure (MCS)
        that compares elements only and accepts any bond order.

        ``target`` is modified in place: each atom that is part of the MCS match gets its ``_Origin`` set.

        :param annotated: molecule whose atoms carry the ``_Origin`` annotation
        :param target: molecule that receives the annotation
        :return: a list of origins with one entry per ``target`` atom, in atom-index order;
            atoms that are not part of the MCS match get an empty list.
        """
        mcs = rdFMCS.FindMCS([target, annotated],
                             atomCompare=rdFMCS.AtomCompare.CompareElements,
                             bondCompare=rdFMCS.BondCompare.CompareAny,
                             ringMatchesRingOnly=True)
        common = Chem.MolFromSmarts(mcs.smartsString)
        # target atom index -> annotated atom index (first substructure match of each molecule)
        dmapping = dict(
            zip(target.GetSubstructMatch(common),
                annotated.GetSubstructMatch(common)))
        origins = []
        for i in range(target.GetNumAtoms()):
            if i not in dmapping:
                # keep the list aligned with the target atom indices
                origins.append([])
                continue
            atom = annotated.GetAtomWithIdx(dmapping[i])
            origin = cls._get_origin(atom)
            target.GetAtomWithIdx(i).SetProp('_Origin', json.dumps(origin))
            # previously the list was returned empty because nothing was ever appended
            origins.append(origin)
        return origins
예제 #2
0
    def copy_all_possible_origins(cls, annotated: Chem.Mol, target: Chem.Mol) -> Tuple[List[Chem.Mol], List[List[int]]]:
        """
        Build one annotated copy of ``target`` for every possible MCS pairing with ``annotated``.

        Monster leaves a note of what it did: the atom prop ``_Origin`` is a json of a list of
        mol ``_Name`` dot ``AtomIdx``. The atom order seems to be maintained, but it is not trusted here
        (also dummy atoms are stripped), so atoms are paired via a maximum common substructure (MCS).
        Every combination of a target match and an annotated match yields one option.

        ``target`` itself is not modified; each option is a copy whose matched atoms received
        the ``_Origin`` prop and, when available, the coordinates of the paired annotated atom.

        :param annotated: molecule whose atoms carry the ``_Origin`` annotation
        :param target: molecule to copy and annotate
        :return: a list of mols and a parallel list of origins; each origins entry holds one item
            per atom of the option (atom-index order), an empty list for atoms outside the match.
        """
        mcs = rdFMCS.FindMCS([target, annotated],
                             atomCompare=rdFMCS.AtomCompare.CompareElements,
                             bondCompare=rdFMCS.BondCompare.CompareAny,
                             ringMatchesRingOnly=True)
        common = Chem.MolFromSmarts(mcs.smartsString)
        # both match lists are independent of the loop, so compute them once
        target_matches = target.GetSubstructMatches(common)
        annotated_matches = annotated.GetSubstructMatches(common)
        options = []
        originss = []
        for target_match in target_matches:
            for anno_match in annotated_matches:
                # option atom index -> annotated atom index
                dmapping = dict(zip(target_match, anno_match))
                origins = []
                option = Chem.Mol(target)
                for i in range(option.GetNumAtoms()):
                    if i not in dmapping:
                        # keep the list aligned with the atom indices
                        origins.append([])
                        continue
                    atom = annotated.GetAtomWithIdx(dmapping[i])
                    tatom = option.GetAtomWithIdx(i)
                    origin = cls._get_origin(atom)
                    tatom.SetProp('_Origin', json.dumps(origin))
                    # previously nothing was appended, so every entry of originss was empty
                    origins.append(origin)
                    xyz = cls._get_xyz(atom)
                    if xyz:
                        cls._set_xyz(tatom, xyz)
                options.append(option)
                originss.append(origins)
        return options, originss
예제 #3
0
def _GetBurdenMatrix(mol: Chem.Mol, propertylabel: str = 'm') -> numpy.matrix:
    """Build the weighted Burden matrix of ``mol`` and return the real part of its eigenvalues.

    Hydrogens are added before the matrix is assembled.

    :param mol: molecule to describe
    :param propertylabel: name of the atomic property placed on the diagonal
        (passed on to ``GetRelativeAtomicProperty``)
    :return: real part of the eigenvalues of the Burden matrix
    """
    hmol = Chem.AddHs(mol)
    adjacency = Chem.GetAdjacencyMatrix(hmol)
    burden = numpy.array(adjacency, dtype=numpy.float32)

    # Diagonal: relative atomic property of each atom, rounded to 3 decimals.
    for idx, atom in enumerate(hmol.GetAtoms()):
        weight = GetRelativeAtomicProperty(element=atom.GetSymbol(), propertyname=propertylabel)
        burden[idx, idx] = round(weight, 3)

    # Bonded pairs: square root of the bond order.
    # Bond types not listed here keep the value copied from the adjacency matrix.
    bond_weights = {
        'SINGLE': round(numpy.sqrt(1), 3),
        'DOUBLE': round(numpy.sqrt(2), 3),
        'TRIPLE': round(numpy.sqrt(3), 3),
        'AROMATIC': round(numpy.sqrt(1.5), 3),
    }
    for pair in numpy.argwhere(adjacency):
        row, col = pair[0], pair[1]
        bond_name = hmol.GetBondBetweenAtoms(int(row), int(col)).GetBondType().name
        if bond_name in bond_weights:
            burden[row, col] = bond_weights[bond_name]

    # Non-bonded, off-diagonal pairs are set to 0.001.
    for pair in numpy.argwhere(adjacency == 0):
        row, col = pair[0], pair[1]
        if row != col:
            burden[row, col] = 0.001

    return numpy.real(numpy.linalg.eigvals(burden))
예제 #4
0
def __extract_subgraph(mol: Chem.Mol, selected_atoms: Set[int]) -> Tuple[Chem.Mol, List[int]]:
    """
    Cut the atoms in ``selected_atoms`` out of ``mol`` as a new molecule.

    :param mol: molecule to extract from (it is copied, not modified)
    :param selected_atoms: indices of the atoms to keep
    :return: the extracted molecule and the list of "root" indices, i.e. selected atoms
             that have at least one neighbour outside the selection
    """
    selected_atoms = set(selected_atoms)

    # Roots are the selected atoms bonded to something that gets removed.
    roots = [
        idx for idx in selected_atoms
        if any(nei.GetIdx() not in selected_atoms
               for nei in mol.GetAtomWithIdx(idx).GetNeighbors())
    ]

    new_mol = Chem.RWMol(mol)

    for root_idx in roots:
        root = new_mol.GetAtomWithIdx(root_idx)
        root.SetAtomMapNum(1)
        # A root stays aromatic only if an aromatic bond to another selected atom survives.
        keeps_aromatic_bond = any(
            bond.GetBondType() == Chem.rdchem.BondType.AROMATIC
            and bond.GetBeginAtom().GetIdx() in selected_atoms
            and bond.GetEndAtom().GetIdx() in selected_atoms
            for bond in root.GetBonds()
        )
        if not keeps_aromatic_bond:
            root.SetIsAromatic(False)

    # Delete from the highest index down so the remaining indices stay valid.
    for idx in reversed(range(new_mol.GetNumAtoms())):
        if idx not in selected_atoms:
            new_mol.RemoveAtom(idx)

    return new_mol.GetMol(), roots
예제 #5
0
    def from_annotated_mols(cls,
                  annotated_followup: Chem.Mol,
                  hits: Sequence[Chem.Mol]
                  ) -> mRSMD:
        """
        Fragmenstein leaves a note of what it did. atom prop _Origin is a json of a list of mol _Name dot AtomIdx.
        This classmethod accepts a followup which has this annotation and derives, for every hit,
        the list of (followup atom index, hit atom index) pairs from it.

        A hit with an empty ``_Name`` cannot be matched: a message is printed and its mapping is left empty.

        :param annotated_followup: followup molecule whose atoms carry the origin annotation
        :param hits: the hit molecules; each needs a ``_Name`` prop
        :return: an instance built from the followup, the hits and one mapping per hit
        """
        mappings = []
        for hit in hits:
            hname = hit.GetProp('_Name')
            mapping = []
            if hname == '':
                print(f'{hit} has no name!')
            else:
                # The name is escaped so characters such as '.', '+' or '(' in a hit name are
                # matched literally; the raw string avoids invalid escape sequences.
                pattern = re.compile(re.escape(hname) + r'\.(\d+)')
                for followup_idx in range(annotated_followup.GetNumAtoms()):
                    atom = annotated_followup.GetAtomWithIdx(followup_idx)
                    for origin_label in cls._get_origin(atom):
                        found = pattern.match(origin_label)
                        if found is not None:
                            mapping.append((followup_idx, int(found.group(1))))
            mappings.append(mapping)
        return cls(annotated_followup, hits, mappings)
예제 #6
0
    def _categorise(self, mol: Chem.Mol,
                    uniques: set) -> Dict[str, Union[set, Dict]]:
        """
        Describe how the novel atoms of ``mol`` are connected and tag every atom with a ``_Category`` prop.

        The returned dict (called ``categories`` in the methods) has these keys:

        * ``uniques``: the set of atom indices that was passed in
        * ``internals``: unique atoms whose neighbours are all unique atoms
        * ``attachments``: non-unique atoms to which a unique atom is bonded
        * ``pairs``: unique atom idx --> list of dicts with ``idx`` (attachment idx) and ``type`` (bond type)
        * ``dummies``: unique atoms whose symbol equals ``self.dummy_symbol``

        The ``_Category`` prop is one of ``internal``, ``overlapping-attachment``,
        ``internal-attachment`` or ``overlapping``.

        :param mol: molecule to describe (its atoms are modified in place)
        :param uniques: set of indices that are new to this molecule
        :return: the dict described above
        """
        pairs = {}
        internals = set()
        attachments = set()
        dummies = set()
        for unique_idx in uniques:  # novel atoms
            unique_atom = mol.GetAtomWithIdx(unique_idx)
            if unique_atom.GetSymbol() == self.dummy_symbol:
                dummies.add(unique_idx)
            # neighbours that are not novel themselves
            outside = {n.GetIdx() for n in unique_atom.GetNeighbors()} - uniques
            if not outside:
                internals.add(unique_idx)
                continue
            attachments.update(outside)
            pairs[unique_idx] = [
                dict(idx=attached_idx,
                     type=mol.GetBondBetweenAtoms(unique_idx, attached_idx).GetBondType())
                for attached_idx in outside
            ]
        # store on the atoms for safekeeping
        for atom in mol.GetAtoms():
            atom_idx = atom.GetIdx()
            if atom_idx in internals:  # novel and not connected to the overlap
                category = 'internal'
            elif atom_idx in attachments:  # not novel but bonded to a novel atom
                category = 'overlapping-attachment'
            elif atom_idx in pairs:  # novel and bonded to the overlap
                category = 'internal-attachment'
            else:
                category = 'overlapping'
            atom.SetProp('_Category', category)
        return {
            'uniques': uniques,
            'internals': internals,
            'attachments': attachments,
            'pairs': pairs,
            'dummies': dummies,
        }
예제 #7
0
파일: moran.py 프로젝트: OlivierBeq/chemopy
def _CalculateMoranAutocorrelation(mol: Chem.Mol, lag: int = 1, propertylabel: str = 'm') -> float:
    """Calculate weighted Moran autocorrelation descriptors.

    :param mol: molecule to describe
    :param lag: topological distance between atom i and atom j.
    :param propertylabel: type of weighted property
    :return: the descriptor rounded to 3 decimals; 0 when the molecule has no atoms,
        when no atom pair sits at the requested lag, or when all atoms share the same property value
    """
    # One property lookup per atom; the pair loop below reuses these values instead of
    # querying the property again for every pair.
    # NOTE(review): assumes GetRelativeAtomicProperty returns the same value for the same symbol.
    weights = [GetRelativeAtomicProperty(atom.GetSymbol(), propertyname=propertylabel)
               for atom in mol.GetAtoms()]
    n_atoms = len(weights)
    if n_atoms == 0:
        # an empty molecule used to raise ZeroDivisionError when averaging
        return 0.0
    mean_weight = sum(weights) / n_atoms
    centred = [weight - mean_weight for weight in weights]
    squared_sum = sum(numpy.square(value) for value in centred)

    distances = Chem.GetDistanceMatrix(mol)
    cross_sum = 0.0
    pair_count = 0
    # ordered pairs (i, j) at exactly `lag` bonds from each other
    for i in range(n_atoms):
        for j in range(n_atoms):
            if distances[i, j] == lag:
                cross_sum += centred[i] * centred[j]
                pair_count += 1

    if squared_sum == 0 or pair_count == 0:
        result = 0
    else:
        result = (cross_sum / pair_count) / (squared_sum / n_atoms)
    return round(result, 3)
예제 #8
0
    def find_symmetry_classes(rdkit_mol: Chem.Mol) -> Dict[int, str]:
        """
        Assign every atom to a symmetry class (symmetry-equivalent, homotopic atoms share a class)
        based on: https://sourceforge.net/p/rdkit/mailman/message/27897393/
        Our thanks to Dr Michal Krompiec for the symmetrisation method and its implementation.
        :param rdkit_mol: molecule to find symmetry classes for (rdkit mol class object)
        :return: A dict where the keys are the atom indices and the values are their class label
        (the label is the atom's CIP rank as a string; only consistency matters, not the value)
        """

        # Only the first atom is checked for _CIPRank; it is assumed to be present on all atoms if so.
        first_atom = rdkit_mol.GetAtomWithIdx(0)
        if not first_atom.HasProp("_CIPRank"):
            Chem.AssignStereochemistry(rdkit_mol,
                                       cleanIt=True,
                                       force=True,
                                       flagPossibleStereoCenters=True)

        # Rank of each atom, indexed by atom index; atoms sharing a rank are equivalent.
        ranks = [int(atom.GetProp("_CIPRank")) for atom in rdkit_mol.GetAtoms()]

        # Stable sort: atoms are grouped by rank and stay in index order within a rank.
        atoms_by_rank = sorted(range(len(ranks)), key=ranks.__getitem__)

        return {atom_idx: str(ranks[atom_idx]) for atom_idx in atoms_by_rank}
예제 #9
0
def subset_rdmol(rdmol: Chem.Mol,
                atom_indices: Iterable[int],
                check_bonds: bool = True,
                return_atom_indices: bool = False) -> Chem.Mol:
    """
    Return a copy of ``rdmol`` reduced to a subset of its atoms.

    :param rdmol: molecule to subset; it is copied into an ``RWMol`` and not modified
    :param atom_indices: indices of the atoms to keep; any iterable is accepted
    :param check_bonds: if True, an atom outside the selection that is bonded to more than one
        selected atom is kept as well
    :param return_atom_indices: if True, also return the indices that were kept
    :return: the reduced ``RWMol``, or a tuple of it and the list of kept indices
        (sorted when ``check_bonds`` is True) if ``return_atom_indices`` is True
    """
    rdmol = Chem.RWMol(rdmol)
    # Materialise once: the input may be a tuple, a set or a one-shot generator,
    # none of which supported the list concatenation done below.
    atom_indices = list(atom_indices)
    selected = set(atom_indices)
    to_remove = [i for i in range(rdmol.GetNumAtoms()) if i not in selected]
    if check_bonds:
        multiple_bonds = []
        for i in to_remove:
            atom = rdmol.GetAtomWithIdx(i)
            n_bonds = sum(1 for bond in atom.GetBonds()
                          if bond.GetOtherAtomIdx(i) in selected)
            if n_bonds > 1:
                multiple_bonds.append(i)
        atom_indices = sorted(atom_indices + multiple_bonds)
        rescued = set(multiple_bonds)
        to_remove = [i for i in to_remove if i not in rescued]
    # Remove from the highest index down so the remaining indices stay valid.
    for i in reversed(to_remove):
        rdmol.RemoveAtom(i)
    rdmol.UpdatePropertyCache()
    if return_atom_indices:
        return rdmol, atom_indices
    return rdmol
예제 #10
0
def __extract_subgraph(mol: Chem.Mol, selected_atoms: Set[int]) -> Tuple[Chem.Mol, List[int]]:
    """
    Extracts a subgraph from an RDKit molecule given a set of atom indices.

    Root atoms (selected atoms with a neighbour outside the selection) get atom map number 1,
    and lose their aromatic flag when none of their aromatic bonds stays inside the selection.

    :param mol: An RDKit molecule from which to extract a subgraph (copied, not modified).
    :param selected_atoms: The atoms which form the subgraph to be extracted.
    :return: A tuple containing an RDKit molecule representing the subgraph
             and a list of root atom indices from the selected indices.
    """
    selected_atoms = set(selected_atoms)

    def has_outside_neighbour(atom_idx):
        neighbours = mol.GetAtomWithIdx(atom_idx).GetNeighbors()
        return any(nei.GetIdx() not in selected_atoms for nei in neighbours)

    roots = [idx for idx in selected_atoms if has_outside_neighbour(idx)]

    new_mol = Chem.RWMol(mol)

    for root_idx in roots:
        root = new_mol.GetAtomWithIdx(root_idx)
        root.SetAtomMapNum(1)
        surviving_aromatic = [
            bond for bond in root.GetBonds()
            if bond.GetBondType() == Chem.rdchem.BondType.AROMATIC
            and bond.GetBeginAtom().GetIdx() in selected_atoms
            and bond.GetEndAtom().GetIdx() in selected_atoms
        ]
        if not surviving_aromatic:
            root.SetIsAromatic(False)

    # Highest index first, so earlier removals do not shift the indices still to be removed.
    doomed = [atom.GetIdx() for atom in new_mol.GetAtoms() if atom.GetIdx() not in selected_atoms]
    doomed.sort(reverse=True)
    for doomed_idx in doomed:
        new_mol.RemoveAtom(doomed_idx)

    return new_mol.GetMol(), roots
예제 #11
0
def _CalculateEState(mol: Chem.Mol, skipH: bool = True) -> numpy.ndarray:
    """Get the EState value of each atom in the molecule.

    :param mol: molecule to describe; hydrogens are added first and, when ``skipH`` is True,
        removed again, so the result is indexed by the atoms of that processed molecule
    :param skipH: if True, hydrogens are removed before the calculation
    :return: array with one EState value per atom of the processed molecule
    """
    mol = Chem.AddHs(mol)
    if skipH:
        mol = Chem.RemoveHs(mol)
    tb1 = Chem.GetPeriodicTable()
    nAtoms = mol.GetNumAtoms()
    # `numpy.float` was removed in NumPy 1.24; the builtin float is the same dtype.
    Is = numpy.zeros(nAtoms, dtype=float)
    # Intrinsic state of every atom that has at least one neighbour.
    for i in range(nAtoms):
        at = mol.GetAtomWithIdx(i)
        atNum = at.GetAtomicNum()
        d = at.GetDegree()
        if d > 0:
            h = at.GetTotalNumHs()
            dv = tb1.GetNOuterElecs(atNum) - h
            N = _GetPrincipleQuantumNumber(atNum)
            Is[i] = (4.0 / (N * N) * dv + 1) / d
    dists = Chem.GetDistanceMatrix(mol, useBO=0, useAtomWts=0)
    dists += 1
    accum = numpy.zeros(nAtoms, dtype=float)
    # Pairwise perturbation: the difference in intrinsic state damped by the squared distance.
    for i in range(nAtoms):
        for j in range(i + 1, nAtoms):
            p = dists[i, j]
            if p < 1e6:  # very large distances are skipped
                temp = (Is[i] - Is[j]) / (p * p)
                accum[i] += temp
                accum[j] -= temp
    res = accum + Is
    return res
예제 #12
0
    def rename_from_template(self, template: Chem.Mol, overwrite: bool = True):
        """
        Assigns to the atoms in self.mol the names based on the template, which does not need to be a perfect match.
        See ``_fix_atom_names`` for example usage.
        Does not change the Params.

        The atoms are paired through a maximum common substructure that compares elements only
        and accepts any bond order. Note that ``template`` is sanitised in place.

        :param template: mol object with atom names
        :param overwrite: passed on to ``rename_atom``

        :return: None for now.
        """
        AllChem.SanitizeMol(template)  # this is where half my issues come from.
        mcs = rdFMCS.FindMCS([self.mol, template],
                             atomCompare=rdFMCS.AtomCompare.CompareElements,
                             bondCompare=rdFMCS.BondCompare.CompareAny,
                             ringMatchesRingOnly=True)
        common = Chem.MolFromSmarts(mcs.smartsString)
        for acceptor, donor in zip(self.mol.GetSubstructMatch(common),
                                   template.GetSubstructMatch(common)):
            a_atom = self.mol.GetAtomWithIdx(acceptor)
            d_atom = template.GetAtomWithIdx(donor)
            info = d_atom.GetPDBResidueInfo()
            if info:
                self.rename_atom(a_atom, info.GetName(), overwrite=overwrite)
            else:
                # was `self.log.debug.info(...)`, which fails with AttributeError on a logger
                # because `debug` is a method.
                # NOTE(review): debug level chosen; confirm that is the intended level.
                self.log.debug(
                    f'No info in template for atom {d_atom.GetSymbol()} #{donor}'
                )
예제 #13
0
    def guess_origins(self,
                      mol: Chem.Mol = None,
                      hits: Optional[List[Chem.Mol]] = None):
        """
        Given a positioned mol guess its origins...

        For every hit, each atom of ``mol`` that ``get_positional_mapping`` pairs with a hit atom
        gets ``<hit name>.<hit atom index>`` appended to its ``_Origin`` prop (a json list).
        Atoms flagged with a true ``_Novel`` prop are skipped.

        :param mol: the positioned molecule; its atoms are modified in place
        :param hits: the hits to consider, ``self.hits`` if not given
        :return: None
        """
        if hits is None:
            hits = self.hits
        for hit in hits:
            hname = hit.GetProp('_Name')
            positional = self.get_positional_mapping(hit, mol)
            for hi, mi in positional.items():
                atom = mol.GetAtomWithIdx(mi)
                if atom.HasProp('_Novel') and atom.GetBoolProp('_Novel'):
                    continue  # flagged to avoid.
                # an absent prop or the placeholder 'none' both mean "no origin yet"
                annotated = atom.HasProp('_Origin') and atom.GetProp('_Origin') != 'none'
                origin = json.loads(atom.GetProp('_Origin')) if annotated else []
                origin.append(f'{hname}.{hi}')
                atom.SetProp('_Origin', json.dumps(origin))
예제 #14
0
    def _pre_fragment_pairs(self, scaffold: Chem.Mol, fragmentanda: Chem.Mol, A2B_mapping: Optional = None) \
            -> Dict[int, List[Dict]]:
        """
        Returns

            {4: [{'idx': 5,
                   'type': rdkit.Chem.rdchem.BondType.SINGLE,
                   'idx_F': 5,
                   'idx_S': 1}], ...}

        which is slightly more than the ``pairs`` entry of the categories
        ({5: [{'idx': 4, 'type': rdkit.Chem.rdchem.BondType.SINGLE}], ...).

        idx_F: fragmentanda index
        idx_S: scaffold index

        Required for self.merge; the key is the index of the anchoring atom.

        Calls get_positional_mapping (unless a mapping is given) and _categorise.
        Scaffold atoms whose element differs from the mapped fragmentanda atom
        get the alternative symbol stored in the ``_AltSymbol`` prop.

        :param scaffold: mol to be added to.
        :param fragmentanda: mol to be fragmented
        :param A2B_mapping: see ``get_positional_mapping``
        :return: the pairs dict described above
        :raises ConnectionError: if the mapping is empty
        """
        if A2B_mapping is None:
            A2B_mapping = self.get_positional_mapping(scaffold, fragmentanda)
        if len(A2B_mapping) == 0:
            raise ConnectionError('No overlap!')

        def scaffold_index_of(fragmentanda_idx):
            # reverse lookup: first scaffold index that maps to this fragmentanda index
            scaffold_indices = list(A2B_mapping.keys())
            fragmentanda_indices = list(A2B_mapping.values())
            return scaffold_indices[fragmentanda_indices.index(fragmentanda_idx)]

        # store alternative atom symbols.
        for scaffold_idx, fragmentanda_idx in A2B_mapping.items():
            scaffold_atom = scaffold.GetAtomWithIdx(scaffold_idx)
            fragmentanda_symbol = fragmentanda.GetAtomWithIdx(fragmentanda_idx).GetSymbol()
            if scaffold_atom.GetSymbol() != fragmentanda_symbol:
                scaffold_atom.SetProp('_AltSymbol', fragmentanda_symbol)

        # atoms of fragmentanda that are not covered by the mapping
        mapped = set(A2B_mapping.values())
        uniques = set(range(fragmentanda.GetNumAtoms())) - mapped
        pairs = self._categorise(fragmentanda, uniques)['pairs']
        for attachment_list in pairs.values():  # pairs: Dict[int, List[Dict]]
            for attachment in attachment_list:
                attachment['idx_F'] = attachment['idx']  # less ambiguous: fragmentanda index
                attachment['idx_S'] = scaffold_index_of(attachment['idx'])  # scaffold index
        return pairs
    def find_closest_to_ligand(cls, pdb: Chem.Mol, ligand_resn: str) -> Tuple[Chem.Atom, Chem.Atom]:
        """
        Find the pair of closest atoms between the ligand and the rest of the structure.

        :param pdb: a rdkit Chem object
        :param ligand_resn: 3 letter code
        :return: tuple of non-ligand atom and ligand atom
        :raises ValueError: if no atom has ``ligand_resn`` as residue name
        """
        ligand = []
        for atom in pdb.GetAtoms():
            info = atom.GetPDBResidueInfo()
            # atoms without PDB residue info cannot belong to the ligand
            if info is not None and info.GetResidueName() == ligand_resn:
                ligand.append(atom.GetIdx())
        if not ligand:
            # fail with a clear message instead of numpy's error on an empty reduction
            raise ValueError(f'No atom with residue name {ligand_resn!r} found.')
        dm = Chem.Get3DDistanceMatrix(pdb)
        # rows: ligand atoms, columns: all atoms
        mini = np.take(dm, ligand, 0)
        mini[mini == 0] = np.nan  # ignore zero distances (includes each atom with itself)
        mini[:, ligand] = np.nan  # ignore ligand-ligand distances
        a, b = np.where(mini == np.nanmin(mini))
        lig_atom = pdb.GetAtomWithIdx(ligand[int(a[0])])
        nonlig_atom = pdb.GetAtomWithIdx(int(b[0]))
        return (nonlig_atom, lig_atom)
예제 #16
0
파일: decomp.py 프로젝트: zhedian/RMG-Py
def _get_substruct(mol: Chem.Mol, atoms: List[int]) -> str:
    """Return the SMILES of the substructure of ``mol`` spanned by the atom indices ``atoms``.

    A single-atom molecule is converted as a whole. The caller's ``atoms`` list is not modified.
    """
    if mol.GetNumAtoms() != 1:
        if len(atoms) == 1:
            # For single-atom cliques, we want the substructure to contain its neighbors
            centre = mol.GetAtomWithIdx(atoms[0])
            atoms = atoms[:] + [nei.GetIdx() for nei in centre.GetNeighbors()]
        smiles = Chem.MolFragmentToSmiles(mol, atoms, kekuleSmiles=True)
    else:
        smiles = _mol_to_smi(mol)
    fragment = Chem.MolFromSmiles(smiles, sanitize=False)
    return _mol_to_smi(_copy_mol(fragment))
예제 #17
0
def _CalculateAtomEState(mol: Chem.Mol, AtomicNum=6) -> float:
    """Calculate the sum of the EState indices over all atoms with specified atomic number.

    :param mol: molecule to describe
    :param AtomicNum: atomic number of the atoms to sum over (6, carbon, by default)
    :return: the summed EState value
    """
    nAtoms = mol.GetNumAtoms()
    # `numpy.float` was removed in NumPy 1.24; the builtin float is the same dtype.
    Is = numpy.zeros(nAtoms, dtype=float)
    # NOTE(review): the EState values are indexed with the atom indices of `mol`;
    # confirm _CalculateEState returns one value per atom of `mol` in the same order.
    Estate = _CalculateEState(mol)
    for i in range(nAtoms):
        if mol.GetAtomWithIdx(i).GetAtomicNum() == AtomicNum:
            Is[i] = Estate[i]
    return sum(Is)
    def _categorise(self, mol: Chem.Mol,
                    uniques: set) -> Dict[str, Union[set, Dict]]:
        """
        Describe how the novel atoms of ``mol`` are connected.

        The returned dict (called ``categories`` in the methods) has these keys:

        * ``uniques``: the set of atom indices that was passed in
        * ``internals``: unique atoms whose neighbours are all unique atoms
        * ``attachments``: non-unique atoms to which a unique atom is bonded
        * ``pairs``: unique atom idx --> list of dicts with ``idx`` (attachment idx) and ``type`` (bond type)
        * ``dummies``: unique atoms whose symbol equals ``self.dummy_symbol``

        If ``self._debug_draw`` is set, the molecule is drawn with the internals, attachments
        and anchors (unique atoms that are not internal) highlighted.

        :param mol: molecule to describe
        :param uniques: set of indices that are new to this molecule
        :return: the dict described above
        """
        pairs = {}
        internals = set()
        attachments = set()
        dummies = set()
        for unique_idx in uniques:
            unique_atom = mol.GetAtomWithIdx(unique_idx)
            if unique_atom.GetSymbol() == self.dummy_symbol:
                dummies.add(unique_idx)
            # neighbours that are not novel themselves
            outside = {n.GetIdx() for n in unique_atom.GetNeighbors()} - uniques
            if not outside:
                internals.add(unique_idx)
                continue
            attachments.update(outside)
            pairs[unique_idx] = [
                dict(idx=attached_idx,
                     type=mol.GetBondBetweenAtoms(unique_idx, attached_idx).GetBondType())
                for attached_idx in outside
            ]
        if self._debug_draw:
            anchors = uniques - internals
            # later groups override earlier ones when an index appears in more than one
            color = {}
            color.update({i: (0, 0.8, 0) for i in internals})
            color.update({i: (0, 0, 0.8) for i in attachments})
            color.update({i: (0.8, 0, 0.8) for i in anchors})
            high = [*internals, *attachments, *anchors]
            self.draw_nicely(mol,
                             highlightAtoms=high,
                             highlightAtomColors=color)
        return {
            'uniques': uniques,
            'internals': internals,
            'attachments': attachments,
            'pairs': pairs,
            'dummies': dummies,
        }
예제 #19
0
    def process_mol(self, test_mol: Chem.Mol) -> list:
        """
        Decompose molecule in sidechains.

        The atom map numbers of ``test_mol`` are overwritten in the process.

        @param test_mol: input molecule
        @return: list of R-groups as SMILES, empty if the scaffold does not match
        """
        # The subgraph match of the scaffold onto the molecule
        match_list = test_mol.GetSubstructMatches(self.rg_mol, False)
        if len(match_list) == 0:
            return []

        # One candidate R-group list per match, to take care of all symmetry mappings
        rgroup_smiles_lst = []
        for match in match_list:
            for atm in test_mol.GetAtoms():
                atm.SetAtomMapNum(0)
            scaffold_atoms = set(match)
            # map atom map numbers from the scaffold to the molecule
            for test_idx, query_idx in zip(match, self.rg_map_lst):
                match_atm = test_mol.GetAtomWithIdx(test_idx)
                match_atm.SetAtomMapNum(query_idx)
                # Push the atom map numbers to the non-scaffold neighbors
                for nbr in match_atm.GetNeighbors():
                    is_unlabelled = nbr.GetAtomMapNum() == 0
                    if is_unlabelled and int(nbr.GetIdx()) not in scaffold_atoms:
                        nbr.SetAtomMapNum(query_idx)
            # Delete the scaffold, should only leave labeled R-groups
            rgroup_mol = Chem.DeleteSubstructs(test_mol, self.rg_mol)
            for atm in rgroup_mol.GetAtoms():
                # Get rid of implicit hydrogens on the terminal atoms of the substituents
                if atm.GetAtomMapNum() > 0:
                    atm.SetNoImplicit(True)
            # Start from hydrogen substituents [[H:1],[H:2],...] and overwrite the slots that are found
            slots = ["[H][*:%d]" % x for x in self.rg_idx_lst]
            rgroup_smiles_lst.append(slots)
            fragments = Chem.GetMolFrags(rgroup_mol,
                                         asMols=True,
                                         sanitizeFrags=False)
            for frag in fragments:
                frag_idx = get_fragment_idx(frag)
                # This enables us to skip over stray fragments that may not have R-group labels
                if frag_idx > 0:
                    new_frag = grow_rgroup_atoms(frag)
                    slots[frag_idx - 1] = Chem.MolToSmiles(new_frag, True)
        # Here's where we handle symmetry mapping. There may be multiple ways to map the scaffold onto
        # the molecule. We want the mapping that results in the largest number of non-hydrogen
        # R-groups, i.e. the fewest hydrogens used as R-groups; the first such mapping wins ties.
        return min(rgroup_smiles_lst, key=count_hydrogens)
 def _recruit_team(self,
                   mol: Chem.Mol,
                   starting: int,
                   categories: dict,
                   team: Optional[set] = None) -> set:
     """
     Collect the atom ``starting`` plus every atom listed in ``categories['internals']``
     that can be reached from it through atoms of that same set.

     :param mol: molecule to walk
     :param starting: index of the atom to start from (it is always added to the team)
     :param categories: dict with an ``internals`` entry holding atom indices
     :param team: indices that are already recruited; they are not visited again.
         The set is extended in place.
     :return: the team, as a set of atom indices
     """
     if team is None:
         team = set()
     team.add(starting)
     # Explicit stack instead of recursion, so a long chain of atoms cannot hit the recursion limit.
     pending = [starting]
     while pending:
         current = pending.pop()
         for atom in mol.GetAtomWithIdx(current).GetNeighbors():
             i = atom.GetIdx()
             if i in categories['internals'] and i not in team:
                 team.add(i)
                 pending.append(i)
     return team
예제 #21
0
 def _recruit_team(self,
                   mol: Chem.Mol,
                   starting: int,
                   uniques: set,
                   team: Optional[set] = None) -> set:
     """
     Collect the atom ``starting`` plus every atom of ``uniques`` that can be reached
     from it through atoms of ``uniques``.

     :param mol: molecule to walk
     :param starting: index of the atom to start from (it is always added to the team)
     :param uniques: indices of the atoms that may be recruited
     :param team: indices that are already recruited; they are not visited again.
         The set is extended in place.
     :return: the team, as a set of atom indices
     """
     if team is None:
         team = set()
     team.add(starting)
     frontier = [starting]
     while frontier:
         current_idx = frontier.pop()
         for neighbour in mol.GetAtomWithIdx(current_idx).GetNeighbors():
             neighbour_idx = neighbour.GetIdx()
             if neighbour_idx not in uniques or neighbour_idx in team:
                 continue
             team.add(neighbour_idx)
             frontier.append(neighbour_idx)
     return team
예제 #22
0
    def mol_with_atom_index(mol: Chem.Mol) -> Chem.Mol:
        """
        Make a copy of the molecule in which every atom carries its own index
        in the ``molAtomMapNumber`` prop, so that the indices show when it is displayed.

        :param mol: target molecule (left untouched)
        :return: labelled molecule
        """
        labelled = Chem.Mol(mol)
        for atom in labelled.GetAtoms():
            atom.SetProp('molAtomMapNumber', str(atom.GetIdx()))
        return labelled
예제 #23
0
def CalculateChiv4pc(mol: Chem.Mol) -> float:
    """Calculate valence molecular connectivity chi index for path/cluster.

    :param mol: molecule to describe
    :return: sum, over every match of the path/cluster pattern, of the reciprocal square root
        of the product of the non-zero Hall-Kier deltas of the matched atoms
    """
    accum = 0.0
    patt = Chem.MolFromSmarts('*~*(~*)~*~*')
    for cluster in mol.GetSubstructMatches(patt):
        deltas = [_AtomHallKierDeltas(mol.GetAtomWithIdx(x)) for x in cluster]
        # zero deltas are left out of the product
        deltas = [delta for delta in deltas if delta != 0]
        if deltas:
            # `numpy.float` was removed in NumPy 1.24; the builtin float is the same dtype.
            product = numpy.array(deltas, dtype=float).prod()
            accum = accum + 1. / numpy.sqrt(product)
    return accum
예제 #24
0
 def mol2xyz_by_confid(molecule: Mol,
                       prefix='rdmol',
                       confid=0,
                       comment_line=''):
     """
     Write one conformer of ``molecule`` to the XYZ file ``<prefix>_<confid>.xyz``.

     :param molecule: molecule holding the conformer
     :param prefix: first part of the file name
     :param confid: id of the conformer to write, also used in the file name
     :param comment_line: text for the second line of the file
     """
     natoms = molecule.GetNumAtoms()
     # header: atom count, then the comment line
     chunks = ["{}\n{}\n".format(natoms, comment_line)]
     for i in range(natoms):
         position = molecule.GetConformer(confid).GetAtomPosition(i)
         symbol = molecule.GetAtomWithIdx(i).GetSymbol()
         chunks.append("{}\t{:.6} {:.6} {:.6}\n".format(symbol, position.x,
                                                        position.y, position.z))
     filename = "{}_{}.xyz".format(prefix, confid)
     with open(filename, 'w') as f:
         f.write("".join(chunks))
0
    def get_combined_rmsd(cls,
                          followup_moved: Chem.Mol,
                          followup_placed: Optional[Chem.Mol] = None,
                          hits: Optional[List[Chem.Mol]] = None) -> float:
        """
        Deprecated.
        The inbuilt RMSD calculations in RDKit align the two molecules, this does not align them.
        This deals with the case of multiple hits.
        For euclidean distance the square root of the sum of the differences in each coordinates is taken.
        For a regular RMSD the still-squared distance is averaged before taking the root.
        Here the average is done across all the atom pairs between each hit and the followup.
        Therefore, atoms in followup that derive in the blended molecule by multiple atom are scored multiple times.

        As a classmethod ``followup_placed`` and ``hits`` must be provided. But as an instance method they don't.

        :param followup_moved: followup compound moved by Igor or similar
        :param followup_placed: followup compound as placed by Monster
        :param hits: list of hits.
        :return: combined RMSD
        :raises ZeroDivisionError: if no atom of the followup maps to any hit
        """
        # class or instance?
        # NOTE(review): `hasattr(cls, '__class__')` holds for classes and instances alike,
        # so these two asserts never fire; they are kept unchanged.
        if followup_placed is None:  # instance
            assert hasattr(
                cls, '__class__'
            ), 'if called as a classmethod the list of hits need to be provided.'
            followup_placed = cls.positioned_mol
        if hits is None:  # instance
            assert hasattr(
                cls, '__class__'
            ), 'if called as a classmethod the list of hits need to be provided.'
            hits = cls.hits
        for i in range(followup_placed.GetNumAtoms()):
            assert followup_placed.GetAtomWithIdx(
                i).GetSymbol() == followup_moved.GetAtomWithIdx(
                    i).GetSymbol(), 'The atoms order is changed.'
        if followup_moved.GetNumAtoms() > followup_placed.GetNumAtoms():
            warn(
                f'Followup moved {followup_moved.GetNumAtoms()} has more atoms that followup placed {followup_placed.GetNumAtoms()}. Assuming these are hydrogens.'
            )
        # calculate
        tatoms = 0
        d = 0
        for hit in hits:
            mapping = list(
                cls.get_positional_mapping(followup_placed, hit).items())
            tatoms += len(mapping)
            if len(mapping) == 0:
                continue
            d += cls._get_square_deviation(followup_moved, hit, mapping)
        # Average the squared deviation first, then take the root.
        # The previous `d / tatoms**0.5` bound the power to `tatoms` only, i.e. d / sqrt(tatoms).
        # NOTE(review): assumes _get_square_deviation returns a sum of squared distances.
        return (d / tatoms) ** 0.5
    def copy_names(cls, acceptor_mol: Chem.Mol, donor_mol: Chem.Mol):
        """
        Copy names from donor to acceptor by finding MCS.
        Does it properly and uses ``PDBResidueInfo``.

        The MCS compares elements and bond orders. ``acceptor_mol`` is modified in place:
        each matched atom receives the residue info of its donor counterpart.

        :param acceptor_mol: needs atomnames
        :param donor_mol: has atomnames
        :return: None
        """
        mcs = rdFMCS.FindMCS([acceptor_mol, donor_mol],
                             atomCompare=rdFMCS.AtomCompare.CompareElements,
                             bondCompare=rdFMCS.BondCompare.CompareOrder,
                             ringMatchesRingOnly=True)
        common = Chem.MolFromSmarts(mcs.smartsString)
        # was `acceptor_mol.positioned_mol.GetSubstructMatch(...)`: the acceptor is the molecule
        # itself (it is indexed with GetAtomWithIdx below), so it has no such attribute.
        pos_match = acceptor_mol.GetSubstructMatch(common)
        pdb_match = donor_mol.GetSubstructMatch(common)
        for m, p in zip(pos_match, pdb_match):
            ma = acceptor_mol.GetAtomWithIdx(m)
            pa = donor_mol.GetAtomWithIdx(p)
            assert ma.GetSymbol() == pa.GetSymbol(), 'The indices do not align! ' + \
                                                     f'{ma.GetIdx()}:{ma.GetSymbol()} vs. ' + \
                                                     f'{pa.GetIdx()}:{pa.GetSymbol()}'
            ma.SetMonomerInfo(pa.GetPDBResidueInfo())
예제 #27
0
def conformer_to_xyz(molecule: Mol, conf_id=0, comment=None) -> str:
    """Render one conformer of a molecule as XYZ-style text.

    The result has the atom count on the first line, the comment (or nothing)
    on the second line, and then one ``symbol x y z`` line per atom in atom
    index order. There is no trailing newline.

    Args:
        molecule: Molecule whose atoms and conformer are read.
        conf_id: Identifier handed to ``molecule.GetConformer``.
        comment: Optional text for the second line; skipped when falsy.

    Returns:
        The assembled text.
    """
    atom_count = molecule.GetNumAtoms()

    header = f'{atom_count}\n'
    if comment:
        header += comment

    coords = molecule.GetConformer(conf_id)

    # Each atom record carries its own leading newline, so the header line
    # holding the comment is terminated by the first record.
    parts = [header]
    for idx in range(molecule.GetNumAtoms()):
        symbol = molecule.GetAtomWithIdx(idx).GetSymbol()
        point = coords.GetAtomPosition(idx)
        parts.append(f'\n{symbol} {point.x} {point.y} {point.z}')

    return ''.join(parts)
예제 #28
0
    def assign_features(self, mol: Mol):
        """Flag every atom of ``mol`` as hydrogen-bond acceptor and/or donor.

        Each atom first gets the string properties ``_Feature_Acceptor`` and
        ``_Feature_Donor`` set to ``'0'``. The features reported by
        ``self.feature_factory`` are then walked, and the atoms of every
        feature whose family is ``'Acceptor'`` or ``'Donor'`` get the matching
        property set to ``'1'``. Other families are ignored. The molecule is
        modified in place and nothing is returned.
        """
        # Feature family -> atom property that records it.
        prop_by_family = {
            'Acceptor': '_Feature_Acceptor',
            'Donor': '_Feature_Donor',
        }

        # Reset pass: every atom starts out unflagged.
        for atom in mol.GetAtoms():
            for prop_name in prop_by_family.values():
                atom.SetProp(prop_name, '0')

        for feature in self.feature_factory.GetFeaturesForMol(mol):
            prop_name = prop_by_family.get(feature.GetFamily())
            for atom_idx in feature.GetAtomIds():
                atom = mol.GetAtomWithIdx(atom_idx)
                if prop_name is not None:
                    atom.SetProp(prop_name, '1')
예제 #29
0
def convert_to_graph(mol: Chem.Mol, scaffold_ids: t.Tuple[int],
                     anchors: t.Dict[int, int], hba_ids: t.Tuple[int],
                     hbd_ids: t.Tuple[int]) -> nx.Graph:
    """
    Build an undirected `nx.Graph` from a `Chem.Mol`.

    There is one node per atom (keyed by atom index) and one edge per bond.
    Every node carries a ``'symbol'`` attribute; the ``'anchor'``,
    ``'is_hba'``, ``'is_hbd'`` and ``'is_scaffold'`` attributes are only set
    on the nodes named by the corresponding argument.

    Args:
        mol (Chem.Mol):
            The molecule object to be converted
        scaffold_ids (t.Tuple[int]):
            The atoms that belong to the scaffold
        anchors (t.Dict[int, int]):
            The mapping from an atom in the molecule to the atom in the
            scaffold it is attached to
        hba_ids (t.Tuple[int]):
            The atoms corresponding to hydrogen acceptors
        hbd_ids (t.Tuple[int]):
            The atoms corresponding to hydrogen donors

    Returns:
        nx.Graph:
            The converted graph
    """
    graph = nx.Graph()

    # Topology: atoms become nodes, bonds become edges.
    atom_ids = range(mol.GetNumAtoms())
    graph.add_nodes_from(atom_ids)
    graph.add_edges_from([(b.GetBeginAtomIdx(), b.GetEndAtomIdx())
                          for b in mol.GetBonds()])

    # Per-node annotations.
    for atom_id in atom_ids:
        graph.nodes[atom_id]['symbol'] = mol.GetAtomWithIdx(atom_id).GetSymbol()
    for atom_id, scaffold_atom in anchors.items():
        graph.nodes[atom_id]['anchor'] = scaffold_atom

    # Boolean markers, applied in the same order as the attribute names below.
    flagged = (
        ('is_hba', hba_ids),
        ('is_hbd', hbd_ids),
        ('is_scaffold', scaffold_ids),
    )
    for flag, atom_group in flagged:
        for atom_id in atom_group:
            graph.nodes[atom_id][flag] = True

    return graph
예제 #30
0
def get_scaffold_anchors(mol: Chem.Mol,
                         scaffold_ids: t.Tuple[int]) -> t.Dict[int, int]:
    """
    Map every atom bonded to a scaffold atom onto its anchor in the scaffold.

    The keys are atom indices in the molecule; the values are positions within
    `scaffold_ids` (not molecule indices) of the scaffold atom the key atom is
    bonded to. When an atom neighbours several scaffold atoms, the one that
    comes first in `scaffold_ids` wins. Neighbours are not filtered, so an
    atom that is itself listed in `scaffold_ids` is recorded as well if it is
    bonded to another scaffold atom.
    """
    anchors: t.Dict[int, int] = {}
    for position, mol_idx in enumerate(scaffold_ids):
        for nbr in mol.GetAtomWithIdx(mol_idx).GetNeighbors():
            # setdefault keeps the first anchor found for a neighbour.
            anchors.setdefault(nbr.GetIdx(), position)
    return anchors