Example #1
0
def renumberResidues(mol):
    """Renumber residues sequentially and relabel chains, in place.

    Chains are relabelled ``A``, ``B``, ... in order of first appearance.
    Within each chain, every distinct original residue number is mapped to
    1, 2, 3, ... in order of first appearance. Each atom's monomer info is
    replaced by a fresh ``AtomPDBResidueInfo`` carrying only the atom name,
    new chain id, new residue number, residue name and the hetero flag
    (set when the residue name is not in ``AA``).

    Parameters
    ----------
    mol : rdkit.Chem.rdchem.Mol
        Molecule whose atoms all carry PDB residue info. Modified in place.

    Raises
    ------
    IndexError
        If there are more distinct chain ids than available labels (36).
        Raised before any atom is modified.
    """
    CHAINS = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'

    # First pass: assign a new label to every chain in order of appearance.
    chainMap = {}
    for atom in mol.GetAtoms():
        chainId = atom.GetPDBResidueInfo().GetChainId()
        if chainId not in chainMap:
            chainMap[chainId] = CHAINS[len(chainMap)]

    # Second pass: rewrite the residue info of every atom.
    # residueMap[chain][old number] -> new number. Keeping the mapping (rather
    # than just counting the residues seen so far) guarantees that atoms of
    # the same residue get the same new number even when they are not
    # contiguous in the atom list (e.g. hydrogens appended at the end).
    residueMap = {}
    for atom in mol.GetAtoms():
        res = atom.GetPDBResidueInfo()
        chainId = res.GetChainId()
        resName = res.GetResidueName()
        numbering = residueMap.setdefault(chainId, {})
        newresn = numbering.setdefault(res.GetResidueNumber(),
                                       len(numbering) + 1)
        mi = Chem.AtomPDBResidueInfo()
        mi.SetName(res.GetName())
        mi.SetChainId(chainMap[chainId])
        mi.SetResidueNumber(newresn)
        mi.SetResidueName(resName)
        mi.SetIsHeteroAtom(resName not in AA)
        atom.SetMonomerInfo(mi)
Example #2
0
 def from_rdkit(cls, mol, resname="UNL", resnumber=1, chain=""):
     """Create a Molecule from an RDKit molecule.

     Directly instantiating with ``prolif.Molecule(mol)`` also works, but
     this constructor ensures that every atom is linked to an
     AtomPDBResidueInfo, which ProLIF requires.

     Parameters
     ----------
     mol : rdkit.Chem.rdchem.Mol
         The input RDKit molecule
     resname : str
         Residue name used when the molecule has no residue info
     resnumber : int
         Residue number used when the molecule has no residue info
     chain : str
         Chain id used when the molecule has no residue info

     Notes
     -----
     Only the first atom is checked for existing monomer info. When it has
     none, a deep copy of the molecule is made and *all* of its atoms are
     patched with info built from ``resname``, ``resnumber`` and ``chain``;
     the input molecule itself is left untouched.
     """
     already_annotated = mol.GetAtomWithIdx(0).GetMonomerInfo()
     if not already_annotated:
         # Work on a copy so the caller's molecule is not modified.
         mol = copy.deepcopy(mol)
         for atom in mol.GetAtoms():
             # Atom name: a leading space followed by the element symbol,
             # left-aligned and cut/padded to three characters.
             atom_name = f" {atom.GetSymbol():<3.3}"
             atom.SetMonomerInfo(
                 Chem.AtomPDBResidueInfo(atom_name,
                                         residueName=resname,
                                         residueNumber=resnumber,
                                         chainId=chain))
     return cls(mol)
 def _fix_atom_names(self):
     """Make sure every atom of ``self.mol`` has a unique PDB atom name.

     For each atom a fallback name ``<element><count>`` is generated (dummy
     atoms ``*`` use the prefix ``CONN``), skipping names already taken.
     Atoms without PDB residue info receive new info carrying the fallback
     name; atoms whose existing name clashes with an earlier atom are renamed
     to the fallback (with a warning); all other atoms keep their name.
     """
     counts = defaultdict(int)  # per-element running counter
     taken = set()              # atom names handed out or seen so far
     for atom in self.mol.GetAtoms():
         idx = atom.GetIdx()
         symbol = atom.GetSymbol()
         if symbol == '*':
             symbol = 'CONN'
         # The counter advances for every atom, even those that keep their
         # own name (compatible with mol_to_params.py).
         counts[symbol] += 1
         fallback = f'{symbol}{counts[symbol]}'
         while fallback in taken:
             counts[symbol] += 1
             fallback = f'{symbol}{counts[symbol]}'
         info = atom.GetPDBResidueInfo()
         if info is None:
             fresh = Chem.AtomPDBResidueInfo(atomName=fallback,
                                             serialNumber=idx,
                                             residueName=self.NAME,
                                             isHeteroAtom=True)
             atom.SetMonomerInfo(fresh)
             taken.add(fallback)
             continue
         current = info.GetName()
         if current not in taken:
             taken.add(current)
             continue
         warn(f'Name clash {current}, second one now called {fallback}')
         info.SetName(fallback)
         taken.add(fallback)
Example #4
0
def write_rd_pdb(mol_ff, rd_mol, residue_name, output_file):
    """Write ``rd_mol`` to a PDB file that holds only ATOM/HETATM records.

    Every atom of ``rd_mol`` gets fresh PDB residue info: its name is taken
    from the atom at the same position in ``mol_ff.atoms``, the residue
    number is 1 and the hetero flag is off. The molecule is written with
    RDKit and the file is then rewritten keeping only the coordinate
    records, because anything else (e.g. CONECT) breaks leap.

    Parameters
    ----------
    mol_ff : object
        Force-field molecule exposing ``atoms[i].name``; must have at least
        as many atoms as ``rd_mol``.
    rd_mol : rdkit.Chem.rdchem.Mol
        Molecule to write; its atoms' monomer info is overwritten in place.
    residue_name : str
        Residue name stored on every atom.
    output_file : str
        Path of the PDB file to create.
    """
    for index, atom in enumerate(rd_mol.GetAtoms()):
        atom_name = mol_ff.atoms[index].name
        mi = Chem.AtomPDBResidueInfo()
        mi.SetName(atom_name)
        # the rdkit PDB residue name has incorrect whitespace
        mi.SetResidueName(''.ljust(4 - len(atom_name)) + residue_name)
        mi.SetResidueNumber(1)
        mi.SetIsHeteroAtom(False)
        atom.SetMonomerInfo(mi)

    Chem.MolToPDBFile(rd_mol, output_file, flavor=2)

    # CONECT records break leap: keep only the coordinate records.
    # PDB is a fixed-column format, so test the record-name prefix instead of
    # tokenising the line: splitting fails on blank lines and misses records
    # whose name runs into the serial number (e.g. "HETATM12345").
    with open(output_file, 'r') as f:
        pdb_data = [line for line in f
                    if line.startswith(('ATOM', 'HETATM'))]

    with open(output_file, 'w') as f:
        f.writelines(pdb_data)
Example #5
0
def toRDKITmol(mol, protidx, sanitize=True, removeHs=False):
    """Build an RDKit molecule from a subset of atoms of ``mol``.

    Parameters
    ----------
    mol : Molecule
        Source molecule exposing per-atom arrays (``element``, ``charge``,
        ``name``, ``serial``, ``altloc``, ``resname``, ``resid``, ``chain``,
        ``insertion``, ``occupancy``, ``beta``, ``record``), ``coords``,
        ``frame`` and ``_getBonds()``.
    protidx : sequence of int
        Indices (into ``mol``) of the atoms to convert, in output order.
    sanitize : bool
        If True the molecule is sanitized; otherwise only the property cache
        is updated.
    removeHs : bool
        Only used when ``sanitize`` is True: hydrogens are removed with
        ``Chem.RemoveHs`` (which also sanitizes) and the hydrogen-free
        molecule is what gets returned.

    Returns
    -------
    rdkit.Chem.rdchem.Mol
        The converted molecule with one 3D conformer. All bonds are single
        bonds; bond orders are not transferred.
    """
    # Taken from rdkit/Code/GraphMol/FileParsers/PDBParser.cpp
    conformer = Chem.Conformer(len(protidx))
    conformer.Set3D(True)
    conformer.SetId(0)
    rdmol = Chem.RWMol()
    # Maps an atom index of `mol` to the index of its copy in `rdmol`.
    new_index = {}
    for ii, i in enumerate(protidx):
        a = Chem.Atom(mol.element[i])
        a.SetFormalCharge(int(mol.charge[i]))
        info = Chem.AtomPDBResidueInfo(atomName=mol.name[i],
                                       serialNumber=int(mol.serial[i]),
                                       altLoc=mol.altloc[i],
                                       residueName=mol.resname[i],
                                       residueNumber=int(mol.resid[i]),
                                       chainId=mol.chain[i],
                                       insertionCode=mol.insertion[i],
                                       occupancy=float(mol.occupancy[i]),
                                       tempFactor=float(mol.beta[i]),
                                       isHeteroAtom=mol.record[i] == 'HETATM')
        a.SetMonomerInfo(info)

        rdmol.AddAtom(a)
        new_index[int(i)] = ii
        coor = [float(c) for c in mol.coords[i, :, mol.frame]]
        conformer.SetAtomPosition(ii, Point3D(coor[0], coor[1],
                                              coor[2]))  # Correct the atom idx
    rdmol.AddConformer(conformer)

    # Here I diverge from the C++ parser because you cannot instantiate Chem.Bond objects in python
    # I also don't take into account double/triple bonds etc since I don't think we actually store them in Molecule
    for b in mol._getBonds():
        begin = new_index.get(int(b[0]))
        end = new_index.get(int(b[1]))
        if begin is None or end is None:
            # At least one end of the bond is outside the selection.
            continue
        # The duplicate check must use rdmol indices, not the indices of the
        # source molecule.
        if rdmol.GetBondBetweenAtoms(begin, end) is None:
            rdmol.AddBond(begin, end, Chem.BondType.SINGLE)

    # Proximitybonds I already did by using _getBonds which calls _guessBonds
    # TODO: Set PDB double bonds

    if sanitize:
        if removeHs:
            # RemoveHs returns a new molecule instead of editing in place, so
            # the result has to be kept for the option to have any effect.
            rdmol = Chem.RemoveHs(rdmol)
        else:
            Chem.SanitizeMol(rdmol)
    else:
        rdmol.UpdatePropertyCache()

    # Set tetrahedral chirality from 3D co-ordinates
    Chem.AssignAtomChiralTagsFromStructure(rdmol)
    StandardPDBResidueChirality(rdmol)

    return rdmol
Example #6
0
def get_pdbinfo(atom):
    """Return the atom's PDB residue info, or a blank one if it has none.

    The blank ``AtomPDBResidueInfo`` is only returned; it is not attached to
    the atom.
    """
    # TODO: should I copy the atom and/or info?
    existing = atom.GetPDBResidueInfo()
    return Chem.AtomPDBResidueInfo() if existing is None else existing
Example #7
0
 def _atom_info_helper(self, atom):
     """Build a new AtomPDBResidueInfo mirroring the residue of ``atom``.

     The residue name, residue number and chain id are copied from the
     atom's existing PDB residue info; the atom name is the placeholder
     "TODO".
     """
     source = atom.GetPDBResidueInfo()
     residue_name = source.GetResidueName()
     residue_number = source.GetResidueNumber()
     chain_id = source.GetChainId()
     return Chem.AtomPDBResidueInfo(atomName="TODO",
                                    residueName=residue_name,
                                    residueNumber=residue_number,
                                    chainId=chain_id)
Example #8
0
def create_atom(number, name, aromatic=False):
    """Create an RDKit atom carrying minimal PDB residue info.

    Parameters
    ----------
    number : int
        Atomic number passed to ``Chem.Atom``.
    name : str
        Atom name; surrounding whitespace is stripped, then the name is
        stored with one leading space and right-padded with spaces to a
        width of three.
    aromatic : bool
        Aromaticity flag set on the atom.

    Returns
    -------
    rdkit.Chem.rdchem.Atom
        The new atom, with occupancy and temperature factor set to 0.
    """
    padded_name = " {: <3s}".format(name.strip())

    residue_info = Chem.AtomPDBResidueInfo()
    residue_info.SetName(padded_name)
    residue_info.SetOccupancy(0)
    residue_info.SetTempFactor(0)

    new_atom = Chem.Atom(number)
    new_atom.SetIsAromatic(aromatic)
    new_atom.SetMonomerInfo(residue_info)
    return new_atom
Example #9
0
def copy_pdbinfo(info):
    """Return a new AtomPDBResidueInfo with the fields of ``info`` copied.

    Each field listed below is read through its ``Get<Field>`` accessor on
    ``info`` and written through the matching ``Set<Field>`` on the copy.
    """
    fields = (
        "Name", "SerialNumber", "AltLoc", "ResidueName", "ResidueNumber",
        "ChainId", "InsertionCode", "Occupancy", "TempFactor",
        "IsHeteroAtom", "SecondaryStructure", "SegmentNumber",
    )
    duplicate = Chem.AtomPDBResidueInfo()
    for field in fields:
        value = getattr(info, "Get" + field)()
        getattr(duplicate, "Set" + field)(value)
    return duplicate
Example #10
0
 def test_from_atom_empty(self, name, number, chain):
     """ResidueId.from_atom on these inputs must equal a default ResidueId.

     Only the arguments that are not None are written to the residue info
     before it is attached to a hydrogen atom.
     """
     mi = Chem.AtomPDBResidueInfo()
     assignments = (
         (name, mi.SetResidueName),
         (number, mi.SetResidueNumber),
         (chain, mi.SetChainId),
     )
     for value, setter in assignments:
         if value is not None:
             setter(value)
     atom = Chem.Atom(1)
     atom.SetMonomerInfo(mi)
     assert ResidueId.from_atom(atom) == ResidueId()
Example #11
0
 def test_from_atom(self, name, number, chain):
     """ResidueId.from_atom must return the name, number and chain given.

     Only truthy arguments are written to the residue info before it is
     attached to a hydrogen atom.
     """
     mi = Chem.AtomPDBResidueInfo()
     assignments = (
         (name, mi.SetResidueName),
         (number, mi.SetResidueNumber),
         (chain, mi.SetChainId),
     )
     for value, setter in assignments:
         if value:
             setter(value)
     atom = Chem.Atom(1)
     atom.SetMonomerInfo(mi)
     resid = ResidueId.from_atom(atom)
     assert resid.name == name
     assert resid.number == number
     assert resid.chain == chain
 def _parse_w_virtuals(self):
     """
     Pad a 1- or 2-atom molecule to three atoms with vanadium (virtual) atoms.

     Each virtual atom is bonded (zero-order bond) to the heaviest real atom,
     the molecule is embedded while keeping the real atoms' coordinates, and
     the bond to each virtual atom is then set to 0.1 A. ``self.mol``,
     ``self._undescribed`` and ``self.ordered_atoms`` are replaced, and three
     icoor entries are added via ``self._add_icoor``.

     :raises ValueError: if the molecule has no atoms, or three or more.
     :return: None
     """
     target_atom_count = 3  # this is fixed due to icoor. But the code works for more if add_icoor part is corrected.
     nonvirtuals = self.mol.GetNumAtoms()
     virtuals = target_atom_count - nonvirtuals
     if virtuals <= 0:
         raise ValueError('Human called this when there are 3+ atoms.')
     elif virtuals == target_atom_count:
         raise ValueError('There are no atoms')
     # 1 or more virtuals
     # The anchor is the real atom with the highest atomic number.
     anchor = sorted(self.mol.GetAtoms(),
                     key=lambda atom: atom.GetAtomicNum(),
                     reverse=True)[0]
     anchor_idx = anchor.GetIdx()  # either 0 or 1
     mol = Chem.RWMol(self.mol)
     for i in range(virtuals):
         virtual = Chem.Atom('V')
         virtual.SetDoubleProp('_GasteigerCharge', 0.0)
         virtual.SetProp('_rType', 'VIRT')
         mol.AddAtom(virtual)
         virtual_idx = mol.GetNumAtoms() - 1
         # Re-fetch the atom from the molecule so that the residue info set
         # below is applied to the atom that is actually in the molecule.
         virtual = mol.GetAtomWithIdx(
             virtual_idx)  # the PDBResidue info does not get set?
         mol.AddBond(anchor_idx, virtual_idx, Chem.BondType.ZERO)
         virtual.SetMonomerInfo(
             Chem.AtomPDBResidueInfo(atomName=f'V{i+1}',
                                     serialNumber=virtual_idx,
                                     residueName=self.NAME,
                                     isHeteroAtom=not self.is_aminoacid()))
     # get the coords off the original (without vanadium "virtual" atoms)
     coordMap = {
         i: mol.GetConformer().GetAtomPosition(i)
         for i in range(self.mol.GetNumAtoms())
     }
     AllChem.EmbedMolecule(mol, coordMap=coordMap)
     # Virtual atoms were appended, so their indices start at `nonvirtuals`.
     for i in range(virtuals):
         AllChem.SetBondLength(mol.GetConformer(), anchor_idx,
                               i + nonvirtuals, 0.1)
     self.mol = mol.GetMol()
     self._undescribed = deque(self.mol.GetAtoms())
     self.ordered_atoms = []
     # NOTE(review): these atoms come from the editable `mol`, not from the
     # freshly assigned `self.mol` -- confirm this is intended.
     atoms = mol.GetAtoms()
     self._add_icoor([atoms[0], atoms[0], atoms[1], atoms[2]])
     self._add_icoor([atoms[1], atoms[0], atoms[1], atoms[2]])
     self._add_icoor([atoms[2], atoms[1], atoms[0], atoms[2]])
     # Overwrite whatever the calls above left in `ordered_atoms` with the
     # atoms of the final molecule.
     self.ordered_atoms = self.mol.GetAtoms()
Example #13
0
    def __init__(self, smiles):
        """Build the fragment from a SMILES string.

        Every atom receives PDB residue info: a name made of its element
        symbol and index (one leading space, right-padded to width three),
        residue "UNK" number 1, hetero flag off, occupancy and temperature
        factor 0. The annotated molecule is then handed to the parent
        constructor.
        """
        fragment = Chem.MolFromSmiles(smiles)

        for idx, atm in enumerate(fragment.GetAtoms()):
            label = "{}{}".format(atm.GetSymbol(), idx)
            residue_info = Chem.AtomPDBResidueInfo()
            residue_info.SetName(" {: <3s}".format(label))
            residue_info.SetIsHeteroAtom(False)
            residue_info.SetResidueNumber(1)
            residue_info.SetResidueName("UNK")
            residue_info.SetOccupancy(0)
            residue_info.SetTempFactor(0)
            atm.SetMonomerInfo(residue_info)

        super(Fragment, self).__init__(fragment)
Example #14
0
 def _set_PDBInfo_atomname(self, atom, name, overwrite=False):
     """Set the PDB atom name of ``atom`` and return the name now in effect.

     * No residue info yet: new info is attached (padded name, serial number
       equal to the atom index, residue ``self.NAME``) and ``name`` is
       returned.
     * Stored name equals ``name``: nothing changes, ``name`` is returned.
     * Stored name differs: it is replaced by ``name`` only when
       ``overwrite`` is true; otherwise the stored name is returned.

     NOTE(review): the name is padded with ``self.pad_name`` only when new
     info is created; the comparison and the overwrite use ``name`` as
     given -- confirm this asymmetry is intended.
     """
     info = atom.GetPDBResidueInfo()
     if info is None:
         is_hetero = not self.is_aminoacid()
         fresh = Chem.AtomPDBResidueInfo(atomName=self.pad_name(name),
                                         serialNumber=atom.GetIdx(),
                                         residueName=self.NAME,
                                         isHeteroAtom=is_hetero)
         atom.SetMonomerInfo(fresh)
         return name

     current = info.GetName()
     if current == name:
         return name
     if not overwrite:
         return current
     info.SetName(name)
     return name
Example #15
0
 def test_from_atom(self, name, number, chain):
     """ResidueId.from_atom must fall back to defaults for empty fields.

     Arguments that are not None are written to the residue info. Falsy
     values are expected to come back as "UNK" (name), 0 (number) and
     None (chain).
     """
     mi = Chem.AtomPDBResidueInfo()
     assignments = (
         (name, mi.SetResidueName),
         (number, mi.SetResidueNumber),
         (chain, mi.SetChainId),
     )
     for value, setter in assignments:
         if value is not None:
             setter(value)
     atom = Chem.Atom(1)
     atom.SetMonomerInfo(mi)
     resid = ResidueId.from_atom(atom)

     expected_name = name if name else "UNK"
     expected_number = number if number else 0
     expected_chain = chain if chain else None
     assert resid.name == expected_name
     assert resid.number == expected_number
     assert resid.chain == expected_chain
Example #16
0
    def add_hetatm(self,
                   symbol,
                   coords=(0, 0, 0),
                   atom_name="D1",
                   res_name="LG1",
                   res_numb=1):
        """Append a hetero atom to ``self.structure``.

        The structure is rebuilt through an editable copy, the new atom is
        positioned in conformer 0 and the atom cache is refreshed.

        Args:
            symbol (str): Any chemical element
            coords (tuple, optional): 3D coordinate of added atom
            atom_name (str, optional): PDB name of added atom
            res_name (str, optional): PDB residue name of added atom
            res_numb (int, optional): PDB residue number of added atom

        Returns:
            str: "<residue name> <atom name>" of the added atom, both
            stripped of surrounding whitespace and separated by one space
        """
        # Describe the new atom.
        residue_info = Chem.AtomPDBResidueInfo()
        residue_info.SetName(atom_name)
        residue_info.SetResidueName(res_name)
        residue_info.SetResidueNumber(res_numb)
        residue_info.SetIsHeteroAtom(True)
        new_atom = Chem.rdchem.Atom(symbol)
        new_atom.SetMonomerInfo(residue_info)

        # Insert it through an editable copy, then place it in space.
        editable = Chem.rdchem.EditableMol(self.structure)
        idx = editable.AddAtom(new_atom)
        self.structure = editable.GetMol()
        self.structure.GetConformer(0).SetAtomPosition(idx, coords)

        # Build the key from what is actually stored on the inserted atom.
        stored = self.structure.GetAtomWithIdx(idx).GetPDBResidueInfo()
        stored_atom_name = stored.GetName().strip()
        stored_res_name = stored.GetResidueName().strip()
        key = f"{stored_res_name} {stored_atom_name}"

        self.update_atom_cache()

        return key
Example #17
0
def write_pdb_file(mol, mol_amber, output_file, resi):
    """Write ``mol`` to a PDB file without CONECT records.

    Every atom of ``mol`` gets fresh PDB residue info: its name is taken
    from the atom at the same position in ``mol_amber``, the residue number
    is 1 and the hetero flag is off. After RDKit has written the file, any
    line containing ``CONECT`` is removed because those records break leap.

    Parameters
    ----------
    mol : rdkit.Chem.rdchem.Mol
        Molecule to write; its atoms' monomer info is overwritten in place.
    mol_amber : sequence
        Indexable collection whose items expose ``.name``; must have at
        least as many items as ``mol`` has atoms.
    output_file : str
        Path of the PDB file to create.
    resi : str
        Residue name stored on every atom.
    """
    for index, atom in enumerate(mol.GetAtoms()):
        atom_name = mol_amber[index].name
        mi = Chem.AtomPDBResidueInfo()
        mi.SetName(atom_name)
        # Pad the residue name on the left according to the atom name length.
        mi.SetResidueName(''.ljust(4 - len(atom_name)) + resi)
        mi.SetResidueNumber(1)
        mi.SetIsHeteroAtom(False)
        atom.SetMonomerInfo(mi)

    Chem.MolToPDBFile(mol, output_file, flavor=2)

    # CONECT records break leap. Filter them in Python rather than shelling
    # out to `sed -i`: that broke on paths containing spaces or shell
    # metacharacters and behaves differently between GNU and BSD sed.
    with open(output_file, 'r') as f:
        kept_lines = [line for line in f if 'CONECT' not in line]

    with open(output_file, 'w') as f:
        f.writelines(kept_lines)
Example #18
0
def AddMissingAtoms(protein, residue, amap, template):
    """Add missing atoms to protein molecule only at the residue according to
    template.

    The residue is embedded onto the template (falling back to simplified
    copies of both when the direct substructure match fails), every template
    atom without a counterpart in the residue is added to the protein with
    its embedded coordinates, bonds to the new atoms are copied from the
    template, newly added backbone atoms are renamed and linked to the
    neighbouring residue, and finally only the new atoms are minimized.

    Parameters
    ----------
        protein: rdkit.Chem.rdchem.RWMol
            Mol with whole protein. Note that it is modified in place.
        residue:
            Mol with residue only
        amap: list
            List mapping atom IDs in residue to atom IDs in whole protein
            (amap[i] = j means that i'th atom in residue corresponds to j'th
            atom in protein)
        template:
            Residue template
    Returns
    -------
        tuple
            The tuple returned by ``PreparePDBResidue`` with one extra,
            last element appended: the list of protein atom indices that
            were added. NOTE(review): the leading elements are presumably
            (protein, visited_bonds, is_complete) -- confirm against
            ``PreparePDBResidue``.

    Raises
    ------
        AddAtomsError
            If the residue has too few atoms/bonds to be embedded, if the
            constrained embedding fails, or if no atom had to be added.
        SubstructureMatchError
            If the residue cannot be matched to the template, even after
            simplifying both.
    """
    # TODO: try to better guess the types of atoms (if possible)

    # Catch residues which have less than 4 atoms (i.e. cannot have complete
    # backbone), and template has more atoms than that, or residues with
    # many missing atoms, which lead to low number of bonds (less than 3)
    if ((len(amap) < 4 or residue.GetNumBonds() < 3)
            and template.GetNumAtoms() > 4):
        raise AddAtomsError('Residue has too few atoms (%i) to properly embed '
                            'residue conformer.' % len(amap))

    # we need the match anyway and ConstrainedEmbed does not output it
    # matched_atoms[k] is the template atom matched by residue atom k.
    matched_atoms = template.GetSubstructMatch(residue)
    if matched_atoms:  # instead of catching ValueError
        try:
            fixed_residue = ConstrainedEmbed(template, residue)
        except ValueError:
            raise AddAtomsError('Could not embed residue')
    else:
        # Retry the match on simplified copies of residue and template.
        residue2 = SimplifyMol(Chem.Mol(residue))
        template2 = SimplifyMol(Chem.Mol(template))
        matched_atoms = template2.GetSubstructMatch(residue2)
        if matched_atoms:
            try:
                fixed_residue = ConstrainedEmbed(template2, residue2)
            except ValueError:
                raise AddAtomsError('Could not embed residue')
            # copy coordinates to molecule with appropriate bond orders
            fixed_residue2 = Chem.Mol(template)
            fixed_residue2.RemoveAllConformers()
            fixed_residue2.AddConformer(fixed_residue.GetConformer(-1))
            fixed_residue = fixed_residue2
        else:
            raise SubstructureMatchError(
                'No matching found at missing atom stage.',
                template.GetProp('_Name'), Chem.MolToSmiles(template),
                Chem.MolToSmiles(residue))

    # new_atoms: protein indices of the atoms added below.
    # new_amap: for every atom of fixed_residue, its index in the protein.
    new_atoms = []
    new_amap = []

    # Residue-level PDB fields are copied from the residue's first atom.
    info = residue.GetAtomWithIdx(0).GetPDBResidueInfo()
    protein_conformer = protein.GetConformer()
    fixed_conformer = fixed_residue.GetConformer()

    for i in range(fixed_residue.GetNumAtoms()):
        if i not in matched_atoms:
            atom = fixed_residue.GetAtomWithIdx(i)
            # we need to generate atom names like 'H123', these are
            # "wrapped around" below when setting 'atomName' to '3H12'
            atom_symbol = atom.GetSymbol()
            name = (atom_symbol + str(i)[:4 - len(atom_symbol)]).ljust(4)
            new_info = Chem.AtomPDBResidueInfo(
                atomName=name[-1:] + name[:-1],  # wrap around
                residueName=info.GetResidueName(),
                residueNumber=info.GetResidueNumber(),
                chainId=info.GetChainId(),
                insertionCode=info.GetInsertionCode(),
                isHeteroAtom=info.GetIsHeteroAtom())

            atom.SetMonomerInfo(new_info)
            new_id = protein.AddAtom(atom)
            new_atoms.append(new_id)
            pos = fixed_conformer.GetAtomPosition(i)
            protein_conformer.SetAtomPosition(new_id, pos)
            new_amap.append(new_id)
        else:
            # Template atom already present: reuse the protein index of the
            # residue atom that matched it.
            new_amap.append(amap[matched_atoms.index(i)])

    # add bonds in separate loop (we need all atoms added before that)
    for i in range(fixed_residue.GetNumAtoms()):
        if i not in matched_atoms:
            atom = fixed_residue.GetAtomWithIdx(i)
            for n in atom.GetNeighbors():
                ni = n.GetIdx()
                bond = fixed_residue.GetBondBetweenAtoms(i, ni)
                # for multiple missing atoms we may hit bonds multiple times
                new_bond = protein.GetBondBetweenAtoms(new_amap[i],
                                                       new_amap[ni])
                if new_bond is None:
                    protein.AddBond(new_amap[i], new_amap[ni])
                    new_bond = protein.GetBondBetweenAtoms(
                        new_amap[i], new_amap[ni])
                    new_bond.SetBondType(bond.GetBondType())

    # if there are no new atoms raise an exception and dont go further
    if len(new_atoms) == 0:
        raise AddAtomsError

    # Backbone patterns. 'atom_types' maps a position in the pattern match
    # to the PDB atom name to assign; 'bond_pair' names the two atoms that
    # link consecutive residues.
    backbone_definitions = [
        # Phosphodiester Bond
        {
            'smarts': Chem.MolFromSmiles('O=P(O)OCC1OC(CC1O)'),
            'atom_types': {
                0: 'OP1',
                1: 'P',
                2: 'OP2',
                3: 'O5\'',
                4: 'C5\'',
                5: 'C4\'',
                9: 'C3\'',
                10: 'O3\''
            },
            'bond_pair': ('O3\'', 'P')
        },
        # Peptide Bond
        {
            'smarts': Chem.MolFromSmiles('C(=O)CN'),
            'atom_types': {
                0: 'C',
                1: 'O',
                2: 'CA',
                3: 'N'
            },
            'bond_pair': ('C', 'N')
        },
    ]
    info = residue.GetAtomWithIdx(0).GetPDBResidueInfo()
    res_num = info.GetResidueNumber()
    res_chain = info.GetChainId()

    for bond_def in backbone_definitions:
        backbone_match = fixed_residue.GetSubstructMatch(bond_def['smarts'])
        if backbone_match:
            for i in new_atoms:
                # new_amap.index(i) is the atom's index within fixed_residue.
                if new_amap.index(i) in backbone_match:
                    atom = protein.GetAtomWithIdx(i)
                    match_idx = backbone_match.index(new_amap.index(i))
                    if match_idx not in bond_def['atom_types']:
                        # if atom type is not defined we can skip that atom
                        continue

                    # Set atom label if present in backbone definition
                    match_type = bond_def['atom_types'][match_idx]
                    atom.GetPDBResidueInfo().SetName(' ' + match_type.ljust(3))

                    # define upstream and downstream bonds
                    # diff=+1 pairs (a1, a2) as given and scans towards
                    # higher atom indices; diff=-1 uses the reversed pair
                    # and scans towards lower indices.
                    bonds = zip([
                        bond_def['bond_pair'],
                        reversed(bond_def['bond_pair'])
                    ], [1, -1])
                    for (a1, a2), diff in bonds:
                        if match_type == a1:
                            limit = max(-1, protein.GetNumAtoms() * diff)
                            # Walk away from the residue's first atom until
                            # the partner atom in the adjacent residue is
                            # found, or the scan leaves the neighbourhood.
                            for j in range(amap[0], limit, diff):
                                info = (protein.GetAtomWithIdx(
                                    j).GetPDBResidueInfo())
                                res2_num = info.GetResidueNumber()
                                res2_chain = info.GetChainId()
                                if (res2_num == res_num + diff
                                        and res_chain == res2_chain):
                                    if info.GetName().strip() == a2:
                                        protein.AddBond(
                                            i, j, Chem.BondType.SINGLE)
                                        break
                                elif (abs(res2_num - res_num) > 1
                                      or res_chain != res2_chain):
                                    break

    # run minimization just for this residue
    protein = UFFConstrainedOptimize(protein, moving_atoms=new_atoms)

    # run PreparePDBResidue to fix atom properties
    out = PreparePDBResidue(protein, fixed_residue, new_amap, template)
    return out + (new_atoms, )
Example #19
0
def jsontomols(text, strict=True):
    """Parse a moljson document into a list of RDKit molecules.

    Parameters
    ----------
    text : str
        JSON document. It must contain a ``moljson-header`` object whose
        ``version`` is 10 and a ``molecules`` list; ``atomDefaults`` and
        ``bondDefaults`` are optional.
    strict : bool
        Forwarded to ``UpdatePropertyCache`` for every molecule.

    Returns
    -------
    list of rdkit.Chem.rdchem.RWMol
        One molecule per entry of ``molecules``, in document order.

    Raises
    ------
    ValueError
        On an unsupported header or representation version, a cis/trans
        bond without ``stereoAtoms``, a residue listed in more than one
        chain, or an atom listed in more than one residue.
    """
    from collections import defaultdict

    obj = json.loads(text)
    header = obj['moljson-header']
    if header['version'] != 10:
        # The message must read the same key that was just checked; looking
        # it up under 'header' raised KeyError instead of this ValueError.
        raise ValueError('bad version %s' % header['version'])
    atomDefaults = obj.get('atomDefaults', {})
    bondDefaults = obj.get('bondDefaults', {})

    # Lookup tables from the JSON vocabulary to RDKit enums, built once.
    chiralTags = {
        'unspecified': Chem.ChiralType.CHI_UNSPECIFIED,
        'ccw': Chem.ChiralType.CHI_TETRAHEDRAL_CCW,
        'cw': Chem.ChiralType.CHI_TETRAHEDRAL_CW,
        'other': Chem.ChiralType.CHI_OTHER
    }
    bondOrders = {
        1: Chem.BondType.SINGLE,
        2: Chem.BondType.DOUBLE,
        3: Chem.BondType.TRIPLE
    }
    bondStereoTags = {
        'cis': Chem.BondStereo.STEREOCIS,
        'trans': Chem.BondStereo.STEREOTRANS,
        'either': Chem.BondStereo.STEREOANY,
        'unspecified': Chem.BondStereo.STEREONONE
    }

    mols = []
    for mobj in obj['molecules']:
        m = Chem.RWMol()
        m.SetProp('_Name', mobj.get("name", ""))
        # ---------------------------------
        #      Atoms
        for entry in mobj['atoms']:
            atm = Chem.Atom(entry.get('Z', atomDefaults.get('Z', 6)))
            atm.SetNoImplicit(True)
            atm.SetNumExplicitHs(
                entry.get('impHs', atomDefaults.get('impHs', 0)))
            atm.SetFormalCharge(entry.get('chg', atomDefaults.get('chg', 0)))
            atm.SetChiralTag(chiralTags[entry.get(
                'stereo', atomDefaults.get('stereo', 'unspecified'))])
            atm.SetNumRadicalElectrons(
                entry.get('nRad', atomDefaults.get('nRad', 0)))
            m.AddAtom(atm)
        # ---------------------------------
        #      Atom Properties
        for entry in mobj.get('atomProperties', []):
            if entry["type"] == "partialcharges":
                if entry["method"] == "rdkit-gasteiger":
                    pnm = "_GasteigerCharge"
                else:
                    pnm = "_partialcharge"
                for i, v in enumerate(entry['values']):
                    m.GetAtomWithIdx(i).SetDoubleProp(pnm, v)

        # ---------------------------------
        #      Bonds
        # at the moment we can't set bond stereo directly because all atoms need to be there, so hold
        # that info for a bit
        bondStereos = {}
        for entry in mobj['bonds']:
            # Default bond order is 1 (single); the table is keyed by the
            # integer orders used in the JSON.
            bo = bondOrders[entry.get('bo', bondDefaults.get('bo', 1))]
            # AddBond returns the new number of bonds, so the bond that was
            # just added has index nbs - 1.
            nbs = m.AddBond(entry['atoms'][0], entry['atoms'][1], bo)
            bnd = m.GetBondWithIdx(nbs - 1)
            stereo = bondStereoTags[entry.get(
                'stereo', bondDefaults.get('stereo', 'unspecified'))]
            if 'stereoAtoms' in entry:
                bondStereos[bnd.GetIdx()] = (entry['stereoAtoms'], stereo)
            elif stereo in (Chem.BondStereo.STEREOCIS,
                            Chem.BondStereo.STEREOTRANS):
                raise ValueError(
                    "bond stereo set, but stereoatoms not provided")
        for idx, (vs, stereo) in bondStereos.items():
            bnd = m.GetBondWithIdx(idx)
            bnd.SetStereoAtoms(vs[0], vs[1])
            bnd.SetStereo(stereo)

        # ---------------------------------
        #      Conformers
        for entry in mobj.get('conformers', []):
            conf = Chem.Conformer(m.GetNumAtoms())
            dim = entry.get('dim', 3)
            conf.Set3D(dim == 3)
            for i in range(m.GetNumAtoms()):
                coord = entry['coords'][i]
                if dim != 3:
                    # Non-3D coordinates get a zero appended as z.
                    coord.append(0.)
                conf.SetAtomPosition(
                    i, Chem.rdGeometry.Point3D(coord[0], coord[1], coord[2]))
            m.AddConformer(conf, assignId=True)

        # ---------------------------------
        #      Properties
        props = mobj.get("molProperties", {})
        for p, v in props.items():
            # Exact type checks on purpose: a JSON boolean is a bool, which
            # is a subclass of int, and must be stored as a string.
            if type(v) is float:
                m.SetDoubleProp(p, v)
            elif type(v) is int:
                m.SetIntProp(p, v)
            else:
                m.SetProp(p, str(v))

        # ---------------------------------
        #      Residue information
        # Residue idx -> chain name; residues in no chain get "".
        chainLookup = defaultdict(str)
        for chain in mobj.get("chains", []):
            cnm = chain["name"]
            for residue in chain["residues"]:
                if residue in chainLookup:
                    raise ValueError(
                        "residue %d appears more than once in chain definitions"
                        % residue)
                chainLookup[residue] = cnm
        for residue in mobj.get("residues", []):
            idx = residue['idx']
            chain = chainLookup[idx]
            num = residue['num']
            rnm = residue['name']
            hets = residue.get('containsHetatms', False)
            for aidx, anm, snum in zip(residue['atoms'], residue['atomNames'],
                                       residue['serialNumbers']):
                at = m.GetAtomWithIdx(aidx)
                if at.GetPDBResidueInfo():
                    raise ValueError("atom %d appears in multiple residues" %
                                     aidx)
                at.SetMonomerInfo(
                    Chem.AtomPDBResidueInfo(anm,
                                            residueName=rnm,
                                            serialNumber=snum,
                                            residueNumber=num,
                                            chainId=chain,
                                            isHeteroAtom=hets))

        # ---------------------------------
        #      representation
        # Default to an empty list so that molecules without a
        # 'representations' entry are accepted instead of raising TypeError.
        for entry in mobj.get('representations', []):
            if entry['toolkit'] == 'RDKit':
                if entry['format_version'] != 1:
                    raise ValueError("bad format_version %s" %
                                     entry['format_version'])
                aromAtoms = entry.get('aromaticAtoms', [])
                for idx in aromAtoms:
                    m.GetAtomWithIdx(idx).SetIsAromatic(True)
                aromBonds = entry.get('aromaticBonds', [])
                for idx in aromBonds:
                    bnd = m.GetBondWithIdx(idx)
                    bnd.SetIsAromatic(True)
                    bnd.SetBondType(Chem.BondType.AROMATIC)
                if hasattr(Chem.RingInfo, 'AddRing'):  #<- needed to be added
                    atomRings = entry.get('atomRings', [])
                    for ring in atomRings:
                        ringBonds = []
                        # Close the ring so consecutive pairs cover every bond.
                        alist = ring + [ring[0]]
                        for i in range(len(ring)):
                            ringBonds.append(
                                m.GetBondBetweenAtoms(alist[i],
                                                      alist[i + 1]).GetIdx())
                        m.GetRingInfo().AddRing(ring, ringBonds)
                else:
                    Chem.GetSymmSSSR(m)
                for i, x in enumerate(entry.get('cipRanks', [])):
                    m.GetAtomWithIdx(i).SetProp('_CIPRank', str(x))
                for i, x in entry.get('cipCodes', []):
                    m.GetAtomWithIdx(i).SetProp('_CIPCode', x)
                    #m.GetAtomWithIdx(i).SetIntProp('_ChiralityPossible',1)
                # Only the first RDKit representation is used.
                break
        m.UpdatePropertyCache(strict=strict)
        m.SetIntProp("_StereochemDone", 1)
        mols.append(m)
    return mols
Example #20
0
def set_PDBresidueInfo(rdkit_atom, pdb_info):
    """Attach PDB residue info built from a dict to ``rdkit_atom``.

    Every key of ``pdb_info`` is the suffix of an ``AtomPDBResidueInfo``
    setter: ``{"ResidueName": "ALA"}`` results in a call to
    ``SetResidueName("ALA")``.
    """
    residue_info = Chem.AtomPDBResidueInfo()
    for field, value in pdb_info.items():
        setter = getattr(residue_info, 'Set' + field)
        setter(value)
    rdkit_atom.SetMonomerInfo(residue_info)
Example #21
0
def openmmTop_to_rdmol(topology, positions, verbose = False):
    """
    This function converts an OpenMM topology into a RDMol
    Parameters:
    -----------
    topology : OpenMM Topology
        The OpenMM topology
    positions : OpenMM Quantity
        The molecule atom positions associated with the
        topology
    verbose : bool
        Currently unused.
    Return:
    -------
    rdmol : RDMol
        The generated RDKit molecule. Every atom carries PDB residue info
        (atom name, chain id, residue number and name); atoms of residues
        that are not protein, DNA or RNA residues are flagged as hetero
        atoms.
    Raises:
    -------
    ValueError
        If the number of atoms or bonds created does not match the topology
    """
    rdmol = Chem.RWMol()

    # Mapping dictionary between openmm atoms and rdk atoms
    openmm_atom_to_rdk_atom = {}

    # Python set used to identify atoms that are not in protein residues
    keep = set(proteinResidues).union(dnaResidues).union(rnaResidues)

    #TODO charge info is not transferred
    for chain in topology.chains():
        chainId = str(chain.id)
        for res in chain.residues():
            resName, resNum = res.name, int(res.index)
            isHetero = resName not in keep
            for openmm_at in res.atoms():
                rdatom = Chem.Atom(openmm_at.element._atomic_number)
                info = Chem.AtomPDBResidueInfo()
                info.SetName(openmm_at.name)
                info.SetChainId(chainId)
                info.SetResidueNumber(resNum)
                info.SetResidueName(resName)
                # The hetero flag lives on the residue info, not on the atom
                # (Atom has no SetIsHeteroAtom), and must be set before the
                # info is attached to the atom.
                info.SetIsHeteroAtom(isHetero)

                rdatom.SetMonomerInfo(info)

                rdmol.AddAtom(rdatom)
                openmm_atom_to_rdk_atom[openmm_at] = rdmol.GetNumAtoms() - 1

    if topology.getNumAtoms() != rdmol.GetNumAtoms():
        raise ValueError("OpenMM topology and RDMol number of atoms mismatching: "
                             "OpenMM = {} vs RDMol  = {}".format(topology.getNumAtoms(), rdmol.GetNumAtoms()))
    # Count the number of bonds in the openmm topology
    omm_bond_count = 0

    # Create the bonds
    _bondtypes = {0: Chem.BondType.UNSPECIFIED,
              1: Chem.BondType.SINGLE,
              1.5: Chem.BondType.AROMATIC,
              2: Chem.BondType.DOUBLE,
              3: Chem.BondType.TRIPLE,
              4: Chem.BondType.QUADRUPLE,
              5: Chem.BondType.QUINTUPLE,
              6: Chem.BondType.HEXTUPLE,
              7: Chem.BondType.ONEANDAHALF,}

    for omm_bond in topology.bonds():

        omm_bond_count += 1

        at0 = omm_bond[0]
        at1 = omm_bond[1]

        rd_atom0, rd_atom1 = openmm_atom_to_rdk_atom[at0], openmm_atom_to_rdk_atom[at1]

        # NOTE(review): bond types are compared against plain strings --
        # confirm this matches the objects the OpenMM version in use returns.
        if omm_bond.type == "Aromatic":
            #CESHI assumed by setting bond aromatic the two atoms are aromatic
            rdmol.AddBond(rd_atom0, rd_atom1, _bondtypes[1.5])
        elif omm_bond.type == "Single":
            rdmol.AddBond(rd_atom0, rd_atom1, _bondtypes[1])
        elif omm_bond.type == "Double":
            rdmol.AddBond(rd_atom0, rd_atom1, _bondtypes[2])
        elif omm_bond.type == "Triple":
            rdmol.AddBond(rd_atom0, rd_atom1, _bondtypes[3])
        elif omm_bond.type == "Amide":
            rdmol.AddBond(rd_atom0, rd_atom1, _bondtypes[int(omm_bond.order)])
        else:
            rdmol.AddBond(rd_atom0, rd_atom1, _bondtypes[0])

    # Compare bond counts here; the atom counts were already checked above.
    if omm_bond_count != rdmol.GetNumBonds():
        raise ValueError("OpenMM topology and RDMol number of bonds mismatching: "
                             "OpenMM = {} vs RDMol  = {}".format(omm_bond_count, rdmol.GetNumBonds()))

    # Strip the units: plain numbers expressed in angstrom.
    pos = positions.in_units_of(unit.angstrom) / unit.angstrom
    conformer = Chem.Conformer()

    for idx, coord in enumerate(pos):
        x, y, z = coord
        conformer.SetAtomPosition(idx, Geometry.Point3D(x, y, z))

    rdmol.AddConformer(conformer)

    rdmol.UpdatePropertyCache(strict=False)
    Chem.GetSSSR(rdmol)

    return rdmol.GetMol()