def renumberResidues(mol):
    """Renumber residues per chain and relabel chains, in place.

    Chains are relabelled in order of first appearance using the characters
    ``A``-``Z`` followed by ``0``-``9``.  Within each chain, residues are
    renumbered starting at 1 in order of first appearance.  Every atom gets a
    fresh ``AtomPDBResidueInfo`` that carries only the atom name, the new
    chain id, the new residue number, the residue name and a hetero-atom flag
    (set when the residue name is not in the module-level ``AA``).

    Parameters
    ----------
    mol :
        Molecule whose atoms all carry PDB residue information.  It is
        modified in place; nothing is returned.

    Raises
    ------
    IndexError
        If the molecule contains more distinct chain ids than there are
        replacement labels (36).
    """
    CHAINS = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'

    # First pass: assign a new label to each chain in order of appearance.
    chainMap = {}
    for atom in mol.GetAtoms():
        chainId = atom.GetPDBResidueInfo().GetChainId()
        if chainId not in chainMap:
            chainMap[chainId] = CHAINS[len(chainMap)]

    # chain id -> {old residue number: new residue number}.  Keeping the
    # mapping (rather than only a set of seen numbers) guarantees that every
    # atom of a residue gets that residue's number even when its atoms are
    # not contiguous in the atom list.
    renumbered = {}
    for atom in mol.GetAtoms():
        res = atom.GetPDBResidueInfo()
        chainId = res.GetChainId()
        name = res.GetName()
        resName = res.GetResidueName()
        resn = res.GetResidueNumber()

        chain_numbers = renumbered.setdefault(chainId, {})
        newresn = chain_numbers.setdefault(resn, len(chain_numbers) + 1)

        mi = Chem.AtomPDBResidueInfo()
        mi.SetName(name)
        mi.SetChainId(chainMap[chainId])
        mi.SetResidueNumber(newresn)
        mi.SetResidueName(resName)
        mi.SetIsHeteroAtom(resName not in AA)
        atom.SetMonomerInfo(mi)
def from_rdkit(cls, mol, resname="UNL", resnumber=1, chain=""):
    """Create a Molecule from an RDKit molecule.

    Instantiating directly with ``prolif.Molecule(mol)`` also works, but this
    constructor ensures that every atom is linked to an AtomPDBResidueInfo,
    which ProLIF requires.

    Parameters
    ----------
    mol : rdkit.Chem.rdchem.Mol
        The input RDKit molecule
    resname : str
        Residue name used when the molecule carries no residue info
    resnumber : int
        Residue number used when the molecule carries no residue info
    chain : str
        Chain id used when the molecule carries no residue info

    Notes
    -----
    Only the first atom is inspected for an existing AtomPDBResidueInfo.
    When it has none, the molecule is deep-copied and *all* atoms of the copy
    are patched with an info built from ``resname``, ``resnumber`` and
    ``chain``; the input molecule is left untouched.
    """
    already_annotated = mol.GetAtomWithIdx(0).GetMonomerInfo()
    if not already_annotated:
        # Work on a copy so the caller's molecule is not modified.
        mol = copy.deepcopy(mol)
        for atom in mol.GetAtoms():
            atom.SetMonomerInfo(
                Chem.AtomPDBResidueInfo(
                    f" {atom.GetSymbol():<3.3}",
                    residueName=resname,
                    residueNumber=resnumber,
                    chainId=chain,
                )
            )
    return cls(mol)
def _fix_atom_names(self):
    """Give every atom of ``self.mol`` a unique PDB atom name.

    For each atom a candidate name ``<element><counter>`` is generated
    (``CONN`` is used as the element for ``*`` atoms), advancing the
    per-element counter until the candidate is unused.

    * Atoms without PDB residue info receive a new ``AtomPDBResidueInfo``
      with the candidate name, the atom index as serial number,
      ``self.NAME`` as residue name and the hetero-atom flag set.
    * Atoms whose existing name duplicates an earlier one are renamed to the
      candidate, and a warning is issued.
    * Atoms with a so-far-unique existing name keep it.
    """
    elemental = defaultdict(int)
    # A set gives O(1) membership tests; only membership is ever queried.
    seen = set()
    for i in range(self.mol.GetNumAtoms()):
        atom = self.mol.GetAtomWithIdx(i)
        el = atom.GetSymbol()
        if el == '*':
            el = 'CONN'
        elemental[el] += 1  # compatible mol_to_params.py
        info = atom.GetPDBResidueInfo()
        lamename = el + str(elemental[el])
        while lamename in seen:
            elemental[el] += 1
            lamename = el + str(elemental[el])
        if info is None:
            atom.SetMonomerInfo(Chem.AtomPDBResidueInfo(atomName=lamename,
                                                        serialNumber=i,
                                                        residueName=self.NAME,
                                                        isHeteroAtom=True))
            seen.add(lamename)
        elif info.GetName() in seen:
            warn(f'Name clash {info.GetName()}, second one now called {lamename}')
            info.SetName(lamename)
            seen.add(lamename)
        else:
            seen.add(info.GetName())
def write_rd_pdb(mol_ff, rd_mol, residue_name, output_file):
    """Write ``rd_mol`` to a PDB file that contains only atom records.

    Each RDKit atom is given a fresh ``AtomPDBResidueInfo`` whose atom name is
    taken from the atom at the same position in ``mol_ff.atoms``, with residue
    number 1 and the hetero-atom flag cleared.  After RDKit writes the file,
    it is rewritten keeping only the ``ATOM`` / ``HETATM`` records.

    Parameters
    ----------
    mol_ff :
        Object whose ``atoms`` sequence provides a ``name`` per atom, indexed
        in the same order as ``rd_mol``'s atoms.
    rd_mol :
        RDKit molecule to write; its atoms' monomer info is overwritten.
    residue_name : str
        Residue name written for every atom.
    output_file : str
        Path of the PDB file to create.
    """
    for idx, atom in enumerate(rd_mol.GetAtoms()):
        atom_name = mol_ff.atoms[idx].name
        mi = Chem.AtomPDBResidueInfo()
        mi.SetName(atom_name)
        # the rdkit PDB residue name has incorrect whitespace
        mi.SetResidueName(''.ljust(4 - len(atom_name)) + residue_name)
        mi.SetResidueNumber(1)
        mi.SetIsHeteroAtom(False)
        atom.SetMonomerInfo(mi)

    Chem.MolToPDBFile(rd_mol, output_file, flavor=2)

    # CONECT records break leap: keep only the coordinate records.  Matching
    # on the record-name prefix (instead of the first whitespace-separated
    # token) tolerates blank lines and HETATM records whose serial number
    # directly abuts the record name.
    with open(output_file, 'r') as f:
        pdb_data = [line for line in f
                    if line.startswith(('ATOM', 'HETATM'))]
    with open(output_file, 'w') as f:
        f.writelines(pdb_data)
def toRDKITmol(mol, protidx, sanitize=True, removeHs=False):
    """Build an RDKit molecule from a subset of atoms of ``mol``.

    Parameters
    ----------
    mol :
        Source molecule exposing per-atom arrays (``element``, ``charge``,
        ``name``, ``serial``, ``altloc``, ``resname``, ``resid``, ``chain``,
        ``insertion``, ``occupancy``, ``beta``, ``record``), ``coords``
        indexed as ``[atom, :, frame]``, a ``frame`` attribute and a
        ``_getBonds()`` method yielding index pairs.
    protidx :
        Indices (into ``mol``) of the atoms to convert.  Atom ``protidx[k]``
        becomes atom ``k`` of the returned molecule.
    sanitize : bool
        If True the molecule is sanitized; otherwise only the property cache
        is updated.
    removeHs : bool
        Only used when ``sanitize`` is True: hydrogens are removed with
        ``Chem.RemoveHs`` instead of calling ``Chem.SanitizeMol``.

    Returns
    -------
    rdkit.Chem.rdchem.RWMol
        The converted molecule with one 3D conformer.  All bonds are single
        bonds, since bond orders are not available from ``mol``.
    """
    # Taken from rdkit/Code/GraphMol/FileParsers/PDBParser.cpp
    conformer = Chem.Conformer(len(protidx))
    conformer.Set3D(True)
    conformer.SetId(0)

    rdmol = Chem.RWMol()
    # original atom index -> index in rdmol
    new_index = {}
    for ii, i in enumerate(protidx):
        a = Chem.Atom(mol.element[i])
        a.SetFormalCharge(int(mol.charge[i]))
        info = Chem.AtomPDBResidueInfo(atomName=mol.name[i],
                                       serialNumber=int(mol.serial[i]),
                                       altLoc=mol.altloc[i],
                                       residueName=mol.resname[i],
                                       residueNumber=int(mol.resid[i]),
                                       chainId=mol.chain[i],
                                       insertionCode=mol.insertion[i],
                                       occupancy=float(mol.occupancy[i]),
                                       tempFactor=float(mol.beta[i]),
                                       isHeteroAtom=mol.record[i] == 'HETATM')
        a.SetMonomerInfo(info)
        rdmol.AddAtom(a)
        new_index[int(i)] = ii

        coor = [float(c) for c in mol.coords[i, :, mol.frame]]
        conformer.SetAtomPosition(ii, Point3D(coor[0], coor[1], coor[2]))  # Correct the atom idx
    rdmol.AddConformer(conformer)

    # Here I diverge from the C++ parser because you cannot instantiate Chem.Bond objects in python
    # I also don't take into account double/triple bonds etc since I don't think we actually store them in Molecule
    for b in mol._getBonds():
        begin = new_index.get(int(b[0]))
        end = new_index.get(int(b[1]))
        if begin is None or end is None:
            continue  # bond leaves the selected subset
        # Look the bond up with the re-mapped indices: rdmol only knows the
        # new numbering, not the indices of the source molecule.
        if rdmol.GetBondBetweenAtoms(begin, end) is None:
            rdmol.AddBond(begin, end, Chem.BondType.SINGLE)

    # Proximitybonds I already did by using _getBonds which calls _guessBonds
    # TODO: Set PDB double bonds

    if sanitize:
        if removeHs:
            # RemoveHs returns a new molecule (sanitized by default) and
            # leaves its argument untouched, so the result must be kept.
            rdmol = Chem.RWMol(Chem.RemoveHs(rdmol))
        else:
            Chem.SanitizeMol(rdmol)
    else:
        rdmol.UpdatePropertyCache()

    # Set tetrahedral chirality from 3D co-ordinates
    Chem.AssignAtomChiralTagsFromStructure(rdmol)
    StandardPDBResidueChirality(rdmol)

    return rdmol
def get_pdbinfo(atom):
    """Return the atom's PDB residue info, or a blank one if it has none.

    A newly created ``AtomPDBResidueInfo`` is returned as-is; it is not
    attached to ``atom``.
    """
    # TODO: should I copy the atom and/or info?
    existing = atom.GetPDBResidueInfo()
    return Chem.AtomPDBResidueInfo() if existing is None else existing
def _atom_info_helper(self, atom):
    """Build a new AtomPDBResidueInfo mirroring the residue of ``atom``.

    The residue name, residue number and chain id are copied from the atom's
    existing PDB residue info; the atom name is the placeholder ``"TODO"``.
    """
    source = atom.GetPDBResidueInfo()
    return Chem.AtomPDBResidueInfo(
        atomName="TODO",
        residueName=source.GetResidueName(),
        residueNumber=source.GetResidueNumber(),
        chainId=source.GetChainId(),
    )
def create_atom(number, name, aromatic=False):
    """Create an RDKit atom carrying a PDB-style atom name.

    ``name`` is stripped, then stored with one leading space and left-justified
    to three characters (space padded).  Occupancy and temperature factor are
    both set to 0.
    """
    stripped = name.strip()

    residue_info = Chem.AtomPDBResidueInfo()
    residue_info.SetName(" {: <3s}".format(stripped))  # pad with the space character
    residue_info.SetOccupancy(0)
    residue_info.SetTempFactor(0)

    new_atom = Chem.Atom(number)
    new_atom.SetIsAromatic(aromatic)
    new_atom.SetMonomerInfo(residue_info)
    return new_atom
def copy_pdbinfo(info):
    """Return a new AtomPDBResidueInfo with the field values of ``info``.

    Each listed field is read through its ``Get<Field>`` accessor on ``info``
    and written through the matching ``Set<Field>`` on the copy.
    """
    fields = (
        "Name",
        "SerialNumber",
        "AltLoc",
        "ResidueName",
        "ResidueNumber",
        "ChainId",
        "InsertionCode",
        "Occupancy",
        "TempFactor",
        "IsHeteroAtom",
        "SecondaryStructure",
        "SegmentNumber",
    )
    duplicate = Chem.AtomPDBResidueInfo()
    for field in fields:
        value = getattr(info, "Get" + field)()
        getattr(duplicate, "Set" + field)(value)
    return duplicate
def test_from_atom_empty(self, name, number, chain):
    """ResidueId.from_atom on the given residue fields equals a default ResidueId.

    Only the fields that are not ``None`` are written to the residue info
    before it is attached to a hydrogen atom.
    """
    info = Chem.AtomPDBResidueInfo()
    optional_fields = (
        (name, info.SetResidueName),
        (number, info.SetResidueNumber),
        (chain, info.SetChainId),
    )
    for value, setter in optional_fields:
        if value is not None:
            setter(value)

    hydrogen = Chem.Atom(1)
    hydrogen.SetMonomerInfo(info)

    assert ResidueId.from_atom(hydrogen) == ResidueId()
def test_from_atom(self, name, number, chain):
    """ResidueId.from_atom reproduces the name, number and chain given.

    Only truthy fields are written to the residue info; the resulting
    ResidueId is then compared against the raw arguments.
    """
    info = Chem.AtomPDBResidueInfo()
    optional_fields = (
        (name, info.SetResidueName),
        (number, info.SetResidueNumber),
        (chain, info.SetChainId),
    )
    for value, setter in optional_fields:
        if value:
            setter(value)

    hydrogen = Chem.Atom(1)
    hydrogen.SetMonomerInfo(info)
    parsed = ResidueId.from_atom(hydrogen)

    assert parsed.name == name
    assert parsed.number == number
    assert parsed.chain == chain
def _parse_w_virtuals(self):
    """
    Add 1 or 2 vanadium (virtual) atoms, 0.1A away

    Pads ``self.mol`` up to three atoms: each virtual atom is bonded to the
    atom with the highest atomic number through a zero-order bond and placed
    0.1 from it.  ``self.mol``, ``self._undescribed`` and
    ``self.ordered_atoms`` are replaced, and three icoor entries are
    registered through ``self._add_icoor``.

    :raises ValueError: if the molecule already has 3 or more atoms, or none.
    :return: None
    """
    target_atom_count = 3  # this is fixed due to icoor. But the code works for more if add_icoor part is corrected.
    nonvirtuals = self.mol.GetNumAtoms()
    virtuals = target_atom_count - nonvirtuals
    if virtuals <= 0:
        raise ValueError('Human called this when there are 3+ atoms.')
    elif virtuals == target_atom_count:
        # nonvirtuals == 0
        raise ValueError('There are no atoms')
    # 1 or more virtuals
    # anchor = first atom with the largest atomic number
    anchor = sorted(self.mol.GetAtoms(), key=lambda atom: atom.GetAtomicNum(), reverse=True)[0]
    anchor_idx = anchor.GetIdx()  # either 0 or 1
    mol = Chem.RWMol(self.mol)
    for i in range(virtuals):
        virtual = Chem.Atom('V')
        virtual.SetDoubleProp('_GasteigerCharge', 0.0)
        virtual.SetProp('_rType', 'VIRT')
        mol.AddAtom(virtual)
        virtual_idx = mol.GetNumAtoms() - 1
        # re-fetch the atom from the molecule and set the residue info on that instance
        virtual = mol.GetAtomWithIdx(virtual_idx)  # the PDBResidue info does not get set?
        mol.AddBond(anchor_idx, virtual_idx, Chem.BondType.ZERO)
        virtual.SetMonomerInfo(
            Chem.AtomPDBResidueInfo(atomName=f'V{i+1}',
                                    serialNumber=virtual_idx,
                                    residueName=self.NAME,
                                    isHeteroAtom=not self.is_aminoacid()))
    # get the coords off the original (without vanadium "virtual" atoms)
    coordMap = {
        i: mol.GetConformer().GetAtomPosition(i)
        for i in range(self.mol.GetNumAtoms())
    }
    AllChem.EmbedMolecule(mol, coordMap=coordMap)
    # virtual atoms occupy indices nonvirtuals .. nonvirtuals + virtuals - 1
    for i in range(virtuals):
        AllChem.SetBondLength(mol.GetConformer(), anchor_idx, i + nonvirtuals, 0.1)
    self.mol = mol.GetMol()
    self._undescribed = deque(self.mol.GetAtoms())
    self.ordered_atoms = []
    # NOTE(review): these atoms come from the editable copy `mol`, not from
    # the new self.mol — confirm this is intended.
    atoms = mol.GetAtoms()
    self._add_icoor([atoms[0], atoms[0], atoms[1], atoms[2]])
    self._add_icoor([atoms[1], atoms[0], atoms[1], atoms[2]])
    self._add_icoor([atoms[2], atoms[1], atoms[0], atoms[2]])
    self.ordered_atoms = self.mol.GetAtoms()
def __init__(self, smiles):
    """Build the fragment from a SMILES string.

    Every atom is given PDB residue info: an atom name made of the element
    symbol followed by the atom index (one leading space, left-justified to
    three characters), residue name ``UNK``, residue number 1, occupancy and
    temperature factor 0, and the hetero-atom flag cleared.

    Raises
    ------
    ValueError
        If RDKit cannot parse ``smiles``.
    """
    fragment = Chem.MolFromSmiles(smiles)
    if fragment is None:
        # MolFromSmiles signals a parse failure by returning None.
        raise ValueError("Could not parse SMILES: {!r}".format(smiles))

    for idx, atm in enumerate(fragment.GetAtoms()):
        name = "{}{}".format(atm.GetSymbol(), idx)
        info = Chem.AtomPDBResidueInfo()
        info.SetName(" {: <3s}".format(name))
        info.SetIsHeteroAtom(False)
        info.SetResidueNumber(1)
        info.SetResidueName("UNK")
        info.SetOccupancy(0)
        info.SetTempFactor(0)
        atm.SetMonomerInfo(info)

    super(Fragment, self).__init__(fragment)
def _set_PDBInfo_atomname(self, atom, name, overwrite=False):
    """Assign a PDB atom name to ``atom`` and return the name in effect.

    * No residue info yet: a new one is attached (name padded through
      ``self.pad_name``, serial number = atom index, residue name
      ``self.NAME``, hetero flag = not an amino acid) and ``name`` is
      returned.
    * Existing name equal to ``name``: nothing changes, ``name`` is returned.
    * Existing different name: it is replaced by ``name`` only when
      ``overwrite`` is true; otherwise the existing name is returned.
    """
    info = atom.GetPDBResidueInfo()
    if info is not None:
        if info.GetName() == name:
            return name
        if not overwrite:
            return info.GetName()
        info.SetName(name)
        return name

    hetero = not self.is_aminoacid()
    fresh_info = Chem.AtomPDBResidueInfo(atomName=self.pad_name(name),
                                         serialNumber=atom.GetIdx(),
                                         residueName=self.NAME,
                                         isHeteroAtom=hetero)
    atom.SetMonomerInfo(fresh_info)
    return name
def test_from_atom(self, name, number, chain):
    """ResidueId.from_atom falls back to defaults for missing fields.

    Fields that are not ``None`` are written to the residue info.  The
    expected values substitute ``"UNK"`` for a falsy name, ``0`` for a falsy
    number and ``None`` for a falsy chain.
    """
    info = Chem.AtomPDBResidueInfo()
    optional_fields = (
        (name, info.SetResidueName),
        (number, info.SetResidueNumber),
        (chain, info.SetChainId),
    )
    for value, setter in optional_fields:
        if value is not None:
            setter(value)

    hydrogen = Chem.Atom(1)
    hydrogen.SetMonomerInfo(info)
    parsed = ResidueId.from_atom(hydrogen)

    expected_name = name if name else "UNK"
    expected_number = number if number else 0
    expected_chain = chain if chain else None
    assert parsed.name == expected_name
    assert parsed.number == expected_number
    assert parsed.chain == expected_chain
def add_hetatm(self, symbol, coords=(0, 0, 0), atom_name="D1", res_name="LG1", res_numb=1):
    """Append a hetero atom to ``self.structure``.

    Args:
        symbol (str): Any chemical element
        coords (tuple, optional): 3D coordinate of the added atom
        atom_name (str, optional): PDB name of the added atom
        res_name (str, optional): PDB residue name of the added atom
        res_numb (int, optional): PDB residue number of the added atom

    Returns:
        str: ``"<residue name> <atom name>"`` of the added atom, both
        stripped of surrounding whitespace and read back from the structure.
    """
    # describe the new atom
    new_atom = Chem.rdchem.Atom(symbol)
    residue_info = Chem.AtomPDBResidueInfo()
    residue_info.SetName(atom_name)
    residue_info.SetResidueName(res_name)
    residue_info.SetResidueNumber(res_numb)
    residue_info.SetIsHeteroAtom(True)
    new_atom.SetMonomerInfo(residue_info)

    # insert it and position it in conformer 0
    editable = Chem.rdchem.EditableMol(self.structure)
    new_idx = editable.AddAtom(new_atom)
    self.structure = editable.GetMol()
    self.structure.GetConformer(0).SetAtomPosition(new_idx, coords)

    # build the key from what the structure actually stores
    stored_info = self.structure.GetAtomWithIdx(new_idx).GetPDBResidueInfo()
    atom_name = stored_info.GetName().strip()
    resi_name = stored_info.GetResidueName().strip()
    key = f"{resi_name} {atom_name}"

    self.update_atom_cache()
    return key
def write_pdb_file(mol, mol_amber, output_file, resi):
    """Write ``mol`` to a PDB file without CONECT records.

    Each RDKit atom is given a fresh ``AtomPDBResidueInfo`` whose atom name
    comes from the entry at the same position in ``mol_amber``, with residue
    number 1 and the hetero-atom flag cleared.  After RDKit writes the file,
    every line containing ``CONECT`` is removed from it.

    Parameters
    ----------
    mol :
        RDKit molecule to write; its atoms' monomer info is overwritten.
    mol_amber :
        Indexable collection whose items expose a ``name`` attribute, in the
        same order as the atoms of ``mol``.
    output_file : str
        Path of the PDB file to create.
    resi : str
        Residue name written for every atom.
    """
    for idx, atom in enumerate(mol.GetAtoms()):
        atom_name = mol_amber[idx].name
        mi = Chem.AtomPDBResidueInfo()
        mi.SetName(atom_name)
        mi.SetResidueName(''.ljust(4 - len(atom_name)) + resi)
        mi.SetResidueNumber(1)
        mi.SetIsHeteroAtom(False)
        atom.SetMonomerInfo(mi)

    Chem.MolToPDBFile(mol, output_file, flavor=2)

    # CONECT records break leap.  Filter them out in Python rather than
    # shelling out to `sed -i`: no shell quoting issues with the path and no
    # dependency on a particular sed implementation.
    with open(output_file, 'r') as handle:
        kept_lines = [line for line in handle if 'CONECT' not in line]
    with open(output_file, 'w') as handle:
        handle.writelines(kept_lines)
def AddMissingAtoms(protein, residue, amap, template):
    """Add missing atoms to protein molecule only at the residue according to
    template.

    Parameters
    ----------
    protein: rdkit.Chem.rdchem.RWMol
        Mol with whole protein. Note that it is modified in place.
    residue:
        Mol with residue only
    amap: list
        List mapping atom IDs in residue to atom IDs in whole protein
        (amap[i] = j means that i'th atom in residue corresponds to j'th
        atom in protein)
    template:
        Residue template

    Returns
    -------
    tuple
        The tuple returned by ``PreparePDBResidue`` (described by the
        original documentation as ``protein``, ``visited_bonds``,
        ``is_complete``) with one extra trailing element:

        new_atoms: list
            Indices (in ``protein``) of the atoms that were added

    Raises
    ------
    AddAtomsError
        If the residue has too few atoms/bonds to embed, if the constrained
        embedding fails, or if no atom had to be added.
    SubstructureMatchError
        If the residue matches the template neither directly nor after both
        were passed through ``SimplifyMol``.
    """
    # TODO: try to better guess the types of atoms (if possible)

    # Catch residues which have less than 4 atoms (i.e. cannot have complete
    # backbone), and template has more atoms than that, or residues with
    # many missing atoms, which lead to low number of bonds (less than 3)
    if ((len(amap) < 4 or residue.GetNumBonds() < 3) and
            template.GetNumAtoms() > 4):
        raise AddAtomsError('Residue has too few atoms (%i) to properly embed '
                            'residue conformer.' % len(amap))

    # we need the match anyway and ConstrainedEmbed does not output it
    matched_atoms = template.GetSubstructMatch(residue)
    if matched_atoms:  # instead of catching ValueError
        try:
            fixed_residue = ConstrainedEmbed(template, residue)
        except ValueError:
            raise AddAtomsError('Could not embed residue')
    else:
        # no direct match: retry with simplified copies of both molecules
        residue2 = SimplifyMol(Chem.Mol(residue))
        template2 = SimplifyMol(Chem.Mol(template))
        matched_atoms = template2.GetSubstructMatch(residue2)
        if matched_atoms:
            try:
                fixed_residue = ConstrainedEmbed(template2, residue2)
            except ValueError:
                raise AddAtomsError('Could not embed residue')
            # copy coordinates to molecule with appropriate bond orders
            fixed_residue2 = Chem.Mol(template)
            fixed_residue2.RemoveAllConformers()
            fixed_residue2.AddConformer(fixed_residue.GetConformer(-1))
            fixed_residue = fixed_residue2
        else:
            raise SubstructureMatchError(
                'No matching found at missing atom stage.',
                template.GetProp('_Name'),
                Chem.MolToSmiles(template),
                Chem.MolToSmiles(residue))

    new_atoms = []  # protein indices of the atoms added here
    new_amap = []  # fixed_residue atom index -> protein atom index
    info = residue.GetAtomWithIdx(0).GetPDBResidueInfo()
    protein_conformer = protein.GetConformer()
    fixed_conformer = fixed_residue.GetConformer()

    for i in range(fixed_residue.GetNumAtoms()):
        if i not in matched_atoms:
            atom = fixed_residue.GetAtomWithIdx(i)
            # we need to generate atom names like 'H123', these are
            # "wrapped around" below when setting 'atomName' to '3H12'
            atom_symbol = atom.GetSymbol()
            name = (atom_symbol + str(i)[:4 - len(atom_symbol)]).ljust(4)
            new_info = Chem.AtomPDBResidueInfo(
                atomName=name[-1:] + name[:-1],  # wrap around
                residueName=info.GetResidueName(),
                residueNumber=info.GetResidueNumber(),
                chainId=info.GetChainId(),
                insertionCode=info.GetInsertionCode(),
                isHeteroAtom=info.GetIsHeteroAtom())

            atom.SetMonomerInfo(new_info)
            new_id = protein.AddAtom(atom)
            new_atoms.append(new_id)
            pos = fixed_conformer.GetAtomPosition(i)
            protein_conformer.SetAtomPosition(new_id, pos)
            new_amap.append(new_id)
        else:
            new_amap.append(amap[matched_atoms.index(i)])

    # add bonds in separate loop (we need all atoms added before that)
    for i in range(fixed_residue.GetNumAtoms()):
        if i not in matched_atoms:
            atom = fixed_residue.GetAtomWithIdx(i)
            for n in atom.GetNeighbors():
                ni = n.GetIdx()
                bond = fixed_residue.GetBondBetweenAtoms(i, ni)
                # for multiple missing atoms we may hit bonds multiple times
                new_bond = protein.GetBondBetweenAtoms(new_amap[i],
                                                       new_amap[ni])
                if new_bond is None:
                    protein.AddBond(new_amap[i], new_amap[ni])
                    new_bond = protein.GetBondBetweenAtoms(
                        new_amap[i], new_amap[ni])
                    new_bond.SetBondType(bond.GetBondType())

    # if there are no new atoms raise an exception and don't go further
    if len(new_atoms) == 0:
        raise AddAtomsError

    # Each definition: a pattern molecule, PDB atom names keyed by position
    # in the pattern match, and the pair of atom names forming the
    # inter-residue bond.
    backbone_definitions = [
        # Phosphodiester Bond
        {
            'smarts': Chem.MolFromSmiles('O=P(O)OCC1OC(CC1O)'),
            'atom_types': {
                0: 'OP1',
                1: 'P',
                2: 'OP2',
                3: 'O5\'',
                4: 'C5\'',
                5: 'C4\'',
                9: 'C3\'',
                10: 'O3\''
            },
            'bond_pair': ('O3\'', 'P')
        },
        # Peptide Bond
        {
            'smarts': Chem.MolFromSmiles('C(=O)CN'),
            'atom_types': {
                0: 'C',
                1: 'O',
                2: 'CA',
                3: 'N'
            },
            'bond_pair': ('C', 'N')
        },
    ]

    info = residue.GetAtomWithIdx(0).GetPDBResidueInfo()
    res_num = info.GetResidueNumber()
    res_chain = info.GetChainId()

    for bond_def in backbone_definitions:
        backbone_match = fixed_residue.GetSubstructMatch(bond_def['smarts'])
        if backbone_match:
            for i in new_atoms:
                if new_amap.index(i) in backbone_match:
                    atom = protein.GetAtomWithIdx(i)
                    match_idx = backbone_match.index(new_amap.index(i))
                    if match_idx not in bond_def['atom_types']:
                        # if atom type is not defined we can skip that atom
                        continue
                    # Set atom label if present in backbone definition
                    match_type = bond_def['atom_types'][match_idx]
                    atom.GetPDBResidueInfo().SetName(' ' + match_type.ljust(3))

                    # define upstream and downstream bonds
                    bonds = zip([
                        bond_def['bond_pair'],
                        reversed(bond_def['bond_pair'])
                    ], [1, -1])
                    for (a1, a2), diff in bonds:
                        if match_type == a1:
                            # scan protein atoms forwards (diff=1) or
                            # backwards (diff=-1) starting at amap[0]
                            limit = max(-1, protein.GetNumAtoms() * diff)
                            for j in range(amap[0], limit, diff):
                                info = (protein.GetAtomWithIdx(
                                    j).GetPDBResidueInfo())
                                res2_num = info.GetResidueNumber()
                                res2_chain = info.GetChainId()
                                if (res2_num == res_num + diff and
                                        res_chain == res2_chain):
                                    if info.GetName().strip() == a2:
                                        protein.AddBond(
                                            i, j, Chem.BondType.SINGLE)
                                        break
                                elif (abs(res2_num - res_num) > 1 or
                                      res_chain != res2_chain):
                                    # left the neighbouring residue / chain
                                    break

    # run minimization just for this residue
    protein = UFFConstrainedOptimize(protein, moving_atoms=new_atoms)

    # run PreparePDBResidue to fix atom properties
    out = PreparePDBResidue(protein, fixed_residue, new_amap, template)

    return out + (new_atoms, )
def jsontomols(text, strict=True):
    """Parse a moljson document into a list of RDKit molecules.

    Parameters
    ----------
    text : str
        JSON text with a ``moljson-header`` (``version`` must be 10), optional
        ``atomDefaults`` / ``bondDefaults`` and a ``molecules`` list.
    strict : bool
        Forwarded to ``UpdatePropertyCache`` for every molecule.

    Returns
    -------
    list
        One ``Chem.RWMol`` per entry of ``molecules``.

    Raises
    ------
    ValueError
        On an unsupported header version, an unsupported RDKit
        representation ``format_version``, a cis/trans bond without
        ``stereoAtoms``, a residue listed twice in the chain definitions, or
        an atom assigned to more than one residue.
    """
    from collections import defaultdict

    obj = json.loads(text)
    header = obj['moljson-header']
    if header['version'] != 10:
        raise ValueError('bad version %s' % header['version'])
    atomDefaults = obj.get('atomDefaults', {})
    bondDefaults = obj.get('bondDefaults', {})

    # Lookup tables are loop-invariant: build them once.
    chiral_tags = {
        'unspecified': Chem.ChiralType.CHI_UNSPECIFIED,
        'ccw': Chem.ChiralType.CHI_TETRAHEDRAL_CCW,
        'cw': Chem.ChiralType.CHI_TETRAHEDRAL_CW,
        'other': Chem.ChiralType.CHI_OTHER
    }
    bond_orders = {
        1: Chem.BondType.SINGLE,
        2: Chem.BondType.DOUBLE,
        3: Chem.BondType.TRIPLE
    }
    stereo_tags = {
        'cis': Chem.BondStereo.STEREOCIS,
        'trans': Chem.BondStereo.STEREOTRANS,
        'either': Chem.BondStereo.STEREOANY,
        'unspecified': Chem.BondStereo.STEREONONE
    }

    mols = []
    for mobj in obj['molecules']:
        m = Chem.RWMol()
        nm = mobj.get("name", "")
        m.SetProp('_Name', nm)

        # ---------------------------------
        #      Atoms
        for entry in mobj['atoms']:
            atm = Chem.Atom(entry.get('Z', atomDefaults.get('Z', 6)))
            atm.SetNoImplicit(True)
            atm.SetNumExplicitHs(
                entry.get('impHs', atomDefaults.get('impHs', 0)))
            atm.SetFormalCharge(entry.get('chg', atomDefaults.get('chg', 0)))
            atm.SetChiralTag(chiral_tags[entry.get(
                'stereo', atomDefaults.get('stereo', 'unspecified'))])
            atm.SetNumRadicalElectrons(
                entry.get('nRad', atomDefaults.get('nRad', 0)))
            m.AddAtom(atm)

        # ---------------------------------
        #      Atom Properties
        for entry in mobj.get('atomProperties', []):
            if entry["type"] == "partialcharges":
                if entry["method"] == "rdkit-gasteiger":
                    pnm = "_GasteigerCharge"
                else:
                    pnm = "_partialcharge"
                for i, v in enumerate(entry['values']):
                    m.GetAtomWithIdx(i).SetDoubleProp(pnm, v)

        # ---------------------------------
        #      Bonds
        # at the moment we can't set bond stereo directly because all atoms
        # need to be there, so hold that info for a bit
        bondStereos = {}
        for entry in mobj['bonds']:
            bo = bond_orders[entry.get('bo', bondDefaults.get('bo', 1))]
            nbs = m.AddBond(entry['atoms'][0], entry['atoms'][1], bo)
            bnd = m.GetBondWithIdx(nbs - 1)
            stereo = stereo_tags[entry.get(
                'stereo', bondDefaults.get('stereo', 'unspecified'))]
            if 'stereoAtoms' in entry:
                bondStereos[bnd.GetIdx()] = (entry['stereoAtoms'], stereo)
            elif stereo in (Chem.BondStereo.STEREOCIS,
                            Chem.BondStereo.STEREOTRANS):
                raise ValueError(
                    "bond stereo set, but stereoatoms not provided")
        for idx, (vs, stereo) in bondStereos.items():
            bnd = m.GetBondWithIdx(idx)
            bnd.SetStereoAtoms(vs[0], vs[1])
            bnd.SetStereo(stereo)

        # ---------------------------------
        #      Conformers
        for entry in mobj.get('conformers', []):
            conf = Chem.Conformer(m.GetNumAtoms())
            dim = entry.get('dim', 3)
            conf.Set3D(dim == 3)
            for i in range(m.GetNumAtoms()):
                # copy so the parsed input is not modified
                coord = list(entry['coords'][i])
                if dim != 3:
                    coord.append(0.)
                conf.SetAtomPosition(
                    i, Chem.rdGeometry.Point3D(coord[0], coord[1], coord[2]))
            m.AddConformer(conf, assignId=True)

        # ---------------------------------
        #      Properties
        props = mobj.get("molProperties", {})
        for p, v in props.items():
            if isinstance(v, float):
                m.SetDoubleProp(p, v)
            elif isinstance(v, int) and not isinstance(v, bool):
                m.SetIntProp(p, v)
            else:
                m.SetProp(p, str(v))

        # ---------------------------------
        #      Residue information
        chainLookup = defaultdict(str)
        for chain in mobj.get("chains", []):
            cnm = chain["name"]
            for residue in chain["residues"]:
                if residue in chainLookup:
                    raise ValueError(
                        "residue %d appears more than once in chain definitions"
                        % residue)
                chainLookup[residue] = cnm
        for residue in mobj.get("residues", []):
            idx = residue['idx']
            chain = chainLookup[idx]  # '' when the residue is in no chain
            num = residue['num']
            rnm = residue['name']
            hets = residue.get('containsHetatms', False)
            for aidx, anm, snum in zip(residue['atoms'], residue['atomNames'],
                                       residue['serialNumbers']):
                at = m.GetAtomWithIdx(aidx)
                if at.GetPDBResidueInfo():
                    raise ValueError("atom %d appears in multiple residues" %
                                     aidx)
                at.SetMonomerInfo(
                    Chem.AtomPDBResidueInfo(anm,
                                            residueName=rnm,
                                            serialNumber=snum,
                                            residueNumber=num,
                                            chainId=chain,
                                            isHeteroAtom=hets))

        # ---------------------------------
        #      representation
        # Only the first RDKit representation is applied.
        for entry in mobj.get('representations', []):
            if entry['toolkit'] == 'RDKit':
                if entry['format_version'] != 1:
                    raise ValueError("bad format_version %s" %
                                     entry['format_version'])
                for idx in entry.get('aromaticAtoms', []):
                    m.GetAtomWithIdx(idx).SetIsAromatic(True)
                for idx in entry.get('aromaticBonds', []):
                    bnd = m.GetBondWithIdx(idx)
                    bnd.SetIsAromatic(True)
                    bnd.SetBondType(Chem.BondType.AROMATIC)
                if hasattr(Chem.RingInfo, 'AddRing'):  # <- needed to be added
                    for ring in entry.get('atomRings', []):
                        # bonds between consecutive ring atoms, closing the ring
                        ringBonds = [
                            m.GetBondBetweenAtoms(a, b).GetIdx()
                            for a, b in zip(ring, ring[1:] + ring[:1])
                        ]
                        m.GetRingInfo().AddRing(ring, ringBonds)
                else:
                    Chem.GetSymmSSSR(m)
                for i, x in enumerate(entry.get('cipRanks', [])):
                    m.GetAtomWithIdx(i).SetProp('_CIPRank', str(x))
                for i, x in entry.get('cipCodes', []):
                    m.GetAtomWithIdx(i).SetProp('_CIPCode', x)
                break
        m.UpdatePropertyCache(strict=strict)
        m.SetIntProp("_StereochemDone", 1)
        mols.append(m)
    return mols
def set_PDBresidueInfo(rdkit_atom, pdb_info):
    """Attach a new AtomPDBResidueInfo built from ``pdb_info`` to the atom.

    ``pdb_info`` maps field names to values; each key ``K`` is applied by
    calling the ``SetK`` method of the residue info with the value.
    """
    residue_info = Chem.AtomPDBResidueInfo()
    for field, value in pdb_info.items():
        setter = getattr(residue_info, 'Set' + field)
        setter(value)
    rdkit_atom.SetMonomerInfo(residue_info)
def openmmTop_to_rdmol(topology, positions, verbose = False):
    """
    This function converts an OpenMM topology into a RDMol

    Parameters:
    -----------
    topology : OpenMM Topology
        The OpenMM topology
    positions : OpenMM Quantity
        The molecule atom positions associated with the
        topology
    verbose : bool
        Currently unused.

    Return:
    -------
    rdmol : RDMol
        The generated RDKit molecule

    Raises:
    -------
    ValueError
        If the number of atoms or bonds of the generated molecule differs
        from the topology.
    """
    rdmol = Chem.RWMol()

    # Mapping dictionary between openmm atoms and rdk atom indices
    openmm_atom_to_rdk_atom = {}

    # Residue names that are NOT flagged as hetero atoms
    keep = set(proteinResidues).union(dnaResidues).union(rnaResidues)

    # TODO charge info is not transferred
    for chain in topology.chains():
        chainId = str(chain.id)
        for res in chain.residues():
            resName, resNum = res.name, int(res.index)
            for openmm_at in res.atoms():
                rdatom = Chem.Atom(openmm_at.element._atomic_number)
                info = Chem.AtomPDBResidueInfo()
                info.SetName(openmm_at.name)
                info.SetChainId(chainId)
                info.SetResidueNumber(resNum)
                info.SetResidueName(resName)
                # The hetero flag lives on the residue info (Atom has no
                # such setter) and must be set before the info is attached.
                info.SetIsHeteroAtom(resName not in keep)
                rdatom.SetMonomerInfo(info)

                # AddAtom returns the index of the newly added atom
                openmm_atom_to_rdk_atom[openmm_at] = rdmol.AddAtom(rdatom)

    if topology.getNumAtoms() != rdmol.GetNumAtoms():
        raise ValueError("OpenMM topology and RDMol number of atoms mismatching: "
                         "OpenMM = {} vs RDMol = {}".format(topology.getNumAtoms(), rdmol.GetNumAtoms()))

    # Count the number of bonds in the openmm topology
    omm_bond_count = 0

    # Create the bonds
    _bondtypes = {0: Chem.BondType.UNSPECIFIED,
                  1: Chem.BondType.SINGLE,
                  1.5: Chem.BondType.AROMATIC,
                  2: Chem.BondType.DOUBLE,
                  3: Chem.BondType.TRIPLE,
                  4: Chem.BondType.QUADRUPLE,
                  5: Chem.BondType.QUINTUPLE,
                  6: Chem.BondType.HEXTUPLE,
                  7: Chem.BondType.ONEANDAHALF,}

    for omm_bond in topology.bonds():
        omm_bond_count += 1

        at0 = omm_bond[0]
        at1 = omm_bond[1]
        rd_atom0, rd_atom1 = openmm_atom_to_rdk_atom[at0], openmm_atom_to_rdk_atom[at1]

        if omm_bond.type == "Aromatic":
            # CESHI assumed by setting bond aromatic the two atoms are aromatic
            rdmol.AddBond(rd_atom0, rd_atom1, _bondtypes[1.5])
        elif omm_bond.type == "Single":
            rdmol.AddBond(rd_atom0, rd_atom1, _bondtypes[1])
        elif omm_bond.type == "Double":
            rdmol.AddBond(rd_atom0, rd_atom1, _bondtypes[2])
        elif omm_bond.type == "Triple":
            rdmol.AddBond(rd_atom0, rd_atom1, _bondtypes[3])
        elif omm_bond.type == "Amide":
            rdmol.AddBond(rd_atom0, rd_atom1, _bondtypes[int(omm_bond.order)])
        else:
            rdmol.AddBond(rd_atom0, rd_atom1, _bondtypes[0])

    # Compare bond counts (not atom counts) for the bond consistency check.
    if omm_bond_count != rdmol.GetNumBonds():
        raise ValueError("OpenMM topology and RDMol number of bonds mismatching: "
                         "OpenMM = {} vs RDMol = {}".format(omm_bond_count, rdmol.GetNumBonds()))

    pos = positions.in_units_of(unit.angstrom) / unit.angstrom

    # Size the conformer for the molecule up front.
    conformer = Chem.Conformer(rdmol.GetNumAtoms())

    for idx, coord in enumerate(pos):
        x, y, z = coord
        conformer.SetAtomPosition(idx, Geometry.Point3D(x, y, z))

    rdmol.AddConformer(conformer)

    rdmol.UpdatePropertyCache(strict=False)
    Chem.GetSSSR(rdmol)

    return rdmol.GetMol()