def split_chains(mol, distance_threshold=1.75*u.angstrom):
    """ Split a molecule's chains into unbroken biopolymers and groups of non-polymers

    This function is non-destructive - the passed molecule will not be modified.

    Specifically, this function will:
      - Split any chain with non-contiguous biopolymeric pieces into single, contiguous polymers
      - Remove any solvent molecules from a chain into their own chain
      - Isolate ligands from each chain into their own chains

    Args:
        mol (mdt.Molecule): Input molecule
        distance_threshold (u.Scalar[length]): if not ``None``, the maximum distance between
           adjacent residues for which we consider them "contiguous". For PDB data, values
           greater than 1.4 Angstrom are eminently reasonable; the default threshold of
           1.75 Angstrom is purposefully set to be extremely cautious (and still much lower
           than the distance to the *next* nearest neighbor, generally around 2.5 Angstrom)

    Returns:
        mdt.Molecule: molecule with separated chains

    Raises:
        AssertionError: if a polymer residue's type differs from the type of the first
           residue in its chain
    """
    # Work on a copy so that the caller's molecule is never touched
    tempmol = mol.copy()

    def bonded(r1, r2):
        """ True if r1 and r2 are bonded and (optionally) within the distance threshold """
        if r2 not in r1.bonded_residues:
            return False
        if distance_threshold is not None and r1.distance(r2) > distance_threshold:
            return False
        return True

    def addto(chain, res):
        """ Detach ``res`` from its current chain, then add it to ``chain`` """
        res.chain = None
        chain.add(res)

    # The first polymer segment of the first chain keeps that chain's name;
    # every other output chain is created unnamed.
    allchains = [mdt.Chain(tempmol.chains[0].name)]
    for ichain, chain in enumerate(tempmol.chains):
        chaintype = chain.residues[0].type
        solventchain = mdt.Chain(None)
        ligandchain = mdt.Chain(None)

        # Track the current polymer segment explicitly. Relying on ``allchains[-1]``
        # would merge the polymer of every chain after the first into whatever chain
        # was appended last (the previous polymer, or its solvent/ligand chain).
        polymerchain = allchains[0] if ichain == 0 else None

        for ires, residue in enumerate(chain.residues):
            if residue.type == 'unknown':
                thischain = ligandchain
            elif residue.type in ('water', 'solvent', 'ion'):
                thischain = solventchain
            else:
                assert residue.type == chaintype
                # ``prev_residue`` is only consulted past the first residue of the chain
                if polymerchain is None or (
                        ires != 0 and not bonded(residue.prev_residue, residue)):
                    polymerchain = mdt.Chain(None)
                    allchains.append(polymerchain)
                thischain = polymerchain

            addto(thischain, residue)

        # Only keep the solvent / ligand chains if something was actually put in them
        for c in (solventchain, ligandchain):
            if c.num_atoms > 0:
                allchains.append(c)

    return mdt.Molecule(allchains)
def restore_topology(mol, topo):
    """ Restores chain IDs and residue indices (these are stripped by some methods)

    Residues of ``mol`` are paired, in order, with the residues of ``topo``. Each residue
    of ``mol`` receives the reference residue's ``pdbindex`` and ``name`` and is moved into
    a newly created chain named after the reference residue's chain. Note that the residues
    of ``mol`` are modified in place before the new molecule is built from ``mol.atoms``.

    Args:
        mol (mdt.Molecule): molecule to restore topology to (must have a single chain and
            the same number of residues as ``topo``)
        topo (mdt.Molecule): reference topology

    Returns:
        mdt.Molecule: a molecule built from the atoms of ``mol``, with the restored topology
    """
    import moldesign as mdt

    assert mol.num_residues == topo.num_residues
    assert mol.num_chains == 1

    # One fresh chain per reference chain, keyed by the reference chain object
    new_chains = {refchain: mdt.Chain(name=refchain.name) for refchain in topo.chains}

    changed_msg = ('INFO: Residue #{res.index} residue code changed from "{refres.resname}"'
                   ' to "{res.resname}".')

    for res, refres in zip(mol.residues, topo.residues):
        if refres.resname != res.resname:
            print(changed_msg.format(res=res, refres=refres))

        res.pdbindex = refres.pdbindex
        res.name = refres.name
        res.chain = new_chains[refres.chain]

    return mdt.Molecule(mol.atoms)
def biopy_to_mol(struc):
    """Convert a biopython PDB structure to an MDT molecule.

    Note:
        No bonds are created by this function - the atoms of the returned molecule carry
        only their element, names, PDB serial number and position.

    Args:
        struc (Bio.PDB.Structure.Structure): Biopython PDB structure to convert

    Returns:
        moldesign.Molecule: converted molecule
    """
    # TODO: assign bonds using 1) CONECT records, 2) residue templates, 3) distance
    newatoms = []

    for chain in struc.get_chains():
        # The chain's full id unpacks into three parts; only the last (chain id) is used
        _structure_id, _model_id, pdbid = chain.get_full_id()
        newchain = mdt.Chain(pdbname=pdbid.strip())

        for residue in chain.get_residues():
            newresidue = mdt.Residue(pdbname=residue.resname.strip(),
                                     pdbindex=residue.id[1])
            newchain.add(newresidue)

            # Iterate over the residue directly rather than calling the deprecated
            # ``Residue.get_atom()`` accessor.
            for atom in residue:
                # Normalize two-letter element symbols to title case (e.g. 'CL' -> 'Cl')
                elem = atom.element
                if len(elem) == 2:
                    elem = elem[0] + elem[1].lower()

                newatom = mdt.Atom(element=elem,
                                   name=atom.get_name(),
                                   pdbname=atom.get_name(),
                                   pdbindex=atom.get_serial_number())
                newatom.position = atom.coord * u.angstrom
                newresidue.add(newatom)
                newatoms.append(newatom)

    return mdt.Molecule(newatoms, name=struc.get_full_id()[0])
def finish_job(job):
    """ Build the final helix molecule from a finished job's ``helix.pdb`` output.

    If the PDB file contains a single chain, it is split into two: the first half of the
    residues stays in the original chain (renamed 'A') and the second half is moved to a
    new chain 'B'. Biopolymer bonds are then assigned and the molecule is named.

    Note:
        ``helix_type`` and ``sequence`` are not defined in this function; they are read
        from the surrounding scope.

    Args:
        job: finished job object exposing ``get_output``

    Returns:
        the resulting molecule
    """
    pdbfile = job.get_output('helix.pdb').open()
    mol = mdt.fileio.read_pdb(pdbfile, assign_ccd_bonds=False)

    if mol.num_chains == 1:
        # A single chain must contain both strands, so the residue count must be even
        assert mol.num_residues % 2 == 0

        strand_a = mol.chains[0]
        strand_a.name = 'A'
        strand_a.pdbindex = 'A'
        strand_a.pdbname = 'A'

        strand_b = mdt.Chain('B')
        halfway = mol.num_residues // 2
        for residue in mol.residues[halfway:]:
            residue.chain = strand_b

        # Rebuild the molecule so that it picks up the new chain assignments
        mol = mdt.Molecule(mol)

    mdt.helpers.assign_biopolymer_bonds(mol)

    mol.name = '%s-DNA Helix: %s' % (helix_type.upper(), sequence)
    return mol
def parmed_to_mdt(pmdmol):
    """ Convert parmed Structure to MDT Structure

    Chains, residues and atoms are created in the order in which the parmed atoms are
    listed; bonds are then copied over with their orders truncated to integers.

    Args:
        pmdmol (parmed.Structure): parmed structure to convert

    Returns:
        mdt.Molecule: converted molecule
    """
    atoms = collections.OrderedDict()  # parmed atom -> MDT atom, in input order
    residues = {}                      # parmed residue -> MDT residue
    chains = {}                        # parmed chain id -> MDT chain

    masses = [pa.mass for pa in pmdmol.atoms] * u.dalton
    positions = [[pa.xx, pa.xy, pa.xz] for pa in pmdmol.atoms] * u.angstrom

    for iatom, patm in enumerate(pmdmol.atoms):
        pres = patm.residue

        # Create the chain the first time its id is encountered
        chain = chains.get(pres.chain)
        if chain is None:
            chain = mdt.Chain(pdbname=pres.chain)
            chains[pres.chain] = chain

        # Create the residue the first time it is encountered
        residue = residues.get(pres)
        if residue is None:
            residue = mdt.Residue(resname=pres.name,
                                  pdbindex=pres.number)
            residues[pres] = residue
            residue.chain = chain
            chain.add(residue)

        atom = mdt.Atom(name=patm.name,
                        atnum=patm.atomic_number,
                        pdbindex=patm.number,
                        mass=masses[iatom])
        atom.position = positions[iatom]
        atom.residue = residue
        residue.add(atom)

        assert patm not in atoms
        atoms[patm] = atom

    for pbnd in pmdmol.bonds:
        first, second = atoms[pbnd.atom1], atoms[pbnd.atom2]
        first.bond_to(second, int(pbnd.order))

    return mdt.Molecule(list(atoms.values()),
                        metadata=_get_pdb_metadata(pmdmol))
def _reassign_chains(f, mol):
    """ Change chain ID assignments to the mmCIF standard (parmed uses author assignments)

    If the required fields don't exist, a copy of the molecule is returned unchanged.

    Args:
        f (file): mmcif file/stream (it is rewound to the start after being parsed)
        mol (moldesign.Molecule): molecule with default parmed assignments

    Returns:
        moldesign.Molecule: new molecule with reassigned chains
    """
    data = mdt.interfaces.biopython_interface.get_mmcif_data(f)
    f.seek(0)

    try:
        poly_seq_ids = _aslist(data['_pdbx_poly_seq_scheme.asym_id'])
        nonpoly_ids = _aslist(data['_pdbx_nonpoly_scheme.asym_id'])
    except KeyError:
        return mol.copy(name=mol.name)

    # One new chain object for every distinct asym_id
    newchains = {}
    for chain_name in set(poly_seq_ids + nonpoly_ids):
        newchains[chain_name] = mdt.Chain(chain_name)

    # Rows of (residue name, pdb sequence number, author chain id, mmCIF asym_id)
    poly_rows = zip(_aslist(data['_pdbx_poly_seq_scheme.mon_id']),
                    _aslist(data['_pdbx_poly_seq_scheme.pdb_seq_num']),
                    _aslist(data['_pdbx_poly_seq_scheme.pdb_strand_id']),
                    _aslist(data['_pdbx_poly_seq_scheme.asym_id']))
    nonpoly_rows = zip(_aslist(data['_pdbx_nonpoly_scheme.mon_id']),
                       _aslist(data['_pdbx_nonpoly_scheme.pdb_seq_num']),
                       _aslist(data['_pdbx_nonpoly_scheme.pdb_strand_id']),
                       _aslist(data['_pdbx_nonpoly_scheme.asym_id']))

    # Map each (residue name, sequence number, author chain id) to its new chain
    reschains = {}
    for rname, ridx, rchain, chainid in itertools.chain(poly_rows, nonpoly_rows):
        reschains[rname, ridx, rchain] = newchains[chainid]

    for residue in mol.residues:
        lookup = (residue.resname, str(residue.pdbindex), residue.chain.name)
        residue.chain = reschains[lookup]

    return mdt.Molecule(mol.atoms,
                        name=mol.name,
                        metadata=mol.metadata)
def biopy_to_mol(struc):
    """Convert a biopython PDB structure to an MDT molecule.

    Note:
        Biopython doesn't deal with bond data, so no bonds will be present
        in the Molecule

    Args:
        struc (Bio.PDB.Structure.Structure): Biopython PDB structure to convert

    Returns:
        moldesign.Molecule: converted molecule
    """
    # TODO: assign bonds using 1) CONECT records, 2) residue templates, 3) distance
    newatoms = []
    # Fallback names for chains with a blank id; ``pop()`` hands them out from the
    # end of the alphabet ('Z' first)
    backup_chain_names = list(string.ascii_uppercase)

    for chain in struc.get_chains():
        # The chain's full id unpacks into three parts; only the last (chain id) is used
        _structure_id, _model_id, pdbid = chain.get_full_id()
        if not pdbid.strip():
            pdbid = backup_chain_names.pop()
        newchain = mdt.Chain(pdbname=pdbid.strip())

        for residue in chain.get_residues():
            newresidue = mdt.Residue(pdbname=residue.resname.strip(),
                                     pdbindex=residue.id[1])
            newchain.add(newresidue)

            # Iterate over the residue directly rather than calling the deprecated
            # ``Residue.get_atom()`` accessor.
            for atom in residue:
                # Normalize two-letter element symbols to title case (e.g. 'CL' -> 'Cl')
                elem = atom.element
                if len(elem) == 2:
                    elem = elem[0] + elem[1].lower()

                newatom = mdt.Atom(element=elem,
                                   name=atom.get_name(),
                                   pdbname=atom.get_name(),
                                   pdbindex=atom.get_serial_number())
                newatom.position = atom.coord * u.angstrom
                newresidue.add(newatom)
                newatoms.append(newatom)

    return mdt.Molecule(newatoms, name=struc.get_full_id()[0])
def _reassign_chains(f, mol):
    """ Change chain ID assignments to the mmCIF standard (parmed uses author assignments)

    If the required chain-scheme fields don't exist in the file, a copy of the molecule
    is returned unchanged.

    Args:
        f (file): mmcif file/stream (it is rewound to the start after being parsed)
        mol (moldesign.Molecule): molecule with default parmed assignments

    Returns:
        moldesign.Molecule: new molecule with reassigned chains

    Raises:
        KeyError: if a residue of ``mol`` has no matching entry in the mmCIF scheme tables
    """
    def as_list(value):
        # NOTE(review): presumably a category with a single row can come back from the
        # parser as a bare value rather than a list - wrap it so that concatenation and
        # ``zip`` below work on rows instead of on individual characters.
        if isinstance(value, (list, tuple)):
            return list(value)
        return [value]

    data = mdt.interfaces.biopython_interface.get_mmcif_data(f)
    f.seek(0)

    try:
        poly_seq_ids = as_list(data['_pdbx_poly_seq_scheme.asym_id'])
        nonpoly_ids = as_list(data['_pdbx_nonpoly_scheme.asym_id'])
    except KeyError:
        # Nothing to reassign from - hand back an unchanged copy
        return mol.copy(name=mol.name)

    newchain_names = set(poly_seq_ids + nonpoly_ids)
    newchains = {name: mdt.Chain(name) for name in newchain_names}

    # Rows of (residue name, pdb sequence number, author chain id, mmCIF asym_id)
    residue_iterator = itertools.chain(
            zip(as_list(data['_pdbx_poly_seq_scheme.mon_id']),
                as_list(data['_pdbx_poly_seq_scheme.pdb_seq_num']),
                as_list(data['_pdbx_poly_seq_scheme.pdb_strand_id']),
                poly_seq_ids),

            zip(as_list(data['_pdbx_nonpoly_scheme.mon_id']),
                as_list(data['_pdbx_nonpoly_scheme.pdb_seq_num']),
                as_list(data['_pdbx_nonpoly_scheme.pdb_strand_id']),
                nonpoly_ids))

    reschains = {(rname, ridx, rchain): newchains[chainid]
                 for rname, ridx, rchain, chainid in residue_iterator}

    for residue in mol.residues:
        newchain = reschains[residue.resname, str(residue.pdbindex), residue.chain.name]

        for atom in residue.atoms:
            atom.chain = newchain
        residue.chain = newchain

    return mdt.Molecule(mol.atoms,
                        name=mol.name, metadata=mol.metadata)
def pybel_to_mol(pbmol,
                 reorder_atoms_by_residue=False,
                 primary_structure=True,
                 **kwargs):
    """ Translate a pybel molecule object into a moldesign object.

    Note:
        The focus is on translating topology and biomolecular structure -
        we don't translate any metadata.

    Args:
        pbmol (pybel.Molecule): molecule to translate
        reorder_atoms_by_residue (bool): change atom order so that all atoms in a residue are
            stored contiguously
        primary_structure (bool): translate primary structure data as well as atomic data
        **kwargs (dict): keyword arguments to moldesign.Molecule __init__ method

    Returns:
        moldesign.Molecule: translated molecule
    """
    newatom_map = {}   # openbabel atom index -> MDT atom
    newresidues = {}   # openbabel residue index -> MDT residue
    newchains = {}     # openbabel chain number -> MDT chain
    newatoms = mdt.AtomList([])
    # Fallback names for unnamed chains; ``pop()`` hands them out from the end ('Z' first)
    backup_chain_names = list(string.ascii_uppercase)

    for pybatom in pbmol.atoms:
        obatom = pybatom.OBAtom
        obres = obatom.GetResidue()
        name = obres.GetAtomID(obatom).strip()

        # Patch up two known failure modes of openbabel's element assignment
        if pybatom.atomicnum == 67:
            print(("WARNING: openbabel parsed atom serial %d (name:%s) as Holmium; "
                   "correcting to hydrogen. ") % (obatom.GetIdx(), name))
            atnum = 1
        elif pybatom.atomicnum == 0:
            print("WARNING: openbabel failed to parse atom serial %d (name:%s); guessing %s. "
                  % (obatom.GetIdx(), name, name[0]))
            atnum = mdt.data.ATOMIC_NUMBERS[name[0]]
        else:
            atnum = pybatom.atomicnum

        mdtatom = mdt.Atom(atnum=atnum,
                           name=name,
                           formal_charge=pybatom.formalcharge * u.q_e,
                           pdbname=name,
                           pdbindex=obatom.GetIdx())
        newatom_map[obatom.GetIdx()] = mdtatom
        mdtatom.position = pybatom.coords * u.angstrom

        if primary_structure:
            resname = obres.GetName()
            residx = obres.GetIdx()
            chain_id = obres.GetChain()
            chain_id_num = obres.GetChainNum()

            # Look up the chain, creating it on first encounter
            chn = newchains.get(chain_id_num)
            if chn is None:
                stripped_id = chain_id.strip()
                if not mdt.utils.is_printable(stripped_id) or not stripped_id:
                    chain_id = backup_chain_names.pop()
                    print('WARNING: assigned name %s to unnamed chain object @ %s'
                          % (chain_id, hex(chain_id_num)))
                chn = mdt.Chain(pdbname=str(chain_id))
                newchains[chain_id_num] = chn

            # Look up the residue, creating it on first encounter
            res = newresidues.get(residx)
            if res is None:
                pdb_idx = obres.GetNum()
                res = mdt.Residue(pdbname=resname, pdbindex=pdb_idx)
                newresidues[residx] = res
                chn.add(res)
                res.chain = chn

            res.add(mdtatom)

        newatoms.append(mdtatom)

    # Recreate every bond between the translated atoms
    obmol = pbmol.OBMol
    for ibond in range(obmol.NumBonds()):
        obbond = obmol.GetBond(ibond)
        begin_atom = newatom_map[obbond.GetBeginAtomIdx()]
        end_atom = newatom_map[obbond.GetEndAtomIdx()]
        order = obbond.GetBondOrder()
        bond = mdt.Bond(begin_atom, end_atom)
        bond.order = order

    if reorder_atoms_by_residue and primary_structure:
        # Rank residues by first appearance, then sort atoms by their residue's rank
        resorder = {}
        for atom in newatoms:
            if atom.residue not in resorder:
                resorder[atom.residue] = len(resorder)
        newatoms.sort(key=lambda a: resorder[a.residue])

    return mdt.Molecule(newatoms, **kwargs)
def topology_to_mol(topo, name=None, positions=None, velocities=None, assign_bond_orders=True):
    """ Convert an OpenMM topology object into an MDT molecule.

    Args:
        topo (simtk.openmm.app.topology.Topology): topology to convert
        name (str): name to assign to molecule
        positions (list): simtk list of atomic positions
        velocities (list): simtk list of atomic velocities
        assign_bond_orders (bool): assign bond orders from templates (simtk topologies
             do not store bond orders)

    Returns:
        moldesign.Molecule: the converted molecule. All bonds are created with order 1;
            if ``assign_bond_orders`` is set, template bonds are then assigned to each
            residue on a best-effort basis.
    """
    from simtk import unit as stku

    # Atoms
    atommap = {}
    newatoms = []
    masses = u.amu * [atom.element.mass.value_in_unit(stku.amu) for atom in topo.atoms()]
    for atom, mass in zip(topo.atoms(), masses):
        newatom = mdt.Atom(atnum=atom.element.atomic_number,
                           name=atom.name,
                           mass=mass)
        atommap[atom] = newatom
        newatoms.append(newatom)

    # Coordinates
    if positions is not None:
        poslist = np.array([p.value_in_unit(stku.nanometer) for p in positions]) * u.nm
        poslist.ito(u.default.length)
        for newatom, position in zip(newatoms, poslist):
            newatom.position = position
    if velocities is not None:
        velolist = np.array([v.value_in_unit(stku.nanometer/stku.femtosecond)
                             for v in velocities]) * u.nm/u.fs
        velolist = u.default.convert(velolist)
        for newatom, velocity in zip(newatoms, velolist):
            # NOTE(review): ``velocity`` has already been converted to MDT units above,
            # yet it is passed through ``simtk2pint`` - confirm that this is intended.
            newatom.momentum = newatom.mass * simtk2pint(velocity)

    # Biounits
    chains = {}
    for chain in topo.chains():
        # Topology chains that share an id end up in the same MDT chain
        if chain.id not in chains:
            chains[chain.id] = mdt.Chain(name=chain.id, index=chain.index)
        newchain = chains[chain.id]
        for residue in chain.residues():
            newresidue = mdt.Residue(name='%s%d' % (residue.name, residue.index),
                                     chain=newchain,
                                     pdbindex=int(residue.id),
                                     pdbname=residue.name)
            newchain.add(newresidue)
            for atom in residue.atoms():
                newatom = atommap[atom]
                newatom.residue = newresidue
                newresidue.add(newatom)

    # Bonds - every bond starts out with order 1
    for bond in topo.bonds():
        a1, a2 = bond
        b = mdt.Bond(atommap[a1], atommap[a2])
        b.order = 1

    if name is None:
        name = 'Unnamed molecule from OpenMM'

    newmol = mdt.Molecule(newatoms, name=name)

    if assign_bond_orders:
        for residue in newmol.residues:
            try:
                residue.assign_template_bonds()
            except (KeyError, ValueError):
                # Best effort: a residue whose template assignment fails keeps
                # its order-1 bonds
                pass

    return newmol