コード例 #1
0
def split_chains(mol, distance_threshold=1.75*u.angstrom):
    """ Split a molecule's chains into unbroken biopolymers and groups of non-polymers

    This function is non-destructive - the passed molecule will not be modified (all work is
    done on a copy).

    Specifically, this function will:
       - Split any chain with non-contiguous biopolymeric pieces into single, contiguous polymers
       - Remove any solvent molecules from a chain into their own chain
       - Isolate ligands from each chain into their own chains

    Every input chain opens its own output chain (carrying the input chain's name), so polymer
    residues from different input chains are never merged into the same output chain.

    Args:
        mol (mdt.Molecule): Input molecule
        distance_threshold (u.Scalar[length]): if not ``None``, the maximum distance between
           adjacent residues for which we consider them "contiguous". For PDB data, values greater
           than 1.4 Angstrom are eminently reasonable; the default threshold of 1.75 Angstrom is
           purposefully set to be extremely cautious (and still much lower than the distance to
           the *next* nearest neighbor, generally around 2.5 Angstrom)

    Returns:
        mdt.Molecule: molecule with separated chains

    Raises:
        AssertionError: if a polymer residue's type differs from the type of the first residue
           of its chain
    """

    tempmol = mol.copy()

    def bonded(r1, r2):
        # Two residues are contiguous if they are bonded AND (optionally) close enough
        if r2 not in r1.bonded_residues:
            return False
        if distance_threshold is not None and r1.distance(r2) > distance_threshold:
            return False
        return True

    def addto(chain, res):
        # Detach the residue from its current chain before attaching it to the new one
        res.chain = None
        chain.add(res)

    allchains = []
    for chain in tempmol.chains:
        # Start a fresh polymer chain for every input chain. Previously a single chain was
        # created before this loop, so the polymer residues of the second and later input
        # chains were appended to whatever chain happened to be last in ``allchains``
        # (typically the previous chain's solvent or ligand chain).
        allchains.append(mdt.Chain(chain.name))

        chaintype = chain.residues[0].type
        solventchain = mdt.Chain(None)
        ligandchain = mdt.Chain(None)

        # NOTE(review): residues are re-parented while ``chain.residues`` is being iterated;
        # this assumes the attribute yields a sequence unaffected by the move - confirm.
        for ires, residue in enumerate(chain.residues):
            if residue.type == 'unknown':
                thischain = ligandchain
            elif residue.type in ('water', 'solvent', 'ion'):
                thischain = solventchain
            else:
                assert residue.type == chaintype
                # A break in the polymer backbone starts a new (unnamed) chain
                if ires != 0 and not bonded(residue.prev_residue, residue):
                    allchains.append(mdt.Chain(None))
                # Solvent/ligand chains are only appended after this inner loop, so the
                # last entry is always the current polymer piece of this input chain
                thischain = allchains[-1]

            addto(thischain, residue)

        # Only keep the solvent / ligand chains if something was actually put in them
        for c in (solventchain, ligandchain):
            if c.num_atoms > 0:
                allchains.append(c)

    return mdt.Molecule(allchains)
コード例 #2
0
def restore_topology(mol, topo):
    """ Re-apply chain membership and residue naming from a reference topology

    Some methods strip chain IDs and residue indices; this copies them back, residue by
    residue, from ``topo``. The residues of ``mol`` are modified in place before a new
    molecule is built from its atoms.

    Args:
        mol (mdt.Molecule): molecule to restore topology to (must consist of a single chain
            and have as many residues as ``topo``)
        topo (mdt.Molecule): reference topology

    Returns:
        mdt.Molecule: a new molecule built from the atoms of ``mol`` with a restored topology
    """
    import moldesign as mdt

    assert mol.num_residues == topo.num_residues
    assert mol.num_chains == 1

    # One fresh chain object for every chain in the reference
    replacement_chains = {refchain: mdt.Chain(name=refchain.name)
                          for refchain in topo.chains}

    notice = ('INFO: Residue #{res.index} residue code changed from "{refres.resname}"'
              ' to "{res.resname}".')

    for target, reference in zip(mol.residues, topo.residues):
        # A changed residue code is reported but does not stop the restoration
        if reference.resname != target.resname:
            print(notice.format(res=target, refres=reference))
        target.pdbindex = reference.pdbindex
        target.name = reference.name
        target.chain = replacement_chains[reference.chain]

    return mdt.Molecule(mol.atoms)
コード例 #3
0
def biopy_to_mol(struc):
    """Convert a biopython PDB structure to an MDT molecule.

    Note:
        No bonds are created here - only chains, residues, atoms and positions are
        translated (see the TODO below).

    Args:
        struc (Bio.PDB.Structure.Structure): Biopython PDB structure to convert

    Returns:
        moldesign.Molecule: converted molecule, named after the structure's id
    """
    # TODO: assign bonds using 1) CONECT records, 2) residue templates, 3) distance
    newatoms = []

    for chain in struc.get_chains():
        tmp, pdbidx, pdbid = chain.get_full_id()
        newchain = mdt.Chain(pdbname=pdbid.strip())

        for residue in chain.get_residues():
            # residue.id is a tuple; element 1 is used as the PDB residue index
            newresidue = mdt.Residue(pdbname=residue.resname.strip(),
                                     pdbindex=residue.id[1])

            newchain.add(newresidue)

            # Iterate the residue directly instead of going through the legacy
            # ``get_atom`` accessor, which is deprecated in Biopython
            for atom in residue:
                # Normalise two-letter symbols such as 'FE' to 'Fe'
                elem = atom.element
                if len(elem) == 2:
                    elem = elem[0] + elem[1].lower()
                newatom = mdt.Atom(element=elem,
                                   name=atom.get_name(),
                                   pdbname=atom.get_name(),
                                   pdbindex=atom.get_serial_number())
                newatom.position = atom.coord * u.angstrom
                newresidue.add(newatom)

                newatoms.append(newatom)

    return mdt.Molecule(newatoms, name=struc.get_full_id()[0])
コード例 #4
0
    def finish_job(job):
        # Build the helix molecule from the job's 'helix.pdb' output; CCD bond assignment
        # is skipped because biopolymer bonds are assigned explicitly below.
        pdbstream = job.get_output('helix.pdb').open()
        mol = mdt.fileio.read_pdb(pdbstream, assign_ccd_bonds=False)

        if mol.num_chains == 1:
            # Both strands arrived in one chain: the first half stays in chain 'A',
            # the second half of the residues is moved to a new chain 'B'.
            assert mol.num_residues % 2 == 0
            first_strand = mol.chains[0]
            first_strand.name = 'A'
            first_strand.pdbindex = 'A'
            first_strand.pdbname = 'A'

            second_strand = mdt.Chain('B')
            for residue in mol.residues[mol.num_residues//2:]:
                residue.chain = second_strand

            # Rebuild the molecule so the new chain layout takes effect
            mol = mdt.Molecule(mol)

        mdt.helpers.assign_biopolymer_bonds(mol)

        # ``helix_type`` and ``sequence`` come from the enclosing scope
        mol.name = '%s-DNA Helix: %s' % (helix_type.upper(), sequence)
        return mol
コード例 #5
0
def parmed_to_mdt(pmdmol):
    """ Build an MDT molecule from a parmed Structure

    Chains, residues and atoms are created on first encounter while walking the parmed
    atom list in order; bonds are translated afterwards with their order truncated to an
    integer.

    Args:
        pmdmol (parmed.Structure): parmed structure to convert

    Returns:
        mdt.Molecule: converted molecule
    """
    atoms = collections.OrderedDict()  # parmed atom -> MDT atom, in file order
    residues = {}                      # parmed residue -> MDT residue
    chains = {}                        # parmed chain id -> MDT chain

    masses = [pa.mass for pa in pmdmol.atoms] * u.dalton
    positions = [[pa.xx, pa.xy, pa.xz] for pa in pmdmol.atoms] * u.angstrom

    for index, pmd_atom in enumerate(pmdmol.atoms):
        pmd_residue = pmd_atom.residue
        chain_id = pmd_residue.chain

        # Look up (or lazily create) the chain this atom's residue belongs to
        try:
            chain = chains[chain_id]
        except KeyError:
            chain = mdt.Chain(pdbname=chain_id)
            chains[chain_id] = chain

        # Same for the residue, which is attached to its chain when first created
        try:
            residue = residues[pmd_residue]
        except KeyError:
            residue = mdt.Residue(resname=pmd_residue.name,
                                  pdbindex=pmd_residue.number)
            residues[pmd_residue] = residue
            residue.chain = chain
            chain.add(residue)

        atom = mdt.Atom(name=pmd_atom.name,
                        atnum=pmd_atom.atomic_number,
                        pdbindex=pmd_atom.number,
                        mass=masses[index])
        atom.position = positions[index]

        atom.residue = residue
        residue.add(atom)

        # Every parmed atom must be translated exactly once
        assert pmd_atom not in atoms
        atoms[pmd_atom] = atom

    for pmd_bond in pmdmol.bonds:
        atoms[pmd_bond.atom1].bond_to(atoms[pmd_bond.atom2], int(pmd_bond.order))

    return mdt.Molecule(list(atoms.values()),
                        metadata=_get_pdb_metadata(pmdmol))
コード例 #6
0
def _reassign_chains(f, mol):
    """ Switch chain assignments from author IDs to mmCIF ``asym_id`` values

    parmed assigns chains using the author-provided IDs; this looks every residue up in
    the ``_pdbx_poly_seq_scheme`` / ``_pdbx_nonpoly_scheme`` tables and moves it to the
    chain named by the corresponding ``asym_id``.

    If the ``asym_id`` fields don't exist, a copy of the molecule is returned unchanged.

    Args:
        f (file): mmcif file/stream (rewound to the start after it is parsed)
        mol (moldesign.Molecule): molecule with default parmed assignments

    Returns:
        moldesign.Molecule: new molecule with reassigned chains
    """
    data = mdt.interfaces.biopython_interface.get_mmcif_data(f)
    f.seek(0)

    try:
        poly_seq_ids = _aslist(data['_pdbx_poly_seq_scheme.asym_id'])
        nonpoly_ids = _aslist(data['_pdbx_nonpoly_scheme.asym_id'])
    except KeyError:
        return mol.copy(name=mol.name)

    # One new chain per distinct asym_id
    newchains = {}
    for chain_name in set(poly_seq_ids + nonpoly_ids):
        newchains[chain_name] = mdt.Chain(chain_name)

    # Rows of (residue name, author sequence number, author chain id, asym_id)
    polymer_rows = zip(_aslist(data['_pdbx_poly_seq_scheme.mon_id']),
                       _aslist(data['_pdbx_poly_seq_scheme.pdb_seq_num']),
                       _aslist(data['_pdbx_poly_seq_scheme.pdb_strand_id']),
                       _aslist(data['_pdbx_poly_seq_scheme.asym_id']))
    nonpolymer_rows = zip(_aslist(data['_pdbx_nonpoly_scheme.mon_id']),
                          _aslist(data['_pdbx_nonpoly_scheme.pdb_seq_num']),
                          _aslist(data['_pdbx_nonpoly_scheme.pdb_strand_id']),
                          _aslist(data['_pdbx_nonpoly_scheme.asym_id']))

    # Author-side residue identity -> destination chain
    reschains = {}
    for rname, ridx, rchain, chainid in itertools.chain(polymer_rows, nonpolymer_rows):
        reschains[rname, ridx, rchain] = newchains[chainid]

    for residue in mol.residues:
        # Sequence numbers in the mmCIF tables are strings, hence the str() conversion
        author_key = (residue.resname, str(residue.pdbindex), residue.chain.name)
        residue.chain = reschains[author_key]

    return mdt.Molecule(mol.atoms, name=mol.name, metadata=mol.metadata)
コード例 #7
0
def biopy_to_mol(struc):
    """Convert a biopython PDB structure to an MDT molecule.

    Note:
        Biopython doesn't deal with bond data, so no bonds will be present
        in the Molecule

    Chains whose ID is blank are given a fallback name taken from the end of the
    upper-case alphabet ('Z' first, then 'Y', ...).

    Args:
        struc (Bio.PDB.Structure.Structure): Biopython PDB structure to convert

    Returns:
        moldesign.Molecule: converted molecule, named after the structure's id
    """
    # TODO: assign bonds using 1) CONECT records, 2) residue templates, 3) distance
    newatoms = []
    backup_chain_names = list(string.ascii_uppercase)

    for chain in struc.get_chains():
        tmp, pdbidx, pdbid = chain.get_full_id()
        chain_name = pdbid.strip()
        if not chain_name:
            chain_name = backup_chain_names.pop()
        newchain = mdt.Chain(pdbname=chain_name)

        for residue in chain.get_residues():
            # residue.id is a tuple; element 1 is used as the PDB residue index
            newresidue = mdt.Residue(pdbname=residue.resname.strip(),
                                     pdbindex=residue.id[1])

            newchain.add(newresidue)

            # Iterate the residue directly instead of going through the legacy
            # ``get_atom`` accessor, which is deprecated in Biopython
            for atom in residue:
                # Normalise two-letter symbols such as 'FE' to 'Fe'
                elem = atom.element
                if len(elem) == 2:
                    elem = elem[0] + elem[1].lower()
                newatom = mdt.Atom(element=elem,
                                   name=atom.get_name(),
                                   pdbname=atom.get_name(),
                                   pdbindex=atom.get_serial_number())
                newatom.position = atom.coord * u.angstrom
                newresidue.add(newatom)

                newatoms.append(newatom)

    return mdt.Molecule(newatoms,
                        name=struc.get_full_id()[0])
コード例 #8
0
def _reassign_chains(f, mol):
    """ Change chain ID assignments to the mmCIF standard (parmed uses author assignments)

    Every residue is looked up in the ``_pdbx_poly_seq_scheme`` / ``_pdbx_nonpoly_scheme``
    tables by (residue name, author sequence number, author chain id) and moved, together
    with its atoms, to the chain named by the matching ``asym_id``.

    Args:
        f (file): mmcif file/stream (rewound to the start after it is parsed)
        mol (moldesign.Molecule): molecule with default parmed assignments

    Returns:
        moldesign.Molecule: new molecule with reassigned chains

    Raises:
        KeyError: if a required mmCIF field is missing, or a residue of ``mol`` has no
            entry in the scheme tables
    """
    data = mdt.interfaces.biopython_interface.get_mmcif_data(f)
    f.seek(0)

    def column(key):
        # A category with a single row may be stored as a bare value rather than a list;
        # without this, string values would be concatenated / iterated character by character
        value = data[key]
        return value if isinstance(value, list) else [value]

    newchain_names = set(column('_pdbx_poly_seq_scheme.asym_id') +
                         column('_pdbx_nonpoly_scheme.asym_id'))
    newchains = {name: mdt.Chain(name) for name in newchain_names}

    # Rows of (residue name, author sequence number, author chain id, asym_id)
    residue_iterator = itertools.chain(
        zip(column('_pdbx_poly_seq_scheme.mon_id'),
            column('_pdbx_poly_seq_scheme.pdb_seq_num'),
            column('_pdbx_poly_seq_scheme.pdb_strand_id'),
            column('_pdbx_poly_seq_scheme.asym_id')),
        zip(column('_pdbx_nonpoly_scheme.mon_id'),
            column('_pdbx_nonpoly_scheme.pdb_seq_num'),
            column('_pdbx_nonpoly_scheme.pdb_strand_id'),
            column('_pdbx_nonpoly_scheme.asym_id')))

    reschains = {(rname, ridx, rchain): newchains[chainid]
                 for rname, ridx, rchain, chainid in residue_iterator}

    for residue in mol.residues:
        # Sequence numbers in the mmCIF tables are strings, hence the str() conversion
        newchain = reschains[residue.resname,
                             str(residue.pdbindex), residue.chain.name]

        for atom in residue.atoms:
            atom.chain = newchain
        residue.chain = newchain

    return mdt.Molecule(mol.atoms, name=mol.name, metadata=mol.metadata)
コード例 #9
0
def pybel_to_mol(pbmol,
                 reorder_atoms_by_residue=False,
                 primary_structure=True,
                 **kwargs):
    """ Build a moldesign molecule from a pybel molecule.

    Note:
        Only topology and biomolecular structure are carried over - no metadata is translated.

    Args:
        pbmol (pybel.Molecule): molecule to translate
        reorder_atoms_by_residue (bool): change atom order so that all atoms in a residue are
            stored contiguously (only has an effect when ``primary_structure`` is set)
        primary_structure (bool): translate primary structure data as well as atomic data
        **kwargs (dict): keyword arguments to  moldesign.Molecule __init__ method

    Returns:
        moldesign.Molecule: translated molecule
    """
    atoms_by_serial = {}     # openbabel atom index -> MDT atom
    residues_by_idx = {}     # openbabel residue index -> MDT residue
    chains_by_num = {}       # openbabel chain number -> MDT chain
    newatoms = mdt.AtomList([])
    spare_chain_names = list(string.ascii_uppercase)

    for pybatom in pbmol.atoms:
        obatom = pybatom.OBAtom
        obres = obatom.GetResidue()
        name = obres.GetAtomID(obatom).strip()
        serial = obatom.GetIdx()

        # Work around two known element-parsing failures before creating the atom
        atnum = pybatom.atomicnum
        if atnum == 67:
            print((
                "WARNING: openbabel parsed atom serial %d (name:%s) as Holmium; "
                "correcting to hydrogen. ") % (serial, name))
            atnum = 1
        elif atnum == 0:
            print(
                "WARNING: openbabel failed to parse atom serial %d (name:%s); guessing %s. "
                % (serial, name, name[0]))
            atnum = mdt.data.ATOMIC_NUMBERS[name[0]]

        mdtatom = mdt.Atom(atnum=atnum,
                           name=name,
                           formal_charge=pybatom.formalcharge * u.q_e,
                           pdbname=name,
                           pdbindex=serial)
        atoms_by_serial[serial] = mdtatom
        mdtatom.position = pybatom.coords * u.angstrom

        if primary_structure:
            resname = obres.GetName()
            residx = obres.GetIdx()
            chain_id = obres.GetChain()
            chain_id_num = obres.GetChainNum()

            chn = chains_by_num.get(chain_id_num)
            if chn is None:
                # First atom of this chain: create it, inventing a name if necessary
                stripped_id = chain_id.strip()
                if not (mdt.utils.is_printable(stripped_id) and stripped_id):
                    chain_id = spare_chain_names.pop()
                    print(
                        'WARNING: assigned name %s to unnamed chain object @ %s'
                        % (chain_id, hex(chain_id_num)))
                chn = mdt.Chain(pdbname=str(chain_id))
                chains_by_num[chain_id_num] = chn

            res = residues_by_idx.get(residx)
            if res is None:
                # First atom of this residue: create it and attach it to its chain
                res = mdt.Residue(pdbname=resname, pdbindex=obres.GetNum())
                residues_by_idx[residx] = res
                chn.add(res)
                res.chain = chn

            res.add(mdtatom)

        newatoms.append(mdtatom)

    obmol = pbmol.OBMol
    for ibond in range(obmol.NumBonds()):
        obbond = obmol.GetBond(ibond)
        begin_atom = atoms_by_serial[obbond.GetBeginAtomIdx()]
        end_atom = atoms_by_serial[obbond.GetEndAtomIdx()]
        order = obbond.GetBondOrder()
        bond = mdt.Bond(begin_atom, end_atom)
        bond.order = order

    if reorder_atoms_by_residue and primary_structure:
        # Rank residues by first appearance, then sort atoms by that rank (the sort is
        # stable, so atoms keep their relative order inside each residue)
        first_seen = {}
        for atom in newatoms:
            if atom.residue not in first_seen:
                first_seen[atom.residue] = len(first_seen)
        newatoms.sort(key=lambda atm: first_seen[atm.residue])

    return mdt.Molecule(newatoms, **kwargs)
コード例 #10
0
def topology_to_mol(topo,
                    name=None,
                    positions=None,
                    velocities=None,
                    assign_bond_orders=True):
    """ Convert an OpenMM topology object into an MDT molecule.

    Args:
        topo (simtk.openmm.app.topology.Topology): topology to convert
        name (str): name to assign to molecule (defaults to
             'Unnamed molecule from OpenMM')
        positions (list): simtk list of atomic positions
        velocities (list): simtk list of atomic velocities
        assign_bond_orders (bool): assign bond orders from templates (simtk topologies
             do not store bond orders)

    Returns:
        moldesign.Molecule: the converted molecule. All bonds are created with order 1;
             if ``assign_bond_orders`` is set, template bonds are then assigned per residue
             on a best-effort basis.
    """
    from simtk import unit as stku

    # Atoms
    atommap = {}  # OpenMM atom -> MDT atom
    newatoms = []
    masses = u.amu * [
        atom.element.mass.value_in_unit(stku.amu) for atom in topo.atoms()
    ]
    for atom, mass in zip(topo.atoms(), masses):
        newatom = mdt.Atom(atnum=atom.element.atomic_number,
                           name=atom.name,
                           mass=mass)
        atommap[atom] = newatom
        newatoms.append(newatom)

    # Coordinates
    if positions is not None:
        poslist = np.array(
            [p.value_in_unit(stku.nanometer) for p in positions]) * u.nm
        poslist.ito(u.default.length)
        for newatom, position in zip(newatoms, poslist):
            newatom.position = position
    if velocities is not None:
        velolist = np.array([
            v.value_in_unit(stku.nanometer / stku.femtosecond)
            for v in velocities
        ]) * u.nm / u.fs
        velolist = u.default.convert(velolist)
        for newatom, velocity in zip(newatoms, velolist):
            # NOTE(review): ``velocity`` has already been converted out of simtk units
            # above, yet is passed through simtk2pint again - confirm this is intended.
            newatom.momentum = newatom.mass * simtk2pint(velocity)

    # Biounits
    chains = {}  # OpenMM chain id -> MDT chain (chains sharing an id share one MDT chain)
    for chain in topo.chains():
        if chain.id not in chains:
            chains[chain.id] = mdt.Chain(name=chain.id, index=chain.index)
        newchain = chains[chain.id]
        for residue in chain.residues():
            newresidue = mdt.Residue(name='%s%d' %
                                     (residue.name, residue.index),
                                     chain=newchain,
                                     pdbindex=int(residue.id),
                                     pdbname=residue.name)
            newchain.add(newresidue)
            for atom in residue.atoms():
                newatom = atommap[atom]
                newatom.residue = newresidue
                newresidue.add(newatom)

    # Bonds - the topology carries no bond orders, so every bond starts as a single bond
    for a1, a2 in topo.bonds():
        b = mdt.Bond(atommap[a1], atommap[a2])
        b.order = 1

    if name is None:
        name = 'Unnamed molecule from OpenMM'

    newmol = mdt.Molecule(newatoms, name=name)

    if assign_bond_orders:
        for residue in newmol.residues:
            # Best effort: residues for which template assignment raises KeyError or
            # ValueError simply keep their single bonds
            try:
                residue.assign_template_bonds()
            except (KeyError, ValueError):
                pass

    return newmol