예제 #1
1
def pdb_fix_pdbfixer(pdbid, file_pathway, ph, chains_to_remove):
    """
    Download a PDB entry, repair it with PDBFixer and write three PDB files.

    The files written into ``file_pathway`` are
    ``<pdbid>_fixed_ph<ph>.pdb`` (waters and ligands kept),
    ``<pdbid>_fixed_ph<ph>_apo.pdb`` (heterogens removed, water kept) and
    ``<pdbid>_fixed_ph<ph>_apo_nowater.pdb`` (heterogens and water removed).

    Args:
        pdbid: 4 letter string specifying the PDB ID of the file you want to fix
        file_pathway: directory in which the PDB files are written
        ph: the pH at which hydrogens will be determined and added
        chains_to_remove: mapping with two parallel lists, ``'pdbid'`` (PDB IDs)
            and ``'chain_to_remove'`` (whitespace-separated chain IDs for the
            PDB ID stored at the same position)
    Returns: nothing, but it does write PDB files

    """
    print(pdbid)

    # Download the topology from RCSB based on the PDB ID
    fixer = PDBFixer(pdbid=pdbid)

    # Remove chains based on the hand curated table
    if pdbid in chains_to_remove['pdbid']:
        row = chains_to_remove['pdbid'].index(pdbid)
        chains_list = chains_to_remove['chain_to_remove'][row].split()
        fixer.removeChains(chainIds=chains_list)

    # Determine the first and last residue resolved in chain 0
    first_chain = list(fixer.topology.chains())[0]
    resindices = sorted(residue.index for residue in first_chain.residues())
    first_resindex = resindices[0]
    last_resindex = resindices[-1]

    # Find missing residues and drop the N- and C-terminal fragments so they
    # are not modelled. Each check re-tests for emptiness because the first
    # pop may remove the only entry.
    fixer.findMissingResidues()
    missing = fixer.missingResidues
    if missing:
        lowest_key = min(missing)
        if lowest_key[-1] <= first_resindex:
            missing.pop(lowest_key)
    if missing:
        highest_key = max(missing)
        if highest_key[-1] >= last_resindex:
            missing.pop(highest_key)

    fixer.findNonstandardResidues()
    fixer.replaceNonstandardResidues()
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()
    fixer.addMissingHydrogens(ph)

    def _write(suffix):
        # keepNumbers is not defined in this function; it is presumably a
        # module-level setting -- confirm against the rest of the file.
        out_path = os.path.join(file_pathway,
                                '%s_fixed_ph%s%s.pdb' % (pdbid, ph, suffix))
        with open(out_path, 'w') as handle:
            PDBFile.writeFile(fixer.topology, fixer.positions, handle,
                              keepIds=keepNumbers)

    # Write fixed PDB file, with all of the waters and ligands
    _write('')

    # Remove the ligand (water is kept) and write a pdb file
    fixer.removeHeterogens(True)
    _write('_apo')

    # Remove the waters and write a pdb file
    fixer.removeHeterogens(False)
    _write('_apo_nowater')
예제 #2
0
    def read_and_repair(self, path_pdb: str):
        '''
        Load a structure file and run the PDBFixer repair sequence on it.

        Non-standard residues are replaced, heterogens (including water) are
        removed, missing atoms are added and hydrogens are added at pH 7.0.

        params:
            path_pdb (str) path to structure; must be an existing file
        return:
            pdb (PDBFixer object) the repaired structure
            invalid_residues: whatever self._check_residues reports for the
                repaired topology
        '''
        assert os.path.isfile(path_pdb)

        repaired = PDBFixer(filename=path_pdb)
        repaired.findMissingResidues()
        repaired.findNonstandardResidues()
        repaired.replaceNonstandardResidues()
        repaired.removeHeterogens(False)
        repaired.findMissingAtoms()
        repaired.addMissingAtoms()
        repaired.addMissingHydrogens(7.0)

        return repaired, self._check_residues(repaired.topology)
예제 #3
0
def add_missing_atoms(session, m, minimization_steps = 0, keep_waters = False):
    """
    Repair the file behind model ``m`` with PDBFixer and open the result.

    The repaired structure is written next to the input file as
    ``<name>-pdbfixer.pdb``, opened in ``session``, and the original model is
    hidden.

    Parameters
    ----------
    session : object providing ``models.open`` and ``logger``
    m : model whose ``filename`` attribute points at the file to repair
    minimization_steps : if greater than 0, ``minimize`` is called on the
        fixer with this value before writing
    keep_waters : passed to ``PDBFixer.removeHeterogens``
    """
    from os.path import splitext
    from pdbfixer import PDBFixer
    try:
        from openmm.app import PDBFile
    except ImportError:
        # Older OpenMM releases only provide the simtk namespace.
        from simtk.openmm.app import PDBFile

    fname = m.filename
    pf = PDBFixer(filename = fname)
    pf.findMissingResidues()
    pf.findNonstandardResidues()
    pf.replaceNonstandardResidues()
    pf.findMissingAtoms()
    pf.addMissingAtoms()
    pf.removeHeterogens(keep_waters)
    pf.addMissingHydrogens(7.0)
    if minimization_steps > 0:
        minimize(pf, minimization_steps)

    fout = splitext(fname)[0] + '-pdbfixer.pdb'
    # The context manager closes the file even if writing fails.
    with open(fout, 'w') as out:
        PDBFile.writeFile(pf.topology, pf.positions, out)

    mfix = session.models.open([fout])[0]
    mfix.atoms.displays = True
    mfix.residues.ribbon_displays = False
    m.display = False
    log = session.logger
    log.info('Wrote %s' % fout)
0
파일: clustenm.py 프로젝트: SHZ66/ProDy
    def _fix(self, atoms):

        try:
            from pdbfixer import PDBFixer
            from openmm.app import PDBFile
        except ImportError:
            raise ImportError('Please install PDBFixer and OpenMM 7.6 in order to use ClustENM.')

        stream = createStringIO()
        title = atoms.getTitle()
        writePDBStream(stream, atoms)
        stream.seek(0)
        fixed = PDBFixer(pdbfile=stream)
        stream.close()

        fixed.missingResidues = {}
        fixed.findNonstandardResidues()
        fixed.replaceNonstandardResidues()
        fixed.removeHeterogens(False)
        fixed.findMissingAtoms()
        fixed.addMissingAtoms()
        fixed.addMissingHydrogens(self._ph)

        stream = createStringIO()
        PDBFile.writeFile(fixed.topology, fixed.positions,
                          stream, keepIds=True)
        stream.seek(0)
        self._atoms = parsePDBStream(stream)
        self._atoms.setTitle(title)
        stream.close()

        self._topology = fixed.topology
        self._positions = fixed.positions
예제 #5
0
def fix_pdb(pdb_id, pdb_file, pdb_group):
    """
    Strip unwanted chains and heterogens from a PDB file and add missing atoms.

    Chains whose ID is not returned by ``get_required_chains(pdb_group)`` are
    removed. The result is written to ``<pdb_id>.pdb`` in the directory of
    ``pdb_file``.

    Args:
        pdb_id: identifier used for the parsed structure and the output name
        pdb_file: path of the PDB file to repair
        pdb_group: passed to ``get_required_chains`` to select chains to keep
    Returns:
        Path of the written PDB file.
    """
    from os.path import join

    chains_to_retain = get_required_chains(pdb_group)
    first_model = PDBParser().get_structure(pdb_id, pdb_file)[0]
    chains_to_remove = [chain.get_id() for chain in first_model
                        if chain.get_id() not in chains_to_retain]

    fixer = PDBFixer(filename=pdb_file)

    fixer.removeChains(chainIds=chains_to_remove)

    fixer.findMissingResidues()
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()
    fixer.removeHeterogens(True)

    # join() keeps the output relative when pdb_file has no directory part;
    # plain concatenation with '/' would point at the filesystem root.
    pdb_file = join(dirname(pdb_file), pdb_id + '.pdb')

    # KeepIds flag is critical here, otherwise we lose all binding information
    with open(pdb_file, 'w') as handle:
        PDBFile.writeFile(fixer.topology,
                          fixer.positions,
                          handle,
                          keepIds=True)

    return pdb_file
예제 #6
0
def pdb_clean_sim(args):
    """
    Top-level function to be executed in parallel to clean and generate features.

    The input structure is round-tripped through ParmEd, repaired with
    PDBFixer (heterogens and water removed, hydrogens added at pH 7.0), and
    the stacked ``U_LJ`` / ``U_el`` / ``D_mat`` feature matrices are saved as
    ``<output_dir>/<basename>.npy``.

    :param args: Tuple ``(input_dir, output_dir, fname)``. The directories are
        concatenated with ``fname`` directly, so they are expected to end with
        a path separator.
    :return: ``(0, 'S;<fname>;')`` on success or when the work is skipped,
        ``(1, 'E;<fname>;<exception>')`` when the simulation step fails.
        Errors raised while cleaning the PDB are not caught.
    """
    import tempfile

    input_dir, output_dir, fname = args

    # NOTE(review): this tests for <output_dir><fname>, while the result is
    # saved below as <output_dir>/<basename>.npy -- confirm that the skip
    # check looks at the intended file.
    if Path(output_dir + fname).exists():
        return 0, f'S;{fname};'

    # clean PDB
    pdb = pmd.load_file(input_dir + fname)
    # A private scratch directory avoids collisions between parallel workers
    # handling equally named files and is removed even if PDBFixer raises.
    # The original file name (and so its extension) is kept for the copy.
    with tempfile.TemporaryDirectory() as scratch_dir:
        scratch_file = str(Path(scratch_dir) / fname)
        pdb.save(scratch_file, overwrite=True)
        fixer = PDBFixer(filename=scratch_file)

    fixer.findMissingResidues()
    fixer.findNonstandardResidues()
    fixer.replaceNonstandardResidues()
    fixer.removeHeterogens(False)
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()
    fixer.addMissingHydrogens(7.0)

    # Run simulation
    try:
        forcefield = so.app.ForceField('amber14-all.xml', 'amber14/tip3pfb.xml')
        system = forcefield.createSystem(fixer.topology, nonbondedMethod=so.app.NoCutoff)
        param = pmd.openmm.load_topology(fixer.topology, system=system, xyz=fixer.positions)

        basename = '.'.join(fname.split('.')[:-1])

        # get indices of atoms for the 2 interacting subunits
        sub_unit_chains = pdb_parser(basename)
        ids0, ids1 = (np.where(param.to_dataframe()['chain'].isin(cids))[0]
                      for cids in sub_unit_chains)

        features = generate_features(ids0, ids1, forcefield, system, param)

        print(f'done simulating: {fname}')

        # stack 3 matrices into 1
        combined_mat = np.stack((features["U_LJ"], features["U_el"], features["D_mat"]))

        np.save(output_dir + '/' + basename + '.npy', combined_mat)

        print(f'saved features: {fname}')

    except Exception as e:
        # Worker boundary: report the failure to the caller instead of raising.
        print(f'could not simulate: {fname} Exception: {e}')
        return 1, f'E;{fname};{e}'

    return 0, f'S;{fname};'
예제 #7
0
def fix_pdb(pdb_file):
    """
    Repair ``pdb_file`` in place with PDBFixer.

    Non-standard residues are replaced, heterogens are removed (water is
    kept), and missing atoms and hydrogens (pH 7.0) are added. The input file
    is overwritten with the repaired structure.
    """
    fixer = PDBFixer(filename=pdb_file)
    fixer.findMissingResidues()
    fixer.findNonstandardResidues()
    fixer.replaceNonstandardResidues()
    fixer.removeHeterogens(True)
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()
    fixer.addMissingHydrogens(7.0)
    # Close the handle deterministically so the file is fully flushed.
    with open(pdb_file, 'w') as handle:
        PDBFile.writeFile(fixer.topology, fixer.positions, handle)
예제 #8
0
 def fix_pdb(self, infile, out=None, pH=7):
     """
     Add missing atoms and hydrogens to ``infile`` and write the result.

     infile: path of the PDB file to read
     out: output path; when None, ``_fixed`` is inserted before the
         extension of ``infile``
     pH: pH passed to ``addMissingHydrogens``
     """
     with open(infile, 'r') as source:
         repaired = PDBFixer(pdbfile=source)

     repaired.findMissingResidues()
     repaired.findMissingAtoms()
     repaired.addMissingAtoms()
     repaired.addMissingHydrogens(pH=pH)

     if out is None:
         stem, extension = os.path.splitext(infile)
         out = f'{stem}_fixed{extension}'

     with open(out, 'w') as target:
         PDBFile.writeFile(repaired.topology, repaired.positions, target)
예제 #9
0
 def add_hydrogens_by_openmm(self):
     """
     Add missing atoms and hydrogens to ``self.name`` and write ``<shotname>_h.pdb``.

     The structure is repaired with PDBFixer (hydrogens at pH 7.0), passed
     through an OpenMM Modeller that adds hydrogens using the amber99sb /
     tip3p force field, and water is deleted before writing. Only the model
     records are written (``PDBFile.writeModel``).
     """
     try:
         from openmm.app import ForceField, Modeller, PDBFile
     except ImportError:
         # Older OpenMM releases only provide the simtk namespace.
         from simtk.openmm.app import ForceField, Modeller, PDBFile
     from pdbfixer import PDBFixer
     fixer = PDBFixer(self.name)
     field = ForceField('amber99sb.xml', 'tip3p.xml')
     fixer.findMissingResidues()
     fixer.findMissingAtoms()
     fixer.addMissingAtoms()
     fixer.addMissingHydrogens(7.0)
     modeller = Modeller(fixer.topology, fixer.positions)
     modeller.addHydrogens(forcefield=field)
     modeller.deleteWater()
     # Close the handle deterministically so the output is fully flushed.
     with open(self.shotname+'_h.pdb', 'w') as handle:
         PDBFile.writeModel(modeller.topology, modeller.positions, handle)
예제 #10
0
def cleanPdb(pdb_list, chain=None, fromFolder=None, toFolder="cleaned_pdbs"):
    """
    Clean a list of PDB files with PDBFixer and write them into ``toFolder``.

    For every entry the unwanted chains are removed, internal gaps are
    modelled (terminal gaps are not), non-standard residues are replaced,
    heterogens and water are removed, and missing atoms and hydrogens
    (pH 7.0) are added.

    Parameters
    ----------
    pdb_list : iterable of str
        PDB identifiers. The first four characters (lower-cased) give the
        file name; an optional fifth character selects the chain.
    chain : str, int or None
        None: use the fifth character of the identifier, or "A" for
        four-character identifiers. "-1" or -1: keep whatever
        ``getAllChains`` returns for the file. Anything else is used as the
        set of chain IDs to keep.
    fromFolder : str or None
        None: read from "original_pdbs". A value ending in ".pdb" is used
        as the input file itself. Otherwise it is the input directory.
    toFolder : str
        Output directory; created if it does not exist.
    """
    # os.makedirs instead of a shell "mkdir -p": no shell quoting problems
    # with spaces or metacharacters in the path, and it works on any OS.
    if toFolder:
        os.makedirs(toFolder, exist_ok=True)
    for pdb_id in pdb_list:
        pdb = pdb_id.lower()[:4]
        pdbFile = pdb + ".pdb"
        if fromFolder is None:
            fromFile = os.path.join("original_pdbs", pdbFile)
        elif fromFolder.endswith(".pdb"):
            # fromFolder is a PDB file path; check the suffix, not the prefix.
            fromFile = fromFolder
        else:
            fromFile = os.path.join(fromFolder, pdbFile)
        if chain is None:  # None means the default is chain A unless specified.
            if len(pdb_id) == 5:
                Chosen_chain = pdb_id[4].upper()
            else:
                assert (len(pdb_id) == 4)
                Chosen_chain = "A"
        elif chain == "-1" or chain == -1:
            Chosen_chain = getAllChains(fromFile)
        else:
            Chosen_chain = chain
        # clean pdb
        fixer = PDBFixer(filename=fromFile)
        # remove unwanted chains (by index)
        chains = list(fixer.topology.chains())
        chains_to_remove = [
            i for i, x in enumerate(chains) if x.id not in Chosen_chain
        ]
        fixer.removeChains(chains_to_remove)

        fixer.findMissingResidues()
        # add missing residues in the middle of a chain, not ones at the start or end of the chain.
        chains = list(fixer.topology.chains())
        for key in list(fixer.missingResidues.keys()):
            chain_tmp = chains[key[0]]
            if key[1] == 0 or key[1] == len(list(chain_tmp.residues())):
                del fixer.missingResidues[key]

        fixer.findNonstandardResidues()
        fixer.replaceNonstandardResidues()
        fixer.removeHeterogens(keepWater=False)
        fixer.findMissingAtoms()
        fixer.addMissingAtoms()
        fixer.addMissingHydrogens(7.0)
        with open(os.path.join(toFolder, pdbFile), 'w') as handle:
            PDBFile.writeFile(fixer.topology, fixer.positions, handle)
예제 #11
0
def add_membrane(pdb_path, membrane_lipid_type='POPC', out_as=None):
    """
        Make a lipid bilayer for your protein easily.

        The repaired protein is written to "fixed.pdb" and the membrane
        system to "fixed_membrane.pdb", both in the current working
        directory.

            Parameters
            ----------

            pdb_path: Path to the input PDB file.

            membrane_lipid_type : Lipid type passed to PDBFixer.addMembrane,
            e.g. POPC or POPE.

            out_as: Optional list of extensions such as ['psf', 'crd', 'gro'];
            the membrane system is additionally saved as
            "fixed_membrane.<ext>" for each. A format that cannot be saved is
            reported and skipped.

            Example
            ----------

            add_membrane('protein.pdb', 'POPC', ['crd', 'gro'])

        """
    fixer = PDBFixer(filename=pdb_path)
    fixer.findMissingResidues()
    fixer.findNonstandardResidues()
    fixer.replaceNonstandardResidues()
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()
    fixer.addMissingHydrogens(7.0)

    print('\nAdding membrane:', membrane_lipid_type)
    with open("fixed.pdb", 'w') as handle:
        app.PDBFile.writeFile(fixer.topology, fixer.positions, handle)
    fixer.addMembrane(lipidType=membrane_lipid_type,
                      membraneCenterZ=0 * unit.nanometer,
                      minimumPadding=1 * unit.nanometer,
                      positiveIon="Na+",
                      negativeIon="Cl-",
                      ionicStrength=0.0 * unit.molar)
    with open("fixed_membrane.pdb", 'w') as handle:
        app.PDBFile.writeFile(fixer.topology, fixer.positions, handle, keepIds=True)

    if out_as is not None:
        struct = pmd.load_file('fixed_membrane.pdb')
        for i in out_as:
            try:
                print("Savind *.%s extension File" % i)
                struct.save('fixed_membrane.%s' % i)
            except Exception as err:
                # Still best-effort (other formats are attempted), but the
                # failure is reported instead of being silently dropped.
                print("Could not save fixed_membrane.%s: %s" % (i, err))
예제 #12
0
    def __init__(self, config_: Config):
        """
        Read the ligand, optionally repair the protein, and write a combined PDB.

        Steps, in order:
        1. read the ligand from ``config.ligand_file_name`` with OpenEye and
           keep its SMILES and a ``Molecule`` built from it;
        2. load ``config.pdb_file_name`` into PDBFixer and, if
           ``config.use_pdbfixer`` is set, run the repair sequence
           (heterogens and water removed, hydrogens added at pH 7.0);
        3. write the PDBFixer structure to ``<tempdir>/inital_fixed.pdb`` and
           point ``config.pdb_file_name`` at that file;
        4. load that file and the ligand through ``cmd``, rename the ligand
           residue to ``UNL`` and save over the same file.

        Side effect: ``config_.pdb_file_name`` is overwritten.
        """
        self.config = config_
        self.logger = make_message_writer(self.config.verbose, self.__class__.__name__)
        with self.logger("__init__") as logger:
            self.boxvec = None
            self.explicit = self.config.explicit
            self.system = None
            # Read the first molecule of the ligand file.
            ofs = oechem.oemolistream(self.config.ligand_file_name)
            oemol = oechem.OEMol()
            oechem.OEReadMolecule(ofs, oemol)
            ofs.close()
            self.inital_ligand_smiles = oechem.OEMolToSmiles(oemol)
            self.params_written = 0
            self.mol = Molecule.from_openeye(oemol, allow_undefined_stereo=True)
            # The fixer is created unconditionally; it is also what gets
            # written out below when use_pdbfixer is false.
            fixer = PDBFixer(self.config.pdb_file_name)
            
            if self.config.use_pdbfixer:
                logger.log("Fixing with PDBFixer")

                fixer.findMissingResidues()
                fixer.findNonstandardResidues()
                fixer.replaceNonstandardResidues()
                fixer.removeHeterogens(keepWater=False)
                fixer.findMissingAtoms()
                fixer.addMissingAtoms()
                fixer.addMissingHydrogens(7.0)



                # These are logged after the repair calls above have run.
                logger.log("Found missing residues: ", fixer.missingResidues)
                logger.log("Found missing terminals residues: ", fixer.missingTerminals)
                logger.log("Found missing atoms:", fixer.missingAtoms)
                logger.log("Found nonstandard residues:", fixer.nonstandardResidues)


            # From here on the config points at the rewritten copy.
            self.config.pdb_file_name = f"{self.config.tempdir(main_context=True)}/inital_fixed.pdb"
            with open(self.config.pdb_file_name, 'w') as f:
                app.PDBFile.writeFile(fixer.topology, fixer.positions, f)
            # NOTE(review): cmd is presumably PyMOL's command module, so this
            # resets global session state -- confirm.
            cmd.reinitialize()
            cmd.load(self.config.pdb_file_name)
            cmd.load(self.config.ligand_file_name, "UNL")
            cmd.alter("UNL", "resn='UNL'")
            # Overwrites the file written above with protein plus ligand.
            cmd.save("{}".format(self.config.pdb_file_name))
예제 #13
0
def pdbfix_protein(input_pdb_path,
                   output_pdb_path,
                   find_missing_residues=True,
                   keep_water=False,
                   ph=None):
    """Repair a PDB file with PDBFixer and save the result.

    Non-standard residues are replaced, heterogen atoms are always
    removed and missing atoms are added.

    Parameters
    ----------
    input_pdb_path : str
        Path of the PDB file to repair.
    output_pdb_path : str
        Path the repaired PDB file is written to.
    find_missing_residues : bool, optional
        When True (the default) PDBFixer looks for unresolved residues
        of the sequence; when False the missing-residue table is set to
        an empty dict instead.
    keep_water : bool, optional
        When True water molecules survive the heterogen removal
        (default is False).
    ph : float or None, optional
        pH used to add hydrogen atoms; with None (the default) no
        hydrogens are added.

    """
    repaired = PDBFixer(filename=input_pdb_path)

    if not find_missing_residues:
        repaired.missingResidues = {}
    else:
        repaired.findMissingResidues()

    repaired.findNonstandardResidues()
    repaired.replaceNonstandardResidues()
    repaired.removeHeterogens(keep_water)
    repaired.findMissingAtoms()
    repaired.addMissingAtoms()

    if ph is not None:
        repaired.addMissingHydrogens(ph)

    with open(output_pdb_path, 'w') as destination:
        PDBFile.writeFile(repaired.topology, repaired.positions, destination)
예제 #14
0
def _apply_pdbfix(molecule, pH=7.0, add_hydrogens=False):
    """
    Run PDBFixer to amend potential issues in PDB format.

    The molecule is written to an in-memory PDB, closed in Chimera, repaired
    (non-standard residues replaced, missing atoms added, heterogens removed
    with water kept) and opened again under its original name. The reopened
    model is then removed from ``chimera.openModels`` before it is returned.

    Parameters
    ----------
    molecule : chimera.Molecule
        Chimera Molecule object to fix. It is closed by this function.
    pH : float, optional
        Target pH for adding missing hydrogens.
    add_hydrogens : bool, optional
        Whether to add missing hydrogens or not.

    Returns
    -------
    The first model obtained by reopening the repaired PDB contents.
    """
    # Read the name before the model is closed; the object should not be
    # touched afterwards.
    model_name = molecule.name

    memfile = StringIO()
    chimera.pdbWrite([molecule], chimera.Xform(), memfile)
    chimera.openModels.close([molecule])
    memfile.seek(0)
    fixer = PDBFixer(pdbfile=memfile)
    fixer.findMissingResidues()
    fixer.findNonstandardResidues()
    fixer.replaceNonstandardResidues()
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()
    fixer.removeHeterogens(True)
    if add_hydrogens:
        fixer.addMissingHydrogens(pH)
    memfile.close()

    memfile = StringIO()
    PDBFile.writeFile(fixer.topology, fixer.positions, memfile)
    memfile.seek(0)
    reopened = chimera.openModels.open(memfile,
                                       type="PDB",
                                       identifyAs=model_name)
    chimera.openModels.remove(reopened)
    memfile.close()
    return reopened[0]
예제 #15
0
def fix_peptide(pdb_file, seq_dict, pH=7.4, remove_water=True, remove_small_mols=True):
    """
    Rebuild a peptide structure against caller-supplied target sequences.

    The fixer's sequence records are replaced by the residues actually
    present in each chain, then overridden per chain with the sequences from
    ``seq_dict`` so that PDBFixer models the difference. Chains whose entry in
    ``seq_dict`` is empty or None are removed at the end. The result is
    checked for cis-peptide bonds and chirality problems, which are printed.

    Parameters
    ----------
    pdb_file : path of the PDB file to repair
    seq_dict : mapping of chain index to a one-letter amino-acid sequence
        (or an empty value / None to delete that chain). It is not modified.
    pH : pH used when adding hydrogens
    remove_water : when heterogens are removed, also remove water
    remove_small_mols : whether heterogens are removed at all

    Returns
    -------
    The repaired structure as loaded by ``mdtraj.load``.
    """
    fixer = PDBFixer(filename=pdb_file)
    fixer.sequences.clear()
    for chain in fixer.topology.chains():
        seq = pdbfixer.pdbfixer.Sequence(chain.id, [r.name for r in chain.residues()])
        fixer.sequences.append(seq)
    if remove_small_mols:
        fixer.removeHeterogens(not remove_water)

    # Convert single AA codes to three letter codes in a separate mapping so
    # the caller's dictionary stays usable for further calls.
    delete_chains = []
    target_sequences = {}
    for key, value in seq_dict.items():
        if not value:
            delete_chains.append(key)
            target_sequences[key] = value
        else:
            target_sequences[key] = [ONE_THREE_CODE[item] for item in value]

    for chain in fixer.topology.chains():
        if target_sequences.get(chain.index) is not None:
            fixer.sequences[chain.index].residues = target_sequences[chain.index]
    fixer.findMissingResidues()
    fixer.findNonstandardResidues()
    fixer.replaceNonstandardResidues()
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()
    fixer.addMissingHydrogens(pH)
    fixer.removeChains(delete_chains)

    # Write through the temporary file's own handle and flush before mdtraj
    # reads it by name; the file is removed when the block exits.
    with tempfile.NamedTemporaryFile(mode='w', suffix=".pdb") as dummy:
        app.PDBFile.writeFile(fixer.topology, fixer.positions, dummy)
        dummy.flush()
        product = mdtraj.load(dummy.name)

    problem_cis = ChiralityCheck.check_cispeptide_bond(product)
    problem_chiral = ChiralityCheck.check_chirality(product)
    print("The following problems have been detected:")
    print(problem_cis)
    print(problem_chiral)
    print("Either rerun or find a tool to solve. Perhaps VMD?")
    return product
예제 #16
0
    def __init__(self, constraints=app.HBonds, hydrogenMass=None, pH=7.0, **kwargs):
        """
        Build the test system from the bundled ``pdbs/1l2y.pdb`` structure.

        Missing atoms are added with PDBFixer, hydrogens are added at ``pH``
        with the amber96 / amber96_obc force field, and an implicit-solvent
        (OBC1), no-cutoff system is created. Sets ``self.topology``,
        ``self.positions`` and ``self.system``.

        constraints, hydrogenMass: forwarded to ``ForceField.createSystem``.
        kwargs: forwarded to ``TestSystem.__init__``.
        """
        TestSystem.__init__(self, **kwargs)

        structure = PDBFixer(os.path.dirname(__file__)+"/pdbs/1l2y.pdb")
        structure.findMissingResidues()
        structure.findNonstandardResidues()
        structure.findMissingAtoms()
        structure.addMissingAtoms()

        ff = app.ForceField('amber96.xml', 'amber96_obc.xml')
        builder = app.Modeller(structure.topology, structure.positions)
        builder.addHydrogens(ff, pH=pH)

        self.topology = builder.getTopology()
        self.positions = builder.getPositions()
        # Swap the underlying coordinate container for a numpy array.
        self.positions._value = np.array(self.positions._value)
        self.system = ff.createSystem(
            builder.topology,
            implicitSolvent=app.OBC1,
            constraints=constraints,
            nonbondedMethod=app.NoCutoff,
            hydrogenMass=hydrogenMass,
        )
예제 #17
0
def fix_pdb(pdb_id):
    """
    Repair a PDB file with PDBFixer and write ``<name>_fixed_pH_7.pdb``.

    Internal gaps are modelled (terminal gaps are not), non-standard residues
    are replaced, heterogens are removed (water is kept), and missing atoms
    and hydrogens (pH 7) are added. The output is written to the current
    working directory and named after the part of ``pdb_id`` before the first
    dot.

    Args:
        pdb_id: value passed to ``PDBFixer`` as its first argument.
    Returns:
        The output file name (without directory), or None when ``pdb_id`` is
        exactly four characters long, in which case nothing is done.
    """
    # NOTE(review): four-character inputs (presumably bare PDB IDs) are
    # skipped without any message -- confirm this is intended.
    if len(pdb_id) == 4:
        return None

    path = os.getcwd()
    print("Creating PDBFixer...")
    fixer = PDBFixer(pdb_id)
    print("Finding missing residues...")
    fixer.findMissingResidues()

    # Drop gaps at the start or end of a chain so only internal gaps are built.
    chains = list(fixer.topology.chains())
    for key in list(fixer.missingResidues.keys()):
        chain = chains[key[0]]
        if key[1] == 0 or key[1] == len(list(chain.residues())):
            print("ok")
            del fixer.missingResidues[key]

    print("Finding nonstandard residues...")
    fixer.findNonstandardResidues()
    print("Replacing nonstandard residues...")
    fixer.replaceNonstandardResidues()
    print("Removing heterogens...")
    fixer.removeHeterogens(keepWater=True)

    print("Finding missing atoms...")
    fixer.findMissingAtoms()
    print("Adding missing atoms...")
    fixer.addMissingAtoms()
    print("Adding missing hydrogens...")
    fixer.addMissingHydrogens(7)
    print("Writing PDB file...")

    out_name = "%s_fixed_pH_%s.pdb" % (pdb_id.split('.')[0], 7)
    with open(os.path.join(path, out_name), "w") as handle:
        PDBFile.writeFile(
            fixer.topology,
            fixer.positions,
            handle,
            keepIds=True)
    return out_name
예제 #18
0
def fix_pdb(pdb_id, pdb_file, pdb_group):
    """
    Strip unwanted chains and heterogens from a PDB file and add missing atoms.

    Chains whose ID is not returned by ``get_required_chains(pdb_group)`` are
    removed. The result is written to ``<pdb_id>.pdb`` in the directory of
    ``pdb_file``.

    Args:
        pdb_id: identifier used for the parsed structure and the output name
        pdb_file: path of the PDB file to repair
        pdb_group: passed to ``get_required_chains`` to select chains to keep
    Returns:
        Path of the written PDB file.
    """
    from os.path import join

    chains_to_retain = get_required_chains(pdb_group)
    first_model = PDBParser().get_structure(pdb_id, pdb_file)[0]
    chains_to_remove = [chain.get_id() for chain in first_model
                        if chain.get_id() not in chains_to_retain]

    fixer = PDBFixer(filename=pdb_file)

    fixer.removeChains(chainIds=chains_to_remove)

    fixer.findMissingResidues()
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()
    fixer.removeHeterogens(True)

    # join() keeps the output relative when pdb_file has no directory part;
    # plain concatenation with '/' would point at the filesystem root.
    pdb_file = join(dirname(pdb_file), pdb_id + '.pdb')

    # KeepIds flag is critical here, otherwise we lose all binding information
    with open(pdb_file, 'w') as handle:
        PDBFile.writeFile(fixer.topology, fixer.positions, handle, keepIds=True)

    return pdb_file
예제 #19
0
def pdbfix(receptor: Optional[str] = None, pdbid: Optional[str] = None, 
           pH: float = 7.0, path: str = '.', **kwargs) -> str:
    """
    Repair a receptor structure with PDBFixer and write it to disk.

    Non-standard residues are replaced, heterogens are removed with
    PDBFixer's default settings, and missing atoms and hydrogens are added.

    Parameters
    ----------
    receptor : path of a local PDB file. When given, the repaired structure
        is written back to this path.
    pdbid : PDB ID to load through ``PDBFixer(pdbid=...)``. Takes precedence
        over ``receptor`` as the structure source when both are given.
    pH : pH used when adding hydrogens.
    path : output directory used when ``receptor`` is not given; the file is
        named ``<pdbid>.pdb``.
    **kwargs : accepted and ignored.

    Returns
    -------
    The path of the written file as a string.

    Raises
    ------
    ValueError
        If neither ``receptor`` nor ``pdbid`` is given.
    """
    if not pdbid and not receptor:
        raise ValueError("either 'receptor' or 'pdbid' must be provided")

    if pdbid:
        fixer = PDBFixer(pdbid=pdbid)
    else:
        fixer = PDBFixer(filename=receptor)

    fixer.findMissingResidues()
    fixer.findNonstandardResidues()
    fixer.replaceNonstandardResidues()
    fixer.removeHeterogens()
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()
    fixer.addMissingHydrogens(pH)

    if receptor:
        outfile = receptor
    else:
        # Convert to str so the return value matches the annotation.
        outfile = str(Path(path)/f'{pdbid}.pdb')

    with open(outfile, 'w') as handle:
        PDBFile.writeFile(fixer.topology, fixer.positions, handle)

    return outfile
예제 #20
0
    def pdb2omm(self, 
              input_pdb=None,
              solvate=True,
              protonate=True,
              fix_pdb=True,
              inspect=False,
              extra_input_pdb=[],
              ff_files=[],
              extra_ff_files=[],
              extra_names=[],
              other_ff_instance=False,
              pH_protein = 7.0,
              residue_variants={},
              other_omm=False,
              input_sdf_file=None,
              box_size=9.0,
              name='NoName'):
        """
        Prepare an OpenMM system from a PDB file and force field definitions.

        Steps, in order: load (and optionally repair) the PDB, wrap it in an
        ``app.Modeller``, add extra molecules, build the force field, add
        hydrogens, solvate, create the system and write the resulting
        structure with ``self.writePDB``.

        Sets ``self.structures``, ``self.input_pdb``, ``self.topology``,
        ``self.positions``, ``self.system`` and, when extra molecules are
        given, ``self.extra_molecules``.

        NOTE(review): several defaults are mutable objects ([] / {}). They are
        not modified in this method, but they are handed to other methods of
        the class -- confirm those do not mutate them.

        Parameters
        ----------
        input_pdb : optional
            Input PDB, passed to ``PDBFixer`` or ``app.PDBFile``.
            The default is None.
        solvate : bool, optional
            If true, ``self.solvate`` is called with the model, the force
            field and ``box_size``. The default is True.
        protonate : bool, optional
            If true, hydrogens are added to the model at ``pH_protein``.
            The default is True.
        fix_pdb : bool, optional
            If true, the input is loaded with PDBFixer and missing atoms are
            added; otherwise it is loaded with ``app.PDBFile`` unchanged.
            The default is True.
        inspect : optional
            Not used in this method. The default is False.
        extra_input_pdb : list, optional
            If non-empty, passed to ``self.addExtraMolecules_PDB`` together
            with the model. The default is [].
        ff_files : list, optional
            Passed to ``self.setForceFields``. The default is [].
        extra_ff_files : list, optional
            Not used in this method. The default is [].
        extra_names : list, optional
            Not used in this method. The default is [].
        other_ff_instance : optional
            Not used in this method. The default is False.
        pH_protein : float, optional
            pH used when adding hydrogens. The default is 7.0.
        residue_variants : dict, optional
            If non-empty, passed to ``self.setProtonationState`` as
            ``protonation_dict`` to obtain the ``variants`` argument for
            ``addHydrogens``. The default is {}.
        other_omm : bool, optional
            If true, the system is built by ``self.omm_system`` (template
            force field 'gaff-2.11') instead of ``forcefield.createSystem``.
            The default is False.
        input_sdf_file : optional
            Passed to ``self.omm_system`` when ``other_omm`` is true.
            The default is None.
        box_size : float, optional
            Passed to ``self.solvate``; units are not visible here.
            The default is 9.0.
        name : str, optional
            Not used in this method. The default is 'NoName'.

        Returns
        -------
        self
            So that other methods can be chained on the result.

        """
        
        self.structures={}
        self.input_pdb=input_pdb
        
        self.structures['input_pdb']=input_pdb
        
        #Fix the input_pdb with PDBFixer
        if fix_pdb:
            
            pdb=PDBFixer(self.input_pdb)
            
            pdb.findMissingResidues()
            pdb.findMissingAtoms()
            pdb.addMissingAtoms()
        
        else:
            pdb = app.PDBFile(self.input_pdb)
        
        
        #Generate a Modeller instance of the fixed pdb
        #It will be used to populate system
        pre_system = app.Modeller(pdb.topology, pdb.positions)
    
    
        #Add ligand structures to the model with addExtraMolecules_PDB
        if len(extra_input_pdb) > 0:

            pre_system, self.extra_molecules=self.addExtraMolecules_PDB(pre_system, extra_input_pdb)


    
        #Create a ForceField instance with provided XMLs with setForceFields()
        forcefield, ff_paths=self.setForceFields(ff_files=ff_files)
    
        #Call to setProtonationState()
        if protonate:
            
            if residue_variants:
            
                pre_system.addHydrogens(forcefield, pH = pH_protein, 
                                        variants = self.setProtonationState(pre_system.topology.chains(), 
                                                                 protonation_dict=residue_variants))

            else:
                pre_system.addHydrogens(forcefield, pH = pH_protein)
        

        #Call to solvate()
        #TODO: For empty box, add waters, remove then
        if solvate:
            pre_system=self.solvate(pre_system, forcefield, box_size=box_size)
    
        self.topology=pre_system.topology
        self.positions=pre_system.positions
    
        #Define system. Either by provided pre_system, or other_omm system instance.
        
        if other_omm:
            
            # forcefield_other is returned by omm_system but not used afterwards.
            system, forcefield_other=self.omm_system(input_sdf_file, 
                                                     pre_system,
                                                     forcefield,
                                                     self.def_input_struct,
                                                     ff_files=ff_paths, 
                                                     template_ff='gaff-2.11')
                        
            #forcefield not needed?? 
    
        else:
            
            #Create an openMM system from the model topology
            system = forcefield.createSystem(pre_system.topology, 
                                         nonbondedMethod=app.PME, 
                                         nonbondedCutoff=1.0*nanometers,
                                         ewaldErrorTolerance=0.0005, 
                                         constraints='HBonds', 
                                         rigidWater=True)
            
        #Update attributes
        self.system=system

        #TODO: A lot. Link to Visualization
        self.structures['system']=self.writePDB(pre_system.topology, pre_system.positions, name='system')
        
        
        print(f"System is now converted to openMM type: \n\tFile: {self.structures['system']}, \n\tTopology: {self.topology}")
        
        return self
예제 #21
0
def process_pdb(path,
                corr_path,
                chain_id,
                max_atoms,
                gsd_file,
                embedding_dicts,
                NN,
                nlist_model,
                keep_residues=[-1, 1],
                debug=False,
                units=unit.nanometer,
                frame_number=3,
                model_index=0,
                log_file=None,
                shiftx_style=False):
    """Turn one PDB file plus its peak table into per-residue fragment records.

    For a random sample of frames, the structure is cleaned with PDBFixer and
    a neighbor list is computed. For every residue of the requested chain a
    fixed-size fragment is then built from the residue itself, its in-chain
    neighbors (``keep_residues``) and the residues owning its spatial
    neighbors. The fragment is serialized with ``make_tfrecord``.

    Parameters
    ----------
    path : str
        PDB file; read with both ``app.PDBFile`` and ``PDBFixer``.
    corr_path : str
        Peak/correlation file handed to ``process_corr``.
    chain_id : str
        Chain to process. ``'_'`` selects the chain of the first residue.
        Only the first character of a chain id is compared.
    max_atoms : int
        Size of the per-fragment arrays.
    gsd_file : object or None
        If not None, a snapshot built by ``write_record_traj`` is appended
        for every record.
    embedding_dicts : dict
        Lookup tables under the keys ``'name'``, ``'atom'``, ``'nlist'`` and
        ``'class'``. ``embedding_dicts['name']`` is extended in place with
        atom names that are not present yet.
    NN : int
        Number of neighbors per atom read from the ``nlist_model`` output.
    nlist_model : callable
        Called with the frame positions as an array; the result is indexed
        as ``[atom, neighbor, 0]`` (distance) and ``[atom, neighbor, 1]``
        (neighbor atom index).
    keep_residues : sequence of two ints
        Lower/upper residue offsets of the in-chain window. Only read,
        never mutated.
    debug : bool
        Print diagnostics and raise on conditions that are otherwise skipped.
    units : unit
        Unit the frame positions are converted to.
    frame_number : int
        Maximum number of frames sampled (forced to 1 for ``shiftx_style``).
    model_index : int
        Stored as the first entry of each record's ``indices``.
    log_file : file-like or None
        If not None, one line is written per record.
    shiftx_style : bool
        Passed to ``process_corr``; also skips adding atoms/hydrogens.

    Returns
    -------
    tuple
        ``(records, fraction, n_records, peak_count)`` where ``fraction`` is
        ``len(peak_successes) / len(peak_data)``. ``peak_successes`` is reset
        for every frame, so the fraction reflects the last frame visited.

    Notes
    -----
    Reads the module-level ``NEIGHBOR_NUMBER`` and updates the module-level
    counter ``MA_LOST_FRAGS``.
    """
    global MA_LOST_FRAGS
    if shiftx_style:
        frame_number = 1
    # load pdb
    pdb = app.PDBFile(path)

    # load cs sets
    peak_data, sequence_map, peak_seq = process_corr(corr_path, debug,
                                                     shiftx_style)

    result = []
    # check for weird/null chain
    if chain_id == '_':
        chain_id = list(pdb.topology.residues())[0].chain.id[0]
    # sometimes chains have extra characters (why?)
    residues = list(
        filter(lambda r: r.chain.id[0] == chain_id, pdb.topology.residues()))
    if len(residues) == 0:
        if debug:
            raise ValueError('Failed to find requested chain ', chain_id)

    pdb_offset, seq_offset = None, None

    # from pdb atom index to the position of its residue in `residues`
    residue_lookup = {}
    peak_count = 0
    # select a random set of frames for generating data without replacement
    frame_choices = random.sample(range(0, pdb.getNumFrames()),
                                  k=min(pdb.getNumFrames(), frame_number))
    for fi in frame_choices:
        peak_successes = set()
        # clean up individual frame
        frame = pdb.getPositions(frame=fi)
        # have to fix at each frame since inserted atoms may change
        # fix missing residues/atoms
        fixer = PDBFixer(filename=path)
        # overwrite positions with frame positions
        fixer.positions = frame
        # we want to add missing atoms,
        # but not replace missing residue. We'd
        # rather just ignore those
        fixer.findMissingResidues()
        # remove the missing residues
        fixer.missingResidues = []
        # remove water!
        fixer.removeHeterogens(False)
        if not shiftx_style:
            fixer.findMissingAtoms()
            fixer.findNonstandardResidues()
            fixer.replaceNonstandardResidues()
            fixer.addMissingAtoms()
            fixer.addMissingHydrogens(7.0)
        # get new positions
        frame = fixer.positions
        num_atoms = len(frame)
        # remake residue list each time so they have correct atom ids
        residues = list(
            filter(lambda r: r.chain.id[0] == chain_id,
                   fixer.topology.residues()))
        if num_atoms > 20000:
            MA_LOST_FRAGS += len(residues)
            if debug:
                print(
                    'Exceeded number of atoms for building nlist (change this if you have big GPU memory) in frame {} in pdb {}'
                    .format(fi, path))
            break
        # check alignment once
        if pdb_offset is None:
            # create sequence from residues
            pdb_seq = ['XXX'] * max([int(r.id) + 1 for r in residues])
            for r in residues:
                rid = int(r.id)
                if rid >= 0:
                    pdb_seq[int(r.id)] = r.name
            if debug:
                print('pdb_seq', pdb_seq)
                print('peak_seq', peak_seq)
            pdb_offset, seq_offset = align(pdb_seq, peak_seq, debug)
            # TODO: the pdb offset returned by align() is deliberately
            # discarded here ("maybe it's ok"); revisit.
            pdb_offset = 0
            if debug:
                print('pdb_offset', pdb_offset)
                print('seq_offset', seq_offset)
                print(sequence_map)
                # now check alignment - rarely perfect
                saw_one = False
                aligned = 0
                for i in range(len(residues)):
                    segid = int(residues[i].id) + pdb_offset
                    saw_one = pdb_seq[segid] == residues[i].name
                    if not saw_one:
                        print('Mismatch (A) at position {} ({}). {} != {}'.
                              format(segid, residues[i].id, pdb_seq[segid],
                                     residues[i].name))
                        continue
                    if segid + seq_offset in sequence_map:
                        peakid = sequence_map[segid + seq_offset]
                        print(segid, segid + seq_offset, len(pdb_seq),
                              len(peak_seq))
                        saw_one = pdb_seq[segid] == peak_seq[segid +
                                                             seq_offset]
                        if not saw_one:
                            # report the two values that were compared, in
                            # the order the labels announce them
                            print(
                                'Mismatch (B) at position {}. pdb seq: {}, peak seq: {}'
                                .format(segid, pdb_seq[segid],
                                        peak_seq[segid + seq_offset]))
                            continue
                        saw_one = peak_data[peakid]['name'] == residues[i].name
                        if not saw_one:
                            # four placeholders, four values
                            print(
                                'Mismatch (C) at position {}. peak seq: {}, peak data: {}, residue: {}'
                                .format(segid, peak_seq[segid + seq_offset],
                                        peak_data[peakid]['name'],
                                        residues[i].name))
                            continue
                        aligned += 1
                if aligned < 5:
                    raise ValueError(
                        'Could not find more than 5 aligned residues, very unusual'
                    )

            # create residue look-up from atom index
            for i, r in enumerate(residues):
                for a in r.atoms():
                    residue_lookup[a.index] = i
            # This alignment will be checked as we compare shifts against the pdb
        # get neighbor list for frame
        np_pos = np.array([v.value_in_unit(units) for v in frame])
        frame_nlist = nlist_model(np_pos)

        for ri in range(len(residues)):
            # we build up fragment by getting residues around us, both in chain
            # and those within a certain distance of us
            rmin = max(0, ri + keep_residues[0])
            # have to +1 here (and not in range) to get min to work :)
            rmax = min(len(residues), ri + keep_residues[1] + 1)
            # do we have any residues to consider?
            success = rmax - rmin > 0

            consider = set(range(rmin, rmax))

            # Used to indicate an atom should be included from a different residue
            marked = [False for _ in range(len(frame))]

            # now grab spatial neighbor residues
            # NOTE: I checked this by hand a lot
            # Believe this code.
            for a in residues[ri].atoms():
                for ni in range(NN):
                    j = int(frame_nlist[a.index, ni, 1])
                    try:
                        consider.add(residue_lookup[j])
                        marked[j] = True
                    except KeyError:
                        success = False
                        if debug:
                            print(
                                'Neighboring residue in different chain, skipping'
                            )
                        break
            # check our two conditions that could have made this false: there are residues and
            # we didn't have off-chain spatial neighboring residues
            if not success:
                continue
            atoms = np.zeros((max_atoms), dtype=np.int64)
            # Z-DISABLED: a dummy atom used to be stored in the last slot
            # (atoms[-1] = embedding_dicts['atom']['Z']) to keep bond counts
            # the same; the slot is still reserved below.
            # `float` is what the removed alias `np.float` used to mean.
            mask = np.zeros((max_atoms), dtype=float)
            bonds = np.zeros((max_atoms, max_atoms), dtype=np.int64)
            # nlist:
            # :,:,0 -> distance
            # :,:,1 -> neighbor index
            # :,:,2 -> neighbor type (embedding_dicts['nlist'] value)
            nlist = np.zeros((max_atoms, NEIGHBOR_NUMBER, 3), dtype=float)
            positions = np.zeros((max_atoms, 3), dtype=float)
            peaks = np.zeros((max_atoms), dtype=float)
            names = np.zeros((max_atoms), dtype=np.int64)
            # going from pdb atom index to index in these data structures
            rmap = dict()
            index = 0
            for rj in consider:
                residue = residues[rj]
                # use the alignment result to get offset
                segid = int(residue.id) + pdb_offset
                if segid + seq_offset not in sequence_map:
                    if debug:
                        print('Could not find residue index', rj, ': ',
                              residue, 'in the sequence map. Its index is',
                              segid + seq_offset, 'ri: ', ri)
                        print('We are considering', consider)
                    success = False
                    break
                peak_id = sequence_map[segid + seq_offset]
                if peak_id >= len(peak_data):
                    success = False
                    if debug:
                        print('peakd id is outside of peak range')
                    break
                # only check for residue we actually care about
                if ri == rj and residue.name != peak_data[peak_id]['name']:
                    if debug:
                        print('Mismatch between residue ', ri, rj, peak_id,
                              residue, segid, peak_data[peak_id], path,
                              corr_path, chain_id)
                    success = False
                    break
                for atom in residue.atoms():
                    # Make sure atom is in residue or neighbor of residue atom
                    if ri != rj and not marked[atom.index]:
                        continue
                    # mask is 1 only for atoms of the subject residue
                    mask[index] = float(ri == rj)
                    atom_name = residue.name + '-' + atom.name
                    if atom_name not in embedding_dicts['name']:
                        embedding_dicts['name'][atom_name] = len(
                            embedding_dicts['name'])
                    names[index] = embedding_dicts['name'][atom_name]

                    if atom.element.symbol not in embedding_dicts['atom']:
                        if debug:
                            print('Could not identify atom',
                                  atom.element.symbol)
                        success = False
                        break
                    atoms[index] = embedding_dicts['atom'][atom.element.symbol]
                    positions[index] = np_pos[atom.index, :]
                    rmap[atom.index] = index
                    peaks[index] = 0
                    if mask[index]:
                        if atom.name[:3] in peak_data[peak_id]:
                            peaks[index] = peak_data[peak_id][atom.name[:3]]
                            peak_count += 1
                            peak_successes.add(peak_id)
                        else:
                            # subject atom without a peak: do not train on it
                            mask[index] = 0
                    index += 1
                    # Z-DISABLED
                    # -1 for dummy atom which is stored at end
                    if index == max_atoms - 1:
                        MA_LOST_FRAGS += 1
                        if debug:
                            print('Not enough space for all atoms in ri', ri)
                        success = False
                        break
                if ri == rj and sum(mask) == 0:
                    if debug:
                        print('Warning found no peaks for', ri, rj, residue,
                              peak_data[peak_id])
                    success = False
                if not success:
                    break
            if not success:
                continue
            # do this after so our reverse mapping is complete
            for rj in consider:
                residue = residues[rj]
                for b in residue.bonds():
                    # set bonds
                    try:
                        bonds[rmap[b.atom1.index], rmap[b.atom2.index]] = 1
                        bonds[rmap[b.atom2.index], rmap[b.atom1.index]] = 1
                    except KeyError:
                        # for bonds that cross residue
                        pass
            for rj in consider:
                residue = residues[rj]
                for a in residue.atoms():
                    # Make sure atom is in residue or neighbor of residue atom
                    if ri != rj and not marked[a.index]:
                        continue
                    index = rmap[a.index]
                    # convert to local indices and filter neighbors
                    n_index = 0
                    for ni in range(NN):
                        if frame_nlist[a.index, ni, 0] > 50.0:
                            # large distances are sentinels for things
                            # like self neighbors
                            continue
                        try:
                            j = rmap[int(frame_nlist[a.index, ni, 1])]
                        except KeyError:
                            # either we couldn't find a neighbor on the root residue (which is bad)
                            # or just one of the neighbors is not on a considered residue.
                            if rj == ri:
                                success = False
                                if debug:
                                    print('Could not find all neighbors',
                                          int(frame_nlist[a.index, ni, 1]),
                                          consider)
                                break
                            # Z-DISABLED: used to point at the dummy atom
                            continue
                        # a 0 in the bond matrix -> non-bonded, else single bonded
                        if bonds[index, j] == 0:
                            neighbor_type = embedding_dicts['nlist'][
                                'nonbonded']
                        else:
                            neighbor_type = embedding_dicts['nlist'][1]
                        # set distance
                        nlist[index, n_index, 0] = frame_nlist[a.index, ni, 0]
                        # set index
                        nlist[index, n_index, 1] = j
                        # set type
                        nlist[index, n_index, 2] = neighbor_type
                        n_index += 1
                        if n_index == NEIGHBOR_NUMBER:
                            break
            if not success:
                if debug:
                    raise RuntimeError()
                continue
            if gsd_file is not None:
                snapshot = write_record_traj(
                    positions, atoms, mask, nlist, peaks,
                    embedding_dicts['class'][residues[ri].name], names,
                    embedding_dicts)
                snapshot.configuration.step = len(gsd_file)
                gsd_file.append(snapshot)
            result.append(
                make_tfrecord(atoms,
                              mask,
                              nlist,
                              peaks,
                              embedding_dicts['class'][residues[ri].name],
                              names,
                              indices=np.array(
                                  [model_index, fi,
                                   int(residues[ri].id)],
                                  dtype=np.int64)))
            if log_file is not None:
                # gsd_file is optional; do not call len() on None
                n_snapshots = len(gsd_file) if gsd_file is not None else 0
                log_file.write('{} {} {} {} {} {} {} {}\n'.format(
                    path.split('/')[-1],
                    corr_path.split('/')[-1], chain_id, len(peak_successes),
                    n_snapshots, model_index, fi, residues[ri].id))
    return result, len(peak_successes) / len(peak_data), len(
        result), peak_count
예제 #22
0
def apply_pdbfixer(mol,
                   add_missing=True,
                   hydrogenate=True,
                   pH=7.4,
                   remove_heterogens=True,
                   is_protein=True):
    """
    Apply PDBFixer to a molecule to try to clean it up.

    The molecule is round-tripped through an in-memory PDB block: RDKit
    writes it, PDBFixer repairs it, OpenMM's ``PDBFile`` writes the result
    and RDKit parses it again.

    Parameters
    ----------
    mol: Rdkit Mol
      Molecule to clean up.
    add_missing: bool, optional
      If true, look for missing residues and atoms and add the missing atoms.
    hydrogenate: bool, optional
      If true, add hydrogens at specified pH
    pH: float, optional
      The pH at which hydrogens will be added if `hydrogenate==True`. Set to 7.4 by default.
    remove_heterogens: bool, optional
      Often times, PDB files come with extra waters and salts attached.
      If this field is set, remove these heterogens.
    is_protein: bool, optional
      If false, then don't remove heterogens (since this molecule is
      itself a heterogen).

    Returns
    -------
    Rdkit Mol
      Parsed with ``sanitize=False`` and ``removeHs=False``.

    Raises
    ------
    ImportError
      If pdbfixer or openmm cannot be imported.
    MoleculeLoadException
      If a ``ValueError`` is raised while fixing the molecule.

    Note
    ----
    This function requires RDKit and PDBFixer to be installed.
    """
    try:
        from pdbfixer import PDBFixer
    except ModuleNotFoundError:
        raise ImportError("This function requires pdbfixer")

    # Import PDBFile explicitly: a bare `import simtk` does not load the
    # `simtk.openmm.app` submodule, so attribute access on it only works if
    # some other module happened to import it first. Prefer the current
    # `openmm` namespace and fall back to the legacy `simtk` one.
    try:
        from openmm.app import PDBFile
    except ImportError:
        try:
            from simtk.openmm.app import PDBFile
        except ImportError:
            raise ImportError("This function requires openmm")

    try:
        from rdkit import Chem
        pdbblock = Chem.MolToPDBBlock(mol)
        pdb_stringio = StringIO()
        pdb_stringio.write(pdbblock)
        pdb_stringio.seek(0)

        fixer = PDBFixer(pdbfile=pdb_stringio)
        if add_missing:
            fixer.findMissingResidues()
            fixer.findMissingAtoms()
            fixer.addMissingAtoms()
        if hydrogenate:
            fixer.addMissingHydrogens(pH)
        if is_protein and remove_heterogens:
            # False here specifies that water is to be removed
            fixer.removeHeterogens(False)

        hydrogenated_io = StringIO()
        PDBFile.writeFile(fixer.topology, fixer.positions, hydrogenated_io)
        hydrogenated_io.seek(0)
        return Chem.MolFromPDBBlock(hydrogenated_io.read(),
                                    sanitize=False,
                                    removeHs=False)
    except ValueError as e:
        logger.warning("Unable to add hydrogens %s", e)
        raise MoleculeLoadException(e)
예제 #23
0
파일: utils.py 프로젝트: simomarsili/mmlite
def prepare_pdb(pdb,
                chains='A',
                ff=('amber99sbildn.xml', 'tip3p.xml'),
                ph=7,
                pad=10 * unit.angstroms,
                nbonded=app.PME,
                constraints=app.HBonds,
                crystal_water=True):
    """
    Fetch, solvate and minimize a protein PDB structure.

    Two PDB files are written to the current directory,
    ``<pdb>-pdbfixer.pdb`` (after fixing) and ``<pdb>-minimized.pdb``
    (after minimization), and the final state is handed to
    ``serialize_system``.

    Parameters
    ----------
    pdb : str
        PDB Id.
    chains : str or list
        Chain(s) to keep in the system.
    ff : tuple of xml ff files.
        Forcefields for parametrization.
    ph : float
        pH value for adding missing hydrogens.
    pad: Quantity object
        Padding around macromolecule for filling box with water.
    nbonded : object
        The method to use for nonbonded interactions.  Allowed values are
        NoCutoff, CutoffNonPeriodic, CutoffPeriodic, Ewald, PME, or LJPME.
        Solvent is only added for PME, CutoffPeriodic and Ewald.
    constraints : object
        Specifies which bonds and angles should be implemented with
        constraints. Allowed values are None, HBonds, AllBonds, or HAngles.
    crystal_water : bool
        Keep crystal water.

    """

    # Load forcefield.
    logger.info('Retrieving %s from PDB...', pdb)
    ff = app.ForceField(*ff)

    # Retrieve structure from PDB.
    fixer = PDBFixer(pdbid=pdb)

    # Remove unselected chains.
    logger.info('Removing all chains but %s', chains)
    all_chains = [c.id for c in fixer.topology.chains()]
    fixer.removeChains(chainIds=set(all_chains) - set(chains))

    # Find missing residues.
    logger.info('Finding missing residues...')
    fixer.findMissingResidues()

    # Replace nonstandard residues.
    logger.info('Replacing nonstandard residues...')
    fixer.findNonstandardResidues()
    fixer.replaceNonstandardResidues()

    # Add missing atoms.
    logger.info('Adding missing atoms...')
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()

    # Remove heterogens.
    logger.info('Removing heterogens...')
    fixer.removeHeterogens(keepWater=crystal_water)

    # Add missing hydrogens.
    logger.info('Adding missing hydrogens appropriate for pH %s', ph)
    fixer.addMissingHydrogens(ph)

    if nbonded in [app.PME, app.CutoffPeriodic, app.Ewald]:
        # Add solvent.
        logger.info('Adding solvent...')
        fixer.addSolvent(padding=pad)

    # Write PDB file. The handle is closed explicitly so the file is
    # complete on disk before the function moves on.
    fixed_name = '%s-pdbfixer.pdb' % pdb
    logger.info('Writing PDB file to "%s"...', fixed_name)
    with open(fixed_name, 'w') as fp:
        app.PDBFile.writeFile(fixer.topology, fixer.positions, fp)

    # Create OpenMM System.
    logger.info('Creating OpenMM system...')
    system = ff.createSystem(fixer.topology,
                             nonbondedMethod=nbonded,
                             constraints=constraints,
                             rigidWater=True,
                             removeCMMotion=False)

    # Minimize to update positions.
    logger.info('Minimizing...')
    integrator = mm.VerletIntegrator(1.0 * unit.femtosecond)
    context = mm.Context(system, integrator)
    context.setPositions(fixer.positions)
    mm.LocalEnergyMinimizer.minimize(context)
    # pylint: disable=unexpected-keyword-arg, no-value-for-parameter
    state = context.getState(getPositions=True)
    fixer.positions = state.getPositions()

    # Write final coordinates.
    minimized_name = '%s-minimized.pdb' % pdb
    logger.info('Writing PDB file to "%s"...', minimized_name)
    with open(minimized_name, 'w') as fp:
        app.PDBFile.writeFile(fixer.topology, fixer.positions, fp)

    # Serialize final coordinates.
    logger.info('Serializing to XML...')
    serialize_system(context, system, integrator)
예제 #24
0
def cleanProtein(structure,
                 mutator=None,
                 regexes=None,
                 hydrogens=True,
                 run_pdb2pqr=True,
                 quiet=False,
                 remove_numerical_chain_id=False,
                 method="geobind",
                 **kwargs):
    """ Perform any operations needed to modify the structure or sequence of a protein
    chain.

    Parameters
    ----------
    structure : structure object
        Must provide ``name`` and ``get_chains()``; ``save()`` is needed for
        ``method="pdbfixer"``.
    mutator : object, optional
        Residue mutator used by ``method="geobind"``; a ``ResidueMutator``
        is built from ``data`` when omitted.
    regexes : dict, optional
        Must contain ``"SOLVENT_COMPONENTS"``; defaults to ``data.regexes``.
    hydrogens : bool
        If False, hydrogens are stripped from the final structure.
    run_pdb2pqr : bool
        If True, ``runPDB2PQR(structure, **kwargs)`` is run.
    quiet : bool
        Suppress the per-residue log messages.
    remove_numerical_chain_id : bool
        Rename numeric chain ids to unused letters.
    method : str
        ``"geobind"`` or ``"pdbfixer"``; any other value skips residue
        cleaning.

    Returns
    -------
    tuple
        ``(structure,)`` followed by the PQR file if ``run_pdb2pqr`` and by
        the ``{old_chain_id: new_chain_id}`` map if
        ``remove_numerical_chain_id``.
    """
    prefix = structure.name  # used for file names

    if remove_numerical_chain_id:
        # APBS and TABI-PB does not process numerical chain IDs correctly. This is a work-around
        available_ids = list(
            "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz")

        # find current chain ids
        taken_ids = {chain.get_id() for chain in structure.get_chains()}

        # iterate over chains and update
        chain_map = {}
        for chain in structure.get_chains():
            cid = chain.get_id()
            if cid.isnumeric():
                # we want to replace this chain id; candidates are taken
                # from the end of the list and are never handed out twice
                while len(available_ids) > 0:
                    new_id = available_ids.pop()
                    if new_id not in taken_ids:
                        break
                chain_map[cid] = new_id
                chain.id = new_id
            else:
                chain_map[cid] = cid

    if method == "geobind":
        # set up needed objects
        if regexes is None:
            regexes = data.regexes
        if mutator is None:
            mutator = ResidueMutator(data.tripeptides, data.chem_components)

        # remove non-standard residues
        for chain in structure.get_chains():
            replace = []
            remove = []
            for residue in chain:
                resn = residue.get_resname().strip()
                resid = residue.get_id()
                if resn in data.chem_components and heavyAtomCount(residue) / (
                        data.chem_components[resn]['heavy_atom_count'] -
                        1) < 0.6:
                    # too many missing atoms - replace residue
                    replace.append(resid)
                elif mutator.standard(resn):
                    if resid[0] == ' ':
                        continue
                    else:
                        remove.append(
                            (resid, "removed HETATM standard residue: %s"))
                elif resn == 'HOH' or resn == 'WAT':
                    remove.append((resid, None))
                elif regexes["SOLVENT_COMPONENTS"].search(resn):
                    continue
                elif mutator.modified(resn):
                    replace.append(resid)
                else:
                    remove.append((resid, "removed unrecognized residue: %s"))

            # residues are collected first and modified afterwards so the
            # chain is not changed while it is being iterated
            for rid, reason in remove:
                if reason is not None and not quiet:
                    logging.info(reason, chain[rid].get_resname())
                chain.detach_child(rid)

            for rid in replace:
                replacement = mutator.mutate(chain[rid])
                if replacement:
                    if not quiet:
                        logging.info("replacing residue %s with %s",
                                     chain[rid].get_resname(),
                                     replacement.get_resname())
                    replacement.id = rid
                    idx = chain.child_list.index(chain[rid])
                    chain.child_list[idx] = replacement
                else:
                    if not quiet:
                        logging.info(
                            "could not perform replacement on %s, removing",
                            chain[rid].get_resname())
                    chain.detach_child(rid)
    elif method == "pdbfixer":
        try:
            from pdbfixer import PDBFixer
            from openmm.app import PDBFile
        except ModuleNotFoundError:
            raise ModuleNotFoundError(
                "The dependencies 'pdbfixer' and 'openmm' are required with option 'method=\"pdbfixer\"'"
            )

        # create a temp file
        tmpFile1 = tempFileName(prefix, 'pdb')
        tmpFile2 = None
        try:
            structure.save(tmpFile1)

            # run pdbfixer
            fixer = PDBFixer(filename=tmpFile1)
            fixer.findMissingResidues()
            fixer.findNonstandardResidues()
            fixer.replaceNonstandardResidues()
            fixer.removeHeterogens(False)
            fixer.findMissingAtoms()
            fixer.addMissingAtoms()

            # requested only after tmpFile1 exists, as in the original flow
            tmpFile2 = tempFileName(prefix, 'pdb')
            # close the handle before the file is read back below
            with open(tmpFile2, 'w') as handle:
                PDBFile.writeFile(fixer.topology,
                                  fixer.positions,
                                  handle,
                                  keepIds=True)

            # load new fixed structure
            structure = StructureData(tmpFile2, name=prefix)
        finally:
            # clean up, also when one of the steps above failed
            for tmp in (tmpFile1, tmpFile2):
                if tmp is not None and os.path.exists(tmp):
                    os.remove(tmp)

    # run PDB2PQR if requested
    if run_pdb2pqr:
        structure, pqrFile = runPDB2PQR(structure, **kwargs)

    # remove hydrogens if requested
    if not hydrogens:
        stripHydrogens(structure)

    # decide what to return
    rargs = [structure]
    if run_pdb2pqr:
        rargs.append(pqrFile)
    if remove_numerical_chain_id:
        rargs.append(chain_map)

    return tuple(rargs)
예제 #25
0
def cleanPdb(pdb_list,
             chain=None,
             source=None,
             toFolder="cleaned_pdbs",
             formatName=False,
             removeDNAchains=True,
             verbose=False,
             removeTwoEndsMissingResidues=True,
             addMissingResidues=True,
             removeHeterogens=True,
             keepIds=False):
    """Clean a list of PDB files with PDBFixer and write them to ``toFolder``.

    Parameters
    ----------
    pdb_list : iterable of str
        PDB identifiers. Without an explicit ``chain`` a 5th character, if
        present, is used as the chain id.
    chain : str or int, optional
        ``None``: chain from ``pdb_id[4]`` or ``"A"``; ``"-1"``/``-1``: the
        chains returned by ``getAllChains``; ``"first"``: the first chain in
        the file; anything else is used as the chain id(s) to keep.
    source : str, optional
        ``None`` reads from ``original_pdbs/``; a value ending in ``.pdb`` is
        used as the input file itself; otherwise it is the input folder.
    toFolder : str
        Output folder, created if missing.
    formatName : bool
        Use the lower-cased first four characters of the id as file name.
    removeDNAchains : bool
        Forwarded to ``getAllChains``.
    verbose : bool
        Print the removed chains and the missing-residue keys.
    removeTwoEndsMissingResidues : bool
        Drop missing residues located at the start or end of a chain.
    addMissingResidues : bool
        If False, all missing residues are dropped.
    removeHeterogens : bool
        Remove heterogens, water included.
    keepIds : bool
        Forwarded to ``PDBFile.writeFile``.

    Entries whose file cannot be loaded, or for which missing atoms cannot
    be added, are reported on stdout and skipped.
    """
    # Create the folder directly instead of shelling out to `mkdir -p`, so
    # names with spaces or shell metacharacters are handled correctly.
    if toFolder:
        os.makedirs(toFolder, exist_ok=True)
    for pdb_id in pdb_list:
        print(pdb_id)
        if formatName:
            pdb = f"{pdb_id.lower()[:4]}"
        else:
            pdb = pdb_id
        pdbFile = pdb + ".pdb"
        if source is None:
            fromFile = os.path.join("original_pdbs", pdbFile)
        elif source[-4:] == ".pdb":
            fromFile = source
        else:
            fromFile = os.path.join(source, pdbFile)

        # clean pdb
        try:
            fixer = PDBFixer(filename=fromFile)
        except Exception as inst:
            print(inst)
            print(f"{fromFile} not found. skipped")
            continue
        # remove unwanted chains
        chains = list(fixer.topology.chains())
        print(chains)
        if chain is None:  # 'None' means default is chain A unless specified.
            if len(pdb_id) >= 5:
                Chosen_chain = pdb_id[4]
            else:
                assert (len(pdb_id) == 4)
                Chosen_chain = "A"
        elif chain == "-1" or chain == -1:
            Chosen_chain = getAllChains(fromFile,
                                        removeDNAchains=removeDNAchains)
            print(f"Chains: {Chosen_chain}")
        elif chain == "first":
            Chosen_chain = chains[0].id
        else:
            Chosen_chain = chain

        # removeChains is given chain indices here, not chain ids
        chains_to_remove = [
            i for i, x in enumerate(chains) if x.id not in Chosen_chain
        ]
        fixer.removeChains(chains_to_remove)

        fixer.findMissingResidues()
        # add missing residues in the middle of a chain, not ones at the start or end of the chain.
        chains = list(fixer.topology.chains())
        keys = fixer.missingResidues.keys()
        if verbose:
            print("chains to remove", chains_to_remove)
            print("missing residues: ", keys)
        if not addMissingResidues:
            # iterate over a copy: entries are deleted while looping
            for key in list(keys):
                del fixer.missingResidues[key]
        else:
            if removeTwoEndsMissingResidues:
                for key in list(keys):
                    chain_tmp = chains[key[0]]
                    if key[1] == 0 or key[1] == len(list(
                            chain_tmp.residues())):
                        del fixer.missingResidues[key]

        fixer.findNonstandardResidues()
        fixer.replaceNonstandardResidues()
        if removeHeterogens:
            fixer.removeHeterogens(keepWater=False)
        fixer.findMissingAtoms()
        try:
            fixer.addMissingAtoms()
        except Exception:
            # best effort: skip this entry, but let KeyboardInterrupt and
            # SystemExit through
            print("Unable to add missing atoms")
            continue
        fixer.addMissingHydrogens(7.0)
        with open(os.path.join(toFolder, pdbFile), 'w') as handle:
            PDBFile.writeFile(fixer.topology,
                              fixer.positions,
                              handle,
                              keepIds=keepIds)
예제 #26
0
"""

import argparse
import os

from pdbfixer import PDBFixer
from simtk.openmm.app import PDBxFile

# Command-line interface: one positional input structure and an optional
# explicit name for the repaired output file.
ap = argparse.ArgumentParser()
ap.add_argument('structure')
ap.add_argument('--output',
                default=None,
                help='Output name for fixed structure')
cmd = ap.parse_args()

print('Reading PDB structure: {}'.format(cmd.structure))
fixer = PDBFixer(cmd.structure)

# The missing-residue scan is run first because PDBFixer's missing-atom
# search relies on its result (see the PDBFixer manual).
print('Finding and adding missing heavy atoms')
fixer.findMissingResidues()
fixer.findMissingAtoms()
fixer.addMissingAtoms()

if cmd.output is None:
    # Replace whatever extension the input has with '.cif'. Slicing off a
    # fixed four characters only worked for three-letter extensions.
    ofname = os.path.splitext(cmd.structure)[0] + '.cif'
else:
    ofname = cmd.output

print('Writing completed structure in CIF format: {}'.format(ofname))
with open(ofname, 'w') as handle:
    PDBxFile.writeFile(fixer.topology, fixer.positions, handle)
예제 #27
0
# Every chain that was not explicitly selected to be kept gets deleted.
chain_ids_to_remove = set(chain_id_list).difference(chain_ids_to_keep)
fixer.removeChains(chainIds=chain_ids_to_remove)

# Step 1: locate residues absent from the structure.
print('Finding missing residues...')
fixer.findMissingResidues()

# Step 2: locate nonstandard residues and swap them out.
print('Replacing nonstandard residues...')
fixer.findNonstandardResidues()
fixer.replaceNonstandardResidues()

# Step 3: locate and build missing atoms.
print('Adding missing atoms...')
fixer.findMissingAtoms()
fixer.addMissingAtoms()

# Step 4: strip heterogens; keepWater decides whether waters survive.
print('Removing heterogens...')
fixer.removeHeterogens(keepWater=keepWater)

# Step 5: add hydrogens for the requested pH.
print('Adding missing hydrogens appropriate for pH %s' % pH)
fixer.addMissingHydrogens(pH)

# Step 6: solvent is only added for these three nonbonded methods.
solvated_methods = (app.PME, app.CutoffPeriodic, app.Ewald)
if nonbondedMethod in solvated_methods:
    print('Adding solvent...')
    fixer.addSolvent(padding=padding)

# Write PDB file.
예제 #28
0
    def pdb2omm(self,
                input_pdbs=None,
                solvate=True,
                protonate=True,
                fix_pdb=True,
                inspect=False,
                extra_input_pdb=None,
                ff_files=None,
                extra_ff_files=None,
                extra_names=None,
                other_ff_instance=False,
                pH=7.0,
                protonation_dict=None):
        """
        Build an OpenMM system from a PDB file located in ``self.workdir``.

        The structure is loaded (optionally repaired with PDBFixer), wrapped
        in a Modeller, optionally protonated, extended with extra structures,
        optionally solvated, and finally turned into a System. The results
        are stored on the instance as ``input_pdb``, ``system``, ``topology``,
        ``positions`` and ``system_pdb``.

        Parameters
        ----------
        input_pdbs : str
            Name of the input PDB file, resolved as
            ``f'{self.workdir}/{input_pdbs}'``. Required despite the ``None``
            default.
        solvate : bool, optional
            If True, pass the model through ``self.solvate``. The default is
            True.
        protonate : bool, optional
            If True, add hydrogens with ``Modeller.addHydrogens`` using the
            force field, ``pH`` and the variants returned by
            ``self.setProtonationState``. The default is True.
        fix_pdb : bool, optional
            If True, load the file with PDBFixer and add missing atoms;
            otherwise load it unchanged with ``app.PDBFile``. The default is
            True.
        inspect : bool, optional
            Currently unused. The default is False.
        extra_input_pdb : list of str, optional
            Paths of additional PDB files whose topology and positions are
            added to the model, e.g. ``['SAM_H3K36.pdb', 'ZNB_H3K36.pdb']``.
            These paths are used as given (not joined with ``self.workdir``).
            The default is an empty list.
        ff_files : list of str, optional
            Forwarded to ``self.setForceFields``, e.g.
            ``['amber14-all.xml', 'amber14/tip4pew.xml', 'gaff.xml']``.
            The default is an empty list.
        extra_ff_files : list of str, optional
            Forwarded to ``self.setForceFields``, e.g.
            ``['SAM.xml', 'ZNB.xml']``. The default is an empty list.
        extra_names : list of str, optional
            Currently unused, e.g. ``['SAM', 'ZNB']``. The default is an
            empty list.
        other_ff_instance : bool, optional
            Currently unused. The default is False.
        pH : float, optional
            pH handed to ``Modeller.addHydrogens``. The default is 7.0.
        protonation_dict : dict, optional
            Mapping forwarded to ``self.setProtonationState``. The default
            (``None``) keeps the historical hard-coded value
            ``{('A', 187): 'ASP', ('A', 224): 'HID'}``.
            NOTE(review): the keys look like (chain id, residue number) for
            one specific protein -- confirm, and pass an explicit mapping for
            any other system.

        Returns
        -------
        self
            The instance itself, with the attributes listed above updated.

        Raises
        ------
        ValueError
            If ``input_pdbs`` is None.

        """
        # Fail early: without this check the path would silently become
        # '<workdir>/None' and the loader would fail with an unrelated error.
        if input_pdbs is None:
            raise ValueError(
                'input_pdbs must name a PDB file located in the working '
                'directory')

        # None stands in for "empty list" so that no list object is shared
        # between calls through the signature defaults.
        extra_input_pdb = [] if extra_input_pdb is None else extra_input_pdb
        ff_files = [] if ff_files is None else ff_files
        extra_ff_files = [] if extra_ff_files is None else extra_ff_files
        extra_names = [] if extra_names is None else extra_names

        # Historical default kept for backward compatibility.
        if protonation_dict is None:
            protonation_dict = {('A', 187): 'ASP', ('A', 224): 'HID'}

        tools.Functions.fileHandler(self.workdir)

        input_pdb = f'{self.workdir}/{input_pdbs}'

        if fix_pdb:
            pdb = PDBFixer(input_pdb)
            pdb.findMissingResidues()
            pdb.findMissingAtoms()
            pdb.addMissingAtoms()
        else:
            pdb = app.PDBFile(input_pdb)

        pre_system = app.Modeller(pdb.topology, pdb.positions)

        forcefield = self.setForceFields(ff_files=ff_files,
                                         extra_ff_files=extra_ff_files,
                                         omm_ff=False)

        if protonate:
            variants = self.setProtonationState(
                pre_system.topology.chains(),
                protonation_dict=protonation_dict)
            pre_system.addHydrogens(forcefield, pH=pH, variants=variants)

        # Add the extra (e.g. ligand) structures to the model.
        for extra_pdb_file in extra_input_pdb:
            extra_pdb = app.PDBFile(extra_pdb_file)
            pre_system.add(extra_pdb.topology, extra_pdb.positions)

        # Delegate solvation to self.solvate, which returns the new model.
        if solvate:
            pre_system = self.solvate(pre_system, forcefield)

        # Create the OpenMM System from the final topology.
        system = forcefield.createSystem(pre_system.topology,
                                         nonbondedMethod=app.PME,
                                         nonbondedCutoff=1.0*nanometers,
                                         ewaldErrorTolerance=0.0005,
                                         constraints='HBonds',
                                         rigidWater=True)

        # Update attributes
        self.input_pdb = input_pdb
        self.system = system
        self.topology = pre_system.topology
        self.positions = pre_system.positions

        # TODO: A lot. Link to Visualization
        self.system_pdb = self.writePDB(pre_system.topology,
                                        pre_system.positions,
                                        name='system')

        return self