예제 #1
1
def pdb_fix_pdbfixer(pdbid, file_pathway, ph, chains_to_remove):
    """Download a PDB entry, repair it with PDBFixer and write three PDB files.

    The files are written into ``file_pathway``:

    * ``<pdbid>_fixed_ph<ph>.pdb`` -- repaired structure, waters and ligands kept
    * ``<pdbid>_fixed_ph<ph>_apo.pdb`` -- written after ``removeHeterogens(True)``
    * ``<pdbid>_fixed_ph<ph>_apo_nowater.pdb`` -- written after
      ``removeHeterogens(False)``

    Args:
        pdbid: 4 letter string specifying the PDB ID of the file you want to fix
        file_pathway: directory in which the PDB files are written
        ph: the pH at which hydrogens will be determined and added
        chains_to_remove: mapping with two parallel sequences, ``'pdbid'`` and
            ``'chain_to_remove'``; the entry at the position of ``pdbid`` is a
            whitespace-separated string of chain IDs to remove
    Returns: nothing, but it does write PDB files

    NOTE(review): ``keepNumbers`` is not defined in this function; it is
    presumably a module-level flag -- confirm it exists in the enclosing module.
    """
    print(pdbid)

    # Download the topology from RCSB based on pdbid
    fixer = PDBFixer(pdbid=pdbid)

    # Remove chains based on hand curated .csv file
    if pdbid in chains_to_remove['pdbid']:
        position = chains_to_remove['pdbid'].index(pdbid)
        chains_list = chains_to_remove['chain_to_remove'][position].split()
        fixer.removeChains(chainIds=chains_list)

    # Determine the first and last residue resolved in chain 0
    chains = [chain for chain in fixer.topology.chains()]
    resindices = [residue.index for residue in chains[0].residues()]
    resindices = natsorted(resindices)
    first_resindex = resindices[0]
    last_resindex = resindices[-1]

    # Find missing residues and drop the entries that are N- or C-terminal
    # fragments, so that only internal gaps are rebuilt. The last element of
    # each key is compared against the resolved residue range (presumably the
    # key is (chain index, residue index) -- confirm against PDBFixer).
    fixer.findMissingResidues()
    if fixer.missingResidues:
        first_key = min(fixer.missingResidues)
        if first_key[-1] <= first_resindex:
            del fixer.missingResidues[first_key]
    # Re-check for emptiness: the only entry may just have been removed above.
    if fixer.missingResidues:
        last_key = max(fixer.missingResidues)
        if last_key[-1] >= last_resindex:
            del fixer.missingResidues[last_key]

    fixer.findNonstandardResidues()
    fixer.replaceNonstandardResidues()
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()
    fixer.addMissingHydrogens(ph)

    def write_snapshot(suffix):
        # Write the current state of ``fixer`` and close the handle right away.
        filename = '%s_fixed_ph%s%s.pdb' % (pdbid, ph, suffix)
        with open(os.path.join(file_pathway, filename), 'w') as handle:
            PDBFile.writeFile(fixer.topology, fixer.positions, handle,
                              keepIds=keepNumbers)

    # Write fixed PDB file, with all of the waters and ligands
    write_snapshot('')

    # Remove the ligand and write a pdb file
    fixer.removeHeterogens(True)
    write_snapshot('_apo')

    # Remove the waters and write a pdb file
    fixer.removeHeterogens(False)
    write_snapshot('_apo_nowater')
예제 #2
0
def fix_pdb(pdb_id, pdb_file, pdb_group):
    """Strip unwanted chains and heterogens from a PDB file and repair it.

    Chains whose ID is not returned by ``get_required_chains(pdb_group)`` are
    removed, missing atoms are added, and heterogens are removed with
    ``removeHeterogens(True)``. The result is written as ``<pdb_id>.pdb`` in
    the same directory as ``pdb_file``.

    Args:
        pdb_id: identifier used for the parsed structure and the output name.
        pdb_file: path of the input PDB file.
        pdb_group: value passed to ``get_required_chains`` to select chains.

    Returns:
        Path of the written PDB file.
    """
    import os  # local import: only ``dirname`` is known to be in scope here

    chains_to_retain = get_required_chains(pdb_group)
    # Only the first model of the parsed structure is inspected.
    chains_to_remove = [
        chain.get_id()
        for chain in PDBParser().get_structure(pdb_id, pdb_file)[0]
        if chain.get_id() not in chains_to_retain
    ]

    fixer = PDBFixer(filename=pdb_file)

    fixer.removeChains(chainIds=chains_to_remove)

    fixer.findMissingResidues()
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()
    fixer.removeHeterogens(True)

    # Join the path instead of concatenating with '/': a bare file name has an
    # empty dirname, which previously produced '/<pdb_id>.pdb' (filesystem root).
    pdb_file = os.path.join(os.path.dirname(pdb_file), pdb_id + '.pdb')

    # The keepIds flag is critical here, otherwise we lose all binding
    # information.
    with open(pdb_file, 'w') as handle:
        PDBFile.writeFile(fixer.topology,
                          fixer.positions,
                          handle,
                          keepIds=True)

    return pdb_file
예제 #3
0
def cleanPdb(pdb_list, chain=None, fromFolder=None, toFolder="cleaned_pdbs"):
    """Clean every PDB in ``pdb_list`` with PDBFixer and write it to ``toFolder``.

    For each entry the unwanted chains are removed, internal missing residues
    are rebuilt (terminal ones are discarded), nonstandard residues are
    replaced, heterogens including water are removed, and missing atoms and
    hydrogens (pH 7.0) are added.

    Args:
        pdb_list: iterable of PDB identifiers; the first four characters,
            lower-cased, give the file name ``<id>.pdb``.
        chain: which chains to keep. ``None`` keeps the chain named by the
            fifth character of the identifier, or chain "A" for a
            four-character identifier. ``"-1"`` or ``-1`` keeps whatever
            ``getAllChains`` returns for the input file. Any other value is
            used directly; a chain is kept when its ID is ``in`` that value.
        fromFolder: directory holding the input files (default
            ``original_pdbs``), or the path of a single ``.pdb`` file to use
            as input.
        toFolder: output directory, created if missing.
    """
    # Create the directory without a shell so that paths containing spaces or
    # shell metacharacters are handled correctly.
    if toFolder:
        os.makedirs(toFolder, exist_ok=True)
    for pdb_id in pdb_list:
        pdb = pdb_id.lower()[:4]
        pdbFile = pdb + ".pdb"
        if fromFolder is None:
            fromFile = os.path.join("original_pdbs", pdbFile)
        elif fromFolder.endswith(".pdb"):
            # A direct file path was given. The suffix has to be tested here;
            # the original compared the first four characters.
            fromFile = fromFolder
        else:
            fromFile = os.path.join(fromFolder, pdbFile)
        if chain is None:  # None means the default is chain A unless specified.
            if len(pdb_id) == 5:
                Chosen_chain = pdb_id[4].upper()
            else:
                assert (len(pdb_id) == 4)
                Chosen_chain = "A"
        elif chain == "-1" or chain == -1:
            Chosen_chain = getAllChains(fromFile)
        else:
            Chosen_chain = chain
        # clean pdb
        fixer = PDBFixer(filename=fromFile)
        # remove unwanted chains (by index in the topology)
        chains = list(fixer.topology.chains())
        chains_to_remove = [
            i for i, x in enumerate(chains) if x.id not in Chosen_chain
        ]
        fixer.removeChains(chains_to_remove)

        fixer.findMissingResidues()
        # Add missing residues in the middle of a chain, not ones at the start
        # or end of the chain: drop entries positioned at 0 or at the chain
        # length.
        chains = list(fixer.topology.chains())
        for key in list(fixer.missingResidues):
            chain_tmp = chains[key[0]]
            if key[1] == 0 or key[1] == len(list(chain_tmp.residues())):
                del fixer.missingResidues[key]

        fixer.findNonstandardResidues()
        fixer.replaceNonstandardResidues()
        fixer.removeHeterogens(keepWater=False)
        fixer.findMissingAtoms()
        fixer.addMissingAtoms()
        fixer.addMissingHydrogens(7.0)
        with open(os.path.join(toFolder, pdbFile), 'w') as handle:
            PDBFile.writeFile(fixer.topology, fixer.positions, handle)
예제 #4
0
def fix_peptide(pdb_file, seq_dict, pH=7.4, remove_water=True, remove_small_mols=True):
    """Repair a peptide PDB against user-supplied sequences and check its geometry.

    Args:
        pdb_file: path of the input PDB file.
        seq_dict: mapping of chain index to a sequence of one-letter residue
            codes (looked up in ``ONE_THREE_CODE``). An empty or ``None``
            value marks the chain for removal. The mapping is not modified.
        pH: pH used when adding missing hydrogens.
        remove_water: when heterogens are removed, also remove water.
        remove_small_mols: remove heterogens before fixing.

    Returns:
        The object returned by ``mdtraj.load`` for the repaired structure.
        Results of the cis-peptide and chirality checks are printed, not
        returned.
    """
    fixer = PDBFixer(filename=pdb_file)
    # Replace the sequence records with the residues actually present in each
    # chain of the topology.
    fixer.sequences.clear()
    for chain in fixer.topology.chains():
        seq = pdbfixer.pdbfixer.Sequence(chain.id, [r.name for r in chain.residues()])
        fixer.sequences.append(seq)
    if remove_small_mols:
        fixer.removeHeterogens(not remove_water)

    # Convert single AA codes to three letter codes. The result goes into a
    # separate dict: rewriting ``seq_dict`` in place would make a second call
    # with the same mapping fail on the already-converted codes.
    delete_chains = []
    three_letter_seqs = {}
    for key, value in seq_dict.items():
        if not value:
            delete_chains.append(key)
            three_letter_seqs[key] = value
        else:
            three_letter_seqs[key] = [ONE_THREE_CODE[item] for item in value]

    for chain in fixer.topology.chains():
        residues = three_letter_seqs.get(chain.index)
        if residues is not None:
            fixer.sequences[chain.index].residues = residues
    fixer.findMissingResidues()
    fixer.findNonstandardResidues()
    fixer.replaceNonstandardResidues()
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()
    fixer.addMissingHydrogens(pH)
    fixer.removeChains(delete_chains)

    # Write through the temporary file's own handle and flush before loading,
    # so the reader never sees a partially written file.
    with tempfile.NamedTemporaryFile(mode='w', suffix=".pdb") as dummy:
        app.PDBFile.writeFile(fixer.topology, fixer.positions, dummy)
        dummy.flush()
        product = mdtraj.load(dummy.name)
    problem_cis = ChiralityCheck.check_cispeptide_bond(product)
    problem_chiral = ChiralityCheck.check_chirality(product)
    print("The following problems have been detected:")
    print(problem_cis)
    print(problem_chiral)
    print("Either rerun or find a tool to solve. Perhaps VMD?")
    return product
예제 #5
0
def fix_pdb(pdb_id, pdb_file, pdb_group):
    """Strip unwanted chains and heterogens from a PDB file and repair it.

    Chains whose ID is not returned by ``get_required_chains(pdb_group)`` are
    removed, missing atoms are added, and heterogens are removed with
    ``removeHeterogens(True)``. The result is written as ``<pdb_id>.pdb`` in
    the same directory as ``pdb_file``.

    Args:
        pdb_id: identifier used for the parsed structure and the output name.
        pdb_file: path of the input PDB file.
        pdb_group: value passed to ``get_required_chains`` to select chains.

    Returns:
        Path of the written PDB file.
    """
    import os  # local import: only ``dirname`` is known to be in scope here

    chains_to_retain = get_required_chains(pdb_group)
    # Only the first model of the parsed structure is inspected.
    chains_to_remove = [
        chain.get_id()
        for chain in PDBParser().get_structure(pdb_id, pdb_file)[0]
        if chain.get_id() not in chains_to_retain
    ]

    fixer = PDBFixer(filename=pdb_file)

    fixer.removeChains(chainIds=chains_to_remove)

    fixer.findMissingResidues()
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()
    fixer.removeHeterogens(True)

    # Join the path instead of concatenating with '/': a bare file name has an
    # empty dirname, which previously produced '/<pdb_id>.pdb' (filesystem root).
    pdb_file = os.path.join(os.path.dirname(pdb_file), pdb_id + '.pdb')

    # The keepIds flag is critical here, otherwise we lose all binding
    # information.
    with open(pdb_file, 'w') as handle:
        PDBFile.writeFile(fixer.topology, fixer.positions, handle, keepIds=True)

    return pdb_file
예제 #6
0
            c.id = "Z"
            chains_to_remove.append(i)
    elif num_residues <= 15:
        if not found_C:
            found_C = True
            c.id = "C"
        else:
            c.id = "Z"
            chains_to_remove.append(i)
    else:
        c.id = "Z"
        chains_to_remove.append(i)
        #print "ERROR: Found chains with weird number of residues:", num_residues
        #sys.exit(0)

fixer.removeChains(chains_to_remove)

chains = fixer.topology.chains()
print chains
chain_lengths = []
for c in chains:
    num_residues = len(list(c.residues()))
    chain_lengths.append(num_residues)

PDBFile.writeFile(fixer.topology, fixer.positions, open(pdbcode + ".pdb", 'w'))
call(["rm temp.pdb"], shell=True)

if chain_lengths[1] < chain_lengths[2]:
    call(["grep \"[A-Z] B  \" " + pdbcode + ".pdb > temp.pdb"], shell=True)
    call(["sed -i \"/[A-Z] B  /d\" " + pdbcode + ".pdb"], shell=True)
    call(["sed -i \"/END/d\" " + pdbcode + ".pdb"], shell=True)
예제 #7
0
# SET UP SYSTEM
################################################################################
# Fetch a structure by PDB ID, keep only the chains listed in
# `chain_ids_to_keep`, then replace nonstandard residues and add missing atoms
# with PDBFixer. `app`, `forcefields_to_use`, `pdbid` and `chain_ids_to_keep`
# are expected to be defined earlier in the script.

# Load forcefield.
forcefield = app.ForceField(*forcefields_to_use)

# Retrieve structure from PDB.
print('Retrieving %s from PDB...' % pdbid)
fixer = PDBFixer(pdbid=pdbid)

# Build a list of chains to remove.
print('Removing all chains but %s' % chain_ids_to_keep)
# NOTE(review): `all_chains` is not used anywhere in this fragment.
all_chains = list(fixer.topology.chains())
# Chain IDs are read from the first model of the parsed structure.
# NOTE(review): this relies on `fixer.structure`; confirm that the installed
# PDBFixer version exposes it.
chain_id_list = [c.chain_id for c in fixer.structure.models[0].chains]
# Everything that is not explicitly kept gets removed.
chain_ids_to_remove = set(chain_id_list) - set(chain_ids_to_keep)
fixer.removeChains(chainIds=chain_ids_to_remove)

# Find missing residues.
print('Finding missing residues...')
fixer.findMissingResidues()

# Replace nonstandard residues.
print('Replacing nonstandard residues...')
fixer.findNonstandardResidues()
fixer.replaceNonstandardResidues()

# Add missing atoms.
print('Adding missing atoms...')
fixer.findMissingAtoms()
fixer.addMissingAtoms()
예제 #8
0
# Write the current structure, then strip unwanted chains and repair the rest.
# `fixer`, `outfile`, `chain_ids_to_keep`, `keep_crystallographic_water`,
# `logger` and `workdir` are expected to be defined earlier in the script.
app.PDBFile.writeFile(fixer.topology, fixer.positions, outfile)
outfile.close()

if chain_ids_to_keep is not None:
    # Hack to get chain id to chain number mapping.
    chain_id_list = [c.chain_id for c in fixer.structure.models[0].chains]

    # Build list of chains to remove (by position in the first model).
    chain_numbers_to_remove = list()
    for (chain_number, chain_id) in enumerate(chain_id_list):
        if chain_id not in chain_ids_to_keep:
            chain_numbers_to_remove.append(chain_number)

    # Remove all but desired chains.
    logger.info("Removing chains...")
    fixer.removeChains(chain_numbers_to_remove)

# DEBUG
# Parenthesised single-argument form: valid on both Python 2 and Python 3
# (the bare print statement is a SyntaxError on Python 3).
print("fixer.topology.chains(): %s" % str([chain.id for chain in fixer.topology.chains()]))

# Add missing atoms and residues.
logger.info("Adding missing atoms and residues...")
fixer.findMissingResidues()
fixer.findMissingAtoms()
fixer.addMissingAtoms()
#fixer.addMissingHydrogens(pH) # DEBUG
fixer.removeHeterogens(keepWater=keep_crystallographic_water)

# Write PDB file for completed output.
logger.info("Writing pdbfixer output...")
pdb_filename = os.path.join(workdir, 'pdbfixer.pdb')
예제 #9
0
def cleanPdb(pdb_list,
             chain=None,
             source=None,
             toFolder="cleaned_pdbs",
             formatName=False,
             removeDNAchains=True,
             verbose=False,
             removeTwoEndsMissingResidues=True,
             addMissingResidues=True,
             removeHeterogens=True,
             keepIds=False):
    """Clean every PDB in ``pdb_list`` with PDBFixer and write it to ``toFolder``.

    Entries whose input file cannot be loaded, or for which missing atoms
    cannot be added, are reported on stdout and skipped.

    Args:
        pdb_list: iterable of PDB identifiers.
        chain: which chains to keep. ``None`` keeps the chain named by the
            fifth character of the identifier, or chain "A" for a
            four-character identifier. ``"-1"`` or ``-1`` keeps whatever
            ``getAllChains`` returns for the input file. ``"first"`` keeps the
            first chain of the topology. Any other value is used directly; a
            chain is kept when its ID is ``in`` that value.
        source: directory holding the input files (default ``original_pdbs``),
            or the path of a single ``.pdb`` file to use as input.
        toFolder: output directory, created if missing.
        formatName: if true, the file name is the first four characters of the
            identifier, lower-cased; otherwise the identifier is used as is.
        removeDNAchains: forwarded to ``getAllChains``.
        verbose: print the removed chain indices and the missing residues.
        removeTwoEndsMissingResidues: discard missing residues located at the
            start or end of a chain (only when ``addMissingResidues`` is true).
        addMissingResidues: if false, no missing residues are rebuilt at all.
        removeHeterogens: remove heterogens, including water.
        keepIds: forwarded to ``PDBFile.writeFile``.
    """
    # Create the directory without a shell so that paths containing spaces or
    # shell metacharacters are handled correctly.
    if toFolder:
        os.makedirs(toFolder, exist_ok=True)
    for pdb_id in pdb_list:
        print(pdb_id)
        if formatName:
            pdb = pdb_id.lower()[:4]
        else:
            pdb = pdb_id
        pdbFile = pdb + ".pdb"
        if source is None:
            fromFile = os.path.join("original_pdbs", pdbFile)
        elif source[-4:] == ".pdb":
            fromFile = source
        else:
            fromFile = os.path.join(source, pdbFile)

        # clean pdb
        try:
            fixer = PDBFixer(filename=fromFile)
        except Exception as inst:
            print(inst)
            print(f"{fromFile} not found. skipped")
            continue
        # remove unwanted chains
        chains = list(fixer.topology.chains())
        print(chains)
        if chain is None:  # 'None' means the default is chain A unless specified.
            if len(pdb_id) >= 5:
                Chosen_chain = pdb_id[4]
            else:
                assert (len(pdb_id) == 4)
                Chosen_chain = "A"
        elif chain == "-1" or chain == -1:
            Chosen_chain = getAllChains(fromFile,
                                        removeDNAchains=removeDNAchains)
            print(f"Chains: {Chosen_chain}")
        elif chain == "first":
            Chosen_chain = chains[0].id
        else:
            Chosen_chain = chain

        chains_to_remove = [
            i for i, x in enumerate(chains) if x.id not in Chosen_chain
        ]
        fixer.removeChains(chains_to_remove)

        fixer.findMissingResidues()
        # Add missing residues in the middle of a chain, not ones at the start
        # or end of the chain.
        chains = list(fixer.topology.chains())
        keys = fixer.missingResidues.keys()
        if verbose:
            print("chains to remove", chains_to_remove)
            print("missing residues: ", keys)
        if not addMissingResidues:
            for key in list(keys):
                del fixer.missingResidues[key]
        elif removeTwoEndsMissingResidues:
            for key in list(keys):
                chain_tmp = chains[key[0]]
                # Position 0 or the chain length marks a terminal gap.
                if key[1] == 0 or key[1] == len(list(chain_tmp.residues())):
                    del fixer.missingResidues[key]

        fixer.findNonstandardResidues()
        fixer.replaceNonstandardResidues()
        if removeHeterogens:
            fixer.removeHeterogens(keepWater=False)
        fixer.findMissingAtoms()
        try:
            fixer.addMissingAtoms()
        except Exception:
            # Best effort: skip this entry, but let KeyboardInterrupt and
            # SystemExit propagate (a bare ``except:`` swallowed those too).
            print("Unable to add missing atoms")
            continue
        fixer.addMissingHydrogens(7.0)
        with open(os.path.join(toFolder, pdbFile), 'w') as handle:
            PDBFile.writeFile(fixer.topology,
                              fixer.positions,
                              handle,
                              keepIds=keepIds)
예제 #10
0
from pdbfixer import PDBFixer
from simtk.openmm.app import PDBFile

# Load the local structure, drop chain B, add missing atoms and hydrogens
# (pH 7.0), remove heterogens including water, and write the result.
fixer = PDBFixer(filename='3UE4.pdb')

fixer.removeChains(chainIds=['B'])

# Without fixer.missingResidues = {}, fixer.addMissingAtoms() throws an
# exception, and calling fixer.findMissingResidues() instead adds several
# terminal residues.
fixer.missingResidues = {}
fixer.findMissingAtoms()
fixer.addMissingAtoms()

fixer.removeHeterogens(keepWater=False)

fixer.addMissingHydrogens(7.0)

# Use a context manager so the output file is flushed and closed.
with open('3UE4-pdbfixer.pdb', 'w') as output:
    PDBFile.writeFile(fixer.topology, fixer.positions, output)
예제 #11
0
from simtk.openmm import app
from pdbfixer import PDBFixer

# Fetch 2z9j, drop chain B and all heterogens (including water), and write
# the cleaned structure.
fixer = PDBFixer(pdbid="2z9j")

fixer.removeChains(chainIds=['B'])

fixer.removeHeterogens(keepWater=False)

# Use a context manager so the output file is flushed and closed.
with open("input/2z9j_clean.pdb", "w") as output:
    app.PDBFile.writeFile(fixer.topology, fixer.positions, output)
예제 #12
0
# SET UP SYSTEM
################################################################################
# Fetch a structure by PDB ID, keep only the chains listed in
# `chain_ids_to_keep`, then replace nonstandard residues and add missing atoms
# with PDBFixer. `app`, `forcefields_to_use`, `pdbid` and `chain_ids_to_keep`
# are expected to be defined earlier in the script.

# Load forcefield.
forcefield = app.ForceField(*forcefields_to_use)

# Retrieve structure from PDB.
print('Retrieving %s from PDB...' % pdbid)
fixer = PDBFixer(pdbid=pdbid)

# Build a list of chains to remove.
print('Removing all chains but %s' % chain_ids_to_keep)
# NOTE(review): `all_chains` is not used anywhere in this fragment.
all_chains = list(fixer.topology.chains())
# Chain IDs are read from the first model of the parsed structure.
# NOTE(review): this relies on `fixer.structure`; confirm that the installed
# PDBFixer version exposes it.
chain_id_list = [c.chain_id for c in fixer.structure.models[0].chains]
# Everything that is not explicitly kept gets removed.
chain_ids_to_remove = set(chain_id_list) - set(chain_ids_to_keep)
fixer.removeChains(chainIds=chain_ids_to_remove)

# Find missing residues.
print('Finding missing residues...')
fixer.findMissingResidues()

# Replace nonstandard residues.
print('Replacing nonstandard residues...')
fixer.findNonstandardResidues()
fixer.replaceNonstandardResidues()

# Add missing atoms.
print('Adding missing atoms...')
fixer.findMissingAtoms()
fixer.addMissingAtoms()
예제 #13
0
# Write the current structure, then strip unwanted chains and repair the rest.
# `fixer`, `outfile`, `chain_ids_to_keep`, `keep_crystallographic_water`,
# `logger` and `workdir` are expected to be defined earlier in the script.
app.PDBFile.writeFile(fixer.topology, fixer.positions, outfile)
outfile.close()

if chain_ids_to_keep is not None:
    # Hack to get chain id to chain number mapping.
    chain_id_list = [c.chain_id for c in fixer.structure.models[0].chains]

    # Build list of chains to remove (by position in the first model).
    chain_numbers_to_remove = list()
    for (chain_number, chain_id) in enumerate(chain_id_list):
        if chain_id not in chain_ids_to_keep:
            chain_numbers_to_remove.append(chain_number)

    # Remove all but desired chains.
    logger.info("Removing chains...")
    fixer.removeChains(chain_numbers_to_remove)

# DEBUG
# Parenthesised single-argument form: valid on both Python 2 and Python 3
# (the bare print statement is a SyntaxError on Python 3).
print("fixer.topology.chains(): %s" % str([chain.id for chain in fixer.topology.chains()]))

# Add missing atoms and residues.
logger.info("Adding missing atoms and residues...")
fixer.findMissingResidues()
fixer.findMissingAtoms()
fixer.addMissingAtoms()
#fixer.addMissingHydrogens(pH) # DEBUG
fixer.removeHeterogens(keepWater=keep_crystallographic_water)

# Write PDB file for completed output.
logger.info("Writing pdbfixer output...")
pdb_filename = os.path.join(workdir, 'pdbfixer.pdb')
예제 #14
0
파일: utils.py 프로젝트: simomarsili/mmlite
def prepare_pdb(pdb,
                chains='A',
                ff=('amber99sbildn.xml', 'tip3p.xml'),
                ph=7,
                pad=10 * unit.angstroms,
                nbonded=app.PME,
                constraints=app.HBonds,
                crystal_water=True):
    """
    Fetch, solvate and minimize a protein PDB structure.

    Writes ``<pdb>-pdbfixer.pdb`` (structure after PDBFixer) and
    ``<pdb>-minimized.pdb`` (coordinates after energy minimization) to the
    current directory, then passes the context, system and integrator to
    ``serialize_system``. Nothing is returned.

    Parameters
    ----------
    pdb : str
        PDB Id.
    chains : str or list
        Chain(s) to keep in the system.
    ff : tuple of xml ff files.
        Forcefields for parametrization.
    ph : float
        pH value for adding missing hydrogens.
    pad: Quantity object
        Padding around macromolecule for filling box with water.
    nbonded : object
        The method to use for nonbonded interactions.  Allowed values are
        NoCutoff, CutoffNonPeriodic, CutoffPeriodic, Ewald, PME, or LJPME.
        Solvent is added only for PME, CutoffPeriodic and Ewald.
    constraints : object
        Specifies which bonds and angles should be implemented with
        constraints. Allowed values are None, HBonds, AllBonds, or HAngles.
    crystal_water : bool
        Keep crystal water.

    """

    # Load forcefield.
    ff = app.ForceField(*ff)

    # Retrieve structure from PDB.
    logger.info('Retrieving %s from PDB...', pdb)
    fixer = PDBFixer(pdbid=pdb)

    # Remove unselected chains.
    logger.info('Removing all chains but %s', chains)
    all_chains = [c.id for c in fixer.topology.chains()]
    fixer.removeChains(chainIds=set(all_chains) - set(chains))

    # Find missing residues.
    logger.info('Finding missing residues...')
    fixer.findMissingResidues()

    # Replace nonstandard residues.
    logger.info('Replacing nonstandard residues...')
    fixer.findNonstandardResidues()
    fixer.replaceNonstandardResidues()

    # Add missing atoms.
    logger.info('Adding missing atoms...')
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()

    # Remove heterogens.
    logger.info('Removing heterogens...')
    fixer.removeHeterogens(keepWater=crystal_water)

    # Add missing hydrogens.
    logger.info('Adding missing hydrogens appropriate for pH %s', ph)
    fixer.addMissingHydrogens(ph)

    # NOTE(review): LJPME is listed as allowed in the docstring but does not
    # trigger solvation here -- confirm whether that is intended.
    if nbonded in [app.PME, app.CutoffPeriodic, app.Ewald]:
        # Add solvent.
        logger.info('Adding solvent...')
        fixer.addSolvent(padding=pad)

    # Write PDB file. A context manager guarantees the file is flushed and
    # closed, matching how the minimized structure is written below.
    fixed_filename = '%s-pdbfixer.pdb' % pdb
    logger.info('Writing PDB file to "%s"...', fixed_filename)
    with open(fixed_filename, 'w') as fp:
        app.PDBFile.writeFile(fixer.topology, fixer.positions, fp)

    # Create OpenMM System.
    logger.info('Creating OpenMM system...')
    system = ff.createSystem(fixer.topology,
                             nonbondedMethod=nbonded,
                             constraints=constraints,
                             rigidWater=True,
                             removeCMMotion=False)

    # Minimize to update positions.
    logger.info('Minimizing...')
    integrator = mm.VerletIntegrator(1.0 * unit.femtosecond)
    context = mm.Context(system, integrator)
    context.setPositions(fixer.positions)
    mm.LocalEnergyMinimizer.minimize(context)
    # pylint: disable=unexpected-keyword-arg, no-value-for-parameter
    state = context.getState(getPositions=True)
    fixer.positions = state.getPositions()

    # Write final coordinates.
    minimized_filename = '%s-minimized.pdb' % pdb
    logger.info('Writing PDB file to "%s"...', minimized_filename)
    with open(minimized_filename, 'w') as fp:
        app.PDBFile.writeFile(fixer.topology, fixer.positions, fp)

    # Serialize final coordinates.
    logger.info('Serializing to XML...')
    serialize_system(context, system, integrator)
def main(args):
    """Download a PDB entry, keep one chain per size class, write ``<id>.pdb``.

    ``args[0]`` must be a 4-letter PDB ID. The entry is fetched and repaired
    by the ``pdbfixer`` command-line tool, then each chain is labelled by its
    residue count: more than 250 -> "A", between 51 and 149 -> "B", at most
    15 -> "C". Only the first chain of each class is kept; every other chain
    is relabelled "Z" and removed. If, among the remaining chains, the second
    is shorter than the third, the chain-B records are moved to the end of
    the written file and the file is re-read and re-written through PDBFixer.

    NOTE(review): an invalid ID prints a message and exits with status 0;
    kept for backward compatibility -- confirm whether callers expect a
    non-zero status.
    """
    import os  # local import: ``os`` is not known to be in scope here

    pdbcode = args[0]

    if len(pdbcode) != 4:
        print("Please enter a correct 4 letter pdbid")
        sys.exit(0)

    # Pass an argument list (no shell) so the ID is never interpreted by a
    # shell.
    call(["pdbfixer", "--pdbid", pdbcode, "--output=temp.pdb",
          "--add-atoms=heavy", "--keep-heterogens=none"])
    fixer = PDBFixer(filename="temp.pdb")

    assigned_labels = set()
    chains_to_remove = []
    for i, c in enumerate(fixer.topology.chains()):
        num_residues = len(list(c.residues()))
        if num_residues > 250:
            label = "A"
        elif 50 < num_residues < 150:
            label = "B"
        elif num_residues <= 15:
            label = "C"
        else:
            label = None
        if label is not None and label not in assigned_labels:
            # First chain seen in this size class: keep it under its label.
            assigned_labels.add(label)
            c.id = label
        else:
            # Duplicate of a class, or a size outside every class: discard.
            c.id = "Z"
            chains_to_remove.append(i)

    fixer.removeChains(chains_to_remove)

    chain_lengths = [len(list(c.residues())) for c in fixer.topology.chains()]

    output_path = pdbcode + ".pdb"
    with open(output_path, 'w') as handle:
        PDBFile.writeFile(fixer.topology, fixer.positions, handle)
    os.remove("temp.pdb")

    # Guard the indices: with fewer than three remaining chains there is
    # nothing to compare (previously an IndexError).
    if len(chain_lengths) > 2 and chain_lengths[1] < chain_lengths[2]:
        _move_chain_b_to_end(output_path)
        # Round-trip through PDBFixer to regenerate the trailing records.
        fixer = PDBFixer(filename=output_path)
        with open(output_path, 'w') as handle:
            PDBFile.writeFile(fixer.topology, fixer.positions, handle)


def _move_chain_b_to_end(pdb_path):
    """Rewrite ``pdb_path`` in place with the chain-B records moved to the end.

    Pure-Python replacement for the former grep/sed pipeline: lines matching
    ``[A-Z] B  `` are collected and appended last, and any other line
    containing ``END`` or ``CONECT`` is dropped.
    """
    import re

    chain_b_record = re.compile(r"[A-Z] B  ")
    with open(pdb_path) as handle:
        lines = handle.readlines()

    chain_b_lines = []
    other_lines = []
    for line in lines:
        if chain_b_record.search(line):
            chain_b_lines.append(line)
        elif "END" not in line and "CONECT" not in line:
            other_lines.append(line)

    with open(pdb_path, 'w') as handle:
        handle.writelines(other_lines)
        handle.writelines(chain_b_lines)