예제 #1
1
def pdb_fix_pdbfixer(pdbid, file_pathway, ph, chains_to_remove):
    """Download a PDB entry, repair it with PDBFixer and write three PDB files.

    Files written into ``file_pathway`` (in this order):

    * ``<pdbid>_fixed_ph<ph>.pdb`` -- repaired structure, waters and ligands kept
    * ``<pdbid>_fixed_ph<ph>_apo.pdb`` -- after ``removeHeterogens(True)``
    * ``<pdbid>_fixed_ph<ph>_apo_nowater.pdb`` -- after ``removeHeterogens(False)``

    Args:
        pdbid: 4 letter string specifying the PDB ID of the file you want to fix
        file_pathway: directory in which the PDB files are written
        ph: the pH at which hydrogens will be determined and added
        chains_to_remove: mapping with two parallel lists under the keys
            ``'pdbid'`` and ``'chain_to_remove'``; the entry matching ``pdbid``
            is a whitespace-separated string of chain IDs to delete
    Returns: nothing, but it does write PDB files

    """
    print(pdbid)

    # Download the topology from RCSB based on pdbid
    fixer = PDBFixer(pdbid=pdbid)

    # Remove chains based on hand curated .csv file
    if pdbid in chains_to_remove['pdbid']:
        row = chains_to_remove['pdbid'].index(pdbid)
        chains_list = chains_to_remove['chain_to_remove'][row].split()
        fixer.removeChains(chainIds=chains_list)

    # Determine the first and last residue resolved in chain 0
    chains = [chain for chain in fixer.topology.chains()]
    resindices = [residue.index for residue in chains[0].residues()]
    resindices = natsorted(resindices)
    first_resindex = resindices[0]
    last_resindex = resindices[-1]

    # Find missing residues and drop the N- and C-terminal fragments so that
    # they are not modelled. Keys of ``missingResidues`` are tuples whose last
    # element is compared against the residue indices found above.
    fixer.findMissingResidues()
    if fixer.missingResidues:
        first_key = min(fixer.missingResidues)
        if first_key[-1] <= first_resindex:
            fixer.missingResidues.pop(first_key)
    # Re-check: the pop above may have emptied the dictionary.
    if fixer.missingResidues:
        last_key = max(fixer.missingResidues)
        if last_key[-1] >= last_resindex:
            fixer.missingResidues.pop(last_key)

    fixer.findNonstandardResidues()
    fixer.replaceNonstandardResidues()
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()
    fixer.addMissingHydrogens(ph)

    def _write(suffix):
        # Write the current fixer state; the handle is closed even on error.
        # NOTE(review): ``keepNumbers`` is not defined in this function and is
        # expected to be a module-level name -- confirm it exists.
        out_path = os.path.join(file_pathway,
                                '%s_fixed_ph%s%s.pdb' % (pdbid, ph, suffix))
        with open(out_path, 'w') as handle:
            PDBFile.writeFile(fixer.topology, fixer.positions, handle,
                              keepIds=keepNumbers)

    # Write fixed PDB file, with all of the waters and ligands
    _write('')

    # Remove the ligand and write a pdb file
    fixer.removeHeterogens(True)
    _write('_apo')

    # Remove the waters and write a pdb file
    fixer.removeHeterogens(False)
    _write('_apo_nowater')
예제 #2
1
from pdbfixer import PDBFixer
from simtk.openmm.app import PDBFile
import mdtraj as md
import os
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import numpy as np
from __future__ import print_function
from simtk.openmm import app
import simtk.openmm as mm
from simtk import unit
from sys import stdout

# Load the raw PDB entry; missing residues and heavy atoms are repaired below.
fixer = PDBFixer('pdb4h12.ent')

fixer.findMissingResidues()
# Retain only the gaps located inside a chain. A gap keyed at position 0 or at
# a position equal to the chain's residue count is terminal and is discarded.
chains = list(fixer.topology.chains())
keys = fixer.missingResidues.keys()
missingResidues = dict()
for key in keys:
    chain = chains[key[0]]
    at_chain_start = key[1] == 0
    at_chain_end = key[1] == len(list(chain.residues()))
    if at_chain_start or at_chain_end:
        continue
    missingResidues[key] = fixer.missingResidues[key]
fixer.missingResidues = missingResidues

fixer.findMissingAtoms()
fixer.addMissingAtoms()
예제 #3
0
def add_hydrogens_to_mol(mol):
  """
  Add hydrogens to a molecule object.

  The molecule is serialised to a PDB block, passed through PDBFixer's
  ``addMissingHydrogens`` at pH 7.4, and parsed back without sanitisation
  and without stripping the new hydrogens.

  TODO (LESWING) see if there are more flags to add here for default
  :param mol: Rdkit Mol
  :return: Rdkit Mol
  :raises MoleculeLoadException: if a ValueError is raised during the round trip
  """
  # Everything happens in memory, so there is no temporary file to clean up.
  try:
    pdbblock = Chem.MolToPDBBlock(mol)
    pdb_stringio = StringIO()
    pdb_stringio.write(pdbblock)
    pdb_stringio.seek(0)
    fixer = PDBFixer(pdbfile=pdb_stringio)
    fixer.addMissingHydrogens(7.4)

    hydrogenated_io = StringIO()
    PDBFile.writeFile(fixer.topology, fixer.positions, hydrogenated_io)
    hydrogenated_io.seek(0)
    return Chem.MolFromPDBBlock(
        hydrogenated_io.read(), sanitize=False, removeHs=False)
  except ValueError as e:
    # The placeholder is required: extra logging args without one make the
    # logging module report a formatting error instead of the message.
    logging.warning("Unable to add hydrogens: %s", e)
    raise MoleculeLoadException(e)
예제 #4
0
def write_file(filename, contents):
    """Write ``contents`` to ``filename`` in text mode, replacing any existing file.

    The file is closed even if the write raises.
    """
    with open(filename, 'w') as outfile:
        outfile.write(contents)

################################################################################
# SET UP SYSTEM
################################################################################

# Build the force field from the file names in `forcefields_to_use`
# (defined outside this excerpt).
forcefield = app.ForceField(*forcefields_to_use)

# Fetch the structure for `pdbid` through PDBFixer.
print('Retrieving %s from PDB...' % pdbid)
fixer = PDBFixer(pdbid=pdbid)

# Remove every chain whose ID is not listed in `chain_ids_to_keep`.
# NOTE(review): `all_chains` is not used in the visible code; the chain IDs are
# read from `fixer.structure.models[0].chains` instead -- confirm that the
# installed PDBFixer version still exposes `structure`.
print('Removing all chains but %s' % chain_ids_to_keep)
all_chains = list(fixer.topology.chains())
chain_id_list = [c.chain_id for c in fixer.structure.models[0].chains]
chain_ids_to_remove = set(chain_id_list) - set(chain_ids_to_keep)
fixer.removeChains(chainIds=chain_ids_to_remove)

# Record the missing residues on the fixer.
print('Finding missing residues...')
fixer.findMissingResidues()

# Locate nonstandard residues. The replacement call itself is not part of the
# visible excerpt.
print('Replacing nonstandard residues...')
fixer.findNonstandardResidues()
예제 #5
0
def pdbfix_protein(input_pdb_path,
                   output_pdb_path,
                   find_missing_residues=True,
                   keep_water=False,
                   ph=None):
    """Repair a PDB file with PDBFixer and write the result.

    Heterogens are always stripped; water is kept only on request.

    Parameters
    ----------
    input_pdb_path : str
        Path of the PDB file to repair.
    output_pdb_path : str
        Path the repaired PDB file is written to.
    find_missing_residues : bool, optional
        When True (the default), unresolved residues listed in the
        sequence are searched for so that they can be modelled. When
        False, the list of missing residues is forced to be empty.
    keep_water : bool, optional
        When True, water molecules survive heterogen removal
        (default is False).
    ph : float or None, optional
        pH used to add hydrogens; no hydrogens are added when None.

    """
    fixer = PDBFixer(filename=input_pdb_path)

    # Either detect the gaps or explicitly declare that there are none.
    if not find_missing_residues:
        fixer.missingResidues = {}
    else:
        fixer.findMissingResidues()

    fixer.findNonstandardResidues()
    fixer.replaceNonstandardResidues()
    fixer.removeHeterogens(keep_water)
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()

    hydrogens_requested = ph is not None
    if hydrogens_requested:
        fixer.addMissingHydrogens(ph)

    with open(output_pdb_path, 'w') as out_handle:
        PDBFile.writeFile(fixer.topology, fixer.positions, out_handle)
def main(args):
    """Fetch a PDB entry, keep one chain per size class and write ``<pdbid>.pdb``.

    ``args[0]`` is the 4-character PDB id. The entry is downloaded with the
    ``pdbfixer`` command-line tool into ``temp.pdb``. Chains are then
    classified by residue count:

    * more than 250 residues      -> chain "A"
    * between 51 and 149 residues -> chain "B"
    * 15 residues or fewer        -> chain "C"

    Only the first chain found in each class is kept. Every other chain,
    including chains that fit no class, is relabelled "Z" and removed. If the
    second remaining chain is shorter than the third, the chain "B" records
    are moved to the end of the output file with grep/sed.

    Exits with status 1 when the id is not exactly 4 alphanumeric characters.
    """
    pdbcode = args[0]

    # The id is interpolated into shell commands further down, so anything
    # other than 4 alphanumeric characters is rejected up front.
    if len(pdbcode) != 4 or not pdbcode.isalnum():
        print("Please enter a correct 4 letter pdbid")
        sys.exit(1)

    # Argument list without a shell: no quoting issues for the download step.
    call(["pdbfixer", "--pdbid", pdbcode, "--output=temp.pdb",
          "--add-atoms=heavy", "--keep-heterogens=none"])
    fixer = PDBFixer(filename="temp.pdb")

    # Tracks whether a chain has already been assigned to each size class.
    found = {"A": False, "B": False, "C": False}
    chains_to_remove = []
    for i, c in enumerate(fixer.topology.chains()):
        num_residues = len(list(c.residues()))
        if num_residues > 250:
            label = "A"
        elif 50 < num_residues < 150:
            label = "B"
        elif num_residues <= 15:
            label = "C"
        else:
            # Sizes outside all three classes are discarded.
            label = None

        if label is not None and not found[label]:
            found[label] = True
            c.id = label
        else:
            c.id = "Z"
            chains_to_remove.append(i)

    fixer.removeChains(chains_to_remove)

    chain_lengths = [len(list(c.residues())) for c in fixer.topology.chains()]

    with open(pdbcode + ".pdb", 'w') as handle:
        PDBFile.writeFile(fixer.topology, fixer.positions, handle)
    call(["rm", "temp.pdb"])

    # Reordering only makes sense when at least three chains survived;
    # with fewer there is no second/third chain to compare.
    if len(chain_lengths) >= 3 and chain_lengths[1] < chain_lengths[2]:
        # Move the chain "B" records to the end of the file, then let
        # PDBFixer re-read and rewrite it.
        call(["grep \"[A-Z] B  \" " + pdbcode + ".pdb > temp.pdb"], shell=True)
        call(["sed -i \"/[A-Z] B  /d\" " + pdbcode + ".pdb"], shell=True)
        call(["sed -i \"/END/d\" " + pdbcode + ".pdb"], shell=True)
        call(["sed -i \"/CONECT/d\" " + pdbcode + ".pdb"], shell=True)
        call(["cat temp.pdb >> " + pdbcode + ".pdb"], shell=True)
        fixer = PDBFixer(filename=pdbcode + ".pdb")
        with open(pdbcode + ".pdb", 'w') as handle:
            PDBFile.writeFile(fixer.topology, fixer.positions, handle)
        call(["rm", "temp.pdb"])
예제 #7
0
def fix_pdb(pdb_id, pdb_file, pdb_group):
    """Strip unwanted chains and heterogens from a PDB file and write ``<pdb_id>.pdb``.

    The chains to keep come from ``get_required_chains(pdb_group)``; every
    other chain in the first model of ``pdb_file`` is removed. Missing atoms
    are added and heterogens are removed with water kept.

    :param pdb_id: identifier used for parsing and for the output file name
    :param pdb_file: path of the input PDB file
    :param pdb_group: value forwarded to ``get_required_chains``
    :return: path of the written file, located next to ``pdb_file``
    """
    chains_to_retain = get_required_chains(pdb_group)
    chains_to_remove = [
        chain.get_id()
        for chain in PDBParser().get_structure(pdb_id, pdb_file)[0]
        if chain.get_id() not in chains_to_retain
    ]

    fixer = PDBFixer(filename=pdb_file)

    fixer.removeChains(chainIds=chains_to_remove)

    fixer.findMissingResidues()
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()
    fixer.removeHeterogens(True)

    # An input without a directory part has an empty dirname; prefixing '/'
    # unconditionally would send the output to the filesystem root.
    out_dir = dirname(pdb_file)
    prefix = out_dir + '/' if out_dir else ''
    pdb_file = prefix + pdb_id + '.pdb'

    # KeepIds flag is critical here, otherwise we lose all information binding
    with open(pdb_file, 'w') as handle:
        PDBFile.writeFile(fixer.topology, fixer.positions, handle, keepIds=True)

    return pdb_file
예제 #8
0
def add_membrane(pdb_path, membrane_lipid_type='POPC', out_as=None):
    """
        Build a lipid bilayer around a protein.

        The structure is first repaired with PDBFixer (nonstandard residues,
        missing atoms, hydrogens at pH 7.0) and written to ``fixed.pdb``. The
        membrane is then added and the result is written to
        ``fixed_membrane.pdb``. Both files are created in the current working
        directory.

            Parameters
            ----------

            pdb_path: Full path of the input PDB file.

            membrane_lipid_type : Lipid type passed to PDBFixer, e.g. POPC or POPE.

            out_as: Optional list of extensions such as ['psf', 'crd', 'gro'];
                ``fixed_membrane.<ext>`` is saved for each one. A format that
                fails to save is reported and skipped.

            Example
            ----------

            add_membrane('protein.pdb', 'POPC', ['crd', 'gro'])

        """
    fixer = PDBFixer(filename=pdb_path)
    fixer.findMissingResidues()
    fixer.findNonstandardResidues()
    fixer.replaceNonstandardResidues()
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()
    fixer.addMissingHydrogens(7.0)

    print('\nAdding membrane:', membrane_lipid_type)
    with open("fixed.pdb", 'w') as handle:
        app.PDBFile.writeFile(fixer.topology, fixer.positions, handle)
    fixer.addMembrane(lipidType=membrane_lipid_type,
                      membraneCenterZ=0 * unit.nanometer,
                      minimumPadding=1 * unit.nanometer,
                      positiveIon="Na+",
                      negativeIon="Cl-",
                      ionicStrength=0.0 * unit.molar)
    with open("fixed_membrane.pdb", 'w') as handle:
        app.PDBFile.writeFile(fixer.topology, fixer.positions, handle, keepIds=True)

    if out_as is not None:
        struct = pmd.load_file('fixed_membrane.pdb')
        for i in out_as:
            try:
                print("Savind *.%s extension File" % i)
                struct.save('fixed_membrane.%s' % i)
            except Exception as err:
                # Still best-effort, but the failure is now visible and
                # KeyboardInterrupt/SystemExit are no longer swallowed.
                print("Could not save fixed_membrane.%s: %s" % (i, err))
예제 #9
0
def cleanPdb(pdb_list, chain=None, fromFolder=None, toFolder="cleaned_pdbs"):
    """Clean each PDB in ``pdb_list`` with PDBFixer and write it into ``toFolder``.

    For every entry the selected chain(s) are kept, internal missing residues
    are retained for modelling (terminal gaps are dropped), nonstandard
    residues are replaced, heterogens and water are removed, and missing atoms
    and hydrogens (pH 7.0) are added.

    Parameters
    ----------
    pdb_list : iterable of str
        PDB ids; the first 4 characters, lower-cased, give the file name. A
        5-character id carries the chain letter in its last character.
    chain : str, int or None
        None selects the chain encoded in the id, defaulting to "A".
        ``-1`` or ``"-1"`` selects all chains via ``getAllChains``. Any other
        value is used as the collection of chain ids to keep.
    fromFolder : str or None
        Folder holding the input files (default "original_pdbs"), or the path
        of a single ``.pdb`` file to use directly.
    toFolder : str
        Output folder; created if it does not exist.
    """
    # makedirs avoids the shell, so folder names with spaces work.
    os.makedirs(toFolder, exist_ok=True)
    for pdb_id in pdb_list:
        pdb = f"{pdb_id.lower()[:4]}"
        pdbFile = pdb + ".pdb"
        if fromFolder is None:
            fromFile = os.path.join("original_pdbs", pdbFile)
        elif fromFolder.endswith(".pdb"):
            # A direct file path is recognised by its suffix, not its prefix.
            fromFile = fromFolder
        else:
            fromFile = os.path.join(fromFolder, pdbFile)
        if chain is None:  # None means the default is chain A unless specified.
            if len(pdb_id) == 5:
                Chosen_chain = pdb_id[4].upper()
            else:
                assert (len(pdb_id) == 4)
                Chosen_chain = "A"
        elif chain == "-1" or chain == -1:
            Chosen_chain = getAllChains(fromFile)
        else:
            Chosen_chain = chain
        # clean pdb
        fixer = PDBFixer(filename=fromFile)
        # remove unwanted chains
        chains = list(fixer.topology.chains())
        chains_to_remove = [
            i for i, x in enumerate(chains) if x.id not in Chosen_chain
        ]
        fixer.removeChains(chains_to_remove)

        fixer.findMissingResidues()
        # add missing residues in the middle of a chain, not ones at the start or end of the chain.
        chains = list(fixer.topology.chains())
        # Iterate over a copy of the keys because entries are deleted in the loop.
        for key in list(fixer.missingResidues.keys()):
            chain_tmp = chains[key[0]]
            if key[1] == 0 or key[1] == len(list(chain_tmp.residues())):
                del fixer.missingResidues[key]

        fixer.findNonstandardResidues()
        fixer.replaceNonstandardResidues()
        fixer.removeHeterogens(keepWater=False)
        fixer.findMissingAtoms()
        fixer.addMissingAtoms()
        fixer.addMissingHydrogens(7.0)
        with open(os.path.join(toFolder, pdbFile), 'w') as handle:
            PDBFile.writeFile(fixer.topology, fixer.positions, handle)
예제 #10
0
    def pdb2omm(self, 
              input_pdbs=None,
              solvate=True,
              protonate=True,
              fix_pdb=True,
              inspect=False,
              extra_input_pdb=[],
              ff_files=[],
              extra_ff_files=[],
              extra_names=[],
              other_ff_instance=False,
              pH = 7.0):
        """
        Build an OpenMM system from a PDB file located in ``self.workdir``.

        The steps run in this order: optional PDBFixer repair, creation of an
        ``app.Modeller``, force field setup through ``self.setForceFields``,
        optional hydrogen addition, addition of extra PDB structures, optional
        solvation through ``self.solvate``, and ``createSystem``.

        Parameters
        ----------
        input_pdbs : str, optional
            File name joined to ``self.workdir`` to form the input path.
            The default is None.
            NOTE(review): with the default the path ends in "/None" --
            presumably callers always pass a name; confirm.
        solvate : bool, optional
            When True, ``self.solvate`` is applied to the model.
            The default is True.
        protonate : bool, optional
            When True, hydrogens are added at ``pH``. The protonation
            dictionary is fixed in the body to
            ``{('A',187): 'ASP', ('A',224): 'HID'}``. The default is True.
        fix_pdb : bool, optional
            When True, the input is loaded with PDBFixer and missing atoms
            are added; otherwise it is loaded with ``app.PDBFile``.
            The default is True.
        inspect : bool, optional
            Not used in this method body. The default is False.
        extra_input_pdb : list, optional
            PDB files whose topology and positions are appended to the
            model, e.g. ['SAM_H3K36.pdb', 'ZNB_H3K36.pdb']. The default is [].
        ff_files : list, optional
            Forwarded to ``self.setForceFields``, e.g.
            ['amber14-all.xml', 'amber14/tip4pew.xml', 'gaff.xml'].
            The default is [].
        extra_ff_files : list, optional
            Forwarded to ``self.setForceFields``, e.g. ['SAM.xml', 'ZNB.xml'].
            The default is [].
        extra_names : list, optional
            Not used in this method body, e.g. ['SAM', 'ZNB'].
            The default is [].
        other_ff_instance : bool, optional
            Not used in this method body. The default is False.
        pH : float, optional
            pH passed to ``addHydrogens``. The default is 7.0.

        Returns
        -------
        self
            After setting ``input_pdb``, ``system``, ``topology``,
            ``positions`` and ``system_pdb``.

        """

        # NOTE(review): fileHandler is an external helper; presumably it
        # prepares the working directory -- confirm.
        tools.Functions.fileHandler(self.workdir)
       
        input_pdb=f'{self.workdir}/{input_pdbs}'
        
        if fix_pdb:
            
            # Load through PDBFixer so that missing atoms can be added.
            pdb=PDBFixer(input_pdb)
            
            pdb.findMissingResidues()
            pdb.findMissingAtoms()
            pdb.addMissingAtoms()
        
        else:
            pdb = app.PDBFile(input_pdb)
        
        pre_system = app.Modeller(pdb.topology, pdb.positions)
    
        forcefield=self.setForceFields(ff_files=ff_files, 
                                         extra_ff_files=extra_ff_files,
                                         omm_ff=False)
    
        if protonate:
            # The residue variants come from self.setProtonationState using a
            # protonation dictionary that is hard-coded here.
            pre_system.addHydrogens(forcefield, 
                             pH = pH, 
                             variants = self.setProtonationState(pre_system.topology.chains(), 
                                                                 protonation_dict={('A',187): 'ASP', ('A',224): 'HID'}) )

        
        # Add ligand structures to the model (after the optional addHydrogens call).
        for extra_pdb_file in extra_input_pdb:
            extra_pdb = app.PDBFile(extra_pdb_file)
            pre_system.add(extra_pdb.topology, extra_pdb.positions)


        # Solvate the model; self.solvate returns the model used from here on.
        if solvate:
            pre_system=self.solvate(pre_system, forcefield)
    
        # Create the OpenMM System from the final topology.
        # NOTE(review): `nanometers` is not defined in this method; it must be
        # a module-level name -- confirm.
        system = forcefield.createSystem(pre_system.topology, 
                                         nonbondedMethod=app.PME, 
                                         nonbondedCutoff=1.0*nanometers,
                                         ewaldErrorTolerance=0.0005, 
                                         constraints='HBonds', 
                                         rigidWater=True)
        
        # Update attributes
        self.input_pdb=input_pdb
        self.system=system
        self.topology=pre_system.topology
        self.positions=pre_system.positions
        
        
            
        #TODO: A lot. Link to Visualization
        self.system_pdb=self.writePDB(pre_system.topology, pre_system.positions, name='system')
        
        return self
예제 #11
0
# ==============================================================================
# PREPARE STRUCTURE
# ==============================================================================

from pdbfixer import PDBFixer

# Every nonbonded method other than NoCutoff and CutoffNonPeriodic is
# treated as periodic.
is_periodic = (nonbonded_method not in [app.NoCutoff, app.CutoffNonPeriodic])

# ==============================================================================
# Retrieve the PDB file
# ==============================================================================

# A local file takes precedence over a PDB id lookup.
if pdb_filename:
    logger.info("Retrieving PDB '%s'..." % pdb_filename)
    fixer = PDBFixer(filename=pdb_filename)
else:
    logger.info("Retrieving PDB '%s'..." % pdbid)
    fixer = PDBFixer(pdbid=pdbid)

# ==============================================================================
# Prepare the structure
# ==============================================================================

# DEBUG
# Parenthesised single-argument form: prints the same text on Python 2 and is
# valid syntax on Python 3.
print("fixer.topology.chains(): %s" % str([ chain.id for chain in fixer.topology.chains() ]))

# Write PDB file for solute only.
# NOTE: `pdb_filename` is rebound to the output path from here on, and
# `outfile` is left open for code that follows this excerpt.
logger.info("Writing source PDB...")
pdb_filename = os.path.join(workdir, pdbid + '.pdb')
outfile = open(pdb_filename, 'w')
예제 #12
0
파일: dafix.py 프로젝트: sgill2/LabStuff
import pdbfixer
from pdbfixer import PDBFixer
from simtk import unit
from simtk.openmm.app import PDBFile
# Repair VER_apo.pdb, add hydrogens at pH 7.5, solvate, and write t_h.pdb.
output_file = 't_h.pdb'
fixer = PDBFixer(filename='VER_apo.pdb')
fixer.findMissingResidues()
fixer.findMissingAtoms()
fixer.addMissingAtoms()
fixer.addMissingHydrogens(pH=7.5)
fixer.addSolvent(padding=11*unit.angstrom, ionicStrength=0.050*unit.molar)
# The context manager guarantees the output is flushed and closed.
with open(output_file, 'w') as handle:
    PDBFile.writeFile(fixer.topology, fixer.positions, handle)



예제 #13
0
    def pdb2omm(self, 
              input_pdb=None,
              solvate=True,
              protonate=True,
              fix_pdb=True,
              inspect=False,
              extra_input_pdb=[],
              ff_files=[],
              extra_ff_files=[],
              extra_names=[],
              other_ff_instance=False,
              pH_protein = 7.0,
              residue_variants={},
              other_omm=False,
              input_sdf_file=None,
              box_size=9.0,
              name='NoName'):
        """
        Method to prepare an openMM system from PDB and XML/other force field definitions.
        Returns self, so that other methods can act on it.
        Requires an input PDB file given by "input_pdb".
        Force fields are resolved by self.setForceFields from "ff_files".
        NOTE(review): "extra_ff_files" is accepted but is not forwarded to
        self.setForceFields in this body -- confirm whether that is intended.
        TODO: include "extra_input_pdb" methods to build boxes on the fly.
        

        Parameters
        ----------
        input_pdb : str, optional
            Path of the input PDB; stored as self.input_pdb and in
            self.structures['input_pdb']. The default is None.
        solvate : bool, optional
            When True, self.solvate is applied with "box_size".
            The default is True.
        protonate : bool, optional
            When True, hydrogens are added at "pH_protein".
            The default is True.
        fix_pdb : bool, optional
            When True, the input is loaded with PDBFixer and missing atoms
            are added; otherwise it is loaded with app.PDBFile.
            The default is True.
        inspect : bool, optional
            Not used in this method body. The default is False.
        extra_input_pdb : list, optional
            When non-empty, passed to self.addExtraMolecules_PDB together
            with the model. The default is [].
        ff_files : list, optional
            Forwarded to self.setForceFields. The default is [].
        extra_ff_files : list, optional
            Not used in this method body. The default is [].
        extra_names : list, optional
            Not used in this method body. The default is [].
        other_ff_instance : bool, optional
            Not used in this method body. The default is False.
        pH_protein : float, optional
            pH passed to addHydrogens. The default is 7.0.
        residue_variants : dict, optional
            When non-empty, passed as protonation_dict to
            self.setProtonationState to select residue variants.
            The default is {}.
        other_omm : bool, optional
            When True, the system is built by self.omm_system instead of
            forcefield.createSystem. The default is False.
        input_sdf_file : optional
            Forwarded to self.omm_system when "other_omm" is True.
            The default is None.
        box_size : float, optional
            Forwarded to self.solvate. The default is 9.0.
        name : str, optional
            Not used in this method body. The default is 'NoName'.

        Returns
        -------
        self
            After setting structures, input_pdb, topology, positions and
            system (and extra_molecules when extra PDBs are given).

        """
        
        self.structures={}
        self.input_pdb=input_pdb
        
        self.structures['input_pdb']=input_pdb
        
        # Fix the input_pdb with PDBFixer
        if fix_pdb:
            
            pdb=PDBFixer(self.input_pdb)
            
            pdb.findMissingResidues()
            pdb.findMissingAtoms()
            pdb.addMissingAtoms()
        
        else:
            pdb = app.PDBFile(self.input_pdb)
        
        
        # Generate a Modeller instance of the (optionally fixed) pdb.
        # It will be used to populate the system.
        pre_system = app.Modeller(pdb.topology, pdb.positions)
    
    
        # Add ligand structures to the model with addExtraMolecules_PDB
        if len(extra_input_pdb) > 0:

            pre_system, self.extra_molecules=self.addExtraMolecules_PDB(pre_system, extra_input_pdb)


    
        # Create a ForceField instance with the provided XMLs; setForceFields
        # also returns the resolved paths, reused below for other_omm.
        forcefield, ff_paths=self.setForceFields(ff_files=ff_files)
    
        # Add hydrogens, with explicit residue variants only when
        # residue_variants is non-empty.
        if protonate:
            
            if residue_variants:
            
                pre_system.addHydrogens(forcefield, pH = pH_protein, 
                                        variants = self.setProtonationState(pre_system.topology.chains(), 
                                                                 protonation_dict=residue_variants))

            else:
                pre_system.addHydrogens(forcefield, pH = pH_protein)
        

        # Call to solvate()
        #TODO: For empty box, add waters, remove then
        if solvate:
            pre_system=self.solvate(pre_system, forcefield, box_size=box_size)
    
        self.topology=pre_system.topology
        self.positions=pre_system.positions
    
        # Define system. Either by provided pre_system, or other_omm system instance.
        
        if other_omm:
            
            # NOTE(review): self.def_input_struct is not set in this method;
            # it must be set elsewhere -- confirm.
            system, forcefield_other=self.omm_system(input_sdf_file, 
                                                     pre_system,
                                                     forcefield,
                                                     self.def_input_struct,
                                                     ff_files=ff_paths, 
                                                     template_ff='gaff-2.11')
                        
            # forcefield_other is not used afterwards in this method.
    
        else:
            
            # Create the OpenMM System from the final topology.
            # NOTE(review): `nanometers` is not defined in this method; it
            # must be a module-level name -- confirm.
            system = forcefield.createSystem(pre_system.topology, 
                                         nonbondedMethod=app.PME, 
                                         nonbondedCutoff=1.0*nanometers,
                                         ewaldErrorTolerance=0.0005, 
                                         constraints='HBonds', 
                                         rigidWater=True)
            
        # Update attributes
        self.system=system

        #TODO: A lot. Link to Visualization
        self.structures['system']=self.writePDB(pre_system.topology, pre_system.positions, name='system')
        
        
        print(f"System is now converted to openMM type: \n\tFile: {self.structures['system']}, \n\tTopology: {self.topology}")
        
        return self
예제 #14
0
def prepare_inputs(
        protein: str,
        ligand: str,
        replace_nonstandard_residues: bool = True,
        remove_heterogens: bool = True,
        remove_water: bool = True,
        add_hydrogens: bool = True,
        pH: float = 7.0,
        optimize_ligand: bool = True,
        pdb_name: Optional[str] = None) -> Tuple[RDKitMol, RDKitMol]:
    """This prepares protein-ligand complexes for docking.

  Autodock Vina requires PDB files for proteins and ligands with
  sensible inputs. This function uses PDBFixer and RDKit to ensure
  that inputs are reasonable and ready for docking. Default values
  are given for convenience, but fixing PDB files is complicated and
  human judgement is required to produce protein structures suitable
  for docking. Always inspect the results carefully before trying to
  perform docking.

  Parameters
  ----------
  protein: str
    Filename for protein PDB file or a PDBID. A value ending in
    ".pdb" is read as a file; anything else is downloaded from RCSB.
  ligand: str
    Either a filename for a ligand PDB file or a SMILES string.
  replace_nonstandard_residues: bool (default True)
    Replace nonstandard residues with standard residues.
  remove_heterogens: bool (default True)
    Removes residues that are not standard amino acids or nucleotides.
  remove_water: bool (default True)
    Remove water molecules. Only takes effect when `remove_heterogens`
    is True; water is otherwise left untouched.
  add_hydrogens: bool (default True)
    Add missing hydrogens at the protonation state given by `pH`.
  pH: float (default 7.0)
    Most common form of each residue at given `pH` value is used.
  optimize_ligand: bool (default True)
    If True, optimize ligand with RDKit. Required for SMILES inputs.
  pdb_name: Optional[str]
    If given, write sanitized protein and ligand to files called
    "pdb_name.pdb" and "ligand_pdb_name.pdb"

  Returns
  -------
  Tuple[RDKitMol, RDKitMol]
    Tuple of `protein_molecule, ligand_molecule` with 3D information.

  Note
  ----
  This function requires RDKit and OpenMM to be installed.
  Read more about PDBFixer here: https://github.com/openmm/pdbfixer.

  Examples
  --------
  >>> p, m = prepare_inputs('3cyx', 'CCC')
  >>> p.GetNumAtoms()
  1415
  >>> m.GetNumAtoms()
  11

  >>> p, m = prepare_inputs('3cyx', 'CCC', remove_heterogens=False)
  >>> p.GetNumAtoms()
  1720

  """
    import os
    import tempfile

    try:
        from rdkit import Chem
        from rdkit.Chem import AllChem
        from pdbfixer import PDBFixer
        from simtk.openmm.app import PDBFile
    except ModuleNotFoundError:
        raise ImportError(
            "This function requires RDKit and OpenMM to be installed.")

    if protein.endswith('.pdb'):
        fixer = PDBFixer(protein)
    else:
        fixer = PDBFixer(url='https://files.rcsb.org/download/%s.pdb' %
                         (protein))

    if ligand.endswith('.pdb'):
        m = Chem.MolFromPDBFile(ligand)
    else:
        m = Chem.MolFromSmiles(ligand, sanitize=True)

    # Apply common fixes to PDB files
    if replace_nonstandard_residues:
        fixer.findMissingResidues()
        fixer.findNonstandardResidues()
        fixer.replaceNonstandardResidues()
    if remove_heterogens:
        # Water is kept exactly when the caller did not ask to remove it.
        fixer.removeHeterogens(keepWater=not remove_water)
    if add_hydrogens:
        fixer.addMissingHydrogens(pH)

    # Hand the structure to RDKit through a uniquely named temporary file.
    # A fixed "tmp.pdb" in the working directory collides between concurrent
    # calls and is left behind if parsing fails.
    tmp = tempfile.NamedTemporaryFile(mode='w', suffix='.pdb', delete=False)
    try:
        with tmp:
            PDBFile.writeFile(fixer.topology, fixer.positions, tmp)
        # The file is closed (and flushed) before RDKit reads it.
        p = Chem.MolFromPDBFile(tmp.name, sanitize=True)
    finally:
        os.remove(tmp.name)

    # Optimize ligand
    if optimize_ligand:
        m = Chem.AddHs(m)  # need hydrogens for optimization
        AllChem.EmbedMolecule(m)
        AllChem.MMFFOptimizeMolecule(m)

    if pdb_name:
        Chem.rdmolfiles.MolToPDBFile(p, '%s.pdb' % (pdb_name))
        Chem.rdmolfiles.MolToPDBFile(m, 'ligand_%s.pdb' % (pdb_name))

    return (p, m)
예제 #15
0
# Exactly three arguments are required: input PDB, ligand file, output name.
if len(sys.argv) != 4:
    print('Usage: python prepareComplex.py input.pdb ligand.mol system')
    # sys.exit is always available; the bare exit() helper is injected by the
    # site module and is missing under "python -S" or in frozen builds.
    sys.exit(1)

pdb_in = sys.argv[1]
ligand_in = sys.argv[2]
outname = sys.argv[3]

# This PDB file contains:
# - the protein (single chain)
# - a ligand
# - 3 DMSO molecules
# - A number of waters
# No hydrogens are present.
# The C-terminal THR residue is missing an oxygen atom.
fixer = PDBFixer(filename=pdb_in)
fixer.findMissingResidues()
fixer.findMissingAtoms()
fixer.findNonstandardResidues()
print('Residues:', fixer.missingResidues)
print('Atoms:', fixer.missingAtoms)
print('Terminals:', fixer.missingTerminals)
print('Non-standard:', fixer.nonstandardResidues)

fixer.addMissingAtoms()
fixer.addMissingHydrogens(7.4)

# The following removes the DMS components and retains the ligand and waters.
# If instead we want to remove the ligand it will be easier to use:
# fixer.removeHeterogens(True) or fixer.removeHeterogens(False)
# True keeps the waters, False removes them leaving only the protein.
예제 #16
0
파일: utils.py 프로젝트: simomarsili/mmlite
def prepare_pdb(pdb,
                chains='A',
                ff=('amber99sbildn.xml', 'tip3p.xml'),
                ph=7,
                pad=10 * unit.angstroms,
                nbonded=app.PME,
                constraints=app.HBonds,
                crystal_water=True):
    """
    Fetch, solvate and minimize a protein PDB structure.

    Writes ``<pdb>-pdbfixer.pdb`` (after fixing and solvation) and
    ``<pdb>-minimized.pdb`` (after energy minimization) to the current
    working directory, then calls ``serialize_system``.

    Parameters
    ----------
    pdb : str
        PDB Id.
    chains : str or list
        Chain(s) to keep in the system. A string is treated as a collection
        of one-character chain ids ('AB' keeps chains A and B).
    ff : tuple of xml ff files.
        Forcefields for parametrization.
    ph : float
        pH value for adding missing hydrogens.
    pad: Quantity object
        Padding around macromolecule for filling box with water.
    nbonded : object
        The method to use for nonbonded interactions.  Allowed values are
        NoCutoff, CutoffNonPeriodic, CutoffPeriodic, Ewald, PME, or LJPME.
        Solvent is only added for PME, CutoffPeriodic and Ewald.
    constraints : object
        Specifies which bonds and angles should be implemented with
        constraints. Allowed values are None, HBonds, AllBonds, or HAngles.
    crystal_water : bool
        Keep crystal water.

    """

    # Load forcefield.
    forcefield = app.ForceField(*ff)

    # Retrieve structure from PDB.
    logger.info('Retrieving %s from PDB...', pdb)
    fixer = PDBFixer(pdbid=pdb)

    # Remove unselected chains.
    logger.info('Removing all chains but %s', chains)
    all_chains = [c.id for c in fixer.topology.chains()]
    fixer.removeChains(chainIds=set(all_chains) - set(chains))

    # Find missing residues.
    logger.info('Finding missing residues...')
    fixer.findMissingResidues()

    # Replace nonstandard residues.
    logger.info('Replacing nonstandard residues...')
    fixer.findNonstandardResidues()
    fixer.replaceNonstandardResidues()

    # Add missing atoms.
    logger.info('Adding missing atoms...')
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()

    # Remove heterogens.
    logger.info('Removing heterogens...')
    fixer.removeHeterogens(keepWater=crystal_water)

    # Add missing hydrogens.
    logger.info('Adding missing hydrogens appropriate for pH %s', ph)
    fixer.addMissingHydrogens(ph)

    if nbonded in [app.PME, app.CutoffPeriodic, app.Ewald]:
        # Add solvent.
        logger.info('Adding solvent...')
        fixer.addSolvent(padding=pad)

    # Write PDB file; the handle is closed before the system is built,
    # matching the handling of the minimized structure below.
    fixed_path = '%s-pdbfixer.pdb' % pdb
    logger.info('Writing PDB file to "%s"...', fixed_path)
    with open(fixed_path, 'w') as fp:
        app.PDBFile.writeFile(fixer.topology, fixer.positions, fp)

    # Create OpenMM System.
    logger.info('Creating OpenMM system...')
    system = forcefield.createSystem(fixer.topology,
                                     nonbondedMethod=nbonded,
                                     constraints=constraints,
                                     rigidWater=True,
                                     removeCMMotion=False)

    # Minimize to update positions.
    logger.info('Minimizing...')
    integrator = mm.VerletIntegrator(1.0 * unit.femtosecond)
    context = mm.Context(system, integrator)
    context.setPositions(fixer.positions)
    mm.LocalEnergyMinimizer.minimize(context)
    # pylint: disable=unexpected-keyword-arg, no-value-for-parameter
    state = context.getState(getPositions=True)
    fixer.positions = state.getPositions()

    # Write final coordinates.
    minimized_path = '%s-minimized.pdb' % pdb
    logger.info('Writing PDB file to "%s"...', minimized_path)
    with open(minimized_path, 'w') as fp:
        app.PDBFile.writeFile(fixer.topology, fixer.positions, fp)

    # Serialize final coordinates.
    logger.info('Serializing to XML...')
    serialize_system(context, system, integrator)
예제 #17
0
def cleanPdb(pdb_list,
             chain=None,
             source=None,
             toFolder="cleaned_pdbs",
             formatName=False,
             verbose=False,
             removeTwoEndsMissingResidues=True,
             addMissingResidues=True,
             removeHeterogens=True,
             keepIds=False):
    """Clean each PDB file in ``pdb_list`` with PDBFixer and write it to ``toFolder``.

    Args:
        pdb_list: iterable of PDB identifiers / base names (without ".pdb").
        chain: which chain(s) to keep.
            ``None``: the 5th character of the entry if it has at least five
            characters, otherwise "A" (the entry must then be exactly 4 long).
            ``"-1"`` or ``-1``: whatever ``getAllChains(fromFile)`` returns.
            ``"first"``: the id of the first chain in the topology.
            Anything else is used as given. A chain is kept when
            ``chain.id in Chosen_chain`` (substring test for a string).
        source: ``None`` reads from "original_pdbs/"; a value ending in ".pdb"
            is used as the input file itself; any other value is treated as
            the input directory.
        toFolder: output directory, created if necessary.
        formatName: if true, the file base name is the lower-cased first four
            characters of the entry.
        verbose: print the removed chain indices and the missing-residue keys.
        removeTwoEndsMissingResidues: drop missing-residue records located at
            the start or the end of a chain (only when ``addMissingResidues``).
        addMissingResidues: if false, every missing-residue record is dropped.
        removeHeterogens: if true, heterogens (including water) are removed.
        keepIds: forwarded to ``PDBFile.writeFile``.

    Returns:
        None. Entries that cannot be loaded, or whose missing atoms cannot be
        added, are reported on stdout and skipped.
    """
    # os.makedirs is used instead of shelling out to `mkdir -p`, which
    # mis-handles paths containing spaces or shell metacharacters.
    if toFolder:
        os.makedirs(toFolder, exist_ok=True)
    for pdb_id in pdb_list:
        print(pdb_id)
        if formatName:
            pdb = f"{pdb_id.lower()[:4]}"
        else:
            pdb = pdb_id
        pdbFile = pdb + ".pdb"
        if source is None:
            fromFile = os.path.join("original_pdbs", pdbFile)
        elif source[-4:] == ".pdb":
            fromFile = source
        else:
            fromFile = os.path.join(source, pdbFile)

        # Load the structure; any loading failure skips this entry.
        try:
            fixer = PDBFixer(filename=fromFile)
        except Exception as inst:
            print(inst)
            print(f"{fromFile} not found. skipped")
            continue

        # Decide which chain ids to keep.
        chains = list(fixer.topology.chains())
        print(chains)
        if chain is None:  # default: chain taken from the entry name, else "A"
            if len(pdb_id) >= 5:
                Chosen_chain = pdb_id[4]
            else:
                assert (len(pdb_id) == 4)
                Chosen_chain = "A"
        elif chain == "-1" or chain == -1:
            Chosen_chain = getAllChains(fromFile)
            print(f"Chains: {Chosen_chain}")
        elif chain == "first":
            Chosen_chain = chains[0].id
        else:
            Chosen_chain = chain

        # removeChains receives the positional indices of the unwanted chains.
        chains_to_remove = [
            i for i, x in enumerate(chains) if x.id not in Chosen_chain
        ]
        fixer.removeChains(chains_to_remove)

        fixer.findMissingResidues()
        # Chains are re-listed because the indices changed after removal;
        # missing-residue keys are (chain index, residue position) pairs.
        chains = list(fixer.topology.chains())
        keys = fixer.missingResidues.keys()
        if verbose:
            print("chains to remove", chains_to_remove)
            print("missing residues: ", keys)
        if not addMissingResidues:
            for key in list(keys):
                del fixer.missingResidues[key]
        elif removeTwoEndsMissingResidues:
            # Keep only gaps in the middle of a chain: position 0 is the
            # chain start, position == residue count is the chain end.
            for key in list(keys):
                chain_tmp = chains[key[0]]
                if key[1] == 0 or key[1] == len(list(chain_tmp.residues())):
                    del fixer.missingResidues[key]

        fixer.findNonstandardResidues()
        fixer.replaceNonstandardResidues()
        if removeHeterogens:
            fixer.removeHeterogens(keepWater=False)
        fixer.findMissingAtoms()
        try:
            fixer.addMissingAtoms()
        except Exception:
            # Best effort: report and move on, but let KeyboardInterrupt /
            # SystemExit propagate (a bare except would swallow them).
            print("Unable to add missing atoms")
            continue
        fixer.addMissingHydrogens(7.0)
        # The context manager guarantees the output is flushed and closed.
        with open(os.path.join(toFolder, pdbFile), 'w') as out_file:
            PDBFile.writeFile(fixer.topology,
                              fixer.positions,
                              out_file,
                              keepIds=keepIds)
예제 #18
0
# Fetch the seed structure into PDB_DIR.
pdbl.download_pdb_files([SEED_PDB], file_format='pdb', pdir=PDB_DIR)

""" Extract 1MFA components """
uni_1MFA = mda.Universe(f'{PDB_DIR}/pdb{SEED_PDB}.ent')
assert hasattr(uni_1MFA, 'trajectory')

# Ligand: every atom that is neither protein nor a HOH residue.
lig_1MFA = uni_1MFA.select_atoms('not protein and not resname HOH')
lig_1MFA.write(f'{PDB_DIR}/{SEED_PDB}.lig.pdb')

# Protein part; the same path is read back by PDBFixer below.
fab_pdb_path = f'{PDB_DIR}/{SEED_PDB}.fab.pdb'
fab_1MFA = uni_1MFA.select_atoms('protein')
fab_1MFA.write(fab_pdb_path)
# light_1MFA = uni_1MFA.select_atoms('segid L')
# heavy_1MFA = uni_1MFA.select_atoms('segid H')

""" Fix/clean the FAb apo protein and save it """
fixer = PDBFixer(fab_pdb_path)
fixer.findMissingResidues()
fixer.findNonstandardResidues()
fixer.replaceNonstandardResidues()
fixer.removeHeterogens(keepWater=True)
fixer.findMissingAtoms()
fixer.addMissingAtoms()
fixer.addMissingHydrogens(7.0)

fab_fixed_path = f'{PDB_DIR}/{SEED_PDB}.fab.fixed.pdb'
with open(fab_fixed_path, 'w+') as outfile:
    PDBFile.writeFile(fixer.topology, fixer.positions, outfile)

""" Download/save target ligand (PubChem CID: 2978) """
# cpd_2978 = pcp.Compound.from_cid(TARGET_CID)
pcp.download('SDF',
             f'{SDF_DIR}/{TARGET_CID}.sdf',
             TARGET_CID,
             overwrite=True)
예제 #19
0
     # Ligand branch (the enclosing statement is outside this excerpt).
     # Loads the ligand coordinates/topology from the i-th results PDB.
     ligand_pdbfile = PDBFile(f'{path}/results{i}.pdb')
     force_field = ForceField(
         'openff_unconstrained-1.1.0.offxml'
     )  #smirnoff99Frosst.offxml') #'openff-1.0.0.offxml')
     # Build the OpenMM system from the ligand molecule's own topology.
     # NOTE(review): `ligand_off_molecule` is defined outside this excerpt.
     ligand_system = force_field.create_openmm_system(
         ligand_off_molecule.to_topology())
     # Combine the PDB topology, the parameterized system and the PDB
     # positions into a single ParmEd structure.
     ligand_structure = parmed.openmm.load_topology(
         ligand_pdbfile.topology,
         ligand_system,
         xyz=ligand_pdbfile.positions)
 if 1:
     # DO PROTEIN THINGS
     # Clean receptor.pdb with PDBFixer, write the fixed copy, then reload it
     # and parameterize it with the amber14 force field.
     receptor_file = 'receptor.pdb'
     fixed_receptor_file = f'{path}/fixed_receptor.pdb'
     omm_forcefield = app.ForceField('amber14-all.xml')
     fixer = PDBFixer(receptor_file)  #filename='receptor.pdb')
     # NOTE(review): the find* return values are kept only because later
     # code might reference these names; confirm whether they are needed.
     missingresidues = fixer.findMissingResidues()
     rezez = fixer.findNonstandardResidues()
     fixer.replaceNonstandardResidues()
     fixer.removeHeterogens(keepWater=False)
     missingatoms = fixer.findMissingAtoms()
     fixer.addMissingAtoms()
     fixer.addMissingHydrogens(7.0)
     # The file is read back on the next statement, so it must be flushed
     # and closed first; the context manager guarantees that.
     with open(fixed_receptor_file, 'w') as fixed_out:
         PDBFile.writeFile(fixer.topology, fixer.positions, fixed_out)
     fixed_receptor = PDBFile(fixed_receptor_file)
     receptor_system = omm_forcefield.createSystem(fixed_receptor.topology)
     receptor_structure = parmed.openmm.load_topology(
         fixed_receptor.topology,
         receptor_system,
         xyz=fixed_receptor.positions)
예제 #20
0
def fix_pdb(pdb_id):
    """Repair a PDB file with PDBFixer and write the result to the current directory.

    Args:
        pdb_id: value handed positionally to ``PDBFixer``. Inputs that are
            exactly four characters long are not processed.
            NOTE(review): presumably four-character inputs are bare PDB ids
            meant to be handled elsewhere -- confirm.

    Returns:
        The output file name ``"<stem>_fixed_pH_7.pdb"`` (``<stem>`` is the
        part of ``pdb_id`` before the first "."), or ``None`` when ``pdb_id``
        has exactly four characters.
    """
    path = os.getcwd()
    if len(pdb_id) == 4:
        return None

    print("Creating PDBFixer...")
    fixer = PDBFixer(pdb_id)
    print("Finding missing residues...")
    fixer.findMissingResidues()

    # Drop missing-residue records at either end of a chain: position 0 is
    # the chain start, position == residue count is the chain end.
    chains = list(fixer.topology.chains())
    for key in list(fixer.missingResidues.keys()):
        chain = chains[key[0]]
        if key[1] == 0 or key[1] == len(list(chain.residues())):
            print("ok")
            del fixer.missingResidues[key]

    print("Finding nonstandard residues...")
    fixer.findNonstandardResidues()
    print("Replacing nonstandard residues...")
    fixer.replaceNonstandardResidues()
    print("Removing heterogens...")
    fixer.removeHeterogens(keepWater=True)

    print("Finding missing atoms...")
    fixer.findMissingAtoms()
    print("Adding missing atoms...")
    fixer.addMissingAtoms()
    print("Adding missing hydrogens...")
    fixer.addMissingHydrogens(7)
    print("Writing PDB file...")

    # Build the name once so the written file and the return value agree.
    out_name = "%s_fixed_pH_%s.pdb" % (pdb_id.split('.')[0], 7)
    # Context manager: the handle is closed even if writeFile raises.
    with open(os.path.join(path, out_name), "w") as out_file:
        PDBFile.writeFile(fixer.topology,
                          fixer.positions,
                          out_file,
                          keepIds=True)
    return out_name
예제 #21
0
# Run configuration. `pdbid`, `experiment` and `chain` are defined outside
# this excerpt.
iteration = 250000
# Working directory name encodes the PDB id, experiment and iteration count.
work_dir = f'/data/chodera/jiayeguo/projects/cv_selection/sams_simulation/new_trials/{pdbid}_{experiment}_{iteration}'
temperature = 310.15 * unit.kelvin
pressure = 1.0 * unit.atmospheres
ndihedrals = 7  # number of dihedrals we want to restrain
ndistances = 2  # number of distances we want to restrain
targets = list(range(8))  # list of dunbrack clusters (sams states) to bias to
coefficient = 1.0  # coefficient for force constant

# Only prepare the structure when no minimized PDB exists yet in work_dir.
if not os.path.isfile(
        os.path.join(work_dir, f'{pdbid}_chain{chain}_minimized.pdb')):
    print("Need to minimize the protein structure.")
    # clean up the input pdb file using pdbfixer and load using Modeller
    # (skipped when the per-chain PDB file is already present)
    if not os.path.isfile(os.path.join(work_dir, f'{pdbid}_chain{chain}.pdb')):
        # Structure is fetched by URL rather than read from a local file.
        fixer = PDBFixer(url=f'http://www.pdb.org/pdb/files/{pdbid}.pdb')
        '''
        for this case somehow the pdb after chain selection doesn't go through fixing
        so fix and then select
        '''
        # find missing residues
        fixer.findMissingResidues()
        # Discard every missing-residue record found above (the dict is
        # emptied entirely, not only the entries at the chain ends).
        fixer.missingResidues = {}
        # remove ligand but keep crystal waters
        fixer.removeHeterogens(True)
        print("Done removing heterogens.")

        # find missing atoms/terminals
        fixer.findMissingAtoms()
        # Proceed only if something was actually found missing
        # (the body of this branch lies beyond this excerpt).
        if fixer.missingAtoms or fixer.missingTerminals:
예제 #22
0
def pdb_fix_pdbfixer(pdbid, file_pathway, ph, chains_to_remove):
    """Download a PDB entry, repair it with PDBFixer and write three variants.

    Args:
        pdbid: 4 letter string specifying the PDB ID of the file you want to fix
        file_pathway: directory in which the fixed PDB files are written
        ph: the pH at which hydrogens will be determined and added
        chains_to_remove: dictionary with parallel lists under the keys
            'pdbid' and 'chain_to_remove'; the latter holds
            whitespace-separated chain ids for the matching PDB ID
    Returns: nothing, but it writes ``<pdbid>_fixed_ph<ph>.pdb`` (everything),
        ``..._apo.pdb`` (after ``removeHeterogens(True)``) and
        ``..._apo_nowater.pdb`` (after ``removeHeterogens(False)``).

    NOTE(review): ``keepNumbers`` is not a parameter or local of this
    function; it must exist at module scope or the first write raises
    NameError -- confirm where it is defined.
    """
    print(pdbid)

    # Download the topology from rcsb based on pdbid
    fixer = PDBFixer(pdbid=pdbid)

    # Remove chains based on hand curated .csv file
    if pdbid in chains_to_remove['pdbid']:
        # Look up the entry at the same position as pdbid. The original code
        # referenced the misspelled name `chain_to_remove` here.
        chains = chains_to_remove['chain_to_remove'][
            chains_to_remove['pdbid'].index(pdbid)]
        chains_list = chains.split()
        fixer.removeChains(chainIds=chains_list)

    # Determine the first and last residue resolved in chain 0
    chains = list(fixer.topology.chains())
    resindices = natsorted([residue.index for residue in chains[0].residues()])
    first_resindex = resindices[0]
    last_resindex = resindices[-1]

    # Find missing residues and drop the lowest / highest keyed record when it
    # lies at or beyond the first / last resolved residue (terminal fragments).
    fixer.findMissingResidues()
    missing = fixer.missingResidues
    if missing:
        first_key = min(missing)
        if first_key[-1] <= first_resindex:
            missing.pop(first_key)
    # Re-check emptiness: the pop above may have removed the only entry,
    # which previously caused an IndexError on the second lookup.
    if missing:
        last_key = max(missing)
        if last_key[-1] >= last_resindex:
            missing.pop(last_key)

    fixer.findNonstandardResidues()
    fixer.replaceNonstandardResidues()
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()
    fixer.addMissingHydrogens(ph)

    def _write(suffix):
        # Write the current fixer state; the handle is always closed.
        out_path = os.path.join(
            file_pathway, '%s_fixed_ph%s%s.pdb' % (pdbid, ph, suffix))
        with open(out_path, 'w') as handle:
            PDBFile.writeFile(fixer.topology,
                              fixer.positions,
                              handle,
                              keepIds=keepNumbers)

    # Write fixed PDB file, with all of the waters and ligands
    _write('')

    # Remove the ligand and write a pdb file
    fixer.removeHeterogens(True)
    _write('_apo')

    # Remove the waters and write a pdb file
    fixer.removeHeterogens(False)
    _write('_apo_nowater')
# Simulation settings. `work_dir` and `pdbid` are defined outside this
# excerpt.
temperature = 310.15 * unit.kelvin
pressure = 1.0 * unit.atmospheres
nstates = 8  # number of states we want to consider
ndihedrals = 10  # number of dihedrals we want to restrain
ndistances = 5  # number of distances we want to restrain
# if protein is not minimized
if not os.path.isfile(os.path.join(work_dir,
                                   '{}_minimized.pdb'.format(pdbid))):
    print("Need to minimize the protein structure.")
    ## clean up the input pdb file using pdbfixer and load using Modeller
    # `import urllib` alone does not load the `request` submodule, so
    # `urllib.request.urlopen` could raise AttributeError; import it
    # explicitly.
    import urllib.request
    with urllib.request.urlopen(
            'http://www.pdb.org/pdb/files/{}.pdb'.format(pdbid)) as response:
        pdb_file = response.read()

    # NOTE(review): response.read() returns bytes -- confirm that PDBFixer
    # accepts raw bytes for `pdbfile`; it may need a file-like object.
    fixer = PDBFixer(pdbfile=pdb_file)
    fixer.findMissingResidues()

    # Discard every missing-residue record (the dict is emptied entirely).
    #fixer.missingResidues = {(0,47): fixer.missingResidues[(0,47)]}
    fixer.missingResidues = {}

    # remove ligand but keep crystal waters
    fixer.removeHeterogens(True)
    print("Done removing heterogens.")

    # find missing atoms/terminals
    fixer.findMissingAtoms()
    if fixer.missingAtoms or fixer.missingTerminals:
        fixer.addMissingAtoms()
        print("Done adding atoms/terminals.")
예제 #24
0
def write_file(filename, contents):
    """Write ``contents`` to ``filename``, replacing any existing content.

    Args:
        filename: path of the file to (over)write, opened in text mode 'w'.
        contents: string to write.

    Returns:
        None.
    """
    # The context manager closes the file even if write() raises.
    with open(filename, 'w') as outfile:
        outfile.write(contents)


################################################################################
# SET UP SYSTEM
################################################################################

# Build the force field from the configured definition files.
forcefield = app.ForceField(*forcefields_to_use)

# Download the structure for the requested PDB id.
print('Retrieving %s from PDB...' % pdbid)
fixer = PDBFixer(pdbid=pdbid)

# Work out which chains must go: every chain id of the first model that is
# not listed in chain_ids_to_keep.
print('Removing all chains but %s' % chain_ids_to_keep)
all_chains = [*fixer.topology.chains()]
chain_id_list = [
    model_chain.chain_id for model_chain in fixer.structure.models[0].chains
]
chain_ids_to_remove = set(chain_id_list).difference(set(chain_ids_to_keep))
fixer.removeChains(chainIds=chain_ids_to_remove)

# Locate residues missing from the structure.
print('Finding missing residues...')
fixer.findMissingResidues()

# Locate nonstandard residues.
print('Replacing nonstandard residues...')
fixer.findNonstandardResidues()
예제 #25
0
def hydrate(system, opt):
    """
    This function solvates the system by using PDBFixer

    A copy of the system is first translated so that the centre of its
    bounding box sits at the centre of a cube whose edge is the largest
    bounding-box extent plus twice the solvent padding. PDBFixer then adds
    solvent, and the added chains are merged back into the shifted copy.

    Parameters:
    -----------
    system: OEMol molecule
        The system to solvate; operations are applied to a copy created
        with CreateCopy()
    opt: python dictionary
        The parameters used to solvate the system. Reads
        'solvent_padding' (wrapped as angstroms) and
        'salt_concentration' (wrapped as millimolar)

    Return:
    -------
    oe_mol: OEMol
        The solvated system, carrying the periodic box vectors under the
        'box_vectors' data tag
    """
    def BoundingBox(molecule):
        """
        This function calculates the Bounding Box of the passed
        molecule

        molecule: OEMol

        return: bb (numpy array)
            the calculated bounding box is returned as numpy array:
            [(xmin,ymin,zmin), (xmax,ymax,zmax)]
        """
        np_coords = np.array(list(molecule.GetCoords().values()))
        return np.array([np_coords.min(axis=0), np_coords.max(axis=0)])

    # Create a system copy
    sol_system = system.CreateCopy()

    # Calculate system BoundingBox (Angstrom units)
    BB = BoundingBox(sol_system)

    # Estimation of the box cube length in A
    box_edge = 2.0 * opt['solvent_padding'] + np.max(BB[1] - BB[0])

    # Translation that moves the bounding-box centre to the cube centre
    bb_center = (BB[0] + BB[1]) / 2.
    delta = np.array([box_edge/2., box_edge/2., box_edge/2.]) - bb_center

    sys_coord_dic = {k: (v+delta) for k, v in sol_system.GetCoords().items()}

    sol_system.SetCoords(sys_coord_dic)

    # Load a fake system to initialize PDBfixer
    filename = resource_filename('pdbfixer', 'tests/data/test.pdb')
    fixer = PDBFixer(filename=filename)

    # Convert between OE and OpenMM topology
    omm_top, omm_pos = oeommutils.oemol_to_openmmTop(sol_system)

    # Chain ids present before solvation; anything else was added by PDBFixer
    chain_names = [chain.id for chain in omm_top.chains()]

    # Set the correct topology to the fake system
    fixer.topology = omm_top
    fixer.positions = omm_pos

    # Solvate the system
    fixer.addSolvent(padding=unit.Quantity(opt['solvent_padding'], unit.angstroms),
                     ionicStrength=unit.Quantity(opt['salt_concentration'], unit.millimolar))

    # The OpenMM topology produced by the solvation fixer has missing bond
    # orders and aromaticity. The following section is creating a new openmm
    # topology made of just water molecules and ions. The new topology is then
    # converted in an OEMol and added to the passed molecule to produce the
    # solvated system

    wat_ion_top = app.Topology()

    # Atom dictionary between the the PDBfixer topology and the water_ion topology
    fixer_atom_to_wat_ion_atom = {}

    for chain in fixer.topology.chains():
        if chain.id not in chain_names:
            n_chain = wat_ion_top.addChain(chain.id)
            for res in chain.residues():
                n_res = wat_ion_top.addResidue(res.name, n_chain)
                for at in res.atoms():
                    n_at = wat_ion_top.addAtom(at.name, at.element, n_res)
                    fixer_atom_to_wat_ion_atom[at] = n_at

    for bond in fixer.topology.bonds():
        # Only bonds whose two atoms were copied above are transferred.
        # Bonds touching the original system are skipped explicitly rather
        # than through a bare `except`, so genuine addBond errors surface.
        n_at0 = fixer_atom_to_wat_ion_atom.get(bond[0])
        n_at1 = fixer_atom_to_wat_ion_atom.get(bond[1])
        if n_at0 is None or n_at1 is None:
            continue
        wat_ion_top.addBond(n_at0, n_at1, type=None, order=1)

    # NOTE(review): assumes the added solvent/ion positions follow the
    # original len(omm_pos) positions in fixer.positions -- confirm.
    wat_ion_pos = fixer.positions[len(omm_pos):]

    oe_mol = oeommutils.openmmTop_to_oemol(wat_ion_top, wat_ion_pos)

    # Setting the box vectors
    omm_box_vectors = fixer.topology.getPeriodicBoxVectors()
    box_vectors = utils.PackageOEMol.encodePyObj(omm_box_vectors)
    oe_mol.SetData(oechem.OEGetTag('box_vectors'), box_vectors)

    oechem.OEAddMols(oe_mol, sol_system)

    return oe_mol
예제 #26
0
    def _fix(self, atoms):
        """Run ``atoms`` through PDBFixer and store the repaired structure.

        The atoms are serialized to an in-memory PDB stream, passed to
        PDBFixer (nonstandard residues replaced, heterogens removed with
        ``removeHeterogens(False)``, missing atoms added, hydrogens added at
        ``self._ph``), then parsed back. Sets ``self._atoms`` (with the
        original title), ``self._topology`` and ``self._positions``.

        Raises ImportError if PDBFixer or OpenMM cannot be imported.
        """
        try:
            from pdbfixer import PDBFixer
            from simtk.openmm.app import PDBFile
        except ImportError:
            raise ImportError(
                'Please install PDBFixer and OpenMM in order to use ClustENM.')

        # Serialize the input atoms into an in-memory PDB for PDBFixer.
        pdb_in = createStringIO()
        original_title = atoms.getTitle()
        writePDBStream(pdb_in, atoms)
        pdb_in.seek(0)
        fixer = PDBFixer(pdbfile=pdb_in)
        pdb_in.close()

        # An empty missing-residue table is assigned directly instead of
        # calling findMissingResidues().
        fixer.missingResidues = {}
        fixer.findNonstandardResidues()
        fixer.replaceNonstandardResidues()
        fixer.removeHeterogens(keepWater=False)
        fixer.findMissingAtoms()
        fixer.addMissingAtoms()
        fixer.addMissingHydrogens(self._ph)

        # Round-trip the fixed structure through a second in-memory PDB.
        pdb_out = createStringIO()
        PDBFile.writeFile(fixer.topology, fixer.positions, pdb_out,
                          keepIds=True)
        pdb_out.seek(0)
        self._atoms = parsePDBStream(pdb_out)
        self._atoms.setTitle(original_title)
        pdb_out.close()

        self._topology = fixer.topology
        self._positions = fixer.positions
예제 #27
0
from pdbfixer import PDBFixer
from simtk.openmm.app import PDBFile

# Load the local 3UE4 structure and drop chain B.
fixer = PDBFixer(filename='3UE4.pdb')

fixer.removeChains(chainIds=['B'])

# Without fixer.missingResidues = {}, fixer.addMissingAtoms() throw an exception
# and if I call fixer.findMissingResidues() several terminal residues are added
fixer.missingResidues = {}
fixer.findMissingAtoms()
fixer.addMissingAtoms()

fixer.removeHeterogens(keepWater=False)

fixer.addMissingHydrogens(7.0)

# Context manager so the output file is flushed and closed deterministically.
with open('3UE4-pdbfixer.pdb', 'w') as outfile:
    PDBFile.writeFile(fixer.topology, fixer.positions, outfile)
예제 #28
0
def solvate(system, opt):
    """
    This function solvates the system by using PDBFixer

    Parameters:
    -----------
    system: OEMol molecule
        The system to solvate
    opt: python dictionary
        The parameters used to solvate the system. Reads
        'solvent_padding' (wrapped as angstroms) and
        'salt_concentration' (wrapped as millimolar)

    Return:
    -------
    oe_mol: OEMol
        The solvated system, carrying the periodic box vectors under the
        'box_vectors' data tag
    """

    # Load a fake system to initialize PDBfixer
    filename = resource_filename('pdbfixer', 'tests/data/test.pdb')
    fixer = PDBFixer(filename=filename)

    # Convert between OE and OpenMM topology
    omm_top, omm_pos = oeommutils.oemol_to_openmmTop(system)

    # Chain ids present before solvation; anything else was added by PDBFixer
    chain_names = [chain.id for chain in omm_top.chains()]

    # Set the correct topology to the fake system
    fixer.topology = omm_top
    fixer.positions = omm_pos

    # Solvate the system
    fixer.addSolvent(padding=unit.Quantity(opt['solvent_padding'], unit.angstroms),
                     ionicStrength=unit.Quantity(opt['salt_concentration'], unit.millimolar))

    # The OpenMM topology produced by the solvation fixer has missing bond
    # orders and aromaticity. The following section is creating a new openmm
    # topology made of just water molecules and ions. The new topology is then
    # converted in an OEMol and added to the passed molecule to produce the
    # solvated system

    wat_ion_top = app.Topology()

    # Atom dictionary between the the PDBfixer topology and the water_ion topology
    fixer_atom_to_wat_ion_atom = {}

    for chain in fixer.topology.chains():
        if chain.id not in chain_names:
            n_chain = wat_ion_top.addChain(chain.id)
            for res in chain.residues():
                n_res = wat_ion_top.addResidue(res.name, n_chain)
                for at in res.atoms():
                    n_at = wat_ion_top.addAtom(at.name, at.element, n_res)
                    fixer_atom_to_wat_ion_atom[at] = n_at

    for bond in fixer.topology.bonds():
        # Only bonds whose two atoms were copied above are transferred.
        # Bonds touching the original system are skipped explicitly rather
        # than through a bare `except`, so genuine addBond errors surface.
        n_at0 = fixer_atom_to_wat_ion_atom.get(bond[0])
        n_at1 = fixer_atom_to_wat_ion_atom.get(bond[1])
        if n_at0 is None or n_at1 is None:
            continue
        wat_ion_top.addBond(n_at0, n_at1, type=None, order=1)

    # NOTE(review): assumes the added solvent/ion positions follow the
    # original len(omm_pos) positions in fixer.positions -- confirm.
    wat_ion_pos = fixer.positions[len(omm_pos):]

    oe_mol = oeommutils.openmmTop_to_oemol(wat_ion_top, wat_ion_pos)

    # Setting the box vectors
    omm_box_vectors = fixer.topology.getPeriodicBoxVectors()
    box_vectors = utils.PackageOEMol.encodePyObj(omm_box_vectors)
    oe_mol.SetData(oechem.OEGetTag('box_vectors'), box_vectors)

    oechem.OEAddMols(oe_mol, system)

    return oe_mol
예제 #29
0
# `from __future__` imports must be the first statement of a module;
# placed anywhere else they are a SyntaxError.
from __future__ import print_function
from pdbfixer import PDBFixer
from simtk.openmm.app import PDBFile
import mdtraj as md
import os
import matplotlib
# The backend is selected before pyplot is imported.
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import numpy as np
from simtk.openmm import app
import simtk.openmm as mm
from simtk import unit
from sys import stdout

# clean up the original PDB file and add missing residues and heavy atoms
fixer = PDBFixer('pdb4h12.ent')

fixer.findMissingResidues()
# only add missing residues in the middle of the chain, do not add terminal ones
# (keys are (chain index, position) pairs: position 0 is the chain start,
# position == residue count is the chain end)
chains = list(fixer.topology.chains())
keys = fixer.missingResidues.keys()
missingResidues = dict()
for key in keys:
    chain = chains[key[0]]
    if not (key[1] == 0 or key[1] == len(list(chain.residues()))):
        missingResidues[key] = fixer.missingResidues[key]
fixer.missingResidues = missingResidues

fixer.findMissingAtoms()
fixer.addMissingAtoms()
예제 #30
0
from pdbfixer import PDBFixer
from simtk.openmm.app import PDBFile

# Download 3UE4 by PDB id and drop chain B.
fixer = PDBFixer(pdbid='3UE4')

fixer.removeChains(chainIds=['B'])

# Without fixer.missingResidues = {}, fixer.addMissingAtoms() throw an exception
# and if I call fixer.findMissingResidues() several terminal residues are added
fixer.missingResidues = {}
fixer.findMissingAtoms()
fixer.addMissingAtoms()

fixer.removeHeterogens(keepWater=False)

#fixer.addMissingHydrogens(7.0)

# Context manager so the output file is flushed and closed deterministically.
with open('../kinases/abl/3UE4-pdbfixer.pdb', 'w') as outfile:
    PDBFile.writeFile(fixer.topology, fixer.positions, outfile)
예제 #31
0
# ==============================================================================
# PREPARE STRUCTURE
# ==============================================================================

from pdbfixer import PDBFixer

# Periodic unless the nonbonded method is one of the two non-periodic ones.
is_periodic = (nonbonded_method not in [app.NoCutoff, app.CutoffNonPeriodic])

# ==============================================================================
# Retrieve the PDB file
# ==============================================================================

# A local file takes precedence over downloading by PDB id.
if pdb_filename:
    logger.info("Retrieving PDB '%s'...", pdb_filename)
    fixer = PDBFixer(filename=pdb_filename)
else:
    logger.info("Retrieving PDB '%s'...", pdbid)
    fixer = PDBFixer(pdbid=pdbid)

# ==============================================================================
# Prepare the structure
# ==============================================================================

# DEBUG
# Parenthesized single-argument form behaves identically on Python 2 and 3;
# the bare `print "..."` statement is a SyntaxError on Python 3.
print("fixer.topology.chains(): %s" % str([ chain.id for chain in fixer.topology.chains() ]))

# Write PDB file for solute only.
logger.info("Writing source PDB...")
# NOTE(review): this path is built from `pdbid` even when the structure was
# loaded from `pdb_filename` -- confirm `pdbid` is always set.
pdb_filename = os.path.join(workdir, pdbid + '.pdb')
# NOTE(review): `outfile` is opened here without a context manager; the code
# that writes to and closes it is not visible in this excerpt.
outfile = open(pdb_filename, 'w')
예제 #32
0
def process_pdb(path,
                corr_path,
                chain_id,
                max_atoms,
                gsd_file,
                embedding_dicts,
                NN,
                nlist_model,
                keep_residues=(-1, 1),
                debug=False,
                units=unit.nanometer,
                frame_number=3,
                model_index=0,
                log_file=None,
                shiftx_style=False):
    """Turn one PDB file plus its peak data into per-residue fragment records.

    For a random selection of frames, the structure is cleaned with PDBFixer,
    a neighbor list is computed with ``nlist_model`` and, for every residue of
    the requested chain, a fixed-size fragment (atoms, mask, neighbor list,
    peaks, names) is assembled and converted with ``make_tfrecord``.

    Args:
        path: PDB file path; read with ``app.PDBFile`` and ``PDBFixer``.
        corr_path: path handed to ``process_corr`` to obtain ``peak_data``,
            ``sequence_map`` and ``peak_seq``.
        chain_id: chain to use, compared against the first character of each
            chain id. ``'_'`` selects the chain of the first residue.
        max_atoms: length of the per-fragment arrays.
        gsd_file: if not None, a snapshot of each fragment is appended to it.
        embedding_dicts: dict with the keys ``'name'``, ``'atom'``,
            ``'nlist'`` and ``'class'``. ``embedding_dicts['name']`` is
            extended in place with unseen ``RESNAME-ATOMNAME`` keys.
        NN: number of neighbors per atom read from the ``nlist_model`` output.
        nlist_model: callable taking the positions array and returning an
            array indexed ``[atom, neighbor, k]`` where ``k=0`` is the
            distance and ``k=1`` the neighbor atom index.
        keep_residues: ``(first, last)`` offsets, relative to the current
            residue, of the chain neighbors that are always considered.
        debug: print diagnostics and raise on failures that are otherwise
            silently skipped.
        units: unit the frame positions are converted to.
        frame_number: maximum number of frames sampled (forced to 1 when
            ``shiftx_style`` is set).
        model_index: stored as the first entry of each record's ``indices``.
        log_file: if not None, one summary line is written per fragment.
        shiftx_style: passed to ``process_corr``; also skips the PDBFixer
            atom/hydrogen repair steps.

    Returns:
        A 4-tuple ``(result, fraction, len(result), peak_count)`` where
        ``result`` is the list of records, ``fraction`` is
        ``len(peak_successes) / len(peak_data)`` for the last processed frame
        (``peak_successes`` is reset on every frame) and ``peak_count`` is the
        number of peaks assigned over all frames.

    Raises:
        ValueError: in debug mode if the chain is not found or fewer than 5
            residues align.
        RuntimeError: in debug mode if a root-residue neighbor is missing.
    """

    global MA_LOST_FRAGS
    if shiftx_style:
        frame_number = 1
    # load pdb
    pdb = app.PDBFile(path)

    # load cs sets
    peak_data, sequence_map, peak_seq = process_corr(corr_path, debug,
                                                     shiftx_style)

    result = []
    # check for weird/null chain
    if chain_id == '_':
        chain_id = list(pdb.topology.residues())[0].chain.id[0]
    # sometimes chains have extra characters (why?)
    residues = list(
        filter(lambda r: r.chain.id[0] == chain_id, pdb.topology.residues()))
    if len(residues) == 0:
        if debug:
            raise ValueError('Failed to find requested chain ', chain_id)

    pdb_offset, seq_offset = None, None

    # from pdb residue index to our aligned residue index
    residue_lookup = {}
    # bonded neighbor mask
    nlist_mask = None
    peak_count = 0
    # defined up front so the return statement is valid even when the PDB
    # has no frames and the loop body never runs
    peak_successes = set()
    # select a random set of frames for generating data without replacement
    frame_choices = random.sample(range(0, pdb.getNumFrames()),
                                  k=min(pdb.getNumFrames(), frame_number))
    for fi in frame_choices:
        peak_successes = set()
        # clean up individual frame
        frame = pdb.getPositions(frame=fi)
        # have to fix at each frame since inserted atoms may change
        # fix missing residues/atoms
        fixer = PDBFixer(filename=path)
        # overwrite positions with frame positions
        fixer.positions = frame
        # we want to add missing atoms,
        # but not replace missing residue. We'd
        # rather just ignore those
        fixer.findMissingResidues()
        # remove the missing residues (PDBFixer keeps them in a dict)
        fixer.missingResidues = {}
        # remove water!
        fixer.removeHeterogens(False)
        if not shiftx_style:
            fixer.findMissingAtoms()
            fixer.findNonstandardResidues()
            fixer.replaceNonstandardResidues()
            fixer.addMissingAtoms()
            fixer.addMissingHydrogens(7.0)
        # get new positions
        frame = fixer.positions
        num_atoms = len(frame)
        # remake residue list each time so they have correct atom ids
        residues = list(
            filter(lambda r: r.chain.id[0] == chain_id,
                   fixer.topology.residues()))
        if num_atoms > 20000:
            MA_LOST_FRAGS += len(residues)
            if debug:
                print(
                    'Exceeded number of atoms for building nlist (change this if you have big GPU memory) in frame {} in pdb {}'
                    .format(fi, path))
            break
        # check alignment once
        if pdb_offset is None:
            # create sequence from residues
            pdb_seq = ['XXX'] * max([int(r.id) + 1 for r in residues])
            for r in residues:
                rid = int(r.id)
                if rid >= 0:
                    pdb_seq[int(r.id)] = r.name
            if debug:
                print('pdb_seq', pdb_seq)
                print('peak_seq', peak_seq)
            pdb_offset, seq_offset = align(pdb_seq, peak_seq, debug)
            # TODO: the pdb offset from align() is discarded and forced to 0.
            # Maybe it's ok
            pdb_offset = 0
            if debug:
                print('pdb_offset', pdb_offset)
                print('seq_offset', seq_offset)
                print(sequence_map)
                # now check alignment - rarely perfect
                saw_one = False
                aligned = 0
                for i in range(len(residues)):
                    segid = int(residues[i].id) + pdb_offset
                    saw_one = pdb_seq[segid] == residues[i].name
                    if not saw_one:
                        print('Mismatch (A) at position {} ({}). {} != {}'.
                              format(segid, residues[i].id, pdb_seq[segid],
                                     residues[i].name))
                        continue
                    if segid + seq_offset in sequence_map:
                        peakid = sequence_map[segid + seq_offset]
                        print(segid, segid + seq_offset, len(pdb_seq),
                              len(peak_seq))
                        saw_one = pdb_seq[segid] == peak_seq[segid +
                                                             seq_offset]
                        if not saw_one:
                            # report the two values that were just compared,
                            # in the order the message labels them
                            print(
                                'Mismatch (B) at position {}. pdb seq: {}, peak seq: {}'
                                .format(segid, pdb_seq[segid],
                                        peak_seq[segid + seq_offset]))
                            continue
                        saw_one = peak_data[peakid]['name'] == residues[i].name
                        if not saw_one:
                            # one argument per placeholder so the labels line
                            # up with the values
                            print(
                                'Mismatch (C) at position {}. peak seq: {}, peak data: {}, residue: {}'
                                .format(segid, peak_seq[segid + seq_offset],
                                        peak_data[peakid]['name'],
                                        residues[i].name))
                            continue
                        aligned += 1
                if aligned < 5:
                    raise ValueError(
                        'Could not find more than 5 aligned residues, very unusual'
                    )

            # create residue look-up from atom index
            for i, r in enumerate(residues):
                for a in r.atoms():
                    residue_lookup[a.index] = i
            # This alignment will be checked as we compare shifts against the pdb
        # get neighbor list for frame
        np_pos = np.array([v.value_in_unit(units) for v in frame])
        frame_nlist = nlist_model(np_pos)

        for ri in range(len(residues)):
            # we build up fragment by getting residues around us, both in chain
            # and those within a certain distance of us
            rmin = max(0, ri + keep_residues[0])
            # have to +1 here (and not in range) to get min to work :)
            rmax = min(len(residues), ri + keep_residues[1] + 1)
            # do we have any residues to consider?
            success = rmax - rmin > 0

            consider = set(range(rmin, rmax))

            # Used to indicate an atom should be included from a different residue
            marked = [False for _ in range(len(frame))]

            # now grab spatial neighbor residues
            # NOTE: I checked this by hand a lot
            # Believe this code.
            for a in residues[ri].atoms():
                for ni in range(NN):
                    j = int(frame_nlist[a.index, ni, 1])
                    try:
                        consider.add(residue_lookup[j])
                        marked[j] = True
                    except KeyError:
                        success = False
                        if debug:
                            print(
                                'Neighboring residue in different chain, skipping'
                            )
                        break
            atoms = np.zeros((max_atoms), dtype=np.int64)
            # we will put dummy atom at end to keep bond counts the same by bonding to it
            # Z-DISABLED
            #atoms[-1] = embedding_dicts['atom']['Z']
            # np.float64 is what the removed np.float alias resolved to
            mask = np.zeros((max_atoms), dtype=np.float64)
            bonds = np.zeros((max_atoms, max_atoms), dtype=np.int64)
            # nlist:
            # :,:,0 -> distance
            # :,:,1 -> neighbor index
            # :,:,2 -> bond count
            nlist = np.zeros((max_atoms, NEIGHBOR_NUMBER, 3), dtype=np.float64)
            positions = np.zeros((max_atoms, 3), dtype=np.float64)
            peaks = np.zeros((max_atoms), dtype=np.float64)
            names = np.zeros((max_atoms), dtype=np.int64)
            # going from pdb atom index to index in these data structures
            rmap = dict()
            index = 0
            # check our two conditions that could have made this false: there are residues and
            # we didn't have off-chain spatial neighboring residues
            if not success:
                continue
            for rj in consider:
                residue = residues[rj]
                # use the alignment result to get offset
                segid = int(residue.id) + pdb_offset
                if segid + seq_offset not in sequence_map:
                    if debug:
                        print('Could not find residue index', rj, ': ',
                              residue, 'in the sequence map. Its index is',
                              segid + seq_offset, 'ri: ', ri)
                        print('We are considering', consider)
                    success = False
                    break
                peak_id = sequence_map[segid + seq_offset]
                #peak_id = segid
                if peak_id >= len(peak_data):
                    success = False
                    if debug:
                        print('peakd id is outside of peak range')
                    break
                # only check for residue we actually care about
                if ri == rj and residue.name != peak_data[peak_id]['name']:
                    if debug:
                        print('Mismatch between residue ', ri, rj, peak_id,
                              residue, segid, peak_data[peak_id], path,
                              corr_path, chain_id)
                    success = False
                    break
                for atom in residue.atoms():
                    # Make sure atom is in residue or neighbor of residue atom
                    if ri != rj and not marked[atom.index]:
                        continue
                    mask[index] = float(ri == rj)
                    atom_name = residue.name + '-' + atom.name
                    if atom_name not in embedding_dicts['name']:
                        embedding_dicts['name'][atom_name] = len(
                            embedding_dicts['name'])
                    names[index] = embedding_dicts['name'][atom_name]

                    if atom.element.symbol not in embedding_dicts['atom']:
                        if debug:
                            print('Could not identify atom',
                                  atom.element.symbol)
                        success = False
                        break
                    atoms[index] = embedding_dicts['atom'][atom.element.symbol]
                    positions[index] = np_pos[atom.index, :]
                    rmap[atom.index] = index
                    peaks[index] = 0
                    if mask[index]:
                        if atom.name[:3] in peak_data[peak_id]:
                            peaks[index] = peak_data[peak_id][atom.name[:3]]
                            peak_count += 1
                            peak_successes.add(peak_id)
                        else:
                            mask[index] = 0
                    index += 1
                    # Z-DISABLED
                    # -1 for dummy atom which is stored at end
                    if index == max_atoms - 1:  #2:
                        MA_LOST_FRAGS += 1
                        if debug:
                            print('Not enough space for all atoms in ri', ri)
                        success = False
                        break
                if ri == rj and sum(mask) == 0:
                    if debug:
                        print('Warning found no peaks for', ri, rj, residue,
                              peak_data[peak_id])
                    success = False
                if not success:
                    break
            if not success:
                continue
            # do this after so our reverse mapping is complete
            for rj in consider:
                residue = residues[rj]
                for b in residue.bonds():
                    # set bonds
                    try:
                        bonds[rmap[b.atom1.index], rmap[b.atom2.index]] = 1
                        bonds[rmap[b.atom2.index], rmap[b.atom1.index]] = 1
                    except KeyError:
                        # for bonds that cross residue
                        pass
            for rj in consider:
                residue = residues[rj]
                for a in residue.atoms():
                    # Make sure atom is in residue or neighbor of residue atom
                    if ri != rj and not marked[a.index]:
                        continue
                    index = rmap[a.index]
                    # convert to local indices and filter neighbors
                    n_index = 0
                    for ni in range(NN):
                        if frame_nlist[a.index, ni, 0] > 50.0:
                            # large distances are sentinels for things
                            # like self neighbors
                            continue
                        try:
                            j = rmap[int(frame_nlist[a.index, ni, 1])]
                        except KeyError:
                            # either we couldn't find a neighbor on the root residue (which is bad)
                            # or just one of the neighbors is not on a considered residue.
                            if rj == ri:
                                success = False
                                if debug:
                                    print('Could not find all neighbors',
                                          int(frame_nlist[a.index, ni, 1]),
                                          consider)
                                break
                            # Z-DISABLED
                            #j = max_atoms - 1 # point to dummy atom
                            continue
                        # mark as not a neighbor if out of molecule (only for non-subject nlists)
                        if False and j == max_atoms - 1:
                            #set index
                            nlist[index, n_index, 1] = j
                            # set distance
                            nlist[index, n_index, 0] = frame_nlist[a.index, ni,
                                                                   0]
                            #set type
                            nlist[index, n_index,
                                  2] = embedding_dicts['nlist']['none']
                            n_index += 1
                        # a 0 -> non-bonded
                        elif bonds[index, j] == 0:
                            #set index
                            nlist[index, n_index, 1] = j
                            # set distance
                            nlist[index, n_index, 0] = frame_nlist[a.index, ni,
                                                                   0]
                            #set type
                            nlist[index, n_index,
                                  2] = embedding_dicts['nlist']['nonbonded']
                            n_index += 1
                        # single bonded
                        else:
                            #set index
                            nlist[index, n_index, 1] = j
                            # set distance
                            nlist[index, n_index, 0] = frame_nlist[a.index, ni,
                                                                   0]
                            #set type
                            nlist[index, n_index,
                                  2] = embedding_dicts['nlist'][1]
                            n_index += 1
                        if n_index == NEIGHBOR_NUMBER:
                            break
                    # how did we do on peaks
                    # (disabled debugging aid: the `False and` guard keeps it
                    # from ever running)
                    if False and (peaks[index] > 0 and peaks[index] < 25):
                        nonbonded_count = np.sum(
                            nlist[index, :,
                                  2] == embedding_dicts['nlist']['nonbonded'])
                        bonded_count = np.sum(
                            nlist[index, :, 2] == embedding_dicts['nlist'][1])
                        print(
                            'neighbor summary: non-bonded: {}, bonded: {}, total: {}'
                            .format(nonbonded_count, bonded_count,
                                    NEIGHBOR_NUMBER))
                        print(nlist[index, :, :])
                        exit()
            if not success:
                if debug:
                    raise RuntimeError()
                continue
            if gsd_file is not None:
                snapshot = write_record_traj(
                    positions, atoms, mask, nlist, peaks,
                    embedding_dicts['class'][residues[ri].name], names,
                    embedding_dicts)
                snapshot.configuration.step = len(gsd_file)
                gsd_file.append(snapshot)
            result.append(
                make_tfrecord(atoms,
                              mask,
                              nlist,
                              peaks,
                              embedding_dicts['class'][residues[ri].name],
                              names,
                              indices=np.array(
                                  [model_index, fi,
                                   int(residues[ri].id)],
                                  dtype=np.int64)))
            if log_file is not None:
                # NOTE(review): len(gsd_file) is not guarded here, so passing
                # log_file without gsd_file raises TypeError -- confirm that
                # callers always supply both.
                log_file.write('{} {} {} {} {} {} {} {}\n'.format(
                    path.split('/')[-1],
                    corr_path.split('/')[-1], chain_id, len(peak_successes),
                    len(gsd_file), model_index, fi, residues[ri].id))
    return result, len(peak_successes) / len(peak_data), len(
        result), peak_count
예제 #33
0
def pdbfix(receptor: Optional[str] = None, pdbid: Optional[str] = None,
           pH: float = 7.0, path: str = '.', **kwargs) -> str:
    """Repair a structure with PDBFixer and write the result as a PDB file.

    The structure is loaded from ``pdbid`` when it is given, otherwise from
    the ``receptor`` file. Nonstandard residues are replaced, heterogens are
    removed, and missing atoms and hydrogens are added.

    Args:
        receptor: path of a local structure file. When given, it is also the
            output path, so the file is overwritten -- even if the structure
            itself was loaded via ``pdbid``.
        pdbid: PDB ID to load; takes precedence over ``receptor`` as the
            input source.
        pH: pH passed to ``addMissingHydrogens``.
        path: output directory used only when ``receptor`` is not given; the
            file is then named ``<pdbid>.pdb``.
        **kwargs: accepted and ignored.

    Returns:
        The output location: ``receptor`` unchanged when it was given,
        otherwise a ``Path`` object (not a ``str``) built from ``path``.
    """
    if pdbid:
        fixer = PDBFixer(pdbid=pdbid)
    else:
        fixer = PDBFixer(filename=receptor)

    fixer.findMissingResidues()
    fixer.findNonstandardResidues()
    fixer.replaceNonstandardResidues()
    fixer.removeHeterogens()
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()
    fixer.addMissingHydrogens(pH)

    if receptor:
        outfile = receptor
    else:
        outfile = Path(path)/f'{pdbid}.pdb'

    # Context manager guarantees the file is flushed and closed, even if
    # writeFile raises part-way through.
    with open(outfile, 'w') as handle:
        PDBFile.writeFile(fixer.topology, fixer.positions, handle)

    return outfile
예제 #34
0
def fix_pdb(pdb_file):
    """Repair ``pdb_file`` with PDBFixer and overwrite it in place.

    Nonstandard residues are replaced, heterogens are removed (the ``True``
    argument is PDBFixer's keep-water flag), and missing atoms and hydrogens
    (at pH 7.0) are added.

    Args:
        pdb_file: path of the PDB file to read and then overwrite.
    """
    fixer = PDBFixer(filename=pdb_file)
    fixer.findMissingResidues()
    fixer.findNonstandardResidues()
    fixer.replaceNonstandardResidues()
    fixer.removeHeterogens(True)
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()
    fixer.addMissingHydrogens(7.0)
    # Context manager guarantees the rewritten file is flushed and closed,
    # even if writeFile raises.
    with open(pdb_file, 'w') as handle:
        PDBFile.writeFile(fixer.topology, fixer.positions, handle)