Example #1
0
def clean_protein(
    prot: protein.Protein,
    checks: bool = True):
  """Runs a Protein through the PDB cleanup pipeline and returns PDB text.

  The protein is serialised to PDB text, passed through `cleanup.fix_pdb` and
  `cleanup.clean_structure`, and written back out as a PDB string.

  Args:
    prot: A `protein.Protein` instance.
    checks: If True, `_check_cleaned_atoms` is called with the cleaned PDB
      string and the PDB string of the input protein.

  Returns:
    The cleaned protein as a PDB-format string.
  """
  _check_atom_mask_is_ideal(prot)

  # The same dict is handed to both cleanup steps and logged afterwards.
  original_pdb = protein.to_pdb(prot)
  alterations = {}
  repaired_pdb = cleanup.fix_pdb(io.StringIO(original_pdb), alterations)
  structure = PdbStructure(io.StringIO(repaired_pdb))
  cleanup.clean_structure(structure, alterations)

  logging.info("alterations info: %s", alterations)

  # Turn the cleaned structure back into PDB text.
  cleaned = openmm_app.PDBFile(structure)
  cleaned_pdb = _get_pdb_string(cleaned.getTopology(), cleaned.getPositions())
  if checks:
    _check_cleaned_atoms(cleaned_pdb, original_pdb)
  return cleaned_pdb
Example #2
0
def overwrite_pdb_coordinates(pdb_str: str, pos) -> str:
    """Write a PDB string that pairs the topology of `pdb_str` with `pos`.

    Args:
      pdb_str: PDB text; only the topology parsed from it is used.
      pos: Positions passed straight to `openmm_app.PDBFile.writeFile`.

    Returns:
      The PDB text produced by `writeFile`.
    """
    parsed = PdbStructure(io.StringIO(pdb_str))
    topology = openmm_app.PDBFile(parsed).getTopology()
    buffer = io.StringIO()
    try:
        openmm_app.PDBFile.writeFile(topology, pos, buffer)
        return buffer.getvalue()
    finally:
        buffer.close()
def get_initial_energies(pdb_strs: Sequence[str],
                         stiffness: float = 0.0,
                         restraint_set: str = "non_hydrogen",
                         exclude_residues: Optional[Sequence[int]] = None):
    """Returns initial potential energies for a sequence of PDBs.

    Assumes the input PDBs are ready for minimization, and all have the same
    topology.
    Allows time to be saved by not pdbfixing / rebuilding the system: the
    system and simulation are built once from the first PDB and only the
    positions are swapped for each subsequent one.

    Args:
      pdb_strs: List of PDB strings.
      stiffness: kcal/mol A**2, spring constant of heavy atom restraining
          potential.
      restraint_set: Which atom types to restrain.
      exclude_residues: An optional list of zero-indexed residues to exclude
          from restraints.

    Returns:
      A list of initial energies in the same order as pdb_strs, each a plain
      number expressed in units of ENERGY. A structure whose energy could not
      be evaluated is reported as 1e20. An empty `pdb_strs` yields an empty
      list.
    """
    # Nothing to evaluate; also avoids indexing openmm_pdbs[0] below.
    if len(pdb_strs) == 0:
        return []

    exclude_residues = exclude_residues or []

    openmm_pdbs = [
        openmm_app.PDBFile(PdbStructure(io.StringIO(p))) for p in pdb_strs
    ]
    reference = openmm_pdbs[0]
    force_field = openmm_app.ForceField("amber99sb.xml")
    system = force_field.createSystem(reference.topology,
                                      constraints=openmm_app.HBonds)
    stiffness = stiffness * ENERGY / (LENGTH**2)
    if stiffness > 0 * ENERGY / (LENGTH**2):
        _add_restraints(system, reference, stiffness, restraint_set,
                        exclude_residues)

    # An explicitly empty CUDA_VISIBLE_DEVICES selects the CPU platform;
    # otherwise the CUDA platform is requested.
    k = 'CUDA_VISIBLE_DEVICES'
    if k in os.environ and len(os.environ[k]) == 0:
        platform = openmm.Platform.getPlatformByName("CPU")
    else:
        platform = openmm.Platform.getPlatformByName("CUDA")
    simulation = openmm_app.Simulation(reference.topology, system,
                                       openmm.LangevinIntegrator(0, 0.01, 0.0),
                                       platform)
    energies = []
    for pdb in openmm_pdbs:
        try:
            simulation.context.setPositions(pdb.positions)
            state = simulation.context.getState(getEnergy=True)
            energies.append(state.getPotentialEnergy().value_in_unit(ENERGY))
        except Exception as e:  # pylint: disable=broad-except
            logger.error(
                "Error getting initial energy, returning large value %s", e)
            # Plain number, like the success path, so the returned list is
            # homogeneous and its entries can be compared with each other.
            energies.append(1e20)
    return energies
Example #4
0
def startPageCallback(parameters, handler):
    """Create the global PDBFixer from the submitted form and show the next page.

    Does nothing unless `parameters` contains a 'type' field.  When that field
    is 'local', the structure is parsed from the uploaded 'pdbfile' field.
    Otherwise the 'pdbid' field is used to download the gzipped entry from the
    wwPDB FTP archive.  On success the module-level `fixer` is replaced and
    displayDeleteChainsPage() is called.

    Parameters
    ----------
    parameters : form data supporting `in`, item access and getfirst()
        the submitted form fields
    handler : object with a sendResponse() method
        used to send the error page when the download branch fails
    """
    global fixer
    if 'type' in parameters:
        if parameters.getfirst('type') == 'local':
            pdb = PdbStructure(parameters['pdbfile'].value.splitlines())
            fixer = PDBFixer(pdb)
            displayDeleteChainsPage()
        else:
            try:
                # Built inside the try block so that a missing 'pdbid' field
                # (getfirst() returning None) produces the error page too.
                pdbId = parameters.getfirst('pdbid')
                url = "ftp://ftp.wwpdb.org/pub/pdb/data/structures/all/pdb/pdb" + pdbId.lower(
                ) + ".ent.gz"
                response = urllib2.urlopen(url)
                try:
                    compressed = response.read()
                finally:
                    # Release the connection whether or not the read succeeded.
                    response.close()
                content = gzip.GzipFile(fileobj=StringIO(compressed)).read()
                pdb = PdbStructure(content.splitlines())
                fixer = PDBFixer(pdb)
                displayDeleteChainsPage()
            except Exception:
                # `Exception` rather than a bare except, so KeyboardInterrupt
                # and SystemExit are not swallowed.
                handler.sendResponse(
                    header +
                    "Unable to download the PDB file. This may indicate an invalid PDB identifier, or an error in network connectivity."
                    + loadHtmlFile("error.html"))
Example #5
0
    def __init__(self, file, extraParticleIdentifier='EP'):
        """Load a PDB file.

        The atom positions and Topology can be retrieved by calling getPositions() and getTopology().

        Parameters
        ----------
        file : string, PdbStructure, or other object accepted by PdbStructure
            the name of the file to load.  A PdbStructure is used as is.  Any other
            non-string object (for example an already open file) is handed to
            PdbStructure unchanged and is not closed here.
        extraParticleIdentifier : string='EP'
            if this value appears in the element column for an ATOM record, the Atom's element will be set to None to mark it as an extra particle.
            It is only forwarded to PdbStructure, so it is not consulted when `file` is already a PdbStructure.
        """

        metalElements = frozenset(['Al','As','Ba','Ca','Cd','Ce','Co','Cs','Cu','Dy','Fe','Gd','Hg','Ho','In','Ir','K','Li','Mg',
        'Mn','Mo','Na','Ni','Pb','Pd','Pt','Rb','Rh','Sm','Sr','Te','Tl','V','W','Yb','Zn'])

        top = Topology()
        ## The Topology read from the PDB file
        self.topology = top

        # Load the PDB file

        if isinstance(file, PdbStructure):
            pdb = file
        elif isinstance(file, str):
            # We opened this handle, so we close it, even if parsing raises.
            with open(file) as inputfile:
                pdb = PdbStructure(inputfile, load_all_models=True, extraParticleIdentifier=extraParticleIdentifier)
        else:
            pdb = PdbStructure(file, load_all_models=True, extraParticleIdentifier=extraParticleIdentifier)
        PDBFile._loadNameReplacementTables()

        # Build the topology

        atomByNumber = {}
        for chain in pdb.iter_chains():
            c = top.addChain(chain.chain_id)
            for residue in chain.iter_residues():
                resName = residue.get_name()
                if resName in PDBFile._residueNameReplacements:
                    resName = PDBFile._residueNameReplacements[resName]
                r = top.addResidue(resName, c, str(residue.number))
                if resName in PDBFile._atomNameReplacements:
                    atomReplacements = PDBFile._atomNameReplacements[resName]
                else:
                    atomReplacements = {}
                for atom in residue.atoms:
                    atomName = atom.get_name()
                    if atomName in atomReplacements:
                        atomName = atomReplacements[atomName]
                    atomName = atomName.strip()
                    element = atom.element
                    if element == 'EP':
                        # Extra particles carry no element.
                        element = None
                    elif element is None:
                        # Try to guess the element from the atom name, ignoring leading digits.

                        upper = atomName.upper()
                        while len(upper) > 1 and upper[0].isdigit():
                            upper = upper[1:]
                        if upper.startswith('CL'):
                            element = elem.chlorine
                        elif upper.startswith('NA'):
                            element = elem.sodium
                        elif upper.startswith('MG'):
                            element = elem.magnesium
                        elif upper.startswith('BE'):
                            element = elem.beryllium
                        elif upper.startswith('LI'):
                            element = elem.lithium
                        elif upper.startswith('K'):
                            element = elem.potassium
                        elif upper.startswith('ZN'):
                            element = elem.zinc
                        elif len(residue) == 1 and upper.startswith('CA'):
                            # 'CA' is only read as calcium when len(residue) is 1.
                            element = elem.calcium
                        else:
                            try:
                                element = elem.get_by_symbol(upper[0])
                            except KeyError:
                                pass
                    newAtom = top.addAtom(atomName, element, r, str(atom.serial_number))
                    atomByNumber[atom.serial_number] = newAtom

        # Collect one list of positions per model, in the same atom order as above.
        self._positions = []
        for model in pdb.iter_models(True):
            coords = []
            for chain in model.iter_chains():
                for residue in chain.iter_residues():
                    for atom in residue.atoms:
                        pos = atom.get_position().value_in_unit(nanometers)
                        coords.append(Vec3(pos[0], pos[1], pos[2]))
            self._positions.append(coords*nanometers)
        ## The atom positions read from the PDB file.  If the file contains multiple frames, these are the positions in the first frame.
        self.positions = self._positions[0]
        self.topology.setPeriodicBoxVectors(pdb.get_periodic_box_vectors())
        self.topology.createStandardBonds()
        self.topology.createDisulfideBonds(self.positions)
        self._numpyPositions = None

        # Add bonds based on CONECT records. Bonds between metals of elements specified in metalElements and residues in standardResidues are not added.

        connectBonds = []
        for connect in pdb.models[-1].connects:
            i = connect[0]
            if i not in atomByNumber:
                continue
            atom1 = atomByNumber[i]
            for j in connect[1:]:
                if j not in atomByNumber:
                    continue
                atom2 = atomByNumber[j]
                if atom1.element is None or atom2.element is None:
                    # No element information to filter on: always keep the bond.
                    connectBonds.append((atom1, atom2))
                    continue
                isMetal1 = atom1.element.symbol in metalElements
                isMetal2 = atom2.element.symbol in metalElements
                if not isMetal1 and not isMetal2:
                    connectBonds.append((atom1, atom2))
                elif isMetal1 and atom2.residue.name not in PDBFile._standardResidues:
                    connectBonds.append((atom1, atom2))
                elif isMetal2 and atom1.residue.name not in PDBFile._standardResidues:
                    connectBonds.append((atom1, atom2))
        if len(connectBonds) > 0:
            # Only add bonds that don't already exist.
            existingBonds = set(top.bonds())
            for bond in connectBonds:
                if bond not in existingBonds and (bond[1], bond[0]) not in existingBonds:
                    top.addBond(bond[0], bond[1])
                    existingBonds.add(bond)
Example #6
0
    def __init__(self, filename=None, pdbfile=None, url=None, pdbid=None):
        """Create a new PDBFixer instance to fix problems in a PDB file.
        
        Parameters
        ----------
        filename : str, optional, default=None
            A filename specifying the file from which the PDB file is to be read.
        pdbfile : file, optional, default=None
            A file-like object from which the PDB file is to be read.
            The file is not closed after reading.
        url : str, optional, default=None
            A URL specifying the internet location from which the PDB file contents should be retrieved.
        pdbid : str, optional, default=None
            A four-letter PDB code specifying the structure to be retrieved from the RCSB.
            
        Notes
        -----
        Exactly one of filename, pdbfile, url, or pdbid must be specified or an exception will be thrown.
        An exception is also thrown if the structure that was read contains no atoms.
        After construction, `source` holds the filename or URL that was read (None when `pdbfile` was given).
            
        Examples
        --------
        
        Start from a filename.
        
        >>> filename = resource_filename('pdbfixer', 'tests/data/test.pdb')
        >>> fixer = PDBFixer(filename=filename)

        Start from a file object.

        >>> with open(filename) as f:
        ...     fixer = PDBFixer(pdbfile=f)

        Start from a URL.

        >>> fixer = PDBFixer(url='http://www.rcsb.org/pdb/files/1VII.pdb')

        Start from a PDB code.
        
        >>> fixer = PDBFixer(pdbid='1VII')

        """

        # Check to make sure only one option has been specified.
        if bool(filename) + bool(pdbfile) + bool(url) + bool(pdbid) != 1:
            raise Exception("Exactly one option [filename, pdbfile, url, pdbid] must be specified.")

        self.source = None
        if filename:
            self.source = filename
            # A local file has been specified.  The handle is closed even if parsing raises.
            with open(filename, 'r') as localFile:
                structure = PdbStructure(localFile)
        elif pdbfile:
            # A file-like object has been specified.  The caller keeps ownership of it.
            structure = PdbStructure(pdbfile)
        elif url:
            self.source = url
            # A URL has been specified.
            remote = urlopen(url)
            try:
                structure = PdbStructure(remote)
            finally:
                remote.close()
        elif pdbid:
            # A PDB id has been specified.
            url = 'http://www.rcsb.org/pdb/files/%s.pdb' % pdbid
            self.source = url
            remote = urlopen(url)
            try:
                # Read contents all at once and split into lines, since urlopen doesn't like it when we read one line at a time over the network.
                contents = remote.read().decode('utf-8')
            finally:
                remote.close()
            structure = PdbStructure(contents.split('\n'))

        # Check the structure has some atoms in it.
        atoms = list(structure.iter_atoms())
        if not atoms:
            raise Exception("Structure contains no atoms.")

        self.structure = structure
        self.pdb = app.PDBFile(structure)
        self.topology = self.pdb.topology
        self.positions = self.pdb.positions

        # Load the templates: every file in the 'templates' directory next to this module,
        # keyed by the name of the first residue it contains.

        self.templates = {}
        templatesPath = os.path.join(os.path.dirname(__file__), 'templates')
        for templateName in os.listdir(templatesPath):
            templatePdb = app.PDBFile(os.path.join(templatesPath, templateName))
            name = next(templatePdb.topology.residues()).name
            self.templates[name] = templatePdb
Example #7
0
    def __init__(self, file):
        """Load a PDB file.

        The atom positions and Topology can be retrieved by calling getPositions() and getTopology().

        Parameters
        ----------
        file : string, PdbStructure, or other object accepted by PdbStructure
            the name of the file to load.  A PdbStructure is used as is.  Any other
            non-string object (for example an already open file) is handed to
            PdbStructure unchanged and is not closed here.
        """
        top = Topology()
        ## The Topology read from the PDB file
        self.topology = top

        # Load the PDB file

        if isinstance(file, PdbStructure):
            pdb = file
        elif isinstance(file, str):
            # We opened this handle, so we close it, even if parsing raises.
            with open(file) as inputfile:
                pdb = PdbStructure(inputfile, load_all_models=True)
        else:
            pdb = PdbStructure(file, load_all_models=True)
        PDBFile._loadNameReplacementTables()

        # Build the topology

        atomByNumber = {}
        for chain in pdb.iter_chains():
            c = top.addChain(chain.chain_id)
            for residue in chain.iter_residues():
                resName = residue.get_name()
                if resName in PDBFile._residueNameReplacements:
                    resName = PDBFile._residueNameReplacements[resName]
                r = top.addResidue(resName, c, str(residue.number))
                if resName in PDBFile._atomNameReplacements:
                    atomReplacements = PDBFile._atomNameReplacements[resName]
                else:
                    atomReplacements = {}
                for atom in residue.atoms:
                    atomName = atom.get_name()
                    if atomName in atomReplacements:
                        atomName = atomReplacements[atomName]
                    atomName = atomName.strip()
                    element = atom.element
                    if element is None:
                        # Try to guess the element from the atom name.

                        upper = atomName.upper()
                        if upper.startswith('CL'):
                            element = elem.chlorine
                        elif upper.startswith('NA'):
                            element = elem.sodium
                        elif upper.startswith('MG'):
                            element = elem.magnesium
                        elif upper.startswith('BE'):
                            element = elem.beryllium
                        elif upper.startswith('LI'):
                            element = elem.lithium
                        elif upper.startswith('K'):
                            element = elem.potassium
                        elif upper.startswith('ZN'):
                            element = elem.zinc
                        elif len(residue) == 1 and upper.startswith('CA'):
                            # 'CA' is only read as calcium when len(residue) is 1.
                            element = elem.calcium
                        else:
                            try:
                                element = elem.get_by_symbol(atomName[0])
                            except KeyError:
                                pass
                    newAtom = top.addAtom(atomName, element, r, str(atom.serial_number))
                    atomByNumber[atom.serial_number] = newAtom

        # Collect one list of positions per model, in the same atom order as above.
        self._positions = []
        for model in pdb.iter_models(True):
            coords = []
            for chain in model.iter_chains():
                for residue in chain.iter_residues():
                    for atom in residue.atoms:
                        pos = atom.get_position().value_in_unit(nanometers)
                        coords.append(Vec3(pos[0], pos[1], pos[2]))
            self._positions.append(coords*nanometers)
        ## The atom positions read from the PDB file.  If the file contains multiple frames, these are the positions in the first frame.
        self.positions = self._positions[0]
        self.topology.setPeriodicBoxVectors(pdb.get_periodic_box_vectors())
        self.topology.createStandardBonds()
        self.topology.createDisulfideBonds(self.positions)
        self._numpyPositions = None

        # Add bonds based on CONECT records.  CONECT entries that mention a serial
        # number not seen while building the topology are ignored.

        connectBonds = []
        for connect in pdb.models[0].connects:
            i = connect[0]
            for j in connect[1:]:
                if i in atomByNumber and j in atomByNumber:
                    connectBonds.append((atomByNumber[i], atomByNumber[j]))
        if len(connectBonds) > 0:
            # Only add bonds that don't already exist.
            existingBonds = set(top.bonds())
            for bond in connectBonds:
                if bond not in existingBonds and (bond[1], bond[0]) not in existingBonds:
                    top.addBond(bond[0], bond[1])
                    existingBonds.add(bond)
Example #8
0
    def __init__(self, file, extraParticleIdentifier='EP'):
        """Load a PDB file.

        The atom positions and Topology can be retrieved by calling getPositions() and getTopology().

        Parameters
        ----------
        file : string, PdbStructure, or other object accepted by PdbStructure
            the name of the file to load.  A PdbStructure is used as is.  Any other
            non-string object (for example an already open file) is handed to
            PdbStructure unchanged and is not closed here.
        extraParticleIdentifier : string='EP'
            if this value appears in the element column for an ATOM record, the Atom's element will be set to None to mark it as an extra particle.
            It is only forwarded to PdbStructure, so it is not consulted when `file` is already a PdbStructure.
        """

        metalElements = frozenset([
            'Al', 'As', 'Ba', 'Ca', 'Cd', 'Ce', 'Co', 'Cs', 'Cu', 'Dy', 'Fe',
            'Gd', 'Hg', 'Ho', 'In', 'Ir', 'K', 'Li', 'Mg', 'Mn', 'Mo', 'Na',
            'Ni', 'Pb', 'Pd', 'Pt', 'Rb', 'Rh', 'Sm', 'Sr', 'Te', 'Tl', 'V',
            'W', 'Yb', 'Zn'
        ])

        top = Topology()
        ## The Topology read from the PDB file
        self.topology = top

        # Load the PDB file

        if isinstance(file, PdbStructure):
            pdb = file
        elif isinstance(file, str):
            # We opened this handle, so we close it, even if parsing raises.
            with open(file) as inputfile:
                pdb = PdbStructure(
                    inputfile,
                    load_all_models=True,
                    extraParticleIdentifier=extraParticleIdentifier)
        else:
            pdb = PdbStructure(file,
                               load_all_models=True,
                               extraParticleIdentifier=extraParticleIdentifier)
        PDBFile._loadNameReplacementTables()

        # Build the topology

        atomByNumber = {}
        for chain in pdb.iter_chains():
            c = top.addChain(chain.chain_id)
            for residue in chain.iter_residues():
                resName = residue.get_name()
                if resName in PDBFile._residueNameReplacements:
                    resName = PDBFile._residueNameReplacements[resName]
                r = top.addResidue(resName, c, str(residue.number),
                                   residue.insertion_code)
                if resName in PDBFile._atomNameReplacements:
                    atomReplacements = PDBFile._atomNameReplacements[resName]
                else:
                    atomReplacements = {}
                for atom in residue.iter_atoms():
                    atomName = atom.get_name()
                    if atomName in atomReplacements:
                        atomName = atomReplacements[atomName]
                    atomName = atomName.strip()
                    element = atom.element
                    if element == 'EP':
                        # Extra particles carry no element.
                        element = None
                    elif element is None:
                        # Try to guess the element from the atom name,
                        # ignoring leading digits.

                        upper = atomName.upper()
                        while len(upper) > 1 and upper[0].isdigit():
                            upper = upper[1:]
                        if upper.startswith('CL'):
                            element = elem.chlorine
                        elif upper.startswith('NA'):
                            element = elem.sodium
                        elif upper.startswith('MG'):
                            element = elem.magnesium
                        elif upper.startswith('BE'):
                            element = elem.beryllium
                        elif upper.startswith('LI'):
                            element = elem.lithium
                        elif upper.startswith('K'):
                            element = elem.potassium
                        elif upper.startswith('ZN'):
                            element = elem.zinc
                        elif len(residue) == 1 and upper.startswith('CA'):
                            # 'CA' is only read as calcium when len(residue)
                            # is 1.
                            element = elem.calcium
                        else:
                            try:
                                element = elem.get_by_symbol(upper[0])
                            except KeyError:
                                pass
                    newAtom = top.addAtom(atomName, element, r,
                                          str(atom.serial_number))
                    atomByNumber[atom.serial_number] = newAtom

        # Collect one list of positions per model, in the same atom order as
        # above.
        self._positions = []
        for model in pdb.iter_models(True):
            coords = []
            for chain in model.iter_chains():
                for residue in chain.iter_residues():
                    for atom in residue.iter_atoms():
                        pos = atom.get_position().value_in_unit(nanometers)
                        coords.append(Vec3(pos[0], pos[1], pos[2]))
            self._positions.append(coords * nanometers)
        ## The atom positions read from the PDB file.  If the file contains multiple frames, these are the positions in the first frame.
        self.positions = self._positions[0]
        self.topology.setPeriodicBoxVectors(pdb.get_periodic_box_vectors())
        self.topology.createStandardBonds()
        self.topology.createDisulfideBonds(self.positions)
        self._numpyPositions = None

        # Add bonds based on CONECT records. Bonds between metals of elements specified in metalElements and residues in standardResidues are not added.

        connectBonds = []
        for connect in pdb.models[-1].connects:
            i = connect[0]
            if i not in atomByNumber:
                continue
            atom1 = atomByNumber[i]
            for j in connect[1:]:
                if j not in atomByNumber:
                    continue
                atom2 = atomByNumber[j]
                if atom1.element is None or atom2.element is None:
                    # No element information to filter on: always keep the
                    # bond.
                    connectBonds.append((atom1, atom2))
                    continue
                isMetal1 = atom1.element.symbol in metalElements
                isMetal2 = atom2.element.symbol in metalElements
                if not isMetal1 and not isMetal2:
                    connectBonds.append((atom1, atom2))
                elif (isMetal1 and
                      atom2.residue.name not in PDBFile._standardResidues):
                    connectBonds.append((atom1, atom2))
                elif (isMetal2 and
                      atom1.residue.name not in PDBFile._standardResidues):
                    connectBonds.append((atom1, atom2))
        if len(connectBonds) > 0:
            # Only add bonds that don't already exist.
            existingBonds = set(top.bonds())
            for bond in connectBonds:
                if bond not in existingBonds and (
                        bond[1], bond[0]) not in existingBonds:
                    top.addBond(bond[0], bond[1])
                    existingBonds.add(bond)
Example #9
0
    def __init__(self, file):
        """Load a PDB file.

        The atom positions and Topology can be retrieved by calling getPositions() and getTopology().

        Parameters:
         - file (string, PdbStructure, or other object accepted by PdbStructure) the name of the file to load.
           A PdbStructure is used as is.  Any other non-string object (for example an already open file)
           is handed to PdbStructure unchanged and is not closed here.
        """
        top = Topology()
        ## The Topology read from the PDB file
        self.topology = top

        # Load the PDB file

        if isinstance(file, PdbStructure):
            pdb = file
        elif isinstance(file, str):
            # We opened this handle, so we must close it; previously it was
            # left open for the garbage collector to reclaim.
            with open(file) as inputfile:
                pdb = PdbStructure(inputfile, load_all_models=True)
        else:
            pdb = PdbStructure(file, load_all_models=True)
        PDBFile._loadNameReplacementTables()

        # Build the topology

        atomByNumber = {}
        for chain in pdb.iter_chains():
            c = top.addChain()
            for residue in chain.iter_residues():
                resName = residue.get_name()
                if resName in PDBFile._residueNameReplacements:
                    resName = PDBFile._residueNameReplacements[resName]
                r = top.addResidue(resName, c)
                if resName in PDBFile._atomNameReplacements:
                    atomReplacements = PDBFile._atomNameReplacements[resName]
                else:
                    atomReplacements = {}
                for atom in residue.atoms:
                    atomName = atom.get_name()
                    if atomName in atomReplacements:
                        atomName = atomReplacements[atomName]
                    atomName = atomName.strip()
                    element = atom.element
                    if element is None:
                        # Try to guess the element from the atom name.

                        upper = atomName.upper()
                        if upper.startswith('CL'):
                            element = elem.chlorine
                        elif upper.startswith('NA'):
                            element = elem.sodium
                        elif upper.startswith('MG'):
                            element = elem.magnesium
                        elif upper.startswith('BE'):
                            element = elem.beryllium
                        elif upper.startswith('LI'):
                            element = elem.lithium
                        elif upper.startswith('K'):
                            element = elem.potassium
                        elif len(residue) == 1 and upper.startswith('CA'):
                            # 'CA' is only read as calcium when len(residue)
                            # is 1.
                            element = elem.calcium
                        else:
                            try:
                                element = elem.get_by_symbol(atomName[0])
                            except KeyError:
                                pass
                    newAtom = top.addAtom(atomName, element, r)
                    atomByNumber[atom.serial_number] = newAtom

        # Collect one list of positions per model, in the same atom order as
        # above.
        self._positions = []
        for model in pdb.iter_models(True):
            coords = []
            for chain in model.iter_chains():
                for residue in chain.iter_residues():
                    for atom in residue.atoms:
                        pos = atom.get_position().value_in_unit(nanometers)
                        coords.append(Vec3(pos[0], pos[1], pos[2]))
            self._positions.append(coords * nanometers)
        ## The atom positions read from the PDB file.  If the file contains multiple frames, these are the positions in the first frame.
        self.positions = self._positions[0]
        self.topology.setUnitCellDimensions(pdb.get_unit_cell_dimensions())
        self.topology.createStandardBonds()
        self.topology.createDisulfideBonds(self.positions)
        self._numpyPositions = None

        # Add bonds based on CONECT records.  CONECT entries that mention a
        # serial number not seen while building the topology are ignored.

        connectBonds = []
        for connect in pdb.models[0].connects:
            i = connect[0]
            for j in connect[1:]:
                if i in atomByNumber and j in atomByNumber:
                    connectBonds.append((atomByNumber[i], atomByNumber[j]))
        if len(connectBonds) > 0:
            # Only add bonds that don't already exist.
            existingBonds = set(top.bonds())
            for bond in connectBonds:
                if bond not in existingBonds and (
                        bond[1], bond[0]) not in existingBonds:
                    top.addBond(bond[0], bond[1])
                    existingBonds.add(bond)
Example #10
0
 # Register the command-line options: output file, which missing atoms to add,
 # heterogen handling, nonstandard/missing residue handling, optional water box,
 # pH for hydrogens, and the ions / ionic strength used with the water box.
 parser.add_option('--output', default='output.pdb', dest='output', metavar='FILENAME', help='output pdb file [default: output.pdb]')
 parser.add_option('--add-atoms', default='all', dest='atoms', choices=('all', 'heavy', 'hydrogen', 'none'), help='which missing atoms to add: all, heavy, hydrogen, or none [default: all]')
 parser.add_option('--keep-heterogens', default='all', dest='heterogens', choices=('all', 'water', 'none'), metavar='OPTION', help='which heterogens to keep: all, water, or none [default: all]')
 parser.add_option('--replace-nonstandard', action='store_true', default=False, dest='nonstandard', help='replace nonstandard residues with standard equivalents')
 parser.add_option('--add-residues', action='store_true', default=False, dest='residues', help='add missing residues')
 parser.add_option('--water-box', dest='box', type='float', nargs=3, metavar='X Y Z', help='add a water box. The value is the box dimensions in nm [example: --water-box=2.5 2.4 3.0]')
 parser.add_option('--ph', type='float', default=7.0, dest='ph', help='the pH to use for adding missing hydrogens [default: 7.0]')
 parser.add_option('--positive-ion', default='Na+', dest='positiveIon', choices=('Cs+', 'K+', 'Li+', 'Na+', 'Rb+'), metavar='ION', help='positive ion to include in the water box: Cs+, K+, Li+, Na+, or Rb+ [default: Na+]')
 parser.add_option('--negative-ion', default='Cl-', dest='negativeIon', choices=('Cl-', 'Br-', 'F-', 'I-'), metavar='ION', help='negative ion to include in the water box: Cl-, Br-, F-, or I- [default: Cl-]')
 parser.add_option('--ionic-strength', type='float', default=0.0, dest='ionic', metavar='STRENGTH', help='molar concentration of ions to add to the water box [default: 0.0]')
 (options, args) = parser.parse_args()
 # Exactly one positional argument (the input PDB filename) is required;
 # parser.error() reports the problem and aborts.
 if len(args) == 0:
     parser.error('No filename specified')
 if len(args) > 1:
     parser.error('Must specify a single filename')
 # NOTE(review): the file object opened here is never explicitly closed.
 fixer = PDBFixer(PdbStructure(open(args[0])))
 # Only search for missing residues when --add-residues was given; otherwise
 # record an empty mapping (presumably so that later steps add none -- confirm).
 if options.residues:
     fixer.findMissingResidues()
 else:
     fixer.missingResidues = {}
 # Optionally detect nonstandard residues and replace them.
 if options.nonstandard:
     fixer.findNonstandardResidues()
     fixer.replaceNonstandardResidues()
 # Missing atoms are always searched for; when --add-atoms is neither 'all'
 # nor 'heavy', the results are cleared before addMissingAtoms() runs
 # (presumably so that no heavy atoms are added -- confirm against PDBFixer).
 fixer.findMissingAtoms()
 if options.atoms not in ('all', 'heavy'):
     fixer.missingAtoms = {}
     fixer.missingTerminals = {}
 fixer.addMissingAtoms()
 # Heterogen handling for --keep-heterogens=none.
 # NOTE(review): the boolean presumably means "keep water" -- confirm.
 if options.heterogens == 'none':
     fixer.removeHeterogens(False)
 elif options.heterogens == 'water':
Example #11
0
    def __init__(self, filename=None, file=None, url=None, pdbid=None):
        """Create a new PDBFixer instance to fix problems in a PDB file.

        Parameters
        ----------
        filename : str, optional, default=None
            A filename specifying the file from which the PDB file is to be read.
        file : file, optional, default=None
            A file-like object from which the PDB file is to be read.
            The file is not closed after reading.
        url : str, optional, default=None
            A URL specifying the internet location from which the PDB file contents should be retrieved.
        pdbid : str, optional, default=None
            A four-letter PDB code specifying the structure to be retrieved from the RCSB.

        Raises
        ------
        Exception
            If zero or more than one of the sources is specified, or if the
            structure that was read contains no atoms.

        Notes
        -----
        Exactly one of filename, file, url, or pdbid must be specified or an exception will be thrown.

        Examples
        --------

        Start from a file object.

        >>> pdbid = '1VII'
        >>> url = 'http://www.rcsb.org/pdb/files/%s.pdb' % pdbid
        >>> file = urlopen(url)
        >>> fixer = PDBFixer(file=file)

        Start from a filename.

        >>> filename = 'test.pdb'
        >>> file = urlopen(url)
        >>> outfile = open(filename, 'w')
        >>> outfile.write(file.read())
        >>> outfile.close()
        >>> fixer = PDBFixer(filename=filename)

        Start from a URL.

        >>> fixer = PDBFixer(url=url)

        Start from a PDB code.

        >>> fixer = PDBFixer(pdbid=pdbid)

        """

        # Check to make sure exactly one option has been specified.
        if bool(filename) + bool(file) + bool(url) + bool(pdbid) != 1:
            raise Exception("Exactly one option [filename, file, url, pdbid] must be specified.")

        self.source = None
        if filename:
            # A local file has been specified.
            self.source = filename
            # The context manager closes the file even if parsing raises.
            with open(filename, 'r') as pdbFile:
                structure = PdbStructure(pdbFile)
        elif file:
            # A file-like object has been specified; the caller keeps ownership
            # of it, so it is deliberately not closed here.
            structure = PdbStructure(file)
        elif url:
            # A URL has been specified.
            self.source = url
            remote = urlopen(url)
            try:
                structure = PdbStructure(remote)
            finally:
                remote.close()
        elif pdbid:
            # A PDB id has been specified.
            url = 'http://www.rcsb.org/pdb/files/%s.pdb' % pdbid
            self.source = url
            remote = urlopen(url)
            try:
                # Read contents all at once and split into lines, since urlopen
                # doesn't like it when we read one line at a time over the network.
                contents = remote.read()
            finally:
                remote.close()
            # On Python 3 the response body is bytes; decode it so that it can
            # be split on a text newline (bytes.split('\n') raises TypeError).
            if not isinstance(contents, str):
                contents = contents.decode('utf-8')
            structure = PdbStructure(contents.split('\n'))

        # Check the structure has some atoms in it.
        if not any(True for _ in structure.iter_atoms()):
            raise Exception("Structure contains no atoms.")

        self.structure = structure
        self.pdb = app.PDBFile(structure)
        self.topology = self.pdb.topology
        self.positions = self.pdb.positions
        self.centroid = unit.sum(self.positions)/len(self.positions)
        self.structureChains = list(self.structure.iter_chains())

        # Load the templates: every file in the 'templates' directory next to
        # this module is read as a PDB file and indexed by the name of its
        # first residue.
        self.templates = {}
        templatesPath = os.path.join(os.path.dirname(__file__), 'templates')
        # A distinct loop name avoids shadowing the 'file' parameter.
        for templateFile in os.listdir(templatesPath):
            templatePdb = app.PDBFile(os.path.join(templatesPath, templateFile))
            name = next(templatePdb.topology.residues()).name
            self.templates[name] = templatePdb