def __init__(self, file):
    """Load a PDBx/mmCIF file.

    The atom positions and Topology can be retrieved by calling
    getPositions() and getTopology().

    Only the first data block of the file is used.  Atoms are defined by
    the rows of the first model encountered in ``atom_site``; rows of any
    later model only contribute coordinates and must list the same atoms
    in the same order.

    Parameters
    ----------
    file : string
        the name of the file to load.  Alternatively you can pass an open
        file object.

    Raises
    ------
    ValueError
        if a later model references an atom that the first model did not
        define, or lists its atoms in a different order than the first
        model.
    """
    top = Topology()
    ## The Topology read from the PDBx/mmCIF file
    self.topology = top
    self._positions = []
    PDBFile._loadNameReplacementTables()

    # Load the file.  A handle opened here is always closed, even when the
    # reader raises part-way through parsing; a handle supplied by the
    # caller is left open for the caller to manage.

    inputFile = file
    ownHandle = False
    if isinstance(file, str):
        inputFile = open(file)
        ownHandle = True
    try:
        reader = PdbxReader(inputFile)
        data = []
        reader.read(data)
    finally:
        if ownHandle:
            inputFile.close()
    block = data[0]

    # Build the topology.  For every property the author-provided
    # ("auth_*") column is preferred and the "label_*" column is the
    # fallback; getAttributeIndex() reports a missing column as -1.

    atomData = block.getObj('atom_site')
    atomNameCol = atomData.getAttributeIndex('auth_atom_id')
    if atomNameCol == -1:
        atomNameCol = atomData.getAttributeIndex('label_atom_id')
    atomIdCol = atomData.getAttributeIndex('id')
    resNameCol = atomData.getAttributeIndex('auth_comp_id')
    if resNameCol == -1:
        resNameCol = atomData.getAttributeIndex('label_comp_id')
    resNumCol = atomData.getAttributeIndex('auth_seq_id')
    if resNumCol == -1:
        resNumCol = atomData.getAttributeIndex('label_seq_id')
    resInsertionCol = atomData.getAttributeIndex('pdbx_PDB_ins_code')
    chainIdCol = atomData.getAttributeIndex('auth_asym_id')
    if chainIdCol == -1:
        chainIdCol = atomData.getAttributeIndex('label_asym_id')
        altChainIdCol = -1
    else:
        altChainIdCol = atomData.getAttributeIndex('label_asym_id')
    if altChainIdCol != -1:
        # Figure out which column is best to use for chain IDs: the one
        # that distinguishes more chains becomes the primary column.
        idSet = set(row[chainIdCol] for row in atomData.getRowList())
        altIdSet = set(row[altChainIdCol] for row in atomData.getRowList())
        if len(altIdSet) > len(idSet):
            chainIdCol, altChainIdCol = (altChainIdCol, chainIdCol)
    elementCol = atomData.getAttributeIndex('type_symbol')
    altIdCol = atomData.getAttributeIndex('label_alt_id')
    modelCol = atomData.getAttributeIndex('pdbx_PDB_model_num')
    xCol = atomData.getAttributeIndex('Cartn_x')
    yCol = atomData.getAttributeIndex('Cartn_y')
    zCol = atomData.getAttributeIndex('Cartn_z')
    lastChainId = None
    lastAltChainId = None
    lastResId = None
    lastInsertionCode = ''
    atomTable = {}
    atomsInResidue = set()
    models = []
    for row in atomData.getRowList():
        atomKey = (row[resNumCol], row[chainIdCol], row[atomNameCol])
        model = ('1' if modelCol == -1 else row[modelCol])
        if model not in models:
            models.append(model)
            self._positions.append([])
        modelIndex = models.index(model)
        # Without a label_alt_id column no row can be an alternate
        # location.  The guard also stops row[-1] from silently reading
        # whatever happens to be the last column.
        hasAltId = altIdCol != -1 and row[altIdCol] != '.'
        if (hasAltId and atomKey in atomTable
                and len(self._positions[modelIndex]) > atomTable[atomKey].index):
            # This row is an alternate position for an existing atom, so ignore it.
            continue
        if modelIndex == 0:
            # This row defines a new atom.

            if resInsertionCol == -1:
                insertionCode = ''
            else:
                insertionCode = row[resInsertionCol]
            if insertionCode in ('.', '?'):
                insertionCode = ''
            if lastChainId != row[chainIdCol] or (
                    altChainIdCol != -1 and lastAltChainId != row[altChainIdCol]):
                # The start of a new chain.
                chain = top.addChain(row[chainIdCol])
                lastChainId = row[chainIdCol]
                lastResId = None
                if altChainIdCol != -1:
                    lastAltChainId = row[altChainIdCol]
            if (lastResId != row[resNumCol]
                    or lastChainId != row[chainIdCol]
                    or lastInsertionCode != insertionCode
                    or (lastResId == '.' and row[atomNameCol] in atomsInResidue)):
                # The start of a new residue.  When the residue number is
                # '.', a repeated atom name is the only available sign
                # that a new residue has begun.
                resId = (None if resNumCol == -1 else row[resNumCol])
                resIC = insertionCode
                resName = row[resNameCol]
                if resName in PDBFile._residueNameReplacements:
                    resName = PDBFile._residueNameReplacements[resName]
                res = top.addResidue(resName, chain, resId, resIC)
                if resName in PDBFile._atomNameReplacements:
                    atomReplacements = PDBFile._atomNameReplacements[resName]
                else:
                    atomReplacements = {}
                lastResId = row[resNumCol]
                lastInsertionCode = insertionCode
                atomsInResidue.clear()
            # An unrecognized element symbol leaves the element as None.
            element = None
            try:
                element = elem.get_by_symbol(row[elementCol])
            except KeyError:
                pass
            atomName = row[atomNameCol]
            if atomName in atomReplacements:
                atomName = atomReplacements[atomName]
            atom = top.addAtom(atomName, element, res, row[atomIdCol])
            atomTable[atomKey] = atom
            atomsInResidue.add(atomName)
        else:
            # This row defines coordinates for an existing atom in one of the later models.

            try:
                atom = atomTable[atomKey]
            except KeyError:
                raise ValueError(
                    'Unknown atom %s in residue %s %s for model %s'
                    % (row[atomNameCol], row[resNameCol], row[resNumCol], model))
            if atom.index != len(self._positions[modelIndex]):
                raise ValueError(
                    'Atom %s for model %s does not match the order of atoms for model %s'
                    % (row[atomIdCol], model, models[0]))
        # Coordinates are scaled by 0.1 before the nanometers unit is
        # attached below.
        self._positions[modelIndex].append(
            Vec3(float(row[xCol]), float(row[yCol]), float(row[zCol])) * 0.1)
    for i in range(len(self._positions)):
        self._positions[i] = self._positions[i] * nanometers
    ## The atom positions read from the PDBx/mmCIF file.  If the file contains multiple frames, these are the positions in the first frame.
    self.positions = self._positions[0]
    self.topology.createStandardBonds()
    self._numpyPositions = None

    # Record unit cell information, if present.

    cell = block.getObj('cell')
    if cell is not None and cell.getRowCount() > 0:
        row = cell.getRow(0)
        (a, b, c) = [
            float(row[cell.getAttributeIndex(attribute)]) * 0.1
            for attribute in ('length_a', 'length_b', 'length_c')
        ]
        (alpha, beta, gamma) = [
            float(row[cell.getAttributeIndex(attribute)]) * math.pi / 180.0
            for attribute in ('angle_alpha', 'angle_beta', 'angle_gamma')
        ]
        self.topology.setPeriodicBoxVectors(
            computePeriodicBoxVectors(a, b, c, alpha, beta, gamma))

    # Add bonds based on struct_conn records.

    connectData = block.getObj('struct_conn')
    if connectData is not None:
        res1Col = connectData.getAttributeIndex('ptnr1_label_seq_id')
        res2Col = connectData.getAttributeIndex('ptnr2_label_seq_id')
        atom1Col = connectData.getAttributeIndex('ptnr1_label_atom_id')
        atom2Col = connectData.getAttributeIndex('ptnr2_label_atom_id')
        asym1Col = connectData.getAttributeIndex('ptnr1_label_asym_id')
        asym2Col = connectData.getAttributeIndex('ptnr2_label_asym_id')
        typeCol = connectData.getAttributeIndex('conn_type_id')
        connectBonds = []
        for row in connectData.getRowList():
            # Only the first six characters of the connection type are
            # compared.
            connType = row[typeCol][:6]
            if connType in ('covale', 'disulf', 'modres'):
                key1 = (row[res1Col], row[asym1Col], row[atom1Col])
                key2 = (row[res2Col], row[asym2Col], row[atom2Col])
                if key1 in atomTable and key2 in atomTable:
                    connectBonds.append((atomTable[key1], atomTable[key2]))
        if len(connectBonds) > 0:
            # Only add bonds that don't already exist.
            existingBonds = set(top.bonds())
            for bond in connectBonds:
                if bond not in existingBonds and (bond[1], bond[0]) not in existingBonds:
                    top.addBond(bond[0], bond[1])
                    existingBonds.add(bond)
def __init__(self, file, extraParticleIdentifier='EP'):
    """Load a PDB file.

    The atom positions and Topology can be retrieved by calling
    getPositions() and getTopology().

    Parameters
    ----------
    file : string or file
        the name of the file to load.  Alternatively you can pass an open
        file object.  An already constructed PdbStructure is also accepted
        and is used as is.
    extraParticleIdentifier : string='EP'
        if this value appears in the element column for an ATOM record, the
        Atom's element will be set to None to mark it as an extra particle
    """
    # Only used for membership tests, so a frozenset is sufficient.
    metalElements = frozenset([
        'Al', 'As', 'Ba', 'Ca', 'Cd', 'Ce', 'Co', 'Cs', 'Cu', 'Dy', 'Fe',
        'Gd', 'Hg', 'Ho', 'In', 'Ir', 'K', 'Li', 'Mg', 'Mn', 'Mo', 'Na',
        'Ni', 'Pb', 'Pd', 'Pt', 'Rb', 'Rh', 'Sm', 'Sr', 'Te', 'Tl', 'V',
        'W', 'Yb', 'Zn',
    ])
    top = Topology()
    ## The Topology read from the PDB file
    self.topology = top

    # Load the PDB file

    if isinstance(file, PdbStructure):
        pdb = file
    else:
        inputfile = file
        own_handle = False
        if isinstance(file, str):
            inputfile = open(file)
            own_handle = True
        # A handle opened here is always closed, even when parsing fails;
        # a handle supplied by the caller is left open.
        try:
            pdb = PdbStructure(inputfile, load_all_models=True,
                               extraParticleIdentifier=extraParticleIdentifier)
        finally:
            if own_handle:
                inputfile.close()
    PDBFile._loadNameReplacementTables()

    # Build the topology

    atomByNumber = {}
    for chain in pdb.iter_chains():
        c = top.addChain(chain.chain_id)
        for residue in chain.iter_residues():
            resName = residue.get_name()
            if resName in PDBFile._residueNameReplacements:
                resName = PDBFile._residueNameReplacements[resName]
            r = top.addResidue(resName, c, str(residue.number), residue.insertion_code)
            if resName in PDBFile._atomNameReplacements:
                atomReplacements = PDBFile._atomNameReplacements[resName]
            else:
                atomReplacements = {}
            for atom in residue.iter_atoms():
                atomName = atom.get_name()
                if atomName in atomReplacements:
                    atomName = atomReplacements[atomName]
                atomName = atomName.strip()
                element = atom.element
                if element == 'EP':
                    # Extra particles carry no element.
                    element = None
                elif element is None:
                    # Try to guess the element from the atom name, ignoring
                    # any leading digits.
                    upper = atomName.upper()
                    while len(upper) > 1 and upper[0].isdigit():
                        upper = upper[1:]
                    if upper.startswith('CL'):
                        element = elem.chlorine
                    elif upper.startswith('NA'):
                        element = elem.sodium
                    elif upper.startswith('MG'):
                        element = elem.magnesium
                    elif upper.startswith('BE'):
                        element = elem.beryllium
                    elif upper.startswith('LI'):
                        element = elem.lithium
                    elif upper.startswith('K'):
                        element = elem.potassium
                    elif upper.startswith('ZN'):
                        element = elem.zinc
                    elif len(residue) == 1 and upper.startswith('CA'):
                        # 'CA' is only read as calcium when the residue
                        # has length 1.
                        element = elem.calcium
                    elif upper.startswith('D') and any(
                            a.name == atomName[1:] for a in residue.iter_atoms()):
                        pass  # A Drude particle
                    else:
                        try:
                            element = elem.get_by_symbol(upper[0])
                        except KeyError:
                            pass
                newAtom = top.addAtom(atomName, element, r, str(atom.serial_number))
                atomByNumber[atom.serial_number] = newAtom
    self._positions = []
    for model in pdb.iter_models(True):
        coords = []
        for chain in model.iter_chains():
            for residue in chain.iter_residues():
                for atom in residue.iter_atoms():
                    pos = atom.get_position().value_in_unit(nanometers)
                    coords.append(Vec3(pos[0], pos[1], pos[2]))
        self._positions.append(coords * nanometers)
    ## The atom positions read from the PDB file.  If the file contains multiple frames, these are the positions in the first frame.
    self.positions = self._positions[0]
    self.topology.setPeriodicBoxVectors(pdb.get_periodic_box_vectors())
    self.topology.createStandardBonds()
    self.topology.createDisulfideBonds(self.positions)
    self._numpyPositions = None

    # Add bonds based on CONECT records. Bonds between metals of elements specified in metalElements and residues in standardResidues are not added.

    connectBonds = []
    for connect in pdb.models[-1].connects:
        i = connect[0]
        for j in connect[1:]:
            if i not in atomByNumber or j not in atomByNumber:
                continue
            atomI = atomByNumber[i]
            atomJ = atomByNumber[j]
            if atomI.element is not None and atomJ.element is not None:
                metalI = atomI.element.symbol in metalElements
                metalJ = atomJ.element.symbol in metalElements
                if not metalI and not metalJ:
                    connectBonds.append((atomI, atomJ))
                elif metalI and atomJ.residue.name not in PDBFile._standardResidues:
                    connectBonds.append((atomI, atomJ))
                elif metalJ and atomI.residue.name not in PDBFile._standardResidues:
                    connectBonds.append((atomI, atomJ))
            else:
                # At least one partner has no element, so the metal filter
                # cannot be applied; the bond is kept.
                connectBonds.append((atomI, atomJ))
    if len(connectBonds) > 0:
        # Only add bonds that don't already exist.
        existingBonds = set(top.bonds())
        for bond in connectBonds:
            if bond not in existingBonds and (bond[1], bond[0]) not in existingBonds:
                top.addBond(bond[0], bond[1])
                existingBonds.add(bond)
def extract(item, atom_indices='all', structure_indices='all', copy_if_all=True, check=True):
    """Return an openmm Topology holding all atoms of ``item`` or a subset of them.

    Parameters
    ----------
    item : openmm.app.Topology
        The topology to extract from.
    atom_indices : 'all' or iterable of int
        Indices (``atom.index``) of the atoms to keep.  ``'all'`` keeps
        every atom.
    structure_indices : 'all' or iterable of int
        Does not filter anything here.  It only takes part in deciding
        whether ``item`` may be handed back without copying.
    copy_if_all : bool
        When both selections are ``'all'``: return a fresh copy if True,
        or ``item`` itself if False.
    check : bool
        When True the arguments are first passed through ``digest_item``,
        ``digest_atom_indices`` and ``digest_structure_indices`` (defined
        elsewhere; presumably they validate and normalize the inputs).

    Returns
    -------
    openmm.app.Topology
        ``item`` itself (only when everything is selected and
        ``copy_if_all`` is False) or a new topology with the selected
        atoms, the bonds whose two atoms were both kept, and the periodic
        box vectors of ``item``.

    Notes
    -----
    In a subset extraction, chains and residues without any selected atom
    are dropped.  In a full copy they are all reproduced, including empty
    ones.
    """
    if check:
        digest_item(item, 'openmm.Topology')
        atom_indices = digest_atom_indices(atom_indices)
        structure_indices = digest_structure_indices(structure_indices)

    # Compare by value, not identity: `x is 'all'` depends on string
    # interning.  The isinstance guard keeps array-like index containers
    # from being compared element-wise against the string.
    keep_all_atoms = isinstance(atom_indices, str) and atom_indices == 'all'
    keep_all_structures = isinstance(structure_indices, str) and structure_indices == 'all'

    if keep_all_atoms and keep_all_structures and not copy_if_all:
        return item

    # Imported only once a new Topology is actually needed, so the
    # pass-through path above does not require openmm.
    from openmm.app import Topology

    new_item = Topology()
    new_atoms = {}

    if keep_all_atoms:
        # Full copy.  This path is also taken when only structure_indices
        # is restricted, which previously went through set('all') and
        # produced an empty topology.
        for chain in item.chains():
            new_chain = new_item.addChain(chain.id)
            for residue in chain.residues():
                new_residue = new_item.addResidue(residue.name, new_chain,
                                                  residue.id, residue.insertionCode)
                for atom in residue.atoms():
                    new_atoms[atom] = new_item.addAtom(atom.name, atom.element,
                                                       new_residue, atom.id)
        for bond in item.bonds():
            new_item.addBond(new_atoms[bond[0]], new_atoms[bond[1]])
    else:
        kept = set(atom_indices)
        for chain in item.chains():
            # Chains and residues are created lazily, on their first kept atom.
            need_new_chain = True
            for residue in chain.residues():
                need_new_residue = True
                for atom in residue.atoms():
                    if atom.index not in kept:
                        continue
                    if need_new_chain:
                        new_chain = new_item.addChain(chain.id)
                        need_new_chain = False
                    if need_new_residue:
                        new_residue = new_item.addResidue(residue.name, new_chain,
                                                          residue.id, residue.insertionCode)
                        need_new_residue = False
                    new_atoms[atom] = new_item.addAtom(atom.name, atom.element,
                                                       new_residue, atom.id)
        for bond in item.bonds():
            if bond[0].index in kept and bond[1].index in kept:
                new_item.addBond(new_atoms[bond[0]], new_atoms[bond[1]])

    new_item.setPeriodicBoxVectors(item.getPeriodicBoxVectors())
    return new_item
def _createTopology(self):
    """Build the topology of the system.

    Reads every connection in ``self._conn`` in order and merges the
    particles and bonds found there into a single Topology.  The unit
    cell is taken from the ``global_cell`` table of the first connection
    only.  As a side effect, the number of atoms read from each file is
    stored in ``self._natoms``.

    Returns
    -------
    tuple
        ``(topology, positions, velocities)``: the positions are the
        stored x, y, z values multiplied by ``angstrom`` and the
        velocities are the stored vx, vy, vz values multiplied by
        ``angstrom / femtosecond``.
    """
    topology = Topology()
    positions = []
    velocities = []

    # assume cell dimensions are set in the first file
    # the other molecules inherit the same cell
    first_conn = self._conn[0]
    boxVectors = [
        mm.Vec3(x, y, z)
        for x, y, z in first_conn.execute('SELECT x, y, z FROM global_cell')
    ]
    # Only the diagonal of the cell matrix is used.
    diagonal = [boxVectors[0][0], boxVectors[1][1], boxVectors[2][2]]
    topology.setUnitCellDimensions(diagonal * angstrom)

    particle_query = """SELECT id, name, anum, resname, resid, chain, x, y, z, vx, vy, vz FROM particle ORDER BY id"""

    # process each file
    for file_index, (conn, _tables) in enumerate(zip(self._conn, self._tables)):
        atoms = {}
        previous_chain = None
        previous_res_id = None
        # A chain is opened for every file before its particles are read.
        current_chain = topology.addChain()
        for (atom_id, atom_name, atomic_number, res_name, res_id, chain_label,
             x, y, z, vx, vy, vz) in conn.execute(particle_query):
            chain_changed = chain_label != previous_chain
            if chain_changed:
                previous_chain = chain_label
                current_chain = topology.addChain()
            if chain_changed or res_id != previous_res_id:
                previous_res_id = res_id
                res_name = (PDBFile._residueNameReplacements[res_name]
                            if res_name in PDBFile._residueNameReplacements
                            else res_name)
                current_residue = topology.addResidue(res_name, current_chain)
                atom_replacements = (PDBFile._atomNameReplacements[res_name]
                                     if res_name in PDBFile._atomNameReplacements
                                     else {})
            # Particles with atomic number 0 whose name starts with 'Vrt'
            # get no element.
            is_virtual = atomic_number == 0 and atom_name.startswith('Vrt')
            element = None if is_virtual else Element.getByAtomicNumber(atomic_number)
            if atom_name in atom_replacements:
                atom_name = atom_replacements[atom_name]
            atoms[atom_id] = topology.addAtom(atom_name, element, current_residue)
            positions.append(mm.Vec3(x, y, z))
            velocities.append(mm.Vec3(vx, vy, vz))
        self._natoms[file_index] = len(atoms)
        # Bond endpoints are particle ids of the same file.
        for first_id, second_id in conn.execute('SELECT p0, p1 FROM bond'):
            topology.addBond(atoms[first_id], atoms[second_id])

    positions = positions * angstrom
    velocities = velocities * angstrom / femtosecond
    return topology, positions, velocities