def __init__(self, file):
    """Load a PDBx/mmCIF file.

    The atom positions and Topology can be retrieved by calling
    getPositions() and getTopology().

    Only the first data block of the file is used.  Atoms are created from
    the rows of the first model found in the ``atom_site`` category; rows
    belonging to later models only contribute coordinates and must list the
    atoms in the same order as the first model.

    Parameters
    ----------
    file : string or file
        the name of the file to load.  Alternatively you can pass an open
        file object, which is left open for the caller to close.

    Raises
    ------
    ValueError
        if a later model refers to an atom that the first model did not
        define, or lists its atoms in a different order than the first model.
    """
    top = Topology()
    ## The Topology read from the PDBx/mmCIF file
    self.topology = top
    self._positions = []
    PDBFile._loadNameReplacementTables()

    # Load the file.  A handle opened here is closed even if parsing fails;
    # a handle supplied by the caller is never closed.

    data = []
    if isinstance(file, str):
        with open(file) as inputFile:
            PdbxReader(inputFile).read(data)
    else:
        PdbxReader(file).read(data)
    block = data[0]

    # Build the topology.  For each attribute the author-provided ("auth_")
    # column is preferred and the "label_" column is the fallback.

    atomData = block.getObj('atom_site')
    atomNameCol = atomData.getAttributeIndex('auth_atom_id')
    if atomNameCol == -1:
        atomNameCol = atomData.getAttributeIndex('label_atom_id')
    atomIdCol = atomData.getAttributeIndex('id')
    resNameCol = atomData.getAttributeIndex('auth_comp_id')
    if resNameCol == -1:
        resNameCol = atomData.getAttributeIndex('label_comp_id')
    resNumCol = atomData.getAttributeIndex('auth_seq_id')
    if resNumCol == -1:
        resNumCol = atomData.getAttributeIndex('label_seq_id')
    resInsertionCol = atomData.getAttributeIndex('pdbx_PDB_ins_code')
    chainIdCol = atomData.getAttributeIndex('auth_asym_id')
    if chainIdCol == -1:
        chainIdCol = atomData.getAttributeIndex('label_asym_id')
        altChainIdCol = -1
    else:
        altChainIdCol = atomData.getAttributeIndex('label_asym_id')
    if altChainIdCol != -1:
        # Figure out which column is best to use for chain IDs: the one that
        # distinguishes more chains becomes the primary column.
        idSet = set(row[chainIdCol] for row in atomData.getRowList())
        altIdSet = set(row[altChainIdCol] for row in atomData.getRowList())
        if len(altIdSet) > len(idSet):
            chainIdCol, altChainIdCol = altChainIdCol, chainIdCol
    elementCol = atomData.getAttributeIndex('type_symbol')
    altIdCol = atomData.getAttributeIndex('label_alt_id')
    modelCol = atomData.getAttributeIndex('pdbx_PDB_model_num')
    xCol = atomData.getAttributeIndex('Cartn_x')
    yCol = atomData.getAttributeIndex('Cartn_y')
    zCol = atomData.getAttributeIndex('Cartn_z')
    lastChainId = None
    lastAltChainId = None
    lastResId = None
    lastInsertionCode = ''
    atomTable = {}          # (residue number, chain id, atom name) -> Atom
    atomsInResidue = set()
    models = []             # model identifiers in order of first appearance
    modelIndices = {}       # model identifier -> index into self._positions
    for row in atomData.getRowList():
        atomKey = (row[resNumCol], row[chainIdCol], row[atomNameCol])
        model = '1' if modelCol == -1 else row[modelCol]
        modelIndex = modelIndices.get(model)
        if modelIndex is None:
            modelIndex = len(models)
            modelIndices[model] = modelIndex
            models.append(model)
            self._positions.append([])
        modelPositions = self._positions[modelIndex]
        # Only consult the alternate-location column when it exists; an index
        # of -1 would otherwise silently read the last column of the row.
        if (altIdCol != -1 and row[altIdCol] != '.' and atomKey in atomTable
                and len(modelPositions) > atomTable[atomKey].index):
            # This row is an alternate position for an existing atom, so ignore it.
            continue
        if modelIndex == 0:
            # This row defines a new atom.

            if resInsertionCol == -1:
                insertionCode = ''
            else:
                insertionCode = row[resInsertionCol]
            if insertionCode in ('.', '?'):
                insertionCode = ''
            if lastChainId != row[chainIdCol] or (
                    altChainIdCol != -1 and lastAltChainId != row[altChainIdCol]):
                # The start of a new chain.
                chain = top.addChain(row[chainIdCol])
                lastChainId = row[chainIdCol]
                lastResId = None
                if altChainIdCol != -1:
                    lastAltChainId = row[altChainIdCol]
            # A residue number of '.' cannot separate consecutive residues,
            # so a repeated atom name is also treated as a residue boundary.
            if (lastResId != row[resNumCol] or lastChainId != row[chainIdCol]
                    or lastInsertionCode != insertionCode
                    or (lastResId == '.' and row[atomNameCol] in atomsInResidue)):
                # The start of a new residue.
                resId = None if resNumCol == -1 else row[resNumCol]
                resIC = insertionCode
                resName = row[resNameCol]
                if resName in PDBFile._residueNameReplacements:
                    resName = PDBFile._residueNameReplacements[resName]
                res = top.addResidue(resName, chain, resId, resIC)
                if resName in PDBFile._atomNameReplacements:
                    atomReplacements = PDBFile._atomNameReplacements[resName]
                else:
                    atomReplacements = {}
                lastResId = row[resNumCol]
                lastInsertionCode = insertionCode
                atomsInResidue.clear()
            # An unknown or missing element symbol leaves the element unset.
            element = None
            if elementCol != -1:
                try:
                    element = elem.get_by_symbol(row[elementCol])
                except KeyError:
                    pass
            atomName = row[atomNameCol]
            if atomName in atomReplacements:
                atomName = atomReplacements[atomName]
            atom = top.addAtom(atomName, element, res, row[atomIdCol])
            atomTable[atomKey] = atom
            atomsInResidue.add(atomName)
        else:
            # This row defines coordinates for an existing atom in one of the later models.

            try:
                atom = atomTable[atomKey]
            except KeyError:
                raise ValueError(
                    'Unknown atom %s in residue %s %s for model %s'
                    % (row[atomNameCol], row[resNameCol], row[resNumCol], model))
            if atom.index != len(modelPositions):
                raise ValueError(
                    'Atom %s for model %s does not match the order of atoms for model %s'
                    % (row[atomIdCol], model, models[0]))
        # The coordinates are scaled by 0.1 here and tagged as nanometers below.
        modelPositions.append(
            Vec3(float(row[xCol]), float(row[yCol]), float(row[zCol])) * 0.1)
    self._positions = [coords * nanometers for coords in self._positions]
    ## The atom positions read from the PDBx/mmCIF file.  If the file contains multiple frames, these are the positions in the first frame.
    self.positions = self._positions[0]
    self.topology.createStandardBonds()
    self._numpyPositions = None

    # Record unit cell information, if present.

    cell = block.getObj('cell')
    if cell is not None and cell.getRowCount() > 0:
        row = cell.getRow(0)
        # Lengths are scaled by 0.1 and angles converted from degrees to radians.
        (a, b, c) = [
            float(row[cell.getAttributeIndex(attribute)]) * 0.1
            for attribute in ('length_a', 'length_b', 'length_c')
        ]
        (alpha, beta, gamma) = [
            float(row[cell.getAttributeIndex(attribute)]) * math.pi / 180.0
            for attribute in ('angle_alpha', 'angle_beta', 'angle_gamma')
        ]
        self.topology.setPeriodicBoxVectors(
            computePeriodicBoxVectors(a, b, c, alpha, beta, gamma))

    # Add bonds based on struct_conn records.
    # NOTE(review): the keys below are built from the label_* columns, whereas
    # atomTable is keyed by whichever atom_site columns were selected above
    # (auth_* preferred).  A record therefore only matches when both naming
    # schemes agree - confirm that this is intended.

    connectData = block.getObj('struct_conn')
    if connectData is not None:
        res1Col = connectData.getAttributeIndex('ptnr1_label_seq_id')
        res2Col = connectData.getAttributeIndex('ptnr2_label_seq_id')
        atom1Col = connectData.getAttributeIndex('ptnr1_label_atom_id')
        atom2Col = connectData.getAttributeIndex('ptnr2_label_atom_id')
        asym1Col = connectData.getAttributeIndex('ptnr1_label_asym_id')
        asym2Col = connectData.getAttributeIndex('ptnr2_label_asym_id')
        typeCol = connectData.getAttributeIndex('conn_type_id')
        connectBonds = []
        for row in connectData.getRowList():
            # Only the first six characters of the connection type are compared.
            connType = row[typeCol][:6]
            if connType in ('covale', 'disulf', 'modres'):
                key1 = (row[res1Col], row[asym1Col], row[atom1Col])
                key2 = (row[res2Col], row[asym2Col], row[atom2Col])
                if key1 in atomTable and key2 in atomTable:
                    connectBonds.append((atomTable[key1], atomTable[key2]))
        if connectBonds:
            # Only add bonds that don't already exist (in either direction).
            existingBonds = set(top.bonds())
            for bond in connectBonds:
                if bond not in existingBonds and (bond[1], bond[0]) not in existingBonds:
                    top.addBond(bond[0], bond[1])
                    existingBonds.add(bond)
def __init__(self, file, extraParticleIdentifier='EP'):
    """Load a PDB file.

    The atom positions and Topology can be retrieved by calling
    getPositions() and getTopology().

    Parameters
    ----------
    file : string or file
        the name of the file to load.  Alternatively you can pass an open
        file object (left open for the caller to close) or an already
        parsed PdbStructure.
    extraParticleIdentifier : string='EP'
        if this value appears in the element column for an ATOM record, the
        Atom's element will be set to None to mark it as an extra particle
    """
    # Only used for membership tests when filtering CONECT records below.
    metalElements = frozenset((
        'Al', 'As', 'Ba', 'Ca', 'Cd', 'Ce', 'Co', 'Cs', 'Cu', 'Dy', 'Fe', 'Gd',
        'Hg', 'Ho', 'In', 'Ir', 'K', 'Li', 'Mg', 'Mn', 'Mo', 'Na', 'Ni', 'Pb',
        'Pd', 'Pt', 'Rb', 'Rh', 'Sm', 'Sr', 'Te', 'Tl', 'V', 'W', 'Yb', 'Zn',
    ))

    top = Topology()
    ## The Topology read from the PDB file
    self.topology = top

    # Load the PDB file.  A handle opened here is closed even if parsing
    # fails; a handle supplied by the caller is never closed.
    if isinstance(file, PdbStructure):
        pdb = file
    elif isinstance(file, str):
        with open(file) as inputfile:
            pdb = PdbStructure(inputfile, load_all_models=True,
                               extraParticleIdentifier=extraParticleIdentifier)
    else:
        pdb = PdbStructure(file, load_all_models=True,
                           extraParticleIdentifier=extraParticleIdentifier)
    PDBFile._loadNameReplacementTables()

    # Build the topology

    atomByNumber = {}       # PDB serial number -> Atom, used for CONECT records
    for chain in pdb.iter_chains():
        c = top.addChain(chain.chain_id)
        for residue in chain.iter_residues():
            resName = residue.get_name()
            if resName in PDBFile._residueNameReplacements:
                resName = PDBFile._residueNameReplacements[resName]
            r = top.addResidue(resName, c, str(residue.number), residue.insertion_code)
            if resName in PDBFile._atomNameReplacements:
                atomReplacements = PDBFile._atomNameReplacements[resName]
            else:
                atomReplacements = {}
            for atom in residue.iter_atoms():
                atomName = atom.get_name()
                if atomName in atomReplacements:
                    atomName = atomReplacements[atomName]
                atomName = atomName.strip()
                element = atom.element
                # NOTE(review): this compares against the literal 'EP' rather
                # than extraParticleIdentifier; presumably PdbStructure has
                # already normalised the identifier - confirm.
                if element == 'EP':
                    element = None
                elif element is None:
                    # Try to guess the element.

                    upper = atomName.upper()
                    # Skip leading digits, but always keep at least one character.
                    while len(upper) > 1 and upper[0].isdigit():
                        upper = upper[1:]
                    if upper.startswith('CL'):
                        element = elem.chlorine
                    elif upper.startswith('NA'):
                        element = elem.sodium
                    elif upper.startswith('MG'):
                        element = elem.magnesium
                    elif upper.startswith('BE'):
                        element = elem.beryllium
                    elif upper.startswith('LI'):
                        element = elem.lithium
                    elif upper.startswith('K'):
                        element = elem.potassium
                    elif upper.startswith('ZN'):
                        element = elem.zinc
                    elif len(residue) == 1 and upper.startswith('CA'):
                        # 'CA' is only read as calcium when it is the sole
                        # atom of its residue.
                        element = elem.calcium
                    elif upper.startswith('D') and any(
                            a.name == atomName[1:] for a in residue.iter_atoms()):
                        pass  # A Drude particle
                    elif upper:
                        # A blank atom name leaves the element unset instead
                        # of failing on upper[0].
                        try:
                            element = elem.get_by_symbol(upper[0])
                        except KeyError:
                            pass
                newAtom = top.addAtom(atomName, element, r, str(atom.serial_number))
                atomByNumber[atom.serial_number] = newAtom

    # Collect one list of positions per model.
    self._positions = []
    for model in pdb.iter_models(True):
        coords = []
        for chain in model.iter_chains():
            for residue in chain.iter_residues():
                for atom in residue.iter_atoms():
                    pos = atom.get_position().value_in_unit(nanometers)
                    coords.append(Vec3(pos[0], pos[1], pos[2]))
        self._positions.append(coords * nanometers)
    ## The atom positions read from the PDB file.  If the file contains multiple frames, these are the positions in the first frame.
    self.positions = self._positions[0]
    self.topology.setPeriodicBoxVectors(pdb.get_periodic_box_vectors())
    self.topology.createStandardBonds()
    self.topology.createDisulfideBonds(self.positions)
    self._numpyPositions = None

    # Add bonds based on CONECT records. Bonds between metals of elements specified in metalElements and residues in standardResidues are not added.

    connectBonds = []
    for connect in pdb.models[-1].connects:
        i = connect[0]
        if i not in atomByNumber:
            continue
        atomI = atomByNumber[i]
        for j in connect[1:]:
            if j not in atomByNumber:
                continue
            atomJ = atomByNumber[j]
            if atomI.element is None or atomJ.element is None:
                # At least one partner has no element: always keep the bond.
                keep = True
            else:
                metalI = atomI.element.symbol in metalElements
                metalJ = atomJ.element.symbol in metalElements
                if not metalI and not metalJ:
                    keep = True
                elif metalI and atomJ.residue.name not in PDBFile._standardResidues:
                    keep = True
                elif metalJ and atomI.residue.name not in PDBFile._standardResidues:
                    keep = True
                else:
                    keep = False
            if keep:
                connectBonds.append((atomI, atomJ))
    if connectBonds:
        # Only add bonds that don't already exist (in either direction).
        existingBonds = set(top.bonds())
        for bond in connectBonds:
            if bond not in existingBonds and (bond[1], bond[0]) not in existingBonds:
                top.addBond(bond[0], bond[1])
                existingBonds.add(bond)