def __init__(self, file):
    """Load a prmtop file and build the corresponding Topology.

    All atoms are placed in a single chain, and a new residue is started each
    time the residue number reported by the loader changes.  Residue and atom
    names are passed through the PDBFile name replacement tables, elements are
    guessed from the atom names, bonds are copied from the loader, and the
    unit cell dimensions are set when the loader reports a periodic box.

    Parameters:
     - file the prmtop file to load; it is handed unchanged to
       amber_file_parser.PrmtopLoader
    """
    top = Topology()
    ## The Topology read from the prmtop file
    self.topology = top

    # Parse the prmtop file and keep the loader for later use.
    loader = amber_file_parser.PrmtopLoader(file)
    self._prmtop = loader

    # Build the atoms, residue by residue, inside one chain.
    PDBFile._loadNameReplacementTables()
    residueRenames = PDBFile._residueNameReplacements
    atomRenamesByResidue = PDBFile._atomNameReplacements
    # Two-letter prefixes that are checked before falling back to the first letter.
    prefixElements = (
        ('CL', elem.chlorine),
        ('NA', elem.sodium),
        ('MG', elem.magnesium),
    )
    chain = top.addChain()
    previousResidueNumber = None
    for atomIndex in range(loader.getNumAtoms()):
        residueNumber = loader.getResidueNumber(atomIndex)
        if residueNumber != previousResidueNumber:
            # A change in residue number marks the start of a new residue.
            previousResidueNumber = residueNumber
            residueName = loader.getResidueLabel(iAtom=atomIndex).strip()
            residueName = residueRenames.get(residueName, residueName)
            residue = top.addResidue(residueName, chain)
            atomRenames = atomRenamesByResidue.get(residueName, {})
        atomName = loader.getAtomName(atomIndex).strip()
        atomName = atomRenames.get(atomName, atomName)

        # Guess the element from the (possibly renamed) atom name.
        upperName = atomName.upper()
        for prefix, guess in prefixElements:
            if upperName.startswith(prefix):
                element = guess
                break
        else:
            try:
                element = elem.get_by_symbol(atomName[0])
            except KeyError:
                element = None
        top.addAtom(atomName, element, residue)

    # Copy the bonds: first those involving hydrogen, then all the others.
    atomList = list(top.atoms())
    for fetchBonds in (loader.getBondsWithH, loader.getBondsNoH):
        for bond in fetchBonds():
            top.addBond(atomList[bond[0]], atomList[bond[1]])

    # Record the periodic box size, if the file defines one.
    if loader.getIfBox():
        boxLengths = loader.getBoxBetaAndDimensions()[1:4]
        top.setUnitCellDimensions(
            tuple(length.value_in_unit(unit.nanometer) for length in boxLengths)*unit.nanometer)
def __init__(self, file, unitCellDimensions=None, includeDir='/usr/local/gromacs/share/gromacs/top', defines=None):
    """Load a top file.

    Parameters:
     - file (string) the name of the file to load
     - unitCellDimensions (Vec3=None) the dimensions of the crystallographic unit cell
     - includeDir (string=/usr/local/gromacs/share/gromacs/top) a directory in which to look for other files
       included from the top file
     - defines (map=None) preprocessor definitions that should be predefined when parsing the file.
       The mapping is copied, so the caller's object is never modified.  None means that nothing
       is predefined.

    Raises:
     - ValueError if self._molecules names a molecule type that is missing from self._moleculeTypes
       after the file has been processed
    """
    self._includeDirs = (os.path.dirname(file), includeDir)
    # Store a private copy.  A shared mutable default (or the caller's own dict) would otherwise be
    # aliased by every instance; presumably the parser adds entries to it while processing the file.
    self._defines = {} if defines is None else dict(defines)

    # Parse the file.

    self._currentCategory = None
    self._ifStack = []
    self._moleculeTypes = {}
    self._molecules = []
    self._currentMoleculeType = None
    self._atomTypes = {}
    self._bondTypes = {}
    self._angleTypes = {}
    self._dihedralTypes = {}
    self._implicitTypes = {}
    self._pairTypes = {}
    self._cmapTypes = {}
    self._processFile(file)

    # Create the Topology from it.

    top = Topology()
    ## The Topology read from the top file
    self.topology = top
    top.setUnitCellDimensions(unitCellDimensions)
    PDBFile._loadNameReplacementTables()
    # Two-letter prefixes that are checked before falling back to the first letter.
    prefixElements = (
        ('CL', elem.chlorine),
        ('NA', elem.sodium),
        ('MG', elem.magnesium),
    )
    for moleculeName, moleculeCount in self._molecules:
        if moleculeName not in self._moleculeTypes:
            raise ValueError("Unknown molecule type: " + moleculeName)
        moleculeType = self._moleculeTypes[moleculeName]

        # Create the specified number of molecules of this type, each in its own chain.

        for _ in range(moleculeCount):
            atoms = []
            lastResidue = None
            c = top.addChain()
            for fields in moleculeType.atoms:
                resNumber = fields[2]
                if resNumber != lastResidue:
                    # A change in residue number marks the start of a new residue.
                    lastResidue = resNumber
                    resName = fields[3]
                    resName = PDBFile._residueNameReplacements.get(resName, resName)
                    r = top.addResidue(resName, c)
                    atomReplacements = PDBFile._atomNameReplacements.get(resName, {})
                atomName = fields[4]
                atomName = atomReplacements.get(atomName, atomName)

                # Try to guess the element.

                upper = atomName.upper()
                for prefix, guess in prefixElements:
                    if upper.startswith(prefix):
                        element = guess
                        break
                else:
                    try:
                        element = elem.get_by_symbol(atomName[0])
                    except KeyError:
                        element = None
                atoms.append(top.addAtom(atomName, element, r))

            # Add bonds to the topology.  The indices in the file are 1-based within the molecule.

            for fields in moleculeType.bonds:
                top.addBond(atoms[int(fields[0]) - 1], atoms[int(fields[1]) - 1])
def __init__(self, file, periodicBoxVectors=None, unitCellDimensions=None, includeDir=None, defines=None):
    """Load a top file.

    Parameters:
     - file (string) the name of the file to load
     - periodicBoxVectors (tuple of Vec3=None) the vectors defining the periodic box
     - unitCellDimensions (Vec3=None) the dimensions of the crystallographic unit cell.  For
       non-rectangular unit cells, specify periodicBoxVectors instead.
     - includeDir (string=None) A directory in which to look for other files
       included from the top file. If not specified, the result of
       _defaultGromacsIncludeDir() is used.
     - defines (dict={}) preprocessor definitions that should be predefined when parsing the file.
       They are merged on top of the built-in definition FLEXIBLE=True.

    Raises:
     - ValueError if both periodicBoxVectors and unitCellDimensions are given, or if
       self._molecules names a molecule type that is missing from self._moleculeTypes
    """
    if includeDir is None:
        includeDir = _defaultGromacsIncludeDir()
    self._includeDirs = (os.path.dirname(file), includeDir)
    # Most of the gromacs water itp files for different forcefields,
    # unless the preprocessor #define FLEXIBLE is given, don't define
    # bonds between the water hydrogen and oxygens, but only give the
    # constraint distances and exclusions.  FLEXIBLE is therefore always
    # predefined; user definitions are layered on top of it.
    predefined = {'FLEXIBLE': True}
    if defines is not None:
        predefined.update(defines)
    self._defines = predefined

    # Reset the parser state, then parse the file.
    self._currentCategory = None
    self._currentMoleculeType = None
    self._ifStack = []
    self._elseStack = []
    self._molecules = []
    self._moleculeTypes = {}
    self._atomTypes = {}
    self._bondTypes = {}
    self._angleTypes = {}
    self._dihedralTypes = {}
    self._implicitTypes = {}
    self._pairTypes = {}
    self._cmapTypes = {}
    self._processFile(file)

    # Build the Topology from the parsed data.
    top = Topology()
    ## The Topology read from the top file
    self.topology = top
    if periodicBoxVectors is None:
        top.setUnitCellDimensions(unitCellDimensions)
    elif unitCellDimensions is None:
        top.setPeriodicBoxVectors(periodicBoxVectors)
    else:
        raise ValueError("specify either periodicBoxVectors or unitCellDimensions, but not both")
    PDBFile._loadNameReplacementTables()
    residueRenames = PDBFile._residueNameReplacements
    atomRenamesByResidue = PDBFile._atomNameReplacements
    # Two-letter prefixes that are checked before falling back to the first letter.
    prefixElements = (
        ('CL', elem.chlorine),
        ('NA', elem.sodium),
        ('MG', elem.magnesium),
    )
    for moleculeName, moleculeCount in self._molecules:
        if moleculeName not in self._moleculeTypes:
            raise ValueError("Unknown molecule type: "+moleculeName)
        template = self._moleculeTypes[moleculeName]

        # Instantiate the requested number of copies, each in its own chain.
        for _ in range(moleculeCount):
            moleculeAtoms = []
            chain = top.addChain()
            previousResidueNumber = None
            for atomFields in template.atoms:
                residueNumber = atomFields[2]
                if residueNumber != previousResidueNumber:
                    # A change in residue number marks the start of a new residue.
                    previousResidueNumber = residueNumber
                    residueName = residueRenames.get(atomFields[3], atomFields[3])
                    residue = top.addResidue(residueName, chain)
                    atomRenames = atomRenamesByResidue.get(residueName, {})
                atomName = atomRenames.get(atomFields[4], atomFields[4])

                # Guess the element from the (possibly renamed) atom name.
                upperName = atomName.upper()
                for prefix, guess in prefixElements:
                    if upperName.startswith(prefix):
                        element = guess
                        break
                else:
                    try:
                        element = elem.get_by_symbol(atomName[0])
                    except KeyError:
                        element = None
                moleculeAtoms.append(top.addAtom(atomName, element, residue))

            # Add this molecule's bonds; file indices are 1-based within the molecule.
            for bondFields in template.bonds:
                firstAtom = moleculeAtoms[int(bondFields[0])-1]
                secondAtom = moleculeAtoms[int(bondFields[1])-1]
                top.addBond(firstAtom, secondAtom)
def __init__(self, file):
    """Load a prmtop file and build the corresponding Topology.

    All atoms are placed in a single chain, and a new residue is started each
    time the residue number reported by the loader changes.  The element of
    each atom is taken from the ATOMIC_NUMBER data when the loader reports it
    is available, and is otherwise guessed from the atom name.  The elements
    are also collected, in atom order, in self.elements.

    Parameters:
     - file the prmtop file to load; it is handed unchanged to
       amber_file_parser.PrmtopLoader
    """
    top = Topology()
    ## The Topology read from the prmtop file
    self.topology = top
    self.elements = []

    # Parse the prmtop file and keep the loader for later use.
    loader = amber_file_parser.PrmtopLoader(file)
    self._prmtop = loader

    # Build the atoms, residue by residue, inside one chain.
    PDBFile._loadNameReplacementTables()
    residueRenames = PDBFile._residueNameReplacements
    atomRenamesByResidue = PDBFile._atomNameReplacements
    # Two-letter prefixes that are checked before falling back to the first letter.
    prefixElements = (
        ('CL', elem.chlorine),
        ('NA', elem.sodium),
        ('MG', elem.magnesium),
        ('ZN', elem.zinc),
    )
    chain = top.addChain()
    previousResidueNumber = None
    for atomIndex in range(loader.getNumAtoms()):
        residueNumber = loader.getResidueNumber(atomIndex)
        if residueNumber != previousResidueNumber:
            # A change in residue number marks the start of a new residue.
            previousResidueNumber = residueNumber
            residueName = loader.getResidueLabel(iAtom=atomIndex).strip()
            residueName = residueRenames.get(residueName, residueName)
            residue = top.addResidue(residueName, chain)
            atomRenames = atomRenamesByResidue.get(residueName, {})
        atomName = loader.getAtomName(atomIndex).strip()
        atomName = atomRenames.get(atomName, atomName)

        if loader.has_atomic_number:
            # Prefer the atomic number stored in the prmtop file.
            try:
                atomicNumber = int(loader._raw_data['ATOMIC_NUMBER'][atomIndex])
                element = elem.Element.getByAtomicNumber(atomicNumber)
            except KeyError:
                element = None
        else:
            # No atomic numbers available: guess the element from the atom name.
            upperName = atomName.upper()
            for prefix, guess in prefixElements:
                if upperName.startswith(prefix):
                    element = guess
                    break
            else:
                try:
                    element = elem.get_by_symbol(atomName[0])
                except KeyError:
                    element = None

        top.addAtom(atomName, element, residue)
        self.elements.append(element)

    # Copy the bonds: first those involving hydrogen, then all the others.
    atomList = list(top.atoms())
    for fetchBonds in (loader.getBondsWithH, loader.getBondsNoH):
        for bond in fetchBonds():
            top.addBond(atomList[bond[0]], atomList[bond[1]])

    # Record the periodic box, if the file defines one.  The three box
    # lengths are followed by the first box entry repeated three times,
    # which is used for all three angle arguments.
    if loader.getIfBox():
        box = loader.getBoxBetaAndDimensions()
        boxLengths = box[1:4]
        boxAngles = box[0:1]*3
        top.setPeriodicBoxVectors(computePeriodicBoxVectors(*(boxLengths + boxAngles)))
def __init__(self, file, unitCellDimensions=None, includeDir=None, defines=None):
    """Load a top file.

    Parameters:
     - file (string) the name of the file to load
     - unitCellDimensions (Vec3=None) the dimensions of the crystallographic unit cell
     - includeDir (string=None) A directory in which to look for other files
       included from the top file. If not specified, the result of
       _defaultGromacsIncludeDir() is used.
     - defines (dict={}) preprocessor definitions that should be predefined when parsing the file.
       They are merged on top of the built-in definition FLEXIBLE=True.

    Raises:
     - ValueError if self._molecules names a molecule type that is missing from self._moleculeTypes
    """
    if includeDir is None:
        includeDir = _defaultGromacsIncludeDir()
    self._includeDirs = (os.path.dirname(file), includeDir)
    # Most of the gromacs water itp files for different forcefields,
    # unless the preprocessor #define FLEXIBLE is given, don't define
    # bonds between the water hydrogen and oxygens, but only give the
    # constraint distances and exclusions.  FLEXIBLE is therefore always
    # predefined; user definitions are layered on top of it.
    predefined = {'FLEXIBLE': True}
    if defines is not None:
        predefined.update(defines)
    self._defines = predefined

    # Reset the parser state, then parse the file.
    self._currentCategory = None
    self._currentMoleculeType = None
    self._ifStack = []
    self._elseStack = []
    self._molecules = []
    self._moleculeTypes = {}
    self._atomTypes = {}
    self._bondTypes = {}
    self._angleTypes = {}
    self._dihedralTypes = {}
    self._implicitTypes = {}
    self._pairTypes = {}
    self._cmapTypes = {}
    self._processFile(file)

    # Build the Topology from the parsed data.
    top = Topology()
    ## The Topology read from the top file
    self.topology = top
    top.setUnitCellDimensions(unitCellDimensions)
    PDBFile._loadNameReplacementTables()
    residueRenames = PDBFile._residueNameReplacements
    atomRenamesByResidue = PDBFile._atomNameReplacements
    # Two-letter prefixes that are checked before falling back to the first letter.
    prefixElements = (
        ('CL', elem.chlorine),
        ('NA', elem.sodium),
        ('MG', elem.magnesium),
    )
    for moleculeName, moleculeCount in self._molecules:
        if moleculeName not in self._moleculeTypes:
            raise ValueError("Unknown molecule type: " + moleculeName)
        template = self._moleculeTypes[moleculeName]

        # Instantiate the requested number of copies, each in its own chain.
        for _ in range(moleculeCount):
            moleculeAtoms = []
            chain = top.addChain()
            previousResidueNumber = None
            for atomFields in template.atoms:
                residueNumber = atomFields[2]
                if residueNumber != previousResidueNumber:
                    # A change in residue number marks the start of a new residue.
                    previousResidueNumber = residueNumber
                    residueName = residueRenames.get(atomFields[3], atomFields[3])
                    residue = top.addResidue(residueName, chain)
                    atomRenames = atomRenamesByResidue.get(residueName, {})
                atomName = atomRenames.get(atomFields[4], atomFields[4])

                # Guess the element from the (possibly renamed) atom name.
                upperName = atomName.upper()
                for prefix, guess in prefixElements:
                    if upperName.startswith(prefix):
                        element = guess
                        break
                else:
                    try:
                        element = elem.get_by_symbol(atomName[0])
                    except KeyError:
                        element = None
                moleculeAtoms.append(top.addAtom(atomName, element, residue))

            # Add this molecule's bonds; file indices are 1-based within the molecule.
            for bondFields in template.bonds:
                firstAtom = moleculeAtoms[int(bondFields[0]) - 1]
                secondAtom = moleculeAtoms[int(bondFields[1]) - 1]
                top.addBond(firstAtom, secondAtom)
def __init__(self, file):
    """Load a PDB file.

    The atom positions and Topology can be retrieved by calling getPositions() and getTopology().

    Parameters:
     - file (string) the name of the file to load.  An already open file-like object or an
       existing PdbStructure may be passed instead.  A file opened here from a name is closed
       again before returning; a caller-supplied file object is left open.
    """
    top = Topology()
    ## The Topology read from the PDB file
    self.topology = top

    # Load the PDB file

    if isinstance(file, PdbStructure):
        pdb = file
    else:
        ownsHandle = isinstance(file, str)
        inputfile = open(file) if ownsHandle else file
        try:
            # NOTE(review): assumes PdbStructure consumes the whole stream inside its
            # constructor, so the handle is no longer needed afterwards - confirm.
            pdb = PdbStructure(inputfile, load_all_models=True)
        finally:
            # Only close what this method opened; the caller owns any handle it passed in.
            if ownsHandle:
                inputfile.close()
    PDBFile._loadNameReplacementTables()

    # Build the topology

    # Prefixes that are checked, in this order, before the calcium test and the
    # first-letter fallback.
    prefixElements = (
        ('CL', elem.chlorine),
        ('NA', elem.sodium),
        ('MG', elem.magnesium),
        ('BE', elem.beryllium),
        ('LI', elem.lithium),
        ('K', elem.potassium),
    )
    atomByNumber = {}
    for chain in pdb.iter_chains():
        c = top.addChain()
        for residue in chain.iter_residues():
            resName = residue.get_name()
            resName = PDBFile._residueNameReplacements.get(resName, resName)
            r = top.addResidue(resName, c)
            atomReplacements = PDBFile._atomNameReplacements.get(resName, {})
            for atom in residue.atoms:
                atomName = atom.get_name()
                atomName = atomReplacements.get(atomName, atomName)
                atomName = atomName.strip()
                element = atom.element
                if element is None:
                    # Try to guess the element.

                    upper = atomName.upper()
                    for prefix, guess in prefixElements:
                        if upper.startswith(prefix):
                            element = guess
                            break
                    else:
                        if len(residue) == 1 and upper.startswith('CA'):
                            # "CA" is only read as calcium when the residue has length 1.
                            element = elem.calcium
                        else:
                            try:
                                element = elem.get_by_symbol(atomName[0])
                            except KeyError:
                                # Unknown symbol: leave the element as None.
                                pass
                newAtom = top.addAtom(atomName, element, r)
                atomByNumber[atom.serial_number] = newAtom

    # Collect the positions of every model, in the same atom order as the topology loop above.
    self._positions = []
    for model in pdb.iter_models(True):
        coords = []
        for chain in model.iter_chains():
            for residue in chain.iter_residues():
                for atom in residue.atoms:
                    pos = atom.get_position().value_in_unit(nanometers)
                    coords.append(Vec3(pos[0], pos[1], pos[2]))
        self._positions.append(coords * nanometers)
    ## The atom positions read from the PDB file.  If the file contains multiple frames, these are the positions in the first frame.
    self.positions = self._positions[0]
    self.topology.setUnitCellDimensions(pdb.get_unit_cell_dimensions())
    self.topology.createStandardBonds()
    self.topology.createDisulfideBonds(self.positions)
    self._numpyPositions = None

    # Add bonds based on CONECT records.

    connectBonds = []
    for connect in pdb.models[0].connects:
        i = connect[0]
        for j in connect[1:]:
            if i in atomByNumber and j in atomByNumber:
                connectBonds.append((atomByNumber[i], atomByNumber[j]))
    if connectBonds:
        # Only add bonds that don't already exist (in either orientation).
        existingBonds = set(top.bonds())
        for bond in connectBonds:
            if bond not in existingBonds and (bond[1], bond[0]) not in existingBonds:
                top.addBond(bond[0], bond[1])
                existingBonds.add(bond)
def __init__(self, file, unitCellDimensions=None, includeDir='/usr/local/gromacs/share/gromacs/top', defines=None):
    """Load a top file.

    Parameters:
     - file (string) the name of the file to load
     - unitCellDimensions (Vec3=None) the dimensions of the crystallographic unit cell
     - includeDir (string=/usr/local/gromacs/share/gromacs/top) a directory in which to look for other files
       included from the top file
     - defines (map=None) preprocessor definitions that should be predefined when parsing the file.
       The mapping is copied, so the caller's object is never modified.  None means that nothing
       is predefined.

    Raises:
     - ValueError if self._molecules names a molecule type that is missing from self._moleculeTypes
       after the file has been processed
    """
    self._includeDirs = (os.path.dirname(file), includeDir)
    # Store a private copy.  A shared mutable default (or the caller's own dict) would otherwise be
    # aliased by every instance; presumably the parser adds entries to it while processing the file.
    self._defines = {} if defines is None else dict(defines)

    # Parse the file.

    self._currentCategory = None
    self._ifStack = []
    self._moleculeTypes = {}
    self._molecules = []
    self._currentMoleculeType = None
    self._atomTypes = {}
    self._bondTypes = {}
    self._angleTypes = {}
    self._dihedralTypes = {}
    self._implicitTypes = {}
    self._pairTypes = {}
    self._cmapTypes = {}
    self._processFile(file)

    # Create the Topology from it.

    top = Topology()
    ## The Topology read from the top file
    self.topology = top
    top.setUnitCellDimensions(unitCellDimensions)
    PDBFile._loadNameReplacementTables()
    # Two-letter prefixes that are checked before falling back to the first letter.
    prefixElements = (
        ('CL', elem.chlorine),
        ('NA', elem.sodium),
        ('MG', elem.magnesium),
    )
    for moleculeName, moleculeCount in self._molecules:
        if moleculeName not in self._moleculeTypes:
            raise ValueError("Unknown molecule type: "+moleculeName)
        moleculeType = self._moleculeTypes[moleculeName]

        # Create the specified number of molecules of this type, each in its own chain.

        for _ in range(moleculeCount):
            atoms = []
            lastResidue = None
            c = top.addChain()
            for fields in moleculeType.atoms:
                resNumber = fields[2]
                if resNumber != lastResidue:
                    # A change in residue number marks the start of a new residue.
                    lastResidue = resNumber
                    resName = fields[3]
                    resName = PDBFile._residueNameReplacements.get(resName, resName)
                    r = top.addResidue(resName, c)
                    atomReplacements = PDBFile._atomNameReplacements.get(resName, {})
                atomName = fields[4]
                atomName = atomReplacements.get(atomName, atomName)

                # Try to guess the element.

                upper = atomName.upper()
                for prefix, guess in prefixElements:
                    if upper.startswith(prefix):
                        element = guess
                        break
                else:
                    try:
                        element = elem.get_by_symbol(atomName[0])
                    except KeyError:
                        element = None
                atoms.append(top.addAtom(atomName, element, r))

            # Add bonds to the topology.  The indices in the file are 1-based within the molecule.

            for fields in moleculeType.bonds:
                top.addBond(atoms[int(fields[0])-1], atoms[int(fields[1])-1])
def __init__(self, file):
    """Load a PDBx/mmCIF file.

    The atom positions and Topology can be retrieved by calling getPositions() and getTopology().

    Parameters:
     - file (string) the name of the file to load.  Alternatively you can pass an open file object.
       A file opened here from a name is closed again once it has been read; a caller-supplied
       file object is left open.

    Raises:
     - ValueError if a later model contains an atom that the first model did not define, or
       lists its atoms in a different order than the first model
    """
    top = Topology()
    ## The Topology read from the PDBx/mmCIF file
    self.topology = top
    self._positions = []

    # Load the file.

    ownsHandle = isinstance(file, str)
    inputFile = open(file) if ownsHandle else file
    try:
        reader = PdbxReader(inputFile)
        data = []
        # NOTE(review): assumes read() fully populates data, so the handle is not
        # needed afterwards - confirm.
        reader.read(data)
    finally:
        # Only close what this method opened; the caller owns any handle it passed in.
        if ownsHandle:
            inputFile.close()
    block = data[0]

    # Build the topology.

    atomData = block.getObj('atom_site')
    atomNameCol = atomData.getAttributeIndex('label_atom_id')
    atomIdCol = atomData.getAttributeIndex('id')
    resNameCol = atomData.getAttributeIndex('label_comp_id')
    resIdCol = atomData.getAttributeIndex('label_seq_id')
    asymIdCol = atomData.getAttributeIndex('label_asym_id')
    chainIdCol = atomData.getAttributeIndex('label_entity_id')
    elementCol = atomData.getAttributeIndex('type_symbol')
    altIdCol = atomData.getAttributeIndex('label_alt_id')
    modelCol = atomData.getAttributeIndex('pdbx_PDB_model_num')
    xCol = atomData.getAttributeIndex('Cartn_x')
    yCol = atomData.getAttributeIndex('Cartn_y')
    zCol = atomData.getAttributeIndex('Cartn_z')
    lastChainId = None
    lastResId = None
    lastAsymId = None
    atomTable = {}
    models = []
    for row in atomData.getRowList():
        atomKey = (row[resIdCol], row[asymIdCol], row[atomNameCol])
        # A column index of -1 is treated as "no model column": everything is model '1'.
        model = '1' if modelCol == -1 else row[modelCol]
        if model not in models:
            models.append(model)
            self._positions.append([])
        modelIndex = models.index(model)
        if (row[altIdCol] != '.' and atomKey in atomTable
                and len(self._positions[modelIndex]) > atomTable[atomKey].index):
            # This row is an alternate position for an existing atom, so ignore it.
            continue
        if modelIndex == 0:
            # This row defines a new atom.

            if lastChainId != row[chainIdCol]:
                # The start of a new chain.
                chain = top.addChain()
                lastChainId = row[chainIdCol]
                lastResId = None
                lastAsymId = None
            if lastResId != row[resIdCol] or lastAsymId != row[asymIdCol]:
                # The start of a new residue.
                res = top.addResidue(row[resNameCol], chain)
                lastResId = row[resIdCol]
                if lastResId == '.':
                    # '.' is never remembered, so every such row starts a new residue.
                    lastResId = None
                lastAsymId = row[asymIdCol]
            element = None
            try:
                element = elem.get_by_symbol(row[elementCol])
            except KeyError:
                # Unknown symbol: leave the element as None.
                pass
            atom = top.addAtom(row[atomNameCol], element, res)
            atomTable[atomKey] = atom
        else:
            # This row defines coordinates for an existing atom in one of the later models.

            try:
                atom = atomTable[atomKey]
            except KeyError:
                raise ValueError('Unknown atom %s in residue %s %s for model %s' % (row[atomNameCol], row[resNameCol], row[resIdCol], model))
            if atom.index != len(self._positions[modelIndex]):
                raise ValueError('Atom %s for model %s does not match the order of atoms for model %s' % (row[atomIdCol], model, models[0]))
        # The factor 0.1 rescales the file coordinates before nanometer units are attached below.
        self._positions[modelIndex].append(Vec3(float(row[xCol]), float(row[yCol]), float(row[zCol]))*0.1)
    self._positions = [modelPositions*nanometers for modelPositions in self._positions]
    ## The atom positions read from the PDBx/mmCIF file.  If the file contains multiple frames, these are the positions in the first frame.
    self.positions = self._positions[0]
    self.topology.createStandardBonds()
    self._numpyPositions = None

    # Record unit cell information, if present.

    cell = block.getObj('cell')
    if cell is not None and cell.getRowCount() > 0:
        row = cell.getRow(0)
        cellSize = [float(row[cell.getAttributeIndex(attribute)]) for attribute in ('length_a', 'length_b', 'length_c')]*angstroms
        self.topology.setUnitCellDimensions(cellSize)

    # Add bonds based on struct_conn records.

    connectData = block.getObj('struct_conn')
    if connectData is not None:
        res1Col = connectData.getAttributeIndex('ptnr1_label_seq_id')
        res2Col = connectData.getAttributeIndex('ptnr2_label_seq_id')
        atom1Col = connectData.getAttributeIndex('ptnr1_label_atom_id')
        atom2Col = connectData.getAttributeIndex('ptnr2_label_atom_id')
        asym1Col = connectData.getAttributeIndex('ptnr1_label_asym_id')
        asym2Col = connectData.getAttributeIndex('ptnr2_label_asym_id')
        typeCol = connectData.getAttributeIndex('conn_type_id')
        connectBonds = []
        for row in connectData.getRowList():
            # Only the first six characters of the connection type are compared.
            connType = row[typeCol][:6]
            if connType in ('covale', 'disulf', 'modres'):
                key1 = (row[res1Col], row[asym1Col], row[atom1Col])
                key2 = (row[res2Col], row[asym2Col], row[atom2Col])
                if key1 in atomTable and key2 in atomTable:
                    connectBonds.append((atomTable[key1], atomTable[key2]))
        if connectBonds:
            # Only add bonds that don't already exist (in either orientation).
            existingBonds = set(top.bonds())
            for bond in connectBonds:
                if bond not in existingBonds and (bond[1], bond[0]) not in existingBonds:
                    top.addBond(bond[0], bond[1])
                    existingBonds.add(bond)
def __init__(self, file):
    """Load a PDB file.

    The atom positions and Topology can be retrieved by calling getPositions() and getTopology().

    Parameters:
     - file (string) the name of the file to load.  An already open file-like object or an
       existing PdbStructure may be passed instead.  A file opened here from a name is closed
       again before returning; a caller-supplied file object is left open.
    """
    top = Topology()
    ## The Topology read from the PDB file
    self.topology = top

    # Load the PDB file

    if isinstance(file, PdbStructure):
        pdb = file
    else:
        ownsHandle = isinstance(file, str)
        inputfile = open(file) if ownsHandle else file
        try:
            # NOTE(review): assumes PdbStructure consumes the whole stream inside its
            # constructor, so the handle is no longer needed afterwards - confirm.
            pdb = PdbStructure(inputfile, load_all_models=True)
        finally:
            # Only close what this method opened; the caller owns any handle it passed in.
            if ownsHandle:
                inputfile.close()
    PDBFile._loadNameReplacementTables()

    # Build the topology

    # Prefixes that are checked, in this order, before the calcium test and the
    # first-letter fallback.
    prefixElements = (
        ('CL', elem.chlorine),
        ('NA', elem.sodium),
        ('MG', elem.magnesium),
        ('BE', elem.beryllium),
        ('LI', elem.lithium),
        ('K', elem.potassium),
        ('ZN', elem.zinc),
    )
    atomByNumber = {}
    for chain in pdb.iter_chains():
        c = top.addChain()
        for residue in chain.iter_residues():
            resName = residue.get_name()
            resName = PDBFile._residueNameReplacements.get(resName, resName)
            r = top.addResidue(resName, c)
            atomReplacements = PDBFile._atomNameReplacements.get(resName, {})
            for atom in residue.atoms:
                atomName = atom.get_name()
                atomName = atomReplacements.get(atomName, atomName)
                atomName = atomName.strip()
                element = atom.element
                if element is None:
                    # Try to guess the element.

                    upper = atomName.upper()
                    for prefix, guess in prefixElements:
                        if upper.startswith(prefix):
                            element = guess
                            break
                    else:
                        if len(residue) == 1 and upper.startswith('CA'):
                            # "CA" is only read as calcium when the residue has length 1.
                            element = elem.calcium
                        else:
                            try:
                                element = elem.get_by_symbol(atomName[0])
                            except KeyError:
                                # Unknown symbol: leave the element as None.
                                pass
                newAtom = top.addAtom(atomName, element, r)
                atomByNumber[atom.serial_number] = newAtom

    # Collect the positions of every model, in the same atom order as the topology loop above.
    self._positions = []
    for model in pdb.iter_models(True):
        coords = []
        for chain in model.iter_chains():
            for residue in chain.iter_residues():
                for atom in residue.atoms:
                    pos = atom.get_position().value_in_unit(nanometers)
                    coords.append(Vec3(pos[0], pos[1], pos[2]))
        self._positions.append(coords*nanometers)
    ## The atom positions read from the PDB file.  If the file contains multiple frames, these are the positions in the first frame.
    self.positions = self._positions[0]
    self.topology.setUnitCellDimensions(pdb.get_unit_cell_dimensions())
    self.topology.createStandardBonds()
    self.topology.createDisulfideBonds(self.positions)
    self._numpyPositions = None

    # Add bonds based on CONECT records.

    connectBonds = []
    for connect in pdb.models[0].connects:
        i = connect[0]
        for j in connect[1:]:
            if i in atomByNumber and j in atomByNumber:
                connectBonds.append((atomByNumber[i], atomByNumber[j]))
    if connectBonds:
        # Only add bonds that don't already exist (in either orientation).
        existingBonds = set(top.bonds())
        for bond in connectBonds:
            if bond not in existingBonds and (bond[1], bond[0]) not in existingBonds:
                top.addBond(bond[0], bond[1])
                existingBonds.add(bond)
def __init__(self, file):
    """Load a PDBx/mmCIF file.

    The atom positions and Topology can be retrieved by calling getPositions() and getTopology().

    Parameters:
     - file (string) the name of the file to load.  Alternatively you can pass an open file object.
       A file opened here from a name is closed again once it has been read; a caller-supplied
       file object is left open.

    Raises:
     - ValueError if a later model contains an atom that the first model did not define, or
       lists its atoms in a different order than the first model
    """
    top = Topology()
    ## The Topology read from the PDBx/mmCIF file
    self.topology = top
    self._positions = []

    # Load the file.

    ownsHandle = isinstance(file, str)
    inputFile = open(file) if ownsHandle else file
    try:
        reader = PdbxReader(inputFile)
        data = []
        # NOTE(review): assumes read() fully populates data, so the handle is not
        # needed afterwards - confirm.
        reader.read(data)
    finally:
        # Only close what this method opened; the caller owns any handle it passed in.
        if ownsHandle:
            inputFile.close()
    block = data[0]

    # Build the topology.

    atomData = block.getObj('atom_site')
    atomNameCol = atomData.getAttributeIndex('label_atom_id')
    atomIdCol = atomData.getAttributeIndex('id')
    resNameCol = atomData.getAttributeIndex('label_comp_id')
    resIdCol = atomData.getAttributeIndex('label_seq_id')
    asymIdCol = atomData.getAttributeIndex('label_asym_id')
    chainIdCol = atomData.getAttributeIndex('label_entity_id')
    elementCol = atomData.getAttributeIndex('type_symbol')
    altIdCol = atomData.getAttributeIndex('label_alt_id')
    modelCol = atomData.getAttributeIndex('pdbx_PDB_model_num')
    xCol = atomData.getAttributeIndex('Cartn_x')
    yCol = atomData.getAttributeIndex('Cartn_y')
    zCol = atomData.getAttributeIndex('Cartn_z')
    lastChainId = None
    lastResId = None
    lastAsymId = None
    atomTable = {}
    models = []
    for row in atomData.getRowList():
        atomKey = (row[resIdCol], row[asymIdCol], row[atomNameCol])
        # A column index of -1 is treated as "no model column": everything is model '1'.
        model = '1' if modelCol == -1 else row[modelCol]
        if model not in models:
            models.append(model)
            self._positions.append([])
        modelIndex = models.index(model)
        if (row[altIdCol] != '.' and atomKey in atomTable
                and len(self._positions[modelIndex]) > atomTable[atomKey].index):
            # This row is an alternate position for an existing atom, so ignore it.
            continue
        if modelIndex == 0:
            # This row defines a new atom.

            if lastChainId != row[chainIdCol]:
                # The start of a new chain.
                chain = top.addChain()
                lastChainId = row[chainIdCol]
                lastResId = None
                lastAsymId = None
            if lastResId != row[resIdCol] or lastAsymId != row[asymIdCol]:
                # The start of a new residue.
                res = top.addResidue(row[resNameCol], chain)
                lastResId = row[resIdCol]
                if lastResId == '.':
                    # '.' is never remembered, so every such row starts a new residue.
                    lastResId = None
                lastAsymId = row[asymIdCol]
            element = None
            try:
                element = elem.get_by_symbol(row[elementCol])
            except KeyError:
                # Unknown symbol: leave the element as None.
                pass
            atom = top.addAtom(row[atomNameCol], element, res)
            atomTable[atomKey] = atom
        else:
            # This row defines coordinates for an existing atom in one of the later models.

            try:
                atom = atomTable[atomKey]
            except KeyError:
                raise ValueError(
                    'Unknown atom %s in residue %s %s for model %s' %
                    (row[atomNameCol], row[resNameCol], row[resIdCol], model))
            if atom.index != len(self._positions[modelIndex]):
                raise ValueError(
                    'Atom %s for model %s does not match the order of atoms for model %s' %
                    (row[atomIdCol], model, models[0]))
        # The factor 0.1 rescales the file coordinates before nanometer units are attached below.
        self._positions[modelIndex].append(
            Vec3(float(row[xCol]), float(row[yCol]), float(row[zCol])) * 0.1)
    self._positions = [modelPositions * nanometers for modelPositions in self._positions]
    ## The atom positions read from the PDBx/mmCIF file.  If the file contains multiple frames, these are the positions in the first frame.
    self.positions = self._positions[0]
    self.topology.createStandardBonds()
    self._numpyPositions = None

    # Record unit cell information, if present.

    cell = block.getObj('cell')
    if cell is not None and cell.getRowCount() > 0:
        row = cell.getRow(0)
        cellSize = [
            float(row[cell.getAttributeIndex(attribute)])
            for attribute in ('length_a', 'length_b', 'length_c')
        ] * angstroms
        self.topology.setUnitCellDimensions(cellSize)

    # Add bonds based on struct_conn records.

    connectData = block.getObj('struct_conn')
    if connectData is not None:
        res1Col = connectData.getAttributeIndex('ptnr1_label_seq_id')
        res2Col = connectData.getAttributeIndex('ptnr2_label_seq_id')
        atom1Col = connectData.getAttributeIndex('ptnr1_label_atom_id')
        atom2Col = connectData.getAttributeIndex('ptnr2_label_atom_id')
        asym1Col = connectData.getAttributeIndex('ptnr1_label_asym_id')
        asym2Col = connectData.getAttributeIndex('ptnr2_label_asym_id')
        typeCol = connectData.getAttributeIndex('conn_type_id')
        connectBonds = []
        for row in connectData.getRowList():
            # Only the first six characters of the connection type are compared.
            connType = row[typeCol][:6]
            if connType in ('covale', 'disulf', 'modres'):
                key1 = (row[res1Col], row[asym1Col], row[atom1Col])
                key2 = (row[res2Col], row[asym2Col], row[atom2Col])
                if key1 in atomTable and key2 in atomTable:
                    connectBonds.append((atomTable[key1], atomTable[key2]))
        if connectBonds:
            # Only add bonds that don't already exist (in either orientation).
            existingBonds = set(top.bonds())
            for bond in connectBonds:
                if bond not in existingBonds and (bond[1], bond[0]) not in existingBonds:
                    top.addBond(bond[0], bond[1])
                    existingBonds.add(bond)
def __init__(self, pdb_line):
    """Create a new pdb.Atom from an ATOM or HETATM line.

    Example line:
    ATOM   2209  CB  TYR A 299       6.167  22.607  20.046  1.00  8.12           C
    00000000011111111112222222222333333333344444444445555555555666666666677777777778
    12345678901234567890123456789012345678901234567890123456789012345678901234567890

    ATOM line format description from
      http://deposit.rcsb.org/adit/docs/pdb_atom_format.html:

    COLUMNS        DATA TYPE       CONTENTS
    --------------------------------------------------------------------------------
     1 -  6        Record name     "ATOM  "
     7 - 11        Integer         Atom serial number.
    13 - 16        Atom            Atom name.
    17             Character       Alternate location indicator.
    18 - 20        Residue name    Residue name.
    22             Character       Chain identifier.
    23 - 26        Integer         Residue sequence number.
    27             AChar           Code for insertion of residues.
    31 - 38        Real(8.3)       Orthogonal coordinates for X in Angstroms.
    39 - 46        Real(8.3)       Orthogonal coordinates for Y in Angstroms.
    47 - 54        Real(8.3)       Orthogonal coordinates for Z in Angstroms.
    55 - 60        Real(6.2)       Occupancy (Default = 1.0).
    61 - 66        Real(6.2)       Temperature factor (Default = 0.0).
    73 - 76        LString(4)      Segment identifier, left-justified.
    77 - 78        LString(2)      Element symbol, right-justified.
    79 - 80        LString(2)      Charge on the atom.

    Parameters:
     - pdb_line (string) the text of a single ATOM or HETATM record

    Raises:
     - ValueError if the serial number, residue number or a coordinate cannot be parsed, or if
       column 21 is not blank while the three-character residue name field is not full
     - IndexError if the line is too short to contain the single-character fields
    """
    # We might modify first/final status during _finalize() methods
    self.is_first_atom_in_chain = False
    self.is_final_atom_in_chain = False
    self.is_first_residue_in_chain = False
    self.is_final_residue_in_chain = False

    # Start parsing fields from pdb line
    self.record_name = pdb_line[0:6].strip()
    self.serial_number = int(pdb_line[6:11])
    self.name_with_spaces = pdb_line[12:16]
    alternate_location_indicator = pdb_line[16]
    self.residue_name_with_spaces = pdb_line[17:20]
    # In some MD codes, notably ffamber in gromacs, residue name has a fourth character in
    # column 21
    possible_fourth_character = pdb_line[20:21]
    if possible_fourth_character != " ":
        # Fourth character should only be there if official 3 are already full
        if len(self.residue_name_with_spaces.strip()) != 3:
            raise ValueError('Misaligned residue name: %s' % pdb_line)
        self.residue_name_with_spaces += possible_fourth_character
    self.residue_name = self.residue_name_with_spaces.strip()
    self.chain_id = pdb_line[21]
    self.residue_number = int(pdb_line[22:26])
    self.insertion_code = pdb_line[26]

    # coordinates, occupancy, and temperature factor belong in Atom.Location object
    x = float(pdb_line[30:38])
    y = float(pdb_line[38:46])
    z = float(pdb_line[46:54])
    # Occupancy and temperature factor are optional.  A blank, missing or malformed field makes
    # float() raise ValueError, in which case the documented default is used.  Only ValueError
    # is caught so that unrelated errors (e.g. KeyboardInterrupt) are not swallowed.
    try:
        occupancy = float(pdb_line[54:60])
    except ValueError:
        occupancy = 1.0
    try:
        temperature_factor = float(pdb_line[60:66])
    except ValueError:
        temperature_factor = 0.0
    self.locations = {}
    loc = Atom.Location(alternate_location_indicator, np.array([x, y, z]), occupancy,
                        temperature_factor, self.residue_name_with_spaces)
    self.locations[alternate_location_indicator] = loc
    self.default_location_id = alternate_location_indicator

    # segment id, element_symbol, and formal_charge are not always present
    self.segment_id = pdb_line[72:76].strip()
    self.element_symbol = pdb_line[76:78].strip()
    try:
        self.formal_charge = int(pdb_line[78:80])
    except ValueError:
        self.formal_charge = None

    # figure out atom element
    try:
        # First try to find a sensible element symbol from columns 76-77
        self.element = element.get_by_symbol(self.element_symbol)
    except KeyError:
        # otherwise, deduce element from first two characters of atom name
        # remove digits found in some hydrogen atom names
        symbol = self.name_with_spaces[0:2].strip().lstrip("0123456789")
        try:
            # Some molecular dynamics PDB files, such as gromacs with the ffamber force
            # field, include 4-character hydrogen atom names beginning with "H".  Such names
            # are treated as hydrogen, on the expectation that elements like holmium (Ho)
            # and mercury (Hg) will have fewer than four characters in the atom name.
            if len(self.name) == 4 and self.name[0:1] == "H":
                self.element = element.hydrogen
            else:
                self.element = element.get_by_symbol(symbol)
        except KeyError:
            # No element could be determined from the atom name either.
            self.element = None
def __init__(self, file):
    """Load a PDBx/mmCIF file.

    The atom positions and Topology can be retrieved by calling getPositions() and getTopology().

    Only the first data block of the file is used.  Rows of the atom_site
    category belonging to the first model define the Topology; rows of later
    models only contribute coordinates and must list the same atoms in the
    same order.

    Parameters:
     - file (string) the name of the file to load.  Alternatively you can pass an open file object.

    Raises:
     - ValueError if a later model refers to an atom that the first model did
       not define, or lists its atoms in a different order.
    """
    top = Topology()
    ## The Topology read from the PDBx/mmCIF file
    self.topology = top
    self._positions = []

    # Load the file.  A handle opened here is closed as soon as parsing is
    # finished; a handle supplied by the caller is left open for the caller.
    ownsHandle = isinstance(file, str)
    inputFile = open(file) if ownsHandle else file
    try:
        reader = PdbxReader(inputFile)
        data = []
        reader.read(data)
    finally:
        if ownsHandle:
            inputFile.close()
    block = data[0]

    # Build the topology.
    atomData = block.getObj("atom_site")
    atomNameCol = atomData.getAttributeIndex("label_atom_id")
    atomIdCol = atomData.getAttributeIndex("id")
    resNameCol = atomData.getAttributeIndex("label_comp_id")
    resIdCol = atomData.getAttributeIndex("label_seq_id")
    resNumCol = atomData.getAttributeIndex("auth_seq_id")
    asymIdCol = atomData.getAttributeIndex("label_asym_id")
    chainIdCol = atomData.getAttributeIndex("label_entity_id")
    elementCol = atomData.getAttributeIndex("type_symbol")
    altIdCol = atomData.getAttributeIndex("label_alt_id")
    modelCol = atomData.getAttributeIndex("pdbx_PDB_model_num")
    xCol = atomData.getAttributeIndex("Cartn_x")
    yCol = atomData.getAttributeIndex("Cartn_y")
    zCol = atomData.getAttributeIndex("Cartn_z")
    lastChainId = None
    lastResId = None
    lastAsymId = None
    atomTable = {}  # (label_seq_id, label_asym_id, label_atom_id) -> atom
    atomsInResidue = set()
    models = []
    for row in atomData.getRowList():
        atomKey = (row[resIdCol], row[asymIdCol], row[atomNameCol])
        # A column index of -1 is treated as "column absent": every row then belongs to model "1".
        model = "1" if modelCol == -1 else row[modelCol]
        if model not in models:
            models.append(model)
            self._positions.append([])
        modelIndex = models.index(model)
        if (
            row[altIdCol] != "."
            and atomKey in atomTable
            and len(self._positions[modelIndex]) > atomTable[atomKey].index
        ):
            # This row is an alternate position for an existing atom, so ignore it.
            continue
        if modelIndex == 0:
            # This row defines a new atom.
            if lastChainId != row[chainIdCol]:
                # The start of a new chain.
                chain = top.addChain(row[asymIdCol])
                lastChainId = row[chainIdCol]
                lastResId = None
                lastAsymId = None
            if (
                lastResId != row[resIdCol]
                or lastAsymId != row[asymIdCol]
                or (lastResId == "." and row[atomNameCol] in atomsInResidue)
            ):
                # The start of a new residue.  When the sequence id is ".",
                # a repeated atom name is what signals the next residue.
                res = top.addResidue(row[resNameCol], chain, None if resNumCol == -1 else row[resNumCol])
                lastResId = row[resIdCol]
                lastAsymId = row[asymIdCol]
                atomsInResidue.clear()
            element = None
            try:
                element = elem.get_by_symbol(row[elementCol])
            except KeyError:
                # Unrecognized symbol: leave the element unset.
                pass
            atom = top.addAtom(row[atomNameCol], element, res, row[atomIdCol])
            atomTable[atomKey] = atom
            atomsInResidue.add(row[atomNameCol])
        else:
            # This row defines coordinates for an existing atom in one of the later models.
            try:
                atom = atomTable[atomKey]
            except KeyError:
                raise ValueError(
                    "Unknown atom %s in residue %s %s for model %s"
                    % (row[atomNameCol], row[resNameCol], row[resIdCol], model)
                )
            if atom.index != len(self._positions[modelIndex]):
                raise ValueError(
                    "Atom %s for model %s does not match the order of atoms for model %s"
                    % (row[atomIdCol], model, models[0])
                )
        # Coordinates are scaled by 0.1 and tagged as nanometers below.
        self._positions[modelIndex].append(Vec3(float(row[xCol]), float(row[yCol]), float(row[zCol])) * 0.1)
    for i in range(len(self._positions)):
        self._positions[i] = self._positions[i] * nanometers
    ## The atom positions read from the PDBx/mmCIF file.  If the file contains multiple frames, these are the positions in the first frame.
    self.positions = self._positions[0]
    self.topology.createStandardBonds()
    self._numpyPositions = None

    # Record unit cell information, if present.
    cell = block.getObj("cell")
    if cell is not None and cell.getRowCount() > 0:
        row = cell.getRow(0)
        (a, b, c) = [
            float(row[cell.getAttributeIndex(attribute)]) * 0.1
            for attribute in ("length_a", "length_b", "length_c")
        ]
        (alpha, beta, gamma) = [
            float(row[cell.getAttributeIndex(attribute)]) * math.pi / 180.0
            for attribute in ("angle_alpha", "angle_beta", "angle_gamma")
        ]
        self.topology.setPeriodicBoxVectors(computePeriodicBoxVectors(a, b, c, alpha, beta, gamma))

    # Add bonds based on struct_conn records.
    connectData = block.getObj("struct_conn")
    if connectData is not None:
        res1Col = connectData.getAttributeIndex("ptnr1_label_seq_id")
        res2Col = connectData.getAttributeIndex("ptnr2_label_seq_id")
        atom1Col = connectData.getAttributeIndex("ptnr1_label_atom_id")
        atom2Col = connectData.getAttributeIndex("ptnr2_label_atom_id")
        asym1Col = connectData.getAttributeIndex("ptnr1_label_asym_id")
        asym2Col = connectData.getAttributeIndex("ptnr2_label_asym_id")
        typeCol = connectData.getAttributeIndex("conn_type_id")
        connectBonds = []
        for row in connectData.getRowList():
            # Only the first six characters of the connection type are compared.
            connType = row[typeCol][:6]
            if connType in ("covale", "disulf", "modres"):
                key1 = (row[res1Col], row[asym1Col], row[atom1Col])
                key2 = (row[res2Col], row[asym2Col], row[atom2Col])
                if key1 in atomTable and key2 in atomTable:
                    connectBonds.append((atomTable[key1], atomTable[key2]))
        if len(connectBonds) > 0:
            # Only add bonds that don't already exist.
            existingBonds = set(top.bonds())
            for bond in connectBonds:
                if bond not in existingBonds and (bond[1], bond[0]) not in existingBonds:
                    top.addBond(bond[0], bond[1])
                    existingBonds.add(bond)
def __init__(self, pdb_line):
    """Create a new pdb.Atom from an ATOM or HETATM line.

    Example line:
    ATOM   2209  CB  TYR A 299       6.167  22.607  20.046  1.00  8.12           C
    00000000011111111112222222222333333333344444444445555555555666666666677777777778
    12345678901234567890123456789012345678901234567890123456789012345678901234567890

    ATOM line format description from
      http://deposit.rcsb.org/adit/docs/pdb_atom_format.html:

    COLUMNS        DATA TYPE       CONTENTS
    --------------------------------------------------------------------------------
     1 -  6        Record name     "ATOM  "
     7 - 11        Integer         Atom serial number.
    13 - 16        Atom            Atom name.
    17             Character       Alternate location indicator.
    18 - 20        Residue name    Residue name.
    22             Character       Chain identifier.
    23 - 26        Integer         Residue sequence number.
    27             AChar           Code for insertion of residues.
    31 - 38        Real(8.3)       Orthogonal coordinates for X in Angstroms.
    39 - 46        Real(8.3)       Orthogonal coordinates for Y in Angstroms.
    47 - 54        Real(8.3)       Orthogonal coordinates for Z in Angstroms.
    55 - 60        Real(6.2)       Occupancy (Default = 1.0).
    61 - 66        Real(6.2)       Temperature factor (Default = 0.0).
    73 - 76        LString(4)      Segment identifier, left-justified.
    77 - 78        LString(2)      Element symbol, right-justified.
    79 - 80        LString(2)      Charge on the atom.

    Raises ValueError if column 21 holds a character while the residue name
    in columns 18-20 is shorter than three characters, or if the serial
    number, residue number or a coordinate cannot be parsed as a number.
    """
    # We might modify first/final status during _finalize() methods
    self.is_first_atom_in_chain = False
    self.is_final_atom_in_chain = False
    self.is_first_residue_in_chain = False
    self.is_final_residue_in_chain = False
    # Start parsing fields from pdb line
    self.record_name = pdb_line[0:6].strip()
    self.serial_number = int(pdb_line[6:11])
    self.name_with_spaces = pdb_line[12:16]
    alternate_location_indicator = pdb_line[16]
    self.residue_name_with_spaces = pdb_line[17:20]
    # In some MD codes, notably ffamber in gromacs, residue name has a fourth character in
    # column 21
    possible_fourth_character = pdb_line[20:21]
    if possible_fourth_character != " ":
        # Fourth character should only be there if official 3 are already full
        if len(self.residue_name_with_spaces.strip()) != 3:
            raise ValueError('Misaligned residue name: %s' % pdb_line)
        self.residue_name_with_spaces += possible_fourth_character
    self.residue_name = self.residue_name_with_spaces.strip()

    self.chain_id = pdb_line[21]
    self.residue_number = int(pdb_line[22:26])
    self.insertion_code = pdb_line[26]
    # coordinates, occupancy, and temperature factor belong in Atom.Location object
    x = float(pdb_line[30:38])
    y = float(pdb_line[38:46])
    z = float(pdb_line[46:54])
    # Occupancy and temperature factor are optional: a blank or truncated
    # field makes float() raise ValueError, in which case the documented
    # default is used.  Only ValueError is caught so that unrelated errors
    # (e.g. KeyboardInterrupt) are not silently swallowed.
    try:
        occupancy = float(pdb_line[54:60])
    except ValueError:
        occupancy = 1.0
    try:
        temperature_factor = float(pdb_line[60:66])
    except ValueError:
        temperature_factor = 0.0
    self.locations = {}
    loc = Atom.Location(alternate_location_indicator, np.array([x, y, z]), occupancy, temperature_factor, self.residue_name_with_spaces)
    self.locations[alternate_location_indicator] = loc
    self.default_location_id = alternate_location_indicator
    # segment id, element_symbol, and formal_charge are not always present
    self.segment_id = pdb_line[72:76].strip()
    self.element_symbol = pdb_line[76:78].strip()
    try:
        self.formal_charge = int(pdb_line[78:80])
    except ValueError:
        self.formal_charge = None
    # figure out atom element
    try:
        # First try to find a sensible element symbol from columns 76-77
        self.element = element.get_by_symbol(self.element_symbol)
    except KeyError:
        # otherwise, deduce element from first two characters of atom name
        # remove digits found in some hydrogen atom names
        symbol = self.name_with_spaces[0:2].strip().lstrip("0123456789")
        try:
            # Some molecular dynamics PDB files, such as gromacs with ffamber force
            # field, include 4-character hydrogen atom names beginning with "H",
            # which do not follow the PDB atom naming convention.  Treat any such
            # name as hydrogen; this assumes that elements like holmium (Ho) and
            # mercury (Hg) have fewer than four characters in the atom name.
            if len(self.name) == 4 and self.name[0:1] == "H":
                self.element = element.hydrogen
            else:
                self.element = element.get_by_symbol(symbol)
        except KeyError:
            # No element could be determined from the symbol or the atom name.
            self.element = None
def __init__(self, file):
    """Load a prmtop file.

    Builds a Topology holding a single chain and fills it with the residues,
    atoms and bonds reported by the prmtop loader.  The element assigned to
    each atom is also collected, in atom order, in self.elements.  If the
    loader reports a periodic box, the unit cell dimensions are set on the
    Topology.

    Parameters:
     - file the prmtop file to load; it is handed unchanged to
       amber_file_parser.PrmtopLoader
    """
    topology = Topology()
    ## The Topology read from the prmtop file
    self.topology = topology
    self.elements = []

    # Load the prmtop file
    loader = amber_file_parser.PrmtopLoader(file)
    self._prmtop = loader

    # Atom-name prefixes that identify an element by more than its first
    # letter, paired with the name of the matching attribute of `elem`.
    # The attribute is only looked up when a prefix actually matches.
    prefixedElements = (
        ('CL', 'chlorine'),
        ('NA', 'sodium'),
        ('MG', 'magnesium'),
        ('ZN', 'zinc'),
    )

    # Add atoms to the topology
    PDBFile._loadNameReplacementTables()
    previousResidue = None
    chain = topology.addChain()
    for atomIndex in range(loader.getNumAtoms()):
        residueNumber = loader.getResidueNumber(atomIndex)
        if residueNumber != previousResidue:
            # First atom of a new residue: create it and select its atom-name replacement table.
            previousResidue = residueNumber
            residueName = loader.getResidueLabel(iAtom=atomIndex).strip()
            if residueName in PDBFile._residueNameReplacements:
                residueName = PDBFile._residueNameReplacements[residueName]
            residue = topology.addResidue(residueName, chain)
            atomReplacements = (
                PDBFile._atomNameReplacements[residueName]
                if residueName in PDBFile._atomNameReplacements
                else {}
            )
        atomName = loader.getAtomName(atomIndex).strip()
        if atomName in atomReplacements:
            atomName = atomReplacements[atomName]

        element = None
        if loader.has_atomic_number:
            # Get the element from the prmtop file if available
            try:
                atomicNumber = int(loader._raw_data['ATOMIC_NUMBER'][atomIndex])
                element = elem.Element.getByAtomicNumber(atomicNumber)
            except KeyError:
                element = None
        else:
            # Try to guess the element from the atom name.
            upperName = atomName.upper()
            for prefix, attributeName in prefixedElements:
                if upperName.startswith(prefix):
                    element = getattr(elem, attributeName)
                    break
            else:
                # No special prefix matched: fall back to the first character.
                try:
                    element = elem.get_by_symbol(atomName[0])
                except KeyError:
                    element = None
        topology.addAtom(atomName, element, residue)
        self.elements.append(element)

    # Add bonds to the topology (bonds involving hydrogen first, then the rest)
    atomList = list(topology.atoms())
    for getBonds in (loader.getBondsWithH, loader.getBondsNoH):
        for bond in getBonds():
            topology.addBond(atomList[bond[0]], atomList[bond[1]])

    # Set the periodic box size.
    if loader.getIfBox():
        boxLengths = loader.getBoxBetaAndDimensions()[1:4]
        lengthsInNm = tuple(length.value_in_unit(unit.nanometer) for length in boxLengths)
        topology.setUnitCellDimensions(lengthsInNm * unit.nanometer)
def __init__(self, file):
    """Load a .gro file.

    The atom positions can be retrieved by calling getPositions().

    The file may hold several frames.  Residue ids, residue names, atom
    names and elements are recorded from the first frame only; positions
    and box dimensions are recorded for every frame.

    Parameters:
     - file (string) the name of the file to load

    Raises:
     - Exception if a line is neither a coordinate line nor the box line
       expected after the declared number of atoms
     - IndexError if the file does not contain a complete frame
    """
    xyzs = []
    elements = []  # The element, most useful for quantum chemistry calculations
    atomname = []  # The atom name, for instance 'HW1'
    resid = []
    resname = []
    boxes = []
    xyz = []
    ln = 0  # line number within the current frame
    frame = 0
    # Use a context manager so the file handle is released even when parsing fails.
    with open(file) as groFile:
        for line in groFile:
            if ln == 0:
                # Title line of the frame; nothing in it is needed.
                pass
            elif ln == 1:
                na = int(line.strip())
            elif _is_gro_coord(line):
                if frame == 0:
                    # Create the list of residues, atom names etc. only if it's the first frame.
                    (thisresnum, thisresname, thisatomname) = [line[i*5:i*5+5].strip() for i in range(3)]
                    resname.append(thisresname)
                    resid.append(int(thisresnum))
                    atomname.append(thisatomname)
                    # Guess the element symbol: keep the first character and
                    # drop upper-case letters and digits from the remainder.
                    thiselem = thisatomname
                    if len(thiselem) > 1:
                        thiselem = thiselem[0] + sub('[A-Z0-9]','',thiselem[1:])
                    try:
                        elements.append(elem.get_by_symbol(thiselem))
                    except KeyError:
                        elements.append(None)
                # The width of a coordinate field is inferred from the distance
                # between the first two decimal points found from column 20 on.
                firstDecimalPos = line.index('.', 20)
                secondDecimalPos = line.index('.', firstDecimalPos+1)
                digits = secondDecimalPos-firstDecimalPos
                pos = [float(line[20+i*digits:20+(i+1)*digits]) for i in range(3)]
                xyz.append(Vec3(pos[0], pos[1], pos[2]))
            elif _is_gro_box(line) and ln == na + 2:
                # The box line closes the frame: store it and reset for the next frame.
                boxes.append(tuple([float(i) for i in line.split()])*nanometers)
                xyzs.append(xyz*nanometers)
                xyz = []
                ln = -1
                frame += 1
            else:
                raise Exception("Unexpected line in .gro file: "+line)
            ln += 1

    ## The atom positions read from the file.  If the file contains multiple frames, these are the positions in the first frame.
    self.positions = xyzs[0]
    ## A list containing the element of each atom stored in the file
    self.elements = elements
    ## A list containing the name of each atom stored in the file
    self.atomNames = atomname
    ## A list containing the ID of the residue that each atom belongs to
    self.residueIds = resid
    ## A list containing the name of the residue that each atom belongs to
    self.residueNames = resname
    self._positions = xyzs
    self._unitCellDimensions = boxes
    self._numpyPositions = None
def __init__(self, file):
    """Load a .gro file.

    The atom positions can be retrieved by calling getPositions().

    The file may hold several frames.  Residue ids, residue names, atom
    names and elements are recorded from the first frame only; positions
    and periodic box vectors are recorded for every frame.

    Parameters:
     - file (string) the name of the file to load

    Raises:
     - Exception if a line is neither a coordinate line nor the box line
       expected after the declared number of atoms
     - IndexError if the file does not contain a complete frame
    """
    xyzs = []
    elements = []  # The element, most useful for quantum chemistry calculations
    atomname = []  # The atom name, for instance 'HW1'
    resid = []
    resname = []
    boxes = []
    xyz = []
    ln = 0  # line number within the current frame
    frame = 0
    # Use a context manager so the file handle is released even when parsing fails.
    with open(file) as groFile:
        for line in groFile:
            if ln == 0:
                # Title line of the frame; nothing in it is needed.
                pass
            elif ln == 1:
                na = int(line.strip())
            elif _is_gro_coord(line):
                if frame == 0:
                    # Create the list of residues, atom names etc. only if it's the first frame.
                    (thisresnum, thisresname, thisatomname) = [
                        line[i * 5:i * 5 + 5].strip() for i in range(3)
                    ]
                    resname.append(thisresname)
                    resid.append(int(thisresnum))
                    atomname.append(thisatomname)
                    # Guess the element symbol: keep the first character and
                    # drop upper-case letters and digits from the remainder.
                    thiselem = thisatomname
                    if len(thiselem) > 1:
                        thiselem = thiselem[0] + sub('[A-Z0-9]', '', thiselem[1:])
                    try:
                        elements.append(elem.get_by_symbol(thiselem))
                    except KeyError:
                        elements.append(None)
                # The width of a coordinate field is inferred from the distance
                # between the first two decimal points found from column 20 on.
                firstDecimalPos = line.index('.', 20)
                secondDecimalPos = line.index('.', firstDecimalPos + 1)
                digits = secondDecimalPos - firstDecimalPos
                pos = [
                    float(line[20 + i * digits:20 + (i + 1) * digits])
                    for i in range(3)
                ]
                xyz.append(Vec3(pos[0], pos[1], pos[2]))
            elif _is_gro_box(line) and ln == na + 2:
                # The box line closes the frame: store it and reset for the next frame.
                boxes.append(_construct_box_vectors(line))
                xyzs.append(xyz * nanometers)
                xyz = []
                ln = -1
                frame += 1
            else:
                raise Exception("Unexpected line in .gro file: " + line)
            ln += 1

    ## The atom positions read from the file.  If the file contains multiple frames, these are the positions in the first frame.
    self.positions = xyzs[0]
    ## A list containing the element of each atom stored in the file
    self.elements = elements
    ## A list containing the name of each atom stored in the file
    self.atomNames = atomname
    ## A list containing the ID of the residue that each atom belongs to
    self.residueIds = resid
    ## A list containing the name of the residue that each atom belongs to
    self.residueNames = resname
    self._positions = xyzs
    self._periodicBoxVectors = boxes
    self._numpyPositions = None
def __init__(self, file):
    """Load a .gro file.

    The atom positions can be retrieved by calling getPositions().

    The file may hold several frames.  Residue ids, residue names, atom
    names and elements are recorded from the first frame only; positions
    and box dimensions are recorded for every frame.  Fields are obtained
    by splitting each line on whitespace.

    Parameters:
     - file (string) the name of the file to load

    Raises:
     - Exception if a line is neither a coordinate line nor the box line
       expected after the declared number of atoms
     - IndexError if the file does not contain a complete frame
    """
    xyzs = []
    elements = []  # The element, most useful for quantum chemistry calculations
    atomname = []  # The atom name, for instance 'HW1'
    resid = []
    resname = []
    boxes = []
    xyz = []
    ln = 0  # line number within the current frame
    frame = 0
    # Use a context manager so the file handle is released even when parsing fails.
    with open(file) as groFile:
        for line in groFile:
            sline = line.split()
            if ln == 0:
                # Title line of the frame; nothing in it is needed.
                pass
            elif ln == 1:
                na = int(line.strip())
            elif _is_gro_coord(line):
                if frame == 0:
                    # Create the list of residues, atom names etc. only if it's the first frame.
                    # Name of the residue, for instance '153SOL1 -> SOL1' ; strips leading numbers
                    thisresname = sub('^[0-9]*','',sline[0])
                    resname.append(thisresname)
                    # What is left of the first field once the residue name is removed is the residue id.
                    resid.append(int(sline[0].replace(thisresname,'')))
                    atomname.append(sline[1])
                    # Guess the element symbol: keep the first character and
                    # drop upper-case letters and digits from the remainder.
                    thiselem = sline[1]
                    if len(thiselem) > 1:
                        thiselem = thiselem[0] + sub('[A-Z0-9]','',thiselem[1:])
                    try:
                        elements.append(elem.get_by_symbol(thiselem))
                    except KeyError:
                        elements.append(None)
                # The last three whitespace-separated fields are taken as x, y, z.
                pos = [float(i) for i in sline[-3:]]
                xyz.append(Vec3(pos[0], pos[1], pos[2]))
            elif _is_gro_box(line) and ln == na + 2:
                # The box line closes the frame: store it and reset for the next frame.
                boxes.append([float(i) for i in sline]*nanometers)
                xyzs.append(xyz*nanometers)
                xyz = []
                ln = -1
                frame += 1
            else:
                raise Exception("Unexpected line in .gro file: "+line)
            ln += 1

    ## The atom positions read from the file.  If the file contains multiple frames, these are the positions in the first frame.
    self.positions = xyzs[0]
    ## A list containing the element of each atom stored in the file
    self.elements = elements
    ## A list containing the name of each atom stored in the file
    self.atomNames = atomname
    ## A list containing the ID of the residue that each atom belongs to
    self.residueIds = resid
    ## A list containing the name of the residue that each atom belongs to
    self.residueNames = resname
    self._positions = xyzs
    self._unitCellDimensions = boxes
    self._numpyPositions = None