def testUpdateDataFile(self):
    """Test case - write a data file, read it back, update it and rewrite it.

    Builds a ``pdbx_seqtool_mapping_ref`` category with seven attributes and
    four rows inside a container named ``myblock`` and writes it to
    ``test-output-1.cif``.  That file is then read back, ``ref_mon_id`` and
    ``ref_mon_num`` are overwritten in every row, and the result is written
    to ``test-output-2.cif``.

    Any exception is logged to ``self.lfh`` and reported as a test failure.
    """
    self.lfh.write(
        "\nStarting %s %s\n" % (self.__class__.__name__, sys._getframe().f_code.co_name))
    try:
        # Create an initial data file.
        myDataList = []
        curContainer = DataContainer("myblock")
        aCat = DataCategory("pdbx_seqtool_mapping_ref")
        for attributeName in ("ordinal", "entity_id", "auth_mon_id", "auth_mon_num",
                              "pdb_chain_id", "ref_mon_id", "ref_mon_num"):
            aCat.appendAttribute(attributeName)
        # The rows differ only in their leading ordinal value.
        for ordinal in (9, 10, 11, 12):
            aCat.append([ordinal, 2, 3, 4, 5, 6, 7])
        curContainer.append(aCat)
        myDataList.append(curContainer)
        # ``with`` guarantees the handle is closed even if the writer raises.
        with open("test-output-1.cif", "w") as ofh:
            pdbxW = PdbxWriter(ofh)
            pdbxW.write(myDataList)

        # Read and update the data.
        myDataList = []
        with open("test-output-1.cif", "r") as ifh:
            pRd = PdbxReader(ifh)
            pRd.read(myDataList)

        myBlock = myDataList[0]
        myBlock.printIt()
        myCat = myBlock.getObj('pdbx_seqtool_mapping_ref')
        myCat.printIt()
        # ``range`` (rather than ``xrange``) works on both Python 2 and 3.
        for iRow in range(myCat.getRowCount()):
            myCat.setValue('some value', 'ref_mon_id', iRow)
            myCat.setValue(100, 'ref_mon_num', iRow)
        with open("test-output-2.cif", "w") as ofh:
            pdbxW = PdbxWriter(ofh)
            pdbxW.write(myDataList)
    except Exception:
        # ``Exception`` lets KeyboardInterrupt/SystemExit propagate instead
        # of being converted into a test failure.
        traceback.print_exc(file=self.lfh)
        self.fail()
def testUpdateDataFile(self):
    """Test case - write a data file, read it back, update it and rewrite it.

    Builds a ``pdbx_seqtool_mapping_ref`` category with seven attributes and
    four rows inside a container named ``myblock`` and writes it to
    ``test-output-1.cif``.  That file is then read back, ``ref_mon_id`` and
    ``ref_mon_num`` are overwritten in every row, and the result is written
    to ``test-output-2.cif``.

    Any exception is logged to ``self.lfh`` and reported as a test failure.
    """
    self.lfh.write(
        "\nStarting %s %s\n" % (self.__class__.__name__, sys._getframe().f_code.co_name))
    try:
        # Create an initial data file.
        myDataList = []
        curContainer = DataContainer("myblock")
        aCat = DataCategory("pdbx_seqtool_mapping_ref")
        for attributeName in ("ordinal", "entity_id", "auth_mon_id", "auth_mon_num",
                              "pdb_chain_id", "ref_mon_id", "ref_mon_num"):
            aCat.appendAttribute(attributeName)
        # The rows differ only in their leading ordinal value.
        for ordinal in (9, 10, 11, 12):
            aCat.append([ordinal, 2, 3, 4, 5, 6, 7])
        curContainer.append(aCat)
        myDataList.append(curContainer)
        # ``with`` guarantees the handle is closed even if the writer raises.
        with open("test-output-1.cif", "w") as ofh:
            pdbxW = PdbxWriter(ofh)
            pdbxW.write(myDataList)

        # Read and update the data.
        myDataList = []
        with open("test-output-1.cif", "r") as ifh:
            pRd = PdbxReader(ifh)
            pRd.read(myDataList)

        myBlock = myDataList[0]
        myBlock.printIt()
        myCat = myBlock.getObj('pdbx_seqtool_mapping_ref')
        myCat.printIt()
        # ``range`` (rather than ``xrange``) works on both Python 2 and 3.
        for iRow in range(myCat.getRowCount()):
            myCat.setValue('some value', 'ref_mon_id', iRow)
            myCat.setValue(100, 'ref_mon_num', iRow)
        with open("test-output-2.cif", "w") as ofh:
            pdbxW = PdbxWriter(ofh)
            pdbxW.write(myDataList)
    except Exception:
        # ``Exception`` lets KeyboardInterrupt/SystemExit propagate instead
        # of being converted into a test failure.
        traceback.print_exc(file=self.lfh)
        self.fail()
def testSimpleInitialization(self):
    """Test case - simple initialization of a data category and data block.

    Creates a category named ``myCategory`` with ten attributes and ten
    identical rows, places it in a container named ``myblock``, writes the
    container to ``test-simple.cif``, reads the file back and logs the
    recovered category name, attribute list and row list to ``self.lfh``.

    Any exception is logged to ``self.lfh`` and reported as a test failure.
    """
    self.lfh.write(
        "\nStarting %s %s\n" % (self.__class__.__name__, sys._getframe().f_code.co_name))
    try:
        fn = "test-simple.cif"
        attributeNameList = [
            'aOne', 'aTwo', 'aThree', 'aFour', 'aFive',
            'aSix', 'aSeven', 'aEight', 'aNine', 'aTen',
        ]
        # Ten independent rows, each holding the integers 1..10.
        rowList = [list(range(1, 11)) for _ in range(10)]
        nameCat = 'myCategory'

        curContainer = DataContainer("myblock")
        aCat = DataCategory(nameCat, attributeNameList, rowList)
        aCat.printIt()
        curContainer.append(aCat)
        curContainer.printIt()

        myContainerList = [curContainer]
        # ``with`` guarantees the handle is closed even if the writer raises.
        with open(fn, "w") as ofh:
            pdbxW = PdbxWriter(ofh)
            pdbxW.write(myContainerList)

        myContainerList = []
        with open(fn, "r") as ifh:
            pRd = PdbxReader(ifh)
            pRd.read(myContainerList)

        for container in myContainerList:
            for objName in container.getObjNameList():
                name, aList, rList = container.getObj(objName).get()
                self.lfh.write("Recovered data category  %s\n" % name)
                self.lfh.write("Attribute list           %r\n" % repr(aList))
                self.lfh.write("Row list                 %r\n" % repr(rList))
    except Exception:
        # ``Exception`` lets KeyboardInterrupt/SystemExit propagate instead
        # of being converted into a test failure.
        traceback.print_exc(file=self.lfh)
        self.fail()
def testReadSFDataFile(self):
    """Test case - read a structure factor data file and report ratio statistics.

    Reads ``self.pathSFDataFile``, takes the ``refln`` category of the first
    container and, for every row whose ``_refln.f_meas_au`` and
    ``_refln.f_meas_sigma_au`` values yield a usable ratio, accumulates the
    minimum, maximum and mean of ``sigma / f``.  The summary line is written
    to ``self.lfh``.

    The test fails if the ``refln`` category is missing, if no row yields a
    usable ratio, or if any other exception is raised; the traceback goes to
    ``sys.stderr``.
    """
    self.lfh.write(
        "\nStarting %s %s\n" % (self.__class__.__name__, sys._getframe().f_code.co_name))
    try:
        myContainerList = []
        # ``with`` guarantees the handle is closed on every exit path.
        with open(self.pathSFDataFile, "r") as ifh:
            pRd = PdbxReader(ifh)
            pRd.read(myContainerList)
        c0 = myContainerList[0]

        catObj = c0.getObj("refln")
        if catObj is None:
            # Previously ``return false``: an undefined name that raised
            # NameError.  Report the missing category explicitly instead.
            self.fail("refln category not found in %s" % self.pathSFDataFile)

        # Map lower-cased item names to their column index.
        itNameList = catObj.getItemNameList()
        itDict = {}
        for idxIt, itName in enumerate(itNameList):
            itDict[str(itName).lower()] = idxIt

        idf = itDict['_refln.f_meas_au']
        idsigf = itDict['_refln.f_meas_sigma_au']
        # Infinite sentinels so the first ratio always replaces them.
        minR = float("inf")
        maxR = float("-inf")
        sumR = 0
        icount = 0
        for row in catObj.getRowList():
            try:
                f = float(row[idf])
                sigf = float(row[idsigf])
                # NOTE(review): the summary label below says f/sig(f) but the
                # value computed is sig(f)/f - confirm which is intended.
                ratio = sigf / f
            except (ValueError, TypeError, ZeroDivisionError, IndexError):
                # Best effort: skip rows with missing/non-numeric values or f == 0.
                continue
            maxR = max(maxR, ratio)
            minR = min(minR, ratio)
            sumR += ratio
            icount += 1

        if icount == 0:
            # Avoid an unexplained ZeroDivisionError when computing the mean.
            self.fail("no usable refln rows in %s" % self.pathSFDataFile)
        self.lfh.write("f/sig(f) min %f max %f avg %f count %d\n" %
                       (minR, maxR, sumR / icount, icount))
    except Exception:
        # ``Exception`` lets KeyboardInterrupt/SystemExit propagate instead
        # of being converted into a test failure.
        traceback.print_exc(file=sys.stderr)
        self.fail()
def testReadSFDataFile(self):
    """Test case - read a structure factor data file and report ratio statistics.

    Reads ``self.pathSFDataFile``, takes the ``refln`` category of the first
    container and, for every row whose ``_refln.f_meas_au`` and
    ``_refln.f_meas_sigma_au`` values yield a usable ratio, accumulates the
    minimum, maximum and mean of ``sigma / f``.  The summary line is written
    to ``self.lfh``.

    The test fails if the ``refln`` category is missing, if no row yields a
    usable ratio, or if any other exception is raised; the traceback goes to
    ``sys.stderr``.
    """
    self.lfh.write(
        "\nStarting %s %s\n" % (self.__class__.__name__, sys._getframe().f_code.co_name))
    try:
        myContainerList = []
        # ``with`` guarantees the handle is closed on every exit path.
        with open(self.pathSFDataFile, "r") as ifh:
            pRd = PdbxReader(ifh)
            pRd.read(myContainerList)
        c0 = myContainerList[0]

        catObj = c0.getObj("refln")
        if catObj is None:
            # Previously ``return false``: an undefined name that raised
            # NameError.  Report the missing category explicitly instead.
            self.fail("refln category not found in %s" % self.pathSFDataFile)

        # Map lower-cased item names to their column index.
        itNameList = catObj.getItemNameList()
        itDict = {}
        for idxIt, itName in enumerate(itNameList):
            itDict[str(itName).lower()] = idxIt

        idf = itDict['_refln.f_meas_au']
        idsigf = itDict['_refln.f_meas_sigma_au']
        # Infinite sentinels so the first ratio always replaces them.
        minR = float("inf")
        maxR = float("-inf")
        sumR = 0
        icount = 0
        for row in catObj.getRowList():
            try:
                f = float(row[idf])
                sigf = float(row[idsigf])
                # NOTE(review): the summary label below says f/sig(f) but the
                # value computed is sig(f)/f - confirm which is intended.
                ratio = sigf / f
            except (ValueError, TypeError, ZeroDivisionError, IndexError):
                # Best effort: skip rows with missing/non-numeric values or f == 0.
                continue
            maxR = max(maxR, ratio)
            minR = min(minR, ratio)
            sumR += ratio
            icount += 1

        if icount == 0:
            # Avoid an unexplained ZeroDivisionError when computing the mean.
            self.fail("no usable refln rows in %s" % self.pathSFDataFile)
        self.lfh.write("f/sig(f) min %f max %f avg %f count %d\n" %
                       (minR, maxR, sumR / icount, icount))
    except Exception:
        # ``Exception`` lets KeyboardInterrupt/SystemExit propagate instead
        # of being converted into a test failure.
        traceback.print_exc(file=sys.stderr)
        self.fail()
def testSimpleInitialization(self):
    """Test case - simple initialization of a data category and data block.

    Creates a category named ``myCategory`` with ten attributes and ten
    identical rows, places it in a container named ``myblock``, writes the
    container to ``test-simple.cif``, reads the file back and logs the
    recovered category name, attribute list and row list to ``self.lfh``.

    Any exception is logged to ``self.lfh`` and reported as a test failure.
    """
    self.lfh.write(
        "\nStarting %s %s\n" % (self.__class__.__name__, sys._getframe().f_code.co_name))
    try:
        fn = "test-simple.cif"
        attributeNameList = [
            'aOne', 'aTwo', 'aThree', 'aFour', 'aFive',
            'aSix', 'aSeven', 'aEight', 'aNine', 'aTen',
        ]
        # Ten independent rows, each holding the integers 1..10.
        rowList = [list(range(1, 11)) for _ in range(10)]
        nameCat = 'myCategory'

        curContainer = DataContainer("myblock")
        aCat = DataCategory(nameCat, attributeNameList, rowList)
        aCat.printIt()
        curContainer.append(aCat)
        curContainer.printIt()

        myContainerList = [curContainer]
        # ``with`` guarantees the handle is closed even if the writer raises.
        with open(fn, "w") as ofh:
            pdbxW = PdbxWriter(ofh)
            pdbxW.write(myContainerList)

        myContainerList = []
        with open(fn, "r") as ifh:
            pRd = PdbxReader(ifh)
            pRd.read(myContainerList)

        for container in myContainerList:
            for objName in container.getObjNameList():
                name, aList, rList = container.getObj(objName).get()
                self.lfh.write("Recovered data category  %s\n" % name)
                self.lfh.write("Attribute list           %r\n" % repr(aList))
                self.lfh.write("Row list                 %r\n" % repr(rList))
    except Exception:
        # ``Exception`` lets KeyboardInterrupt/SystemExit propagate instead
        # of being converted into a test failure.
        traceback.print_exc(file=self.lfh)
        self.fail()
def testReadBigDataFile(self):
    """Test case - read the data file at ``self.pathBigPdbxDataFile``.

    The file is parsed with ``PdbxReader`` into a local list; the parsed
    content is not inspected.  Any exception is printed to ``sys.stderr``
    and reported as a test failure.
    """
    self.lfh.write(
        "\nStarting %s %s\n" % (self.__class__.__name__, sys._getframe().f_code.co_name))
    try:
        myDataList = []
        # ``with`` guarantees the handle is closed even if the reader raises.
        with open(self.pathBigPdbxDataFile, "r") as ifh:
            pRd = PdbxReader(ifh)
            pRd.read(myDataList)
    except Exception:
        # ``Exception`` lets KeyboardInterrupt/SystemExit propagate instead
        # of being converted into a test failure.
        traceback.print_exc(file=sys.stderr)
        self.fail()
def testReadDataFile(self):
    """Test case - read the data file at ``self.pathPdbxDataFile``.

    The file is parsed with ``PdbxReader`` into a local list; the parsed
    content is not inspected.  Any exception is printed to ``sys.stderr``
    and reported as a test failure.
    """
    self.lfh.write(
        "\nStarting %s %s\n" % (self.__class__.__name__, sys._getframe().f_code.co_name))
    try:
        myDataList = []
        # ``with`` guarantees the handle is closed even if the reader raises.
        with open(self.pathPdbxDataFile, "r") as ifh:
            pRd = PdbxReader(ifh)
            pRd.read(myDataList)
    except Exception:
        # ``Exception`` lets KeyboardInterrupt/SystemExit propagate instead
        # of being converted into a test failure.
        traceback.print_exc(file=sys.stderr)
        self.fail()
def testReadWriteDataFile(self):
    """Test case - data file read/write round trip.

    Reads ``self.pathPdbxDataFile`` with ``PdbxReader`` and writes the
    parsed containers to ``self.pathOutputFile`` with ``PdbxWriter``.  Any
    exception is logged to ``self.lfh`` and reported as a test failure.
    """
    self.lfh.write(
        "\nStarting %s %s\n" % (self.__class__.__name__, sys._getframe().f_code.co_name))
    try:
        myDataList = []
        # ``with`` guarantees each handle is closed even if parsing or
        # writing raises.
        with open(self.pathPdbxDataFile, "r") as ifh:
            pRd = PdbxReader(ifh)
            pRd.read(myDataList)

        with open(self.pathOutputFile, "w") as ofh:
            pWr = PdbxWriter(ofh)
            pWr.write(myDataList)
    except Exception:
        # ``Exception`` lets KeyboardInterrupt/SystemExit propagate instead
        # of being converted into a test failure.
        traceback.print_exc(file=self.lfh)
        self.fail()
def testReadWriteDataFile(self):
    """Test case - data file read/write round trip.

    Reads ``self.pathPdbxDataFile`` with ``PdbxReader`` and writes the
    parsed containers to ``self.pathOutputFile`` with ``PdbxWriter``.  Any
    exception is logged to ``self.lfh`` and reported as a test failure.
    """
    self.lfh.write(
        "\nStarting %s %s\n" % (self.__class__.__name__, sys._getframe().f_code.co_name))
    try:
        myDataList = []
        # ``with`` guarantees each handle is closed even if parsing or
        # writing raises.
        with open(self.pathPdbxDataFile, "r") as ifh:
            pRd = PdbxReader(ifh)
            pRd.read(myDataList)

        with open(self.pathOutputFile, "w") as ofh:
            pWr = PdbxWriter(ofh)
            pWr.write(myDataList)
    except Exception:
        # ``Exception`` lets KeyboardInterrupt/SystemExit propagate instead
        # of being converted into a test failure.
        traceback.print_exc(file=self.lfh)
        self.fail()
def __init__(self, file):
    """Load a PDBx/mmCIF file.

    The atom positions and Topology can be retrieved by calling
    getPositions() and getTopology().

    Parameters
    ----------
    file : string
        the name of the file to load.  Alternatively you can pass an open
        file object.  A file opened here from a name is closed again once
        it has been parsed; a file object supplied by the caller is left
        open.

    Raises
    ------
    ValueError
        if a model after the first refers to an atom the first model did
        not define, or lists its atoms in a different order.
    """
    top = Topology()
    ## The Topology read from the PDBx/mmCIF file
    self.topology = top
    self._positions = []

    # Load the file.  Only close the handle if it was opened here.
    inputFile = file
    ownHandle = False
    if isinstance(file, str):
        inputFile = open(file)
        ownHandle = True
    try:
        reader = PdbxReader(inputFile)
        data = []
        reader.read(data)
    finally:
        if ownHandle:
            inputFile.close()
    block = data[0]

    # Build the topology.

    atomData = block.getObj('atom_site')
    atomNameCol = atomData.getAttributeIndex('auth_atom_id')
    atomIdCol = atomData.getAttributeIndex('id')
    resNameCol = atomData.getAttributeIndex('auth_comp_id')
    resNumCol = atomData.getAttributeIndex('auth_seq_id')
    resInsertionCol = atomData.getAttributeIndex('pdbx_PDB_ins_code')
    chainIdCol = atomData.getAttributeIndex('auth_asym_id')
    elementCol = atomData.getAttributeIndex('type_symbol')
    altIdCol = atomData.getAttributeIndex('label_alt_id')
    modelCol = atomData.getAttributeIndex('pdbx_PDB_model_num')
    xCol = atomData.getAttributeIndex('Cartn_x')
    yCol = atomData.getAttributeIndex('Cartn_y')
    zCol = atomData.getAttributeIndex('Cartn_z')
    lastChainId = None
    lastResId = None
    atomTable = {}
    atomsInResidue = set()
    models = []
    for row in atomData.getRowList():
        atomKey = ((row[resNumCol], row[chainIdCol], row[atomNameCol]))
        model = ('1' if modelCol == -1 else row[modelCol])
        if model not in models:
            models.append(model)
            self._positions.append([])
        modelIndex = models.index(model)
        if row[altIdCol] != '.' and atomKey in atomTable and len(self._positions[modelIndex]) > atomTable[atomKey].index:
            # This row is an alternate position for an existing atom, so ignore it.

            continue
        if modelIndex == 0:
            # This row defines a new atom.

            if lastChainId != row[chainIdCol]:
                # The start of a new chain.
                chain = top.addChain(row[chainIdCol])
                lastChainId = row[chainIdCol]
                lastResId = None
            if lastResId != row[resNumCol] or lastChainId != row[chainIdCol] or (lastResId == '.' and row[atomNameCol] in atomsInResidue):
                # The start of a new residue.
                resId = (None if resNumCol == -1 else row[resNumCol])
                resIC = ('' if resInsertionCol == -1 else row[resInsertionCol])
                res = top.addResidue(row[resNameCol], chain, resId, resIC)
                lastResId = row[resNumCol]
                atomsInResidue.clear()
            element = None
            try:
                element = elem.get_by_symbol(row[elementCol])
            except KeyError:
                # Unrecognised element symbol: the atom is added with element None.
                pass
            atom = top.addAtom(row[atomNameCol], element, res, row[atomIdCol])
            atomTable[atomKey] = atom
            atomsInResidue.add(row[atomNameCol])
        else:
            # This row defines coordinates for an existing atom in one of the later models.

            try:
                atom = atomTable[atomKey]
            except KeyError:
                raise ValueError('Unknown atom %s in residue %s %s for model %s' % (row[atomNameCol], row[resNameCol], row[resNumCol], model))
            if atom.index != len(self._positions[modelIndex]):
                raise ValueError('Atom %s for model %s does not match the order of atoms for model %s' % (row[atomIdCol], model, models[0]))
        # Coordinates are scaled by 0.1 before the nanometers unit is attached below.
        self._positions[modelIndex].append(Vec3(float(row[xCol]), float(row[yCol]), float(row[zCol]))*0.1)
    for i in range(len(self._positions)):
        self._positions[i] = self._positions[i]*nanometers
    ## The atom positions read from the PDBx/mmCIF file.  If the file contains multiple frames, these are the positions in the first frame.
    self.positions = self._positions[0]
    self.topology.createStandardBonds()
    self._numpyPositions = None

    # Record unit cell information, if present.

    cell = block.getObj('cell')
    if cell is not None and cell.getRowCount() > 0:
        row = cell.getRow(0)
        (a, b, c) = [float(row[cell.getAttributeIndex(attribute)])*0.1 for attribute in ('length_a', 'length_b', 'length_c')]
        (alpha, beta, gamma) = [float(row[cell.getAttributeIndex(attribute)])*math.pi/180.0 for attribute in ('angle_alpha', 'angle_beta', 'angle_gamma')]
        self.topology.setPeriodicBoxVectors(computePeriodicBoxVectors(a, b, c, alpha, beta, gamma))

    # Add bonds based on struct_conn records.

    connectData = block.getObj('struct_conn')
    if connectData is not None:
        res1Col = connectData.getAttributeIndex('ptnr1_label_seq_id')
        res2Col = connectData.getAttributeIndex('ptnr2_label_seq_id')
        atom1Col = connectData.getAttributeIndex('ptnr1_label_atom_id')
        atom2Col = connectData.getAttributeIndex('ptnr2_label_atom_id')
        asym1Col = connectData.getAttributeIndex('ptnr1_label_asym_id')
        asym2Col = connectData.getAttributeIndex('ptnr2_label_asym_id')
        typeCol = connectData.getAttributeIndex('conn_type_id')
        connectBonds = []
        for row in connectData.getRowList():
            # Named connType so the builtin ``type`` is not shadowed.
            connType = row[typeCol][:6]
            if connType in ('covale', 'disulf', 'modres'):
                # NOTE(review): atomTable is keyed by auth_* columns above,
                # while these keys come from label_* columns - confirm the
                # two can actually match.
                key1 = (row[res1Col], row[asym1Col], row[atom1Col])
                key2 = (row[res2Col], row[asym2Col], row[atom2Col])
                if key1 in atomTable and key2 in atomTable:
                    connectBonds.append((atomTable[key1], atomTable[key2]))
        if len(connectBonds) > 0:
            # Only add bonds that don't already exist.
            existingBonds = set(top.bonds())
            for bond in connectBonds:
                if bond not in existingBonds and (bond[1], bond[0]) not in existingBonds:
                    top.addBond(bond[0], bond[1])
                    existingBonds.add(bond)
def __init__(self, file):
    """Load a PDBx/mmCIF file.

    The atom positions and Topology can be retrieved by calling
    getPositions() and getTopology().

    Parameters:
     - file (string) the name of the file to load.  Alternatively you can
       pass an open file object.  A file opened here from a name is closed
       again once it has been parsed; a file object supplied by the caller
       is left open.

    Raises ValueError if a model after the first refers to an atom the
    first model did not define, or lists its atoms in a different order.
    """
    top = Topology()
    ## The Topology read from the PDBx/mmCIF file
    self.topology = top
    self._positions = []

    # Load the file.  Only close the handle if it was opened here.
    inputFile = file
    ownHandle = False
    if isinstance(file, str):
        inputFile = open(file)
        ownHandle = True
    try:
        reader = PdbxReader(inputFile)
        data = []
        reader.read(data)
    finally:
        if ownHandle:
            inputFile.close()
    block = data[0]

    # Build the topology.

    atomData = block.getObj('atom_site')
    atomNameCol = atomData.getAttributeIndex('label_atom_id')
    atomIdCol = atomData.getAttributeIndex('id')
    resNameCol = atomData.getAttributeIndex('label_comp_id')
    resIdCol = atomData.getAttributeIndex('label_seq_id')
    resNumCol = atomData.getAttributeIndex('auth_seq_id')
    asymIdCol = atomData.getAttributeIndex('label_asym_id')
    chainIdCol = atomData.getAttributeIndex('label_entity_id')
    elementCol = atomData.getAttributeIndex('type_symbol')
    altIdCol = atomData.getAttributeIndex('label_alt_id')
    modelCol = atomData.getAttributeIndex('pdbx_PDB_model_num')
    xCol = atomData.getAttributeIndex('Cartn_x')
    yCol = atomData.getAttributeIndex('Cartn_y')
    zCol = atomData.getAttributeIndex('Cartn_z')
    lastChainId = None
    lastResId = None
    lastAsymId = None
    atomTable = {}
    atomsInResidue = set()
    models = []
    for row in atomData.getRowList():
        atomKey = ((row[resIdCol], row[asymIdCol], row[atomNameCol]))
        model = ('1' if modelCol == -1 else row[modelCol])
        if model not in models:
            models.append(model)
            self._positions.append([])
        modelIndex = models.index(model)
        if row[altIdCol] != '.' and atomKey in atomTable and len(
                self._positions[modelIndex]) > atomTable[atomKey].index:
            # This row is an alternate position for an existing atom, so ignore it.

            continue
        if modelIndex == 0:
            # This row defines a new atom.

            if lastChainId != row[chainIdCol]:
                # The start of a new chain.  A chain starts when
                # label_entity_id changes and is created with the
                # label_asym_id value.
                chain = top.addChain(row[asymIdCol])
                lastChainId = row[chainIdCol]
                lastResId = None
                lastAsymId = None
            if lastResId != row[resIdCol] or lastAsymId != row[
                    asymIdCol] or (lastResId == '.' and
                                   row[atomNameCol] in atomsInResidue):
                # The start of a new residue.
                res = top.addResidue(
                    row[resNameCol], chain,
                    None if resNumCol == -1 else row[resNumCol])
                lastResId = row[resIdCol]
                lastAsymId = row[asymIdCol]
                atomsInResidue.clear()
            element = None
            try:
                element = elem.get_by_symbol(row[elementCol])
            except KeyError:
                # Unrecognised element symbol: the atom is added with element None.
                pass
            atom = top.addAtom(row[atomNameCol], element, res,
                               row[atomIdCol])
            atomTable[atomKey] = atom
            atomsInResidue.add(row[atomNameCol])
        else:
            # This row defines coordinates for an existing atom in one of the later models.

            try:
                atom = atomTable[atomKey]
            except KeyError:
                raise ValueError(
                    'Unknown atom %s in residue %s %s for model %s' %
                    (row[atomNameCol], row[resNameCol], row[resIdCol],
                     model))
            if atom.index != len(self._positions[modelIndex]):
                raise ValueError(
                    'Atom %s for model %s does not match the order of atoms for model %s'
                    % (row[atomIdCol], model, models[0]))
        # Coordinates are scaled by 0.1 before the nanometers unit is attached below.
        self._positions[modelIndex].append(
            Vec3(float(row[xCol]), float(row[yCol]), float(row[zCol])) *
            0.1)
    for i in range(len(self._positions)):
        self._positions[i] = self._positions[i] * nanometers
    ## The atom positions read from the PDBx/mmCIF file.  If the file contains multiple frames, these are the positions in the first frame.
    self.positions = self._positions[0]
    self.topology.createStandardBonds()
    self._numpyPositions = None

    # Record unit cell information, if present.

    cell = block.getObj('cell')
    if cell is not None and cell.getRowCount() > 0:
        row = cell.getRow(0)
        (a, b, c) = [
            float(row[cell.getAttributeIndex(attribute)]) * 0.1
            for attribute in ('length_a', 'length_b', 'length_c')
        ]
        (alpha, beta, gamma) = [
            float(row[cell.getAttributeIndex(attribute)]) * math.pi / 180.0
            for attribute in ('angle_alpha', 'angle_beta', 'angle_gamma')
        ]
        self.topology.setPeriodicBoxVectors(
            computePeriodicBoxVectors(a, b, c, alpha, beta, gamma))

    # Add bonds based on struct_conn records.

    connectData = block.getObj('struct_conn')
    if connectData is not None:
        res1Col = connectData.getAttributeIndex('ptnr1_label_seq_id')
        res2Col = connectData.getAttributeIndex('ptnr2_label_seq_id')
        atom1Col = connectData.getAttributeIndex('ptnr1_label_atom_id')
        atom2Col = connectData.getAttributeIndex('ptnr2_label_atom_id')
        asym1Col = connectData.getAttributeIndex('ptnr1_label_asym_id')
        asym2Col = connectData.getAttributeIndex('ptnr2_label_asym_id')
        typeCol = connectData.getAttributeIndex('conn_type_id')
        connectBonds = []
        for row in connectData.getRowList():
            # Named connType so the builtin ``type`` is not shadowed.
            connType = row[typeCol][:6]
            if connType in ('covale', 'disulf', 'modres'):
                # Same (seq id, asym id, atom name) key shape as atomTable.
                key1 = (row[res1Col], row[asym1Col], row[atom1Col])
                key2 = (row[res2Col], row[asym2Col], row[atom2Col])
                if key1 in atomTable and key2 in atomTable:
                    connectBonds.append((atomTable[key1],
                                         atomTable[key2]))
        if len(connectBonds) > 0:
            # Only add bonds that don't already exist.
            existingBonds = set(top.bonds())
            for bond in connectBonds:
                if bond not in existingBonds and (
                        bond[1], bond[0]) not in existingBonds:
                    top.addBond(bond[0], bond[1])
                    existingBonds.add(bond)