def run():
    """Print summary statistics for the ranges in the 'nrgcing_ranges' CSV file.

    The relation is read from a hard-coded path. Column 0 of every row is used
    as the entry id and column 1 as the range string. Each range string is
    split into segments by Molecule.ranges2StartStopLoLStatic; a segment is
    indexed as [0] chain id, [1] start and [2] stop.

    Totals for entries, chains, segments and residues, the derived averages
    and the smallest segment found are printed to stdout. Nothing is returned.
    """
    relationName = 'nrgcing_ranges'
    fn = os.path.join('/Users/jd/CMBI/Papers/CING/Data', relationName + '.csv')
    maxRows = 200000  # Rows at or beyond this index are not processed.

    def _ratio(numerator, denominator):
        """Return numerator / denominator as float, or NaN for a zero denominator."""
        if not denominator:
            return float('nan')
        return numerator / float(denominator)

    nentries = 0
    nchains = 0
    nsegments = 0
    nresidues = 0
    # 999 is the initial upper bound; a data set whose segments all have
    # 999 or more residues would leave it (and nresiduesMinEntry) untouched.
    nresiduesMin = 999
    nresiduesMinEntry = None

    r = getRelationFromCsvFile(fn)
    if not r:
        # Same guard as used for other CSV relations in this code base;
        # without it the calls on r below would fail.
        print("ERROR: No relation read from CSV file: %s" % fn)
        return

    columnIdxId = 0
    columnIdxRange = 1
    for rowIdx in range(r.sizeRows()):
        if rowIdx >= maxRows:
            print("Stopping early.")
            break
        # end if
        nentries += 1
        pdb_id = r.getValue(rowIdx, columnIdxId)
        rangeStr = r.getValue(rowIdx, columnIdxRange)
        # Reset per entry so the first segment of every entry counts a chain.
        chain_id = None
        startStopLoL = Molecule.ranges2StartStopLoLStatic(rangeStr)
        nsegments += len(startStopLoL)
        for startStopList in startStopLoL:
            # Start and stop are both inclusive, hence the + 1.
            residueCount = startStopList[2] - startStopList[1] + 1
            nresidues += residueCount
            if residueCount < nresiduesMin:
                nresiduesMin = residueCount
                nresiduesMinEntry = pdb_id
            # end if
            if residueCount < 5:
                nTdebug('residueCount for %s with range %s and segment %s is %s' % (
                    pdb_id, rangeStr, str(startStopList), residueCount))
            # end if
            # A chain is counted whenever the chain id differs from the one of
            # the previous segment within the same entry.
            segmentChainId = startStopList[0]
            if segmentChainId != chain_id:
                chain_id = segmentChainId
                nchains += 1
            # end if
        # end for
    # end for

    print("nentries: %s" % nentries)
    print("nchains: %s" % nchains)
    print("nsegments: %s" % nsegments)
    print("nresidues: %s" % nresidues)
    # Chains per entry is chains over entries (was segments over chains).
    print("chainsPerEntry: %s" % _ratio(nchains, nentries))
    print("segmentsPerEntry: %s" % _ratio(nsegments, nentries))
    print("segmentsPerChain: %s" % _ratio(nsegments, nchains))
    print("residuesPerEntry: %s" % _ratio(nresidues, nentries))
    print("residuesPerChain: %s" % _ratio(nresidues, nchains))
    print("residuesPerSegments: %s" % _ratio(nresidues, nsegments))
    print("residuesMin: %s" % nresiduesMin)
    print("residuesMinEntry: %s" % nresiduesMinEntry)
def getBmrbEntries():
    """Return the sorted list of BMRB entry ids read from the BMRB DB dump.

    The dump is downloaded from bmrbUrl, written to the file name derived by
    getFileName(bmrbUrl), gunzipped (the .gz original is removed) and read as
    a CSV relation without header row. Every value in the first column is
    converted to an int after dropping its first five characters.

    Return None on error.
    """
    r1 = None
    try:
        r1 = urllib.urlopen(bmrbUrl)
        data = r1.read()
    except IOError as e:
        # Honour the documented contract instead of leaking the exception.
        nTerror('Failed to download %s: %s' % (bmrbUrl, e))
        return None
    finally:
        # Always release the connection, also when read() fails.
        if r1 is not None:
            r1.close()
        # end if
    # end try

    fileNameGz = getFileName(bmrbUrl)
    writeDataToFile(fileNameGz, data)
    fileName = fileNameGz[:-3]  # remove .gz
    gunzip(fileNameGz, outputFileName=fileName, removeOriginal=True)

    bmrbDepRelation = getRelationFromCsvFile(fileName, containsHeaderRow=0)
    if not bmrbDepRelation:
        nTerror('No relation read from CSV file: %s' % fileName)
        return None
    # end if

    bmrbDataList = bmrbDepRelation.getColumnByIdx(0)
    try:
        # Presumably the first five characters are a fixed textual prefix in
        # front of the numeric id -- confirm against the dump format.
        bmrbIdList = [int(bmrbData[5:]) for bmrbData in bmrbDataList]
    except (TypeError, ValueError) as e:
        nTerror('Failed to parse BMRB ids from CSV file %s: %s' % (fileName, e))
        return None
    # end try
    nTmessage("Read %s BMRB entries from DB dump" % len(bmrbIdList))
    bmrbIdList.sort()
    return bmrbIdList
def getBmrbEntries():
    """Return the sorted list of BMRB entry ids read from the BMRB DB dump.

    The dump is downloaded from bmrbUrl, written to the file name derived by
    getFileName(bmrbUrl), gunzipped (the .gz original is removed) and read as
    a CSV relation without header row. Every value in the first column is
    converted to an int after dropping its first five characters.

    Return None on error.
    """
    r1 = None
    try:
        r1 = urllib.urlopen(bmrbUrl)
        data = r1.read()
    except IOError as e:
        # Honour the documented contract instead of leaking the exception.
        nTerror('Failed to download %s: %s' % (bmrbUrl, e))
        return None
    finally:
        # Always release the connection, also when read() fails.
        if r1 is not None:
            r1.close()
        # end if
    # end try

    fileNameGz = getFileName(bmrbUrl)
    writeDataToFile(fileNameGz, data)
    fileName = fileNameGz[:-3]  # remove .gz
    gunzip(fileNameGz, outputFileName=fileName, removeOriginal=True)

    bmrbDepRelation = getRelationFromCsvFile(fileName, containsHeaderRow=0)
    if not bmrbDepRelation:
        nTerror('No relation read from CSV file: %s' % fileName)
        return None
    # end if

    bmrbDataList = bmrbDepRelation.getColumnByIdx(0)
    try:
        # Presumably the first five characters are a fixed textual prefix in
        # front of the numeric id -- confirm against the dump format.
        bmrbIdList = [int(bmrbData[5:]) for bmrbData in bmrbDataList]
    except (TypeError, ValueError) as e:
        nTerror('Failed to parse BMRB ids from CSV file %s: %s' % (fileName, e))
        return None
    # end try
    nTmessage("Read %s BMRB entries from DB dump" % len(bmrbIdList))
    bmrbIdList.sort()
    return bmrbIdList
def run():
    """Print summary statistics for the ranges in the 'nrgcing_ranges' CSV file.

    The relation is read from a hard-coded path. Column 0 of every row is used
    as the entry id and column 1 as the range string. Each range string is
    split into segments by Molecule.ranges2StartStopLoLStatic; a segment is
    indexed as [0] chain id, [1] start and [2] stop.

    Totals for entries, chains, segments and residues, the derived averages
    and the smallest segment found are printed to stdout. Nothing is returned.
    """
    relationName = 'nrgcing_ranges'
    fn = os.path.join('/Users/jd/CMBI/Papers/CING/Data', relationName + '.csv')
    maxRows = 200000  # Rows at or beyond this index are not processed.

    def _ratio(numerator, denominator):
        """Return numerator / denominator as float, or NaN for a zero denominator."""
        if not denominator:
            return float('nan')
        return numerator / float(denominator)

    nentries = 0
    nchains = 0
    nsegments = 0
    nresidues = 0
    # 999 is the initial upper bound; a data set whose segments all have
    # 999 or more residues would leave it (and nresiduesMinEntry) untouched.
    nresiduesMin = 999
    nresiduesMinEntry = None

    r = getRelationFromCsvFile(fn)
    if not r:
        # Same guard as used for other CSV relations in this code base;
        # without it the calls on r below would fail.
        print("ERROR: No relation read from CSV file: %s" % fn)
        return

    columnIdxId = 0
    columnIdxRange = 1
    for rowIdx in range(r.sizeRows()):
        if rowIdx >= maxRows:
            print("Stopping early.")
            break
        # end if
        nentries += 1
        pdb_id = r.getValue(rowIdx, columnIdxId)
        rangeStr = r.getValue(rowIdx, columnIdxRange)
        # Reset per entry so the first segment of every entry counts a chain.
        chain_id = None
        startStopLoL = Molecule.ranges2StartStopLoLStatic(rangeStr)
        nsegments += len(startStopLoL)
        for startStopList in startStopLoL:
            # Start and stop are both inclusive, hence the + 1.
            residueCount = startStopList[2] - startStopList[1] + 1
            nresidues += residueCount
            if residueCount < nresiduesMin:
                nresiduesMin = residueCount
                nresiduesMinEntry = pdb_id
            # end if
            if residueCount < 5:
                nTdebug('residueCount for %s with range %s and segment %s is %s' % (
                    pdb_id, rangeStr, str(startStopList), residueCount))
            # end if
            # A chain is counted whenever the chain id differs from the one of
            # the previous segment within the same entry.
            segmentChainId = startStopList[0]
            if segmentChainId != chain_id:
                chain_id = segmentChainId
                nchains += 1
            # end if
        # end for
    # end for

    print("nentries: %s" % nentries)
    print("nchains: %s" % nchains)
    print("nsegments: %s" % nsegments)
    print("nresidues: %s" % nresidues)
    # Chains per entry is chains over entries (was segments over chains).
    print("chainsPerEntry: %s" % _ratio(nchains, nentries))
    print("segmentsPerEntry: %s" % _ratio(nsegments, nentries))
    print("segmentsPerChain: %s" % _ratio(nsegments, nchains))
    print("residuesPerEntry: %s" % _ratio(nresidues, nentries))
    print("residuesPerChain: %s" % _ratio(nresidues, nchains))
    print("residuesPerSegments: %s" % _ratio(nresidues, nsegments))
    print("residuesMin: %s" % nresiduesMin)
    print("residuesMinEntry: %s" % nresiduesMinEntry)