def run():
    """Print summary statistics for the 'nrgcing_ranges' CSV relation.

    Column 0 of each row is read as the entry id and column 1 as a ranges
    string. Molecule.ranges2StartStopLoLStatic turns that string into a list
    of segments; index 0 of a segment is used as its chain id and indices 1
    and 2 as its first and last residue number (both counted).

    Entries, chains, segments and residues are tallied, the smallest segment
    and its entry are tracked, and the totals plus their ratios are printed.
    A chain is counted each time the chain id differs from that of the
    previous segment within the same entry. At most the first 200000 rows
    are processed. Returns None.
    """
    relationName = 'nrgcing_ranges'
    fn = os.path.join('/Users/jd/CMBI/Papers/CING/Data', relationName + '.csv')
    maxRows = 200000  # Rows at or beyond this index are not processed.

    def ratio(numerator, denominator):
        # A zero denominator (no rows, chains or segments) yields nan rather
        # than a ZeroDivisionError; float() keeps true division on Python 2.
        if not denominator:
            return float('nan')
        return numerator / float(denominator)

    nentries = 0
    nchains = 0
    nsegments = 0
    nresidues = 0
    # NOTE(review): 999 is the starting sentinel for the minimum; if every
    # segment is longer, 999 is reported with entry None.
    nresiduesMin = 999
    nresiduesMinEntry = None

    r = getRelationFromCsvFile(fn)
    if not r:
        # Without a relation there is nothing to summarise.
        print("ERROR: No relation read from CSV file: %s" % fn)
        return
    # end if

    columnIdxId = 0
    columnIdxRange = 1

    for rowIdx in range(r.sizeRows()):
        if rowIdx >= maxRows:
            print("Stopping early.")
            break
        # end if
        nentries += 1
        pdb_id = r.getValue(rowIdx, columnIdxId)
        rangeStr = r.getValue(rowIdx, columnIdxRange)
        chain_id = None  # Reset so the first segment of each entry counts as a new chain.
        startStopLoL = Molecule.ranges2StartStopLoLStatic(rangeStr)
        nsegments += len(startStopLoL)
        for startStopList in startStopLoL:
            residueCount = startStopList[2] - startStopList[1] + 1
            nresidues += residueCount
            if residueCount < nresiduesMin:
                nresiduesMin = residueCount
                nresiduesMinEntry = pdb_id
            # end if
            if residueCount < 5:
                nTdebug('residueCount for %s with range %s and segment %s is %s' % (
                    pdb_id, rangeStr, str(startStopList), residueCount))
            # end if
            segmentChainId = startStopList[0]
            if segmentChainId != chain_id:
                chain_id = segmentChainId
                nchains += 1
            # end if
        # end for
    # end for

    print("nentries:                %s" % nentries)
    print("nchains:                 %s" % nchains)
    print("nsegments:               %s" % nsegments)
    print("nresidues:               %s" % nresidues)
    # Chains per entry is chains over entries (was segments over chains).
    print("chainsPerEntry:          %s" % ratio(nchains, nentries))
    print("segmentsPerEntry:        %s" % ratio(nsegments, nentries))
    print("segmentsPerChain:        %s" % ratio(nsegments, nchains))
    print("residuesPerEntry:        %s" % ratio(nresidues, nentries))
    print("residuesPerChain:        %s" % ratio(nresidues, nchains))
    print("residuesPerSegments:     %s" % ratio(nresidues, nsegments))
    print("residuesMin:             %s" % nresiduesMin)
    print("residuesMinEntry:        %s" % nresiduesMinEntry)
# Example #2
# 0
def getBmrbEntries():
    """Download the BMRB dump at bmrbUrl and return its entry ids, sorted.

    The downloaded data is written to the file named by getFileName(bmrbUrl),
    gunzipped to the same name without its last three characters (gunzip is
    called with removeOriginal=True), and read as a CSV relation without a
    header row. Each value of column 0 is converted with int() after its
    first five characters are dropped.

    Return None on error, i.e. when no relation could be read from the CSV
    file; otherwise the sorted list of integer ids.
    """
    r1 = urllib.urlopen(bmrbUrl)
    try:
        data = r1.read()
    finally:
        # Release the connection even when the read fails.
        r1.close()
    fileNameGz = getFileName(bmrbUrl)
    writeDataToFile(fileNameGz, data)
    fileName = fileNameGz[:-3]  # remove .gz
    gunzip(fileNameGz, outputFileName=fileName, removeOriginal=True)
    bmrbDepRelation = getRelationFromCsvFile(fileName, containsHeaderRow=0)
    if not bmrbDepRelation:
        nTerror('No relation read from CSV file: %s' % fileName)
        return None
    bmrbRawIdList = bmrbDepRelation.getColumnByIdx(0)
    # NOTE(review): the slice presumably strips a fixed five-character prefix
    # in front of the numeric id -- confirm against the dump format.
    bmrbIdList = [int(bmrbRawId[5:]) for bmrbRawId in bmrbRawIdList]
    nTmessage("Read %s BMRB entries from DB dump" % len(bmrbIdList))
    bmrbIdList.sort()
    return bmrbIdList
# Example #3
# 0
def getBmrbEntries():
    """Fetch the BMRB dump from bmrbUrl and return the sorted entry ids.

    Steps: read the URL, write the data to the file named by
    getFileName(bmrbUrl), gunzip it (called with removeOriginal=True) to that
    name minus its last three characters, and load the result as a CSV
    relation without a header row. The ids are the int() of each column-0
    value with its first five characters removed.

    Return None on error, i.e. when the CSV file yields no relation.
    """
    r1 = urllib.urlopen(bmrbUrl)
    try:
        data = r1.read()
    finally:
        # Always close the response so the connection is not leaked.
        r1.close()
    fileNameGz = getFileName(bmrbUrl)
    writeDataToFile(fileNameGz, data)
    fileName = fileNameGz[:-3]  # remove .gz
    gunzip(fileNameGz, outputFileName=fileName, removeOriginal=True)
    bmrbDepRelation = getRelationFromCsvFile(fileName, containsHeaderRow=0)
    if not bmrbDepRelation:
        nTerror('No relation read from CSV file: %s' % fileName)
        return None
    firstColumn = bmrbDepRelation.getColumnByIdx(0)
    # NOTE(review): assumes every value carries a five-character prefix
    # before the numeric id -- verify against the dump format.
    bmrbIdList = [int(value[5:]) for value in firstColumn]
    nTmessage("Read %s BMRB entries from DB dump" % len(bmrbIdList))
    bmrbIdList.sort()
    return bmrbIdList
def run():
    """Summarise the 'nrgcing_ranges' CSV relation and print the totals.

    For every row, column 0 is taken as the entry id and column 1 as a
    ranges string that Molecule.ranges2StartStopLoLStatic converts into a
    list of segments. Each segment is indexed as: 0 for the chain id, 1 and
    2 for the first and last residue number (both counted).

    The function counts entries, chains, segments and residues, remembers
    the shortest segment and the entry it came from, and prints the counts
    and their ratios. A chain is counted whenever a segment's chain id
    differs from that of the preceding segment in the same entry. Rows from
    index 200000 onwards are skipped. Returns None.
    """
    relationName = 'nrgcing_ranges'
    fn = os.path.join('/Users/jd/CMBI/Papers/CING/Data', relationName + '.csv')
    maxRows = 200000  # Processing stops once this many rows have been read.

    def ratio(numerator, denominator):
        # Report nan instead of raising ZeroDivisionError when nothing was
        # counted; float() forces true division under Python 2.
        if not denominator:
            return float('nan')
        return numerator / float(denominator)

    nentries = 0
    nchains = 0
    nsegments = 0
    nresidues = 0
    # NOTE(review): the minimum starts at the sentinel 999, so 999/None is
    # reported when no segment is shorter than that.
    nresiduesMin = 999
    nresiduesMinEntry = None

    r = getRelationFromCsvFile(fn)
    if not r:
        # No relation to iterate over; bail out before dereferencing it.
        print("ERROR: No relation read from CSV file: %s" % fn)
        return
    # end if

    columnIdxId = 0
    columnIdxRange = 1

    for rowIdx in range(r.sizeRows()):
        if rowIdx >= maxRows:
            print("Stopping early.")
            break
        # end if
        nentries += 1
        pdb_id = r.getValue(rowIdx, columnIdxId)
        rangeStr = r.getValue(rowIdx, columnIdxRange)
        # Start each entry without a current chain so its first segment
        # always registers as a new chain.
        chain_id = None
        startStopLoL = Molecule.ranges2StartStopLoLStatic(rangeStr)
        nsegments += len(startStopLoL)
        for startStopList in startStopLoL:
            residueCount = startStopList[2] - startStopList[1] + 1
            nresidues += residueCount
            if residueCount < nresiduesMin:
                nresiduesMin = residueCount
                nresiduesMinEntry = pdb_id
            # end if
            if residueCount < 5:
                nTdebug(
                    'residueCount for %s with range %s and segment %s is %s' %
                    (pdb_id, rangeStr, str(startStopList), residueCount))
            # end if
            segmentChainId = startStopList[0]
            if segmentChainId != chain_id:
                chain_id = segmentChainId
                nchains += 1
            # end if
        # end for
    # end for

    print("nentries:                %s" % nentries)
    print("nchains:                 %s" % nchains)
    print("nsegments:               %s" % nsegments)
    print("nresidues:               %s" % nresidues)
    # Fixed: this line used to repeat the segmentsPerChain computation.
    print("chainsPerEntry:          %s" % ratio(nchains, nentries))
    print("segmentsPerEntry:        %s" % ratio(nsegments, nentries))
    print("segmentsPerChain:        %s" % ratio(nsegments, nchains))
    print("residuesPerEntry:        %s" % ratio(nresidues, nentries))
    print("residuesPerChain:        %s" % ratio(nresidues, nchains))
    print("residuesPerSegments:     %s" % ratio(nresidues, nsegments))
    print("residuesMin:             %s" % nresiduesMin)
    print("residuesMinEntry:        %s" % nresiduesMinEntry)