Esempio n. 1
0
def getCasdNmrProjectInfo(casdNmrRefFile=None):
    """
    Get the list of CASD-NMR projects from the CASD-NMR data web page.

    The page text is obtained through getReferenceTextFileFromHttp (using
    casdNmrDataUrl and the local reference file) and its HTML table is parsed
    with casdUtil.parseHtmlTable.

    casdNmrRefFile: path of the local reference file; when not given it
                    defaults to 'dataPage.html' inside allDataDir.

    Returns a list with one dictionary per table row after the first one.
    The keys are the cells of the first (header) row; an extra 'DataLink'
    key holds the first element of the entry with the same row index in the
    links structure returned by the parser.
    """

    # This file is customisable!
    if not casdNmrRefFile:
        casdNmrRefFile = os.path.join(allDataDir, 'dataPage.html')

    # Get the web page...
    pageLines = getReferenceTextFileFromHttp(casdNmrDataUrl,
                                             casdNmrRefFile,
                                             refText="CASD-NMR data",
                                             isGzipped=False)
    text = ''.join(pageLines)

    table, links = casdUtil.parseHtmlTable(text)

    # First row holds the column tags, every following row is one project
    tags = table[0]

    result = []
    for rowIndex in range(1, len(table)):
        projectDict = {}
        for colIndex, val in enumerate(table[rowIndex]):
            projectDict[tags[colIndex]] = val

        projectDict['DataLink'] = links[rowIndex][0]
        result.append(projectDict)

    #
    return result
Esempio n. 2
0
def getBmrbPdbMappingInfo(dataFilePath=None):
    """
    Read the mapping between BMRB IDs and PDB codes.

    NOTE: this gives UNIQUE mappings between PDB and BMRB ID!
    Use getBmrbDatabaseMatches for list of all mappings.

    dataFilePath: path of the local reference file; defaults to
                  bmrbPdbMappingFilePath.

    Each non-blank data line must have the form '<bmrbId>,<pdbCode>'.

    Returns a dictionary {bmrbId (int): pdbCode (lower case)}. When a BMRB ID
    occurs more than once, the first PDB code is kept and a warning is printed.
    """

    if not dataFilePath:
        dataFilePath = bmrbPdbMappingFilePath

    dataLines = getReferenceTextFileFromHttp(bmrbPdbMappingUrl,
                                             dataFilePath,
                                             refText="BMRB to PDB mappings")

    bmrbPdbMappingDict = {}

    for dataLine in dataLines:
        # Strip BEFORE testing for content: a line holding only a newline is
        # truthy, but has no fields to unpack.
        dataLine = dataLine.strip()
        if not dataLine:
            continue

        (bmrbId, pdbCode) = dataLine.split(',')

        bmrbId = int(bmrbId)

        if bmrbId not in bmrbPdbMappingDict:
            bmrbPdbMappingDict[bmrbId] = pdbCode.lower()
        else:
            # This should never happen... raise it?
            print("MULTIPLE MATCH FOR %d!!" % bmrbId)

    return bmrbPdbMappingDict
Esempio n. 3
0
def getPdbChainInfo():

  """
  Read info on chain information of PDB entries

  The data lines come from getReferenceTextFileFromHttp (pdbChainInfoUrl /
  pdbChainInfoFilePath, gzipped). Two kinds of lines are handled:

    - header lines, whose first column starts with '>' followed by
      '<pdbCode>_<chainCode>'; the third column (minus its first 7
      characters) is converted with returnInt to the chain length, and the
      fourth and later columns are joined into the molecule name. The second
      column is not stored.
      NOTE(review): the skipped prefixes are presumably 'mol:' and 'length:'
      - confirm against the data file.
    - any other non-blank line, whose first column is stored as the third
      element of the most recent header's chain info.

  Blank lines, and non-header lines that occur before the first header, are
  ignored.

  Returns {pdbCode: {chainCode: [chainLength, molName, lineValue]}}, where
  lineValue is None until a non-header line follows the header.
  """

  pdbChainInfoDict = {}

  dataLines = getReferenceTextFileFromHttp(pdbChainInfoUrl,pdbChainInfoFilePath,refText = "PDB entry chain information", isGzipped = True)

  # Chain info list of the last header seen; None until the first header
  currentChainInfo = None

  for dataLine in dataLines:

    cols = dataLine.split()

    if not cols:
      # Blank line: nothing to parse
      continue

    if cols[0][0] == '>':

      (pdbCode,chainCode) = cols[0][1:].split("_")
      chainLength = returnInt(cols[2][7:])
      molName = ' '.join(cols[3:])

      currentChainInfo = [chainLength,molName,None]
      pdbChainInfoDict.setdefault(pdbCode,{})[chainCode] = currentChainInfo

    elif currentChainInfo is not None:

      # Same list object as stored in pdbChainInfoDict for the last header
      currentChainInfo[2] = cols[0]

  return pdbChainInfoDict
Esempio n. 4
0
def getPdbEntryType(dataFilePath=None,updateFile=False):

  """
  Read info on experiment type of PDB entries

  dataFilePath: path of the local data file; defaults to pdbEntryTypeFilePath.
  updateFile:   if True, or if the local file does not exist, the data lines
                are obtained through getReferenceTextFileFromHttp
                (pdbEntryTypeUrl); otherwise the local file is read directly.

  Only lines with exactly three whitespace-separated columns
  (pdbCode, molType, expType) are used; all other lines are ignored.

  Returns a dictionary {pdbCode: (molType, expType)}.
  """

  if not dataFilePath:
    dataFilePath = pdbEntryTypeFilePath

  if updateFile or not os.path.exists(dataFilePath):
    dataLines = getReferenceTextFileFromHttp(pdbEntryTypeUrl,dataFilePath,refText = "PDB entry experiment type information")
  else:
    # Context manager makes sure the file is closed even if reading fails
    with open(dataFilePath) as fin:
      dataLines = fin.readlines()

  pdbEntryTypeDict = {}

  for dataLine in dataLines:

    cols = dataLine.split()

    if len(cols) == 3:

      (pdbCode,molType,expType) = cols

      # molType is prot, nuc or prot-nuc
      # expType is diffraction, NMR, or EM

      pdbEntryTypeDict[pdbCode] = (molType,expType)

  return pdbEntryTypeDict
Esempio n. 5
0
def getCasdNmrProjectInfo(casdNmrRefFile=None):
    """
    Get the list of CASD-NMR projects by scanning the CASD-NMR data web page.

    casdNmrRefFile: path of the local reference file; defaults to
                    'reference/dataPage.html' inside casdNmrDataDir.

    Only lines containing 'href' are examined:
      - lines mentioning "assignment-software" are ignored;
      - lines mentioning "rutgers", 'Data for' or '/wenmr/files/files/' start
        a new project entry, built from the link target and the link text;
      - lines mentioning "structureId=" add a four-character code to the most
        recently started project.

    Returns a list of [urlName, href, pdbCodes] lists, where urlName is the
    link text (with "/wenmr" replaced by eNmrUrl), href is the link target
    and pdbCodes is the list of codes collected for that project.

    Raises CasdNmrError if no project entry at all could be found.
    """

    # Raw strings: the patterns contain no Python string escapes
    hrefPatt = re.compile(r'<a href="([^"]+)"')
    hrefNamePatt = re.compile(r'[^ ]">([^>]+)</a')
    pdbCodePatt = re.compile(r'>\s*([A-Za-z0-9]{4})\s*</a')

    # This file is customisable!
    if not casdNmrRefFile:
        casdNmrRefFile = os.path.join(casdNmrDataDir, 'reference',
                                      'dataPage.html')

    # Get the web page...
    dataLines = getReferenceTextFileFromHttp(casdNmrDataUrl,
                                             casdNmrRefFile,
                                             refText="CASD-NMR data",
                                             isGzipped=False)

    # Now get the info out...
    projectInfo = []
    for dataLine in dataLines:

        if 'href' not in dataLine:
            continue

        if "assignment-software" in dataLine:
            continue

        # Some custom hacking here - content of href lines with data not dependable enough...
        if ("rutgers" in dataLine or 'Data for' in dataLine
                or '/wenmr/files/files/' in dataLine):

            hrefSearch = hrefPatt.search(dataLine)

            # The link text is matched with any <span> wrappers removed
            cleanedLine = dataLine.replace("<span>", "").replace("</span>", "")
            projectNameSearch = hrefNamePatt.search(cleanedLine)

            # Both the link target and the link text are needed for an entry
            if hrefSearch and projectNameSearch:
                urlName = projectNameSearch.group(1)
                if "/wenmr" in urlName:
                    urlName = urlName.replace("/wenmr", eNmrUrl)
                projectInfo.append([urlName, hrefSearch.group(1), []])

        elif "structureId=" in dataLine:

            pdbCodeSearch = pdbCodePatt.search(dataLine)

            # A code can only be attached once a project entry exists
            if pdbCodeSearch and projectInfo:
                projectInfo[-1][-1].append(pdbCodeSearch.group(1))

    if not projectInfo:
        raise CasdNmrError("No files found, probably web page change!")

    return projectInfo
Esempio n. 6
0
def getBmrbDatabaseMatches(dataFilePath=None):
    """
    Read info on relation between BMRB and other database entries...

    dataFilePath: path of the local reference file; defaults to
                  bmrbDatabaseMatchFilePath.

    Each data line is split on commas. Lines that do not yield exactly 11
    fields, or whose first field is empty once its double quotes are
    stripped, are ignored. Only the first three fields (BMRB ID, database
    name, database code) are used.

    Returns {bmrbId (int): {dbName: [dbCode, ...]}}.
    """

    if not dataFilePath:
        dataFilePath = bmrbDatabaseMatchFilePath

    dataLines = getReferenceTextFileFromHttp(bmrbDatabaseMatchUrl,
                                             dataFilePath,
                                             refText="BMRB database matches")

    bmrbDatabaseDict = {}

    for dataLine in dataLines:
        if not dataLine:
            continue

        values = dataLine.strip().split(',')

        # Ignore lines with inconsistent length
        if len(values) != 11:
            continue

        # The remaining eight fields are not used here
        bmrbId = values[0].strip('"')
        if not bmrbId:
            continue

        bmrbId = int(bmrbId)
        dbName = values[1].strip('"')
        dbCode = values[2].strip('"')

        dbCodes = bmrbDatabaseDict.setdefault(bmrbId, {}).setdefault(dbName, [])
        dbCodes.append(dbCode)

    return bmrbDatabaseDict
Esempio n. 7
0
def getBmrbNmrGridDict(dataFilePath=None, skipCodes=None):
    """
    Read info on DOCR/FRED from BMRB...

    dataFilePath: path of the local reference file; defaults to
                  nmrGridInfoFile inside nmrGridReferenceDir.
    skipCodes:    optional collection of PDB codes to leave out.

    Returns {pdbCode: {status: webLink}}, where status is the statusConvert
    translation of the status code on the line. Lines whose status code is
    not in statusConvert are ignored, and only the first link per
    (pdbCode, status) combination is kept.

    Raises ValueError if a non-blank line does not have exactly five
    whitespace-separated columns.
    """

    bmrbNmrGridDict = {}

    #1047	1017	classified	1a51	2004-12-02
    #26561	1064	parsed	1ajw	2004-12-03
    #39888	1263	filtered	1c54	2004-12-03
    #39889	1266	converted	1c7v	2004-12-03
    # Where classified means there is a constraint file, but it's not been parsed
    # rest is obvious: converted DOCR, filtered FRED.

    if not dataFilePath:
        dataFilePath = os.path.join(nmrGridReferenceDir, nmrGridInfoFile)

    dataLines = getReferenceTextFileFromHttp(nmrGridInfoUrl,
                                             dataFilePath,
                                             refText="NMR GRID information")

    for dataLine in dataLines:
        if not dataLine:
            continue

        cols = dataLine.split()
        if not cols:
            # Whitespace-only line (e.g. just a newline): nothing to unpack
            continue

        # Only statusCode and pdbCode are used; the full unpack still checks
        # that the line has the expected five columns.
        (gridId, bmrbId, statusCode, pdbCode, date) = cols

        if skipCodes and pdbCode in skipCodes:
            continue

        if statusCode in statusConvert:
            statusLinks = bmrbNmrGridDict.setdefault(pdbCode, {})

            status = statusConvert[statusCode]

            if status not in statusLinks:
                webLink = '%s?pdb_id=%s&min_items=0&block_text_type=%s' % (
                    nmrGridServletUrl, pdbCode, statusCode)
                statusLinks[status] = webLink

    return bmrbNmrGridDict
Esempio n. 8
0
def getXrayResolution():

  """
  Collect the X-ray resolution of PDB entries.

  The data lines come from getReferenceTextFileFromHttp (xrayResolutionUrl /
  xrayResolutionFilePath). Only lines with exactly three whitespace-separated
  columns of the form '<pdbCode> ; <resolution>' are used.

  Returns {pdbCode (lower case): resolution (float)}; entries whose
  resolution equals -1.0 are left out.
  """

  dataLines = getReferenceTextFileFromHttp(xrayResolutionUrl,xrayResolutionFilePath,refText = "PDB x-ray resolution information")

  resolutionByCode = {}

  for line in dataLines:

    fields = line.split()

    # Anything that is not '<code> ; <value>' is skipped
    if len(fields) != 3 or fields[1] != ';':
      continue

    value = float(fields[2])

    # -1.0 entries are not stored
    if value == -1.0:
      continue

    resolutionByCode[fields[0].lower()] = value

  return resolutionByCode
Esempio n. 9
0
# Per data type: (BMRB query grid page, names of the count columns that sit
# between the 'bmrbId'/'name' columns and the 'hasProtein'/'hasDNA'/'hasRNA'
# columns).
_bmrbQueryInfo = {
    'rdc': ("query_1_67.html", ('numRdcs',)),
    'coupling': ("query_1_29.html", ('numCouplings',)),
    't1': ("query_1_33.html", ('numT1s',)),
    't2': ("query_1_38.html", ('numT2s',)),
    'hetNuclNoe': ("query_1_41.html", ('numHetNuclNoes',)),
    'shift': ("query_1_5_4.html", ('numShifts_1H', 'numShifts_13C',
                                   'numShifts_15N', 'numShifts_31P')),
    'orderParam': ("query_1_45.html", ('numOrderParams',)),
    'hExchange': ("query_1_49.html", ('numHExchangeValues',)),
    'hProtection': ("query_1_53.html", ('numHProtectionValues',)),
}


def _readBmrbEntryValues(entryLines, valueNames, valuePatt):
    """
    Read one value per line from entryLines, in the order of valueNames.

    Returns {valueName: value}, or None when there are fewer lines than value
    names or when a line holds no value (that line is printed).
    """

    if len(entryLines) < len(valueNames):
        return None

    infoDict = {}
    for valueName, entryLine in zip(valueNames, entryLines):

        valueSearch = valuePatt.search(entryLine)
        if not valueSearch:
            print(entryLine)
            return None

        value = valueSearch.group(1)

        if value in ('&nbsp;', '&nbsp'):
            value = False
        elif value == 'X':
            value = True
        elif valueName[:3] == 'num':
            value = int(value)

        infoDict[valueName] = value

    return infoDict


def getBmrbInfo(dataType=None):
    """
    Read per-entry information of one data type from a BMRB query grid page.

    dataType: one of 'rdc', 'coupling', 't1', 't2', 'hetNuclNoe', 'shift',
              'orderParam', 'hExchange' or 'hProtection'.

    A line matching "data_library/generate_summary" starts an entry; that
    line and the lines following it each supply one value, in the order
    bmrbId, name, the count column(s) of the data type, hasProtein, hasDNA,
    hasRNA. '&nbsp;' is stored as False, 'X' as True, and values of 'num...'
    columns are converted to int. Entries that are cut off by the end of the
    data, or that contain a line without a value, are skipped.

    Returns {bmrbId (int): {valueName: value}} (without the 'bmrbId' key in
    the inner dictionaries), or None if dataType is empty or unknown.
    """

    if not dataType:
        return None

    if dataType not in _bmrbQueryInfo:
        print("Unknown data type %s, aborting..." % dataType)
        return None

    bmrbQueryFile, countNames = _bmrbQueryInfo[dataType]
    valueNames = ('bmrbId', 'name') + countNames + ('hasProtein', 'hasDNA',
                                                    'hasRNA')

    bmrbQueryUrl = "%s/search/query_grid/%s" % (bmrbUrl, bmrbQueryFile)
    bmrbQueryFilePath = os.path.join(bmrbReferenceDir, bmrbQueryFile)

    entryLinePatt = re.compile("data_library/generate_summary")
    valuePatt = re.compile(r">([^<>]+)<")

    bmrbValuesDict = {}

    dataLines = getReferenceTextFileFromHttp(bmrbQueryUrl,
                                             bmrbQueryFilePath,
                                             refText="BMRB %s information" %
                                             dataType)

    numValues = len(valueNames)
    numLines = len(dataLines)

    ln = 0

    while ln < numLines:

        if entryLinePatt.search(dataLines[ln]):

            infoDict = _readBmrbEntryValues(dataLines[ln:ln + numValues],
                                            valueNames, valuePatt)

            if infoDict is not None:
                bmrbId = int(infoDict.pop('bmrbId'))
                bmrbValuesDict[bmrbId] = infoDict

                # Skip the lines consumed by this entry
                ln += numValues

        ln += 1

    return bmrbValuesDict