def getDBDatasets(jobO, trf, dbrelease):
    """Collect the files of every DBRelease dataset referenced by a job.

    DBRelease references have the form ``%DB=<dataset>:<lfn>``. When
    ``trf`` is true they are searched for in the whitespace-separated
    tokens of ``jobO``; otherwise a single reference is built from
    ``dbrelease``.

    NOTE(review): this module appears to define ``getDBDatasets`` more
    than once; the last definition wins at import time.

    Args:
        jobO: job options / transformation command line (only read when
            ``trf`` is true).
        trf: true when ``jobO`` is a transformation command line.
        dbrelease: ``<dataset>:<lfn>`` string, or ``''`` for none.

    Returns:
        A tuple ``(dbrFiles, dbrDsList)``: ``dbrFiles`` merges the
        mappings returned by ``Client.queryFilesInDataset`` for each
        referenced dataset (keyed by LFN, judging by how it is checked
        below), and ``dbrDsList`` lists the dataset names that were
        queried, in order of first appearance.

    Raises:
        ApplicationConfigurationError: if a dataset lookup fails or the
            requested LFN is not among the files collected so far.
    """
    dbrFiles = {}
    dbrDsList = []
    # nothing to resolve: neither a trf command line nor a DBRelease
    if not trf and dbrelease == '':
        return dbrFiles, dbrDsList
    if trf:
        # parse jobO for TRF
        tmpItems = jobO.split()
    else:
        # mimic a trf parameter to reuse following algorithm
        tmpItems = ['%DB=' + dbrelease]
    # look for DBRelease
    for tmpItem in tmpItems:
        match = re.search(r'%DB=([^:]+):(.+)$', tmpItem)
        if match is None:
            continue
        tmpDbrDS = match.group(1)
        tmpDbrLFN = match.group(2)
        # get files in the dataset (each dataset is queried only once)
        if tmpDbrDS not in dbrDsList:
            # imported only when a lookup is really needed, so callers
            # without any DBRelease do not require the PanDA client
            from pandatools import Client
            logger.info("Querying files in dataset:%s" % tmpDbrDS)
            try:
                tmpList = Client.queryFilesInDataset(tmpDbrDS, False)
            except Exception:
                # 'Exception' rather than a bare except, so that Ctrl-C and
                # interpreter exit are not reported as configuration errors
                raise ApplicationConfigurationError("ERROR : error while looking up dataset %s. Perhaps this dataset does not exist?"%tmpDbrDS)
            # append
            dbrFiles.update(tmpList)
            dbrDsList.append(tmpDbrDS)
        # check
        if tmpDbrLFN not in dbrFiles:
            raise ApplicationConfigurationError("ERROR : %s is not in %s"%(tmpDbrLFN,tmpDbrDS))
    return dbrFiles, dbrDsList
def getDBDatasets(jobO, trf, dbrelease):
    """Collect the files of every DBRelease dataset referenced by a job.

    DBRelease references have the form ``%DB=<dataset>:<lfn>``. When
    ``trf`` is true they are searched for in the whitespace-separated
    tokens of ``jobO``; otherwise a single reference is built from
    ``dbrelease``.

    Args:
        jobO: job options / transformation command line (only read when
            ``trf`` is true).
        trf: true when ``jobO`` is a transformation command line.
        dbrelease: ``<dataset>:<lfn>`` string, or ``''`` for none.

    Returns:
        A tuple ``(dbrFiles, dbrDsList)``: ``dbrFiles`` merges the
        mappings returned by ``Client.queryFilesInDataset`` for each
        referenced dataset (keyed by LFN, judging by how it is checked
        below), and ``dbrDsList`` lists the dataset names that were
        queried, in order of first appearance.

    Raises:
        ApplicationConfigurationError: if a dataset lookup fails or the
            requested LFN is not among the files collected so far.
    """
    dbrFiles = {}
    dbrDsList = []
    # nothing to resolve: neither a trf command line nor a DBRelease
    if not trf and dbrelease == '':
        return dbrFiles, dbrDsList
    if trf:
        # parse jobO for TRF
        tmpItems = jobO.split()
    else:
        # mimic a trf parameter to reuse following algorithm
        tmpItems = ['%DB=' + dbrelease]
    # look for DBRelease
    for tmpItem in tmpItems:
        match = re.search(r'%DB=([^:]+):(.+)$', tmpItem)
        if match is None:
            continue
        tmpDbrDS = match.group(1)
        tmpDbrLFN = match.group(2)
        # get files in the dataset (each dataset is queried only once)
        if tmpDbrDS not in dbrDsList:
            # imported only when a lookup is really needed, so callers
            # without any DBRelease do not require the PanDA client
            from pandatools import Client
            logger.info("Querying files in dataset:%s" % tmpDbrDS)
            try:
                tmpList = Client.queryFilesInDataset(tmpDbrDS, False)
            except Exception:
                # 'Exception' rather than a bare except, so that Ctrl-C and
                # interpreter exit are not reported as configuration errors
                raise ApplicationConfigurationError(
                    None, "ERROR : error while looking up dataset %s. Perhaps this dataset does not exist?" % tmpDbrDS)
            # append
            dbrFiles.update(tmpList)
            dbrDsList.append(tmpDbrDS)
        # check
        if tmpDbrLFN not in dbrFiles:
            raise ApplicationConfigurationError(
                None, "ERROR : %s is not in %s" % (tmpDbrLFN, tmpDbrDS))
    return dbrFiles, dbrDsList
def printGUIDsWithDatasets(guids):
    """List datasets by GUIDs.

    For every key of ``guids`` the DQ2 web service is asked (through the
    PanDA client's curl wrapper) which datasets contain a file with that
    GUID. The files of each non-junk dataset are then listed once and all
    their GUIDs are cached, so GUIDs already seen in a listing are not
    looked up again. Finally one line per GUID is printed: the GUID, its
    value in ``guids`` and either the list of ``[LFN, dataset]`` pairs
    found, an error message, or an empty string.

    Args:
        guids: dict whose keys are the GUIDs to resolve; the values are
            only echoed in the printed output.

    Returns:
        ``True`` after printing the results, ``False`` if the PanDA
        client could not be imported.
    """
    try:
        from pandatools import Client
    except ImportError:
        # retry after adding the PanDA installation to the module path
        if 'PANDA_SYS' in os.environ:
            pandapath = os.environ['PANDA_SYS']
        else:
            pandapath = PandaSys
        sys.path.append(pandapath + '/lib/python2.4/site-packages')
        try:
            from pandatools import Client
        except ImportError:
            print("EventLookup failed to import PanDA client, GUID->dataset name resolution disabled")
            return False

    # instantiate curl
    curl = Client._Curl()
    curl.verbose = False
    iLookUp = 0
    # GUID -> list of [LFN, dataset] pairs, or an error message string
    guidLfnMap = {}
    checkedDSList = []
    # dataset name suffixes treated as junk (compiled once, outside the loop)
    junkSuffix = re.compile(r'(?:_sub\d+|_dis\d+|_shadow)$')

    # loop over all GUIDs
    for guid in guids:
        # check existing map to avoid redundant lookup
        if guid in guidLfnMap:
            continue
        iLookUp += 1
        # pause for a second on every 20th lookup
        if iLookUp % 20 == 0:
            time.sleep(1)

        # get vuids
        url = Client.baseURLDQ2 + '/ws_content/rpc'
        data = {
            'operation': 'queryDatasetsWithFileByGUID',
            'guid': guid,
            'API': '0_3_0',
            'tuid': Client.MiscUtils.wrappedUuidGen()
        }
        status, out = curl.get(url, data, rucioAccount=True)
        if status != 0 or 'Exception' in str(out):
            # failed
            guidLfnMap[guid] = "DQ2 query ERROR: " + str(out)
            continue
        if out == '\x00' or out == ():
            guidLfnMap[guid] = "DQ2.queryDatasetsWithFileByGUID() returned no results"
            continue
        tmpVUIDs = list(out)

        # get dataset name
        url = Client.baseURLDQ2 + '/ws_repository/rpc'
        data = {
            'operation': 'queryDatasetByVUIDs',
            'vuids': tmpVUIDs,
            'API': '0_3_0',
            'tuid': Client.MiscUtils.wrappedUuidGen()
        }
        status, out = curl.post(url, data, rucioAccount=True)
        if status != 0 or 'Exception' in str(out):
            # failed
            guidLfnMap[guid] = "DQ2 query ERROR: " + str(out)
            continue
        if out == '\x00':
            guidLfnMap[guid] = "DQ2.queryDatasetByVUIDs() returned no results"
            continue

        for tmpDsName in out.keys():
            # ignore junk datasets and datasets whose files were already listed
            if tmpDsName.startswith('panda') or \
                    tmpDsName.startswith('user') or \
                    tmpDsName.startswith('group') or \
                    junkSuffix.search(tmpDsName) is not None or \
                    tmpDsName in checkedDSList:
                continue
            tmpMap = Client.queryFilesInDataset(tmpDsName)
            for tmpLFN, tmpVal in tmpMap.items():
                fileGuid = tmpVal['guid']
                entries = guidLfnMap.get(fileGuid)
                if not isinstance(entries, list):
                    # Either unseen, or an error message left by an earlier
                    # failed lookup of this GUID. A real hit supersedes the
                    # message; appending to the string would raise
                    # AttributeError.
                    entries = []
                    guidLfnMap[fileGuid] = entries
                entries.append([tmpLFN, tmpDsName])
            checkedDSList.append(tmpDsName)

    # one line per requested GUID; unresolved GUIDs print an empty result
    for guid in guids:
        print("%s %s %s" % (guid, guids[guid], guidLfnMap.get(guid, "")))
    return True