def dasFileQuery(dataset):
    query = 'dataset dataset=%s' % dataset
    host  = 'https://cmsweb.cern.ch'   # default
    idx   = 0                          # default
    limit = 0                          # unlimited
    debug = 0                          # default
    thr   = 300                        # default
    ckey  = ""                         # default
    cert  = ""                         # default
    jsondict = das_client.get_data(host, query, idx, limit, debug, thr, ckey, cert)

    # check if the pattern matches none, many, or one dataset
    if not jsondict['data'] or not jsondict['data'][0]['dataset']:
        sys.stderr.write('Error: the pattern "%s" does not match any dataset\n' % dataset)
        sys.exit(1)
        return []
    elif len(jsondict['data']) > 1:
        sys.stderr.write('Error: the pattern "%s" matches multiple datasets\n' % dataset)
        for d in jsondict['data']:
            sys.stderr.write('  %s\n' % d['dataset'][0]['name'])
        sys.exit(1)
        return []
    else:
        # expand the dataset name, then query for the list of files
        dataset = jsondict['data'][0]['dataset'][0]['name']
        query = 'file dataset=%s' % dataset
        jsondict = das_client.get_data(host, query, idx, limit, debug, thr, ckey, cert)
        # parse the results in JSON format, and extract the list of files
        files = sorted(f['file'][0]['name'] for f in jsondict['data'])
        return files
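A minimal usage sketch for dasFileQuery, assuming das_client and sys are imported as above and a valid grid proxy is available; the dataset pattern is a made-up placeholder, not a real dataset:

# hedged usage sketch: the pattern below is hypothetical
files = dasFileQuery('/SingleMuon/Run2016B*/MINIAOD')
for name in files:
    print(name)  # one logical file name (LFN) per line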
def __getData(self, dasQuery, dasLimit=0):
    dasData = das_client.get_data('https://cmsweb.cern.ch', dasQuery, 0, dasLimit, False)
    if isinstance(dasData, str):
        jsondict = json.loads(dasData)
    else:
        jsondict = dasData
    # check if the DAS query failed
    try:
        error = self.__findInJson(jsondict, ["data", "error"])
    except KeyError:
        error = None
    if error or self.__findInJson(jsondict, "status") != 'ok' or "data" not in jsondict:
        jsonstr = str(jsondict)
        if len(jsonstr) > 10000:
            jsonfile = "das_query_output_%i.txt"
            i = 0
            while os.path.lexists(jsonfile % i):
                i += 1
            jsonfile = jsonfile % i
            theFile = open(jsonfile, "w")
            theFile.write(jsonstr)
            theFile.close()
            msg = "The DAS query returned an error. The output is very long, and has been stored in:\n" + jsonfile
        else:
            msg = "The DAS query returned an error. Here is the output\n" + jsonstr
        msg += "\nIt's possible that this was a server error. If so, it may work if you try again later"
        raise AllInOneError(msg)
    return self.__findInJson(jsondict, "data")
def duplicateLumi(dataset):
    """ checks if the output dataset has a duplicate lumi """
    # registry of lumis already checked; a set gives O(1) lookups
    lumisChecked = set()
    # get dataset info from DAS
    query = "file lumi dataset=" + dataset
    das_data = get_data(das_host, query, 0, 0, 0)
    if isinstance(das_data, basestring):
        result = json.loads(das_data)
    else:
        result = das_data
    if result['status'] == 'fail':
        print 'DAS query failed with reason:', result['reason']
    else:
        preresult = result['data']
        # check each file
        for filename in preresult:
            newLumis = filename['lumi'][0]['number']
            # for each file, check each lumi range; ranges are inclusive,
            # so the +1 is needed to also cover the last lumi of the range
            for lumiRange in newLumis:
                for lumi in range(lumiRange[0], lumiRange[1] + 1):
                    if lumi in lumisChecked:
                        return True
                    lumisChecked.add(lumi)
    return False
def dasQuery(query, limit=0):
    global das_client
    try:
        result = das_client.get_data('https://cmsweb.cern.ch', query, 0, limit, False, 300, '', '')
        return result['data']
    except Exception:
        # on any failure (network error, bad query, missing 'data' key) return an empty list
        return []
def getFilesFromDAS(release, runtype, globalTag):
    '''Get proxy with "voms-proxy-init -voms cms" to use this option.'''
    print "Getting files from DAS. May take a while...."
    host = 'https://cmsweb.cern.ch'
    capath = '/etc/grid-security/certificates'
    query = "file dataset=/*{0}*/*{1}*{2}*/MINIAODSIM".format(runtype, release, globalTag)
    output = get_data(host=host, query=query, idx=0, limit=0, debug=0, cert=x509(), capath=capath)
    files = []
    for entry in output["data"]:
        file = "root://cms-xrd-global.cern.ch/" + str(entry["file"][0]["name"])
        if "/".join([release, runtype, "MINIAODSIM", globalTag]) in file:
            files.append(file)
    return files
def duplicateRunLumi(dataset):
    """ checks if the output dataset has duplicate lumis within any run """
    RunlumisChecked = {}
    query = "file run lumi dataset=" + dataset
    das_data = get_data(das_host, query, 0, 0, 0)
    if isinstance(das_data, basestring):
        result = json.loads(das_data)
    else:
        result = das_data
    if result['status'] == 'fail':
        print 'DAS query failed with reason:', result['reason']
    else:
        preresult = result['data']
        # check every file in the dataset
        for filename in preresult:
            run = filename['run'][0]['run_number']
            # add the run if it is new
            if run not in RunlumisChecked:
                RunlumisChecked[run] = set()
            newLumis = filename['lumi'][0]['number']
            # check every lumi in each (inclusive) range
            for lumiRange in newLumis:
                for lumi in range(lumiRange[0], lumiRange[1] + 1):
                    # already seen in the same run?
                    if lumi in RunlumisChecked[run]:
                        return True
                    RunlumisChecked[run].add(lumi)
    return False
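A hedged driver sketch for the two duplicate checks above; it assumes the functions and das_host live in the same module (as their calls imply), and the dataset name is a hypothetical placeholder:

das_host = 'https://cmsweb.cern.ch'  # assumed value, matching the host used elsewhere in this collection
dataset = '/SomePD/SomeEra-v1/AOD'   # hypothetical dataset name
if duplicateLumi(dataset) or duplicateRunLumi(dataset):
    print 'WARNING: duplicate lumis found in', dataset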
def autocomplete_Datasets(data):
    result_array = []
    ckey = x509()  # e.g. os.path.join(os.environ['HOME'], '.globus/userkey.pem')
    cert = x509()  # e.g. os.path.join(os.environ['HOME'], '.globus/usercert.pem')
    cpath = '/etc/grid-security/certificates/'
    for element in data:
        if '*' in element:
            jsondict = get_data('https://cmsweb.cern.ch', "dataset=" + element, 0, 0, 0, 300, ckey, cert, cpath)
            try:
                for i in range(len(jsondict['data'])):
                    result_array.append(jsondict['data'][i]['dataset'][0]['name'])
            except (KeyError, IndexError, TypeError):
                print '=' * 10
                print 'Not found', element
                print '=' * 10
        else:
            result_array.append(element)
    if len(result_array) == 0:
        print "No samples found, going to exit"
        sys.exit(0)
    return result_array
def __getData(self, dasQuery, dasLimit=0):
    dasData = das_client.get_data('https://cmsweb.cern.ch', dasQuery, 0, dasLimit, False)
    jsondict = json.loads(dasData)
    # check if the DAS query failed (note: the message must be a string,
    # not a tuple, for AllInOneError to render it correctly)
    if jsondict["status"] != 'ok':
        msg = "Status not 'ok', but: %s" % jsondict["status"]
        raise AllInOneError(msg)
    return jsondict["data"]
def dasQuery(queryString, entryTitle):
    import das_client
    dasinfo = das_client.get_data('https://cmsweb.cern.ch', queryString, 0, 0, False)
    if dasinfo['status'] != 'ok':
        raise Exception('DAS query failed.\nQuery: %s\nDAS Status returned: %s' % (queryString, dasinfo['status']))
    for entry in dasinfo['data']:
        if len(entry[entryTitle]) > 0:
            yield entry[entryTitle][0]
def getLumisInDataset(dataset):
    lumis = []
    query = "run lumi dataset=" + dataset
    das_data = get_data(das_host, query, 0, 0, 0)
    if isinstance(das_data, basestring):
        result = json.loads(das_data)
    else:
        result = das_data
    if result['status'] == 'fail':
        print 'DAS query failed with reason:', result['reason']
    else:
        preresult = result['data'][0]['lumi'][0]['number']
        # lumi ranges are inclusive, so the +1 covers the last lumi of each range
        for lumiRange in preresult:
            lumis = lumis + range(lumiRange[0], lumiRange[1] + 1)
    return lumis

def getBlocksNotProcessed(lumisOutput, BlockLumisInput):
    BlocksNotProcessed = []
    for blockLumiPair in BlockLumisInput:
        BlockProcessed = False
        lumisBlock = blockLumiPair[1]
        for lumi in lumisBlock:
            if lumi in lumisOutput:
                BlockProcessed = True
        if not BlockProcessed:
            BlocksNotProcessed.append(blockLumiPair[0])
    return [x.encode() for x in BlocksNotProcessed]

def getListUnprocessedBlocks(url, workflow):
    wfInfo = WorkflowWithInput(workflow)
    outputDataSets = wfInfo.outputDatasets
    inputDataset = wfInfo.inputDatasets
    lumisOutput = getLumisInDataset(outputDataSets[0])
    BlockLumisInput = getBlocksLumi(inputDataset)
    BlocksNotProcessed = getBlocksNotProcessed(lumisOutput, BlockLumisInput)
    return BlocksNotProcessed

def main():
    args = sys.argv[1:]
    if not len(args) == 1:
        print "usage: unprocessedBlocks workflowname"
        sys.exit(0)
    workflow = args[0]
    url = 'cmsweb.cern.ch'
    BlocksNotProcessed = getListUnprocessedBlocks(url, workflow)
    print "Number of blocks not processed", str(len(BlocksNotProcessed))
    print [x.encode() for x in BlocksNotProcessed]
    sys.exit(0)

if __name__ == "__main__":
    main()
def getNumberofFilesPerRun(das_url, dataset, run):
    query = "file dataset=" + dataset + " run=" + str(run) + " | count(file.name)"
    das_data = get_data(das_url, query, 0, 0, 0)
    if isinstance(das_data, basestring):
        result = json.loads(das_data)
    else:
        result = das_data
    if result['status'] == 'fail':
        print 'DAS query failed with reason:', result['reason']
    else:
        return result['data'][0]['result']['value']
def autocomplete_Datasets(data):
    result_array = []
    for element in data:
        if '*' in element:
            jsondict = get_data('https://cmsweb.cern.ch', "dataset=" + element, 0, 0, 0)
            for i in range(len(jsondict['data'])):
                result_array.append(jsondict['data'][i]['dataset'][0]['name'])
        else:
            result_array.append(element)
    return result_array
def dasQuery(queryString, entryTitle):
    dasinfo = das_client.get_data('https://cmsweb.cern.ch', queryString, 0, 0, False)
    if dasinfo['status'] != 'ok':
        raise Exception('DAS query failed.\nQuery: %s\nDAS Status returned: %s' % (queryString, dasinfo['status']))
    if len(dasinfo['data']) > 0:
        for entry in dasinfo['data']:
            if entryTitle in entry and len(entry[entryTitle]) > 0:
                yield entry[entryTitle][0]
def checkDatasetExistenceDAS(dataset):
    query = "dataset dataset=" + dataset + " status=*"
    das_data = get_data(das_host, query, 0, 0, 0)
    if isinstance(das_data, basestring):
        result = json.loads(das_data)
    else:
        result = das_data
    if result['status'] == 'fail':
        print 'DAS query failed with reason:', result['reason']
    return len(result['data']) > 0
def getFileNames(event):
    files = []
    # query DAS
    query = "file dataset=%(dataset)s run=%(run)i lumi=%(lumi)i | grep file.name" % event
    jsondict = das_client.get_data('https://cmsweb.cern.ch', query, 0, 0, False)
    status = jsondict['status']
    if status != 'ok':
        print "DAS query status: %s" % (status)
        return files
    mongo_query = jsondict['mongo_query']
    filters = mongo_query['filters']
    data = jsondict['data']
    # collect the matching file names; the original snippet was truncated here,
    # so this completion is assumed, mirroring the getDASData helper below
    for row in data:
        result = [r for r in das_client.get_value(row, filters['grep'])][0]
        if len(result) > 0 and result not in files:
            files.append(result)
    return files
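A hedged usage sketch for getFileNames; the event dictionary keys follow the format string used in the query above, and the values are made-up placeholders:

# hypothetical event: the keys match the query's format string
event = {'dataset': '/SingleMuon/Run2016B-v1/AOD', 'run': 273158, 'lumi': 42}
for name in getFileNames(event):
    print name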
def query(query_str, verbose=False):
    'simple query function to interface with DAS, better than using Popen as everything is handled by python'
    if verbose:
        print 'querying DAS with: "%s"' % query_str
    data = get_data('https://cmsweb.cern.ch', query_str, 0, 0, False)
    to_get = query_str.split()[0].strip(',')
    if data['status'] != 'ok':
        raise RuntimeError('Das query crashed')
    # -1 works both when getting a dataset from files and files from datasets,
    # not checked on everything
    return [i[to_get][-1]['name'] for i in data['data']]
def listFichier(theRelease, theSample, dataTier, tag):
    theQuery = "file dataset=/" + theSample + "*/" + theRelease + "-" + tag + "*/" + dataTier
    jsondict = das_client.get_data('https://cmsweb.cern.ch', theQuery, 0, 0, False)
    status = jsondict['status']
    if status != 'ok':
        print "DAS query status: %s" % (status)
        return []
    data = jsondict['data']
    theFileList = []
    for aRaw in data:
        recordStuff = aRaw['file']
        for aRecord in recordStuff:
            print "theFile===", aRecord['name']
            theFileList.append(aRecord['name'])
    return theFileList
def getNumberOfEvents(dataset):
    query = "file dataset=" + dataset + " | sum(file.nevents)"
    data = das.get_data(query)
    if isinstance(data, basestring):
        dasjson = json.loads(data)
    else:
        dasjson = data
    status = dasjson.get('status')
    if status == 'ok':
        data = dasjson.get('data')
        sumevents = 0
        for idata in data:
            sumevents += idata.get('result').get('value')
        return sumevents
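A hedged usage sketch for getNumberOfEvents; das is assumed to be the already-imported client wrapper the function uses, and the dataset name is a placeholder:

# hypothetical dataset name; the function returns None if the DAS status is not 'ok'
total = getNumberOfEvents('/SomePD/SomeEra-v1/MINIAODSIM')
if total is not None:
    print 'total events:', total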
def runInDataset(url, run, dataset):
    query = "file run=" + str(run) + " dataset=" + dataset
    das_data = get_data(das_host, query, 0, 0, 0)
    if isinstance(das_data, basestring):
        result = json.loads(das_data)
    else:
        result = das_data
    if result['status'] == 'fail':
        print 'DAS query failed with reason:', result['reason']
        return False
    return len(result['data']) > 0
def getRunsInDataset(das_url, dataset):
    query = "run dataset=" + dataset
    das_data = get_data(das_url, query, 0, 0, 0)
    if isinstance(das_data, basestring):
        result = json.loads(das_data)
    else:
        result = das_data
    if result['status'] == 'fail':
        print 'DAS query failed with reason:', result['reason']
    else:
        runs = []
        preresult = result['data']
        for run in preresult:
            runs.append(run['run'][0]['run_number'])
        return runs
def autocomplete_Datasets(data):
    result_array = []
    ckey = os.path.join(os.environ['HOME'], '.globus/userkey.pem')
    cert = os.path.join(os.environ['HOME'], '.globus/usercert.pem')
    os.environ['SSL_CERT_DIR'] = '/etc/grid-security/certificates/'
    for element in data:
        if '*' in element:
            jsondict = get_data('https://cmsweb.cern.ch', "dataset=" + element, 0, 0, 0, 300, ckey, cert)
            for i in range(len(jsondict['data'])):
                result_array.append(jsondict['data'][i]['dataset'][0]['name'])
        else:
            result_array.append(element)
    return result_array
def get_data(self, dataset, queryobject=None, query_aggregation=None):
    if queryobject is not None:
        query = queryobject + " "
    else:
        query = ''
    query += "dataset=%s " % dataset
    if query_aggregation is not None:
        query += " | %s" % query_aggregation
    jsondict = das_client.get_data(
        self.opts.host, query, self.opts.idx, self.opts.limit,
        self.opts.verbose, self.opts.threshold, self.opts.ckey, self.opts.cert)
    return jsondict
def getEventsRun(das_url, dataset, run):
    query = "summary dataset dataset=" + dataset + " run=" + str(run) + " | grep summary.nevents"
    das_data = get_data(das_url, query, 0, 0, 0)
    if isinstance(das_data, basestring):
        result = json.loads(das_data)
    else:
        result = das_data
    if result['status'] == 'fail':
        print 'DAS query failed with reason:', result['reason']
    else:
        if len(result['data']) == 0:  # dataset not yet registered in DBS
            return 0
        preresult = result['data'][0]['summary']
        for key in preresult:
            if 'nevents' in key:
                return key['nevents']
        return -1
def listFichier(theRun, theSample, dataTier, runNumberRAW):
    runNumber = putRunInShape(runNumberRAW)
    theQuery = "file dataset=/" + theSample + "*/" + theRun + "*/" + dataTier
    jsondict = das_client.get_data('https://cmsweb.cern.ch', theQuery, 0, 0, False)
    status = jsondict['status']
    if status != 'ok':
        print "DAS query status: %s" % (status)
        return []
    data = jsondict['data']
    theFileList = []
    for aRaw in data:
        recordStuff = aRaw['file']
        for aRecord in recordStuff:
            # keep only files whose name matches the requested run number
            if len(re.split(runNumber, aRecord['name'])) > 1:
                theFileList.append(aRecord['name'])
                print "\"" + aRecord['name'] + "\","
    return theFileList
def getSizeAtSizeDAS(dataset, site):
    query = "site dataset=" + dataset
    das_data = get_data(das_host, query, 0, 0, 0)
    if isinstance(das_data, basestring):
        result = json.loads(das_data)
    else:
        result = das_data
    if result['status'] == 'fail':
        print 'ERROR: DAS query failed with reason:', result['reason']
        sys.exit(0)
    else:
        preresult = result['data']
        for key in preresult:
            if key['site'][0]['name'] == site:
                return key['site'][0]['dataset_fraction']
    return 'Unknown'
def getEventCountDataSet(das_url, dataset):
    query = "dataset dataset=" + dataset + " status=* | grep dataset.nevents"
    das_data = get_data(das_url, query, 0, 0, 0)
    if isinstance(das_data, basestring):
        result = json.loads(das_data)
    else:
        result = das_data
    if result['status'] == 'fail':
        print 'DAS query ' + query + ' failed with reason:', result['reason']
    else:
        if len(result['data']) == 0:  # dataset not yet registered in DBS
            return 0
        preresult = result['data'][0]['dataset']
        for key in preresult:
            if 'nevents' in key:
                return key['nevents']
        return -1
def hasAllBlocksClosed(dataset):
    """ checks if a given dataset has all blocks closed and can be used as input """
    query = "block dataset=" + dataset
    das_data = get_data(das_host, query, 0, 0, 0)['data']
    # traverse blocks
    for ds in das_data:
        for block in ds['block']:
            # an absent 'is_open' flag is treated as closed
            if block.get('is_open') == 'y':
                return False
    return True
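A hedged usage sketch; das_host is assumed to be defined as elsewhere in this collection, and the dataset name is a hypothetical placeholder:

dataset = '/SomePD/SomeEra-v1/RAW'  # hypothetical dataset name
if hasAllBlocksClosed(dataset):
    print dataset, 'can be used as input'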
def __getData(self, dasQuery, dasLimit=0):
    dasData = das_client.get_data('https://cmsweb.cern.ch', dasQuery, 0, dasLimit, False)
    if isinstance(dasData, str):
        jsondict = json.loads(dasData)
    else:
        jsondict = dasData
    # check if the DAS query failed
    try:
        error = self.__findInJson(jsondict, ["data", "error"])
    except KeyError:
        error = None
    if error or self.__findInJson(jsondict, "status") != 'ok' or "data" not in jsondict:
        msg = ("The DAS query returned an error. Here is the output\n" + str(jsondict) +
               "\nIt's possible that this was a server error. If so, it may work if you try again later")
        raise AllInOneError(msg)
    return self.__findInJson(jsondict, "data")
def getRunsDataset(das_url, dataset):
    runList = []
    query = "run dataset=" + dataset + " | grep run.run_number"
    das_data = get_data(das_url, query, 0, 0, 0)
    if isinstance(das_data, basestring):
        result = json.loads(das_data)
    else:
        result = das_data
    if result['status'] == 'fail':
        print 'DAS query failed with reason:', result['reason']
    else:
        if len(result['data']) == 0:  # dataset not yet registered in DBS
            return runList
        preresult = result['data']
        for key in preresult:
            if 'run' in key:
                runList.append(key['run'][0]['run_number'])
    return runList
def getRunLumiCountDataset(das_url, dataset):
    query = "summary dataset=" + dataset + " | grep summary.nlumis"
    das_data = get_data(das_url, query, 0, 0, 0)
    if isinstance(das_data, basestring):
        result = json.loads(das_data)
    else:
        result = das_data
    if result['status'] == 'fail':
        print 'DAS query failed with reason:', result['reason']
    else:
        if len(result['data']) == 0:  # dataset not yet registered in DBS
            return 0
        preresult = result['data'][0]
        for key in preresult:
            if 'summary' in key:
                return preresult['summary'][0]['nlumis']
        return -1
def getDASData(query):
    results = []
    jsondict = das_client.get_data('https://cmsweb.cern.ch', query, 0, 0, False)
    status = jsondict['status']
    if status != 'ok':
        print "DAS query status: %s" % (status)
        return results
    mongo_query = jsondict['mongo_query']
    filters = mongo_query['filters']
    data = jsondict['data']
    for row in data:
        result = [r for r in das_client.get_value(row, filters['grep'])][0]
        if len(result) > 0 and result not in results:
            results.append(result)
    return results
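A hedged usage sketch for getDASData; the query string is a placeholder written in the same style as the other queries in this collection:

# hypothetical query: list the file names of a made-up dataset
for name in getDASData("file dataset=/SomePD/SomeEra-v1/AOD | grep file.name"):
    print name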
def getEventsBlock(das_url, block_name):
    query = "block=" + block_name + " | grep block.nevents"
    das_data = get_data(das_url, query, 0, 0, 0)
    if isinstance(das_data, basestring):
        result = json.loads(das_data)
    else:
        result = das_data
    if result['status'] == 'fail':
        print 'DAS query failed with reason:', result['reason']
    else:
        if len(result['data']) == 0:  # block not yet registered in DBS
            return 0
        preresult = result['data'][0]['block']
        for key in preresult:
            if 'nevents' in key:
                return key['nevents']
        return -1
    return 0
def getRunLumiCountDatasetRun(das_url, dataset, run):
    query = "summary dataset=" + dataset + " run=" + str(run) + " | sum(summary.nlumis)"
    das_data = get_data(das_url, query, 0, 0, 0)
    if isinstance(das_data, basestring):
        result = json.loads(das_data)
    else:
        result = das_data
    if result['status'] == 'fail':
        print 'DAS query failed with reason:', result['reason']
    else:
        if len(result['data']) == 0:  # dataset not yet registered in DBS
            return 0
        preresult = result['data']
        for key in preresult:
            if 'result' in key:
                return key['result']['value']
        return -1
def getRunLumiCountDatasetListLimited(das_url, dataset, runlist):
    query = "summary dataset=" + dataset + " run in " + str(runlist) + " | grep summary.nlumis"
    das_data = get_data(das_url, query, 0, 0, 0)
    if isinstance(das_data, basestring):
        result = json.loads(das_data)
    else:
        result = das_data
    if result['status'] == 'fail':
        print 'DAS query failed with reason:', result['reason']
    else:
        if len(result['data']) == 0:  # dataset not yet registered in DBS
            return 0
        preresult = result['data'][0]
        for key in preresult:
            if 'summary' in key:
                return preresult['summary'][0]['nlumis']
        return -1