Exemple #1
0
def dasFileQuery(dataset):
  """Return the sorted file names of the single dataset matching `dataset`.

  Two das_client.get_data calls are made against https://cmsweb.cern.ch:
  a "dataset" query to resolve the pattern, then a "file" query on the
  resolved dataset name.

  If the pattern matches no dataset, or more than one, a message is written
  to stderr and the process exits with status 1.
  """
  host    = 'https://cmsweb.cern.ch'      # default
  idx     = 0                             # default
  limit   = 0                             # unlimited
  debug   = 0                             # default
  thr     = 300                           # default
  ckey    = ""                            # default
  cert    = ""                            # default

  query    = 'dataset dataset=%s' % dataset
  jsondict = das_client.get_data(host, query, idx, limit, debug, thr, ckey, cert)
  matches  = jsondict['data']

  # sys.exit() raises SystemExit, so nothing placed after it can run; the
  # former "return []" statements were dead code and have been dropped.
  if not matches or not matches[0]['dataset']:
    sys.stderr.write('Error: the pattern "%s" does not match any dataset\n' % dataset)
    sys.exit(1)
  if len(matches) > 1:
    sys.stderr.write('Error: the pattern "%s" matches multiple datasets\n' % dataset)
    for d in matches:
      sys.stderr.write('    %s\n' % d['dataset'][0]['name'])
    sys.exit(1)

  # exactly one match: expand the dataset name and list its files
  dataset  = matches[0]['dataset'][0]['name']
  query    = 'file dataset=%s' % dataset
  jsondict = das_client.get_data(host, query, idx, limit, debug, thr, ckey, cert)
  return sorted(f['file'][0]['name'] for f in jsondict['data'])
Exemple #2
0
def dasFileQuery(dataset):
    """Resolve a dataset pattern and return its file names, sorted.

    The pattern is first looked up with a "dataset" query through
    das_client.get_data; the resolved name is then used for a "file" query.

    When the pattern matches nothing, or matches several datasets, an error
    is written to stderr and the process exits with status 1.
    """
    host = 'https://cmsweb.cern.ch'  # default
    idx = 0  # default
    limit = 0  # unlimited
    debug = 0  # default
    thr = 300  # default
    ckey = ""  # default
    cert = ""  # default

    query = 'dataset dataset=%s' % dataset
    jsondict = das_client.get_data(host, query, idx, limit, debug, thr, ckey,
                                   cert)
    found = jsondict['data']

    # No "return" follows the sys.exit() calls: SystemExit is raised there,
    # so such statements could never execute.
    if not found or not found[0]['dataset']:
        sys.stderr.write(
            'Error: the pattern "%s" does not match any dataset\n' % dataset)
        sys.exit(1)
    if len(found) > 1:
        sys.stderr.write(
            'Error: the pattern "%s" matches multiple datasets\n' % dataset)
        for d in found:
            sys.stderr.write('    %s\n' % d['dataset'][0]['name'])
        sys.exit(1)

    # single match: expand the dataset name
    dataset = found[0]['dataset'][0]['name']
    query = 'file dataset=%s' % dataset
    jsondict = das_client.get_data(host, query, idx, limit, debug, thr, ckey,
                                   cert)
    # extract the list of files from the decoded response
    return sorted(f['file'][0]['name'] for f in jsondict['data'])
Exemple #3
0
 def __getData(self, dasQuery, dasLimit=0):
     """Run `dasQuery` through das_client and return the "data" entry.

     dasQuery -- query string handed to das_client.get_data
     dasLimit -- passed through as the fourth get_data argument

     Raises AllInOneError when the response carries an error entry, its
     status is not 'ok', or it has no "data" key.  Responses longer than
     10000 characters are written to the first unused
     das_query_output_<i>.txt file and only the file name is quoted in the
     exception message.
     """
     dasData = das_client.get_data('https://cmsweb.cern.ch', dasQuery, 0,
                                   dasLimit, False)
     # the client may return either a JSON string or an already decoded object
     if isinstance(dasData, str):
         jsondict = json.loads(dasData)
     else:
         jsondict = dasData
     # Check, if the DAS query fails
     try:
         error = self.__findInJson(jsondict, ["data", "error"])
     except KeyError:
         error = None
     if error or self.__findInJson(
             jsondict, "status") != 'ok' or "data" not in jsondict:
         jsonstr = str(jsondict)
         if len(jsonstr) > 10000:
             jsonfile = "das_query_output_%i.txt"
             i = 0
             # pick the first index whose file name is not taken yet
             while os.path.lexists(jsonfile % i):
                 i += 1
             jsonfile = jsonfile % i
             # "with" closes the file even if the write itself fails
             with open(jsonfile, "w") as theFile:
                 theFile.write(jsonstr)
             msg = "The DAS query returned an error.  The output is very long, and has been stored in:\n" + jsonfile
         else:
             msg = "The DAS query returned a error.  Here is the output\n" + jsonstr
         msg += "\nIt's possible that this was a server error.  If so, it may work if you try again later"
         raise AllInOneError(msg)
     return self.__findInJson(jsondict, "data")
def duplicateLumi(dataset):
    """
    checks if output dataset has a duplicate lumi
    """
    #registry of lumis checked, better a set
    lumisChecked=set()
    #get dtaset info frm das
    query="file lumi dataset="+dataset
    das_data = get_data(das_host,query,0,0,0)
    if isinstance(das_data, basestring):
        result = json.loads(das_data)
    else:
        result = das_data
    if result['status'] == 'fail' :
        print 'DAS query failed with reason:',result['reason']
    else:
        preresult=result['data']
    #check each file    
    for filename in preresult:
        newLumis=filename['lumi'][0]['number']
        #for each file we check each lumi range.
        for lumiRange in newLumis:
            newlumiRange=[lumiRange[0]]
            if lumiRange[0]<lumiRange[1]:
                newlumiRange=range(lumiRange[0], lumiRange[1])
            #check each lumi, if its in the lumiset            
            for lumi in newlumiRange:
                if lumi in lumisChecked:
                    return True
                else:
                    lumisChecked.add(lumi)
    return False
Exemple #5
0
def dasQuery(query, limit = 0):
    """Return the 'data' entry of a DAS query, or [] if anything goes wrong.

    query -- query string handed to das_client.get_data
    limit -- passed through as the fourth get_data argument

    Any ordinary exception raised while querying or reading the response is
    swallowed and turned into an empty list.  KeyboardInterrupt and
    SystemExit are no longer swallowed, so the caller can still be
    interrupted.
    """
    try:
        result = das_client.get_data('https://cmsweb.cern.ch', query, 0, limit, False, 300, '', '')
        return result['data']
    except Exception:
        return []
Exemple #6
0
 def __getData( self, dasQuery, dasLimit = 0 ):
     """Query DAS and return the "data" entry of the response.

     dasQuery -- query string handed to das_client.get_data
     dasLimit -- passed through as the fourth get_data argument

     An AllInOneError is raised if the response holds an error entry, has a
     status other than 'ok', or lacks the "data" key.  If the response text
     exceeds 10000 characters it is saved to the first free
     das_query_output_<i>.txt and the message only names that file.
     """
     dasData = das_client.get_data( 'https://cmsweb.cern.ch',
                                    dasQuery, 0, dasLimit, False )
     # decode only when a JSON string came back
     if isinstance(dasData, str):
         jsondict = json.loads( dasData )
     else:
         jsondict = dasData
     # Check, if the DAS query fails
     try:
         error = self.__findInJson(jsondict,["data","error"])
     except KeyError:
         error = None
     if error or self.__findInJson(jsondict,"status") != 'ok' or "data" not in jsondict:
         jsonstr = str(jsondict)
         if len(jsonstr) > 10000:
             jsonfile = "das_query_output_%i.txt"
             i = 0
             # look for a file name that does not exist yet
             while os.path.lexists(jsonfile % i):
                 i += 1
             jsonfile = jsonfile % i
             # the context manager guarantees the handle is closed on error
             with open( jsonfile, "w" ) as theFile:
                 theFile.write( jsonstr )
             msg = "The DAS query returned an error.  The output is very long, and has been stored in:\n" + jsonfile
         else:
             msg = "The DAS query returned a error.  Here is the output\n" + jsonstr
         msg += "\nIt's possible that this was a server error.  If so, it may work if you try again later"
         raise AllInOneError(msg)
     return self.__findInJson(jsondict,"data")
def getFilesFromDAS(release, runtype, globalTag):
    '''Get proxy with "voms-proxy-init -voms cms" to use this option.'''
    print "Getting files from DAS. May take a while...."
    host = 'https://cmsweb.cern.ch'
    capath = '/etc/grid-security/certificates'

    query = "file dataset=/*{0}*/*{1}*{2}*/MINIAODSIM".format(
        runtype,
        release,
        globalTag,
    )
    output = get_data(host=host,
                      query=query,
                      idx=0,
                      limit=0,
                      debug=0,
                      cert=x509(),
                      capath=capath)
    files = []

    for entry in output["data"]:
        file = "root://cms-xrd-global.cern.ch/" + str(entry["file"][0]["name"])
        if "/".join([release, runtype, "MINIAODSIM", globalTag]) in file:
            files.append(file)

    return files
Exemple #8
0
def duplicateRunLumi(dataset):
    """
    checks if output dataset has duplicate lumis
    for every run.
    """
    RunlumisChecked = {}
    query = "file run lumi dataset=" + dataset
    das_data = get_data(das_host, query, 0, 0, 0)
    if isinstance(das_data, basestring):
        result = json.loads(das_data)
    else:
        result = das_data
    if result['status'] == 'fail':
        print 'DAS query failed with reason:', result['reason']
    else:
        preresult = result['data']
    #check ever file in dataset
    for filename in preresult:
        run = filename['run'][0]['run_number']
        #add run if new
        if run not in RunlumisChecked:
            RunlumisChecked[run] = set()
        newLumis = filename['lumi'][0]['number']
        #check every lumi on range
        for lumiRange in newLumis:
            newlumiRange = range(lumiRange[0], lumiRange[1] + 1)
            for lumi in newlumiRange:
                #if already checked in the same run
                if lumi in RunlumisChecked[run]:
                    return True
                else:
                    RunlumisChecked[run].add(lumi)
    return False
Exemple #9
0
def duplicateLumi(dataset):
    """
    checks if output dataset has a duplicate lumi
    """
    #registry of lumis checked, better a set
    lumisChecked = set()
    #get dtaset info frm das
    query = "file lumi dataset=" + dataset
    das_data = get_data(das_host, query, 0, 0, 0)
    if isinstance(das_data, basestring):
        result = json.loads(das_data)
    else:
        result = das_data
    if result['status'] == 'fail':
        print 'DAS query failed with reason:', result['reason']
    else:
        preresult = result['data']
    #check each file
    for filename in preresult:
        newLumis = filename['lumi'][0]['number']
        #for each file we check each lumi range.
        for lumiRange in newLumis:
            newlumiRange = [lumiRange[0]]
            if lumiRange[0] < lumiRange[1]:
                newlumiRange = range(lumiRange[0], lumiRange[1])
            #check each lumi, if its in the lumiset
            for lumi in newlumiRange:
                if lumi in lumisChecked:
                    return True
                else:
                    lumisChecked.add(lumi)
    return False
Exemple #10
0
def autocomplete_Datasets(data):
    result_array = []
    ckey = x509()  #os.path.join(os.environ['HOME'], '.globus/userkey.pem')
    cert = x509()  #os.path.join(os.environ['HOME'], '.globus/usercert.pem')
    cpath = '/etc/grid-security/certificates/'  #os.environ['SSL_CERT_DIR'] = '/etc/grid-security/certificates/'

    for element in data:
        if '*' in element:
            jsondict = get_data('https://cmsweb.cern.ch', "dataset=" + element,
                                0, 0, 0, 300, ckey, cert, cpath)
            #print json.dumps(jsondict, indent=4, sort_keys=True)
            #print json.dumps(jsondict['data'], indent=4, sort_keys=True)
            try:
                for i in range(len(jsondict['data'])):
                    result_array.append(
                        jsondict['data'][i]['dataset'][0]['name'])
            except:
                print '=' * 10
                print 'Not found', element
                print '=' * 10
        else:
            result_array.append(element)
    if len(result_array) == 0:
        print "No samples found going to exit"
        sys.exit(0)
    return result_array
def duplicateRunLumi(dataset):
    RunlumisChecked={}
    query="file run lumi dataset="+dataset
    das_data = get_data(das_host,query,0,0,0)
    if isinstance(das_data, basestring):
        result = json.loads(das_data)
    else:
        result = das_data
    if result['status'] == 'fail' :
        print 'DAS query failed with reason:',result['reason']
    else:
	preresult=result['data'] 
    for filename in preresult:
     	run=filename['run'][0]['run_number']
	if run not in RunlumisChecked:
		RunlumisChecked[run]=[]
	newLumis=filename['lumi'][0]['number']
	for lumiRange in newLumis:
			newlumiRange=range(lumiRange[0], lumiRange[1]+1)
			for lumi in newlumiRange:
				if lumi in RunlumisChecked[run]:
					return True
				else:
					RunlumisChecked[run].append(lumi)
    return False
Exemple #12
0
def duplicateRunLumi(dataset):
    """
    checks if output dataset has duplicate lumis
    for every run.
    """
    RunlumisChecked={}
    query="file run lumi dataset="+dataset
    das_data = get_data(das_host,query,0,0,0)
    if isinstance(das_data, basestring):
        result = json.loads(das_data)
    else:
        result = das_data
    if result['status'] == 'fail' :
        print 'DAS query failed with reason:',result['reason']
    else:
        preresult=result['data'] 
    #check ever file in dataset    
    for filename in preresult:
        run=filename['run'][0]['run_number']
        #add run if new
        if run not in RunlumisChecked:
            RunlumisChecked[run]=set()
        newLumis=filename['lumi'][0]['number']
        #check every lumi on range        
        for lumiRange in newLumis:
            newlumiRange=range(lumiRange[0], lumiRange[1]+1)
            for lumi in newlumiRange:
                #if already checked in the same run
                if lumi in RunlumisChecked[run]:
                    return True
                else:
                    RunlumisChecked[run].add(lumi)
    return False
Exemple #13
0
def duplicateRunLumi(dataset):
    RunlumisChecked={}
    query="file run lumi dataset="+dataset
    das_data = get_data(das_host,query,0,0,0)
    if isinstance(das_data, basestring):
        result = json.loads(das_data)
    else:
        result = das_data
    if result['status'] == 'fail' :
        print 'DAS query failed with reason:',result['reason']
    else:
	preresult=result['data'] 
    for filename in preresult:
     	run=filename['run'][0]['run_number']
	if run not in RunlumisChecked:
		RunlumisChecked[run]=[]
	newLumis=filename['lumi'][0]['number']
	for lumiRange in newLumis:
			newlumiRange=range(lumiRange[0], lumiRange[1]+1)
			for lumi in newlumiRange:
				if lumi in RunlumisChecked[run]:
					return True
				else:
					RunlumisChecked[run].append(lumi)
    return False
Exemple #14
0
 def __getData( self, dasQuery, dasLimit = 0 ):
     """Run `dasQuery` through das_client and return the response's "data".

     dasQuery -- query string handed to das_client.get_data
     dasLimit -- passed through as the fourth get_data argument

     Raises AllInOneError if the response status is not 'ok'.
     """
     dasData = das_client.get_data( 'https://cmsweb.cern.ch',
                                    dasQuery, 0, dasLimit, False )
     # accept an already decoded response as well as a JSON string
     if isinstance( dasData, dict ):
         jsondict = dasData
     else:
         jsondict = json.loads( dasData )
     # Check, if the DAS query fails
     if jsondict["status"] != 'ok':
         # build a real string; the former "a, b" expression made a tuple
         msg = "Status not 'ok', but: %s" % ( jsondict["status"], )
         raise AllInOneError(msg)
     return jsondict["data"]
Exemple #15
0
 def __getData(self, dasQuery, dasLimit=0):
     """Query DAS and hand back the "data" entry of the answer.

     dasQuery -- query string handed to das_client.get_data
     dasLimit -- passed through as the fourth get_data argument

     Raises AllInOneError if the response status is not 'ok'.
     """
     dasData = das_client.get_data('https://cmsweb.cern.ch', dasQuery, 0,
                                   dasLimit, False)
     # only decode when the client did not already return a dict
     jsondict = dasData if isinstance(dasData, dict) else json.loads(dasData)
     # Check, if the DAS query fails
     if jsondict["status"] != 'ok':
         # the message used to be a (text, status) tuple by accident
         msg = "Status not 'ok', but: %s" % (jsondict["status"],)
         raise AllInOneError(msg)
     return jsondict["data"]
def dasQuery(queryString, entryTitle) :
    """Yield the first `entryTitle` item of every record a DAS query returns.

    queryString -- query handed to das_client.get_data
    entryTitle  -- key to read from each record of the response's 'data'

    Records that lack the key, or hold an empty list under it, are skipped.
    Raises Exception if the response status is not 'ok'.  Being a
    generator, nothing is queried until iteration starts.
    """
    import das_client
    dasinfo = das_client.get_data('https://cmsweb.cern.ch', queryString, 0, 0, False)
    if dasinfo['status'] != 'ok' :
        raise Exception('DAS query failed.\nQuery: %s\nDAS Status returned: %s' % (queryString, dasinfo['status']))

    for entry in dasinfo['data'] :
        # a record without the requested key used to abort the whole
        # iteration with KeyError; skip it instead
        if entryTitle in entry and len(entry[entryTitle]) > 0 :
            yield entry[entryTitle][0]
def getLumisInDataset(dataset):
    lumis=[]
    query="run lumi dataset="+dataset
    das_data = get_data(das_host,query,0,0,0)
    if isinstance(das_data, basestring):
        result = json.loads(das_data)
    else:
        result = das_data
    if result['status'] == 'fail' :
        print 'DAS query failed with reason:',result['reason']
    else:
    #preresult=result['data'][0]['lumi'][[0]['number'][0]
    #print result    
    preresult=result['data'][0]['lumi'][0]['number']
    for lumiRange in preresult:
        if lumiRange[0]==lumiRange[1]:
            lumis.append(lumiRange[0])
        else:
            lumis=lumis+range(lumiRange[0], lumiRange[1])
    return lumis

def getBlocksNotProcessed(lumisOutput, BlockLumisInput):
    """Return the blocks none of whose lumis appear in `lumisOutput`.

    lumisOutput     -- iterable of hashable lumi values
    BlockLumisInput -- iterable of (block name, lumis of the block) pairs

    A block with an empty lumi list counts as not processed.  The names are
    returned in input order, each passed through .encode().
    """
    # build the lookup set once instead of scanning the list for every lumi
    processedLumis = set(lumisOutput)
    BlocksNotProcessed = []
    for blockLumiPair in BlockLumisInput:
        # any() stops at the first lumi that was processed
        if not any(lumi in processedLumis for lumi in blockLumiPair[1]):
            BlocksNotProcessed.append(blockLumiPair[0])
    return [x.encode() for x in BlocksNotProcessed]


def getListUnprocessedBlocks(url, workflow):
    """Return the input blocks of `workflow` that have no lumi in its output.

    The lumis of the workflow's first output dataset are compared against
    the per-block lumis of its input datasets.  `url` is accepted but not
    used here.
    """
    info = WorkflowWithInput(workflow)
    produced = info.outputDatasets
    consumed = info.inputDatasets
    outputLumis = getLumisInDataset(produced[0])
    inputBlockLumis = getBlocksLumi(consumed)
    return getBlocksNotProcessed(outputLumis, inputBlockLumis)

def main():
    args=sys.argv[1:]
    if not len(args)==1:
        print "usage:unprocessedBlocks workflowname"
        sys.exit(0)
    workflow=args[0]
    url='cmsweb.cern.ch'
    BlocksNotProcessed=getListUnprocessedBlocks(url, workflow)
    print "Number of blocks not processed", str(len(BlocksNotProcessed))
    print [x.encode() for x in BlocksNotProcessed]
    sys.exit(0);

# Run main() only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
def getNumberofFilesPerRun(das_url, dataset, run):
	query="file dataset="+dataset+" run="+str(run)+" | count(file.name)"
	das_data = get_data(das_url,query,0,0,0)
   	if isinstance(das_data, basestring):
        	result = json.loads(das_data)
	else:
        	result = das_data
	if result['status'] == 'fail' :
        	print 'DAS query failed with reason:',result['reason']
	else:
		return result['data'][0]['result']['value']
Exemple #19
0
def getNumberofFilesPerRun(das_url, dataset, run):
	query="file dataset="+dataset+" run="+str(run)+" | count(file.name)"
	das_data = get_data(das_url,query,0,0,0)
   	if isinstance(das_data, basestring):
        	result = json.loads(das_data)
	else:
        	result = das_data
	if result['status'] == 'fail' :
        	print 'DAS query failed with reason:',result['reason']
	else:
		return result['data'][0]['result']['value']
Exemple #20
0
def autocomplete_Datasets(data):
    """Return `data` with every wildcard entry replaced by its DAS matches.

    Entries containing '*' are looked up with a "dataset=<entry>" query and
    replaced by the dataset names found; all other entries are kept as is.
    """
    expanded = []
    for pattern in data:
        if '*' not in pattern:
            expanded.append(pattern)
            continue
        reply = get_data('https://cmsweb.cern.ch', "dataset=" + pattern, 0, 0, 0)
        for record in reply['data']:
            expanded.append(record['dataset'][0]['name'])
    return expanded
Exemple #21
0
def dasQuery(queryString, entryTitle):
    """Generate the first `entryTitle` item of each record of a DAS query.

    Records without the key, or with nothing stored under it, are skipped.
    Raises Exception if the response status is not 'ok'.
    """
    reply = das_client.get_data('https://cmsweb.cern.ch', queryString, 0, 0,
                                False)
    status = reply['status']
    if status != 'ok':
        raise Exception(
            'DAS query failed.\nQuery: %s\nDAS Status returned: %s' %
            (queryString, status))

    if len(reply['data']) == 0:
        return
    for record in reply['data']:
        if entryTitle not in record:
            continue
        if len(record[entryTitle]) > 0:
            yield record[entryTitle][0]
def getLumisInDataset(dataset):
    lumis=[]
    query="run lumi dataset="+dataset
    das_data = get_data(das_host,query,0,0,0)
    if isinstance(das_data, basestring):
        result = json.loads(das_data)
    else:
        result = das_data
    if result['status'] == 'fail' :
        print 'DAS query failed with reason:',result['reason']
    else:
    #preresult=result['data'][0]['lumi'][[0]['number'][0]
    #print result    
    preresult=result['data'][0]['lumi'][0]['number']
    for lumiRange in preresult:
        if lumiRange[0]==lumiRange[1]:
            lumis.append(lumiRange[0])
        else:
            lumis=lumis+range(lumiRange[0], lumiRange[1])
    return lumis

def getBlocksNotProcessed(lumisOutput, BlockLumisInput):
    """List the blocks that share no lumi with `lumisOutput`.

    lumisOutput     -- iterable of hashable lumi values
    BlockLumisInput -- iterable of (block name, lumis of the block) pairs

    Blocks whose lumi list is empty are reported as not processed.  Order
    follows the input; every name is returned through .encode().
    """
    # one set lookup per lumi instead of a list scan per lumi
    seenLumis = set(lumisOutput)
    unprocessed = [
        blockLumiPair[0] for blockLumiPair in BlockLumisInput
        if not any(lumi in seenLumis for lumi in blockLumiPair[1])
    ]
    return [x.encode() for x in unprocessed]


def getListUnprocessedBlocks(url, workflow):
    """Find the input blocks of `workflow` with no lumi in its first output.

    Builds a WorkflowWithInput for the workflow, reads the lumis of its
    first output dataset and the per-block lumis of its input datasets, and
    returns the blocks without any lumi in the output.  `url` is not used.
    """
    wf = WorkflowWithInput(workflow)
    outputs = wf.outputDatasets
    inputs = wf.inputDatasets
    lumisDone = getLumisInDataset(outputs[0])
    lumisPerBlock = getBlocksLumi(inputs)
    return getBlocksNotProcessed(lumisDone, lumisPerBlock)

def main():
    args=sys.argv[1:]
    if not len(args)==1:
        print "usage:unprocessedBlocks workflowname"
        sys.exit(0)
    workflow=args[0]
    url='cmsweb.cern.ch'
    BlocksNotProcessed=getListUnprocessedBlocks(url, workflow)
    print "Number of blocks not processed", str(len(BlocksNotProcessed))
    print [x.encode() for x in BlocksNotProcessed]
def checkDatasetExistenceDAS(dataset):
    query="dataset dataset="+dataset+" status=*"
    das_data = get_data(das_host,query,0,0,0)
    if isinstance(das_data, basestring):
        result = json.loads(das_data)
    else:
        result = das_data
    if result['status'] == 'fail' :
        print 'DAS query failed with reason:',result['reason']
    if len(result['data'])==0:
	return False
    else:
	return True
Exemple #24
0
def getFileNames (event):
    """Query DAS for the file names matching `event`.

    `event` is used as the mapping for the query format string, so it must
    provide 'dataset', 'run' and 'lumi'.  If the response status is not
    'ok', the status is printed and an empty list is returned.

    NOTE(review): the visible body stops after reading 'mongo_query',
    'filters' and 'data' from the response and never fills or returns
    `files` on the success path -- the snippet looks truncated; confirm
    against the complete source.
    """
    files = []
    # Query DAS
    query = "file dataset=%(dataset)s run=%(run)i lumi=%(lumi)i | grep file.name" % event
    jsondict = das_client.get_data('https://cmsweb.cern.ch', query, 0, 0, False)
    status = jsondict['status']
    if status != 'ok':
        print "DAS query status: %s"%(status)
        return files
    
    # pieces of the response picked out for later use (not used in the
    # part of the function visible here)
    mongo_query = jsondict['mongo_query']
    filters = mongo_query['filters']
    data = jsondict['data']
Exemple #25
0
def query(query_str, verbose=False):
    'simple query function to interface with DAS, better than using Popen as everything is handled by python'
    if verbose:
        print 'querying DAS with: "%s"' % query_str
    data = get_data('https://cmsweb.cern.ch', query_str, 0, 0, False)

    to_get = query_str.split()[0].strip(',')
    if data['status'] != 'ok':
        raise RuntimeError('Das query crashed')

    #-1 works both when getting dataset from files and files from datasets,
    #not checked on everything
    return [i[to_get][-1]['name'] for i in data['data']]
Exemple #26
0
def autocomplete_Datasets(data):
    """Expand the wildcard entries of `data` into concrete dataset names.

    An entry with a '*' is resolved through a "dataset=<entry>" query and
    replaced by every name found; entries without one pass through.
    """
    names = []
    for entry in data:
        if '*' in entry:
            answer = get_data('https://cmsweb.cern.ch', "dataset=" + entry,
                              0, 0, 0)
            for match in answer['data']:
                names.append(match['dataset'][0]['name'])
        else:
            names.append(entry)
    return names
def listFichier(theRelease, theSample, dataTier, tag):
	theQuery = "file dataset=/"+theSample+"*/"+theRelease+"-"+tag+"*/"+dataTier
	jsondict = das_client.get_data('https://cmsweb.cern.ch', theQuery, 0, 0, False)
	status = jsondict['status']
	if status != 'ok':
		print "DAS query status: %s"%(status)
	data = jsondict['data']
	theFileList = []
	for aRaw in data:
		recordStuff = aRaw['file']
   		for aRecord in recordStuff:
        		print "theFile===",aRecord['name']
        		theFileList.append(aRecord['name'])
   	return theFileList
def getNumberOfEvents(dataset):
  """Return the summed 'value' of the DAS "sum(file.nevents)" records.

  Returns None when the response status is not 'ok'.
  """
  reply = das.get_data("file dataset=" + dataset + " | sum(file.nevents)")
  if isinstance(reply, basestring):
    dasjson = json.loads(reply)
  else:
    dasjson = reply
  if dasjson.get('status') != 'ok':
    return None
  return sum(record.get('result').get('value') for record in dasjson.get('data'))
def runInDataset(url, run, dataset):
    query = "file run=" + str(run) + " dataset=" + dataset
    output = {}
    das_data = get_data(das_host, query, 0, 0, 0)
    if isinstance(das_data, basestring):
        result = json.loads(das_data)
    else:
        result = das_data
    if result['status'] == 'fail':
        print 'DAS query failed with reason:', result['reason']
    else:
        if len(result['data']) > 0:
            return True
        else:
            return False
def runInDataset(url, run, dataset):
    query="file run="+str(run)+ " dataset="+dataset
    output = {}
    das_data = get_data(das_host,query,0,0,0)
    if isinstance(das_data, basestring):
        result = json.loads(das_data)
    else:
        result = das_data
    if result['status'] == 'fail' :
        print 'DAS query failed with reason:',result['reason']
    else:
    	if len(result['data'])>0:
		return True
	else:
		return False
def getNumberOfEvents(dataset):
    """Add up the 'value' of every "sum(file.nevents)" record for `dataset`.

    Returns None when the response status is not 'ok'.
    """
    raw = das.get_data("file dataset=" + dataset + " | sum(file.nevents)")
    dasjson = json.loads(raw) if isinstance(raw, basestring) else raw
    if dasjson.get('status') != 'ok':
        return None
    total = 0
    for record in dasjson.get('data'):
        total = total + record.get('result').get('value')
    return total
Exemple #32
0
def getRunsInDataset(das_url, dataset):
    query = "run dataset=" + dataset
    das_data = get_data(das_url, query, 0, 0, 0)
    if isinstance(das_data, basestring):
        result = json.loads(das_data)
    else:
        result = das_data
    if result['status'] == 'fail':
        print 'DAS query failed with reason:', result['reason']
    else:
        runs = []
        preresult = result['data']
        for run in preresult:
            runs.append(run['run'][0]['run_number'])
        return runs
def listFichier(theRelease, theSample, dataTier, tag):
    theQuery = "file dataset=/" + theSample + "*/" + theRelease + "-" + tag + "*/" + dataTier
    jsondict = das_client.get_data('https://cmsweb.cern.ch', theQuery, 0, 0,
                                   False)
    status = jsondict['status']
    if status != 'ok':
        print "DAS query status: %s" % (status)
    data = jsondict['data']
    theFileList = []
    for aRaw in data:
        recordStuff = aRaw['file']
        for aRecord in recordStuff:
            print "theFile===", aRecord['name']
            theFileList.append(aRecord['name'])
    return theFileList
def getNumberOfEvents(dataset):
    """Total the 'value' fields of the "sum(file.nevents)" DAS response.

    Returns None when the response status is not "ok".
    """
    answer = das.get_data("file dataset=" + dataset + " | sum(file.nevents)")
    if isinstance(answer, basestring):
        answer = json.loads(answer)
    if answer.get("status") == "ok":
        return sum(item.get("result").get("value")
                   for item in answer.get("data"))
    return None
Exemple #35
0
def getRunsInDataset(das_url, dataset):
	query="run dataset="+dataset
  	das_data = get_data(das_url,query,0,0,0)
   	if isinstance(das_data, basestring):
        	result = json.loads(das_data)
	else:
        	result = das_data
	if result['status'] == 'fail' :
        	print 'DAS query failed with reason:',result['reason']
	else:
		runs=[]
		preresult=result['data']
		for run in preresult:
			runs.append(run['run'][0]['run_number'])
		return runs
Exemple #36
0
def autocomplete_Datasets(data):
    """Replace wildcard entries of `data` by the dataset names DAS returns.

    The user key and certificate under $HOME/.globus are passed to get_data
    and SSL_CERT_DIR is set in the environment.  Entries without a '*' are
    copied unchanged.
    """
    home = os.environ['HOME']
    ckey = os.path.join(home, '.globus/userkey.pem')
    cert = os.path.join(os.environ['HOME'], '.globus/usercert.pem')
    os.environ['SSL_CERT_DIR'] = '/etc/grid-security/certificates/'

    resolved = []
    for item in data:
        if '*' not in item:
            resolved.append(item)
            continue
        reply = get_data('https://cmsweb.cern.ch', "dataset=" + item, 0, 0, 0, 300, ckey, cert)
        for record in reply['data']:
            resolved.append(record['dataset'][0]['name'])
    return resolved
Exemple #37
0
 def get_data(self, dataset, queryobject = None, query_aggregation=None):
     """Build a DAS query for `dataset` and return das_client's response.

     queryobject       -- optional text placed before "dataset=<dataset> "
     query_aggregation -- optional text appended after " | "

     Host, index, limit, verbosity, threshold, key and certificate are
     taken from self.opts.
     """
     pieces = []
     if queryobject is not None:
         pieces.append(queryobject + " ")
     pieces.append("dataset=%s " % dataset)
     if query_aggregation is not None:
         pieces.append(" | %s" % query_aggregation)
     query = "".join(pieces)
     return das_client.get_data(self.opts.host, query, self.opts.idx,
                                self.opts.limit, self.opts.verbose,
                                self.opts.threshold, self.opts.ckey,
                                self.opts.cert)
Exemple #38
0
def query(query_str, verbose=False):
   """Send `query_str` to DAS and return the 'name' found in each record.

   The key read from every record is the first word of the query with
   commas stripped; the last item under that key supplies the name.
   Raises RuntimeError if the response status is not 'ok'.
   """
   if verbose:
      print('querying DAS with: "%s"' % query_str)
   answer = get_data('https://cmsweb.cern.ch', query_str, 0, 0, False)

   first_word = query_str.split()[0]
   key = first_word.strip(',')
   if answer['status'] != 'ok':
      raise RuntimeError('Das query crashed')

   #-1 works both when getting dataset from files and files from datasets,
   #not checked on everything
   collected = []
   for row in answer['data']:
      collected.append(row[key][-1]['name'])
   return collected
def getEventsRun(das_url, dataset, run):
    query="summary dataset dataset="+dataset+" run="+str(run)+" | grep grep summary.nevents"
    das_data = get_data(das_url,query,0,0,0)
    if isinstance(das_data, basestring):
        result = json.loads(das_data)
    else:
        result = das_data
    if result['status'] == 'fail' :
        print 'DAS query failed with reason:',result['reason']
    else:
	if len(result['data'])==0:#dataset not yet registered in DBS
		return 0
	preresult=result['data'][0]['summary']
	for key in preresult:
		if 'nevents' in key:
			return key['nevents']		
	return -1
Exemple #40
0
def getEventsRun(das_url, dataset, run):
    query="summary dataset dataset="+dataset+" run="+str(run)+" | grep grep summary.nevents"
    das_data = get_data(das_url,query,0,0,0)
    if isinstance(das_data, basestring):
        result = json.loads(das_data)
    else:
        result = das_data
    if result['status'] == 'fail' :
        print 'DAS query failed with reason:',result['reason']
    else:
	if len(result['data'])==0:#dataset not yet registered in DBS
		return 0
	preresult=result['data'][0]['summary']
	for key in preresult:
		if 'nevents' in key:
			return key['nevents']		
	return -1
Exemple #41
0
def getEventCountDataSet(das_url, dataset):
    query="dataset dataset="+dataset+"  status=* | grep dataset.nevents"
    das_data = get_data(das_url,query,0,0,0)
    if isinstance(das_data, basestring):
        result = json.loads(das_data)
    else:
        result = das_data
    if result['status'] == 'fail' :
        print 'DAS query failed with reason:',result['reason']
    else:
	if len(result['data'])==0:#dataset not yet registered in DBS
		return 0
	preresult=result['data'][0]['dataset']
	for key in preresult:
		if 'nevents' in key:
			return key['nevents']		
	return -1
def listFichier(theRun, theSample, dataTier,runNumberRAW):
    runNumber = putRunInShape(runNumberRAW)
    theQuery = "file dataset=/"+theSample+"*/"+theRun+"*/"+dataTier
    jsondict = das_client.get_data('https://cmsweb.cern.ch', theQuery, 0, 0, False)
    status = jsondict['status']
    if status != 'ok':
        print "DAS query status: %s"%(status)
    data = jsondict['data']
    theFileList = []
    for aRaw in data:
        recordStuff = aRaw['file']
        for aRecord in recordStuff:
            #                if ((len(re.split("251/244",aRecord['name']))>1) or (len(re.split("251/252",aRecord['name']))>1) or (len(re.split("251/562",aRecord['name']))>1) or (len(re.split("251/643",aRecord['name']))>1) or (len(re.split("251/638",aRecord['name']))>1) or (len(re.split("251/721",aRecord['name']))>1) or (len(re.split("251/718",aRecord['name']))>1)):
            if ((len(re.split(runNumber,aRecord['name']))>1)):
                theFileList.append(aRecord['name'])
                print "\""+aRecord['name']+"\","
    return theFileList
Exemple #43
0
def getSizeAtSizeDAS(dataset, site):
    """
    Return the fraction of a dataset that DAS reports for a given site.

    Returns the ``dataset_fraction`` value of the first record whose site
    name equals ``site``, or the string 'Unknown' when no record matches.
    Terminates the process via sys.exit when the DAS query failed.
    """
    query = "site dataset=" + dataset
    das_data = get_data(das_host, query, 0, 0, 0)
    # get_data may return either a JSON string or an already decoded object
    if isinstance(das_data, basestring):
        result = json.loads(das_data)
    else:
        result = das_data
    # The reply is inspected whichever form it arrived in; previously a
    # JSON-string reply was decoded and then ignored.
    if result['status'] == 'fail':
        print('ERROR: DAS query failed with reason: %s' % (result['reason'],))
        # NOTE(review): exit status 0 on an error is kept as in the original
        sys.exit(0)
    for record in result['data']:
        if record['site'][0]['name'] == site:
            return record['site'][0]['dataset_fraction']
    return 'Unknown'
Exemple #44
0
def getEventCountDataSet(das_url, dataset):
    """
    Return the number of events DAS reports for a dataset.

    The query asks for ``dataset.nevents`` with ``status=*`` and the first
    ``nevents`` value found in the first returned record is used.

    Returns:
        the ``nevents`` value of the first entry that carries one;
        0 when DAS returns no record at all;
        -1 when the first record has no ``nevents`` entry;
        None when the DAS query failed (query and reason are printed).
    """
    query = "dataset dataset=" + dataset + "  status=* | grep dataset.nevents"
    das_data = get_data(das_url, query, 0, 0, 0)
    # get_data may return either a JSON string or an already decoded object
    if isinstance(das_data, basestring):
        result = json.loads(das_data)
    else:
        result = das_data
    if result['status'] == 'fail':
        # the query is separated from the surrounding words by spaces
        print('DAS query %s failed with reason: %s' % (query, result['reason']))
        return None
    if not result['data']:  # dataset not yet registered in DBS
        return 0
    for entry in result['data'][0]['dataset']:
        if 'nevents' in entry:
            return entry['nevents']
    return -1
Exemple #45
0
def hasAllBlocksClosed(dataset):
    """
    Tell whether every block of the given dataset is closed, i.e. whether
    the dataset can be used as input.

    Returns False as soon as one block reports is_open == 'y', True otherwise.
    """
    records = get_data(das_host, "block dataset=" + dataset, 0, 0, 0)['data']
    # blocks that carry no 'is_open' field are not counted as open
    some_block_open = any(
        blk['is_open'] == 'y'
        for record in records
        for blk in record['block']
        if 'is_open' in blk
    )
    return not some_block_open
def getSizeAtSizeDAS(dataset, site):
    """
    Return the fraction of a dataset that DAS reports for a given site.

    Returns the ``dataset_fraction`` value of the first record whose site
    name equals ``site``, or the string 'Unknown' when no record matches.
    Terminates the process via sys.exit when the DAS query failed.
    """
    query = "site dataset=" + dataset
    das_data = get_data(das_host, query, 0, 0, 0)
    # get_data may return either a JSON string or an already decoded object
    if isinstance(das_data, basestring):
        result = json.loads(das_data)
    else:
        result = das_data
    # Status check and lookup apply to both reply forms; previously they ran
    # only for an already decoded reply and a JSON string gave 'Unknown'.
    if result['status'] == 'fail':
        print('ERROR: DAS query failed with reason: %s' % (result['reason'],))
        # NOTE(review): exit status 0 on an error is kept as in the original
        sys.exit(0)
    for record in result['data']:
        if record['site'][0]['name'] == site:
            return record['site'][0]['dataset_fraction']
    return 'Unknown'
Exemple #47
0
def hasAllBlocksClosed(dataset):
    """
    Report whether all blocks of a dataset are closed, which is the
    condition for using the dataset as input.
    """
    block_query = "block dataset=" + dataset
    reply = get_data(das_host, block_query, 0, 0, 0)
    # walk every block of every returned record
    for record in reply['data']:
        for blk in record['block']:
            # a block without the 'is_open' field is simply skipped
            if 'is_open' in blk and blk['is_open'] == 'y':
                return False
    return True
Exemple #48
0
def autocomplete_Datasets(data):
    """
    Expand wildcard dataset patterns into concrete dataset names.

    Entries containing '*' are sent to DAS and replaced by the names of all
    datasets returned; other entries are passed through unchanged. The grid
    key/certificate under $HOME/.globus are handed to get_data, and
    SSL_CERT_DIR is set in the process environment as a side effect.

    Returns the resulting list of dataset names, in input order.
    """
    home = os.environ['HOME']
    ckey = os.path.join(home, '.globus/userkey.pem')
    cert = os.path.join(home, '.globus/usercert.pem')
    os.environ['SSL_CERT_DIR'] = '/etc/grid-security/certificates/'

    expanded = []
    for pattern in data:
        if '*' not in pattern:
            # nothing to expand, keep the entry as it is
            expanded.append(pattern)
            continue
        reply = get_data('https://cmsweb.cern.ch', "dataset=" + pattern,
                         0, 0, 0, 300, ckey, cert)
        expanded.extend(record['dataset'][0]['name']
                        for record in reply['data'])
    return expanded
Exemple #49
0
 def __getData( self, dasQuery, dasLimit = 0 ):
     """
     Run a DAS query and return the "data" part of the reply.

     The reply is decoded from JSON when it arrives as a str. An
     AllInOneError is raised when the reply holds a truthy data/error entry,
     when its status is not 'ok', or when it has no "data" key.
     """
     reply = das_client.get_data( 'https://cmsweb.cern.ch',
                                  dasQuery, 0, dasLimit, False )
     jsondict = json.loads( reply ) if isinstance(reply, str) else reply
     # A missing data/error entry (KeyError) means no error was reported
     try:
         error = self.__findInJson(jsondict,["data","error"])
     except KeyError:
         error = None
     replyIsGood = ( not error
                     and self.__findInJson(jsondict,"status") == 'ok'
                     and "data" in jsondict )
     if replyIsGood:
         return self.__findInJson(jsondict,"data")
     msg = ("The DAS query returned a error.  Here is the output\n" + str(jsondict) +
            "\nIt's possible that this was a server error.  If so, it may work if you try again later")
     raise AllInOneError(msg)
Exemple #50
0
def getRunsDataset(das_url, dataset):
    runList = []
    query = "run dataset=" + dataset + "| grep run.run_number"
    das_data = get_data(das_url, query, 0, 0, 0)
    if isinstance(das_data, basestring):
        result = json.loads(das_data)
    else:
        result = das_data
    if result['status'] == 'fail':
        print 'DAS query failed with reason:', result['reason']
    else:
        if len(result['data']) == 0:  #dataset not yet registered in DBS
            return runList
        preresult = result['data']
        for key in preresult:
            if 'run' in key:
                runList.append(key['run'][0]['run_number'])
    return runList
Exemple #51
0
def getRunLumiCountDataset(das_url, dataset):
    """
    Return the number of lumi sections DAS reports in a dataset summary.

    Returns:
        the ``nlumis`` value of the first summary entry of the first record;
        0 when DAS returns no record at all;
        -1 when the first record has no 'summary' entry;
        None when the DAS query failed (the reason is printed).
    """
    query = "summary dataset=" + dataset + " | grep summary.nlumis"
    das_data = get_data(das_url, query, 0, 0, 0)
    # get_data may return either a JSON string or an already decoded object
    if isinstance(das_data, basestring):
        result = json.loads(das_data)
    else:
        result = das_data
    if result['status'] == 'fail':
        print('DAS query failed with reason: %s' % (result['reason'],))
        return None
    if not result['data']:  # dataset not yet registered in DBS
        return 0
    record = result['data'][0]
    # Test for the exact key: the former loop did a substring match on every
    # key name and then indexed 'summary' whether or not it existed.
    if 'summary' in record:
        return record['summary'][0]['nlumis']
    return -1
Exemple #52
0
def listFichier(theRun, theSample, dataTier, runNumberRAW):
    """
    Collect and print the dataset files whose name matches a run number.

    The run number is reshaped with putRunInShape and applied as a regular
    expression to each file name DAS returns for the dataset pattern built
    from theSample, theRun and dataTier.

    Returns the list of matching file names.
    """
    runNumber = putRunInShape(runNumberRAW)
    theQuery = "file dataset=/" + theSample + "*/" + theRun + "*/" + dataTier
    reply = das_client.get_data('https://cmsweb.cern.ch', theQuery, 0, 0,
                                False)
    queryStatus = reply['status']
    if queryStatus != 'ok':
        # only reported; the records are still processed below
        print("DAS query status: %s" % (queryStatus))
    selected = []
    for row in reply['data']:
        for fileRecord in row['file']:
            fileName = fileRecord['name']
            # more than one piece after splitting means the pattern occurred
            if len(re.split(runNumber, fileName)) > 1:
                selected.append(fileName)
                print("\"" + fileName + "\",")
    return selected
def getDASData (query):
    """
    Run a DAS query and return the distinct, non-empty values selected by
    the query's grep filter, in order of first appearance.

    An empty list is returned (and the status printed) when the query
    status is not 'ok'.
    """
    reply = das_client.get_data ('https://cmsweb.cern.ch', query, 0, 0, False)
    queryStatus = reply['status']
    if queryStatus != 'ok':
        print("DAS query status: %s"%(queryStatus))
        return []

    filters = reply['mongo_query']['filters']
    uniqueValues = []
    for row in reply['data']:
        # only the first value produced for the row is considered
        value = list (das_client.get_value (row, filters['grep']))[0]
        if len (value) > 0 and value not in uniqueValues:
            uniqueValues.append (value)

    return uniqueValues
def getEventsBlock(das_url, block_name):
    """
    Return the number of events DAS reports for a block.

    Returns:
        the ``nevents`` value of the first entry that carries one;
        0 when the DAS query failed (the reason is printed) or when DAS
        returns no record at all;
        -1 when the first record has no ``nevents`` entry.
    """
    query = "block=" + block_name + "  | grep block.nevents"
    das_data = get_data(das_url, query, 0, 0, 0)
    # get_data may return either a JSON string or an already decoded object
    if isinstance(das_data, basestring):
        result = json.loads(das_data)
    else:
        result = das_data
    if result['status'] == 'fail':
        print('DAS query failed with reason: %s' % (result['reason'],))
        return 0
    if not result['data']:  # nothing registered for this query
        return 0
    for entry in result['data'][0]['block']:
        if 'nevents' in entry:
            return entry['nevents']
    return -1
Exemple #55
0
def getRunsDataset(das_url, dataset):
    """
    Return the run numbers DAS lists for a dataset.

    An empty list is returned when the query fails (the reason is printed)
    or when DAS returns no record.
    """
    runList = []
    query = "run dataset=" + dataset + "| grep run.run_number"
    das_data = get_data(das_url, query, 0, 0, 0)
    # get_data may return either a JSON string or an already decoded object
    if isinstance(das_data, basestring):
        result = json.loads(das_data)
    else:
        result = das_data
    if result['status'] == 'fail':
        print('DAS query failed with reason: %s' % (result['reason'],))
        return runList
    for record in result['data']:
        # records without a 'run' entry are ignored
        if 'run' in record:
            runList.append(record['run'][0]['run_number'])
    return runList
Exemple #56
0
def getRunLumiCountDatasetRun(das_url, dataset, run):
    """
    Return the summed number of lumi sections DAS reports for one run of a
    dataset (query aggregator ``sum(summary.nlumis)``).

    Returns:
        the ``value`` of the first record carrying a 'result' entry;
        0 when DAS returns no record at all;
        -1 when no record carries a 'result' entry;
        None when the DAS query failed (the reason is printed).
    """
    query = ("summary dataset=" + dataset + "  run=" + str(run) +
             " | sum(summary.nlumis)")
    das_data = get_data(das_url, query, 0, 0, 0)
    # get_data may return either a JSON string or an already decoded object
    if isinstance(das_data, basestring):
        result = json.loads(das_data)
    else:
        result = das_data
    if result['status'] == 'fail':
        print('DAS query failed with reason: %s' % (result['reason'],))
        return None
    if not result['data']:  # dataset not yet registered in DBS
        return 0
    for record in result['data']:
        if 'result' in record:
            return record['result']['value']
    return -1
Exemple #57
0
def getEventsBlock(das_url, block_name):
    """
    Return the number of events DAS reports for a block.

    Returns:
        the ``nevents`` value of the first entry that carries one;
        0 when the DAS query failed (the reason is printed) or when DAS
        returns no record at all;
        -1 when the first record has no ``nevents`` entry.
    """
    query = "block=" + block_name + "  | grep block.nevents"
    das_data = get_data(das_url, query, 0, 0, 0)
    # get_data may return either a JSON string or an already decoded object
    if isinstance(das_data, basestring):
        result = json.loads(das_data)
    else:
        result = das_data
    if result['status'] == 'fail':
        print('DAS query failed with reason: %s' % (result['reason'],))
        return 0
    if not result['data']:  # nothing registered for this query
        return 0
    for entry in result['data'][0]['block']:
        if 'nevents' in entry:
            return entry['nevents']
    return -1
Exemple #58
0
def getRunLumiCountDatasetListLimited(das_url,dataset, runlist):
    """
    Return the number of lumi sections DAS reports in the summary of a
    dataset restricted to the given runs.

    ``runlist`` is inserted into the query through str().

    Returns:
        the ``nlumis`` value of the first summary entry of the first record;
        0 when DAS returns no record at all;
        -1 when the first record has no 'summary' entry;
        None when the DAS query failed (the reason is printed).
    """
    query = ("summary dataset=" + dataset + " run in " + str(runlist) +
             " | grep summary.nlumis")
    das_data = get_data(das_url, query, 0, 0, 0)
    # get_data may return either a JSON string or an already decoded object
    if isinstance(das_data, basestring):
        result = json.loads(das_data)
    else:
        result = das_data
    if result['status'] == 'fail':
        print('DAS query failed with reason: %s' % (result['reason'],))
        return None
    if not result['data']:  # dataset not yet registered in DBS
        return 0
    record = result['data'][0]
    # Test for the exact key: the former loop did a substring match on every
    # key name and then indexed 'summary' whether or not it existed.
    if 'summary' in record:
        return record['summary'][0]['nlumis']
    return -1
def getLumisInDataset(dataset):
    """
    Return the lumi section numbers DAS lists for a dataset.

    The lumi ranges of the first lumi entry of the first record are expanded
    into individual numbers; each range is treated as inclusive on both ends.

    An empty list is returned when the query fails (the reason is printed)
    or when DAS returns no record.
    """
    lumis = []
    query = "run lumi dataset=" + dataset
    das_data = get_data(das_host, query, 0, 0, 0)
    # get_data may return either a JSON string or an already decoded object
    if isinstance(das_data, basestring):
        result = json.loads(das_data)
    else:
        result = das_data
    if result['status'] == 'fail':
        print('DAS query failed with reason: %s' % (result['reason'],))
        return lumis
    if not result['data']:
        return lumis
    # NOTE(review): only the first record / first lumi entry is read, as in
    # the original -- confirm that further records never carry more ranges.
    for lumiRange in result['data'][0]['lumi'][0]['number']:
        # +1 keeps the upper bound: range() alone would drop the last lumi
        lumis.extend(range(lumiRange[0], lumiRange[1] + 1))
    return lumis