def getInputEventsTaskChain(request):
    """
    Calculates input events for a taskchain based on the properties of
    its first task (request['Task1']).

    request: mapping holding the workflow information; only the 'Task1'
             entry is read.

    Returns:
      - Task1['RequestNumEvents'] when that key is present and not None
        (MC from scratch has a set number of requested events);
      - otherwise, when Task1 has an 'InputDataset', the event count that
        dbs3 reports for it, restricted in this order of precedence:
        BlockWhitelist, BlockBlacklist (subtracted from the total),
        RunWhitelist, full dataset;
      - None when Task1 has neither requested events nor an input dataset.
    """
    #TODO filter by subtype
    firstTask = request['Task1']

    #if it's MC from scratch, it has a set number of requested events
    requestedEvents = firstTask.get('RequestNumEvents')
    if requestedEvents is not None:
        return requestedEvents

    #nothing to count without an input dataset
    if 'InputDataset' not in firstTask:
        return None
    inputDataSet = firstTask['InputDataset']

    #each list gets its own default; a missing key behaves like an empty list
    #NOTE: Task1['RunBlacklist'] is not taken into account by this function
    blockWhitelist = firstTask.get('BlockWhitelist', [])
    blockBlacklist = firstTask.get('BlockBlacklist', [])
    runWhitelist = firstTask.get('RunWhitelist', [])

    if blockWhitelist:
        return dbs3.getEventCountDataSetBlockList(inputDataSet, blockWhitelist)
    if blockBlacklist:
        #total events minus the events of the blacklisted blocks
        return (dbs3.getEventCountDataSet(inputDataSet)
                - dbs3.getEventCountDataSetBlockList(inputDataSet, blockBlacklist))
    if runWhitelist:
        return dbs3.getEventCountDataSetRunList(inputDataSet, runWhitelist)
    return dbs3.getEventCountDataSet(inputDataSet)
def getInputEventsTaskChain(request):
    """
    Calculates input events for a taskchain based on the properties of
    its first task (request['Task1']).

    request: mapping holding the workflow information; only the 'Task1'
             entry is read.

    Returns:
      - Task1['RequestNumEvents'] when that key is present and not None
        (MC from scratch has a set number of requested events);
      - otherwise, when Task1 has an 'InputDataset', the event count that
        dbs3 reports for it, restricted in this order of precedence:
        BlockWhitelist, BlockBlacklist (subtracted from the total),
        RunWhitelist, full dataset;
      - None when Task1 has neither requested events nor an input dataset.
    """
    #TODO filter by subtype
    firstTask = request['Task1']

    #if it's MC from scratch, it has a set number of requested events
    requestedEvents = firstTask.get('RequestNumEvents')
    if requestedEvents is not None:
        return requestedEvents

    #nothing to count without an input dataset
    if 'InputDataset' not in firstTask:
        return None
    inputDataSet = firstTask['InputDataset']

    #each list gets its own default; a missing key behaves like an empty list
    #NOTE: Task1['RunBlacklist'] is not taken into account by this function
    blockWhitelist = firstTask.get('BlockWhitelist', [])
    blockBlacklist = firstTask.get('BlockBlacklist', [])
    runWhitelist = firstTask.get('RunWhitelist', [])

    if blockWhitelist:
        return dbs3.getEventCountDataSetBlockList(inputDataSet, blockWhitelist)
    if blockBlacklist:
        #total events minus the events of the blacklisted blocks
        return (dbs3.getEventCountDataSet(inputDataSet)
                - dbs3.getEventCountDataSetBlockList(inputDataSet, blockBlacklist))
    if runWhitelist:
        return dbs3.getEventCountDataSetRunList(inputDataSet, runWhitelist)
    return dbs3.getEventCountDataSet(inputDataSet)
def getOutputEvents(url, workflow, dataset):
    """
    Gets the output events of a request: the event count that dbs3
    reports for the given output dataset.

    url and workflow are part of the signature but are not used by the
    current implementation; the result depends only on dataset.
    """
    outputEvents = dbs3.getEventCountDataSet(dataset)
    return outputEvents
def main():
    """
    Reads the file named by sys.argv[1], works out which files to
    invalidate and prints a summary.

    The dataset name is taken from the first line, among the first three
    lines of the file, that starts with 'dataset' (the 'dataset : ' prefix
    is removed). A graph is built from all the lines; the list of files is
    obtained from colorBipartiteGraph, falling back to deleteMaxDegreeFirst
    if that raises. Prints the total events of the dataset, the number of
    selected files, their events, the percentage of events left
    ((total - invalid) / total) and the sorted file names.

    Exits with an error message when no 'dataset' line is found.
    """
    #close the input file as soon as it has been read
    with open(sys.argv[1]) as inputFile:
        lines = [line.strip() for line in inputFile]

    #look for datasetname (slicing tolerates files shorter than 3 lines)
    dataset = None
    for line in lines[:3]:
        if line.startswith('dataset'):
            dataset = line.replace('dataset : ', '').strip()
            break
    if dataset is None:
        sys.exit("No 'dataset' line found in the first 3 lines of %s" % sys.argv[1])
    print("'%s'" % dataset)

    #build graph and calculate
    graph = buildGraph(lines)
    try:
        files = colorBipartiteGraph(graph)
    except Exception:
        #deliberate best-effort: any failure of the first algorithm
        #switches to the second one
        files = deleteMaxDegreeFirst(graph)

    total = dbs.getEventCountDataSet(dataset)
    invalid = dbs.getEventCountDataSetFileList(dataset, files)
    print('total events %s' % total)
    print('invalidated files %s' % len(files))
    print('invalidated events %s' % invalid)
    #skip the percentage for empty datasets (avoids division by zero)
    if total:
        print('%s%%' % (float(total - invalid) / total * 100.0))
    for f in sorted(files):
        print(f)
def main():
    """
    Reads the file named by sys.argv[1], works out which files to
    invalidate (taking the events of each file into account) and prints a
    summary.

    The dataset name is taken from the first line, among the first three
    lines of the file, that starts with 'dataset' (the 'dataset : ' prefix
    is removed). A graph is built from all the lines and the events per
    file are fetched with getFileEvents for the graph keys. The list of
    files is obtained from colorBipartiteGraph, falling back to
    deleteSmallestVertexFirst if that raises. Prints the total events of
    the dataset, the number of selected files, their events, the
    percentage of events left ((total - invalid) / total) and the sorted
    file names.

    Exits with an error message when no 'dataset' line is found.
    """
    #close the input file as soon as it has been read
    with open(sys.argv[1]) as inputFile:
        lines = [line.strip() for line in inputFile]

    #look for datasetname (slicing tolerates files shorter than 3 lines)
    dataset = None
    for line in lines[:3]:
        if line.startswith('dataset'):
            dataset = line.replace('dataset : ', '').strip()
            break
    if dataset is None:
        sys.exit("No 'dataset' line found in the first 3 lines of %s" % sys.argv[1])
    print("'%s'" % dataset)

    print("Building graph model")
    graph = buildGraph(lines)
    print("Getting events per file")
    events = getFileEvents(dataset, graph.keys())
    try:
        #first algorithm that assumes bipartition
        files = colorBipartiteGraph(graph, events)
    except Exception:
        #second algorithm; deliberate best-effort fallback on any failure
        #of the first one (deleteMaxDegreeFirst was the earlier choice)
        files = deleteSmallestVertexFirst(graph, events)

    total = dbs.getEventCountDataSet(dataset)
    invalid = dbs.getEventCountDataSetFileList(dataset, files)
    print('total events %s' % total)
    print('invalidated files %s' % len(files))
    print('invalidated events %s' % invalid)
    #skip the percentage for empty datasets (avoids division by zero)
    if total:
        print('%s%%' % (float(total - invalid) / total * 100.0))
    for f in sorted(files):
        print(f)
def getOutputEvents(self, ds, skipInvalid=False):
    """
    Gets the output events of one of the output datasets.

    The count is asked to dbs3 only the first time a dataset is seen and
    kept in self.outEvents, so that it is not checked twice.

    NOTE(review): the stored value is looked up by ds alone, so a later
    call with a different skipInvalid gets the count stored by the first
    call -- confirm this is intended.
    """
    known = self.outEvents
    #already checked: reuse the stored count
    if ds in known:
        return known[ds]
    count = dbs3.getEventCountDataSet(ds, skipInvalid)
    known[ds] = count
    return count
def checkCorrectLumisEventGEN(dataset):
    """
    Checks the lumi/event ratio of the dataset.

    Returns True when the number of lumis is at least the number of
    events divided by 300, i.e. when the dataset has on average no more
    than 300 events per lumi; False otherwise.

    NOTE(review): the comparison enforces an upper bound of 300 events
    per lumi, not a lower bound -- confirm this is the intended rule.
    """
    lumiCount = dbs3Client.getLumiCountDataSet(dataset)
    eventCount = dbs3Client.getEventCountDataSet(dataset)
    #fewest lumis that keep the average at or below 300 events per lumi
    lumisNeeded = eventCount / 300.0
    return lumiCount >= lumisNeeded
def main():
    """
    Takes the dataset name from sys.argv[1] and an input file from
    sys.argv[2], works out which files to invalidate and prints a summary.

    A graph is built from the lines of the input file and the list of
    files is obtained from deleteMaxDegreeFirst. Prints the total events
    of the dataset, the number of selected files, their events, the
    percentage of events left ((total - invalid) / total) and the file
    names in the order they were returned.
    """
    dataset = sys.argv[1]
    #close the input file as soon as it has been read
    with open(sys.argv[2]) as inputFile:
        lines = [line.strip() for line in inputFile]

    graph = buildGraph(lines)
    files = deleteMaxDegreeFirst(graph)

    total = dbs.getEventCountDataSet(dataset)
    invalid = dbs.getEventCountDataSetFileList(dataset, files)
    print('total events %s' % total)
    print('invalidated files %s' % len(files))
    print('invalidated events %s' % invalid)
    #skip the percentage for empty datasets (avoids division by zero)
    if total:
        print('%s%%' % (float(total - invalid) / total * 100.0))
    for f in files:
        print(f)
def getInputEvents(url, workflow):
    """
    Gets the input events of a given workflow depending on the kind of
    workflow.
    TODO this can be replaced by getting the info from the workload cache

    url, workflow: passed to getWorkflowInfo to fetch the request.

    Returns:
      - MonteCarlo / LHEStepZero: RequestNumEvents when present and > 0,
        else RequestSizeEvents when present, else 0;
      - TaskChain: whatever getInputEventsTaskChain returns;
      - ReReco: events of the input dataset restricted by, in order of
        precedence, BlockWhitelist, BlockBlacklist (subtracted from the
        total), RunWhitelist; the full dataset otherwise;
      - any other type: same, but with precedence BlockWhitelist,
        RunWhitelist, BlockBlacklist.

    Side effect: the four white/black list entries of the request are
    normalised in place to Python lists.
    """
    #local import: only needed to parse lists that arrive as strings
    import ast

    request = getWorkflowInfo(url, workflow)
    requestType = request['RequestType']

    #if request is montecarlo or Step0, the number of
    #input events is given by the requested events
    if requestType in ('MonteCarlo', 'LHEStepZero'):
        if 'RequestNumEvents' in request and request['RequestNumEvents'] > 0:
            return request['RequestNumEvents']
        if 'RequestSizeEvents' in request:
            return request['RequestSizeEvents']
        return 0

    if requestType == 'TaskChain':
        return getInputEventsTaskChain(request)

    #if request is not montecarlo, then we need to check the size
    #of input datasets
    #This loop fixes the white and black lists in the workflow information
    for listitem in ("RunWhitelist", "RunBlacklist",
                     "BlockWhitelist", "BlockBlacklist"):
        #a missing entry is the same as an empty list
        value = request.get(listitem, [])
        if value is None or value == '[]' or value == '':
            value = []
        elif not isinstance(value, list):
            if '[' in value:
                #a list serialised as a string: parse it as a literal.
                #literal_eval is used instead of eval so that text coming
                #from the request can never execute code
                value = ast.literal_eval(value)
            else:
                #a single element: wrap it in a list
                value = [value]
        request[listitem] = value

    inputDataSet = request['InputDataset']
    blockWhitelist = request['BlockWhitelist']
    blockBlacklist = request['BlockBlacklist']
    runWhitelist = request['RunWhitelist']

    #if the request is rereco, we validate white/black lists
    if requestType == 'ReReco':
        #if there is a block white list, count only the selected blocks
        if blockWhitelist:
            return dbs3.getEventCountDataSetBlockList(inputDataSet,
                                                      blockWhitelist)
        #if there is a block black list, subtract them from the total
        if blockBlacklist:
            return (dbs3.getEventCountDataSet(inputDataSet)
                    - dbs3.getEventCountDataSetBlockList(inputDataSet,
                                                         blockBlacklist))
        #same if there is a run whitelist
        if runWhitelist:
            return dbs3.getEventCountDataSetRunList(inputDataSet, runWhitelist)
        #otherwise, the full dataset count
        return dbs3.getEventCountDataSet(inputDataSet)

    #any other request type: a white list of blocks wins over a white
    #list of runs, which wins over the block black list
    if blockWhitelist:
        return dbs3.getEventCountDataSetBlockList(inputDataSet, blockWhitelist)
    if runWhitelist:
        return dbs3.getEventCountDataSetRunList(inputDataSet, runWhitelist)
    events = dbs3.getEventCountDataSet(inputDataSet)
    #if black list, subtract them
    if blockBlacklist:
        events = events - dbs3.getEventCountDataSetBlockList(inputDataSet,
                                                             blockBlacklist)
    #TODO FilterEfficiency is deliberately not applied here. TEST
    return events
def getInputEvents(url, workflow):
    """
    Gets the input events of a given workflow depending on the kind of
    workflow.
    TODO this can be replaced by getting the info from the workload cache

    url, workflow: passed to getWorkflowInfo to fetch the request.

    Returns:
      - MonteCarlo / LHEStepZero: RequestNumEvents when present and > 0,
        else RequestSizeEvents when present, else 0;
      - TaskChain: whatever getInputEventsTaskChain returns;
      - ReReco: events of the input dataset restricted by, in order of
        precedence, BlockWhitelist, BlockBlacklist (subtracted from the
        total), RunWhitelist; the full dataset otherwise;
      - any other type: same, but with precedence BlockWhitelist,
        RunWhitelist, BlockBlacklist.

    Side effect: the four white/black list entries of the request are
    normalised in place to Python lists.
    """
    #local import: only needed to parse lists that arrive as strings
    import ast

    request = getWorkflowInfo(url, workflow)
    requestType = request['RequestType']

    #if request is montecarlo or Step0, the number of
    #input events is given by the requested events
    if requestType in ('MonteCarlo', 'LHEStepZero'):
        if 'RequestNumEvents' in request and request['RequestNumEvents'] > 0:
            return request['RequestNumEvents']
        if 'RequestSizeEvents' in request:
            return request['RequestSizeEvents']
        return 0

    if requestType == 'TaskChain':
        return getInputEventsTaskChain(request)

    #if request is not montecarlo, then we need to check the size
    #of input datasets
    #This loop fixes the white and black lists in the workflow information
    for listitem in ("RunWhitelist", "RunBlacklist",
                     "BlockWhitelist", "BlockBlacklist"):
        #a missing entry is the same as an empty list
        value = request.get(listitem, [])
        if value is None or value == '[]' or value == '':
            value = []
        elif not isinstance(value, list):
            if '[' in value:
                #a list serialised as a string: parse it as a literal.
                #literal_eval is used instead of eval so that text coming
                #from the request can never execute code
                value = ast.literal_eval(value)
            else:
                #a single element: wrap it in a list
                value = [value]
        request[listitem] = value

    inputDataSet = request['InputDataset']
    blockWhitelist = request['BlockWhitelist']
    blockBlacklist = request['BlockBlacklist']
    runWhitelist = request['RunWhitelist']

    #if the request is rereco, we validate white/black lists
    if requestType == 'ReReco':
        #if there is a block white list, count only the selected blocks
        if blockWhitelist:
            return dbs3.getEventCountDataSetBlockList(inputDataSet,
                                                      blockWhitelist)
        #if there is a block black list, subtract them from the total
        if blockBlacklist:
            return (dbs3.getEventCountDataSet(inputDataSet)
                    - dbs3.getEventCountDataSetBlockList(inputDataSet,
                                                         blockBlacklist))
        #same if there is a run whitelist
        if runWhitelist:
            return dbs3.getEventCountDataSetRunList(inputDataSet, runWhitelist)
        #otherwise, the full dataset count
        return dbs3.getEventCountDataSet(inputDataSet)

    #any other request type: a white list of blocks wins over a white
    #list of runs, which wins over the block black list
    if blockWhitelist:
        return dbs3.getEventCountDataSetBlockList(inputDataSet, blockWhitelist)
    if runWhitelist:
        return dbs3.getEventCountDataSetRunList(inputDataSet, runWhitelist)
    events = dbs3.getEventCountDataSet(inputDataSet)
    #if black list, subtract them
    if blockBlacklist:
        events = events - dbs3.getEventCountDataSetBlockList(inputDataSet,
                                                             blockBlacklist)
    #TODO FilterEfficiency is deliberately not applied here. TEST
    return events
def main():
    """
    Command line entry point: for each dataset, works out which files to
    invalidate and prints a summary.

    Modes, selected by the options:
      - --file without --doall: the file is read as a list of lumis and
        files and turned into graphs with buildGraphs;
      - --doall with --file: the file is read as a list of workflow names;
      - --doall without --file: the positional arguments are the workflow
        names.
    In the --doall modes the output datasets of every workflow are looked
    up, their duplicated lumis are queried (skipInvalid=True) and one
    graph is built per dataset.

    For each (dataset, graph) the events per file are fetched, the files
    are chosen with colorBipartiteGraph (falling back to
    deleteSmallestVertexFirst if that raises) and the totals, the
    percentage of events left and the sorted file names are printed.

    Exits through parser.error when no usable input is given.
    """
    usage = "python %prog [OPTIONS]"
    parser = OptionParser(usage)
    parser.add_option("-a", "--doall", dest="doall", action="store_true",
                      default=False,
                      help=("It will analyze all datasets of the workflow from the beginning. If this option is true,"
                            " you should provide a workflow name or a list of them in the --file option."))
    parser.add_option("-f", "--file", dest="file",
                      help=("Input file with the contents of duplicateEvents.py (a list of lumis and files)."
                            " If you are using the --doall option, it should contain a list of workflows instead"))
    options, args = parser.parse_args()

    #read the input file once and close it right away
    lines = None
    if options.file:
        with open(options.file) as inputFile:
            lines = [line.strip() for line in inputFile]

    if options.doall:
        #workflow names come from the file when given, else from the arguments
        workflows = lines if options.file else args
        if not workflows:
            #without this check the graphs would never be created
            parser.error("You should provide at least one workflow name when using --doall")
        #get the output datasets of the workflows and create the graphs
        datasets = []
        for wf in workflows:
            datasets += reqMgrClient.outputdatasetsWorkflow(url, wf)
        graphs = {}
        #analyze each dataset
        for dataset in datasets:
            #only the second element of the result is needed here
            _, lumis = dbs.duplicateRunLumi(dataset, verbose="dict",
                                            skipInvalid=True)
            graphs[dataset] = buildGraph(lumis)
    elif options.file:
        #if we are not doing all, input is treated as list of lumis and files
        graphs = buildGraphs(lines)
    else:
        parser.error("You should provide an input file with the output of duplicateEvents")

    for dataset, graph in graphs.items():
        print("Getting events per file")
        events = getFileEvents(dataset, graph.keys())
        try:
            #first algorithm that assumes bipartition
            files = colorBipartiteGraph(graph, events)
        except Exception:
            #second algorithm; deliberate best-effort fallback on any
            #failure of the first one
            files = deleteSmallestVertexFirst(graph, events)

        total = dbs.getEventCountDataSet(dataset)
        invalid = dbs.getEventCountDataSetFileList(dataset, files)
        print('total events %s' % total)
        print('invalidated files %s' % len(files))
        print('invalidated events %s' % invalid)
        #skip the percentage for empty datasets (avoids division by zero)
        if total:
            print('%s%%' % (float(total - invalid) / total * 100.0))
        for f in sorted(files):
            print(f)