def getInputEventsTaskChain(request):
    """
    Calculates input events for a taskchain based on the
    properties of its first task (request['Task1']).

    Resolution order:
      1. 'RequestNumEvents', when present and not None
         (MC from scratch).
      2. When the task has an 'InputDataset', the first non-empty
         filter wins: the events in 'BlockWhitelist'; else the dataset
         total minus the events in 'BlockBlacklist'; else the events
         in 'RunWhitelist'; else the dataset total.
      3. None when the task has neither of the above.

    'RunBlacklist' is not taken into account, and filters are never
    combined.
    """
    #TODO filter by subtype
    task = request['Task1']
    #if it's MC from scratch, it has a set number of requested events
    if task.get('RequestNumEvents') is not None:
        return task['RequestNumEvents']
    #without an input dataset there is nothing to count
    if 'InputDataset' not in task:
        return None

    inputDataSet = task['InputDataset']
    #each filter gets its own value; a missing key means "no filter"
    blockWhitelist = task.get('BlockWhitelist')
    blockBlacklist = task.get('BlockBlacklist')
    runWhitelist = task.get('RunWhitelist')

    if blockWhitelist:
        return dbs3.getEventCountDataSetBlockList(inputDataSet,
                                                  blockWhitelist)
    if blockBlacklist:
        total = dbs3.getEventCountDataSet(inputDataSet)
        excluded = dbs3.getEventCountDataSetBlockList(inputDataSet,
                                                      blockBlacklist)
        return total - excluded
    if runWhitelist:
        return dbs3.getEventCountDataSetRunList(inputDataSet, runWhitelist)
    return dbs3.getEventCountDataSet(inputDataSet)
def getInputEventsTaskChain(request):
    """
    Calculates input events for a taskchain based on the
    properties of its first task (request['Task1']).

    Resolution order:
      1. 'RequestNumEvents', when present and not None
         (MC from scratch).
      2. When the task has an 'InputDataset', the first non-empty
         filter wins: the events in 'BlockWhitelist'; else the dataset
         total minus the events in 'BlockBlacklist'; else the events
         in 'RunWhitelist'; else the dataset total.
      3. None when the task has neither of the above.

    'RunBlacklist' is not taken into account, and filters are never
    combined.
    """
    #TODO filter by subtype
    task = request['Task1']
    #if it's MC from scratch, it has a set number of requested events
    if task.get('RequestNumEvents') is not None:
        return task['RequestNumEvents']
    #without an input dataset there is nothing to count
    if 'InputDataset' not in task:
        return None

    inputDataSet = task['InputDataset']
    #each filter gets its own value; a missing key means "no filter"
    blockWhitelist = task.get('BlockWhitelist')
    blockBlacklist = task.get('BlockBlacklist')
    runWhitelist = task.get('RunWhitelist')

    if blockWhitelist:
        return dbs3.getEventCountDataSetBlockList(inputDataSet,
                                                  blockWhitelist)
    if blockBlacklist:
        total = dbs3.getEventCountDataSet(inputDataSet)
        excluded = dbs3.getEventCountDataSetBlockList(inputDataSet,
                                                      blockBlacklist)
        return total - excluded
    if runWhitelist:
        return dbs3.getEventCountDataSetRunList(inputDataSet, runWhitelist)
    return dbs3.getEventCountDataSet(inputDataSet)
def getOutputEvents(url, workflow, dataset):
    """
    Returns the event count of the given output dataset, as
    reported by dbs3.getEventCountDataSet.

    url and workflow are accepted but not used: the request type
    is currently not looked up.
    """
    # request = getWorkflowInfo(url, workflow)
    eventCount = dbs3.getEventCountDataSet(dataset)
    return eventCount
def getOutputEvents(url, workflow, dataset):
    """
    Returns the event count of the given output dataset, as
    reported by dbs3.getEventCountDataSet.

    url and workflow are accepted but not used: the request type
    is currently not looked up.
    """
    # request = getWorkflowInfo(url, workflow)
    eventCount = dbs3.getEventCountDataSet(dataset)
    return eventCount
def main():
    """
    Prints the files to invalidate for a dataset and how many
    events that invalidation costs.

    Reads the file named by sys.argv[1]; one of its first three lines
    must start with 'dataset' and carry the name as 'dataset : <name>'.
    All the lines are handed to buildGraph. The file selection uses
    colorBipartiteGraph and falls back to deleteMaxDegreeFirst when
    that raises.

    Prints the dataset name, the total events, the number of
    invalidated files, the invalidated events, the percentage of
    events that remain and finally the sorted list of files.
    Exits with an error message when no dataset line is found.
    """
    #dataset = sys.argv[1]
    with open(sys.argv[1]) as inputFile:
        lines = [l.strip() for l in inputFile]
    #look for datasetname (slicing tolerates files shorter than 3 lines)
    dataset = None
    for line in lines[:3]:
        if line.startswith('dataset'):
            dataset = line.replace('dataset : ', '').strip()
            break
    if dataset is None:
        sys.exit("No 'dataset : <name>' line found in the first 3 lines "
                 "of %s" % sys.argv[1])
    # single-argument print(...) behaves the same on python 2 and 3
    print("'%s'" % dataset)
    #build graph and calculate
    graph = buildGraph(lines)
    try:
        files = colorBipartiteGraph(graph)
    except Exception:
        # deliberate catch-all: any failure of the first algorithm
        # switches to the second one
        files = deleteMaxDegreeFirst(graph)
    total = dbs.getEventCountDataSet(dataset)
    invalid = dbs.getEventCountDataSetFileList(dataset, files)

    print('total events %s' % total)
    print('invalidated files %s' % len(files))
    print('invalidated events %s' % invalid)
    #the percentage is undefined for an empty dataset
    if total:
        print('%s%%' % (float(total - invalid) / total * 100.0))
    for f in sorted(files):
        print(f)
# Example #6
# 0
def main():
    """
    Prints the files to invalidate for a dataset and how many
    events that invalidation costs.

    Reads the file named by sys.argv[1]; one of its first three lines
    must start with 'dataset' and carry the name as 'dataset : <name>'.
    All the lines are handed to buildGraph, and the events of every
    vertex of the graph are fetched with getFileEvents. The file
    selection uses colorBipartiteGraph and falls back to
    deleteSmallestVertexFirst when that raises.

    Prints the dataset name, progress messages, the total events, the
    number of invalidated files, the invalidated events, the
    percentage of events that remain and finally the sorted list of
    files. Exits with an error message when no dataset line is found.
    """
    #dataset = sys.argv[1]
    with open(sys.argv[1]) as inputFile:
        lines = [l.strip() for l in inputFile]
    #look for datasetname (slicing tolerates files shorter than 3 lines)
    dataset = None
    for line in lines[:3]:
        if line.startswith('dataset'):
            dataset = line.replace('dataset : ', '').strip()
            break
    if dataset is None:
        sys.exit("No 'dataset : <name>' line found in the first 3 lines "
                 "of %s" % sys.argv[1])
    # single-argument print(...) behaves the same on python 2 and 3
    print("'%s'" % dataset)
    print("Building graph model")
    graph = buildGraph(lines)

    print("Getting events per file")
    events = getFileEvents(dataset, graph.keys())
    try:
        #first algorithm that assumes bipartition
        files = colorBipartiteGraph(graph, events)
    except Exception:
        #second algorithm; deliberate catch-all, any failure of the
        #first one switches to it
        #files = deleteMaxDegreeFirst(graph, events)
        files = deleteSmallestVertexFirst(graph, events)

    total = dbs.getEventCountDataSet(dataset)
    invalid = dbs.getEventCountDataSetFileList(dataset, files)

    print('total events %s' % total)
    print('invalidated files %s' % len(files))
    print('invalidated events %s' % invalid)
    #the percentage is undefined for an empty dataset
    if total:
        print('%s%%' % (float(total - invalid) / total * 100.0))
    for f in sorted(files):
        print(f)
 def getOutputEvents(self, ds, skipInvalid=False):
     """
     Returns the number of events of one of the output datasets.

     The count is fetched with dbs3.getEventCountDataSet the first
     time a dataset is asked for and remembered in self.outEvents.
     NOTE: the cache is keyed by dataset name only, so skipInvalid
     only has an effect on the first lookup of each dataset.
     """
     #Already fetched: answer from the cache
     if ds in self.outEvents:
         return self.outEvents[ds]
     #First request for this dataset: fetch and remember
     count = dbs3.getEventCountDataSet(ds, skipInvalid)
     self.outEvents[ds] = count
     return count
 def getOutputEvents(self, ds, skipInvalid=False):
     """
     Returns the number of events of one of the output datasets.

     The count is fetched with dbs3.getEventCountDataSet the first
     time a dataset is asked for and remembered in self.outEvents.
     NOTE: the cache is keyed by dataset name only, so skipInvalid
     only has an effect on the first lookup of each dataset.
     """
     #Already fetched: answer from the cache
     if ds in self.outEvents:
         return self.outEvents[ds]
     #First request for this dataset: fetch and remember
     count = dbs3.getEventCountDataSet(ds, skipInvalid)
     self.outEvents[ds] = count
     return count
def checkCorrectLumisEventGEN(dataset):
    """
    Compares the lumi count of the dataset with its event count.

    Returns True when the number of lumis is at least
    numEvents / 300.0, i.e. when the dataset averages at most 300
    events per lumi; False otherwise.

    NOTE(review): earlier documentation of this check read "more than
    300 events per lumi", which is the opposite of what the comparison
    computes -- confirm the intended direction with the callers.
    """
    lumiCount = dbs3Client.getLumiCountDataSet(dataset)
    eventCount = dbs3Client.getEventCountDataSet(dataset)
    # minimum number of lumis that keeps the average at 300 or below
    minLumis = eventCount / 300.0
    return bool(lumiCount >= minLumis)
def main():
    """
    Prints the files to invalidate for a dataset and how many
    events that invalidation costs.

    sys.argv[1] is the dataset name and sys.argv[2] the file whose
    lines are handed to buildGraph. The files are selected with
    deleteMaxDegreeFirst.

    Prints the total events, the number of invalidated files, the
    invalidated events, the percentage of events that remain (skipped
    when the dataset has no events) and the list of files.
    """
    dataset = sys.argv[1]
    with open(sys.argv[2]) as inputFile:
        lines = [l.strip() for l in inputFile]
    graph = buildGraph(lines)
    files = deleteMaxDegreeFirst(graph)
    total = dbs.getEventCountDataSet(dataset)
    invalid = dbs.getEventCountDataSetFileList(dataset, files)
    # single-argument print(...) behaves the same on python 2 and 3
    print('total events %s' % total)
    print('invalidated files %s' % len(files))
    print('invalidated events %s' % invalid)
    #the percentage is undefined for an empty dataset
    if total:
        print('%s%%' % (float(total - invalid) / total * 100.0))
    for f in files:
        print(f)
def _normalizeRequestList(value):
    """
    Turns a white/black list field of a request into a list.

    Lists are returned as they are; None, '' and '[]' become an empty
    list; a string without '[' is wrapped as a single element; any
    other string is evaluated as a python expression.
    """
    if isinstance(value, list):
        return value
    #missing or empty
    if value is None or value == '' or value == '[]':
        return []
    #if it doesn't contain "[" it is a single element: wrap in a list
    if '[' not in value:
        return [value]
    #else parse a list
    # SECURITY NOTE(review): eval() executes whatever expression the
    # request carries. If these fields can hold untrusted data this
    # should become ast.literal_eval.
    return eval(value)


def getInputEvents(url, workflow):
    """
    Gets the inputs events of a given workflow
    depending of the kind of workflow
    TODO this can be replaced by getting the info from the workload cache

    The request is fetched with getWorkflowInfo(url, workflow) and:
      - MonteCarlo / LHEStepZero: returns 'RequestNumEvents' when it
        is greater than 0, else 'RequestSizeEvents', else 0.
      - TaskChain: delegates to getInputEventsTaskChain.
      - ReReco: the first non-empty filter wins, in the order block
        whitelist, block blacklist (dataset total minus the listed
        blocks), run whitelist; with no filter, the dataset total.
      - anything else: the dataset total minus the block blacklist,
        replaced by the run whitelist count when there is one, and
        that in turn by the block whitelist count when there is one.

    Side effect: the four white/black list fields of the fetched
    request are normalized to lists in place.
    """
    request = getWorkflowInfo(url, workflow)
    requestType = request['RequestType']
    #if request is montecarlo or Step0, the number of
    #input events is given by the requested events
    if requestType in ('MonteCarlo', 'LHEStepZero'):
        numEvents = request.get('RequestNumEvents')
        #None is checked explicitly: it cannot be ordered against 0
        #on python 3
        if numEvents is not None and numEvents > 0:
            return numEvents
        return request.get('RequestSizeEvents', 0)
    if requestType == 'TaskChain':
        return getInputEventsTaskChain(request)

    #if request is not montecarlo, then we need to check the size
    #of input datasets
    #This loop fixes the white and blacklists in the workflow
    #information; if a list is missing, an empty list will do
    for listitem in ("RunWhitelist", "RunBlacklist",
                     "BlockWhitelist", "BlockBlacklist"):
        request[listitem] = _normalizeRequestList(request.get(listitem))

    inputDataSet = request['InputDataset']
    blockWhitelist = request['BlockWhitelist']
    blockBlacklist = request['BlockBlacklist']
    runWhitelist = request['RunWhitelist']

    #if the request is rereco, we validate white/black lists
    if requestType == 'ReReco':
        # if there is a block white list, count only the selected blocks
        if blockWhitelist:
            return dbs3.getEventCountDataSetBlockList(inputDataSet,
                                                      blockWhitelist)
        # if there is a block black list, subtract them from the total;
        # the block-list counter is needed here, the plain dataset
        # counter does not take a list of blocks
        if blockBlacklist:
            return (dbs3.getEventCountDataSet(inputDataSet) -
                    dbs3.getEventCountDataSetBlockList(inputDataSet,
                                                       blockBlacklist))
        # same if a run whitelist
        if runWhitelist:
            return dbs3.getEventCountDataSetRunList(inputDataSet,
                                                    runWhitelist)
        # otherwise, the full event count
        return dbs3.getEventCountDataSet(inputDataSet)

    events = dbs3.getEventCountDataSet(inputDataSet)
    # if black list, subtract them
    if blockBlacklist:
        events = events - dbs3.getEventCountDataSetBlockList(inputDataSet,
                                                             blockBlacklist)
    # if white list, only the ones in the whitelist.
    if runWhitelist:
        events = dbs3.getEventCountDataSetRunList(inputDataSet, runWhitelist)
    # if white list of blocks
    if blockWhitelist:
        events = dbs3.getEventCountDataSetBlockList(inputDataSet,
                                                    blockWhitelist)

    #TODO delete FilterEfficiency from here. TEST
    #if 'FilterEfficiency' in request:
    #return float(request['FilterEfficiency'])*events
    #else:
    return events
def _normalizeRequestList(value):
    """
    Turns a white/black list field of a request into a list.

    Lists are returned as they are; None, '' and '[]' become an empty
    list; a string without '[' is wrapped as a single element; any
    other string is evaluated as a python expression.
    """
    if isinstance(value, list):
        return value
    #missing or empty
    if value is None or value == '' or value == '[]':
        return []
    #if it doesn't contain "[" it is a single element: wrap in a list
    if '[' not in value:
        return [value]
    #else parse a list
    # SECURITY NOTE(review): eval() executes whatever expression the
    # request carries. If these fields can hold untrusted data this
    # should become ast.literal_eval.
    return eval(value)


def getInputEvents(url, workflow):
    """
    Gets the inputs events of a given workflow
    depending of the kind of workflow
    TODO this can be replaced by getting the info from the workload cache

    The request is fetched with getWorkflowInfo(url, workflow) and:
      - MonteCarlo / LHEStepZero: returns 'RequestNumEvents' when it
        is greater than 0, else 'RequestSizeEvents', else 0.
      - TaskChain: delegates to getInputEventsTaskChain.
      - ReReco: the first non-empty filter wins, in the order block
        whitelist, block blacklist (dataset total minus the listed
        blocks), run whitelist; with no filter, the dataset total.
      - anything else: the dataset total minus the block blacklist,
        replaced by the run whitelist count when there is one, and
        that in turn by the block whitelist count when there is one.

    Side effect: the four white/black list fields of the fetched
    request are normalized to lists in place.
    """
    request = getWorkflowInfo(url, workflow)
    requestType = request['RequestType']
    #if request is montecarlo or Step0, the number of
    #input events is given by the requested events
    if requestType in ('MonteCarlo', 'LHEStepZero'):
        numEvents = request.get('RequestNumEvents')
        #None is checked explicitly: it cannot be ordered against 0
        #on python 3
        if numEvents is not None and numEvents > 0:
            return numEvents
        return request.get('RequestSizeEvents', 0)
    if requestType == 'TaskChain':
        return getInputEventsTaskChain(request)

    #if request is not montecarlo, then we need to check the size
    #of input datasets
    #This loop fixes the white and blacklists in the workflow
    #information; if a list is missing, an empty list will do
    for listitem in ("RunWhitelist", "RunBlacklist",
                     "BlockWhitelist", "BlockBlacklist"):
        request[listitem] = _normalizeRequestList(request.get(listitem))

    inputDataSet = request['InputDataset']
    blockWhitelist = request['BlockWhitelist']
    blockBlacklist = request['BlockBlacklist']
    runWhitelist = request['RunWhitelist']

    #if the request is rereco, we validate white/black lists
    if requestType == 'ReReco':
        # if there is a block white list, count only the selected blocks
        if blockWhitelist:
            return dbs3.getEventCountDataSetBlockList(inputDataSet,
                                                      blockWhitelist)
        # if there is a block black list, subtract them from the total;
        # the block-list counter is needed here, the plain dataset
        # counter does not take a list of blocks
        if blockBlacklist:
            return (dbs3.getEventCountDataSet(inputDataSet) -
                    dbs3.getEventCountDataSetBlockList(inputDataSet,
                                                       blockBlacklist))
        # same if a run whitelist
        if runWhitelist:
            return dbs3.getEventCountDataSetRunList(inputDataSet,
                                                    runWhitelist)
        # otherwise, the full event count
        return dbs3.getEventCountDataSet(inputDataSet)

    events = dbs3.getEventCountDataSet(inputDataSet)
    # if black list, subtract them
    if blockBlacklist:
        events = events - dbs3.getEventCountDataSetBlockList(inputDataSet,
                                                             blockBlacklist)
    # if white list, only the ones in the whitelist.
    if runWhitelist:
        events = dbs3.getEventCountDataSetRunList(inputDataSet, runWhitelist)
    # if white list of blocks
    if blockWhitelist:
        events = dbs3.getEventCountDataSetBlockList(inputDataSet,
                                                    blockWhitelist)

    #TODO delete FilterEfficiency from here. TEST
    #if 'FilterEfficiency' in request:
    #return float(request['FilterEfficiency'])*events
    #else:
    return events
# Example #13
# 0
def main():
    """
    Prints, for one or more datasets, the files to invalidate and how
    many events that invalidation costs.

    Two modes, selected on the command line:
      - default: --file holds a list of lumis and files, which
        buildGraphs turns into one graph per dataset.
      - --doall: the workflows come from --file (one per line) or,
        without --file, from the positional arguments. Their output
        datasets are looked up through reqMgrClient and a graph is
        built per dataset from dbs.duplicateRunLumi.

    For every dataset the file selection uses colorBipartiteGraph and
    falls back to deleteSmallestVertexFirst when that raises. Prints
    the total events, the number of invalidated files, the invalidated
    events, the percentage of events that remain and the sorted list
    of files. Stops with a usage error when the required input is
    missing.
    """
    usage = "python %prog [OPTIONS]"
    parser = OptionParser(usage)
    parser.add_option("-a", "--doall", dest="doall", action="store_true",
                      default=False,
                      help="It will analyze all datasets of the workflow from the beginning. If this option is true,"
                           " you should provide a workflow name or a list of them in the --file option.")
    parser.add_option("-f", "--file", dest="file",
                      help="Input file with the contents of duplicateEvents.py (a list of lumis and files)."
                           " If you are using the --doall option, it should contain a list of workflows instead")

    options, args = parser.parse_args()
    graphs = {}
    workflows = None
    if options.file:
        with open(options.file) as inputFile:
            lines = [l.strip() for l in inputFile]
        if options.doall:
            #with doall the file is a list of workflows; blank lines
            #cannot name a workflow
            workflows = [l for l in lines if l]
        else:
            #if we are not doing all, the input is a list of lumis and files
            graphs = buildGraphs(lines)
    elif options.doall:
        workflows = args
    else:
        parser.error("You should provide an input file with the output of duplicateEvents")

    #doall without any workflow leaves nothing to analyze
    if options.doall and not workflows:
        parser.error("With --doall you should provide at least one workflow,"
                     " as arguments or through the --file option")

    # get the output datasets of the workflows and create the graphs
    if workflows:
        datasets = []
        for wf in workflows:
            # NOTE(review): url is not defined in this function;
            # presumably a module-level constant -- confirm
            datasets += reqMgrClient.outputdatasetsWorkflow(url, wf)

        #analyze each dataset
        for dataset in datasets:
            _, lumis = dbs.duplicateRunLumi(dataset, verbose="dict",
                                            skipInvalid=True)
            graphs[dataset] = buildGraph(lumis)

    for dataset, graph in graphs.items():
        # single-argument print(...) behaves the same on python 2 and 3
        print("Getting events per file")
        events = getFileEvents(dataset, graph.keys())
        try:
            #first algorithm that assumes bipartition
            files = colorBipartiteGraph(graph, events)
        except Exception:
            #second algorithm; deliberate catch-all, any failure of
            #the first one switches to it
            #files = deleteMaxDegreeFirst(graph, events)
            files = deleteSmallestVertexFirst(graph, events)

        total = dbs.getEventCountDataSet(dataset)
        invalid = dbs.getEventCountDataSetFileList(dataset, files)

        print('total events %s' % total)
        print('invalidated files %s' % len(files))
        print('invalidated events %s' % invalid)
        #the percentage is undefined for an empty dataset
        if total:
            print('%s%%' % (float(total - invalid) / total * 100.0))
        for f in sorted(files):
            print(f)