def getOutputEvents(dbsApi, dset, verb = False):
    """
    Count the events stored in an output dataset.

    Datasets whose name contains '/ALCARECO', '/DQMIO' or '/DQMROOT'
    cannot be used to compute completion, so they are not counted and
    -10 is returned for them instead.

    dbsApi -- DBS handle, forwarded untouched to getNumEvents
    dset   -- name of the output dataset
    verb   -- accepted for call compatibility; not used here

    Returns whatever getNumEvents reports, or -10 for an excluded tier.
    """
    uncountable_tiers = ('/ALCARECO', '/DQMIO', '/DQMROOT')

    for tier in uncountable_tiers:
        if tier in dset:
            # sentinel meaning "data tier not allowed"
            return -10

    return collect_dsets_and_nevents.getNumEvents(dbsApi, dset)
Example #2
0
def getOutputEvents(dbsApi, dset, verb=False):
    """
    Return the number of events in the output dataset `dset`.

    Completion cannot be calculated for ALCARECO, DQMIO and DQMROOT
    samples: when the dataset name contains one of those tier markers
    the lookup is skipped and -10 is returned.

    `dbsApi` is passed straight through to getNumEvents; `verb` is
    accepted but has no effect in this function.
    """
    is_excluded_tier = any(
        marker in dset for marker in ('/ALCARECO', '/DQMIO', '/DQMROOT'))

    if not is_excluded_tier:
        return collect_dsets_and_nevents.getNumEvents(dbsApi, dset)

    # -10 flags "data tier not allowed" to the caller
    return -10
def _report_too_many_events(wf_name, subject):
    """
    Pipe `wf_name` into the `mail` command under `subject`, then exit.

    The workflow name is shell-quoted before it is placed in the command
    line, so a name containing spaces or shell metacharacters is echoed
    literally instead of being interpreted by the shell.

    This function does not return: it always ends with sys.exit(1).
    """
    try:
        from shlex import quote
    except ImportError:  # Python 2 keeps the same helper in `pipes`
        from pipes import quote

    os.system('echo ' + quote(wf_name) +
              ' | mail -s "' + subject + '" [email protected]')
    sys.exit(1)


def too_many_events_check(wf_name):
    """
    Abort with a mail alert if a TaskChain workflow produced more events
    than it was given as input.

    The request JSON of `wf_name` is inspected; anything that is not a
    TaskChain is ignored. The expected number of input events is taken
    from Task1:

    * 'RequestNumEvents' present            -> that value (error 2)
    * 'InputDataset' with 'RunWhitelist'    -> events of the input
      dataset restricted to the run list (error 3)
    * 'InputDataset' without any whitelist  -> events of the whole input
      dataset (error 5)

    Two configurations are reported immediately without counting:
    both 'RequestNumEvents' and 'InputDataset' present (error 1), and
    'InputDataset' with 'BlockWhitelist' but no 'RunWhitelist' (error 4).

    Every output dataset is then compared against the input count; the
    first one that exceeds it triggers the alert. An alert mails the
    workflow name and terminates the process with exit status 1.

    Returns None when nothing is wrong or the check does not apply.
    """
    schema = getRequestJson(wf_name)

    # this check only works for taskchain workflows
    if schema['RequestType'] != 'TaskChain':
        return

    dbsApi = collect_dsets_and_nevents.getDBSApi()
    outputDatasets = collect_dsets_and_nevents.getOutputDset(wf_name)

    task1 = schema['Task1']
    has_num_events = 'RequestNumEvents' in task1
    has_input_dset = 'InputDataset' in task1

    # We should never hit this case
    if has_num_events and has_input_dset:
        _report_too_many_events(wf_name, 'too_many_events_check.py error 1')

    if has_num_events:
        # FastSim or FullSim from scratch
        inputEvents = task1['RequestNumEvents']
        subject = 'too_many_events_check.py error 2'
    elif has_input_dset:
        # Then it's either Data or MC recycling
        inputDset = task1['InputDataset']

        if 'RunWhitelist' in task1:
            # It's Data
            inputEvents = getEventsDataSetRunList(
                dbsApi, inputDset, task1['RunWhitelist'], verb=False)
            subject = 'too_many_events_check.py error 3'
        elif 'BlockWhitelist' in task1:
            # Block whitelists are not handled; exits, so nothing below runs
            _report_too_many_events(
                wf_name, 'too_many_events_check.py error 4')
        else:
            # Most likely MC, since there is no run whitelist
            # it means we can just go for num of events
            inputEvents = collect_dsets_and_nevents.getNumEvents(
                dbsApi, inputDset)
            subject = 'too_many_events_check.py error 5'
    else:
        # Neither key is present: there is nothing to compare against
        return

    for dataset in outputDatasets:
        # getOutputEvents returns -10 for tiers it cannot count; a
        # negative value is not expected to exceed inputEvents, so those
        # datasets pass the comparison.
        if getOutputEvents(dbsApi, dataset, verb=False) > inputEvents:
            _report_too_many_events(wf_name, subject)
Example #4
0
def _report_too_many_events(wf_name, subject):
    """
    Pipe `wf_name` into the `mail` command under `subject`, then exit.

    The workflow name is shell-quoted before it is placed in the command
    line, so a name containing spaces or shell metacharacters is echoed
    literally instead of being interpreted by the shell.

    This function does not return: it always ends with sys.exit(1).
    """
    try:
        from shlex import quote
    except ImportError:  # Python 2 keeps the same helper in `pipes`
        from pipes import quote

    os.system('echo ' + quote(wf_name) +
              ' | mail -s "' + subject + '" [email protected]')
    sys.exit(1)


def too_many_events_check(wf_name):
    """
    Abort with a mail alert if a TaskChain workflow produced more events
    than it was given as input.

    The request JSON of `wf_name` is inspected; anything that is not a
    TaskChain is ignored. The expected number of input events is taken
    from Task1:

    * 'RequestNumEvents' present            -> that value (error 2)
    * 'InputDataset' with 'RunWhitelist'    -> events of the input
      dataset restricted to the run list (error 3)
    * 'InputDataset' without any whitelist  -> events of the whole input
      dataset (error 5)

    Two configurations are reported immediately without counting:
    both 'RequestNumEvents' and 'InputDataset' present (error 1), and
    'InputDataset' with 'BlockWhitelist' but no 'RunWhitelist' (error 4).

    Every output dataset is then compared against the input count; the
    first one that exceeds it triggers the alert. An alert mails the
    workflow name and terminates the process with exit status 1.

    Returns None when nothing is wrong or the check does not apply.
    """
    schema = getRequestJson(wf_name)

    # this check only works for taskchain workflows
    if schema['RequestType'] != 'TaskChain':
        return

    dbsApi = collect_dsets_and_nevents.getDBSApi()
    outputDatasets = collect_dsets_and_nevents.getOutputDset(wf_name)

    task1 = schema['Task1']
    has_num_events = 'RequestNumEvents' in task1
    has_input_dset = 'InputDataset' in task1

    # We should never hit this case
    if has_num_events and has_input_dset:
        _report_too_many_events(wf_name, 'too_many_events_check.py error 1')

    if has_num_events:
        # FastSim or FullSim from scratch
        inputEvents = task1['RequestNumEvents']
        subject = 'too_many_events_check.py error 2'
    elif has_input_dset:
        # Then it's either Data or MC recycling
        inputDset = task1['InputDataset']

        if 'RunWhitelist' in task1:
            # It's Data
            inputEvents = getEventsDataSetRunList(
                dbsApi, inputDset, task1['RunWhitelist'], verb=False)
            subject = 'too_many_events_check.py error 3'
        elif 'BlockWhitelist' in task1:
            # Block whitelists are not handled; exits, so nothing below runs
            _report_too_many_events(
                wf_name, 'too_many_events_check.py error 4')
        else:
            # Most likely MC, since there is no run whitelist
            # it means we can just go for num of events
            inputEvents = collect_dsets_and_nevents.getNumEvents(
                dbsApi, inputDset)
            subject = 'too_many_events_check.py error 5'
    else:
        # Neither key is present: there is nothing to compare against
        return

    for dataset in outputDatasets:
        # getOutputEvents returns -10 for tiers it cannot count; a
        # negative value is not expected to exceed inputEvents, so those
        # datasets pass the comparison.
        if getOutputEvents(dbsApi, dataset, verb=False) > inputEvents:
            _report_too_many_events(wf_name, subject)