def getOutputEvents(dbsApi, dset, verb=False):
    """
    Return the number of events of an output dataset.

    The count is obtained from collect_dsets_and_nevents.getNumEvents(dbsApi, dset).
    Completion cannot be calculated for ALCARECO, DQMIO and DQMROOT samples, so
    -10 is returned as soon as dset contains one of those tier markers and no
    lookup is performed. The verb argument is accepted but not used here.
    """
    # Tier markers for which no event count is looked up
    skipped_tiers = ('/ALCARECO', '/DQMIO', '/DQMROOT')
    for tier_marker in skipped_tiers:
        if tier_marker in dset:
            return -10

    return collect_dsets_and_nevents.getNumEvents(dbsApi, dset)
def getOutputEvents(dbsApi, dset, verb=False):
    """
    Get the num of events in the output dataset, provided its data tier
    is one we can compute completion for.

    Returns -10 when dset contains '/ALCARECO', '/DQMIO' or '/DQMROOT';
    otherwise returns whatever collect_dsets_and_nevents.getNumEvents
    reports for (dbsApi, dset). The verb flag is not read by this function.
    """
    # we cannot calculate completion of ALCARECO, DQMIO and DQMROOT samples
    tier_not_allowed = any(
        marker in dset for marker in ('/ALCARECO', '/DQMIO', '/DQMROOT'))
    if tier_not_allowed:
        return -10

    return collect_dsets_and_nevents.getNumEvents(dbsApi, dset)
def _mail_alert_and_exit(wf_name, error_code):
    """
    Mail the workflow name to the alert address, then exit with status 1.

    The subject is 'too_many_events_check.py error <error_code>' and the body
    is the workflow name. `mail` is started from an argument list (no shell),
    so characters in wf_name are never interpreted as shell syntax.
    """
    import subprocess  # local import: only needed on the failure path

    subject = 'too_many_events_check.py error %d' % error_code
    try:
        mailer = subprocess.Popen(
            ['mail', '-s', subject, '[email protected]'],
            stdin=subprocess.PIPE,
            universal_newlines=True)
        # Same payload `echo <wf_name> | mail ...` would have delivered
        mailer.communicate(wf_name + '\n')
    except (OSError, IOError) as err:
        # The notification is best effort: a missing or failing `mail`
        # binary must not prevent the script from aborting below.
        sys.stderr.write('could not send alert mail: %s\n' % err)
    sys.exit(1)


def too_many_events_check(wf_name):
    """
    Abort if any output dataset of a TaskChain workflow has more events
    than its input.

    The request schema is fetched with getRequestJson(wf_name). Workflows
    whose RequestType is not 'TaskChain' are ignored. The expected number of
    input events is derived from schema['Task1']:

      * 'RequestNumEvents' present      -> that value (error 2 on excess)
      * 'InputDataset' + 'RunWhitelist' -> events in the listed runs
                                           (error 3 on excess)
      * 'InputDataset' only             -> events of the whole input dataset
                                           (error 5 on excess)

    Unsupported configurations are reported immediately: both
    'RequestNumEvents' and 'InputDataset' present (error 1), or a
    'BlockWhitelist' without a 'RunWhitelist' (error 4).

    Every reported error mails the workflow name to the alert address and
    terminates the process via sys.exit(1). Returns None otherwise.
    """
    schema = getRequestJson(wf_name)

    # this check only works for taskchain workflows
    if schema['RequestType'] != 'TaskChain':
        return

    dbsApi = collect_dsets_and_nevents.getDBSApi()
    outputDatasets = collect_dsets_and_nevents.getOutputDset(wf_name)
    task1 = schema['Task1']

    # We should never hit this case
    if 'RequestNumEvents' in task1 and 'InputDataset' in task1:
        _mail_alert_and_exit(wf_name, 1)

    # Check whether it's FastSim or FullSim from scratch
    if 'RequestNumEvents' in task1:
        inputEvents = task1['RequestNumEvents']
        error_code = 2
    # Then it's either Data or MC recycling
    elif 'InputDataset' in task1:
        inputDset = task1['InputDataset']
        # It's Data
        if 'RunWhitelist' in task1:
            inputEvents = getEventsDataSetRunList(
                dbsApi, inputDset, task1['RunWhitelist'], verb=False)
            error_code = 3
        elif 'BlockWhitelist' in task1:
            # Block whitelists are not handled by this check; never returns
            _mail_alert_and_exit(wf_name, 4)
        # Most likely MC, since there is no run whitelist
        # it means we can just go for num of events
        else:
            inputEvents = collect_dsets_and_nevents.getNumEvents(
                dbsApi, inputDset)
            error_code = 5
    else:
        # Neither key present: nothing to compare against
        return

    # getOutputEvents returns -10 for tiers it cannot count; such a value
    # does not exceed a non-negative input count, so it raises no alert.
    for dataset in outputDatasets:
        if getOutputEvents(dbsApi, dataset, verb=False) > inputEvents:
            _mail_alert_and_exit(wf_name, error_code)
def _notify_too_many_events(wf_name, error_number):
    """
    Send the alert mail for too_many_events_check (best effort).

    The mail subject is 'too_many_events_check.py error <error_number>' and
    the body is the workflow name followed by a newline. `mail` is invoked
    with an argument list rather than through a shell command string, so
    wf_name is passed as data only.
    """
    import subprocess  # local import: only used when an alert is raised

    command = [
        'mail',
        '-s',
        'too_many_events_check.py error %d' % error_number,
        '[email protected]',
    ]
    try:
        process = subprocess.Popen(
            command, stdin=subprocess.PIPE, universal_newlines=True)
        process.communicate(wf_name + '\n')
    except (OSError, IOError) as err:
        # Do not let a missing/failing mailer mask the real problem; the
        # caller still terminates the script afterwards.
        sys.stderr.write('could not send alert mail: %s\n' % err)


def too_many_events_check(wf_name):
    """
    Check that no output dataset of a TaskChain workflow contains more
    events than the workflow's input.

    Reads the request with getRequestJson(wf_name) and returns immediately
    (None) unless schema['RequestType'] is 'TaskChain'. The input event count
    comes from schema['Task1']: its 'RequestNumEvents' value, or the events
    of 'InputDataset' restricted to 'RunWhitelist' when one is given, or the
    events of the whole 'InputDataset'.

    On any failure the workflow name is mailed and the process ends with
    sys.exit(1). Error numbers used in the mail subject:
      1 - both 'RequestNumEvents' and 'InputDataset' are set
      2 - output exceeds 'RequestNumEvents'
      3 - output exceeds the events of the whitelisted runs
      4 - 'BlockWhitelist' without 'RunWhitelist' (not supported)
      5 - output exceeds the events of the input dataset
    """
    def abort(error_number):
        # Report, then stop the whole script as the original check did
        _notify_too_many_events(wf_name, error_number)
        sys.exit(1)

    schema = getRequestJson(wf_name)

    # this check only works for taskchain workflows
    if schema['RequestType'] != 'TaskChain':
        return

    dbsApi = collect_dsets_and_nevents.getDBSApi()
    outputDatasets = collect_dsets_and_nevents.getOutputDset(wf_name)

    first_task = schema['Task1']
    generates_events = 'RequestNumEvents' in first_task
    reads_dataset = 'InputDataset' in first_task

    # We should never hit this case
    if generates_events and reads_dataset:
        abort(1)

    if generates_events:
        # Check whether it's FastSim or FullSim from scratch
        inputEvents = first_task['RequestNumEvents']
        excess_error = 2
    elif reads_dataset:
        # Then it's either Data or MC recycling
        inputDset = first_task['InputDataset']
        if 'RunWhitelist' in first_task:
            # It's Data
            runList = first_task['RunWhitelist']
            inputEvents = getEventsDataSetRunList(
                dbsApi, inputDset, runList, verb=False)
            excess_error = 3
        elif 'BlockWhitelist' in first_task:
            abort(4)
        else:
            # Most likely MC, since there is no run whitelist
            # it means we can just go for num of events
            inputEvents = collect_dsets_and_nevents.getNumEvents(
                dbsApi, inputDset)
            excess_error = 5
    else:
        # No usable input description in Task1: nothing to check
        return

    # One comparison loop for all three input kinds
    for dataset in outputDatasets:
        outputEvents = getOutputEvents(dbsApi, dataset, verb=False)
        if outputEvents > inputEvents:
            abort(excess_error)