def closeOutStep0Requests(url, workflows):
    """
    Closes out the MonteCarlo step0 requests whose output datasets all
    pass validation, printing one table row per dataset examined.

    A workflow is skipped when its status is not 'completed' or when its
    team is 'analysis'. A dataset is ready when all of these hold:
      - phedexClient.getCustodialMoveSubscriptionSite returns a truthy site
      - its completion fraction is at least 0.95
      - dbs3Client.duplicateLumi reports nothing
      - checkCorrectLumisEventGEN returns a truthy value
    The workflow is closed out (closeOutWorkflowCascade) only when every
    dataset is ready. A workflow with no output datasets is closed out
    as well, since nothing vetoes it.

    url: passed through unchanged to the client calls
    workflows: iterable of workflow identifiers
    returns: list of the workflows having at least one dataset that
        reached the completion threshold without a subscription; each
        workflow is listed once, in the order first seen
    """
    closeThreshold = 0.95
    noSiteWorkflows = []
    for workflow in workflows:
        # cheap skip checks first, so no dataset query is spent on
        # workflows that are not going to be examined
        if reqMgrClient.getWorkflowStatus(url, workflow) != 'completed':
            continue
        # skip montecarlos on a special queue
        if reqMgrClient.getRequestTeam(url, workflow) == 'analysis':
            continue

        closeOutWorkflow = True
        for dataset in reqMgrClient.outputdatasetsWorkflow(url, workflow):
            closeOutDataset = False
            duplicate = None
            correctLumis = None
            percentage = percentageCompletion(url, workflow, dataset)
            phedexSubscription = phedexClient.getCustodialMoveSubscriptionSite(dataset)
            enoughEvents = percentage >= closeThreshold

            if enoughEvents and phedexSubscription:
                # subscription and enough events: check duplicates and lumis
                duplicate = dbs3Client.duplicateLumi(dataset)
                correctLumis = checkCorrectLumisEventGEN(dataset)
                # TODO validate closed blocks
                closeOutDataset = bool(not duplicate and correctLumis)
            elif enoughEvents and workflow not in noSiteWorkflows:
                # enough events but no subscription: report the workflow,
                # once, however many of its datasets are affected
                noSiteWorkflows.append(workflow)

            # if at least one dataset is not ready wf cannot be closed out
            closeOutWorkflow = closeOutWorkflow and closeOutDataset
            print('| %80s | %100s | %4s | %5s| %3s | %5s| %5s| ' % (
                workflow, dataset, str(int(percentage * 100)),
                str(phedexSubscription), str(correctLumis),
                duplicate, closeOutDataset))

        # workflow can only be closed out if all datasets are ready
        if closeOutWorkflow:
            reqMgrClient.closeOutWorkflowCascade(url, workflow)
    # separation line
    print('-' * 180)
    return noSiteWorkflows
def closeOutMonterCarloRequests(url, workflows):
    """
    Closes out the MonteCarlo / MonteCarlo-from-GEN workflows whose
    output datasets all pass validation, printing one table row per
    dataset examined.

    Workflows whose team is 'analysis' are skipped. A dataset is ready
    when all of these hold:
      - phedexClient.getCustodialMoveSubscriptionSite returns a truthy site
      - its completion fraction reaches the threshold, which is 1.00 when
        the dataset name contains 'SMS' and 0.95 otherwise
      - dbs3Client.duplicateLumi reports nothing
    The workflow is closed out (closeOutWorkflowCascade) only when every
    dataset is ready. A workflow with no output datasets is closed out
    as well, since nothing vetoes it.

    url: passed through unchanged to the client calls
    workflows: iterable of workflow identifiers
    returns: list of the workflows having at least one dataset that
        reached its threshold without a subscription; each workflow is
        listed once, in the order first seen
    """
    noSiteWorkflows = []
    for workflow in workflows:
        # skip montecarlos on a special queue (checked before the dataset
        # query so skipped workflows cost a single request)
        if reqMgrClient.getRequestTeam(url, workflow) == 'analysis':
            continue

        closeOutWorkflow = True
        for dataset in reqMgrClient.outputdatasetsWorkflow(url, workflow):
            # SMS montecarlos must be fully complete
            closePercentage = 1.00 if 'SMS' in dataset else 0.95
            closeOutDataset = False
            transPerc = 0
            duplicate = None
            percentage = percentageCompletion(url, workflow, dataset)
            phedexSubscription = phedexClient.getCustodialMoveSubscriptionSite(dataset)
            enoughEvents = percentage >= closePercentage

            if enoughEvents and phedexSubscription:
                # subscription and enough events: check transfer and duplicates
                transPerc = phedexClient.getTransferPercentage(
                    url, dataset, phedexSubscription)
                duplicate = dbs3Client.duplicateLumi(dataset)
                closeOutDataset = not duplicate
            elif enoughEvents and workflow not in noSiteWorkflows:
                # enough events but no subscription: report the workflow,
                # once, however many of its datasets are affected
                noSiteWorkflows.append(workflow)

            # if at least one dataset is not ready wf cannot be closed out
            closeOutWorkflow = closeOutWorkflow and closeOutDataset
            print('| %80s | %100s | %4s | %5s| %3s | %5s| %5s|' % (
                workflow, dataset, str(int(percentage * 100)),
                str(phedexSubscription), str(int(transPerc * 100)),
                duplicate, closeOutDataset))

        # workflow can only be closed out if all datasets are ready
        if closeOutWorkflow:
            reqMgrClient.closeOutWorkflowCascade(url, workflow)
    # separation line
    print('-' * 180)
    return noSiteWorkflows
def validateClosingTaskChain(url, workflow):
    """
    Validates if a task-chain workflow can be closed out, taking the
    step/filter efficiency into account, and returns the outcome as a dict.
    test with pdmvserv_task_SUS-Summer12WMLHE-00004__v1_T_141003_120119_9755

    The chain is treated as "from GEN" when its first output dataset ends
    in /GEN or /LHE and its second one ends in /GEN-SIM or /GEN. Every
    other chain, including one with fewer than two output datasets, is
    delegated to validateClosingWorkflow with a 0.95 threshold.

    For a "from GEN" chain the expected number of events is the workflow
    input events multiplied by the filter efficiencies of Task1 and Task2;
    that reduced figure is also the reference for all later datasets.
    A dataset can be closed out when all of these hold:
      - its completion fraction is at least 0.95
      - dbs3Client.duplicateRunLumi reports nothing
      - it has a custodial subscription request
      - the DBS and PhEDEx file counts are equal

    url: passed through unchanged to the client calls
    workflow: object providing name, outputDatasets, getInputEvents(),
        getOutputEvents(dataset) and getFilterEfficiency(taskName)
    returns: dict with 'name', 'closeOutWorkflow' and 'datasets', the
        latter mapping each dataset to its percentage, duplicate,
        phedexReqs, closeOutDataset, transPerc, correctLumis (always
        None here), missingSubs, dbsFiles and phedexFiles
    """
    inputEvents = workflow.getInputEvents()
    outputs = workflow.outputDatasets

    # if subtype doesn't come with the request, we decide based on dataset
    # names; the length guard keeps short chains from raising IndexError
    fromGen = bool(len(outputs) >= 2
                   and re.match(r'.*/(GEN|LHE)$', outputs[0])
                   and re.match(r'.*/(GEN-SIM|GEN)$', outputs[1]))

    # task-chain 1 (without filterEff): validate with the regular procedure
    if not fromGen:
        return validateClosingWorkflow(url, workflow, 0.95)

    # task-chain 2 GEN, GEN-SIM, GEN-SIM-RAW, AODSIM, DQM
    result = {'name': workflow.name,
              'datasets': dict((ds, {}) for ds in outputs)}
    closeOutWorkflow = True
    # assumes the n-th output dataset is produced by Task<n> -- TODO confirm
    for taskNumber, dataset in enumerate(outputs, 1):
        closeOutDataset = False
        duplicate = None
        transPerc = None
        missingSubs = False

        outputEvents = workflow.getOutputEvents(dataset)
        filterEff = workflow.getFilterEfficiency('Task%d' % taskNumber)
        # GEN/LHE and GEN-SIM: decrease the expected events by filter eff
        if taskNumber <= 2:
            inputEvents *= filterEff
        percentage = outputEvents / float(inputEvents) if inputEvents > 0 else 0.0

        phedexReqs = phedexClient.getCustodialSubscriptionRequestSite(dataset)
        # TODO test
        dbsFiles = dbs3Client.getFileCountDataset(dataset)
        phdFiles = phedexClient.getFileCountDataset(url, dataset)

        # check first percentage, then duplicates, then subscription
        if percentage >= 0.95:
            duplicate = dbs3Client.duplicateRunLumi(dataset)
            if duplicate:
                pass
            elif phedexReqs:
                # transfer percentage is informational only; a failure to
                # obtain it does not block the close-out
                try:
                    transPerc = phedexClient.getTransferPercentage(
                        url, dataset, phedexReqs[0])
                except Exception:
                    transPerc = None
                # last check if files are equal
                closeOutDataset = (dbsFiles == phdFiles)
            else:
                missingSubs = True

        # if at least one dataset is not ready wf cannot be closed out
        closeOutWorkflow = closeOutWorkflow and closeOutDataset
        # load results in a dict
        result['datasets'][dataset].update({
            "percentage": percentage,
            "duplicate": duplicate,
            "phedexReqs": phedexReqs,
            "closeOutDataset": closeOutDataset,
            "transPerc": transPerc,
            "correctLumis": None,
            "missingSubs": missingSubs,
            "dbsFiles": dbsFiles,
            "phedexFiles": phdFiles,
        })

    result['closeOutWorkflow'] = closeOutWorkflow
    return result
def validateClosingWorkflow(url, workflow, closePercentage = 0.95, checkEqual=False,
            checkDuplicates=True, checkLumiNumb=False, checkPhedex='custodial'):
    """
    Validates if a workflow can be closed out, using different parameters
    of validation, and returns the response as a dict.

    url: passed through unchanged to the client calls
    workflow: object providing name, outputDatasets and
        percentageCompletion(dataset, skipInvalid=True)
    closePercentage: completion fraction a dataset has to reach
    checkEqual: when true the completion must be exactly closePercentage,
        otherwise at least closePercentage
    checkDuplicates: when true the dataset must have no duplicate lumis
        (dbs3Client.duplicateRunLumi)
    checkLumiNumb: when true checkCorrectLumisEventGEN must accept it
    checkPhedex: can be 'custodial', 'any' or False; selects which
        subscriptions are required (False requires none)

    Datasets ending in "DQMIO" bypass the completion threshold. Besides
    the checks above a dataset is only closed out when its DBS and PhEDEx
    file counts are equal. Lookup failures are conservative: a failing
    completion query counts as 0.0, a failing subscription query as no
    subscription and a failing duplicate check blocks the close-out.
    Failures of the file-count queries are not caught here.

    returns: dict with 'name', 'closeOutWorkflow' and 'datasets', the
        latter mapping each dataset to its percentage, duplicate,
        phedexReqs, closeOutDataset, transPerc, correctLumis,
        missingSubs, dbsFiles and phedexFiles
    """
    result = {'name': workflow.name,
              'datasets': dict((ds, {}) for ds in workflow.outputDatasets)}
    closeOutWorkflow = True
    # check if dataset is ready
    # TODO validate here if workflow is MonteCarlo from GEN with two output
    for dataset in workflow.outputDatasets:
        closeOutDataset = False
        duplicate = None
        correctLumis = None
        transPerc = None
        missingSubs = False

        try:
            percentage = workflow.percentageCompletion(dataset, skipInvalid=True)
        except Exception:
            print('%s %s %s' % ('Error getting information from DBS',
                                workflow, dataset))
            percentage = 0.0

        # retrieve either custodial or all subscriptions.
        phedexReqs = None
        try:
            if checkPhedex == 'custodial':
                phedexReqs = phedexClient.getCustodialSubscriptionRequestSite(dataset)
            elif checkPhedex == 'any':
                phedexReqs = phedexClient.getSubscriptionSites(dataset)
        except Exception:
            print('Error getting phedex info,:  %s' % (dataset,))
            phedexReqs = None

        dbsFiles = dbs3Client.getFileCountDataset(dataset)
        phdFiles = phedexClient.getFileCountDataset(url, dataset)
        equalFiles = (dbsFiles == phdFiles)

        # check first percentage; DQMIO are exceptions (have 0 events)
        if checkEqual:
            reachedThreshold = (percentage == closePercentage)
        else:
            reachedThreshold = (percentage >= closePercentage)
        if reachedThreshold or dataset.endswith("DQMIO"):
            duplicateCheckFailed = False
            # if we need to check duplicates
            if checkDuplicates:
                try:
                    duplicate = dbs3Client.duplicateRunLumi(dataset, skipInvalid=True)
                except Exception:
                    print('Error in checking duplicate lumis for %s' % (dataset,))
                    # unknown is not the same as "no duplicates": the
                    # dataset must not be closed out unverified
                    duplicateCheckFailed = True
            # if we need to check for correct lumi number
            if checkLumiNumb:
                correctLumis = checkCorrectLumisEventGEN(dataset)

            # dataset healthy means:
            # checkDuplicates -> verified, no duplicates
            # checkLumiNumb -> correct
            healthy = (not duplicateCheckFailed
                       and not (checkDuplicates and duplicate)
                       and not (checkLumiNumb and not correctLumis))
            if healthy:
                if not checkPhedex:
                    # phedex check not required; last check, that files
                    # are equal
                    closeOutDataset = equalFiles
                elif phedexReqs:
                    # transfer percentage is informational only; a failure
                    # to obtain it does not block the close-out
                    try:
                        transPerc = phedexClient.getTransferPercentage(
                            url, dataset, phedexReqs[0])
                    except Exception:
                        transPerc = None
                    # last check, that files are equal
                    closeOutDataset = equalFiles
                else:
                    # TODO only missing subscription if equal # of files
                    missingSubs = equalFiles

        # if at least one dataset is not ready wf cannot be closed out
        closeOutWorkflow = closeOutWorkflow and closeOutDataset
        # load results in a dict
        result['datasets'][dataset].update({
            "percentage": percentage,
            "duplicate": duplicate,
            "phedexReqs": phedexReqs,
            "closeOutDataset": closeOutDataset,
            "transPerc": transPerc,
            "correctLumis": correctLumis,
            "missingSubs": missingSubs,
            "dbsFiles": dbsFiles,
            "phedexFiles": phdFiles,
        })

    result['closeOutWorkflow'] = closeOutWorkflow
    return result