def closeOutMonterCarloRequests(url, workflows):
    """Print a close-out report for MonteCarlo workflows and close out those
    whose every output dataset is >= 90% complete, custodially subscribed,
    and free of duplicate lumis.

    url       -- request-manager host (e.g. 'cmsweb.cern.ch')
    workflows -- iterable of workflow names
    """
    for workflow in workflows:
        datasets=phedexSubscription.outputdatasetsWorkflow(url, workflow)
        closeOutWorkflow=True
        if getRequestTeam(url, workflow)!='analysis':#If request is not in special queue
            for dataset in datasets:
                ClosePercentage=0.90
                #if 'SMS' in dataset:
                #    ClosePercentage=1
                closeOutDataset=True
                Percentage=PercentageCompletion(url, workflow, dataset)
                PhedexSubscription=CustodialMoveSubscriptionCreated(dataset)
                TransPercen=0
                closedBlocks=False
                if PhedexSubscription!=False:
                    # PhedexSubscription holds the custodial site name when truthy.
                    site=PhedexSubscription
                    TransPercen=TransferPercentage(url, dataset, site)
                duplicate=True
                if PhedexSubscription!=False and Percentage>=float(0.9):
                    # Only run the (expensive) duplicate-lumi check once the
                    # dataset is subscribed and nearly complete.
                    duplicate=dbsTest.duplicateLumi(dataset)
                    closedBlocks = True #dbsTest.hasAllBlocksClosed(dataset)
                if Percentage>=float(ClosePercentage) and PhedexSubscription!=False and not duplicate:
                    closeOutDataset=True
                else:
                    closeOutDataset=False
                # Workflow closes out only if every dataset qualifies.
                closeOutWorkflow=closeOutWorkflow and closeOutDataset
                print '| %80s | %100s | %4s | %5s| %3s | %5s|%5s| %5s|' % (workflow, dataset,str(int(Percentage*100)), str(PhedexSubscription), str(int(TransPercen*100)), duplicate, closedBlocks, closeOutDataset)
        if closeOutWorkflow:
            phedexSubscription.closeOutWorkflow(url, workflow)
    print'-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------'
def closeOutStep0Requests(url, workflows):
    """Print a close-out report for Step0 (LHE) workflows and close out those
    whose every output dataset is >= 95% complete, custodially subscribed,
    duplicate-free, and passes the lumi/event sanity check.
    """
    for workflow in workflows:
        #print workflow
        datasets=phedexSubscription.outputdatasetsWorkflow(url, workflow)
        closeOutWorkflow=True
        if getRequestTeam(url, workflow)!='analysis':#If request is not in special queue
            for dataset in datasets:
                closeOutDataset=False
                Percentage=PercentageCompletion(url, workflow, dataset)
                PhedexSubscription=CustodialMoveSubscriptionCreated(dataset)
                if PhedexSubscription!=False:
                    # PhedexSubscription holds the custodial site name when truthy.
                    site=PhedexSubscription
                    TransPercen=TransferPercentage(url, dataset, site)
                duplicate=dbsTest.duplicateLumi(dataset)
                correctLumis=dbsTest.checkCorrectLumisEventGEN(dataset)
                if Percentage>=float(0.95) and PhedexSubscription!=False and not duplicate and correctLumis:
                    closeOutDataset=True
                else:
                    closeOutDataset=False
                # Workflow closes out only if every dataset qualifies.
                closeOutWorkflow=closeOutWorkflow and closeOutDataset
                print '| %80s | %100s | %4s | %5s| %3s | %5s| %5s| ' % (workflow, dataset,str(int(Percentage*100)), str(PhedexSubscription), str(correctLumis), duplicate, closeOutDataset)
        if closeOutWorkflow:
            phedexSubscription.closeOutWorkflow(url, workflow)
    print'-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------'
def classifyRequests(url, requests):
    """Print a table of MonteCarlo(FromGEN) requests whose first output
    dataset contains duplicate lumis, with completion and filter efficiency.

    url      -- request-manager host
    requests -- iterable of request dicts ('request_name', 'type', 'status')
    """
    print '-----------------------------------------------------------------------------------------------------------------------------------------------------------'
    print '| Request |req Type |Status Req | Dataset |Status Dataset | Percentage|FilterEfficiency| '
    print '-----------------------------------------------------------------------------------------------------------------------------------------------------------'
    classifiedRequests = {}  # NOTE(review): never populated or returned.
    for request in requests:
        if 'type' in request:
            name = request['request_name']
            if request['type'] == 'MonteCarloFromGEN' or request[
                    'type'] == 'MonteCarlo':
                datasetWorkflow = phedexSubscription.outputdatasetsWorkflow(
                    url, name)
                problem = False
                percentage = 0
                if len(datasetWorkflow) < 1:
                    continue
                # Only the first output dataset is inspected.
                dataset = datasetWorkflow[0]
                # Tiny epsilon avoids division by zero when input events is 0.
                inputEvents = 0.0001
                inputEvents = inputEvents + int(
                    dbsTest.getInputEvents(url, name))
                outputEvents = dbsTest.getEventCountDataSet(dataset)
                percentage = outputEvents / float(inputEvents)
                duplicate = duplicateLumi(dataset)
                problem = False
                if duplicate:
                    problem = True
                if problem:
                    FilterEfficiency = getFilterEfficiency(url, name)
                    datasetStatus = getDatasetStatus(dataset)
                    print '| %20s | %8s| %8s | %20s | %10s| %10s | %10s| ' % (
                        name, request['type'], request['status'], dataset,
                        datasetStatus, str(percentage * 100), FilterEfficiency)
    print '---------------------------------------------------------------------------------------------------------------------------'
def classifyRequests(url, requests):
    """Print a table of MonteCarlo(FromGEN) requests whose first output
    dataset contains duplicate lumis, with completion and filter efficiency.
    """
    print '-----------------------------------------------------------------------------------------------------------------------------------------------------------'
    print '| Request |req Type |Status Req | Dataset |Status Dataset | Percentage|FilterEfficiency| '
    print '-----------------------------------------------------------------------------------------------------------------------------------------------------------'
    classifiedRequests={}  # NOTE(review): never populated or returned.
    for request in requests:
        if 'type' in request:
            name=request['request_name']
            if request['type']=='MonteCarloFromGEN' or request['type']=='MonteCarlo':
                datasetWorkflow=phedexSubscription.outputdatasetsWorkflow(url, name)
                problem=False
                percentage=0
                if len(datasetWorkflow)<1:
                    continue
                # Only the first output dataset is inspected.
                dataset=datasetWorkflow[0]
                # Tiny epsilon avoids division by zero when input events is 0.
                inputEvents=0.0001
                inputEvents=inputEvents+int(dbsTest.getInputEvents(url, name))
                outputEvents=dbsTest.getEventCountDataSet(dataset)
                percentage=outputEvents/float(inputEvents)
                duplicate=duplicateLumi(dataset)
                problem=False
                if duplicate:
                    problem=True
                if problem:
                    FilterEfficiency=getFilterEfficiency(url, name)
                    datasetStatus=getDatasetStatus(dataset)
                    print '| %20s | %8s| %8s | %20s | %10s| %10s | %10s| ' % (name, request['type'], request['status'], dataset,datasetStatus, str(percentage*100), FilterEfficiency)
    print '---------------------------------------------------------------------------------------------------------------------------'
def getListUnprocessedBlocks(url, workflow):
    """Return the input-dataset blocks whose lumis never made it into the
    workflow's first output dataset."""
    produced = phedexSubscription.outputdatasetsWorkflow(url, workflow)
    source_dataset = dbsTest.getInputDataSet(url, workflow)
    # Lumis already present in the first output dataset.
    processed_lumis = getLumisInDataset(produced[0])
    # Per-block lumi lists of the input dataset.
    input_block_lumis = getBlocksLumi(source_dataset)
    return getBlocksNotProcessed(processed_lumis, input_block_lumis)
def main(): args = sys.argv[1:] if not len(args) == 1: print "usage: python getRelValDsetNames.py <inputFile_containing_a_list_of_workflows>" sys.exit(0) inputFile = args[0] f = open(inputFile, 'r') url = 'cmsweb.cern.ch' for line in f: workflow = line.rstrip('\n') outputDataSets = phedexSubscription.outputdatasetsWorkflow( url, workflow) # print "These are the output datasets:" # print outputDataSets #inputEvents=getInputEvents(url, workflow) #print inputEvents for dataset in outputDataSets: outputEvents = dbsTest.getEventCountDataSet( "https://cmsweb.cern.ch", dataset) # print dataset+" match: "+str(outputEvents/float(inputEvents)*100) +"%" print dataset + "\t\t" + str(outputEvents) f.close sys.exit(0)
def closeOutReRecoWorkflows(url, workflows):
    """Print a close-out report for ReReco workflows (skipping RelVal/TEST)
    and close out those whose every output dataset is 100% complete and
    subscribed."""
    for workflow in workflows:
        # RelVal and TEST workflows are handled elsewhere.
        if 'RelVal' in workflow:
            continue
        if 'TEST' in workflow:
            continue
        datasets = phedexSubscription.outputdatasetsWorkflow(url, workflow)
        closeOutWorkflow = True
        InputDataset = dbsTest.getInputDataSet(url, workflow)
        for dataset in datasets:
            # Duplicate check is disabled for ReReco: always False here.
            duplicate = False
            closeOutDataset = True
            Percentage = PercentageCompletion(url, workflow, dataset)
            PhedexSubscription = testOutputDataset(dataset)
            closeOutDataset = False
            if Percentage == 1 and PhedexSubscription and not duplicate:
                closeOutDataset = True
            else:
                closeOutDataset = False
            # Workflow closes out only if every dataset qualifies.
            closeOutWorkflow = closeOutWorkflow and closeOutDataset
            print '| %80s | %100s | %4s | %5s| %3s | %5s|%5s| ' % (
                workflow, dataset, str(int(Percentage * 100)),
                str(PhedexSubscription), 100, duplicate, closeOutDataset)
        if closeOutWorkflow:
            phedexSubscription.closeOutWorkflow(url, workflow)
    print '----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------'
def closeOutStep0Requests(url, workflows):
    """Print a close-out report for Step0 (LHE) workflows and close out those
    whose every output dataset is >= 90% complete, custodially subscribed,
    duplicate-free, and passes the lumi/event sanity check."""
    for workflow in workflows:
        datasets = phedexSubscription.outputdatasetsWorkflow(url, workflow)
        closeOutWorkflow = True
        if getRequestTeam(
                url, workflow) != 'analysis':  #If request is not in special queue
            for dataset in datasets:
                closeOutDataset = False
                Percentage = PercentageCompletion(url, workflow, dataset)
                PhedexSubscription = CustodialMoveSubscriptionCreated(dataset)
                if PhedexSubscription != False:
                    # PhedexSubscription holds the custodial site name when truthy.
                    site = PhedexSubscription
                    TransPercen = TransferPercentage(url, dataset, site)
                duplicate = dbsTest.duplicateLumi(dataset)
                correctLumis = dbsTest.checkCorrectLumisEventGEN(dataset)
                if Percentage >= float(
                        0.90
                ) and PhedexSubscription != False and not duplicate and correctLumis:
                    closeOutDataset = True
                else:
                    closeOutDataset = False
                # Workflow closes out only if every dataset qualifies.
                closeOutWorkflow = closeOutWorkflow and closeOutDataset
                print '| %80s | %100s | %4s | %5s| %3s | %5s|%5s| ' % (
                    workflow, dataset, str(int(
                        Percentage * 100)), str(PhedexSubscription),
                    str(correctLumis), duplicate, closeOutDataset)
        if closeOutWorkflow:
            phedexSubscription.closeOutWorkflow(url, workflow)
    print '-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------'
def main(): url = 'cmsweb.cern.ch' args = sys.argv[1:] if not len(args) == 1: print "usage: python closeOutTaskChainWorkflows.py <inputFile_containing_a_list_of_workflows>" sys.exit(0) inputFile = args[0] f = open(inputFile, 'r') print '-------------------------------------------------------------------------------------------------------------------------------------' print '| Request | Closed-out? | Current status |' print '-------------------------------------------------------------------------------------------------------------------------------------' closedOut = [] nonClosedOut = [] running = [] for line in f: workflow = line.rstrip('\n') outputDatasets = phedexSubscription.outputdatasetsWorkflow( url, workflow) inputEvents = dbsTest.getInputEvents(url, workflow) closeOut = True for dataset in outputDatasets: # we cannot calculate completion of ALCARECO samples if 'ALCARECO' in dataset: continue outputEvents = dbsTest.getOutputEvents(url, workflow, dataset) print "dataset = " + dataset print "inputEvents = " + str(inputEvents) print "outputEvents = " + str(outputEvents) if inputEvents != 0: if (outputEvents / float(inputEvents) * 100) >= 100.0: pass #print dataset+" match: "+str(outputEvents/float(inputEvents)*100) +"%" else: #print dataset + " it is less than 99.9999% completed, keeping it in the current status" closeOut = False break else: print "Input Events 0" if closeOut: closedOut.append(workflow) else: nonClosedOut.append(workflow) for workflow in closedOut: status = getStatus(url, workflow) #if status == 'completed': # closeOutTaskChainWorkflows(url, workflow) #else: # pass print "%90s\tYES\t\t%s" % (workflow, status) for workflow in nonClosedOut: status = getStatus(url, workflow) print "%90s\tNO\t\t%s" % (workflow, status) f.close print '-------------------------------------------------------------------------------------------------------------------------------------' sys.exit(0)
def closeOutMonterCarloRequests(url, workflows):
    """Print a close-out report for MonteCarlo workflows and close out those
    whose every output dataset is complete enough (93%, or 100% for SMS
    scans), custodially subscribed, and free of duplicate lumis."""
    for workflow in workflows:
        datasets=phedexSubscription.outputdatasetsWorkflow(url, workflow)
        closeOutWorkflow=True
        if getRequestTeam(url, workflow)!='analysis':#If request is not in special queue
            for dataset in datasets:
                ClosePercentage=0.93
                # SMS scan datasets must be fully complete before close-out.
                if 'SMS' in dataset:
                    ClosePercentage=1
                closeOutDataset=True
                Percentage=PercentageCompletion(url, workflow, dataset)
                PhedexSubscription=CustodialMoveSubscriptionCreated(dataset)
                TransPercen=0
                if PhedexSubscription!=False:
                    # PhedexSubscription holds the custodial site name when truthy.
                    site=PhedexSubscription
                    TransPercen=TransferPercentage(url, dataset, site)
                duplicate=True
                if PhedexSubscription!=False and Percentage>=float(0.9):
                    # Only run the (expensive) duplicate-lumi check once the
                    # dataset is subscribed and nearly complete.
                    duplicate=dbsTest.duplicateLumi(dataset)
                if Percentage>=float(ClosePercentage) and PhedexSubscription!=False and not duplicate:
                    closeOutDataset=True
                else:
                    closeOutDataset=False
                # Workflow closes out only if every dataset qualifies.
                closeOutWorkflow=closeOutWorkflow and closeOutDataset
                print '| %80s | %100s | %4s | %5s| %3s | %5s|%5s| ' % (workflow, dataset,str(int(Percentage*100)), str(PhedexSubscription), str(int(TransPercen*100)), duplicate, closeOutDataset)
        if closeOutWorkflow:
            phedexSubscription.closeOutWorkflow(url, workflow)
    print'-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------'
def classifyRunningRequests(url, requests): datasetsUnsuscribed={} datasetsUnsuscribedSpecialQueue=[] for request in requests: name=request['request_name'] status='NoStatus' if 'status' in request.keys(): status=request['status'] else: continue requestType='NoType' if 'type' in request.keys(): requestType=request['type'] else: continue if status=='running' or status=='completed': if requestType=='MonteCarloFromGEN' or requestType=='MonteCarlo' or requestType=='LHEStepZero': site=closeOutWorkflows.findCustodial(url, name) if requestType=='LHEStepZero': site='T1_US_FNAL' if site=='NoSite': continue if closeOutWorkflows.getRequestTeam(url, name)=='analysis': # If the request is running in the special queue datasetWorkflow=phedexSubscription.outputdatasetsWorkflow(url, request) for dataset in datasetWorkflow: PhedexSubscriptionDone=phedexSubscription.TestSubscritpionSpecialRequest(url, dataset, 'T2_DE_DESY') if not PhedexSubscriptionDone: datasetsUnsuscribedSpecialQueue.append(dataset) else: datasetWorkflow=phedexSubscription.outputdatasetsWorkflow(url, name) for dataset in datasetWorkflow: if dataset == "/SMS-T2tt_Mgluino-225to1200_mLSP-0to1000_8TeV-Pythia6Z/Summer12-START52_V9_FSIM-v1/AODSIM": print "Skipping",dataset continue percentage=closeOutWorkflows.PercentageCompletion(url, name, dataset) if float(percentage)>float(0): if not phedexSubscription.TestCustodialSubscriptionRequested(url, dataset, site): if site not in datasetsUnsuscribed.keys(): datasetsUnsuscribed[site]=[dataset] else: datasetsUnsuscribed[site].append(dataset) else: continue if len(datasetsUnsuscribedSpecialQueue)>0: phedexSubscription.makeCustodialReplicaRequest(url, 'T2_DE_DESY',datasetsUnsuscribedSpecialQueue, "Replica Subscription for Request in special production queue") return datasetsUnsuscribed
def main(): url='cmsweb.cern.ch' args=sys.argv[1:] if not len(args)==1: print "usage: python closeOutTaskChainWorkflows.py <inputFile_containing_a_list_of_workflows>" sys.exit(0) inputFile=args[0] f = open(inputFile, 'r') print '-------------------------------------------------------------------------------------------------------------------------------------' print '| Request | Closed-out? | Current status |' print '-------------------------------------------------------------------------------------------------------------------------------------' closedOut = [] nonClosedOut = [] running = [] for line in f: workflow = line.rstrip('\n') outputDatasets=phedexSubscription.outputdatasetsWorkflow(url, workflow) inputEvents=dbsTest.getInputEvents(url, workflow) closeOut = True for dataset in outputDatasets: # we cannot calculate completion of ALCARECO samples if 'ALCARECO' in dataset: continue outputEvents=dbsTest.getOutputEvents(url, workflow, dataset) print "dataset = " + dataset print "inputEvents = " + str(inputEvents) print "outputEvents = " + str(outputEvents) if inputEvents!=0: if (outputEvents/float(inputEvents)*100) >= 100.0: pass #print dataset+" match: "+str(outputEvents/float(inputEvents)*100) +"%" else: #print dataset + " it is less than 99.9999% completed, keeping it in the current status" closeOut = False break else: print "Input Events 0" if closeOut: closedOut.append(workflow) else: nonClosedOut.append(workflow) for workflow in closedOut: status = getStatus(url, workflow) #if status == 'completed': # closeOutTaskChainWorkflows(url, workflow) #else: # pass print "%90s\tYES\t\t%s" % (workflow, status) for workflow in nonClosedOut: status = getStatus(url, workflow) print "%90s\tNO\t\t%s" % (workflow, status) f.close print '-------------------------------------------------------------------------------------------------------------------------------------' sys.exit(0)
def getMaxLumi(url, workflow):
    """Return the value printed by the dbssql max-lumi query for the first
    output dataset of *workflow*, or -1 when that output is not an integer.

    NOTE(review): the *url* parameter is unused here.
    """
    outputDataSets=phedexSubscription.outputdatasetsWorkflow(url, workflow)
    dataset=outputDataSets[0]
    # SECURITY NOTE(review): *dataset* is interpolated unquoted into a shell
    # pipeline; a dataset name containing shell metacharacters would be
    # executed by the shell. Acceptable only for trusted dataset names —
    # consider subprocess with an argument list if this can see untrusted input.
    output=os.popen("./dbssql --input='find run, max(lumi) where dataset="+dataset+"'| awk '{print $2}' | grep '[0-9]\{1,\}'").read()
    try:
        return int(output)
    except ValueError:
        # Empty or non-numeric output (e.g. query failed or no lumis found).
        return -1
def testWorkflow(url, workflow):
    """Return 1 when every output dataset of *workflow* passes
    testOutputDataset, 0 as soon as one fails.

    Cleanup: the original set a 'subscribed' flag that was never read
    (it returned immediately on the first failure); the dead local is removed.
    """
    datasets=phedexSubscription.outputdatasetsWorkflow(url, workflow)
    for dataset in datasets:
        if not testOutputDataset(dataset):
            return 0
    return 1
def classifyRequests(requests, dataset): for request in requests: name=request['request_name'] outputDatasets=phedexSubscription.outputdatasetsWorkflow('cmsweb.cern.ch',name) for out in outputDatasets: if dataset in out: print name return "None"
def testWorkflow(url, workflow):
    """Return 1 when every output dataset of *workflow* passes
    testOutputDataset, 0 as soon as one fails."""
    for ds in phedexSubscription.outputdatasetsWorkflow(url, workflow):
        if not testOutputDataset(ds):
            # First failing dataset decides the result.
            return 0
    return 1
def classifyRequests(requests, dataset): for request in requests: name = request['request_name'] outputDatasets = phedexSubscription.outputdatasetsWorkflow( 'cmsweb.cern.ch', name) for out in outputDatasets: if dataset in out: print name return "None"
def runNotinAllDatasets(url, run, workflow):
    """Return True when *run* is absent from the input dataset, or when it is
    present in at least one output dataset of *workflow*; False otherwise."""
    output_sets = phedexSubscription.outputdatasetsWorkflow(url, workflow)
    source_set = dbsTest.getInputDataSet(url, workflow)
    if not runInDataset(url, run, source_set):
        # The run never entered processing at all.
        return True
    # True as soon as any output dataset already contains the run.
    return any(runInDataset(url, run, ds) for ds in output_sets)
def testEventCountWorkflow(url, workflow): inputEvents=0 inputEvents=inputEvents+dbsTest.getInputEvents(url, workflow) datasets=phedexSubscription.outputdatasetsWorkflow(url, workflow) for dataset in datasets: outputEvents=dbsTest.getEventCountDataSet(dataset) percentage=outputEvents/float(inputEvents) if float(percentage)>float(1): print "Workflow: " + workflow+" duplicate events in outputdataset: "+dataset +" percentage: "+str(outputEvents/float(inputEvents)*100) +"%" return 1
def testEventCountWorkflow(url, workflow): inputEvents = 0 inputEvents = inputEvents + dbsTest.getInputEvents(url, workflow) datasets = phedexSubscription.outputdatasetsWorkflow(url, workflow) for dataset in datasets: outputEvents = dbsTest.getEventCountDataSet(dataset) percentage = outputEvents / float(inputEvents) if float(percentage) > float(1): print "Workflow: " + workflow + " duplicate events in outputdataset: " + dataset + " percentage: " + str( outputEvents / float(inputEvents) * 100) + "%" return 1
def main(): args = sys.argv[1:] if not len(args) == 1: print "usage:dbsTest workflowname" sys.exit(0) workflow = args[0] url = 'cmsweb.cern.ch' requestType = dbsTest.getWorkflowType(url, workflow) datasets = phedexSubscription.outputdatasetsWorkflow(url, workflow) for datasetName in datasets: testOutputDataset(datasetName, requestType) sys.exit(0)
def main(): args=sys.argv[1:] if not len(args)==1: print "usage:dbsTest workflowname" sys.exit(0) workflow=args[0] url='cmsweb.cern.ch' requestType=dbsTest.getWorkflowType(url, workflow) datasets=phedexSubscription.outputdatasetsWorkflow(url, workflow) for datasetName in datasets: testOutputDataset(datasetName, requestType) sys.exit(0);
def main(): args = sys.argv[1:] if not len(args) == 1: print "usage:checkStep0Output workflowname" sys.exit(0) workflow = args[0] url = 'cmsweb.cern.ch' dataset = phedexSubscription.outputdatasetsWorkflow(url, workflow)[0] correctLumi = dbsTest.checkCorrectLumisEventGEN(dataset) if correctLumi: print "The workflow is correct" else: print "The output Dataset has at least one lumi with more than 300 events, please check it." sys.exit(0)
def main(): args=sys.argv[1:] if not len(args)==1: print "usage:checkStep0Output workflowname" sys.exit(0) workflow=args[0] url='cmsweb.cern.ch' dataset=phedexSubscription.outputdatasetsWorkflow(url, workflow)[0] correctLumi=dbsTest.checkCorrectLumisEventGEN(dataset) if correctLumi: print "The workflow is correct" else: print "The output Dataset has at least one lumi with more than 300 events, please check it." sys.exit(0);
def main(): args=sys.argv[1:] if not len(args)==1: print "usage:dbsTest workflowname" sys.exit(0) workflow=args[0] url='cmsweb.cern.ch' outputDataSets=phedexSubscription.outputdatasetsWorkflow(url, workflow) inputEvents=getInputEvents(url, workflow) for dataset in outputDataSets: outputEvents=getOutputEvents(url, workflow, dataset) if inputEvents!=0: print dataset+" match: "+str(outputEvents/float(inputEvents)*100) +"%" else: print "Input Events 0" sys.exit(0);
def classifyCompletedRequests(url, requests):
    """Bucket completed requests by type.

    Returns a dict with keys 'ReDigi', 'MonteCarloFromGEN', 'MonteCarlo',
    'ReReco' and 'LHEStepZero', each mapping to a list of request ids.
    MonteCarlo requests whose first output dataset ends in '/GEN' are
    re-classified as LHEStepZero.

    *requests* are couch-style rows: 'id' plus a 'key' list whose second and
    third elements are status and type.
    """
    workflows={'ReDigi':[],'MonteCarloFromGEN':[],'MonteCarlo':[] , 'ReReco':[], 'LHEStepZero':[]}
    for request in requests:
        name=request['id']
        # Malformed rows (key too short) are printed and skipped.
        if len(request['key'])<3:
            print request
            continue
        status=request['key'][1]
        requestType=request['key'][2]
        if status=='completed':
            if requestType=='MonteCarlo':
                datasets=phedexSubscription.outputdatasetsWorkflow(url, name)
                # A /GEN first output tier marks a step-zero (LHE) request.
                m=re.search('.*/GEN$',datasets[0])
                if m:
                    workflows['LHEStepZero'].append(name)
                else:
                    workflows[requestType].append(name)
            if requestType=='MonteCarloFromGEN' or requestType=='LHEStepZero'or requestType=='ReDigi' or requestType=='ReReco':
                workflows[requestType].append(name)
    return workflows
def closeOutRedigiWorkflows(url, workflows):
    """Print a close-out report for ReDigi workflows and close out those
    whose every output dataset is >= 95% complete, subscribed, and free of
    duplicate run/lumis."""
    for workflow in workflows:
        closeOutWorkflow=True
        InputDataset=dbsTest.getInputDataSet(url, workflow)
        datasets=phedexSubscription.outputdatasetsWorkflow(url, workflow)
        for dataset in datasets:
            closeOutDataset=False
            Percentage=PercentageCompletion(url, workflow, dataset)
            PhedexSubscription=testOutputDataset(dataset)
            duplicate=True
            if PhedexSubscription!=False and Percentage>=float(0.95):
                # Only run the (expensive) duplicate check once the dataset
                # is subscribed and nearly complete.
                duplicate=dbsTest.duplicateRunLumi(dataset)
            if Percentage>=float(0.95) and PhedexSubscription and not duplicate:
                closeOutDataset=True
            else:
                closeOutDataset=False
            # Workflow closes out only if every dataset qualifies.
            closeOutWorkflow=closeOutWorkflow and closeOutDataset
            print '| %80s | %100s | %4s | %5s| %3s | %5s|%5s| ' % (workflow, dataset,str(int(Percentage*100)), str(PhedexSubscription), 100, duplicate, closeOutDataset)
        if closeOutWorkflow:
            phedexSubscription.closeOutWorkflow(url, workflow)
    print '----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------'
def main(): parser = optparse.OptionParser() parser.add_option("--correct_env", action="store_true", dest="correct_env") (options, args) = parser.parse_args() command = "" for arg in sys.argv: command = command + arg + " " if not options.correct_env: os.system( "source /afs/cern.ch/project/gd/LCG-share/current_3.2/etc/profile.d/grid-env.sh; python2.6 " + command + "--correct_env" ) sys.exit(0) # args=sys.argv[1:] if not len(args) == 1: print "usage: python getRelValDsetNames.py <inputFile_containing_a_list_of_workflows>" sys.exit(0) inputFile = args[0] f = open(inputFile, "r") url = "cmsweb.cern.ch" for line in f: workflow = line.rstrip("\n") outputDataSets = phedexSubscription.outputdatasetsWorkflow(url, workflow) # print "These are the output datasets:" # print outputDataSets # inputEvents=getInputEvents(url, workflow) # print inputEvents for dataset in outputDataSets: outputEvents = dbsTest.getEventCountDataSet("https://cmsweb.cern.ch", dataset) # print dataset+" match: "+str(outputEvents/float(inputEvents)*100) +"%" print dataset + "\t\t" + str(outputEvents) f.close sys.exit(0)
def main(): args=sys.argv[1:] if not len(args)==1: print "usage: python getRelValDsetNames.py <inputFile_containing_a_list_of_workflows>" sys.exit(0) inputFile=args[0] f = open(inputFile, 'r') url='cmsweb.cern.ch' for line in f: workflow = line.rstrip('\n') outputDataSets=phedexSubscription.outputdatasetsWorkflow(url, workflow) # print "These are the output datasets:" # print outputDataSets #inputEvents=getInputEvents(url, workflow) #print inputEvents for dataset in outputDataSets: outputEvents=dbsTest.getEventCountDataSet("https://cmsweb.cern.ch",dataset) # print dataset+" match: "+str(outputEvents/float(inputEvents)*100) +"%" print dataset+"\t\t"+str(outputEvents) f.close sys.exit(0);
def closeOutReRecoWorkflows(url, workflows):
    """Print a close-out report for ReReco workflows (skipping RelVal/TEST)
    and close out those whose every output dataset is 100% complete and
    subscribed."""
    for workflow in workflows:
        # RelVal and TEST workflows are handled elsewhere.
        if 'RelVal' in workflow:
            continue
        if 'TEST' in workflow:
            continue
        datasets=phedexSubscription.outputdatasetsWorkflow(url, workflow)
        closeOutWorkflow=True
        InputDataset=dbsTest.getInputDataSet(url, workflow)
        for dataset in datasets:
            # Duplicate check is disabled for ReReco: always False here.
            duplicate=False
            closeOutDataset=True
            Percentage=PercentageCompletion(url, workflow, dataset)
            PhedexSubscription=testOutputDataset(dataset)
            closeOutDataset=False
            if Percentage==1 and PhedexSubscription and not duplicate:
                closeOutDataset=True
            else:
                closeOutDataset=False
            # Workflow closes out only if every dataset qualifies.
            closeOutWorkflow=closeOutWorkflow and closeOutDataset
            print '| %80s | %100s | %4s | %5s| %3s | %5s|%5s| ' % (workflow, dataset,str(int(Percentage*100)), str(PhedexSubscription), 100, duplicate, closeOutDataset)
        if closeOutWorkflow:
            phedexSubscription.closeOutWorkflow(url, workflow)
    print '----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------'
def main():
    """For each workflow in the input file, derive the acquisition era and
    LFN prefix from its prepID (hard-coded campaign table below), collect the
    output data tiers, and print the tape family path plus the T1 site where
    the family must exist.
    """
    args = sys.argv[1:]
    if not len(args) == 1:
        print "usage:listReqTapeFamilies.py filename"
        print "where the file should contain a list of workflows"
        sys.exit(0)
    sites = [
        'T1_DE_KIT', 'T1_FR_CCIN2P3', 'T1_IT_CNAF', 'T1_ES_PIC', 'T1_TW_ASGC',
        'T1_UK_RAL', 'T1_US_FNAL'
    ]
    filename = args[0]
    url = 'cmsweb.cern.ch'
    workflows = phedexSubscription.workflownamesfromFile(filename)
    for workflow in workflows:
        outputDataSets = phedexSubscription.outputdatasetsWorkflow(
            url, workflow)
        prepID = getPrepID(url, workflow)
        ods = []
        # Set defaults & era — campaign-specific table keyed on prepID substrings.
        lfn = '/store/mc'
        era = 'NONE'
        if 'Summer12_DR52X' in prepID:
            ods = ['GEN-SIM-RECO', 'AODSIM', 'DQM']
            era = 'Summer12'
        if 'Summer12_DR53X' in prepID or 'Summer12DR53X' in prepID:
            ods = ['GEN-SIM-RECO', 'AODSIM', 'DQM']
            era = 'Summer12_DR53X'
        if 'Summer13dr53X' in prepID:
            ods = ['GEN-SIM-RECO', 'AODSIM', 'DQM']
            era = 'Summer13dr53X'
        if 'Summer11dr53X' in prepID:
            #ods = ['GEN-SIM-RECO', 'AODSIM', 'DQM']
            era = 'Summer11dr53X'
        if 'UpgFall13d' in prepID:
            era = 'UpgFall13d'
        if 'Fall13dr' in prepID:
            era = 'Fall13dr'
        if 'Summer11LegDR' in prepID:
            era = 'Summer11LegDR'
        if 'Spring14dr' in prepID:
            era = 'Spring14dr'
        if 'HiFall13DR53X' in prepID:
            era = 'HiFall13DR53X'
            lfn = '/store/himc'
        if 'Fall11_R' in prepID or 'Fall11_HLTMuonia' in prepID or 'Fall11R' in prepID:
            ods = ['GEN-RAW', 'GEN-SIM-RECO', 'AODSIM', 'DQM']
            era = 'Fall11'
        if 'LowPU2010_DR' in prepID:
            era = 'Summer12'
        if 'UpgradeL1TDR_DR6X' in prepID:
            era = 'Summer12'
        if 'Winter13' in prepID or 'Winter13' in workflow:
            ods = ['GEN-SIM-RECO', 'AODSIM', 'DQM']
            era = 'HiWinter13'
            lfn = '/store/himc'
        if 'HiFall11DR44' in prepID:
            era = 'HiFall11'
            lfn = '/store/himc'
        if 'UpgradePhase' in workflow and ('DR61SLHCx' in workflow
                                           or 'dr61SLHCx' in workflow):
            era = 'Summer13'
            lfn = '/store/mc'
        # Check for any additionals, e.g. GEN-SIM-RECODEBUG
        for extra in outputDataSets:
            bits = extra.split('/')
            if bits[len(bits) - 1] not in ods:
                ods.append(bits[len(bits) - 1])
        inputDataset = getInputDataSet(url, workflow)
        inputDatasetComps = inputDataset.split('/')
        # Determine site where workflow should be run
        count = 0
        for site in sites:
            if site in workflow:
                count = count + 1
                siteUse = site
        # Find custodial location of input dataset if workflow name contains no T1 site or multiple T1 sites
        if count == 0 or count > 1:
            siteUse = findCustodialLocation(url, inputDataset)
            # Strip the trailing buffer suffix (e.g. '_MSS') from the node name.
            siteUse = siteUse[:-4]
        # List required tape families and site name
        for od in ods:
            tapeFamily = lfn + '/' + era + '/' + inputDatasetComps[1] + '/' + od
            print tapeFamily, ' ', siteUse
    sys.exit(0)
def main():
    """For each workflow in the input file, derive the acquisition era and
    LFN prefix from its prepID (hard-coded campaign table below), collect the
    output data tiers, and print the tape family path plus the T1 site where
    the family must exist.
    """
    args=sys.argv[1:]
    if not len(args)==1:
        print "usage:listReqTapeFamilies.py filename"
        print "where the file should contain a list of workflows"
        sys.exit(0)
    sites = ['T1_DE_KIT', 'T1_FR_CCIN2P3', 'T1_IT_CNAF', 'T1_ES_PIC', 'T1_TW_ASGC', 'T1_UK_RAL', 'T1_US_FNAL']
    filename=args[0]
    url='cmsweb.cern.ch'
    workflows=phedexSubscription.workflownamesfromFile(filename)
    for workflow in workflows:
        outputDataSets=phedexSubscription.outputdatasetsWorkflow(url, workflow)
        prepID = getPrepID(url, workflow)
        ods = []
        # Set defaults & era — campaign-specific table keyed on prepID substrings.
        lfn = '/store/mc'
        era = 'NONE'
        if 'Summer12_DR52X' in prepID:
            ods = ['GEN-SIM-RECO', 'AODSIM', 'DQM']
            era = 'Summer12'
        if 'Summer12_DR53X' in prepID or 'Summer12DR53X' in prepID:
            ods = ['GEN-SIM-RECO', 'AODSIM', 'DQM']
            era = 'Summer12_DR53X'
        if 'Summer13dr53X' in prepID:
            ods = ['GEN-SIM-RECO', 'AODSIM', 'DQM']
            era = 'Summer13dr53X'
        if 'Summer11dr53X' in prepID:
            #ods = ['GEN-SIM-RECO', 'AODSIM', 'DQM']
            era = 'Summer11dr53X'
        if 'Fall11_R' in prepID or 'Fall11_HLTMuonia' in prepID or 'Fall11R' in prepID:
            ods = ['GEN-RAW', 'GEN-SIM-RECO', 'AODSIM', 'DQM']
            era = 'Fall11'
        if 'LowPU2010_DR' in prepID:
            era = 'Summer12'
        if 'UpgradeL1TDR_DR6X' in prepID:
            era = 'Summer12'
        if 'Winter13' in prepID or 'Winter13' in workflow:
            ods = ['GEN-SIM-RECO', 'AODSIM', 'DQM']
            era = 'HiWinter13'
            lfn = '/store/himc'
        if 'HiFall11DR44' in prepID:
            era = 'HiFall11'
            lfn = '/store/himc'
        if 'UpgradePhase' in workflow and 'DR61SLHCx' in workflow:
            era = 'Summer13'
            lfn = '/store/mc'
        # Check for any additionals, e.g. GEN-SIM-RECODEBUG
        for extra in outputDataSets:
            bits = extra.split('/')
            if bits[len(bits)-1] not in ods:
                ods.append(bits[len(bits)-1])
        inputDataset = getInputDataSet(url, workflow)
        inputDatasetComps = inputDataset.split('/')
        # Determine site where workflow should be run
        count=0
        for site in sites:
            if site in workflow:
                count=count+1
                siteUse = site
        # Find custodial location of input dataset if workflow name contains no T1 site or multiple T1 sites
        if count==0 or count>1:
            siteUse = findCustodialLocation(url, inputDataset)
            # Strip the trailing buffer suffix (e.g. '_MSS') from the node name.
            siteUse = siteUse[:-4]
        # List required tape families and site name
        for od in ods:
            tapeFamily = lfn+'/'+era+'/'+inputDatasetComps[1]+'/'+od
            print tapeFamily,' ',siteUse
    sys.exit(0);
def main(): url='cmsweb.cern.ch' args=sys.argv[1:] if not len(args)==1: print "usage: python closeOutTaskChainWorkflows.py <inputFile_containing_a_list_of_workflows>" sys.exit(0) inputFile=args[0] f = open(inputFile, 'r') print '-------------------------------------------------------------------------------------------------------------------------------------' print '| Request | Closed-out? | Current status |' print '-------------------------------------------------------------------------------------------------------------------------------------' closedOut = [] nonClosedOut = [] tooManyEvents = [] running = [] for line in f: workflow = line.rstrip('\n') outputDatasets=phedexSubscription.outputdatasetsWorkflow(url, workflow) inputEvents=dbsTest.getInputEvents(url, workflow) closeOut = True tooMany = False for dataset in outputDatasets: # we cannot calculate completion of ALCARECO samples if 'ALCARECO' in dataset: continue outputEvents=dbsTest.getOutputEvents(url, workflow, dataset) if inputEvents!=0: if outputEvents == inputEvents: pass #print dataset+" match: "+str(outputEvents/float(inputEvents)*100) +"%" elif outputEvents < inputEvents : #print dataset + " it is less than 99.9999% completed, keeping it in the current status" closeOut = False elif outputEvents > inputEvents : closeOut = False tooMany = True break else: print "Input Events 0" if closeOut: closedOut.append(workflow) else: nonClosedOut.append(workflow) if tooMany: tooManyEvents.append(workflow) for workflow in closedOut: status = getStatus(url, workflow) if status == 'completed': closeOutTaskChainWorkflows(url, workflow) else: pass print "%90s\tYES\t\t%s" % (workflow, status) for workflow in nonClosedOut: status = getStatus(url, workflow) print "%90s\tNO\t\t%s" % (workflow, status) print '-------------------------------------------------------------------------------------------------------------------------------------' for workflow in tooManyEvents: 
outputDatasets=phedexSubscription.outputdatasetsWorkflow(url, workflow) inputEvents=dbsTest.getInputEvents(url, workflow) for dataset in outputDatasets: # we cannot calculate completion of ALCARECO samples if 'ALCARECO' in dataset: continue outputEvents=dbsTest.getOutputEvents(url, workflow, dataset) if inputEvents!=0: if outputEvents > inputEvents: print "WARNING about workflow " + workflow + ": The dataset " + dataset + " contains MORE events than expected. " + str(inputEvents) + " events were expected and " + str(outputEvents) + " were found." f.close sys.exit(0)
def getFinalRequestedNumEvents(url, workflow): outputDataSets=phedexSubscription.outputdatasetsWorkflow(url, workflow) obtainedEvents=dbsTest.getOutputEvents(url, workflow, outputDataSets[0]) requestedEvents=dbsTest.getInputEvents(url, workflow) return (requestedEvents-obtainedEvents)
def main(): url = 'cmsweb.cern.ch' args = sys.argv[1:] if not len(args) == 1: print "usage: python closeOutTaskChainWorkflows.py <inputFile_containing_a_list_of_workflows>" sys.exit(0) inputFile = args[0] f = open(inputFile, 'r') print '-------------------------------------------------------------------------------------------------------------------------------------' print '| Request | Closed-out? | Current status |' print '-------------------------------------------------------------------------------------------------------------------------------------' closedOut = [] nonClosedOut = [] tooManyEvents = [] running = [] for line in f: workflow = line.rstrip('\n') outputDatasets = phedexSubscription.outputdatasetsWorkflow( url, workflow) inputEvents = dbsTest.getInputEvents(url, workflow) closeOut = True tooMany = False for dataset in outputDatasets: # we cannot calculate completion of ALCARECO samples if 'ALCARECO' in dataset: continue outputEvents = dbsTest.getOutputEvents(url, workflow, dataset) if inputEvents != 0: if outputEvents == inputEvents: pass #print dataset+" match: "+str(outputEvents/float(inputEvents)*100) +"%" elif outputEvents < inputEvents: #print dataset + " it is less than 99.9999% completed, keeping it in the current status" closeOut = False elif outputEvents > inputEvents: closeOut = False tooMany = True break else: print "Input Events 0" if closeOut: closedOut.append(workflow) else: nonClosedOut.append(workflow) if tooMany: tooManyEvents.append(workflow) for workflow in closedOut: status = getStatus(url, workflow) if status == 'completed': closeOutTaskChainWorkflows(url, workflow) else: pass print "%90s\tYES\t\t%s" % (workflow, status) for workflow in nonClosedOut: status = getStatus(url, workflow) print "%90s\tNO\t\t%s" % (workflow, status) print '-------------------------------------------------------------------------------------------------------------------------------------' for workflow in tooManyEvents: outputDatasets = 
phedexSubscription.outputdatasetsWorkflow( url, workflow) inputEvents = dbsTest.getInputEvents(url, workflow) for dataset in outputDatasets: # we cannot calculate completion of ALCARECO samples if 'ALCARECO' in dataset: continue outputEvents = dbsTest.getOutputEvents(url, workflow, dataset) if inputEvents != 0: if outputEvents > inputEvents: print "WARNING about workflow " + workflow + ": The dataset " + dataset + " contains MORE events than expected. " + str( inputEvents) + " events were expected and " + str( outputEvents) + " were found." f.close sys.exit(0)
def main(): url='cmsweb.cern.ch' parser = optparse.OptionParser() parser.add_option('--test',action="store_true", help='Nothing is closed out. Only test if the workflows are ready to be closed out.',dest='test') parser.add_option('--verbose',action="store_true", help='Print out details about the number of events expected and produced.',dest='verbose') parser.add_option('--correct_env',action="store_true",dest='correct_env') (options,args) = parser.parse_args() command="" for arg in sys.argv: command=command+arg+" " if not options.correct_env: os.system("source /afs/cern.ch/cms/LCG/LCG-2/UI/cms_ui_env.sh; source /afs/cern.ch/project/gd/LCG-share/current_3.2/etc/profile.d/grid-env.sh; python2.6 "+command + "--correct_env") sys.exit(0) if len(args) != 1: print "Usage:" print "python closeOutTaskChainWorkflows.py [--test] [--verbose] <inputFile_containing_a_list_of_workflows>" sys.exit(0) inputFile=args[0] f = open(inputFile, 'r') closedOut = [] nonClosedOut = [] tooManyEventsOrLumis = [] running = [] for line in f: workflow = line.rstrip('\n') print "checking workflow " + workflow jsn = getWorkflowJson(url, workflow) if jsn['RequestType'] != 'TaskChain': print "workflow type is not TaskChain, exiting" sys.exit(0) outputDatasets=phedexSubscription.outputdatasetsWorkflow(url, workflow) if 'RequestNumEvents' in jsn['Task1'] and 'InputDataset' in jsn['Task1']: print "both RequestNumEvents and InputDataset in Task1, exiting" sys.exit(1) if 'RequestNumEvents' in jsn['Task1']: inputEvents = jsn['Task1']['RequestNumEvents'] closeOut = True tooMany = False for dataset in outputDatasets: print " checking dataset " + dataset # we cannot calculate completion of ALCARECO samples if 'ALCARECO' in dataset: continue outputEvents=0 tries=0 while outputEvents==0 and tries < 3: outputEvents=dbsTest.getOutputEvents(dataset) if outputEvents==0: print "0 output lumis" time.sleep(50) tries=tries+1 if options.verbose: print " input events: " + str(inputEvents) print " output events: " + 
str(outputEvents) if outputEvents == inputEvents: pass elif outputEvents < inputEvents : closeOut = False elif outputEvents > inputEvents : closeOut = False tooMany = True break if closeOut: closedOut.append(workflow) else: nonClosedOut.append(workflow) if tooMany: tooManyEventsOrLumis.append(workflow) elif 'InputDataset' in jsn['Task1']: inputLumis=dbsTest.getInputLumis(jsn) closeOut = True tooMany = False for dataset in outputDatasets: print " checking dataset " + dataset # we cannot calculate completion of ALCARECO samples if 'ALCARECO' in dataset: continue outputLumis=0 tries=0 while outputLumis==0 and tries < 3: outputLumis=dbsTest.getOutputLumis(dataset) if outputLumis==0: print "0 output lumis" time.sleep(50) tries=tries+1 if options.verbose: print " input lumis: " + str(inputLumis) print " output lumis: " + str(outputLumis) if outputLumis == inputLumis: pass elif outputLumis < inputLumis : closeOut = False elif outputLumis > inputLumis : closeOut = False tooMany = True break if closeOut: closedOut.append(workflow) else: nonClosedOut.append(workflow) if tooMany: tooManyEventsOrLumis.append(workflow) else: print "neither RequestNumEvents nor InputDataset in Task1, exiting" sys.exit(1) print '-------------------------------------------------------------------------------------------------------------------------------------' print '| Request | Closed-out? 
| Current status |' print '-------------------------------------------------------------------------------------------------------------------------------------' for workflow in closedOut: jsn = getWorkflowJson(url, workflow) if jsn['RequestStatus'] == 'completed': if not options.test: closeOutTaskChainWorkflows(url, workflow) else: pass status = jsn['RequestStatus'] print "%90s\tYES\t\t%s" % (workflow, status) for workflow in nonClosedOut: jsn = getWorkflowJson(url, workflow) status = jsn['RequestStatus'] print "%90s\tNO\t\t%s" % (workflow, status) print '-------------------------------------------------------------------------------------------------------------------------------------' for workflow in tooManyEventsOrLumis: print "WARNING: One of the datasets produced by the workflow "+workflow+" contains MORE events or lumis than expected. Rerun with the --verbose flag to get more information." f.close sys.exit(0)