def main():
    """Report which workflows listed in an input file look fully produced.

    Expects exactly one command-line argument: the path of a text file with
    one workflow name per line.  For each workflow the input event count
    (``dbsTest.getInputEvents``) is compared with the event count of every
    output dataset (``dbsTest.getOutputEvents``); datasets whose name
    contains 'ALCARECO' are skipped.  A workflow is reported as "YES" unless
    one of the checked datasets holds less than 100% of the input events.

    This variant only prints a report; it never closes a workflow out.
    The process always terminates through ``sys.exit(0)``, including after
    printing the usage message.
    """
    url = 'cmsweb.cern.ch'
    args = sys.argv[1:]
    if len(args) != 1:
        print("usage: python closeOutTaskChainWorkflows.py <inputFile_containing_a_list_of_workflows>")
        sys.exit(0)
    inputFile = args[0]

    # Read the whole list inside a `with` block so the file is reliably
    # closed; a bare `f.close` (without parentheses) closes nothing.
    with open(inputFile, 'r') as f:
        workflows = [line.rstrip('\n') for line in f]

    separator = '-------------------------------------------------------------------------------------------------------------------------------------'
    print(separator)
    print('| Request | Closed-out? | Current status |')
    print(separator)

    closedOut = []
    nonClosedOut = []
    for workflow in workflows:
        if not workflow:
            # A blank line (e.g. a trailing newline at the end of the file)
            # is not a workflow name, so do not query the services for it.
            continue
        outputDatasets = phedexSubscription.outputdatasetsWorkflow(url, workflow)
        inputEvents = dbsTest.getInputEvents(url, workflow)
        closeOut = True
        for dataset in outputDatasets:
            # we cannot calculate completion of ALCARECO samples
            if 'ALCARECO' in dataset:
                continue
            outputEvents = dbsTest.getOutputEvents(url, workflow, dataset)
            print("dataset = " + dataset)
            print("inputEvents = " + str(inputEvents))
            print("outputEvents = " + str(outputEvents))
            if inputEvents == 0:
                # No completion ratio can be computed without input events.
                print("Input Events 0")
                continue
            if outputEvents / float(inputEvents) * 100 >= 100.0:
                continue
            # This dataset is incomplete: one is enough to keep the workflow
            # in its current status, so stop checking the remaining ones.
            closeOut = False
            break
        if closeOut:
            closedOut.append(workflow)
        else:
            nonClosedOut.append(workflow)

    # Report only: the status is looked up and printed, nothing is changed.
    for workflow in closedOut:
        status = getStatus(url, workflow)
        print("%90s\tYES\t\t%s" % (workflow, status))
    for workflow in nonClosedOut:
        status = getStatus(url, workflow)
        print("%90s\tNO\t\t%s" % (workflow, status))
    print(separator)
    sys.exit(0)
def main():
    """Print a close-out readiness table for the workflows named in a file.

    The single command-line argument is the path of a text file containing
    one workflow name per line.  A workflow counts as ready ("YES") when
    every output dataset that is checked has at least 100% of the workflow's
    input events; datasets with 'ALCARECO' in their name are not checked.
    Nothing is closed out here: the current status of each workflow is only
    looked up with ``getStatus`` and printed.

    Exits with status 0 in every case, including the usage message.
    """
    url = 'cmsweb.cern.ch'
    args = sys.argv[1:]
    if len(args) != 1:
        print("usage: python closeOutTaskChainWorkflows.py <inputFile_containing_a_list_of_workflows>")
        sys.exit(0)

    # `with` closes the file even if something below raises; writing
    # `f.close` without calling it would leave the handle open.
    workflows = []
    with open(args[0], 'r') as f:
        for line in f:
            name = line.rstrip('\n')
            # Empty lines carry no workflow name; querying for '' is useless.
            if name:
                workflows.append(name)

    rule = '-------------------------------------------------------------------------------------------------------------------------------------'
    print(rule)
    print('| Request | Closed-out? | Current status |')
    print(rule)

    closedOut = []
    nonClosedOut = []
    for workflow in workflows:
        outputDatasets = phedexSubscription.outputdatasetsWorkflow(url, workflow)
        inputEvents = dbsTest.getInputEvents(url, workflow)
        closeOut = True
        for dataset in outputDatasets:
            # we cannot calculate completion of ALCARECO samples
            if 'ALCARECO' in dataset:
                continue
            outputEvents = dbsTest.getOutputEvents(url, workflow, dataset)
            print("dataset = " + dataset)
            print("inputEvents = " + str(inputEvents))
            print("outputEvents = " + str(outputEvents))
            if inputEvents != 0:
                percentDone = outputEvents / float(inputEvents) * 100
                if not percentDone >= 100.0:
                    # Incomplete dataset: keep the workflow in its current
                    # status and skip the remaining datasets.
                    closeOut = False
                    break
            else:
                print("Input Events 0")
        if closeOut:
            closedOut.append(workflow)
        else:
            nonClosedOut.append(workflow)

    # Ready workflows are listed first, then the ones that are not ready.
    for verdict, names in (("YES", closedOut), ("NO", nonClosedOut)):
        for workflow in names:
            status = getStatus(url, workflow)
            print("%90s\t%s\t\t%s" % (workflow, verdict, status))
    print(rule)
    sys.exit(0)
def PercentageCompletion(url, workflow, dataset):
    """Return the ratio of output events in `dataset` to the workflow's input events.

    The value is a plain fraction (output / input); it is not multiplied by
    100.  When the input event count is zero the integer 0 is returned
    instead of dividing.
    """
    requested = int(dbsTest.getInputEvents(url, workflow))
    produced = dbsTest.getOutputEvents(url, workflow, dataset)
    if requested == 0:
        return 0
    return produced / float(requested)
def PercentageCompletion(url, workflow, dataset):
    """Compute output events of `dataset` divided by the workflow's input events.

    Both counts are fetched through ``dbsTest``.  The result is a fraction
    (not scaled to percent); the integer 0 is returned when there are no
    input events.
    """
    totalInput = int(dbsTest.getInputEvents(url, workflow))
    totalOutput = dbsTest.getOutputEvents(url, workflow, dataset)
    # Guard the division: zero input events yields 0 rather than an error.
    return totalOutput / float(totalInput) if totalInput != 0 else 0
def main():
    """Close out the completed workflows whose output matches their input.

    Expects exactly one command-line argument: the path of a text file with
    one workflow name per line.  For each workflow the input event count is
    compared with the event count of every output dataset (datasets whose
    name contains 'ALCARECO' are skipped):

    * all checked datasets match the input count -> listed as "YES" and,
      if ``getStatus`` reports 'completed', passed to
      ``closeOutTaskChainWorkflows``;
    * any dataset has fewer or more events -> listed as "NO";
    * any dataset has more events than expected -> additionally reported
      in a WARNING section after the table.

    The status shown in the table is the one fetched before any close-out
    call.  The process always terminates through ``sys.exit(0)``.
    """
    url = 'cmsweb.cern.ch'
    args = sys.argv[1:]
    if len(args) != 1:
        print("usage: python closeOutTaskChainWorkflows.py <inputFile_containing_a_list_of_workflows>")
        sys.exit(0)
    inputFile = args[0]

    # Read the list inside a `with` block so the file is really closed; a
    # bare `f.close` (no parentheses) never closes it.
    with open(inputFile, 'r') as f:
        workflows = [line.rstrip('\n') for line in f]

    separator = '-------------------------------------------------------------------------------------------------------------------------------------'
    print(separator)
    print('| Request | Closed-out? | Current status |')
    print(separator)

    closedOut = []
    nonClosedOut = []
    tooManyEvents = []
    for workflow in workflows:
        if not workflow:
            # Blank lines (e.g. a trailing empty line) name no workflow.
            continue
        outputDatasets = phedexSubscription.outputdatasetsWorkflow(url, workflow)
        inputEvents = dbsTest.getInputEvents(url, workflow)
        closeOut = True
        tooMany = False
        for dataset in outputDatasets:
            # we cannot calculate completion of ALCARECO samples
            if 'ALCARECO' in dataset:
                continue
            outputEvents = dbsTest.getOutputEvents(url, workflow, dataset)
            if inputEvents == 0:
                print("Input Events 0")
                continue
            if outputEvents < inputEvents:
                # Incomplete dataset: keep the workflow in its current status.
                closeOut = False
            elif outputEvents > inputEvents:
                # More events than requested: flag it and stop checking.
                closeOut = False
                tooMany = True
                break
        if closeOut:
            closedOut.append(workflow)
        else:
            nonClosedOut.append(workflow)
        if tooMany:
            tooManyEvents.append(workflow)

    for workflow in closedOut:
        status = getStatus(url, workflow)
        if status == 'completed':
            closeOutTaskChainWorkflows(url, workflow)
        print("%90s\tYES\t\t%s" % (workflow, status))
    for workflow in nonClosedOut:
        status = getStatus(url, workflow)
        print("%90s\tNO\t\t%s" % (workflow, status))
    print(separator)

    # The first pass stops at the first oversized dataset, so query again to
    # list every dataset of the flagged workflows that has too many events.
    for workflow in tooManyEvents:
        outputDatasets = phedexSubscription.outputdatasetsWorkflow(url, workflow)
        inputEvents = dbsTest.getInputEvents(url, workflow)
        for dataset in outputDatasets:
            # we cannot calculate completion of ALCARECO samples
            if 'ALCARECO' in dataset:
                continue
            outputEvents = dbsTest.getOutputEvents(url, workflow, dataset)
            if inputEvents != 0 and outputEvents > inputEvents:
                print("WARNING about workflow " + workflow + ": The dataset " + dataset + " contains MORE events than expected. " + str(inputEvents) + " events were expected and " + str(outputEvents) + " were found.")
    sys.exit(0)
def _checkOutputCounts(outputDatasets, expected, countOutput, unit, verbose):
    """Compare the produced count of each output dataset with `expected`.

    `countOutput` is called with a dataset name and returns its count;
    `unit` ('events' or 'lumis') is only used in the printed messages.
    Datasets whose name contains 'ALCARECO' are skipped.

    Returns a ``(closeOut, tooMany)`` pair: `closeOut` is False as soon as a
    dataset differs from `expected`; `tooMany` is True when a dataset holds
    more than `expected`, in which case the remaining datasets are not
    checked.
    """
    closeOut = True
    tooMany = False
    for dataset in outputDatasets:
        print(" checking dataset " + dataset)
        # we cannot calculate completion of ALCARECO samples
        if 'ALCARECO' in dataset:
            continue
        # A count of 0 is retried up to three times, pausing 50 seconds
        # after each empty answer, before it is accepted as the real value.
        produced = 0
        tries = 0
        while produced == 0 and tries < 3:
            produced = countOutput(dataset)
            if produced == 0:
                print("0 output " + unit)
                time.sleep(50)
            tries = tries + 1
        if verbose:
            print(" input " + unit + ": " + str(expected))
            print(" output " + unit + ": " + str(produced))
        if produced < expected:
            closeOut = False
        elif produced > expected:
            closeOut = False
            tooMany = True
            break
    return closeOut, tooMany


def main():
    """Check the TaskChain workflows listed in a file and close out the ready ones.

    Command line: ``[--test] [--verbose] <inputFile>``, where the input file
    holds one workflow name per line.  Unless ``--correct_env`` is given, the
    script re-runs itself through ``os.system`` after sourcing two
    environment scripts, appending ``--correct_env``, and then exits.

    For each workflow, Task1 of its request JSON decides the comparison:
    'RequestNumEvents' compares output events with the requested number,
    'InputDataset' compares output lumis with the input lumis.  Workflows
    whose checked datasets all match are listed as "YES" and, when their
    'RequestStatus' is 'completed' and ``--test`` is not set, passed to
    ``closeOutTaskChainWorkflows``.  All others are listed as "NO";
    workflows with a dataset above the expected count also get a WARNING.

    Exit status: 1 when Task1 has both or neither of the two keys; 0
    otherwise (including a non-TaskChain workflow and the usage message).
    """
    url = 'cmsweb.cern.ch'
    parser = optparse.OptionParser()
    parser.add_option('--test', action="store_true",
                      help='Nothing is closed out. Only test if the workflows are ready to be closed out.',
                      dest='test')
    parser.add_option('--verbose', action="store_true",
                      help='Print out details about the number of events expected and produced.',
                      dest='verbose')
    parser.add_option('--correct_env', action="store_true", dest='correct_env')
    (options, args) = parser.parse_args()

    if not options.correct_env:
        # NOTE(review): the arguments are handed to the shell unquoted, so a
        # path containing spaces or shell metacharacters will be misparsed.
        command = "".join(arg + " " for arg in sys.argv)
        os.system("source /afs/cern.ch/cms/LCG/LCG-2/UI/cms_ui_env.sh; source /afs/cern.ch/project/gd/LCG-share/current_3.2/etc/profile.d/grid-env.sh; python2.6 " + command + "--correct_env")
        sys.exit(0)

    if len(args) != 1:
        print("Usage:")
        print("python closeOutTaskChainWorkflows.py [--test] [--verbose] <inputFile_containing_a_list_of_workflows>")
        sys.exit(0)
    inputFile = args[0]

    # Read the list inside a `with` block so the file is really closed; a
    # bare `f.close` (no parentheses) never closes it.
    with open(inputFile, 'r') as f:
        workflows = [line.rstrip('\n') for line in f]

    closedOut = []
    nonClosedOut = []
    tooManyEventsOrLumis = []
    for workflow in workflows:
        if not workflow:
            # Blank lines (e.g. a trailing empty line) name no workflow.
            continue
        print("checking workflow " + workflow)
        jsn = getWorkflowJson(url, workflow)
        if jsn['RequestType'] != 'TaskChain':
            print("workflow type is not TaskChain, exiting")
            sys.exit(0)
        outputDatasets = phedexSubscription.outputdatasetsWorkflow(url, workflow)

        task1 = jsn['Task1']
        hasNumEvents = 'RequestNumEvents' in task1
        hasInputDataset = 'InputDataset' in task1
        if hasNumEvents and hasInputDataset:
            print("both RequestNumEvents and InputDataset in Task1, exiting")
            sys.exit(1)
        if hasNumEvents:
            closeOut, tooMany = _checkOutputCounts(
                outputDatasets, task1['RequestNumEvents'],
                dbsTest.getOutputEvents, 'events', options.verbose)
        elif hasInputDataset:
            closeOut, tooMany = _checkOutputCounts(
                outputDatasets, dbsTest.getInputLumis(jsn),
                dbsTest.getOutputLumis, 'lumis', options.verbose)
        else:
            print("neither RequestNumEvents nor InputDataset in Task1, exiting")
            sys.exit(1)

        if closeOut:
            closedOut.append(workflow)
        else:
            nonClosedOut.append(workflow)
        if tooMany:
            tooManyEventsOrLumis.append(workflow)

    separator = '-------------------------------------------------------------------------------------------------------------------------------------'
    print(separator)
    print('| Request | Closed-out? | Current status |')
    print(separator)
    for workflow in closedOut:
        jsn = getWorkflowJson(url, workflow)
        status = jsn['RequestStatus']
        # The printed status is the one read before the close-out call.
        if status == 'completed' and not options.test:
            closeOutTaskChainWorkflows(url, workflow)
        print("%90s\tYES\t\t%s" % (workflow, status))
    for workflow in nonClosedOut:
        jsn = getWorkflowJson(url, workflow)
        status = jsn['RequestStatus']
        print("%90s\tNO\t\t%s" % (workflow, status))
    print(separator)
    for workflow in tooManyEventsOrLumis:
        print("WARNING: One of the datasets produced by the workflow " + workflow + " contains MORE events or lumis than expected. Rerun with the --verbose flag to get more information.")
    sys.exit(0)
def getFinalRequestedNumEvents(url, workflow):
    """Return the workflow's input event count minus the events already produced.

    The produced count is taken from the first output dataset returned by
    ``phedexSubscription.outputdatasetsWorkflow``; the other datasets are
    not looked at.
    """
    datasets = phedexSubscription.outputdatasetsWorkflow(url, workflow)
    firstDataset = datasets[0]
    produced = dbsTest.getOutputEvents(url, workflow, firstDataset)
    requested = dbsTest.getInputEvents(url, workflow)
    return requested - produced
def main():
    """Close out completed workflows whose output event counts match the input.

    The single command-line argument is the path of a text file with one
    workflow name per line.  Each workflow's input event count is compared
    with the event count of its output datasets (names containing
    'ALCARECO' are skipped).  Workflows with no mismatch are printed as
    "YES" and, when ``getStatus`` returns 'completed', handed to
    ``closeOutTaskChainWorkflows``; the rest are printed as "NO".  Workflows
    with a dataset holding more events than expected get a WARNING line per
    such dataset after the table.

    The status column shows the value read before any close-out call.
    Exits with status 0 in every case, including the usage message.
    """
    url = 'cmsweb.cern.ch'
    args = sys.argv[1:]
    if len(args) != 1:
        print("usage: python closeOutTaskChainWorkflows.py <inputFile_containing_a_list_of_workflows>")
        sys.exit(0)

    # `with` closes the file even if something below raises; writing
    # `f.close` without calling it would leave the handle open.
    workflows = []
    with open(args[0], 'r') as f:
        for line in f:
            name = line.rstrip('\n')
            # Empty lines carry no workflow name; querying for '' is useless.
            if name:
                workflows.append(name)

    rule = '-------------------------------------------------------------------------------------------------------------------------------------'
    print(rule)
    print('| Request | Closed-out? | Current status |')
    print(rule)

    closedOut = []
    nonClosedOut = []
    tooManyEvents = []
    for workflow in workflows:
        outputDatasets = phedexSubscription.outputdatasetsWorkflow(url, workflow)
        inputEvents = dbsTest.getInputEvents(url, workflow)
        closeOut = True
        tooMany = False
        for dataset in outputDatasets:
            # we cannot calculate completion of ALCARECO samples
            if 'ALCARECO' in dataset:
                continue
            outputEvents = dbsTest.getOutputEvents(url, workflow, dataset)
            if inputEvents != 0:
                if outputEvents < inputEvents:
                    # Not fully produced yet: keep the current status.
                    closeOut = False
                elif outputEvents > inputEvents:
                    # Overproduction: remember it and stop checking.
                    closeOut = False
                    tooMany = True
                    break
            else:
                print("Input Events 0")
        if closeOut:
            closedOut.append(workflow)
        else:
            nonClosedOut.append(workflow)
        if tooMany:
            tooManyEvents.append(workflow)

    for workflow in closedOut:
        status = getStatus(url, workflow)
        if status == 'completed':
            closeOutTaskChainWorkflows(url, workflow)
        print("%90s\tYES\t\t%s" % (workflow, status))
    for workflow in nonClosedOut:
        status = getStatus(url, workflow)
        print("%90s\tNO\t\t%s" % (workflow, status))
    print(rule)

    # The first pass breaks at the first oversized dataset, so the counts
    # are fetched again here to warn about every oversized dataset.
    for workflow in tooManyEvents:
        outputDatasets = phedexSubscription.outputdatasetsWorkflow(url, workflow)
        inputEvents = dbsTest.getInputEvents(url, workflow)
        for dataset in outputDatasets:
            # we cannot calculate completion of ALCARECO samples
            if 'ALCARECO' in dataset:
                continue
            outputEvents = dbsTest.getOutputEvents(url, workflow, dataset)
            if inputEvents == 0:
                continue
            if outputEvents > inputEvents:
                print("WARNING about workflow " + workflow + ": The dataset " + dataset + " contains MORE events than expected. " + str(inputEvents) + " events were expected and " + str(outputEvents) + " were found.")
    sys.exit(0)