Exemple #1
0
def main():
    """Report which workflows from an input file look ready to be closed out.

    The single command-line argument is a text file holding one workflow
    name per line. A workflow is reported as ready ("YES") when, for every
    output dataset other than ALCARECO ones, the output events reach 100% of
    the workflow's input events. This variant only prints the result table
    together with each workflow's current status; it never closes anything
    out. The function always terminates through ``sys.exit(0)``, including
    when the arguments are wrong.
    """
    url = 'cmsweb.cern.ch'
    args = sys.argv[1:]
    if len(args) != 1:
        print("usage: python closeOutTaskChainWorkflows.py <inputFile_containing_a_list_of_workflows>")
        sys.exit(0)

    # Read the list up front inside a context manager so the file is really
    # closed (the previous code referenced f.close without calling it).
    with open(args[0], 'r') as f:
        workflows = [line.strip() for line in f]
    # Blank lines are not workflow names; do not query the services for them.
    workflows = [name for name in workflows if name]

    separator = '-------------------------------------------------------------------------------------------------------------------------------------'
    print(separator)
    print('| Request                                                                                   | Closed-out? | Current status          |')
    print(separator)

    closedOut = []
    nonClosedOut = []
    for workflow in workflows:
        outputDatasets = phedexSubscription.outputdatasetsWorkflow(
            url, workflow)
        inputEvents = dbsTest.getInputEvents(url, workflow)
        closeOut = True
        for dataset in outputDatasets:
            # we cannot calculate completion of ALCARECO samples
            if 'ALCARECO' in dataset:
                continue
            outputEvents = dbsTest.getOutputEvents(url, workflow, dataset)
            print("dataset = " + dataset)
            print("inputEvents = " + str(inputEvents))
            print("outputEvents = " + str(outputEvents))

            if inputEvents == 0:
                # No ratio can be computed; as before, this dataset does not
                # prevent the workflow from being reported as ready.
                print("Input Events 0")
                continue
            if (outputEvents / float(inputEvents) * 100) < 100.0:
                # One incomplete dataset is enough to keep the workflow open.
                closeOut = False
                break

        if closeOut:
            closedOut.append(workflow)
        else:
            nonClosedOut.append(workflow)

    for workflow in closedOut:
        status = getStatus(url, workflow)
        print("%90s\tYES\t\t%s" % (workflow, status))
    for workflow in nonClosedOut:
        status = getStatus(url, workflow)
        print("%90s\tNO\t\t%s" % (workflow, status))
    print(separator)
    sys.exit(0)
def main():
    """Print a close-out readiness table for the workflows in an input file.

    Expects exactly one command-line argument: a file with one workflow name
    per line. For each workflow the output events of every non-ALCARECO
    output dataset are compared with the workflow's input events; the
    workflow is marked "YES" only if all of them reach 100%. Nothing is
    closed out by this variant, it only reports. Ends with ``sys.exit(0)``
    in every case, including the usage error.
    """
    url = 'cmsweb.cern.ch'
    args = sys.argv[1:]
    if len(args) != 1:
        print("usage: python closeOutTaskChainWorkflows.py <inputFile_containing_a_list_of_workflows>")
        sys.exit(0)
    inputFile = args[0]

    # The original never called f.close(); a with-block guarantees the close.
    # Empty or whitespace-only lines are dropped, they are not workflow names.
    workflows = []
    with open(inputFile, 'r') as f:
        for line in f:
            name = line.strip()
            if name:
                workflows.append(name)

    rule = '-------------------------------------------------------------------------------------------------------------------------------------'
    print(rule)
    print('| Request                                                                                   | Closed-out? | Current status          |')
    print(rule)

    closedOut = []
    nonClosedOut = []
    for workflow in workflows:
        outputDatasets = phedexSubscription.outputdatasetsWorkflow(url, workflow)
        inputEvents = dbsTest.getInputEvents(url, workflow)
        closeOut = True
        for dataset in outputDatasets:
            # we cannot calculate completion of ALCARECO samples
            if 'ALCARECO' in dataset:
                continue
            outputEvents = dbsTest.getOutputEvents(url, workflow, dataset)
            print("dataset = " + dataset)
            print("inputEvents = " + str(inputEvents))
            print("outputEvents = " + str(outputEvents))

            if inputEvents != 0:
                completion = outputEvents / float(inputEvents) * 100
                if completion < 100.0:
                    # Incomplete dataset: keep the workflow in its current
                    # status and stop checking its remaining datasets.
                    closeOut = False
                    break
            else:
                # Completion is undefined; the dataset is not held against
                # the workflow (unchanged behaviour).
                print("Input Events 0")

        if closeOut:
            closedOut.append(workflow)
        else:
            nonClosedOut.append(workflow)

    for workflow in closedOut:
        print("%90s\tYES\t\t%s" % (workflow, getStatus(url, workflow)))
    for workflow in nonClosedOut:
        print("%90s\tNO\t\t%s" % (workflow, getStatus(url, workflow)))
    print(rule)
    sys.exit(0)
Exemple #3
0
def PercentageCompletion(url, workflow, dataset):
    """Return output events of *dataset* divided by the workflow's input events.

    Despite the name, the result is a plain ratio (1.0 means the two counts
    are equal); it is not multiplied by 100. The input-event count is
    converted with ``int()``. When that count is zero the integer 0 is
    returned instead of dividing.
    """
    expected = int(dbsTest.getInputEvents(url, workflow))
    produced = dbsTest.getOutputEvents(url, workflow, dataset)
    if not expected:
        return 0
    return produced / float(expected)
def PercentageCompletion(url, workflow, dataset):
    """Compute the completion ratio of *dataset* for *workflow*.

    The ratio is output events over input events, so it is a fraction and
    not a value scaled to 100. The input-event count goes through ``int()``
    first; if it is zero, the integer 0 is returned.
    """
    requested = int(dbsTest.getInputEvents(url, workflow))
    obtained = dbsTest.getOutputEvents(url, workflow, dataset)
    # Guard against dividing by zero when the workflow reports no input events.
    return obtained / float(requested) if requested != 0 else 0
def main():
    """Close out completed workflows whose output event counts match the input.

    The single command-line argument is a file with one workflow name per
    line. For every workflow, each non-ALCARECO output dataset is compared
    with the workflow's input-event count:

    * all equal          -> the workflow is a close-out candidate ("YES");
    * any dataset short  -> not closed out ("NO");
    * any dataset larger -> not closed out, and a warning is printed at the end.

    Candidates whose status (from ``getStatus``) is 'completed' are passed to
    ``closeOutTaskChainWorkflows``. The function always ends through
    ``sys.exit(0)``, including on a usage error.
    """
    url = 'cmsweb.cern.ch'
    args = sys.argv[1:]
    if len(args) != 1:
        print("usage: python closeOutTaskChainWorkflows.py <inputFile_containing_a_list_of_workflows>")
        sys.exit(0)

    # Read the names inside a with-block so the file is actually closed (the
    # previous code referenced f.close without calling it), and drop blank
    # lines, which are not workflow names.
    with open(args[0], 'r') as f:
        workflows = [line.strip() for line in f]
    workflows = [name for name in workflows if name]

    separator = '-------------------------------------------------------------------------------------------------------------------------------------'
    print(separator)
    print('| Request                                                                                   | Closed-out? | Current status          |')
    print(separator)

    closedOut = []
    nonClosedOut = []
    tooManyEvents = []
    for workflow in workflows:
        outputDatasets = phedexSubscription.outputdatasetsWorkflow(url, workflow)
        inputEvents = dbsTest.getInputEvents(url, workflow)
        closeOut = True
        tooMany = False
        for dataset in outputDatasets:
            # we cannot calculate completion of ALCARECO samples
            if 'ALCARECO' in dataset:
                continue
            outputEvents = dbsTest.getOutputEvents(url, workflow, dataset)
            if inputEvents == 0:
                # Nothing to compare against; as before, the dataset does not
                # block the close-out.
                print("Input Events 0")
                continue
            if outputEvents > inputEvents:
                # An excess is reported separately; stop at the first one.
                closeOut = False
                tooMany = True
                break
            if outputEvents < inputEvents:
                # Keep scanning: a later dataset may still have too many events.
                closeOut = False

        if closeOut:
            closedOut.append(workflow)
        else:
            nonClosedOut.append(workflow)
        if tooMany:
            tooManyEvents.append(workflow)

    for workflow in closedOut:
        status = getStatus(url, workflow)
        if status == 'completed':
            closeOutTaskChainWorkflows(url, workflow)
        print("%90s\tYES\t\t%s" % (workflow, status))
    for workflow in nonClosedOut:
        status = getStatus(url, workflow)
        print("%90s\tNO\t\t%s" % (workflow, status))

    print(separator)

    # Second pass over the offending workflows: the first pass stops at the
    # first dataset with an excess, here every such dataset is listed.
    for workflow in tooManyEvents:
        outputDatasets = phedexSubscription.outputdatasetsWorkflow(url, workflow)
        inputEvents = dbsTest.getInputEvents(url, workflow)
        for dataset in outputDatasets:
            # we cannot calculate completion of ALCARECO samples
            if 'ALCARECO' in dataset:
                continue
            outputEvents = dbsTest.getOutputEvents(url, workflow, dataset)
            if inputEvents != 0 and outputEvents > inputEvents:
                print("WARNING about workflow " + workflow + ": The dataset " + dataset + " contains MORE events than expected. " + str(inputEvents) + " events were expected and " + str(outputEvents) + " were found.")
    sys.exit(0)
def _queryWithRetries(query, dataset, label, maxTries=3, delay=50):
    """Call ``query(dataset)`` until it returns a non-zero count or tries run out."""
    count = 0
    for attempt in range(maxTries):
        count = query(dataset)
        if count != 0:
            break
        print("0 output " + label)
        # Waiting only makes sense if another attempt follows.
        if attempt < maxTries - 1:
            time.sleep(delay)
    return count


def _checkOutputDatasets(outputDatasets, expected, query, label, verbose):
    """Compare every non-ALCARECO dataset with *expected*; return (closeOut, tooMany)."""
    closeOut = True
    tooMany = False
    for dataset in outputDatasets:
        print("    checking dataset  " + dataset)
        # we cannot calculate completion of ALCARECO samples
        if 'ALCARECO' in dataset:
            continue
        produced = _queryWithRetries(query, dataset, label)

        if verbose:
            print("        input " + label + ":  " + str(expected))
            print("        output " + label + ": " + str(produced))

        if produced > expected:
            # An excess is reported separately; stop at the first one.
            closeOut = False
            tooMany = True
            break
        if produced < expected:
            # Keep scanning: a later dataset may still exceed the expectation.
            closeOut = False
    return closeOut, tooMany


def main():
    """Close out completed TaskChain workflows whose outputs match expectations.

    Command line: ``[--test] [--verbose] <inputFile>``, where the file lists
    one workflow name per line. Unless ``--correct_env`` is given, the script
    re-runs itself through ``os.system`` after sourcing the grid environment
    scripts, and then exits.

    For each workflow the request JSON from ``getWorkflowJson`` decides the
    comparison: if ``Task1`` has ``RequestNumEvents`` the output datasets are
    compared by event count, if it has ``InputDataset`` they are compared by
    lumi count. Having both or neither exits with status 1; a request type
    other than 'TaskChain' exits with status 0. Workflows whose datasets all
    match and whose status is 'completed' are closed out unless ``--test``
    is set. Workflows with a dataset exceeding the expectation get a warning.
    """
    url = 'cmsweb.cern.ch'
    parser = optparse.OptionParser()
    parser.add_option('--test', action="store_true", help='Nothing is closed out. Only test if the workflows are ready to be closed out.', dest='test')
    parser.add_option('--verbose', action="store_true", help='Print out details about the number of events expected and produced.', dest='verbose')
    parser.add_option('--correct_env', action="store_true", dest='correct_env')
    (options, args) = parser.parse_args()

    if not options.correct_env:
        # NOTE(review): the arguments are pasted into a shell string without
        # quoting, so a path containing spaces or shell metacharacters will
        # not survive the re-exec.
        command = " ".join(sys.argv) + " "
        os.system("source /afs/cern.ch/cms/LCG/LCG-2/UI/cms_ui_env.sh; source /afs/cern.ch/project/gd/LCG-share/current_3.2/etc/profile.d/grid-env.sh; python2.6 " + command + "--correct_env")
        sys.exit(0)

    if len(args) != 1:
        print("Usage:")
        print("python closeOutTaskChainWorkflows.py [--test] [--verbose] <inputFile_containing_a_list_of_workflows>")
        sys.exit(0)

    # Read the names inside a with-block so the file is actually closed (the
    # previous code referenced f.close without calling it), and drop blank
    # lines, which are not workflow names.
    with open(args[0], 'r') as f:
        workflows = [line.strip() for line in f]
    workflows = [name for name in workflows if name]

    closedOut = []
    nonClosedOut = []
    tooManyEventsOrLumis = []
    for workflow in workflows:
        print("checking workflow " + workflow)
        jsn = getWorkflowJson(url, workflow)
        if jsn['RequestType'] != 'TaskChain':
            print("workflow type is not TaskChain, exiting")
            sys.exit(0)
        outputDatasets = phedexSubscription.outputdatasetsWorkflow(url, workflow)

        firstTask = jsn['Task1']
        hasNumEvents = 'RequestNumEvents' in firstTask
        hasInputDataset = 'InputDataset' in firstTask
        if hasNumEvents and hasInputDataset:
            print("both RequestNumEvents and InputDataset in Task1, exiting")
            sys.exit(1)
        if hasNumEvents:
            closeOut, tooMany = _checkOutputDatasets(
                outputDatasets, firstTask['RequestNumEvents'],
                dbsTest.getOutputEvents, "events", options.verbose)
        elif hasInputDataset:
            closeOut, tooMany = _checkOutputDatasets(
                outputDatasets, dbsTest.getInputLumis(jsn),
                dbsTest.getOutputLumis, "lumis", options.verbose)
        else:
            print("neither RequestNumEvents nor InputDataset in Task1, exiting")
            sys.exit(1)

        if closeOut:
            closedOut.append(workflow)
        else:
            nonClosedOut.append(workflow)
        if tooMany:
            tooManyEventsOrLumis.append(workflow)

    separator = '-------------------------------------------------------------------------------------------------------------------------------------'
    print(separator)
    print('| Request                                                                                   | Closed-out? | Current status          |')
    print(separator)

    for workflow in closedOut:
        status = getWorkflowJson(url, workflow)['RequestStatus']
        if status == 'completed' and not options.test:
            closeOutTaskChainWorkflows(url, workflow)
        print("%90s\tYES\t\t%s" % (workflow, status))
    for workflow in nonClosedOut:
        status = getWorkflowJson(url, workflow)['RequestStatus']
        print("%90s\tNO\t\t%s" % (workflow, status))

    print(separator)

    for workflow in tooManyEventsOrLumis:
        print("WARNING: One of the datasets produced by the workflow " + workflow + " contains MORE events or lumis than expected. Rerun with the --verbose flag to get more information.")
    sys.exit(0)
Exemple #7
0
def getFinalRequestedNumEvents(url, workflow):
    """Return the workflow's input events minus the events of its first output dataset.

    Only the first entry returned by ``outputdatasetsWorkflow`` is looked at.
    The ``[0]`` lookup fails if that result is empty (an IndexError if it is
    a list).
    """
    firstDataset = phedexSubscription.outputdatasetsWorkflow(url, workflow)[0]
    produced = dbsTest.getOutputEvents(url, workflow, firstDataset)
    return dbsTest.getInputEvents(url, workflow) - produced
Exemple #8
0
def main():
    """Close out completed workflows whose output event counts equal the input.

    Takes one command-line argument, a file listing one workflow name per
    line. Each non-ALCARECO output dataset of a workflow is compared with the
    workflow's input-event count. The workflow is a close-out candidate only
    if every dataset matches exactly; fewer events keep it open, and more
    events keep it open and trigger a warning after the table. Candidates
    whose ``getStatus`` result is 'completed' are handed to
    ``closeOutTaskChainWorkflows``. Always terminates with ``sys.exit(0)``,
    including on a usage error.
    """
    url = 'cmsweb.cern.ch'
    args = sys.argv[1:]
    if len(args) != 1:
        print("usage: python closeOutTaskChainWorkflows.py <inputFile_containing_a_list_of_workflows>")
        sys.exit(0)
    inputFile = args[0]

    # The original never called f.close(); a with-block guarantees the close.
    # Empty or whitespace-only lines are dropped, they are not workflow names.
    workflows = []
    with open(inputFile, 'r') as f:
        for line in f:
            name = line.strip()
            if name:
                workflows.append(name)

    rule = '-------------------------------------------------------------------------------------------------------------------------------------'
    print(rule)
    print('| Request                                                                                   | Closed-out? | Current status          |')
    print(rule)

    closedOut = []
    nonClosedOut = []
    tooManyEvents = []
    for workflow in workflows:
        outputDatasets = phedexSubscription.outputdatasetsWorkflow(
            url, workflow)
        inputEvents = dbsTest.getInputEvents(url, workflow)
        closeOut = True
        tooMany = False
        for dataset in outputDatasets:
            # we cannot calculate completion of ALCARECO samples
            if 'ALCARECO' in dataset:
                continue
            outputEvents = dbsTest.getOutputEvents(url, workflow, dataset)
            if inputEvents != 0:
                if outputEvents > inputEvents:
                    # Too many events: flag for the warning pass and stop.
                    closeOut = False
                    tooMany = True
                    break
                elif outputEvents < inputEvents:
                    # Incomplete, but keep looking for datasets with an excess.
                    closeOut = False
            else:
                # Nothing to compare against; the dataset is not held against
                # the workflow (unchanged behaviour).
                print("Input Events 0")

        if closeOut:
            closedOut.append(workflow)
        else:
            nonClosedOut.append(workflow)
        if tooMany:
            tooManyEvents.append(workflow)

    for workflow in closedOut:
        status = getStatus(url, workflow)
        if status == 'completed':
            closeOutTaskChainWorkflows(url, workflow)
        print("%90s\tYES\t\t%s" % (workflow, status))
    for workflow in nonClosedOut:
        status = getStatus(url, workflow)
        print("%90s\tNO\t\t%s" % (workflow, status))

    print(rule)

    # Re-query the flagged workflows so that every dataset with an excess is
    # listed, not just the first one found above.
    for workflow in tooManyEvents:
        outputDatasets = phedexSubscription.outputdatasetsWorkflow(
            url, workflow)
        inputEvents = dbsTest.getInputEvents(url, workflow)
        for dataset in outputDatasets:
            # we cannot calculate completion of ALCARECO samples
            if 'ALCARECO' in dataset:
                continue
            outputEvents = dbsTest.getOutputEvents(url, workflow, dataset)
            if inputEvents != 0 and outputEvents > inputEvents:
                print("WARNING about workflow " + workflow + ": The dataset " + dataset + " contains MORE events than expected. " + str(inputEvents) + " events were expected and " + str(outputEvents) + " were found.")
    sys.exit(0)