Example #1
def classifyRequests(url, requests, historic, noNameSites, requestType):
    for request in requests:
        name = request['request_name']
        status = 'NoStatus'
        reqType = 'NoType'
        if 'type' in request.keys():
            reqType = request['type']
        if reqType != requestType:
            continue
        if 'status' in request.keys():
            status = request['status']
        if status in noNameSites.keys():
            namefound = 0
            for Site in historic.keys():
                if name.find(Site) >= 0:
                    namefound = 1
                    # stat is the status of the request in the list of requests
                    for stat in historic[Site].keys():
                        if status == stat:
                            EffectiveLumi = getEffectiveLumiSections(
                                url, name, requestType)
                            if EffectiveLumi <= 0:
                                EffectiveLumi = 0.0000001
                            TimeEvent = getTimeEventRequest(url, name)
                            priority = getPriorityWorkflow(url, name)
                            numevents = dbsTest.getInputEvents(url, name)
                            # flag requests averaging more than 400 events per lumi section
                            checkLumi = numevents / EffectiveLumi > 400
                            maxEvents = maxEventsFileDataset(url, name)
                            historic[Site][stat].append(
                                (name, priority, numevents, TimeEvent,
                                 EffectiveLumi, checkLumi, maxEvents))
            if namefound == 0:
                for stat in noNameSites.keys():
                    if status == stat:
                        EffectiveLumi = getEffectiveLumiSections(
                            url, name, requestType)
                        if EffectiveLumi <= 0:
                            EffectiveLumi = 0.0000001
                        TimeEvent = getTimeEventRequest(url, name)
                        priority = getPriorityWorkflow(url, name)
                        numevents = dbsTest.getInputEvents(url, name)
                        # flag requests averaging more than 400 events per lumi section
                        checkLumi = numevents / EffectiveLumi > 400
                        maxEvents = maxEventsFileDataset(url, name)
                        noNameSites[stat].append(
                            (name, priority, numevents, TimeEvent,
                             EffectiveLumi, checkLumi, maxEvents))
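classifyRequests fills the two dictionaries in place instead of returning a value: historic maps site -> status -> list of result tuples, and noNameSites maps status -> list for requests whose name matches no known site. A minimal usage sketch, assuming a requests list from the request manager; every site and status name below is a placeholder:

# Hypothetical input shapes; all names here are illustrative only.
historic = {'T1_US_FNAL': {'running': [], 'completed': []}}
noNameSites = {'running': [], 'completed': []}
classifyRequests('cmsweb.cern.ch', requests, historic, noNameSites, 'MonteCarlo')
# Each appended tuple is:
# (name, priority, numevents, TimeEvent, EffectiveLumi, checkLumi, maxEvents)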
Example #2
def classifyRequests(url, requests):
    print '-----------------------------------------------------------------------------------------------------------------------------------------------------------'
    print '| Request                                                       |req Type |Status Req | Dataset             |Status Dataset | Percentage|FilterEfficiency| '
    print '-----------------------------------------------------------------------------------------------------------------------------------------------------------'
    classifiedRequests = {}
    for request in requests:
        if 'type' in request:
            name = request['request_name']
            if request['type'] == 'MonteCarloFromGEN' or request[
                    'type'] == 'MonteCarlo':
                datasetWorkflow = phedexSubscription.outputdatasetsWorkflow(
                    url, name)
                if len(datasetWorkflow) < 1:
                    continue
                dataset = datasetWorkflow[0]
                # the small offset avoids division by zero when there are no input events
                inputEvents = 0.0001 + int(dbsTest.getInputEvents(url, name))
                outputEvents = dbsTest.getEventCountDataSet(dataset)
                percentage = outputEvents / float(inputEvents)
                problem = duplicateLumi(dataset)
                if problem:
                    FilterEfficiency = getFilterEfficiency(url, name)
                    datasetStatus = getDatasetStatus(dataset)
                    print '| %20s | %8s| %8s | %20s | %10s| %10s | %10s| ' % (
                        name, request['type'], request['status'], dataset,
                        datasetStatus, str(percentage * 100), FilterEfficiency)

    print '---------------------------------------------------------------------------------------------------------------------------'
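This variant only prints a report row when a dataset carries duplicate lumi sections. A minimal sketch of driving it, assuming requests is the list of request dictionaries the request manager returns; the entry below is fabricated:

requests = [{'request_name': 'fake_MonteCarlo_request_v1',
             'type': 'MonteCarlo',
             'status': 'running'}]
classifyRequests('cmsweb.cern.ch', requests)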
Example #4
def retrieveSchema(url, workflowName, user, group):
    specURL = os.path.join(reqmgrCouchURL, workflowName, "spec")
    helper = WMWorkloadHelper()
    helper.load(specURL)
    schema = {}
    for (key, value) in helper.data.request.schema.dictionary_().iteritems():
        if key == 'ProcConfigCacheID':
            schema['ConfigCacheID'] = value
        elif key == 'RequestSizeEvents':
            schema['RequestSizeEvents'] = value
        elif key == 'Requestor':
            schema['Requestor'] = user
        elif key == 'Group':
            schema['Group'] = group
        elif key == 'RequestNumEvents':
            schema['RequestNumEvents'] = getFinalRequestedNumEvents(url, workflowName)
        elif key == 'FirstLumi':
            schema['FirstLumi'] = getMaxLumi(url, workflowName) * 2
        elif key == 'FirstEvent':
            schema['FirstEvent'] = dbsTest.getInputEvents(url, workflowName) * 2
        elif key == 'RequestString':
            schema['RequestString'] = 'ACDC_' + value
        elif value != None:
            schema[key] = value
    return schema
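The cloned schema renames the request with an ACDC_ prefix, doubles FirstLumi and FirstEvent to avoid overlap with the original output, and sets RequestNumEvents to whatever is still missing. A sketch of inspecting the clone before resubmission, assuming valid request-manager credentials; the workflow, user, and group names are placeholders:

schema = retrieveSchema('cmsweb.cern.ch', 'fake_workflow_v1', 'jdoe', 'DATAOPS')
for key in sorted(schema.keys()):
    print key, '=', schema[key]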
Example #6
def main():
    url = 'cmsweb.cern.ch'
    args = sys.argv[1:]
    if not len(args) == 1:
        print "usage: python closeOutTaskChainWorkflows.py <inputFile_containing_a_list_of_workflows>"
        sys.exit(0)
    inputFile = args[0]
    f = open(inputFile, 'r')

    print '-------------------------------------------------------------------------------------------------------------------------------------'
    print '| Request                                                                                   | Closed-out? | Current status          |'
    print '-------------------------------------------------------------------------------------------------------------------------------------'

    closedOut = []
    nonClosedOut = []
    running = []
    for line in f:
        workflow = line.rstrip('\n')
        outputDatasets = phedexSubscription.outputdatasetsWorkflow(
            url, workflow)
        inputEvents = dbsTest.getInputEvents(url, workflow)
        closeOut = True
        for dataset in outputDatasets:
            # we cannot calculate completion of ALCARECO samples
            if 'ALCARECO' in dataset:
                continue
            outputEvents = dbsTest.getOutputEvents(url, workflow, dataset)
            print "dataset = " + dataset
            print "inputEvents = " + str(inputEvents)
            print "outputEvents = " + str(outputEvents)

            if inputEvents != 0:
                if (outputEvents / float(inputEvents) * 100) >= 100.0:
                    pass
                    #print dataset+" match: "+str(outputEvents/float(inputEvents)*100) +"%"
                else:
                    #print dataset + " it is less than 99.9999% completed, keeping it in the current status"
                    closeOut = False
                    break
            else:
                print "Input Events 0"

        if closeOut:
            closedOut.append(workflow)
        else:
            nonClosedOut.append(workflow)

    for workflow in closedOut:
        status = getStatus(url, workflow)
        #if status == 'completed':
        #    closeOutTaskChainWorkflows(url, workflow)
        #else:
        #    pass
        print "%90s\tYES\t\t%s" % (workflow, status)
    for workflow in nonClosedOut:
        status = getStatus(url, workflow)
        print "%90s\tNO\t\t%s" % (workflow, status)
    f.close()
    print '-------------------------------------------------------------------------------------------------------------------------------------'
    sys.exit(0)
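The per-dataset decision above reduces to a single comparison. A standalone restatement of that criterion, under the same assumption that both event counts come from DBS:

def isCloseOutReady(inputEvents, outputEvents):
    # Mirrors the check in main(): a dataset blocks close-out unless it
    # reaches 100% of the input events; zero input events only warns.
    if inputEvents == 0:
        return True
    return outputEvents / float(inputEvents) * 100 >= 100.0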
Example #9
def PercentageCompletion(url, workflow, dataset):
    inputEvents = int(dbsTest.getInputEvents(url, workflow))
    outputEvents = dbsTest.getOutputEvents(url, workflow, dataset)
    if inputEvents == 0:
        return 0
    percentage = outputEvents / float(inputEvents)
    return percentage
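A short usage sketch, assuming dbsTest is importable and both names exist in DBS; the workflow and dataset names below are placeholders:

fraction = PercentageCompletion('cmsweb.cern.ch', 'fake_workflow_v1',
                                '/Fake/Dataset/v1/GEN-SIM')
print 'completion: ' + str(fraction * 100) + '%'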
def testEventCountWorkflow(url, workflow):
    inputEvents = dbsTest.getInputEvents(url, workflow)
    if inputEvents == 0:
        return 1  # no input events to compare against
    datasets = phedexSubscription.outputdatasetsWorkflow(url, workflow)
    for dataset in datasets:
        outputEvents = dbsTest.getEventCountDataSet(dataset)
        percentage = outputEvents / float(inputEvents)
        if percentage > 1:
            print "Workflow: " + workflow + " has duplicate events in output dataset: " + dataset + " percentage: " + str(percentage * 100) + "%"
    return 1
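A completion fraction above 1.0 can only come from events being counted more than once, which is why the function reports it as duplication. Sketch of scanning several workflows, with placeholder names:

for workflow in ['fake_workflow_a', 'fake_workflow_b']:
    testEventCountWorkflow('cmsweb.cern.ch', workflow)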
def getDbsProgress(options, wfs_dict, wfs_dict_skipped):
    print cya + "Getting progress from dbs..." + dfa
    url = "cmsweb.cern.ch"
    for wf in wfs_dict.keys():
        wfs_dict[wf]['dbsProgress'] = []
        try:
            outputDataSets = dbsTest.phedexSubscription.outputdatasetsWorkflow(
                url, wf)
        except:
            print "\t%s'%s' cannot be looked up in dbs, skipping.%s" % (
                red, wf, dfa)
            wfs_dict_skipped[wf] = wfs_dict[wf]
            del wfs_dict[wf]
            continue
        try:
            inputEvents = dbsTest.getInputEvents(url, wf)
        except:
            print "\t%s'%s' cannot be looked up in dbs, skipping.%s" % (
                red, wf, dfa)
            wfs_dict_skipped[wf] = wfs_dict[wf]
            del wfs_dict[wf]
            continue
        for dataset in outputDataSets:
            outputEvents = dbsTest.getEventCountDataSet(dataset)
            wfs_dict[wf]['dbsProgress'].append({
                "dataset": dataset,
                "progress": str(outputEvents / float(inputEvents) * 100)
            })
    if options.verbose:
        print cya + "Added dbs progress info to workflow dictionary." + dfa
    appendFile(
        log_cmst1, "== " + str(datetime.datetime.now()) +
        " == progress queried from dbs ==\n")
    return wfs_dict, wfs_dict_skipped
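After the call, every workflow that survived the DBS lookups carries a dbsProgress list, and the failed ones have been moved to the skipped dictionary. A sketch of the resulting shape, assuming options and the two workflow dictionaries from the surrounding script; all values shown are fabricated:

wfs_dict, wfs_dict_skipped = getDbsProgress(options, wfs_dict, wfs_dict_skipped)
# wfs_dict['some_wf']['dbsProgress'] now looks like:
# [{'dataset': '/Fake/Dataset/v1/GEN-SIM', 'progress': '98.7'}, ...]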
Example #15
def getFinalRequestedNumEvents(url, workflow):
    outputDataSets = phedexSubscription.outputdatasetsWorkflow(url, workflow)
    obtainedEvents = dbsTest.getOutputEvents(url, workflow, outputDataSets[0])
    requestedEvents = dbsTest.getInputEvents(url, workflow)
    return (requestedEvents - obtainedEvents)
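The result is the number of events still to be produced, which is exactly what retrieveSchema in Example #4 plugs into RequestNumEvents. A minimal sketch; the workflow name is a placeholder:

remaining = getFinalRequestedNumEvents('cmsweb.cern.ch', 'fake_workflow_v1')
print str(remaining) + ' events still to be produced'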
Example #16
def main():
    url = 'cmsweb.cern.ch'
    args = sys.argv[1:]
    if not len(args) == 1:
        print "usage: python closeOutTaskChainWorkflows.py <inputFile_containing_a_list_of_workflows>"
        sys.exit(0)
    inputFile = args[0]
    f = open(inputFile, 'r')

    print '-------------------------------------------------------------------------------------------------------------------------------------'
    print '| Request                                                                                   | Closed-out? | Current status          |'
    print '-------------------------------------------------------------------------------------------------------------------------------------'

    closedOut = []
    nonClosedOut = []
    tooManyEvents = []
    running = []
    for line in f:
        workflow = line.rstrip('\n')
        outputDatasets = phedexSubscription.outputdatasetsWorkflow(
            url, workflow)
        inputEvents = dbsTest.getInputEvents(url, workflow)
        closeOut = True
        tooMany = False
        for dataset in outputDatasets:
            # we cannot calculate completion of ALCARECO samples
            if 'ALCARECO' in dataset:
                continue
            outputEvents = dbsTest.getOutputEvents(url, workflow, dataset)
            if inputEvents != 0:
                if outputEvents == inputEvents:
                    pass
                    #print dataset+" match: "+str(outputEvents/float(inputEvents)*100) +"%"
                elif outputEvents < inputEvents:
                    #print dataset + " it is less than 99.9999% completed, keeping it in the current status"
                    closeOut = False
                elif outputEvents > inputEvents:
                    closeOut = False
                    tooMany = True
                    break
            else:
                print "Input Events 0"

        if closeOut:
            closedOut.append(workflow)
        else:
            nonClosedOut.append(workflow)
        if tooMany:
            tooManyEvents.append(workflow)

    for workflow in closedOut:
        status = getStatus(url, workflow)
        if status == 'completed':
            closeOutTaskChainWorkflows(url, workflow)
        else:
            pass
        print "%90s\tYES\t\t%s" % (workflow, status)
    for workflow in nonClosedOut:
        status = getStatus(url, workflow)
        print "%90s\tNO\t\t%s" % (workflow, status)

    print '-------------------------------------------------------------------------------------------------------------------------------------'

    for workflow in tooManyEvents:
        outputDatasets = phedexSubscription.outputdatasetsWorkflow(
            url, workflow)
        inputEvents = dbsTest.getInputEvents(url, workflow)
        for dataset in outputDatasets:
            # we cannot calculate completion of ALCARECO samples
            if 'ALCARECO' in dataset:
                continue
            outputEvents = dbsTest.getOutputEvents(url, workflow, dataset)
            if inputEvents != 0:
                if outputEvents > inputEvents:
                    print "WARNING about workflow " + workflow + ": The dataset " + dataset + " contains MORE events than expected. " + str(
                        inputEvents) + " events were expected and " + str(
                            outputEvents) + " were found."
    f.close()
    sys.exit(0)
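Unlike Example #6, which works with a percentage, this version compares raw event counts and singles out overproduction. The three-way per-dataset decision can be restated as a small helper, assuming the same counts fetched from DBS:

def classifyDataset(inputEvents, outputEvents):
    # Mirrors the per-dataset logic in main() above.
    if inputEvents == 0:
        return 'no-input'
    if outputEvents == inputEvents:
        return 'complete'
    if outputEvents < inputEvents:
        return 'incomplete'
    return 'too-many-events'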