Ejemplo n.º 1
0
def main():
    """Print one line per output dataset for every workflow listed in a file.

    Expects exactly one command-line argument: the path of a text file that
    holds one workflow name per line.  For each workflow the output datasets
    are obtained from ``phedexSubscription.outputdatasetsWorkflow`` and every
    dataset is printed, followed by two tabs and the value returned by
    ``dbsTest.getEventCountDataSet``.

    The function always terminates through ``sys.exit(0)``, including after
    printing the usage message.
    """
    args = sys.argv[1:]
    if len(args) != 1:
        # print(...) with a single argument behaves the same on Python 2 and 3.
        print("usage: python getRelValDsetNames.py <inputFile_containing_a_list_of_workflows>")
        sys.exit(0)
    inputFile = args[0]

    url = 'cmsweb.cern.ch'
    # The context manager guarantees the file is closed, even when one of
    # the remote lookups raises half-way through the list.
    with open(inputFile, 'r') as f:
        for line in f:
            workflow = line.rstrip('\n')
            outputDataSets = phedexSubscription.outputdatasetsWorkflow(
                url, workflow)
            for dataset in outputDataSets:
                outputEvents = dbsTest.getEventCountDataSet(
                    "https://cmsweb.cern.ch", dataset)
                print(dataset + "\t\t" + str(outputEvents))

    sys.exit(0)
def getDbsProgress(options, wfs_dict, wfs_dict_skipped):
    """Attach per-dataset progress figures to every workflow in ``wfs_dict``.

    For each workflow name the output datasets and the input event count are
    requested through ``dbsTest``.  When either lookup raises, the workflow is
    reported on stdout, moved from ``wfs_dict`` to ``wfs_dict_skipped`` and
    not processed further.  Otherwise one ``{"dataset", "progress"}`` dict per
    output dataset is appended to ``wfs_dict[wf]['dbsProgress']``, where
    ``progress`` is ``str(outputEvents / float(inputEvents) * 100)``.

    Args:
        options: object whose ``verbose`` attribute enables an extra message.
        wfs_dict: dict keyed by workflow name; modified in place.
        wfs_dict_skipped: dict that receives the workflows whose lookup
            failed; modified in place.

    Returns:
        The tuple ``(wfs_dict, wfs_dict_skipped)`` (the same objects that
        were passed in).
    """
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print(cya + "Getting progress from dbs..." + dfa)
    url = "cmsweb.cern.ch"
    # Iterate over a snapshot of the keys: entries are removed from wfs_dict
    # inside the loop.
    for wf in list(wfs_dict):
        wfs_dict[wf]['dbsProgress'] = []
        try:
            outputDataSets = dbsTest.phedexSubscription.outputdatasetsWorkflow(url, wf)
            inputEvents = dbsTest.getInputEvents(url, wf)
        except Exception:
            # ``Exception`` rather than a bare ``except`` so that Ctrl-C and
            # SystemExit are not mistaken for a failed lookup.
            print("\t%s'%s' cannot be looked up in dbs, skipping.%s" % (red, wf, dfa))
            wfs_dict_skipped[wf] = wfs_dict.pop(wf)
            continue
        for dataset in outputDataSets:
            outputEvents = dbsTest.getEventCountDataSet(dataset)
            # NOTE(review): raises ZeroDivisionError when inputEvents is 0;
            # confirm whether such workflows should be skipped instead.
            wfs_dict[wf]['dbsProgress'].append({
                "dataset": dataset,
                "progress": str(outputEvents / float(inputEvents) * 100),
            })
    if options.verbose:
        print(cya + "Added dbs progress info to workflow dictionary." + dfa)
    appendFile(log_cmst1, "== " + str(datetime.datetime.now()) + " == progress queried from dbs ==\n")
    return wfs_dict, wfs_dict_skipped
def classifyRequests(url, requests):
    """Print a table row for each Monte Carlo request flagged by duplicateLumi.

    Only requests that carry a ``'type'`` key equal to ``'MonteCarloFromGEN'``
    or ``'MonteCarlo'`` and that have at least one output dataset are
    examined.  For the first output dataset the ratio of output to input
    events is computed; a row is printed only when ``duplicateLumi(dataset)``
    is true.

    Args:
        url: host passed on to the phedexSubscription / dbsTest helpers.
        requests: iterable of dicts; the keys read here are ``'type'``,
            ``'request_name'`` and ``'status'``.

    Returns:
        None.  All results are written to stdout.
    """
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print('-----------------------------------------------------------------------------------------------------------------------------------------------------------')
    print('| Request                                                       |req Type |Status Req | Dataset             |Status Dataset | Percentage|FilterEfficiency| ')
    print('-----------------------------------------------------------------------------------------------------------------------------------------------------------')
    for request in requests:
        if 'type' not in request:
            continue
        name = request['request_name']
        if request['type'] not in ('MonteCarloFromGEN', 'MonteCarlo'):
            continue
        datasetWorkflow = phedexSubscription.outputdatasetsWorkflow(url, name)
        if len(datasetWorkflow) < 1:
            continue
        dataset = datasetWorkflow[0]
        # The small offset keeps the division below defined when the
        # request reports zero input events.
        inputEvents = 0.0001 + int(dbsTest.getInputEvents(url, name))
        outputEvents = dbsTest.getEventCountDataSet(dataset)
        percentage = outputEvents / float(inputEvents)
        if not duplicateLumi(dataset):
            continue
        FilterEfficiency = getFilterEfficiency(url, name)
        datasetStatus = getDatasetStatus(dataset)
        print('| %20s | %8s| %8s | %20s | %10s| %10s | %10s| ' % (name, request['type'], request['status'], dataset, datasetStatus, str(percentage * 100), FilterEfficiency))

    print('---------------------------------------------------------------------------------------------------------------------------')
Ejemplo n.º 4
0
def classifyRequests(url, requests):
    """Report Monte Carlo requests whose first output dataset is flagged.

    Walks ``requests`` and, for every entry of type ``'MonteCarloFromGEN'``
    or ``'MonteCarlo'`` that has at least one output dataset, computes the
    output/input event ratio of the first dataset.  A table row is written to
    stdout only when ``duplicateLumi`` returns a true value for that dataset.
    Nothing is returned.
    """
    rule = '-----------------------------------------------------------------------------------------------------------------------------------------------------------'
    print(rule)
    print('| Request                                                       |req Type |Status Req | Dataset             |Status Dataset | Percentage|FilterEfficiency| ')
    print(rule)

    monte_carlo_types = ('MonteCarloFromGEN', 'MonteCarlo')
    row_format = '| %20s | %8s| %8s | %20s | %10s| %10s | %10s| '

    for req in requests:
        if 'type' not in req:
            continue
        req_name = req['request_name']
        if req['type'] not in monte_carlo_types:
            continue

        produced = phedexSubscription.outputdatasetsWorkflow(url, req_name)
        if len(produced) < 1:
            continue
        first_dataset = produced[0]

        # 0.0001 keeps the denominator non-zero for requests without input.
        events_in = 0.0001 + int(dbsTest.getInputEvents(url, req_name))
        events_out = dbsTest.getEventCountDataSet(first_dataset)
        ratio = events_out / float(events_in)

        if not duplicateLumi(first_dataset):
            continue

        efficiency = getFilterEfficiency(url, req_name)
        status = getDatasetStatus(first_dataset)
        row = (req_name, req['type'], req['status'], first_dataset,
               status, str(ratio * 100), efficiency)
        print(row_format % row)

    print('---------------------------------------------------------------------------------------------------------------------------')
def testEventCountWorkflow(url, workflow):
    """Report output datasets that hold more events than the workflow's input.

    The input event count comes from ``dbsTest.getInputEvents`` and the
    output datasets from ``phedexSubscription.outputdatasetsWorkflow``.  For
    every dataset whose event count exceeds the input count a message with
    the percentage is printed.

    Args:
        url: host passed on to the lookup helpers.
        workflow: workflow name (concatenated into the printed messages).

    Returns:
        Always 1.
    """
    inputEvents = float(dbsTest.getInputEvents(url, workflow))
    datasets = phedexSubscription.outputdatasetsWorkflow(url, workflow)
    if inputEvents == 0:
        # A ratio against zero input events is undefined; say so instead of
        # failing with ZeroDivisionError on the first dataset.
        print("Workflow: " + workflow + " has no input events, cannot compute duplicate percentage")
        return 1
    for dataset in datasets:
        outputEvents = dbsTest.getEventCountDataSet(dataset)
        percentage = outputEvents / inputEvents
        if percentage > 1:
            print("Workflow: " + workflow + " duplicate events in outputdataset: " + dataset + " percentage: " + str(percentage * 100) + "%")
    return 1
Ejemplo n.º 6
0
def testEventCountWorkflow(url, workflow):
    """Print a warning for each output dataset larger than the input.

    Compares the value of ``dbsTest.getEventCountDataSet`` for every output
    dataset of ``workflow`` with the value of ``dbsTest.getInputEvents`` and
    prints a message, including the percentage, whenever the output count is
    greater than the input count.

    Args:
        url: host passed on to the lookup helpers.
        workflow: workflow name (concatenated into the printed messages).

    Returns:
        Always 1.
    """
    inputEvents = float(dbsTest.getInputEvents(url, workflow))
    datasets = phedexSubscription.outputdatasetsWorkflow(url, workflow)
    if inputEvents == 0:
        # Dividing by a zero input count would raise ZeroDivisionError, so
        # report the situation and stop instead.
        print("Workflow: " + workflow +
              " has no input events, cannot compute duplicate percentage")
        return 1
    for dataset in datasets:
        outputEvents = dbsTest.getEventCountDataSet(dataset)
        percentage = outputEvents / inputEvents
        if percentage > 1:
            print("Workflow: " + workflow +
                  " duplicate events in outputdataset: " + dataset +
                  " percentage: " + str(percentage * 100) + "%")
    return 1
Ejemplo n.º 7
0
def main():
    """Print one line per output dataset for every workflow listed in a file.

    Unless ``--correct_env`` is given, the script re-runs itself with
    ``python2.6`` after sourcing the grid environment script, appending
    ``--correct_env`` to the original command line, and then exits.

    With ``--correct_env`` exactly one positional argument is expected: the
    path of a text file with one workflow name per line.  Each output dataset
    of each workflow is printed, followed by two tabs and the value returned
    by ``dbsTest.getEventCountDataSet``.

    The function always terminates through ``sys.exit(0)``.
    """
    parser = optparse.OptionParser()
    parser.add_option("--correct_env", action="store_true", dest="correct_env")
    (options, args) = parser.parse_args()

    if not options.correct_env:
        try:
            from shlex import quote
        except ImportError:  # Python 2 only has pipes.quote
            from pipes import quote
        # Quote every argument so paths containing spaces or shell
        # metacharacters reach the re-executed script unchanged.
        command = " ".join(quote(arg) for arg in sys.argv)
        os.system(
            "source /afs/cern.ch/project/gd/LCG-share/current_3.2/etc/profile.d/grid-env.sh; python2.6 "
            + command
            + " --correct_env"
        )
        sys.exit(0)

    if len(args) != 1:
        # print(...) with a single argument behaves the same on Python 2 and 3.
        print("usage: python getRelValDsetNames.py <inputFile_containing_a_list_of_workflows>")
        sys.exit(0)
    inputFile = args[0]

    url = "cmsweb.cern.ch"
    # The context manager guarantees the file is closed, even when one of
    # the remote lookups raises half-way through the list.
    with open(inputFile, "r") as f:
        for line in f:
            workflow = line.rstrip("\n")
            outputDataSets = phedexSubscription.outputdatasetsWorkflow(url, workflow)
            for dataset in outputDataSets:
                outputEvents = dbsTest.getEventCountDataSet("https://cmsweb.cern.ch", dataset)
                print(dataset + "\t\t" + str(outputEvents))

    sys.exit(0)
def main():
    """List the output datasets of the workflows named in an input file.

    The single command-line argument is the path of a text file containing
    one workflow name per line.  For every workflow,
    ``phedexSubscription.outputdatasetsWorkflow`` supplies the output
    datasets; each one is printed together with the value returned by
    ``dbsTest.getEventCountDataSet``, separated by two tabs.

    Exits through ``sys.exit(0)`` both after the usage message and after
    normal completion.
    """
    args = sys.argv[1:]
    if len(args) != 1:
        # print(...) with a single argument behaves the same on Python 2 and 3.
        print("usage: python getRelValDsetNames.py <inputFile_containing_a_list_of_workflows>")
        sys.exit(0)
    inputFile = args[0]

    url = 'cmsweb.cern.ch'
    # ``with`` closes the file on every exit path, including exceptions
    # raised by the remote lookups.
    with open(inputFile, 'r') as f:
        for line in f:
            workflow = line.rstrip('\n')
            outputDataSets = phedexSubscription.outputdatasetsWorkflow(url, workflow)
            for dataset in outputDataSets:
                outputEvents = dbsTest.getEventCountDataSet("https://cmsweb.cern.ch", dataset)
                print(dataset + "\t\t" + str(outputEvents))

    sys.exit(0)
Ejemplo n.º 9
0
def getDbsProgress(options, wfs_dict, wfs_dict_skipped):
    """Record, per workflow, the progress of each output dataset.

    Every workflow in ``wfs_dict`` gets a fresh ``'dbsProgress'`` list.  The
    output datasets and the input event count are then looked up through
    ``dbsTest``; if either lookup raises, a message is printed and the
    workflow is moved to ``wfs_dict_skipped``.  For the remaining workflows
    one ``{"dataset", "progress"}`` dict per output dataset is appended,
    with ``progress`` being ``str(outputEvents / float(inputEvents) * 100)``.

    Args:
        options: object whose ``verbose`` attribute enables an extra message.
        wfs_dict: dict keyed by workflow name; modified in place.
        wfs_dict_skipped: dict collecting the workflows whose lookup failed;
            modified in place.

    Returns:
        The tuple ``(wfs_dict, wfs_dict_skipped)`` (the same objects that
        were passed in).
    """
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print(cya + "Getting progress from dbs..." + dfa)
    url = "cmsweb.cern.ch"
    # Loop over a copy of the keys because failed workflows are removed
    # from wfs_dict while iterating.
    for wf in list(wfs_dict):
        wfs_dict[wf]['dbsProgress'] = []
        try:
            outputDataSets = dbsTest.phedexSubscription.outputdatasetsWorkflow(
                url, wf)
            inputEvents = dbsTest.getInputEvents(url, wf)
        except Exception:
            # Not a bare ``except``: KeyboardInterrupt and SystemExit must
            # still be able to stop the run.
            print("\t%s'%s' cannot be looked up in dbs, skipping.%s" % (
                red, wf, dfa))
            wfs_dict_skipped[wf] = wfs_dict.pop(wf)
            continue
        for dataset in outputDataSets:
            outputEvents = dbsTest.getEventCountDataSet(dataset)
            # NOTE(review): raises ZeroDivisionError when inputEvents is 0;
            # confirm whether such workflows should be skipped instead.
            wfs_dict[wf]['dbsProgress'].append({
                "dataset": dataset,
                "progress": str(outputEvents / float(inputEvents) * 100),
            })
    if options.verbose:
        print(cya + "Added dbs progress info to workflow dictionary." + dfa)
    appendFile(
        log_cmst1, "== " + str(datetime.datetime.now()) +
        " == progress queried from dbs ==\n")
    return wfs_dict, wfs_dict_skipped