def main():
    """Print the event count of every output dataset of the listed workflows.

    Expects exactly one command-line argument: the path of a text file that
    holds one workflow name per line.  For each workflow the output datasets
    are taken from ``phedexSubscription.outputdatasetsWorkflow`` and, for each
    dataset, one line is printed: the dataset name, two tab characters and
    the value returned by ``dbsTest.getEventCountDataSet``.

    The function never returns normally: it always ends in ``sys.exit(0)``,
    including after printing the usage message.
    """
    args = sys.argv[1:]
    if len(args) != 1:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("usage: python getRelValDsetNames.py <inputFile_containing_a_list_of_workflows>")
        sys.exit(0)

    url = 'cmsweb.cern.ch'
    # The context manager closes the file even if a lookup raises; the
    # previous code only referenced ``f.close`` without calling it.
    with open(args[0], 'r') as workflow_file:
        for line in workflow_file:
            workflow = line.rstrip('\n')
            for dataset in phedexSubscription.outputdatasetsWorkflow(url, workflow):
                event_count = dbsTest.getEventCountDataSet(
                    "https://cmsweb.cern.ch", dataset)
                print(dataset + "\t\t" + str(event_count))
    sys.exit(0)
def getDbsProgress(options, wfs_dict, wfs_dict_skipped):
    """Attach per-dataset DBS progress figures to every workflow in *wfs_dict*.

    For each workflow name in ``wfs_dict`` the entry gets a ``'dbsProgress'``
    list with one ``{"dataset": ..., "progress": ...}`` dict per output
    dataset, where ``progress`` is ``str(outputEvents / inputEvents * 100)``.

    A workflow whose output datasets or input event count cannot be looked up
    is reported on stdout, moved from ``wfs_dict`` to ``wfs_dict_skipped``
    (keeping its empty ``'dbsProgress'`` list) and skipped.

    Both dictionaries are modified in place and also returned as the tuple
    ``(wfs_dict, wfs_dict_skipped)``.  A line is appended to ``log_cmst1``
    through ``appendFile`` once all workflows were processed.
    """
    print(cya + "Getting progress from dbs..." + dfa)
    url = "cmsweb.cern.ch"

    # Iterate over a snapshot of the keys: entries are removed from wfs_dict
    # inside the loop, which is only safe when not iterating a live view.
    for wf in list(wfs_dict.keys()):
        wfs_dict[wf]['dbsProgress'] = []
        try:
            outputDataSets = dbsTest.phedexSubscription.outputdatasetsWorkflow(url, wf)
            inputEvents = dbsTest.getInputEvents(url, wf)
        except Exception:
            # ``Exception`` instead of a bare ``except`` so that Ctrl-C and
            # sys.exit() are not mistaken for a failed lookup.
            print("\t%s'%s' cannot be looked up in dbs, skipping.%s" % (red, wf, dfa))
            wfs_dict_skipped[wf] = wfs_dict.pop(wf)
            continue

        for dataset in outputDataSets:
            outputEvents = dbsTest.getEventCountDataSet(dataset)
            # NOTE(review): an inputEvents value of 0 raises ZeroDivisionError
            # here; confirm whether that can happen for real workflows.
            wfs_dict[wf]['dbsProgress'].append({
                "dataset": dataset,
                "progress": str(outputEvents / float(inputEvents) * 100),
            })

    if options.verbose:
        print(cya + "Added dbs progress info to workflow dictionary." + dfa)
    # NOTE(review): the log entry is assumed to be written regardless of
    # options.verbose -- confirm against the intended layout.
    appendFile(log_cmst1, "== " + str(datetime.datetime.now()) + " == progress queried from dbs ==\n")
    return wfs_dict, wfs_dict_skipped
def classifyRequests(url, requests):
    """Print a table of Monte Carlo requests whose first dataset has duplicate lumis.

    A three-line table header is always printed.  Every entry of *requests*
    that has a ``'type'`` key equal to ``'MonteCarloFromGEN'`` or
    ``'MonteCarlo'`` is then inspected: the first output dataset reported by
    ``phedexSubscription.outputdatasetsWorkflow`` is checked with
    ``duplicateLumi`` and, when that check is truthy, one table row plus a
    separator line is printed.

    Nothing is returned.
    """
    header_rule = '-----------------------------------------------------------------------------------------------------------------------------------------------------------'
    print(header_rule)
    print('| Request |req Type |Status Req | Dataset |Status Dataset | Percentage|FilterEfficiency| ')
    print(header_rule)

    for request in requests:
        if 'type' not in request:
            continue
        name = request['request_name']
        if request['type'] not in ('MonteCarloFromGEN', 'MonteCarlo'):
            continue

        datasets = phedexSubscription.outputdatasetsWorkflow(url, name)
        if len(datasets) < 1:
            continue
        dataset = datasets[0]

        # The small offset keeps the division below defined when the
        # reported input event count is 0.
        inputEvents = 0.0001 + int(dbsTest.getInputEvents(url, name))
        outputEvents = dbsTest.getEventCountDataSet(dataset)
        percentage = outputEvents / float(inputEvents)

        if not duplicateLumi(dataset):
            continue

        # Only datasets flagged by duplicateLumi make it into the table.
        FilterEfficiency = getFilterEfficiency(url, name)
        datasetStatus = getDatasetStatus(dataset)
        row = (name, request['type'], request['status'], dataset,
               datasetStatus, str(percentage * 100), FilterEfficiency)
        print('| %20s | %8s| %8s | %20s | %10s| %10s | %10s| ' % row)
        print('---------------------------------------------------------------------------------------------------------------------------')
def classifyRequests(url, requests):
    """Report Monte Carlo requests whose first output dataset has duplicate lumis.

    Prints a table header, then walks *requests*.  Only entries carrying a
    ``'type'`` of ``'MonteCarloFromGEN'`` or ``'MonteCarlo'`` and at least one
    output dataset are examined.  When ``duplicateLumi`` is truthy for the
    first dataset, a row with the request name, type, status, dataset,
    dataset status, output/input percentage and filter efficiency is printed,
    followed by a separator line.

    The function has no return value.
    """
    monte_carlo_types = ('MonteCarloFromGEN', 'MonteCarlo')
    row_format = '| %20s | %8s| %8s | %20s | %10s| %10s | %10s| '

    for header_line in (
            '-----------------------------------------------------------------------------------------------------------------------------------------------------------',
            '| Request |req Type |Status Req | Dataset |Status Dataset | Percentage|FilterEfficiency| ',
            '-----------------------------------------------------------------------------------------------------------------------------------------------------------'):
        print(header_line)

    for request in requests:
        if 'type' in request:
            name = request['request_name']
            if request['type'] in monte_carlo_types:
                workflow_datasets = phedexSubscription.outputdatasetsWorkflow(
                    url, name)
                if len(workflow_datasets) < 1:
                    continue
                dataset = workflow_datasets[0]

                # Start from a tiny non-zero value so the ratio is defined
                # even if the input event count comes back as 0.
                events_in = 0.0001
                events_in = events_in + int(dbsTest.getInputEvents(url, name))
                events_out = dbsTest.getEventCountDataSet(dataset)
                ratio = events_out / float(events_in)

                if duplicateLumi(dataset):
                    # Extra lookups are made only for rows that get printed.
                    FilterEfficiency = getFilterEfficiency(url, name)
                    datasetStatus = getDatasetStatus(dataset)
                    print(row_format % (
                        name, request['type'], request['status'], dataset,
                        datasetStatus, str(ratio * 100), FilterEfficiency))
                    print('---------------------------------------------------------------------------------------------------------------------------')
def testEventCountWorkflow(url, workflow):
    """Print a warning for each output dataset holding more events than the input.

    The input event count comes from ``dbsTest.getInputEvents`` and the output
    datasets from ``phedexSubscription.outputdatasetsWorkflow``.  For every
    dataset whose event count divided by the input count exceeds 1, a message
    with the workflow, the dataset and the ratio in percent is printed.

    Returns 1.
    """
    total_input = 0 + dbsTest.getInputEvents(url, workflow)
    for dataset in phedexSubscription.outputdatasetsWorkflow(url, workflow):
        total_output = dbsTest.getEventCountDataSet(dataset)
        ratio = total_output / float(total_input)
        if ratio > 1.0:
            print("Workflow: " + workflow + " duplicate events in outputdataset: " + dataset + " percentage: " + str(ratio * 100) + "%")
    # NOTE(review): the return is assumed to sit at function level, i.e. 1 is
    # returned whether or not anything was reported -- confirm.
    return 1
def testEventCountWorkflow(url, workflow):
    """Flag output datasets of *workflow* that contain more events than its input.

    Fetches the input event count with ``dbsTest.getInputEvents``, then walks
    the datasets returned by ``phedexSubscription.outputdatasetsWorkflow``.
    Whenever a dataset's event count is more than 100% of the input count, a
    one-line report naming the workflow, dataset and percentage is printed.

    Returns 1.
    """
    events_in = 0
    events_in = events_in + dbsTest.getInputEvents(url, workflow)
    output_datasets = phedexSubscription.outputdatasetsWorkflow(url, workflow)

    for dataset in output_datasets:
        events_out = dbsTest.getEventCountDataSet(dataset)
        fraction = events_out / float(events_in)
        if not float(fraction) > float(1):
            continue
        message_parts = [
            "Workflow: ", workflow,
            " duplicate events in outputdataset: ", dataset,
            " percentage: ", str(events_out / float(events_in) * 100),
            "%",
        ]
        print("".join(message_parts))

    # NOTE(review): assumed to be the function-level return (always 1),
    # not a return inside the loop -- confirm.
    return 1
def main():
    """Print the event count of every output dataset of the listed workflows.

    Unless ``--correct_env`` was passed, the script re-runs itself through
    ``os.system``: it sources the grid environment script and invokes
    ``python2.6`` with the original command line plus ``--correct_env``, then
    exits with status 0.

    With ``--correct_env`` set, exactly one positional argument is expected:
    the path of a text file with one workflow name per line.  For each output
    dataset of each workflow a line with the dataset name, two tabs and the
    value of ``dbsTest.getEventCountDataSet`` is printed.

    The function always ends in ``sys.exit(0)``, also after the usage message.
    """
    parser = optparse.OptionParser()
    parser.add_option("--correct_env", action="store_true", dest="correct_env")
    (options, args) = parser.parse_args()

    if not options.correct_env:
        # NOTE(review): the arguments are pasted into a shell command without
        # quoting, so paths containing spaces or shell metacharacters will
        # not survive the re-invocation.
        command = " ".join(sys.argv)
        os.system(
            "source /afs/cern.ch/project/gd/LCG-share/current_3.2/etc/profile.d/grid-env.sh; python2.6 "
            + command + " --correct_env"
        )
        sys.exit(0)

    if len(args) != 1:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("usage: python getRelValDsetNames.py <inputFile_containing_a_list_of_workflows>")
        sys.exit(0)

    url = "cmsweb.cern.ch"
    # ``with`` closes the file even when a lookup raises; the previous code
    # only referenced ``f.close`` without calling it.
    with open(args[0], "r") as workflow_file:
        for line in workflow_file:
            workflow = line.rstrip("\n")
            for dataset in phedexSubscription.outputdatasetsWorkflow(url, workflow):
                event_count = dbsTest.getEventCountDataSet("https://cmsweb.cern.ch", dataset)
                print(dataset + "\t\t" + str(event_count))
    sys.exit(0)
def main():
    """List the output datasets of each workflow together with their event count.

    Takes a single command-line argument, the path of a file containing one
    workflow name per line.  Every output dataset reported by
    ``phedexSubscription.outputdatasetsWorkflow`` is printed on its own line,
    followed by two tabs and the count from ``dbsTest.getEventCountDataSet``.

    Always terminates via ``sys.exit(0)``; a wrong number of arguments prints
    a usage message first.
    """
    args = sys.argv[1:]
    if len(args) != 1:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("usage: python getRelValDsetNames.py <inputFile_containing_a_list_of_workflows>")
        sys.exit(0)

    inputFile = args[0]
    url = 'cmsweb.cern.ch'
    workflow_list = open(inputFile, 'r')
    try:
        for line in workflow_list:
            workflow = line.rstrip('\n')
            outputDataSets = phedexSubscription.outputdatasetsWorkflow(url, workflow)
            for dataset in outputDataSets:
                outputEvents = dbsTest.getEventCountDataSet("https://cmsweb.cern.ch", dataset)
                print(dataset + "\t\t" + str(outputEvents))
    finally:
        # Actually call close(); the bare attribute reference ``f.close``
        # used before was a no-op and left the file open.
        workflow_list.close()
    sys.exit(0)
def getDbsProgress(options, wfs_dict, wfs_dict_skipped):
    """Record, per workflow, how far each output dataset has progressed in DBS.

    Every entry of ``wfs_dict`` receives a ``'dbsProgress'`` list.  For each
    output dataset of the workflow a dict with the keys ``"dataset"`` and
    ``"progress"`` is appended, the latter being the output event count as a
    percentage of the input event count, converted with ``str``.

    If either the output datasets or the input event count cannot be
    retrieved, a message is printed and the entry is moved from ``wfs_dict``
    into ``wfs_dict_skipped``.

    Returns the tuple ``(wfs_dict, wfs_dict_skipped)``; both dictionaries are
    also updated in place.  ``appendFile`` is called once with ``log_cmst1``
    and a timestamped message after the loop.
    """
    print(cya + "Getting progress from dbs..." + dfa)
    url = "cmsweb.cern.ch"

    # list(...) gives a stable copy of the workflow names, so removing
    # entries below cannot disturb the iteration.
    workflow_names = list(wfs_dict)
    for wf in workflow_names:
        entry = wfs_dict[wf]
        entry['dbsProgress'] = []

        try:
            outputDataSets = dbsTest.phedexSubscription.outputdatasetsWorkflow(
                url, wf)
            inputEvents = dbsTest.getInputEvents(url, wf)
        except Exception:
            # Catch lookup failures only; a bare ``except`` would also
            # swallow KeyboardInterrupt and SystemExit.
            print("\t%s'%s' cannot be looked up in dbs, skipping.%s" % (
                red, wf, dfa))
            wfs_dict_skipped[wf] = entry
            del wfs_dict[wf]
            continue

        for dataset in outputDataSets:
            outputEvents = dbsTest.getEventCountDataSet(dataset)
            # NOTE(review): raises ZeroDivisionError when inputEvents is 0;
            # confirm whether such workflows can occur.
            percent_done = outputEvents / float(inputEvents) * 100
            entry['dbsProgress'].append({
                "dataset": dataset,
                "progress": str(percent_done),
            })

    if options.verbose:
        print(cya + "Added dbs progress info to workflow dictionary." + dfa)
    # NOTE(review): assumed to run independently of options.verbose -- confirm.
    appendFile(
        log_cmst1,
        "== " + str(datetime.datetime.now()) + " == progress queried from dbs ==\n")
    return wfs_dict, wfs_dict_skipped