logProc.writeAck(expected - 1, 'sent') else: temp = packet(seq, None) recvSocket.sendto(pickle.dumps(temp), senderAddr) logProc.writeAck(seq, 'sent') else: temp_buffer.append(pkt) temp = packet(expected - 1, None) recvSocket.sendto(pickle.dumps(temp), senderAddr) logProc.writeAck(expected - 1, 'sent') if count == length: break logProc.writeEnd(throughput) recvSocket.close() if __name__ == '__main__': try: throughput = 0.0 expected = 0 dstFile = None temp_buffer = list() logProc = logHandler() fileReceiver() except Exception: err = traceback.format_exc() ErrorLog(str(err))
# Accumulators describing the PAT tuples scheduled for removal.
# pat to remove
id = []
storagePath = []
dbsPublish = []
CffFilePath = []

# Accumulators describing the TopTrees scheduled for removal.
# toptree to remove
idTop = []
storagePathTop = []
mergedTopLocation = []

# logging: an empty file name makes logHandler write to stdout
log = logHandler("")

# update grid-proxy for srm commands
crab = CRABHandler("", "", log)
crab.checkGridProxy(False)

#rmSRMdir("/pnfs/iihe/cms/store/user/dhondt/QCD_Pt-20to30_EMEnriched_TuneZ2_7TeV-pythia6/Spring11-PU_S1_START311_V1G1-v1/29032011_213110/TOPTREE")
#sys.exit(1)

#### Remove DataSet -> ALL associated PatTuples -> All associated TopTrees
# Optional switch: mirror all output to stdout instead of the log file.
optParser.add_option(
    "", "--log-stdout",
    action="store_true",
    dest="stdout",
    default=False,
    help="Write output to stdout and not to logs/log-*.txt",
    metavar="",
)

(options, args) = optParser.parse_args()

#if options.cmssw_ver == None:
#    optParser.error("Please specify a CMSSW version.\n")

#################
## DEFINITIONS ##
#################

# An empty log-file name makes logHandler write to stdout.
if options.stdout:
    log = logHandler("")
else:
    log = logHandler("logs/SIMProductionWorkflow.txt")

#################
## MAIN METHOD ##
#################

# Create our Queue to store requests:
requestsPool = Queue.Queue(0)

# check the # of workers to start
nWorkers = getnWorkers()
# pat to remove id = [] storagePath = [] dbsPublish = [] CffFilePath = [] # toptree to remove idTop = [] storagePathTop = [] mergedTopLocation = [] # logging log = logHandler("") # update grid-proxy for srm commands crab = CRABHandler("", "", log) crab.checkGridProxy(False) #rmSRMdir("/pnfs/iihe/cms/store/user/dhondt/QCD_Pt-20to30_EMEnriched_TuneZ2_7TeV-pythia6/Spring11-PU_S1_START311_V1G1-v1/29032011_213110/TOPTREE") #sys.exit(1) #### Remove DataSet -> ALL associated PatTuples -> All associated TopTrees if not options.cleanup and not options.rmDataSet == "None":
# test suite
# for getting stuff from fjr files
#
# Manual FJR parsing kept for reference:
# from fjrHandler import FJRHandler,GreenBoxHandler
# from xml.sax import make_parser
# from xml.sax.handler import ContentHandler
# file="CMSSW_4_1_4_patch4_TopTreeProd_41X_v4/src/ConfigurationFiles/WWtoAnything_TuneZ2_7TeV-pythia6-tauola/Spring11-PU_S1_START311_V1G1-v1/17062011_105711/TOPTREE_Spring11-PU_S1_START311_V1G1-v1_17062011_105711/res/crab_fjr_100.xml"
# parser = make_parser()
# handler = FJRHandler()
# parser.setContentHandler(handler)
# parser.parse(open(file))
# print handler.getEventsProcessed()
# print handler.getFrameworkExitCode().split("\n")[0]

from CrabHandler import CRABHandler
from logHandler import logHandler

# Build a handler for one finished CRAB task and check its framework
# job reports (logHandler("") logs to stdout).
crab = CRABHandler(
    "1234567",
    "CMSSW_4_1_4_patch4_TopTreeProd_41X_v4/src/ConfigurationFiles/WWtoAnything_TuneZ2_7TeV-pythia6-tauola/Spring11-PU_S1_START311_V1G1-v1/17062011_105711/",
    logHandler(""),
)
crab.UIWorkingDir = "TOPTREE_Spring11-PU_S1_START311_V1G1-v1_17062011_105711"
crab.checkFJR()
import unittest

from logHandler import logHandler

# NOTE(review): logHandler(...) presumably returns a handler object, not a
# string; assertEqual below compares it against file contents, which only
# passes if logHandler defines equality against str -- confirm intent.
output_string = logHandler("input.log")


class LogHandler(unittest.TestCase):
    """Checks that logHandler output matches the recorded log file."""

    def test_logHandler(self):
        # Use a context manager so the handle is always closed, and avoid
        # shadowing the builtin 'file'.
        with open('output.log', 'r') as handle:
            actual_string = handle.read()
        self.assertEqual(output_string, actual_string)


if __name__ == '__main__':
    unittest.main()
from logHandler import logHandler
from CrabHandler import CRABHandler

# Bootstrap grid credentials: a CRAB handler bound to the current
# directory, logging to stdout (empty log-file name).
crab = CRABHandler("", ".", logHandler(""))
crab.createGridProxy()
crab.createMyProxyCredentials()

#print crab.pickProxy()
# Placeholders filled in later by the FASTSIM production workflow.
GENFASTSIM_CFFPath = ""
GENFASTSIM_PublishName = "/a/b/c"
GENFASTSIM_nEvents = ""
GENFASTSIM_PNFSLocation = ""
GENFASTSIM_jobEff = ""
GENFASTSIM_LHEFiles = ""

################
## LOG SYSTEM ##
################

## provide the desired logfile name to logHandler
## if you provide an empty string the output will be written on the stdOut
log = logHandler("" if options.stdout else logFileName)

# store datasetname for error-messages
log.setDataSet(options.publish)

# if not doDry:
#     log.sendErrorMails=bool(True) # FIXME

##################
## MAIN ROUTINE ##
##################

log.output("--------------------------------------------")
log.output("--> Automated FAST SIMULATION production <--")
# Workflow bookkeeping variables, filled in as production progresses.
GENFASTSIM_CFFPath = ""
GENFASTSIM_PublishName = "/a/b/c"
GENFASTSIM_nEvents = ""
GENFASTSIM_PNFSLocation = ""
GENFASTSIM_jobEff = ""
GENFASTSIM_LHEFiles = ""

################
## LOG SYSTEM ##
################

## provide the desired logfile name to logHandler
## if you provide an empty string the output will be written on the stdOut
if options.stdout:
    log = logHandler("")
else:
    log = logHandler(logFileName)

#store datasetname for error-messages
log.setDataSet(options.publish)

#if not doDry:
#    log.sendErrorMails=bool(True) # FIXME

##################
## MAIN ROUTINE ##
##################

log.output("--------------------------------------------")
log.output("--> Automated FAST SIMULATION production <--")
from logHandler import logHandler
from CrabHandler import CRABHandler

# Create/renew the grid proxy and the MyProxy credentials used by the
# production tools; log output goes to stdout.
crab = CRABHandler(
    "",
    ".",
    logHandler(""),
)
crab.createGridProxy()
crab.createMyProxyCredentials()

#print crab.pickProxy()
def process(self): # setup proper log location self.log = logHandler("logs/log-TopDB-CleaningAgent-"+str(self.ID)+".txt") self.log.output("****** Removing "+self.removeType+" with ID "+str(self.removeId)+" as requested by "+self.user+" ******") # pat to remove id = [] storagePath = [] dbsPublish = [] CffFilePath = [] # toptree to remove idTop = [] storagePathTop = [] mergedTopLocation = [] storagePathTopMail=[] ## REMOVE ONLY TOPTREE if self.removeType == "toptree": self.sql.createQuery("SELECT","toptrees","id,StoragePath,TopTreeLocation","id = '"+str(self.removeId)+"'") result = self.sql.execQuery().split('\n') if len(result) == 1: self.log.output(" ---> ERROR: TopTree was not found in TopDB") return 1 else: idTop.append(result[1].split("\t")[0]) storagePathTop.append(result[1].split("\t")[1]) mergedTopLocation.append(result[1].split("\t")[2]) ## REMOVE PAT + ALL DOWNSTREAM TOPTREES elif self.removeType == "patuple": self.sql.createQuery("SELECT","patuples","id,StoragePath,name,CffFilePath","id = '"+str(self.removeId)+"'") result = self.sql.execQuery().split('\n') if len(result) == 1: self.log.output(" ---> RRROR: PatTuple was not found in TopDB") return 1 else: id.append(result[1].split("\t")[0]) storagePath.append(result[1].split("\t")[1]) dbsPublish.append(result[1].split("\t")[2]) CffFilePath.append(result[1].split("\t")[3]) self.sql.createQuery("SELECT","toptrees","id,StoragePath,TopTreeLocation","patuple_id = '"+id[len(id)-1].split("\\n")[0]+"'") result2 = self.sql.execQuery().split('\n') if len(result2) > 1: for j in range(1,len(result2)-1): idTop.append(result2[j].split("\t")[0]) storagePathTop.append(result2[j].split("\t")[1]) mergedTopLocation.append(result2[j].split("\t")[2]) ## REMOVE DATASET + ALL DOWNSTREAM PAT + ALL DOWNSTREAM TOPTREES elif self.removeType == "dataset": self.sql.createQuery("SELECT","patuples","id,StoragePath,name,CffFilePath","dataset_id = '"+str(self.removeId)+"'") result = self.sql.execQuery().split('\n') if len(result) > 1: for 
i in range(1,len(result)-1): id.append(result[i].split("\t")[0]) storagePath.append(result[i].split("\t")[1]) dbsPublish.append(result[i].split("\t")[2]) CffFilePath.append(result[i].split("\t")[3]) self.sql.createQuery("SELECT","toptrees","id,StoragePath,TopTreeLocation","patuple_id = '"+id[len(id)-1].split("\\n")[0]+"'") result2 = self.sql.execQuery().split('\n') if len(result2) > 1: for j in range(1,len(result2)-1): idTop.append(result2[j].split("\t")[0]) storagePathTop.append(result2[j].split("\t")[1]) mergedTopLocation.append(result2[j].split("\t")[2]) ## CLEAN LEFTOVER FILES FROM FAILED PRODUCTION, CLEAN PATUPLES WITHOUT ANY TOPTREES elif self.removeType == "cleanpnfs": self.log.output("--> Cleaning up PNFS area for dhondt") self.log.output(" ---> Searching for PNFS directories from broken production") dirs = [] for dir in os.listdir("/pnfs/iihe/cms/store/user/dhondt/"): if not dir.rfind("Skimmed-TopTrees") == -1: continue; #if dir.rfind("7TeV_T2") == -1: continue # this is just to make testing go fast pExe = Popen("find /pnfs/iihe/cms/store/user/dhondt/"+dir+" -name TOPTREE", shell=True, stdin=PIPE, stdout=PIPE, stderr=STDOUT, close_fds=True) out = pExe.stdout.read() for file in out.split("\n"): split = file.split("/") dirName = "" for i in xrange(0,len(split)-1): dirName += split[i]+"/" dirName = dirName.rstrip("/") if dirs.count(dirName) == 0 and len(dirName) > 0: dirs.append(dirName+"/TOPTREE") self.log.output(" ----> "+str(len(dirs))+" directory(s) found in total, cross-referencing TopDB...") for i in xrange(0,len(dirs)): self.sql.createQuery("SELECT","toptrees","id","StoragePath REGEXP '"+dirs[i]+"'") result = self.sql.execQuery().split('\n') self.sql.createQuery("SELECT","patuples","id","StoragePath REGEXP '"+dirs[i]+"'") result2 = self.sql.execQuery().split('\n') self.sql.createQuery("SELECT","gensims","id","PNFSPath REGEXP '"+dirs[i]+"'") result3 = self.sql.execQuery().split('\n') self.sql.createQuery("SELECT","recos","id","PNFSPath REGEXP 
'"+dirs[i]+"'") result4 = self.sql.execQuery().split('\n') if len(result) < 2 and len(result2) < 2 and len(result3) < 2 and len(result4) < 2 and storagePathTopMail.count(dirs[i]) == 0: filestat = os.stat(dirs[i]) filedate = filestat.st_mtime now = int(time.time()) last_mod=int(filedate) time_diff=now-last_mod if time_diff/(60*60) > 720: # just want the dir to be old enough to not remove ongoing prod self.log.output(" ----> Directory "+dirs[i]+" is not in TopDB, it should be removed! (Age: "+str(time_diff/(60*60*24))+" days)") #idTop.append(-9999) storagePathTopMail.append(dirs[i]) self.log.output(" ----> "+str(len(storagePathTopMail))+" directory(s) need removal!") self.log.output(" ---> Searching for PATuples that don't have a TopTree assigned") self.sql.createQuery("SELECT","patuples","id,StoragePath,name,CffFilePath","") result2 = self.sql.execQuery().split('\n') self.sql.createQuery("SELECT","toptrees","patuple_id","") result3 = self.sql.execQuery().split('\n') for i in result2: if i == "" or not i.rfind("id") == -1: continue tmpid = i.split("\t")[0] found=bool(False) for j in result3: if j == "": continue if tmpid == j: found=bool(True) #if not found: #id.append(i.split("\t")[0]) #storagePath.append(i.split("\t")[1]) #dbsPublish.append(i.split("\t")[2]) #CffFilePath.append(i.split("\t")[3]) msg = "Dear admins," if len(storagePathTopMail) > 0: msg += "\n\n The automatic TopDB PNFS cleaning tool has found "+str(len(storagePathTopMail))+" directories on PNFS not corresponding to any entry in the TopDB database." msg += "\n\n Please have a look at the following list:" for s in storagePathTopMail: msg += "\n\n \t rm -rfv "+s else: msg += "\n\n The automatic TopDB PNFS cleaning tool has found NO directories on PNFS not corresponding to any entry in the TopDB database." 
msg += "\n\nCheers,\nHector the cleaning agent" mail = MailHandler() mail.sendMail("error","Report from TopDB PNFS cleaning",msg) ## CLEAN LEFTOVER FILES FROM FAILED PRODUCTION, CLEAN PATUPLES WITHOUT ANY TOPTREES elif self.removeType == "cleancrablogs": days=50 self.log.output(" ---> Listing Configuration directories") self.log.output(" ---> Checking every Configuration directory (older than "+str(days)+" days) for large amounts of *.stdout from CRAB") ldirs = [] cleanup_ldirsToRemove = [] basedir=(Popen("echo $HOME", shell=True, stdin=PIPE, stdout=PIPE, stderr=STDOUT, close_fds=True).stdout.read()).strip()+"/AutoMaticTopTreeProducer/" for dir in os.listdir(basedir): if dir.rfind("CMSSW_") == -1: continue; pExe = Popen("find "+basedir+dir.strip()+"/ -name crab_*.cfg", shell=True, stdin=PIPE, stdout=PIPE, stderr=STDOUT, close_fds=True) out = pExe.stdout.read() for file in out.split("\n"): split = file.split("/") dirName = "" for i in xrange(0,len(split)-1): dirName += split[i]+"/" dirName = dirName.rstrip("/") if ldirs.count(dirName) == 0 and len(dirName) > 0: ldirs.append(dirName.split("/AutoMaticTopTreeProducer/")[1]) # becase we don't want it to crash on changes /home /user # time to clean out some big chunks of stdout files if not dirName == "": if not dirName.find("find: ") == -1: dirName = dirName.split("find: ")[1] #print dirName filestat = os.stat(dirName) filedate = filestat.st_mtime now = int(time.time()) last_mod=int(filedate) time_diff=now-last_mod if time_diff/(60*60*24) > days: #self.log.output(" ---> Cleaning CRAB stdout files in "+dirName+" (Age: "+str(time_diff/(3600*24))+" days)") crabdir="" for dir in os.listdir(dirName): if not dir.rfind("TOPTREE_") == -1 and dir.rfind(".py") == -1 and os.path.isdir(dirName+"/"+dir): crabdir=dirName+"/"+dir if not crabdir == "": numfiles=int(0) keepstdout="" keepstderr="" keepxml="" if os.path.exists(crabdir+"/log/crab.log"): self.log.output(" ---> Cleaning crab.log in "+crabdir+"/log/ (Age: 
"+str(time_diff/(3600*24))+" days)") os.unlink(crabdir+"/log/crab.log") #sys.exit(1) for file in os.listdir(crabdir+"/res"): if not file.rfind(".stdout") == -1: if os.path.getsize(crabdir+"/res/"+file) > 0 and keepstdout == "": keepstdout=file numfiles=numfiles+1 #print keepstdout if not os.path.isdir(crabdir+"/res"): numfiles=0 #print numfiles #print str(numfiles)+" "+dirName if numfiles > 2 and dirName.rfind("Run201") == -1: print numfiles self.log.output(" ---> Cleaning CRAB stdout files in "+crabdir+" (Age: "+str(time_diff/(3600*24))+" days)") keepstderr=keepstdout.split(".stdout")[0]+".stderr" keepxml="crab_fjr_"+(keepstdout.split(".stdout")[0]).split("CMSSW_")[1]+".xml" for file in os.listdir(crabdir+"/res"): if not os.path.isdir(crabdir+"/res/"+file) and file.rfind("Submission") == -1 and file.rfind(".json") == -1 and not file == keepxml and not file == keepstdout and not file == keepstderr: self.log.output(" ---> Removing crab output "+file) os.unlink(crabdir+"/res/"+file) elif not file.rfind("Submission") == -1: self.log.output(" ---> Removing old Submission_X dir: "+file) shutil.rmtree(crabdir+"/res/"+file) elif not dirName.rfind("Run201") == -1: if os.path.exists(crabdir+"/res/.shrunk"): continue self.log.output(" ---> (DATA PRODUCTION) Removing unuseful lines from stdout files in "+crabdir+" (Age: "+str(time_diff/(3600*24))+" days)") for file in os.listdir(crabdir+"/res"): if not file.rfind("Submission") == -1: self.log.output(" ---> Removing old Submission_X dir: "+file) shutil.rmtree(crabdir+"/res/"+file) elif not os.path.isdir(crabdir+"/res/"+file) and file.rfind("Submission") == -1 and file.rfind(".json") == -1 and not file.rfind(".stdout") == -1: self.log.output(" ---> Shrinking crab output "+file) tmpfile = open(crabdir+"/res/"+file+"_tmp","w") for line in open(crabdir+"/res/"+file): if line.rfind("Begin processing") == -1 and line.rfind("Vertex") == -1 and line.rfind("%MSG") == -1: tmpfile.write(line) os.unlink(crabdir+"/res/"+file) 
os.rename(crabdir+"/res/"+file+"_tmp",crabdir+"/res/"+file) f = open(crabdir+"/res/.shrunk","w") # leave a stamp that this dir is fixed f.close() self.log.output(" ----> "+str(len(ldirs))+" Configuration directory(s) found in total, cross-referencing TopDB...") self.log.output("") ldirs = [] # disable this for now for i in xrange(0,len(ldirs)): self.sql.createQuery("SELECT","toptrees","id","CffFilePath REGEXP '"+ldirs[i]+"'") result = self.sql.execQuery().split('\n') self.sql.createQuery("SELECT","patuples","id","CffFilePath REGEXP '"+ldirs[i]+"'") result2 = self.sql.execQuery().split('\n') self.sql.createQuery("SELECT","gensims","id","CffPath REGEXP '"+ldirs[i]+"'") result3 = self.sql.execQuery().split('\n') self.sql.createQuery("SELECT","recos","id","CffPath REGEXP '"+ldirs[i]+"'") result4 = self.sql.execQuery().split('\n') if len(result) < 2 and len(result2) < 2 and len(result3) < 2 and len(result4) < 2 and cleanup_ldirsToRemove.count(ldirs[i]) == 0: filestat = os.stat(basedir+"/"+ldirs[i]) filedate = filestat.st_mtime now = int(time.time()) last_mod=int(filedate) time_diff=now-last_mod if time_diff/(60*60*24) > days: # just want the dir to be old enough to not remove ongoing prod self.log.output(" ----> Directory "+ldirs[i]+" is not in TopDB, it should be removed! 
(Age: "+str(time_diff/(60*60*24))+" days)") cleanup_ldirsToRemove.append(ldirs[i]) self.log.output(" ----> "+str(len(cleanup_ldirsToRemove))+" directory(s) need removal!") ## SUMMARY OF THE REMOVAL self.log.output(" --> Summary of the removal") for i in range(0,len(id)): self.log.output(" * Removing PATtuple with ID "+str(id[i])+" at "+storagePath[i]) for i in range(0,len(idTop)): self.log.output(" * Removing TopTree with ID "+str(idTop[i])+" at "+storagePathTop[i]) #if self.removeType == "cleanpnfs": # return 0; # START REMOVAL time.sleep(20) log.output(" --> Starting the removal procedure") rm = RemoveHelper(self.sql,self.log) for i in range(0,len(id)): rm.rmSRMdir(storagePath[i]) rm.rmFromTopDB("patuples",id[i]) rm.invalDBS(dbsPublish[i],CffFilePath[i]) for i in range(0,len(idTop)): rm.rmSRMdir(storagePathTop[i]) if idTop[i] > 0: rm.rmFromTopDB("toptrees",idTop[i]) if self.removeType == "dataset": rm.rmFromTopDB("datasets",self.removeId) self.log.output(" --> Ended removal procedure") return 0
# Allow diverting log output to stdout from the command line.
optParser.add_option(
    "",
    "--log-stdout",
    action="store_true",
    dest="stdout",
    default=False,
    help="Write output to stdout and not to logs/log-*.txt",
    metavar="",
)

(options, args) = optParser.parse_args()

#if options.cmssw_ver == None:
#    optParser.error("Please specify a CMSSW version.\n")

#################
## DEFINITIONS ##
#################

# Empty log-file name -> logHandler writes to stdout.
if options.stdout:
    log = logHandler("")
else:
    log = logHandler("logs/CleaningAgentWorkFlow.txt")

#################
## MAIN METHOD ##
#################

# Create our Queue to store requests:
requestsPool = Queue.Queue(0)

# check the # of workers to start
nWorkers = getnWorkers()
# test suite
# for getting stuff from fjr files
#
# Manual FJR parsing, kept for reference:
#from fjrHandler import FJRHandler,GreenBoxHandler
#from xml.sax import make_parser
#from xml.sax.handler import ContentHandler
#file="CMSSW_4_1_4_patch4_TopTreeProd_41X_v4/src/ConfigurationFiles/WWtoAnything_TuneZ2_7TeV-pythia6-tauola/Spring11-PU_S1_START311_V1G1-v1/17062011_105711/TOPTREE_Spring11-PU_S1_START311_V1G1-v1_17062011_105711/res/crab_fjr_100.xml"
#parser = make_parser()
#handler = FJRHandler()
#parser.setContentHandler(handler)
#parser.parse(open(file))
#print handler.getEventsProcessed()
#print handler.getFrameworkExitCode().split("\n")[0]

from CrabHandler import CRABHandler
from logHandler import logHandler

# Check the framework job reports of one finished CRAB task; the empty
# log-file name sends log output to stdout.
task_id = "1234567"
config_dir = "CMSSW_4_1_4_patch4_TopTreeProd_41X_v4/src/ConfigurationFiles/WWtoAnything_TuneZ2_7TeV-pythia6-tauola/Spring11-PU_S1_START311_V1G1-v1/17062011_105711/"
crab = CRABHandler(task_id, config_dir, logHandler(""))
crab.UIWorkingDir = "TOPTREE_Spring11-PU_S1_START311_V1G1-v1_17062011_105711"
crab.checkFJR()