def finalizeTask(self, update=False, debug=False):
    """Collect this task's finished outputs, register them and mark it FINAL.

    Steps, in order:

    1. List dCache below the part of ``crabConfig.Data.outLFNDirBase`` that
       follows ``/store/user/`` (one trailing slash removed).
    2. Run ``crab log`` for the task's CRAB folder.
    3. Read every ``*.log.tar.gz`` under ``<workArea>/<crabFolder>/results``
       with ``self.readLogArch``.
    4. For each archive, keep the first dCache path containing
       ``<self.name>_<job>.pxlio`` if the archive's ``readEvents`` is > 0.
    5. Store the result in ``self.finalFiles`` / ``self.totalEvents``, call
       ``addData2db(True)`` or ``addMC2db(True)`` depending on
       ``self.isData``, append ``<name>:<inputDataset>`` to the file
       ``finalSample`` in the current working directory, and set
       ``self.state`` to ``"FINAL"``.

    Args:
        update: Not used by this method; kept so existing callers keep working.
        debug: When true, ``crab log`` is run without ``--quiet``.  The
            command's output is captured and discarded either way.

    Raises:
        IndexError: If ``outLFNDirBase`` does not contain ``/store/user/``,
            or if a log archive's file name has no ``_`` separated job field.
    """
    outlfn = self.crabConfig.Data.outLFNDirBase.split("/store/user/")[1]
    if outlfn.endswith("/"):
        outlfn = outlfn[:-1]

    crab = CrabController()
    # Iterated below as a sequence of path sequences ("layers").
    dCacheFiles = gridFunctions.getdcachelist(outlfn)

    # Computed once and reused for both the command and the results glob.
    crabFolder = crab._prepareFoldername(self.name)

    # CRAB likes to be verbose, even if you tell it to be quiet...
    if debug:
        cmd = ["crab", "log", crabFolder]
    else:
        cmd = ["crab", "--quiet", "log", crabFolder]

    # The command is passed as an argv list (no shell), so characters in the
    # folder name can never be interpreted as shell syntax.
    # NOTE(review): presumably this call is what places the log archives in
    # the results folder read below - confirm.
    try:
        p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        # Drain both pipes so the child cannot block; the output is not used.
        p.communicate()
    except OSError:
        # Through the shell, a missing `crab` executable did not abort the
        # finalisation; keep that best-effort behaviour.
        pass

    logArchs = glob.glob(
        "%s/%s/results/*.log.tar.gz" % (self.crabConfig.General.workArea, crabFolder)
    )

    finalFiles = []
    totalEvents = 0
    for logArchName in logArchs:
        # Job number: second "_"-separated field of the archive's file name,
        # up to its first ".".
        jobNumber = logArchName.split("/")[-1].split("_")[1].split(".")[0]
        jobLog = self.readLogArch(logArchName)

        # Check whether the job's output file is on dCache.
        outputName = "%s_%s.pxlio" % (self.name, jobNumber)
        matches = [
            path for layer in dCacheFiles for path in layer if outputName in path
        ]
        if not matches:
            continue

        # Only looked up once a matching file exists, as before.
        readEvents = jobLog["readEvents"]
        if readEvents > 0:
            finalFiles.append({"path": matches[0], "nevents": readEvents})
            totalEvents += readEvents

    self.finalFiles = finalFiles
    self.totalEvents = totalEvents

    if self.isData:
        self.addData2db(True)
    else:
        self.addMC2db(True)

    with open("finalSample", "a") as outfile:
        outfile.write("%s:%s\n" % (self.name, self.crabConfig.Data.inputDataset))

    self.state = "FINAL"
def finalizeTask(self, update=False, debug=False):
    """Collect this task's finished outputs, register them and mark it FINAL.

    Steps, in order:

    1. List dCache below the part of ``crabConfig.Data.outLFNDirBase`` that
       follows ``/store/user/`` (one trailing slash removed).
    2. Run ``crab log`` for the task's CRAB folder.
    3. Read every ``*.log.tar.gz`` under ``<workArea>/<crabFolder>/results``
       with ``self.readLogArch``.
    4. For each archive, keep the first dCache path containing
       ``<self.name>_<job>.pxlio`` if the archive's ``readEvents`` is > 0.
    5. Store the result in ``self.finalFiles`` / ``self.totalEvents``, call
       ``addData2db(True)`` or ``addMC2db(True)`` depending on
       ``self.isData``, append ``<name>:<inputDataset>`` to the file
       ``finalSample`` in the current working directory, and set
       ``self.state`` to ``"FINAL"``.

    Args:
        update: Not used by this method; kept so existing callers keep working.
        debug: When true, ``crab log`` is run without ``--quiet``.  The
            command's output is captured and discarded either way.

    Raises:
        IndexError: If ``outLFNDirBase`` does not contain ``/store/user/``,
            or if a log archive's file name has no ``_`` separated job field.
    """
    outlfn = self.crabConfig.Data.outLFNDirBase.split("/store/user/")[1]
    if outlfn.endswith("/"):
        outlfn = outlfn[:-1]

    crab = CrabController()
    # Iterated below as a sequence of path sequences ("layers").
    dCacheFiles = gridFunctions.getdcachelist(outlfn)

    # Computed once and reused for both the command and the results glob.
    crabFolder = crab._prepareFoldername(self.name)

    # CRAB likes to be verbose, even if you tell it to be quiet...
    if debug:
        cmd = ["crab", "log", crabFolder]
    else:
        cmd = ["crab", "--quiet", "log", crabFolder]

    # The command is passed as an argv list (no shell), so characters in the
    # folder name can never be interpreted as shell syntax.
    # NOTE(review): presumably this call is what places the log archives in
    # the results folder read below - confirm.
    try:
        p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        # Drain both pipes so the child cannot block; the output is not used.
        p.communicate()
    except OSError:
        # Through the shell, a missing `crab` executable did not abort the
        # finalisation; keep that best-effort behaviour.
        pass

    logArchs = glob.glob(
        "%s/%s/results/*.log.tar.gz" % (self.crabConfig.General.workArea, crabFolder)
    )

    finalFiles = []
    totalEvents = 0
    for logArchName in logArchs:
        # Job number: second "_"-separated field of the archive's file name,
        # up to its first ".".
        jobNumber = logArchName.split("/")[-1].split("_")[1].split(".")[0]
        jobLog = self.readLogArch(logArchName)

        # Check whether the job's output file is on dCache.
        outputName = "%s_%s.pxlio" % (self.name, jobNumber)
        matches = [
            path for layer in dCacheFiles for path in layer if outputName in path
        ]
        if not matches:
            continue

        # Only looked up once a matching file exists, as before.
        readEvents = jobLog["readEvents"]
        if readEvents > 0:
            finalFiles.append({"path": matches[0], "nevents": readEvents})
            totalEvents += readEvents

    self.finalFiles = finalFiles
    self.totalEvents = totalEvents

    if self.isData:
        self.addData2db(True)
    else:
        self.addMC2db(True)

    with open("finalSample", "a") as outfile:
        outfile.write("%s:%s\n" % (self.name, self.crabConfig.Data.inputDataset))

    self.state = "FINAL"
def getFilesfromFile(cfgFile):
    """Build the dCache file lists for every sample named in a config file.

    The file is read line by line:

    * lines starting with ``#`` and blank lines are skipped;
    * a line containing ``tag=``, ``user=``, ``config=`` or ``mainFolder=``
      (checked in that order) sets that value to the stripped text between
      the first and second ``=``; the value applies to the samples that follow;
    * every other line is a sample name.

    For each sample the folder ``/<user>/<mainFolder>/<tag>/<sample>`` is
    looked up (``mainFolder`` becomes ``"MUSiC"`` if it is still empty).  The
    listing is cached in ``data/<folder without slashes>.pkl``: an existing
    cache file is read with ``readDcachePickle``; otherwise ``getdcachelist``
    is called (``mem_limit`` 1000000000 when the sample name contains
    ``"Data"``, else 4000000000) and the result is pickled.  The ``data``
    directory is created in the current working directory if it is missing.

    Args:
        cfgFile: Path of the sample configuration file.

    Returns:
        dict mapping each sample name to ``[file_lists, config]``.  Samples
        with an empty listing are printed and left out.
    """
    sampleList = {}
    user, tag, sample, config, mainFolder = "", "", "", "", ""

    with open(cfgFile, "r") as cfg:
        if not os.path.exists("data"):
            os.mkdir("data")

        for line in cfg:
            if line[0] == "#" or len(line.split()) == 0:
                continue

            # Setting lines update the value used for the samples below them.
            if "tag=" in line:
                tag = line.split("=")[1].strip()
                continue
            if "user=" in line:
                user = line.split("=")[1].strip()
                continue
            if "config=" in line:
                config = line.split("=")[1].strip()
                continue
            if "mainFolder=" in line:
                mainFolder = line.split("=")[1].strip()
                continue

            sample = line.strip()
            log.debug(" ".join([user, tag, sample, config]))
            if mainFolder == "":
                mainFolder = "MUSiC"

            folder = "/%s/%s/%s/%s" % (user, mainFolder, tag, sample)
            cachePath = "data/" + folder.replace("/", "") + ".pkl"

            if os.path.exists(cachePath):
                # Reuse the listing stored by an earlier run.
                known_folders = readDcachePickle(cachePath)
                file_lists = [list(chunk) for chunk in known_folders[folder]]
            else:
                if "Data" in sample:
                    mem_limit = 1000000000
                else:
                    mem_limit = 4000000000
                listing = getdcachelist(folder, sample, mem_limit=mem_limit)
                file_lists = [list(chunk) for chunk in listing]
                # Reached only when the cache file does not exist yet, so the
                # append mode starts from an empty file.
                with open(cachePath, "a+b") as outfile:
                    cPickle.dump({folder: file_lists}, outfile, -1)

            if len(file_lists) > 0:
                # NOTE(review): this is a membership test on each inner list,
                # so it only drops a list holding the exact entry "failed";
                # confirm whether a per-path substring filter was intended.
                file_lists = [chunk for chunk in file_lists if "failed" not in chunk]
                sampleList.update({sample: [file_lists, config]})
            else:
                print(sample)
                # raise IOError( 'No sample in List for folder '+sample )

    return sampleList