def checkFile(job):
    """Validate one ROOT file and report it if it is unusable.

    `job` is the file path. Paths starting with 'root://' skip the
    filesystem checks; every other path must exist and be non-empty.
    All paths must pass checkRootFile (looking for "Events") and
    deepCheckRootFile.

    Returns the path as a string if any check fails, otherwise None.
    """
    from Analysis.Tools.helpers import checkRootFile, deepCheckRootFile

    path = job
    if not path.startswith('root://'):
        # Cheap filesystem checks first, before the file is opened.
        if not os.path.exists(path):
            return str(path)
        if not os.stat(path).st_size > 0:
            return str(path)

    if not checkRootFile(path, checkForObjects=["Events"]):
        return str(path)
    if not deepCheckRootFile(path):
        return str(path)
    return None
def checkRootFile( file ):
    """Run the helper checks on `file`, log the outcome and return it.

    Chains checkRootFile (for "Events"), deepCheckRootFile and
    deepCheckWeight from Analysis.Tools.helpers, stopping at the first
    one that fails. The value of that chain is returned.
    """
    logger.info("Checking root file: %s"%file)

    # Aliased on import: this wrapper has the same name as the helper it calls.
    from Analysis.Tools.helpers import checkRootFile as _basicCheck, deepCheckRootFile as _deepCheck, deepCheckWeight as _weightCheck

    valid = _basicCheck( file, checkForObjects=["Events"] )
    if valid:
        valid = _deepCheck( file )
    if valid:
        valid = _weightCheck( file )

    if not valid:
        logger.info("Corrupt root file: %s"%file)
    else:
        logger.info("Check done!")
    return valid
def checkFile(file):
    """Check one file as requested on the command line; report and optionally remove it if corrupt.

    The checks enabled by args.check, args.deepcheck and args.checkWeight
    run in that order, each one only while the file still looks fine.
    A corrupt file is logged with the hephyse redirector prefix cut off
    and, if args.remove is set, handed to `/usr/bin/rfrm -f`.
    """
    if args.log:
        logger.info("Checking filepath: %s" % file)

    corrupt = not checkRootFile(file, checkForObjects=["Events"]) if args.check else False
    if args.deepcheck and not corrupt:
        corrupt = not deepCheckRootFile(file)
    if args.checkWeight and not corrupt:
        corrupt = not deepCheckWeight(file)

    if not corrupt:
        return

    # Log and remove using the path without the redirector prefix.
    redirector = "root://hephyse.oeaw.ac.at/"
    if file.startswith(redirector):
        file = file.split(redirector)[1]
    logger.info("File corrupt: %s" % file)

    if not args.remove:
        return
    logger.info("Removing file: %s" % file)
    os.system("/usr/bin/rfrm -f %s" % file)
def _removeCopiedFile( target, local ):
    """ Delete a copied file: with rm if it is local, otherwise with removeDPMFiles. """
    if local:
        os.system("rm %s"%target)
    else:
        removeDPMFiles( target )

def copyDPMFiles( fromPath, toPath, toLocal=False, fromLocal=False, rootFileCheck=False ):
    """ Copy files or directories, including subdirectories, with xrdcp.

        fromPath      -- source path(s), expanded with convertToPathList
        toPath        -- destination directory, created with os.makedirs (local) or makeDPMDir
        toLocal       -- destination is local: no redirector prefix is added to it
        fromLocal     -- source is local: no redirector prefix is added to it
        rootFileCheck -- verify each copied file with checkRootFile; a failing copy is
                         removed and copied again, and removed for good if it still
                         fails after 10 checks

        Subdirectories are copied recursively with the same options, including rootFileCheck.
    """
    fromPathList = convertToPathList( fromPath, local=fromLocal )

    if toLocal:
        if not os.path.isdir( toPath ):
            os.makedirs( toPath )
    else:
        makeDPMDir( toPath )

    # Redirector prefixes are only added on the non-local side(s)
    fromPrefix = "" if fromLocal else redir
    toPrefix   = "" if toLocal else redir

    for file in fromPathList:
        if not isFile( file ):
            # Directory: copy its content into a subdirectory of the same name
            subdir      = file.split("/")[-1]
            subFromPath = os.path.join( file, "*" )
            subToPath   = os.path.join( toPath, subdir+"/" )
            # Pass rootFileCheck on, otherwise files in subdirectories are never verified
            copyDPMFiles( subFromPath, subToPath, toLocal=toLocal, fromLocal=fromLocal, rootFileCheck=rootFileCheck )
            continue

        logger.info( "Copying %s to %s"%( file, toPath ) )
        cmd = "xrdcp -r %s%s %s%s"%( fromPrefix, file, toPrefix, toPath )
        os.system( cmd )

        if not rootFileCheck:
            continue

        target  = "%s/%s"%( toPath, file.split("/")[-1] )
        corrupt = True
        for _ in range(10):
            if checkRootFile( "%s%s"%( toPrefix, target ) ):
                corrupt = False
                break
            # remove files if corrupt (dont do -rf, otherwise it always copies)
            logger.info("Corrupt root file! Removing file and trying again!")
            _removeCopiedFile( target, toLocal )
            os.system( cmd ) #try again if corrupt

        if corrupt:
            # The last re-copy was not verified any more, so it is not kept
            _removeCopiedFile( target, toLocal )
# Walk the directory structure and group files in 'jobs' of [f1_0.root, f1_1.root, ...] tootalling to approx. sizeGB jobs = [] for dirName, subdirList, fileList in os.walk(options.dir): rootFiles = [] for f in fileList: if f.endswith('.root'): full_filename = os.path.join(dirName, f) if not '_reHadd_' in f: to_skip = False for skip in options.skip: if skip in f: logger.info( "Found skip string %s in %s. Skipping.", skip, f ) to_skip = True break if to_skip: continue isOK = checkRootFile( full_filename, checkForObjects = [options.treeName]) \ if options.treeName is not None else checkRootFile( full_filename ) if isOK: rootFiles.append( f ) else: logger.warning( "File %s does not look OK. Checked for tree: %r", full_filename, options.treeName ) else: logger.info( "Found '_reHadd_' in file %s in %s. Skipping.", full_filename, dirName ) job = [] jobsize = 0 for fname in rootFiles: filename, file_extension = os.path.splitext(fname) n_str = filename.split('_')[-1] if n_str.isdigit(): full_filename = os.path.join(dirName, fname) jobsize += os.path.getsize( full_filename )
if args.toCBEEos: if not args.target.startswith("/eos"): raise ValueError( "The target path needs to start with the /eos path when using --toCBEEos!" ) source = args.redirector + args.file target = args.target + args.file if args.noCheck: if os.path.exists(target): print "Found file %s. No check. Skip." % target sys.exit(0) else: from Analysis.Tools.helpers import checkRootFile, deepCheckRootFile if os.path.exists(target) and checkRootFile( target, checkForObjects=["Events"]) and deepCheckRootFile(target): print "Found file %s. Checked it. Skip." % target sys.exit(0) executable = ["echo", "xrdcp"] if args.noCopy else [which("xrdcp")] cmd = executable + [ "-p", "-f", source, clip_redirector + target if args.toCBEEos else target ] subprocess.Popen(cmd) time.sleep(float(args.sleep))
#!/usr/bin/env python import os, sys from Analysis.Tools.helpers import checkRootFile, deepCheckRootFile def get_parser(): ''' Argument parser for post-processing module. ''' import argparse argParser = argparse.ArgumentParser() argParser.add_argument('f') args = argParser.parse_args() if os.path.exists(args.f) and checkRootFile( args.f, checkForObjects=["Events"]) and deepCheckRootFile(args.f): print "passed:", args.f else: print "failed:", args.f
try:
    # NOTE: rstrip('.root') strips trailing characters from the set {'.', 'r', 'o', 't'}, not the suffix.
    # It cannot strip past the last '_', so the second-to-last field read here is not affected.
    nanoAOD_list = list( set([ int(f.rstrip('.root').split('_')[-2]) for f in nanoAODFiles ]))
except ValueError: #possibly 'SPLIT1'
    nanoAOD_list = [0]

if len(allRootFiles) > 0:
    rootFiles = [ os.path.join(dirPath, filename) for filename in allRootFiles ]

    if args.check != 'None':
        # Select the file check matching the requested mode
        if args.check == 'deep':
            check_f = lambda f: checkRootFile( prefix + f, ["Events"]) and deepCheckRootFile(prefix + f)
        elif args.check == 'normal':
            check_f = lambda f: checkRootFile(prefix + f, ["Events"])

        len_before = len(rootFiles)
        # list(): filter() returns an iterator on Python 3, and len() is taken below
        rootFiles = list(filter(check_f, rootFiles))

        if len_before > len(rootFiles):
            logger.warning("Path: %s : %i/%i files failed %s check", dirPath, len_before - len(rootFiles), len_before, args.check)
        elif len_before == len(rootFiles):
            logger.debug("Sample %s: All %i files passed %s check", sample, len(rootFiles), args.check)
else:
    rootFiles = []
    logger.warning("Path does not exist or no files found: %s", dirPath)
if not os.path.exists( output_directory ):
    try:
        os.makedirs( output_directory )
        logger.info( "Created output directory %s.", output_directory )
    except OSError:
        # Only filesystem errors are tolerated here (presumably the directory was created in the
        # meantime by another job); KeyboardInterrupt and SystemExit are no longer swallowed.
        logger.info( "Directory %s already exists.", output_directory )

# checking overwrite or file exists
sel = "&&".join(skimConds)
# Yield of the source sample with the skim selection, compared below with the existing target
nEvents = sample.getYieldFromDraw(weightString="1", selectionString=sel)['val']

if not options.overwrite:
    if os.path.isfile(targetFilePath):
        logger.info( "Output file %s found.", targetFilePath)
        if checkRootFile( targetFilePath, checkForObjects=["Events"] ) and deepCheckRootFile( targetFilePath ) and deepCheckWeight( targetFilePath ):
            logger.info( "File already processed. Source: File check ok!" ) # Everything is fine, no overwriting
            logger.info( "Checking the normalization of the sample." )
            existingSample = Sample.fromFiles( "existing", targetFilePath, treeName = "Events" )
            nEventsExist = existingSample.getYieldFromDraw(weightString="1")['val']
            if nEvents == nEventsExist:
                logger.info( "File already processed. Normalization file check ok! Skipping." ) # Everything is fine, no overwriting
                sys.exit(0)
            else:
                # Readable file with a different yield: remove it so it gets reprocessed
                logger.info( "Target events not equal to processing sample events! Is: %s, should be: %s!"%(nEventsExist, nEvents) )
                logger.info( "Removing file from target." )
                os.remove( targetFilePath )
                logger.info( "Reprocessing." )
        else:
            logger.info( "File corrupt. Removing file from target." )
            os.remove( targetFilePath )
'''
    import argparse
    argParser = argparse.ArgumentParser(description = "Argument parser for nanoPostProcessing")
    # All options are plain on/off flags
    argParser.add_argument('--check', action='store_true', help="check root files?")
    argParser.add_argument('--deepcheck', action='store_true', help="check events of root files?")
    argParser.add_argument('--remove', action='store_true', help="remove corrupt root files?")
    argParser.add_argument('--log', action='store_true', help="print each filename?")
    return argParser

args = get_parser().parse_args()

# Nothing to do unless at least one of the two checks was requested
if not (args.check or args.deepcheck): sys.exit(0)

# check Root Files
from Analysis.Tools.helpers import checkRootFile, deepCheckRootFile
for file in Run2017.files:
    if args.log: logger.info( "Checking filepath: %s"%file )
    corrupt = False
    if args.check:
        corrupt = not checkRootFile(file, checkForObjects=["Events"])
    # The deep check only runs if the file is not already known to be corrupt
    if args.deepcheck and not corrupt:
        corrupt = not deepCheckRootFile(file)
    if corrupt:
        # Log and remove using the path without the redirector prefix
        if file.startswith("root://hephyse.oeaw.ac.at/"):
            file = file.split("root://hephyse.oeaw.ac.at/")[1]
        logger.info( "File corrupt: %s"%file )
        if args.remove:
            logger.info( "Removing file: %s"%file )
            os.system( "/usr/bin/rfrm -f %s"%file )
shutil.rmtree(tmp_output_directory)

try:    #Avoid trouble with race conditions in multithreading
    os.makedirs(tmp_output_directory)
    logger.info( "Created output directory %s.", tmp_output_directory )
except OSError:
    # Best effort: filesystem errors from makedirs are ignored, but KeyboardInterrupt and
    # SystemExit are no longer swallowed as they were by the bare except.
    pass

# Final location of the output file, and the temporary file that is written first
target_outfilename = os.path.join(storage_directory, sample.name + '.root')
filename, ext = os.path.splitext( os.path.join(tmp_output_directory, sample.name + '.root') )
outfilename   = filename+ext

if not options.overwrite:
    if os.path.isfile(target_outfilename):
        logger.info( "Output file %s found.", target_outfilename)
        if checkRootFile( target_outfilename, checkForObjects=["Events"] ) and deepCheckRootFile( target_outfilename ) and deepCheckWeight( target_outfilename ):
            logger.info( "File already processed. Source: File check ok! Skipping." ) # Everything is fine, no overwriting
            sys.exit(0)
        else:
            # An existing target that fails a check is removed and produced again
            logger.info( "File corrupt. Removing file from target." )
            os.remove( target_outfilename )
            logger.info( "Reprocessing." )
    else:
        logger.info( "Sample not processed yet." )
        logger.info( "Processing." )
else:
    logger.info( "Overwriting.")

# relocate original
sample.copy_files( os.path.join(tmp_output_directory, "input") )
# NOTE(review): the next two statements look like the tail of a filler function whose header is above this chunk - confirm
# The first LHE weight is stored on its own, the following ones (at most nine) as the "LHE" vector collection
event.LHE_weight_original = LHE_weights[0]
fill_vector_collection(event, "LHE", ["weight"], [{ 'weight': w } for w in LHE_weights[1:10]])

# Remember the current ROOT directory; it is made current again after the output file and maker are set up
tmp_dir = ROOT.gDirectory

output_filename = os.path.join(output_directory, sample.name + '.root')

# Log files are written next to the output file, with the same base name
_logger.add_fileHandler(output_filename.replace('.root', '.log'), args.logLevel)
_logger_rt.add_fileHandler(output_filename.replace('.root', '_rt.log'), args.logLevel)

# Quit if a valid output file already exists and --overwrite was not given
# NOTE(review): 'not args.overwrite' is the last operand, so the file checks also run when overwriting
if os.path.exists(output_filename) and checkRootFile( output_filename, checkForObjects=[ "Events" ]) and deepCheckRootFile(output_filename) and not args.overwrite:
    logger.info("File %s found. Quit.", output_filename)
    sys.exit(0)

# FWLite reader if this is an EDM file
reader = sample.fwliteReader(products=products)

# 'recreate' replaces an already existing output file
output_file = ROOT.TFile(output_filename, 'recreate')
output_file.cd()
maker = TreeMaker(sequence=[filler], variables=[TreeVariable.fromString(x) for x in variables], treeName="Events")
tmp_dir.cd()