def __init__(self, config, sampleIdentifier, regions, splitFilesChunks=1, chunkNumber=1, splitFilesChunkSize=-1, forceRedo=False, fileList=None):
    """Collect samples, region cuts and input file selection for one sample.

    Args:
        config: configuration object offering ``get(section, option)``.
        sampleIdentifier: identifier of the sample to process (stored as-is).
        regions: iterable of region names; duplicates are removed and every
            name must be an option of the ``Cuts`` config section.
        splitFilesChunks: stored as-is.
        chunkNumber: stored as-is.
        splitFilesChunkSize: stored as-is.
        forceRedo: stored as-is.
        fileList: compressed file list handed to ``FileList.decompress``;
            any falsy value results in ``self.fileList = None``.
    """
    self.config = config
    self.sampleIdentifier = sampleIdentifier
    # set() drops duplicate regions; the resulting order is not guaranteed
    self.regions = list(set(regions))
    self.forceRedo = forceRedo
    self.sampleTree = None

    # sample definitions
    self.samplesPath = self.config.get('Directories', 'plottingSamples')
    self.samplesDefinitions = self.config.get('Directories', 'samplesinfo')
    self.samplesInfo = ParseInfo(self.samplesDefinitions, self.samplesPath)
    self.sampleFilesFolder = self.config.get('Directories', 'samplefiles')

    # NOTE(review): eval() executes whatever the config file contains, so
    # this is only safe for trusted configuration files.
    self.sampleNames = eval(self.config.get('Plot_general', 'samples'))
    self.dataNames = eval(self.config.get('Plot_general', 'Data'))
    self.samples = self.samplesInfo.get_samples(self.sampleNames + self.dataNames)

    # one cut string per region, read from the 'Cuts' section
    self.regionsDict = {}
    for region in self.regions:
        self.regionsDict[region] = {'cut': config.get('Cuts', region)}

    self.splitFilesChunkSize = splitFilesChunkSize
    self.splitFilesChunks = splitFilesChunks
    self.chunkNumber = chunkNumber
    self.fileList = FileList.decompress(fileList) if fileList else None

    VHbbNameSpace = config.get('VHbbNameSpace', 'library')
    returnCode = ROOT.gSystem.Load(VHbbNameSpace)
    # TSystem::Load returns 0 on success and 1 if the library was already
    # loaded; only negative codes signal a failure.
    if returnCode < 0:
        print("\x1b[31mERROR: loading VHbbNameSpace failed with code %d\x1b[0m" % returnCode)
    else:
        print("INFO: loaded VHbbNameSpace: %s" % VHbbNameSpace)
def __init__(self, config, regions, sampleToCache, splitFilesChunks=1, chunkNumber=1, splitFilesChunkSize=-1, forceRedo=False, fileList=None, verbose=False):
    """Store the caching settings and create one Datacard per region.

    Args:
        config: configuration object, passed on to every ``Datacard``.
        regions: iterable of region names.
        sampleToCache: stored as-is.
        splitFilesChunks, chunkNumber, splitFilesChunkSize: stored as-is;
            settings which part of the input files to process.
        forceRedo: stored as-is.
        fileList: compressed file list for ``FileList.decompress``; a falsy
            value results in ``self.fileList = None``.
        verbose: verbosity flag; when falsy, the presence of the
            ``XBBDEBUG`` environment variable is used instead.
    """
    # the XBBDEBUG environment variable can switch on verbose mode as well
    if verbose:
        self.verbose = verbose
    else:
        self.verbose = 'XBBDEBUG' in os.environ

    self.config = config
    self.regions = regions
    self.treeCaches = []
    self.sampleTree = None
    self.sampleToCache = sampleToCache
    self.forceRedo = forceRedo

    # settings which part of input files to process
    self.splitFilesChunkSize = splitFilesChunkSize
    self.splitFilesChunks = splitFilesChunks
    self.chunkNumber = chunkNumber
    if fileList:
        self.fileList = FileList.decompress(fileList)
    else:
        self.fileList = None

    # initialize Datacard objects, one per region
    datacards = []
    for regionName in self.regions:
        datacards.append(Datacard(config=self.config, region=regionName))
    self.dcMakers = datacards
def __init__(self, config, regions, sampleToCache, splitFilesChunks=1, chunkNumber=1, splitFilesChunkSize=-1, forceRedo=False, fileList=None, verbose=False):
    """Store the caching settings and create one Datacard per region.

    Args:
        config: configuration object, passed on to every ``Datacard``.
        regions: iterable of region names.
        sampleToCache: stored as-is.
        splitFilesChunks, chunkNumber, splitFilesChunkSize: stored as-is;
            settings which part of the input files to process.
        forceRedo: stored as-is.
        fileList: compressed file list for ``FileList.decompress``; a falsy
            value results in ``self.fileList = None``.
        verbose: stored as-is.
    """
    self.verbose = verbose
    self.config = config
    self.regions = regions
    self.treeCaches = []
    self.sampleTree = None
    self.sampleToCache = sampleToCache
    self.forceRedo = forceRedo

    # settings which part of input files to process
    self.splitFilesChunkSize = splitFilesChunkSize
    self.splitFilesChunks = splitFilesChunks
    self.chunkNumber = chunkNumber

    # a falsy file list means no explicit list was given
    if not fileList:
        self.fileList = None
    else:
        self.fileList = FileList.decompress(fileList)

    # initialize Datacard objects, one per region
    makers = []
    for regionName in self.regions:
        makers.append(Datacard(config=self.config, region=regionName))
    self.dcMakers = makers
def __init__(self, config, sampleIdentifier, regions, splitFilesChunks=1, chunkNumber=1, splitFilesChunkSize=-1, forceRedo=False, fileList=None):
    """Collect samples, region cuts and input file selection for one sample.

    Args:
        config: configuration object offering ``get(section, option)``.
        sampleIdentifier: identifier of the sample to process (stored as-is).
        regions: iterable of region names; duplicates are removed and every
            name must be an option of the ``Cuts`` config section.
        splitFilesChunks: stored as-is.
        chunkNumber: stored as-is.
        splitFilesChunkSize: stored as-is.
        forceRedo: stored as-is.
        fileList: compressed file list handed to ``FileList.decompress``;
            any falsy value results in ``self.fileList = None``.
    """
    self.config = config
    self.sampleIdentifier = sampleIdentifier
    # set() drops duplicate regions; the resulting order is not guaranteed
    self.regions = list(set(regions))
    self.forceRedo = forceRedo
    self.sampleTree = None

    # sample definitions
    self.samplesPath = self.config.get('Directories', 'plottingSamples')
    self.samplesInfo = ParseInfo(samples_path=self.samplesPath, config=self.config)
    self.sampleFilesFolder = self.config.get('Directories', 'samplefiles')

    # NOTE(review): eval() executes whatever the config file contains, so
    # this is only safe for trusted configuration files.
    self.sampleNames = list(eval(self.config.get('Plot_general', 'samples')))
    self.dataNames = list(eval(self.config.get('Plot_general', 'Data')))
    self.samples = self.samplesInfo.get_samples(self.sampleNames + self.dataNames)

    # one cut string per region, read from the 'Cuts' section
    self.regionsDict = {}
    for region in self.regions:
        self.regionsDict[region] = {'cut': config.get('Cuts', region)}

    self.splitFilesChunkSize = splitFilesChunkSize
    self.splitFilesChunks = splitFilesChunks
    self.chunkNumber = chunkNumber
    self.fileList = FileList.decompress(fileList) if fileList else None

    VHbbNameSpace = config.get('VHbbNameSpace', 'library')
    returnCode = ROOT.gSystem.Load(VHbbNameSpace)
    # TSystem::Load returns 0 on success and 1 if the library was already
    # loaded; only negative codes signal a failure.
    if returnCode < 0:
        print("\x1b[31mERROR: loading VHbbNameSpace failed with code %d\x1b[0m" % returnCode)
    else:
        print("INFO: loaded VHbbNameSpace: %s" % VHbbNameSpace)
# Parse the command line, decompress the file list and read the configuration.
argv = sys.argv
parser = OptionParser()
parser.add_option("-S", "--sampleIdentifier", dest="sampleIdentifier", default="",
                  help="samples you want to run on")
parser.add_option("-C", "--config", dest="config", default=[], action="append",
                  help="configuration defining the plots to make")
parser.add_option("-f", "--fileList", dest="fileList", default="",
                  help="list of files you want to run on")
parser.add_option("-b", "--addCollections", dest="addCollections", default="",
                  help="collections to add: vtype")
parser.add_option("-F", "--force", dest="force", action="store_true",
                  help="overwrite existing files", default=False)
(opts, args) = parser.parse_args(argv)

# "-C" uses action="append" with default [], so an unset option is an empty
# list and never equal to ""; test for emptiness instead.
if not opts.config:
    opts.config = "config"

filelist = FileList.decompress(opts.fileList) if len(opts.fileList) > 0 else None
# filelist is None when no --fileList was given, so len() must not be called
# on it unconditionally.
if filelist:
    print("len(filelist) %d filelist[0]: %s" % (len(filelist), filelist[0]))
else:
    print("len(filelist) 0")

debug = 'XBBDEBUG' in os.environ
config = BetterConfigParser()
config.read(opts.config)
anaTag = config.get("Analysis", "tag")
# NOTE(review): eval() executes whatever the config file contains, so this is
# only safe for trusted configuration files.
TrainFlag = eval(config.get('Analysis', 'TrainFlag'))
btagLibrary = config.get('BTagReshaping', 'library')
samplesinfo = config.get('Directories', 'samplesinfo')
channel = config.get('Configuration', 'channel')
VHbbNameSpace = config.get('VHbbNameSpace', 'library')
# read arguments
argv = sys.argv
parser = OptionParser()
parser.add_option("-v", "--verbose", action="store_true", dest="verbose", default=False,
                  help="Verbose mode.")
parser.add_option("-C", "--config", dest="config", default=[], action="append",
                  help="configuration file")
parser.add_option("-s", "--sampleIdentifier", dest="sampleIdentifier", default='',
                  help="sample identifier (no subsample!)")
parser.add_option("-i", "--chunkNumber", dest="chunkNumber", default='',
                  help="number of part to cache")
parser.add_option("-f", "--force", action="store_true", dest="force", default=False,
                  help="force overwriting of already cached files")
parser.add_option("-l", "--fileList", dest="fileList", default="",
                  help="file list")
(opts, args) = parser.parse_args(argv)

# "-C" uses action="append" with default [], so an unset option is an empty
# list and never equal to ""; test for emptiness instead.
if not opts.config:
    opts.config = "config"

# Import after configure to get help message
from myutils import BetterConfigParser, mvainfo, ParseInfo

# load config
config = BetterConfigParser()
config.read(opts.config)

# merge the files of the requested chunk; int() raises ValueError when
# --chunkNumber is missing, because its default is the empty string
partialFileMerger = PartialFileMerger(
    FileList.decompress(opts.fileList),
    int(opts.chunkNumber),
    config=config,
    sampleIdentifier=opts.sampleIdentifier,
    force=opts.force,
)
partialFileMerger.run()
def __init__(self, opts):
    """Read the configuration and prepare the list of sub-jobs.

    Args:
        opts: parsed command line options. The attributes read here are
            ``fileList``, ``config``, ``inputDir``, ``outputDir``,
            ``sampleIdentifier``, ``addCollections`` and ``join``.

    Exits the process with code 1 unless ``opts.sampleIdentifier`` matches
    exactly one sample. Without a file list no sub-jobs are created.
    """
    # get file list; fall back to an empty list so that a missing --fileList
    # does not crash on len(None) / iteration over None
    self.filelist = FileList.decompress(opts.fileList) if opts.fileList else []
    if self.filelist:
        print("len(filelist) %d filelist[0]: %s" % (len(self.filelist), self.filelist[0]))
    else:
        print("len(filelist) 0")

    # config
    self.debug = 'XBBDEBUG' in os.environ
    self.verifyCopy = True
    self.opts = opts
    self.config = BetterConfigParser()
    self.config.read(opts.config)
    samplesinfo = self.config.get('Directories', 'samplesinfo')
    self.channel = self.config.get('Configuration', 'channel')

    # load namespace, TODO
    VHbbNameSpace = self.config.get('VHbbNameSpace', 'library')
    ROOT.gSystem.Load(VHbbNameSpace)

    # directories
    self.pathIN = self.config.get('Directories', opts.inputDir)
    self.pathOUT = self.config.get('Directories', opts.outputDir)
    self.tmpDir = self.config.get('Directories', 'scratch')
    print('INput samples:\t%s' % self.pathIN)
    print('OUTput samples:\t%s' % self.pathOUT)
    self.fileLocator = FileLocator(config=self.config)

    # check if given sample identifier uniquely matches a sample from config
    matchingSamples = ParseInfo(samplesinfo, self.pathIN).find(identifier=opts.sampleIdentifier)
    if len(matchingSamples) != 1:
        print("ERROR: need exactly 1 sample identifier as input with -S !!")
        print(matchingSamples)
        exit(1)
    self.sample = matchingSamples[0]

    # collections: comma separated list, empty entries are ignored
    self.collections = [x.strip() for x in opts.addCollections.split(',') if x.strip()]
    if len(self.collections) < 1:
        print("\x1b[31mWARNING: no collections added! Specify the collections to add with the --addCollections option!\x1b[0m")
    print('collections to add: %s' % (self.collections,))
    self.collections = self.parseCollectionList(self.collections)
    print('after parsing: %s' % (self.collections,))

    # temporary folder to save the files of this job on the scratch
    temporaryName = self.sample.identifier + '/' + uuid.uuid4().hex

    def makeSubJob(inputFileNames):
        # Build one sub-job description; output and temporary file are named
        # after the first input file.
        # translate naming convention of .txt file to imported files after the prep step
        namesAfterPrep = [self.fileLocator.getFilenameAfterPrep(x) for x in inputFileNames]
        pathTemplate = "{path}/{subfolder}/{filename}"
        return {
            'inputFileNames': inputFileNames,
            'localInputFileNames': [
                pathTemplate.format(path=self.pathIN, subfolder=self.sample.identifier, filename=name)
                for name in namesAfterPrep
            ],
            'outputFileName': pathTemplate.format(path=self.pathOUT, subfolder=self.sample.identifier, filename=namesAfterPrep[0]),
            'tmpFileName': pathTemplate.format(path=self.tmpDir, subfolder=temporaryName, filename=namesAfterPrep[0]),
        }

    # input files
    self.subJobs = []
    if opts.join:
        print("INFO: join input files! " "This is an experimental feature!")
        # a joined job needs at least one input file to derive its output name
        if self.filelist:
            self.subJobs.append(makeSubJob(self.filelist))
    else:
        # create separate subjob for all files (default!)
        for inputFileName in self.filelist:
            self.subJobs.append(makeSubJob([inputFileName]))
def test_FileListDamaged2(self):
    # A compressed file list whose first character was overwritten must be
    # rejected by decompress().
    intact = FileList.compress(self.fileList)
    # corrupt the file list
    corrupted = 'H' + intact[1:]
    self.assertRaises(Exception, FileList.decompress, corrupted)
def test_FileListDamaged(self):
    # A compressed file list that lost its last character must be rejected
    # by decompress().
    intact = FileList.compress(self.fileList)
    # corrupt the file list by removing the last character
    truncated = intact[:-1]
    self.assertRaises(Exception, FileList.decompress, truncated)
def test_FileList(self):
    # Round trip: decompress(compress(x)) must give back the original list.
    packed = FileList.compress(self.fileList)
    unpacked = FileList.decompress(packed)
    # report how much the compression saves
    plainLength = len(';'.join(self.fileList))
    print('uncompressed length:', plainLength)
    packedLength = len(packed)
    print('compressed length:', packedLength)
    self.assertEqual(self.fileList, unpacked)
def test_FileListEmpty(self):
    # Show what an empty list compresses to, then check that a bare prefix
    # without any payload is rejected by decompress().
    emptyCompressed = FileList.compress([])
    print('empty:', emptyCompressed)
    # corrupt the file list
    prefixOnly = 'base64:'
    self.assertRaises(Exception, FileList.decompress, prefixOnly)
def test_FileListDamaged2(self):
    # decompress() has to raise when the first character of the compressed
    # file list has been replaced.
    original = FileList.compress(self.fileList)
    # corrupt the file list
    damaged = 'H' + original[1:]
    self.assertRaises(Exception, FileList.decompress, damaged)
def test_FileList(self):
    # Compressing and decompressing again must reproduce the input list.
    compressed = FileList.compress(self.fileList)
    restored = FileList.decompress(compressed)
    # print both sizes for comparison
    sizeBefore = len(';'.join(self.fileList))
    print('uncompressed length:', sizeBefore)
    sizeAfter = len(compressed)
    print('compressed length:', sizeAfter)
    self.assertEqual(self.fileList, restored)
help="number of part to cache") parser.add_option("-f", "--force", action="store_true", dest="force", default=False, help="force overwriting of already cached files") parser.add_option("-l", "--fileList", dest="fileList", default="", help="file list") (opts, args) = parser.parse_args(argv) if opts.config == "": opts.config = "config" # Import after configure to get help message from myutils import BetterConfigParser, mvainfo, ParseInfo # load config config = BetterConfigParser() config.read(opts.config) partialFileMerger = PartialFileMerger( FileList.decompress(opts.fileList), int(opts.chunkNumber), config=config, sampleIdentifier=opts.sampleIdentifier, force=opts.force) partialFileMerger.run()
def __init__(self, opts):
    """Read the configuration and prepare the list of sub-jobs.

    Args:
        opts: parsed command line options. The attributes read here are
            ``fileList``, ``config``, ``inputDir``, ``outputDir``,
            ``sampleIdentifier``, ``addCollections`` and ``join``.

    Exits the process with code 1 unless ``opts.sampleIdentifier`` matches
    exactly one sample. Without a file list no sub-jobs are created.
    """
    # get file list; fall back to an empty list so that a missing --fileList
    # does not crash on len(None) / iteration over None
    self.filelist = FileList.decompress(opts.fileList) if opts.fileList else []
    if self.filelist:
        print("len(filelist) %d filelist[0]: %s" % (len(self.filelist), self.filelist[0]))
    else:
        print("len(filelist) 0")

    # config
    self.debug = 'XBBDEBUG' in os.environ
    self.verifyCopy = True
    self.opts = opts
    self.config = BetterConfigParser()
    self.config.read(opts.config)
    self.channel = self.config.get('Configuration', 'channel')

    # load namespace, TODO
    VHbbNameSpace = self.config.get('VHbbNameSpace', 'library')
    ROOT.gSystem.Load(VHbbNameSpace)

    # directories
    self.pathIN = self.config.get('Directories', opts.inputDir)
    self.pathOUT = self.config.get('Directories', opts.outputDir)
    self.tmpDir = self.config.get('Directories', 'scratch')
    print('INput samples:\t%s' % self.pathIN)
    print('OUTput samples:\t%s' % self.pathOUT)
    self.fileLocator = FileLocator(config=self.config)

    # check if given sample identifier uniquely matches a sample from config
    matchingSamples = ParseInfo(samples_path=self.pathIN, config=self.config).find(identifier=opts.sampleIdentifier)
    if len(matchingSamples) != 1:
        print("ERROR: need exactly 1 sample identifier as input with -S !!")
        print(matchingSamples)
        exit(1)
    self.sample = matchingSamples[0]

    # collections: comma separated list, empty entries are ignored
    self.collections = [x.strip() for x in opts.addCollections.split(',') if x.strip()]
    if len(self.collections) < 1:
        print("\x1b[31mWARNING: no collections added! Specify the collections to add with the --addCollections option!\x1b[0m")
    print('collections to add: %s' % (self.collections,))
    self.collections = self.parseCollectionList(self.collections)
    print('after parsing: %s' % (self.collections,))

    # temporary folder to save the files of this job on the scratch
    temporaryName = self.sample.identifier + '/' + uuid.uuid4().hex

    def makeSubJob(inputFileNames):
        # Build one sub-job description; output and temporary file are named
        # after the first input file.
        # translate naming convention of .txt file to imported files after the prep step
        namesAfterPrep = [self.fileLocator.getFilenameAfterPrep(x) for x in inputFileNames]
        pathTemplate = "{path}/{subfolder}/{filename}"
        return {
            'inputFileNames': inputFileNames,
            'localInputFileNames': [
                pathTemplate.format(path=self.pathIN, subfolder=self.sample.identifier, filename=name)
                for name in namesAfterPrep
            ],
            'outputFileName': pathTemplate.format(path=self.pathOUT, subfolder=self.sample.identifier, filename=namesAfterPrep[0]),
            'tmpFileName': pathTemplate.format(path=self.tmpDir, subfolder=temporaryName, filename=namesAfterPrep[0]),
        }

    # input files
    self.subJobs = []
    if opts.join:
        print("INFO: join input files! " "This is an experimental feature!")
        # a joined job needs at least one input file to derive its output name
        if self.filelist:
            self.subJobs.append(makeSubJob(self.filelist))
    else:
        # create separate subjob for all files (default!)
        for inputFileName in self.filelist:
            self.subJobs.append(makeSubJob([inputFileName]))
def test_FileListEmpty(self):
    # Print the compressed form of an empty list, then make sure that the
    # prefix alone, without payload, makes decompress() raise.
    compressedEmpty = FileList.compress([])
    print('empty:', compressedEmpty)
    # corrupt the file list
    withoutPayload = 'base64:'
    self.assertRaises(Exception, FileList.decompress, withoutPayload)