def check_retrieve(self, filename, md5sum, dlurl):
  filename = os.path.expandvars(filename)
  basefile = os.path.basename(filename)
  dirname = os.path.dirname(filename)
  from RingerCore.FileIO import checkFile
  if not checkFile(filename, md5sum):
    self._logger.info('Downloading %s to avoid doing it on server side.', basefile)
    import urllib
    if not os.path.isdir(dirname):
      from RingerCore import mkdir_p
      mkdir_p(dirname)
    urllib.urlretrieve(dlurl, filename=filename)
  else:
    self._logger.info('%s already downloaded.', filename)
def check_retrieve(self, filename, md5sum, dlurl):
  filename = os.path.expandvars(filename)
  basefile = os.path.basename(filename)
  dirname = os.path.dirname(filename)
  from RingerCore.FileIO import checkFile
  if not checkFile(filename, md5sum):
    self._info('Downloading %s to avoid doing it on server side.', basefile)
    import urllib
    if not os.path.isdir(dirname):
      from RingerCore import mkdir_p
      mkdir_p(dirname)
    urllib.urlretrieve(dlurl, filename=filename)
  else:
    self._debug('%s already downloaded.', filename)
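# A minimal usage sketch (hypothetical values: the path, md5 checksum and URL below are
# placeholders, and the call assumes an instance of the class that defines check_retrieve):
#   self.check_retrieve( '$HOME/data/tuningData.npz'
#                      , 'd41d8cd98f00b204e9800998ecf8427e'
#                      , 'http://example.org/tuningData.npz' )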
           RingerOperation.EFCalo,
           ringConfig = 100,
           #referenceSgn = Reference.AcceptAll,
           referenceSgn = Reference.Off_Likelihood,
           #referenceBkg = Reference.Truth,
           referenceBkg = Reference.Off_Likelihood,
           treePath = treePath,
           pattern_oFile = outputFile,
           l1EmClusCut = 20,
           l2EtCut = 19,
           efEtCut = 24,
           etBins = etBins,
           etaBins = etaBins,
           crossVal = crossVal,
           nClusters = 5000,
           #efficiencyValues = [97.0, 2.0],
           toMatlab = True)

from RingerCore import mkdir_p
mkdir_p(outputFile)
import os
os.system(('mv %s.* %s/') % (outputFile, outputFile))
os.system(('mv *.pdf %s/') % (outputFile))
if not args.subsetDS is None:
  args.append_to_job_submission_option( 'secondaryDSs'
                                      , SecondaryDataset( key="SUBSET", nFilesPerJob=1
                                                        , container=args.subsetDS[0], reusable=True ) )
  subsetStr = '%SUBSET'
elif clusterManagerConf() in (ClusterManager.PBS, ClusterManager.LSF):
  #if args.core_framework is TuningToolCores.keras:
    # Keras runs single-threaded
    #args.set_job_submission_option('ncpus', SubOptionRetrieve( option = '-l', suboption = 'ncpus', value=1 ) )
  # Make sure we have permission to create the directory:
  if not args.dry_run:
    mkdir_p(args.outputDir)
  rootcorebin = os.environ.get('ROOTCOREBIN')
  #setrootcore = os.path.join(rootcorebin,'../setrootcore.sh')
  setrootcore = ''
  # TODO Add to setrootcore the number of cores in the job
  # TODO Set the OMP_NUM_CLUSTER environment to the same value as the one in the job.
  #setrootcore_opts = '--ncpus=%d' % args.get_job_submission_option('ncpus')
  setrootcore_opts = ''
  expandArg = lambda x: ' '.join(x) if x else ''
  tuningJob = os.path.join( rootcorebin, 'user_scripts/TuningTools/standalone/runTuning.py' )
  dataStr, configStr, ppStr, crossFileStr, refStr, subsetStr, expertNetworksStr = \
      expandArg(args.data), '{CONFIG_FILES}', args.ppFile, args.crossFile, args.refFile, \
      args.clusterFile, expandArg(args.expert_networks)
  configFileDir = os.path.abspath(args.configFileDir)
  if os.path.isdir(configFileDir):
    configFiles = getFiles(configFileDir)
def __call__(self, **kw):
  """
    Create a collection of tuning job configuration files at the output folder.
  """
  # Cross validation configuration
  outputFolder   = retrieve_kw( kw, 'outputFolder',   'jobConfig'             )
  neuronBounds   = retrieve_kw( kw, 'neuronBounds',   SeqLoopingBounds(5, 20) )
  sortBounds     = retrieve_kw( kw, 'sortBounds',     PythonLoopingBounds(50) )
  nInits         = retrieve_kw( kw, 'nInits',         100                     )
  # Output configuration
  nNeuronsPerJob = retrieve_kw( kw, 'nNeuronsPerJob', 1                       )
  nSortsPerJob   = retrieve_kw( kw, 'nSortsPerJob',   1                       )
  nInitsPerJob   = retrieve_kw( kw, 'nInitsPerJob',   100                     )
  compress       = retrieve_kw( kw, 'compress',       True                    )
  prefix         = retrieve_kw( kw, 'prefix',         'job'                   )
  if 'level' in kw:
    self.level = kw.pop('level')
  # Make sure that bounds variables are LoopingBounds objects:
  if not isinstance( neuronBounds, SeqLoopingBounds ):
    neuronBounds = SeqLoopingBounds(neuronBounds)
  if not isinstance( sortBounds, SeqLoopingBounds ):
    sortBounds = PythonLoopingBounds(sortBounds)
  # and delete it to avoid mistakes:
  checkForUnusedVars( kw, self._warning )
  del kw
  if nInits < 1:
    self._fatal(("Cannot require zero or negative initialization "
                 "number."), ValueError)
  # Do some checking in the arguments:
  nNeurons = len(neuronBounds)
  nSorts = len(sortBounds)
  if not nSorts:
    self._fatal("Sort bounds is empty.")
  if nNeuronsPerJob > nNeurons:
    self._warning(("The number of neurons per job (%d) is "
                   "greater than the total number of neurons (%d), changing it "
                   "into the maximum possible value."), nNeuronsPerJob, nNeurons )
    nNeuronsPerJob = nNeurons
  if nSortsPerJob > nSorts:
    self._warning(("The number of sorts per job (%d) is "
                   "greater than the total number of sorts (%d), changing it "
                   "into the maximum possible value."), nSortsPerJob, nSorts )
    nSortsPerJob = nSorts
  # Create the output folder:
  mkdir_p(outputFolder)
  # Create the windows in which each job will loop upon:
  neuronJobsWindowList = \
      CreateTuningJobFiles._retrieveJobLoopingBoundsCol( neuronBounds,
                                                         nNeuronsPerJob )
  sortJobsWindowList = \
      CreateTuningJobFiles._retrieveJobLoopingBoundsCol( sortBounds,
                                                         nSortsPerJob )
  initJobsWindowList = \
      CreateTuningJobFiles._retrieveJobLoopingBoundsCol( PythonLoopingBounds( nInits ),
                                                         nInitsPerJob )
  # Loop over windows and create the job configuration
  for neuronWindowBounds in neuronJobsWindowList():
    for sortWindowBounds in sortJobsWindowList():
      for initWindowBounds in initJobsWindowList():
        self._debug(('Retrieved following job configuration '
                     '(bounds.vec) : '
                     '[ neuronBounds=%s, sortBounds=%s, initBounds=%s]'),
                    neuronWindowBounds.formattedString('hn'),
                    sortWindowBounds.formattedString('s'),
                    initWindowBounds.formattedString('i'))
        fulloutput = '{outputFolder}/{prefix}.{neuronStr}.{sortStr}.{initStr}'.format(
                        outputFolder = outputFolder,
                        prefix = prefix,
                        neuronStr = neuronWindowBounds.formattedString('hn'),
                        sortStr = sortWindowBounds.formattedString('s'),
                        initStr = initWindowBounds.formattedString('i') )
        savedFile = TuningJobConfigArchieve( fulloutput,
                                             neuronBounds = neuronWindowBounds,
                                             sortBounds = sortWindowBounds,
                                             initBounds = initWindowBounds ).save( compress )
        self._info('Saved job option configuration at path: %s', savedFile )
parser = ArgumentParser( description = 'Retrieve performance information from the Cross-Validation method.'
                       , parents = [crossValStatsJobParser, loggerParser] )
parser.make_adjustments()

emptyArgumentsPrintHelp( parser )

## Retrieve parser args:
args = parser.parse_args( )

mainLogger = Logger.getModuleLogger(__name__)
mainLogger.level = args.output_level

# Overwrite tempfile in the beginning of the job:
if args.tmpFolder:
  args.tmpFolder = expandPath( args.tmpFolder )
  mkdir_p( args.tmpFolder )
  import tempfile
  tempfile.tempdir = args.tmpFolder

if mainLogger.isEnabledFor( LoggingLevel.DEBUG ):
  import cProfile, pstats, StringIO
  pr = cProfile.Profile()
  pr.enable()

## Treat special arguments
# Check if binFilters is a class
if args.binFilters is not NotSet:
  try:
    args.binFilters = str_to_class( "TuningTools.CrossValidStat", args.binFilters )
  except (TypeError, AttributeError,):
    args.binFilters = csvStr2List( args.binFilters )
def __call__(self, **kw):
  """
    Create a collection of tuning job configuration files at the output folder.
  """
  # Cross validation configuration
  outputFolder   = retrieve_kw( kw, 'outputFolder',   'jobConfig'             )
  neuronBounds   = retrieve_kw( kw, 'neuronBounds',   SeqLoopingBounds(5, 20) )
  sortBounds     = retrieve_kw( kw, 'sortBounds',     PythonLoopingBounds(50) )
  nInits         = retrieve_kw( kw, 'nInits',         100                     )
  # Output configuration
  nNeuronsPerJob = retrieve_kw( kw, 'nNeuronsPerJob', 1                       )
  nSortsPerJob   = retrieve_kw( kw, 'nSortsPerJob',   1                       )
  nInitsPerJob   = retrieve_kw( kw, 'nInitsPerJob',   100                     )
  compress       = retrieve_kw( kw, 'compress',       True                    )
  if 'level' in kw:
    self.level = kw.pop('level')
  # Make sure that bounds variables are LoopingBounds objects:
  if not isinstance( neuronBounds, SeqLoopingBounds ):
    neuronBounds = SeqLoopingBounds(neuronBounds)
  if not isinstance( sortBounds, SeqLoopingBounds ):
    sortBounds = PythonLoopingBounds(sortBounds)
  # and delete it to avoid mistakes:
  checkForUnusedVars( kw, self._logger.warning )
  del kw
  if nInits < 1:
    self._logger.fatal(("Cannot require zero or negative initialization "
                        "number."), ValueError)
  # Do some checking in the arguments:
  nNeurons = len(neuronBounds)
  nSorts = len(sortBounds)
  if not nSorts:
    self._logger.fatal("Sort bounds is empty.")
  if nNeuronsPerJob > nNeurons:
    self._logger.warning(("The number of neurons per job (%d) is "
                          "greater than the total number of neurons (%d), changing it "
                          "into the maximum possible value."), nNeuronsPerJob, nNeurons )
    nNeuronsPerJob = nNeurons
  if nSortsPerJob > nSorts:
    self._logger.warning(("The number of sorts per job (%d) is "
                          "greater than the total number of sorts (%d), changing it "
                          "into the maximum possible value."), nSortsPerJob, nSorts )
    nSortsPerJob = nSorts
  # Create the output folder:
  mkdir_p(outputFolder)
  # Create the windows in which each job will loop upon:
  neuronJobsWindowList = \
      CreateTuningJobFiles._retrieveJobLoopingBoundsCol( neuronBounds,
                                                         nNeuronsPerJob )
  sortJobsWindowList = \
      CreateTuningJobFiles._retrieveJobLoopingBoundsCol( sortBounds,
                                                         nSortsPerJob )
  initJobsWindowList = \
      CreateTuningJobFiles._retrieveJobLoopingBoundsCol( PythonLoopingBounds( nInits ),
                                                         nInitsPerJob )
  # Loop over windows and create the job configuration
  for neuronWindowBounds in neuronJobsWindowList():
    for sortWindowBounds in sortJobsWindowList():
      for initWindowBounds in initJobsWindowList():
        self._logger.debug(('Retrieved following job configuration '
                            '(bounds.vec) : '
                            '[ neuronBounds=%s, sortBounds=%s, initBounds=%s]'),
                           neuronWindowBounds.formattedString('hn'),
                           sortWindowBounds.formattedString('s'),
                           initWindowBounds.formattedString('i'))
        fulloutput = '{outputFolder}/job.{neuronStr}.{sortStr}.{initStr}'.format(
                        outputFolder = outputFolder,
                        neuronStr = neuronWindowBounds.formattedString('hn'),
                        sortStr = sortWindowBounds.formattedString('s'),
                        initStr = initWindowBounds.formattedString('i') )
        savedFile = TuningJobConfigArchieve( fulloutput,
                                             neuronBounds = neuronWindowBounds,
                                             sortBounds = sortWindowBounds,
                                             initBounds = initWindowBounds ).save( compress )
        self._logger.info('Saved job option configuration at path: %s', savedFile )
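# A minimal usage sketch (hypothetical: the bound and per-job values below are illustrative
# only, and direct instantiation of CreateTuningJobFiles is assumed from the signature above):
#   CreateTuningJobFiles()( outputFolder   = 'jobConfig'
#                         , neuronBounds   = SeqLoopingBounds(5, 20)
#                         , sortBounds     = PythonLoopingBounds(10)
#                         , nInits         = 100
#                         , nNeuronsPerJob = 2
#                         , nSortsPerJob   = 5
#                         , nInitsPerJob   = 100 )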
caloLayers = [ RingerLayer.PS
             , RingerLayer.EM1, RingerLayer.EM2, RingerLayer.EM3
             , RingerLayer.HAD1, RingerLayer.HAD2, RingerLayer.HAD3
             ]

from RingerCore import load, save
from RingerCore import changeExtension, ensureExtension, appendToFileName, progressbar, mkdir_p
from itertools import product
import numpy as np

if args.outputPath is None:
  args.outputPath = os.path.dirname(args.inputFile)
if not os.path.isdir( args.outputPath ):
  mkdir_p( args.outputPath )

f = load(args.inputFile)
# Copy all metadata information
baseDict = { k : f[k] for k in f.keys() if not '_etBin_' in k and not '_etaBin_' in k }
nEtBins  = f['nEtBins'].item()
nEtaBins = f['nEtaBins'].item()

for etIdx, etaIdx in progressbar( product(xrange(nEtBins), xrange(nEtaBins))
                                , nEtBins*nEtaBins
                                , logger = mainLogger
                                , prefix = 'Juicing file ' ):
  binDict = { k : f[k] for k in f.keys() if 'etBin_%d_etaBin_%d' % (etIdx, etaIdx) in k }
  binDict.update(baseDict)
  from copy import deepcopy
  for layer in caloLayers:
    pp = PreProcChain( [ RingerLayerSegmentation(layer=layer) ] )
parser.add_argument("--triggerList", nargs="+", default=defaultTrigList)
parser.add_argument("--numberOfSamplesPerPackage", type=int, default=50)

args = parser.parse_args()
mainLogger = Logger.getModuleLogger(__name__, LoggingLevel.INFO)

if os.path.exists("dq2_ls.txt"):
  os.system("rm dq2_ls.txt")
if args.inDS[-1] != "/":
  args.inDS += "/"
if args.outFolder[-1] != "/":
  args.outFolder += "/"

mkdir_p(args.outFolder)
mkdir_p("tmpDir")

os.system("dq2-ls -fH " + args.inDS + " >& dq2_ls.txt")
with open("dq2_ls.txt", "r") as f:
  lines = f.readlines()

samples = []
dataset = ""
fileLine = re.compile(r"\[ \]\s+(\S+)\s+\S+\s+\S+\s+\S+\s+\S+")
for s in lines:
  m = fileLine.match(s)
  if m:
    samples.append(m.group(1))

package = []
def loop(self, **kw):
  import gc

  output       = kw.pop('output'      , 'Mon'          )
  tuningReport = kw.pop('tuningReport', 'tuningReport' )
  doBeamer     = kw.pop('doBeamer'    , True           )
  shortSlides  = kw.pop('shortSlides' , False          )
  debug        = kw.pop('debug'       , False          )
  overwrite    = kw.pop('overwrite'   , False          )

  basepath  = output
  basepath += ('_et%d_eta%d') % (self._infoObjs[0].etbin(), self._infoObjs[0].etabin())

  if not overwrite and os.path.isdir( basepath ):
    self._logger.warning("Monitoring output path already exists!")
    return

  if shortSlides:
    self._logger.warning('Short slides enabled! Doing only tables...')

  if debug:
    self._logger.warning('Debug mode activated!')

  wantedPlotNames = { 'allBestTstSorts', 'allBestOpSorts', 'allWorstTstSorts', 'allWorstOpSorts',
                      'allBestTstNeurons', 'allBestOpNeurons', 'allWorstTstNeurons', 'allWorstOpNeurons' }

  perfBenchmarks = dict()
  pathBenchmarks = dict()

  from PlotHolder import PlotHolder
  from PlotHelper import plot_4c, plot_rocs, plot_nnoutput
  from TuningMonitoringInfo import MonitoringPerfInfo

  # Loop over benchmarks
  for infoObj in self._infoObjs:
    # Initialize all plots
    plotObjects = dict()
    perfObjects = dict()
    infoObjects = dict()
    pathObjects = dict()
    # Init PlotsHolder
    for plotname in wantedPlotNames:
      if 'Sorts' in plotname:
        plotObjects[plotname] = PlotHolder(label = 'Sort')
      else:
        plotObjects[plotname] = PlotHolder(label = 'Neuron')
    # Retrieve benchmark name
    benchmarkName = infoObj.name()
    # Retrieve reference name
    reference = infoObj.reference()
    # Summary
    csummary = infoObj.summary()
    # Benchmark object
    cbenchmark = infoObj.rawBenchmark()
    # Reference value
    refVal = infoObj.rawBenchmark()['refVal']
    # Eta bin
    etabin = infoObj.etabin()
    # Et bin
    etbin = infoObj.etbin()

    self._logger.info(('Start loop over the benchmark: %s and etaBin = %d etBin = %d') % (benchmarkName, etabin, etbin) )
    import copy
    args = dict()
    args['reference'] = reference
    args['refVal']    = refVal
    args['eps']       = cbenchmark['eps']

    self._logger.info('Creating plots...')
    # Creating plots
    for neuron in progressbar(infoObj.neuronBounds(), len(infoObj.neuronBounds()),
                              'Loading : ', 60, False, logger=self._logger):
      # Figure path location
      currentPath = ('%s/figures/%s/%s') % (basepath, benchmarkName, 'neuron_'+str(neuron))
      neuronName = 'config_'+str(neuron).zfill(3)
      # Create folder to store all plot objects
      mkdir_p(currentPath)
      # Clear all hold plots stored
      plotObjects['allBestTstSorts'].clear()
      plotObjects['allBestOpSorts'].clear()
      infoObjects['allInfoOpBest_'+neuronName] = list()
      #plotObjects['allWorstTstSorts'].clear()
      #plotObjects['allWorstOpSorts'].clear()

      for sort in infoObj.sortBounds(neuron):
        sortName = 'sort_'+str(sort).zfill(3)
        # Init bounds
        initBounds = infoObj.initBounds(neuron, sort)
        # Create path list from initBound list
        initPaths = [('%s/%s/%s/init_%s') % (benchmarkName, neuronName, sortName, init) for init in initBounds]
        self._logger.debug('Creating init plots into the path: %s, (neuron_%s,sort_%s)', \
                           benchmarkName, neuron, sort)
        obj = PlotHolder(label = 'Init')
        try:
          # Create plots holder class (Helper), store all inits
          obj.retrieve(self._rootObj, initPaths)
        except RuntimeError:
          self._logger.fatal('Can not create plot holder object')
        # Hold all inits from current sort
        obj.set_index_correction(initBounds)
        obj.set_best_index(  csummary[neuronName][sortName]['infoTstBest']['init']  )
        obj.set_worst_index( csummary[neuronName][sortName]['infoTstWorst']['init'] )
        plotObjects['allBestTstSorts'].append( copy.deepcopy(obj.get_best()) )
        obj.set_best_index(  csummary[neuronName][sortName]['infoOpBest']['init']  )
        obj.set_worst_index( csummary[neuronName][sortName]['infoOpWorst']['init'] )
        plotObjects['allBestOpSorts'].append( copy.deepcopy(obj.get_best()) )
        #plotObjects['allWorstTstSorts'].append( copy.deepcopy(tstObj.getBest() )
        #plotObjects['allWorstOpSorts'].append( copy.deepcopy(opObj.getBest() )
        infoObjects['allInfoOpBest_'+neuronName].append( copy.deepcopy(csummary[neuronName][sortName]['infoOpBest']) )
        # Release memory
        del obj
      # Loop over sorts
      gc.collect()

      plotObjects['allBestTstSorts'].set_index_correction( infoObj.sortBounds(neuron) )
      plotObjects['allBestOpSorts'].set_index_correction( infoObj.sortBounds(neuron) )
      #plotObjects['allWorstTstSorts'].setIdxCorrection( infoObj.sortBounds(neuron) )
      #plotObjects['allWorstOpSorts'].setIdxCorrection( infoObj.sortBounds(neuron) )

      # Best and worst sorts for this neuron configuration
      plotObjects['allBestTstSorts'].set_best_index(  csummary[neuronName]['infoTstBest']['sort']  )
      plotObjects['allBestTstSorts'].set_worst_index( csummary[neuronName]['infoTstWorst']['sort'] )
      plotObjects['allBestOpSorts'].set_best_index(   csummary[neuronName]['infoOpBest']['sort']   )
      plotObjects['allBestOpSorts'].set_worst_index(  csummary[neuronName]['infoOpWorst']['sort']  )

      # Hold the information from the best and worst discriminator for this neuron
      infoObjects['infoOpBest_'+neuronName]  = copy.deepcopy(csummary[neuronName]['infoOpBest'])
      infoObjects['infoOpWorst_'+neuronName] = copy.deepcopy(csummary[neuronName]['infoOpWorst'])

      # Best and worst neuron sort for this configuration
      plotObjects['allBestTstNeurons'].append(  copy.deepcopy(plotObjects['allBestTstSorts'].get_best())  )
      plotObjects['allBestOpNeurons'].append(   copy.deepcopy(plotObjects['allBestOpSorts'].get_best())   )
      plotObjects['allWorstTstNeurons'].append( copy.deepcopy(plotObjects['allBestTstSorts'].get_worst()) )
      plotObjects['allWorstOpNeurons'].append(  copy.deepcopy(plotObjects['allBestOpSorts'].get_worst())  )

      # Create perf (tables) objects for test and operation (Table)
      perfObjects[neuronName] = MonitoringPerfInfo(benchmarkName, reference,
                                                   csummary[neuronName]['summaryInfoTst'],
                                                   csummary[neuronName]['infoOpBest'],
                                                   cbenchmark)
      # Debug information
      self._logger.debug(('Crossval indexes: (bestSort = %d, bestInit = %d) (worstSort = %d, bestInit = %d)')%\
                         (plotObjects['allBestTstSorts'].best, plotObjects['allBestTstSorts'].get_best()['bestInit'],
                          plotObjects['allBestTstSorts'].worst, plotObjects['allBestTstSorts'].get_worst()['bestInit']))
      self._logger.debug(('Operation indexes: (bestSort = %d, bestInit = %d) (worstSort = %d, bestInit = %d)')%\
                         (plotObjects['allBestOpSorts'].best, plotObjects['allBestOpSorts'].get_best()['bestInit'],
                          plotObjects['allBestOpSorts'].worst, plotObjects['allBestOpSorts'].get_worst()['bestInit']))

      # Figure 1: Plot all validation/test curves for all crossval sorts tested during
      # the training. The best sort is painted black and the worst sort red. A label is
      # drawn on the figure showing the current location (neuron, sort, init) of the
      # best and the worst network.
      args['label'] = ('#splitline{#splitline{Total sorts: %d}{etaBin: %d, etBin: %d}}'+\
                       '{#splitline{sBestIdx: %d iBestIdx: %d}{sWorstIdx: %d iBestIdx: %d}}') % \
                      (plotObjects['allBestTstSorts'].size(), etabin, etbin, plotObjects['allBestTstSorts'].best, \
                       plotObjects['allBestTstSorts'].get_best()['bestInit'], plotObjects['allBestTstSorts'].worst,\
                       plotObjects['allBestTstSorts'].get_worst()['bestInit'])
      args['cname'] = ('%s/plot_%s_neuron_%s_sorts_val') % (currentPath, benchmarkName, neuron)
      args['set'] = 'val'
      args['operation'] = False
      args['paintListIdx'] = [plotObjects['allBestTstSorts'].best, plotObjects['allBestTstSorts'].worst]
      pname1 = plot_4c(plotObjects['allBestTstSorts'], args)

      # Figure 2: Plot all validation/test curves for all crossval sorts tested during
      # the training. The best sort is painted black and the worst sort red, but here the
      # painted curves represent the best and the worst curves from the operation dataset.
      # In other words, we pass all events through the networks, get the efficiencies, then
      # choose the best and the worst operation networks and paint the validation curves
      # that correspond to those sorts. A label is drawn on the figure showing the current
      # location (neuron, sort, init) of the best and the worst network.
      args['label'] = ('#splitline{#splitline{Total sorts: %d (operation)}{etaBin: %d, etBin: %d}}'+\
                       '{#splitline{sBestIdx: %d iBestIdx: %d}{sWorstIdx: %d iBestIdx: %d}}') % \
                      (plotObjects['allBestOpSorts'].size(), etabin, etbin, plotObjects['allBestOpSorts'].best, \
                       plotObjects['allBestOpSorts'].get_best()['bestInit'], plotObjects['allBestOpSorts'].worst,\
                       plotObjects['allBestOpSorts'].get_worst()['bestInit'])
      args['cname'] = ('%s/plot_%s_neuron_%s_sorts_op') % (currentPath, benchmarkName, neuron)
      args['set'] = 'val'
      args['operation'] = True
      args['paintListIdx'] = [plotObjects['allBestOpSorts'].best, plotObjects['allBestOpSorts'].worst]
      pname2 = plot_4c(plotObjects['allBestOpSorts'], args)

      # Figure 3: This figure shows in detail the best operation network for the current
      # hidden layer and benchmark analysis. Depending on the benchmark, we draw lines
      # that represent the stops for each curve. The current neuron is the last position
      # of the plotObjects.
      splotObject = PlotHolder()
      args['label'] = ('#splitline{#splitline{Best network neuron: %d}{etaBin: %d, etBin: %d}}'+\
                       '{#splitline{sBestIdx: %d iBestIdx: %d}{}}') % \
                      (neuron, etabin, etbin, plotObjects['allBestOpSorts'].best,
                       plotObjects['allBestOpSorts'].get_best()['bestInit'])
      args['cname'] = ('%s/plot_%s_neuron_%s_best_op') % (currentPath, benchmarkName, neuron)
      args['set'] = 'val'
      args['operation'] = True
      splotObject.append( plotObjects['allBestOpNeurons'][-1] )
      pname3 = plot_4c(splotObject, args)

      # Figure 4: Here we have a plot of the discriminator output for the whole dataset.
      # The black histogram represents the signal and the red one the background.
      # TODO: Apply these outputs using the manual feedforward method to generate the
      # network outputs and create the histograms.
      args['cname'] = ('%s/plot_%s_neuron_%s_best_op_output') % (currentPath, benchmarkName, neuron)
      args['nsignal'] = self._data[0].shape[0]
      args['nbackground'] = self._data[1].shape[0]
      sbest = plotObjects['allBestOpNeurons'][-1]['bestSort']
      args['cut'] = csummary[neuronName]['sort_'+str(sbest).zfill(3)]['infoOpBest']['cut']
      args['rocname'] = 'roc_op'
      pname4 = plot_nnoutput(splotObject, args)

      # Figure 5: The receiver operating characteristic curve for all sorts using the test
      # dataset as base. Here we draw the current tunnel and the reference value used to set
      # the discriminator threshold when the benchmark is the Pd or Pf case. When we use the
      # SP case, this tunnel is not plotted. The black curve represents the best sort and
      # the red one the worst sort. TODO: Put the SP point for the best and worst when the
      # benchmark case is SP.
      args['cname'] = ('%s/plot_%s_neuron_%s_sorts_roc_tst') % (currentPath, benchmarkName, neuron)
      args['set'] = 'tst'
      args['paintListIdx'] = [plotObjects['allBestTstSorts'].best, plotObjects['allBestTstSorts'].worst]
      pname5 = plot_rocs(plotObjects['allBestTstSorts'], args)

      # Figure 6: The receiver operating characteristic curve for all sorts using the
      # operation dataset (train+test) as base. Here we draw the current tunnel and the
      # reference value used to set the discriminator threshold when the benchmark is the
      # Pd or Pf case. When we use the SP case, this tunnel is not plotted. The black curve
      # represents the best sort and the red one the worst sort. TODO: Put the SP point for
      # the best and worst when the benchmark case is SP.
      args['cname'] = ('%s/plot_%s_neuron_%s_sorts_roc_op') % (currentPath, benchmarkName, neuron)
      args['set'] = 'op'
      args['paintListIdx'] = [plotObjects['allBestOpSorts'].best, plotObjects['allBestOpSorts'].worst]
      pname6 = plot_rocs(plotObjects['allBestOpSorts'], args)

      # Map names for beamer; if you add a plot, you must add it into
      # the path objects holder
      pathObjects['neuron_'+str(neuron)+'_sorts_val']      = pname1
      pathObjects['neuron_'+str(neuron)+'_sort_op']        = pname2
      pathObjects['neuron_'+str(neuron)+'_best_op']        = pname3
      pathObjects['neuron_'+str(neuron)+'_best_op_output'] = pname4
      pathObjects['neuron_'+str(neuron)+'_sorts_roc_tst']  = pname5
      pathObjects['neuron_'+str(neuron)+'_sorts_roc_op']   = pname6

      if debug:
        break
    # Loop over neurons

    # External
    pathBenchmarks[benchmarkName] = pathObjects
    perfBenchmarks[benchmarkName] = perfObjects

    # Release memory
    for xname in plotObjects.keys():
      del plotObjects[xname]
    gc.collect()
    #if debug: break
  # Loop over benchmarks

  # Start beamer presentation
  if doBeamer:
    from BeamerMonReport import BeamerMonReport
    from BeamerTemplates import BeamerPerfTables, BeamerFigure, BeamerBlocks
    # Eta bin
    etabin = self._infoObjs[0].etabin()
    # Et bin
    etbin = self._infoObjs[0].etbin()
    # Create the beamer manager
    reportname = ('%s_et%d_eta%d') % (output, etbin, etabin)
    beamer = BeamerMonReport(basepath+'/'+reportname, title = ('Tuning Report (et=%d, eta=%d)') % (etbin, etabin) )
    neuronBounds = self._infoObjs[0].neuronBounds()

    for neuron in neuronBounds:
      # Make the tables for cross validation
      ptableCross = BeamerPerfTables(frametitle = ['Neuron '+str(neuron)+': Cross Validation Performance',
                                                   'Neuron '+str(neuron)+": Operation Best Network"],
                                     caption = ['Efficiencies from each benchmark.',
                                                'Efficiencies for the best operation network'])
      block = BeamerBlocks('Neuron '+str(neuron)+' Analysis',
                           [('All sorts (validation)', 'All sort evolutions are plotted; each sort represents the best init;'),
                            ('All sorts (operation)', 'All sort evolutions only for the operation set;'),
                            ('Best operation', 'Detailed analysis from the best sort discriminator.'),
                            ('Tables', 'Cross validation performance')])
      if not shortSlides:
        block.tolatex( beamer.file() )

      for info in self._infoObjs:
        # If we produce a short presentation, we do not draw all plots
        if not shortSlides:
          bname = info.name().replace('OperationPoint_', '')
          fig1 = BeamerFigure( pathBenchmarks[info.name()]['neuron_'+str(neuron)+'_sorts_val'].replace(basepath+'/', ''), 0.7,
                               frametitle=bname+', Neuron '+str(neuron)+': All sorts (validation)')
          fig2 = BeamerFigure( pathBenchmarks[info.name()]['neuron_'+str(neuron)+'_sorts_roc_tst'].replace(basepath+'/', ''), 0.8,
                               frametitle=bname+', Neuron '+str(neuron)+': All ROC sorts (validation)')
          fig3 = BeamerFigure( pathBenchmarks[info.name()]['neuron_'+str(neuron)+'_sort_op'].replace(basepath+'/', ''), 0.7,
                               frametitle=bname+', Neuron '+str(neuron)+': All sorts (operation)')
          fig4 = BeamerFigure( pathBenchmarks[info.name()]['neuron_'+str(neuron)+'_sorts_roc_op'].replace(basepath+'/', ''), 0.8,
                               frametitle=bname+', Neuron '+str(neuron)+': All ROC sorts (operation)')
          fig5 = BeamerFigure( pathBenchmarks[info.name()]['neuron_'+str(neuron)+'_best_op'].replace(basepath+'/', ''), 0.7,
                               frametitle=bname+', Neuron '+str(neuron)+': Best Network')
          fig6 = BeamerFigure( pathBenchmarks[info.name()]['neuron_'+str(neuron)+'_best_op_output'].replace(basepath+'/', ''), 0.8,
                               frametitle=bname+', Neuron '+str(neuron)+': Best Network output')
          # Draw figures into the tex file
          fig1.tolatex( beamer.file() )
          fig2.tolatex( beamer.file() )
          fig3.tolatex( beamer.file() )
          fig4.tolatex( beamer.file() )
          fig5.tolatex( beamer.file() )
          fig6.tolatex( beamer.file() )
        # Concatenate performance tables; each line will be a benchmark
        # e.g.: det, sp and fa
        ptableCross.add( perfBenchmarks[info.name()]['config_'+str(neuron).zfill(3)] )
        #if debug: break

      ptableCross.tolatex( beamer.file() )  # internal switch is false to true: test
      ptableCross.tolatex( beamer.file() )  # internal switch is true to false: operation
      if debug:
        break

    beamer.close()

  self._logger.info('Done! ')
def loop(self, **kw):
  from scipy.io import loadmat
  import gc

  output       = kw.pop('output'      , 'Mon'          )
  tuningReport = kw.pop('tuningReport', 'tuningReport' )
  doBeamer     = kw.pop('doBeamer'    , True           )
  shortSlides  = kw.pop('shortSlides' , False          )
  debug        = kw.pop('debug'       , False          )
  overwrite    = kw.pop('overwrite'   , False          )
  choicesfile  = kw.pop('choicesfile' , None           )

  basepath  = output
  basepath += ('_et%d_eta%d') % (self._infoObjs[0].etbinidx(), self._infoObjs[0].etabinidx())

  if choicesfile:
    choices = loadmat(choicesfile)

  if not overwrite and os.path.isdir(basepath):
    self._logger.warning("Monitoring output path already exists!")
    return

  if shortSlides:
    self._logger.warning('Short slides enabled! Doing only tables...')

  if debug:
    self._logger.warning('Debug mode activated!')

  wantedPlotNames = { 'allBestTstSorts', 'allBestOpSorts', 'allWorstTstSorts', 'allWorstOpSorts',
                      'allBestTstNeurons', 'allBestOpNeurons', 'allWorstTstNeurons', 'allWorstOpNeurons' }

  perfBenchmarks = dict()
  pathBenchmarks = dict()

  from PlotHolder import PlotHolder
  from PlotHelper import plot_4c, plot_rocs, plot_nnoutput
  from TuningMonitoringInfo import MonitoringPerfInfo

  # Loop over benchmarks
  for infoObj in self._infoObjs:
    # Initialize all plots
    plotObjects = dict()
    perfObjects = dict()
    infoObjects = dict()
    pathObjects = dict()
    # Init PlotsHolder
    for plotname in wantedPlotNames:
      if 'Sorts' in plotname:
        plotObjects[plotname] = PlotHolder(label='Sort')
      else:
        plotObjects[plotname] = PlotHolder(label='Neuron')
    # keyboard()
    # Retrieve benchmark name
    benchmarkName = infoObj.name()
    # Retrieve reference name
    reference = infoObj.reference()
    # Summary
    csummary = infoObj.summary()
    # Benchmark object
    cbenchmark = infoObj.rawBenchmark()
    # etBin = infoObj.etbin()
    # Reference value
    refVal = infoObj.rawBenchmark()['refVal']
    # Eta bin index
    etabinidx = infoObj.etabinidx()
    # Et bin index
    etbinidx = infoObj.etbinidx()
    # Eta bin
    etabin = infoObj.etabin()
    # Et bin
    etbin = infoObj.etbin()

    self._logger.info(('Start loop over the benchmark: %s and etaBin = %d etBin = %d') % (benchmarkName, etabinidx, etbinidx))
    import copy
    args = dict()
    args['reference'] = reference
    args['refVal']    = refVal
    args['eps']       = cbenchmark['eps']

    self._logger.info('Creating plots...')
    # Creating plots
    for neuron in progressbar(infoObj.neuronBounds(), len(infoObj.neuronBounds()),
                              'Loading : ', 60, False, logger=self._logger):
      if choicesfile:
        neuron = choices['choices'][infoObj.name().split('_')[-1]][0][0][etbinidx][etabinidx]
      # Figure path location
      currentPath = ('%s/figures/%s/%s') % (basepath, benchmarkName, 'neuron_' + str(neuron))
      neuronName = 'config_' + str(neuron).zfill(3)
      # Create folder to store all plot objects
      mkdir_p(currentPath)
      # Clear all hold plots stored
      plotObjects['allBestTstSorts'].clear()
      plotObjects['allBestOpSorts'].clear()
      infoObjects['allInfoOpBest_' + neuronName] = list()
      #plotObjects['allWorstTstSorts'].clear()
      #plotObjects['allWorstOpSorts'].clear()

      for sort in infoObj.sortBounds(neuron):
        sortName = 'sort_' + str(sort).zfill(3)
        # Init bounds
        initBounds = infoObj.initBounds(neuron, sort)
        # Create path list from initBound list
        initPaths = [('%s/%s/%s/init_%s') % (benchmarkName, neuronName, sortName, init) for init in initBounds]
        self._logger.debug('Creating init plots into the path: %s, (neuron_%s,sort_%s)', \
                           benchmarkName, neuron, sort)
        obj = PlotHolder(label='Init')
        try:
          # Create plots holder class (Helper), store all inits
          obj.retrieve(self._rootObj, initPaths)
        except RuntimeError:
          self._logger.fatal('Can not create plot holder object')
        # Hold all inits from current sort
        obj.set_index_correction(initBounds)
        obj.set_best_index(  csummary[neuronName][sortName]['infoTstBest']['init']  )
        obj.set_worst_index( csummary[neuronName][sortName]['infoTstWorst']['init'] )
        plotObjects['allBestTstSorts'].append( copy.deepcopy(obj.get_best()) )
        obj.set_best_index(  csummary[neuronName][sortName]['infoOpBest']['init']  )
        obj.set_worst_index( csummary[neuronName][sortName]['infoOpWorst']['init'] )
        plotObjects['allBestOpSorts'].append( copy.deepcopy(obj.get_best()) )
        #plotObjects['allWorstTstSorts'].append( copy.deepcopy(tstObj.getBest() )
        #plotObjects['allWorstOpSorts'].append( copy.deepcopy(opObj.getBest() )
        infoObjects['allInfoOpBest_' + neuronName].append( copy.deepcopy(csummary[neuronName][sortName]['infoOpBest']) )
        # Release memory
        del obj
      # Loop over sorts
      gc.collect()

      plotObjects['allBestTstSorts'].set_index_correction( infoObj.sortBounds(neuron) )
      plotObjects['allBestOpSorts'].set_index_correction( infoObj.sortBounds(neuron) )
      #plotObjects['allWorstTstSorts'].setIdxCorrection( infoObj.sortBounds(neuron) )
      #plotObjects['allWorstOpSorts'].setIdxCorrection( infoObj.sortBounds(neuron) )

      # Best and worst sorts for this neuron configuration
      plotObjects['allBestTstSorts'].set_best_index(  csummary[neuronName]['infoTstBest']['sort']  )
      plotObjects['allBestTstSorts'].set_worst_index( csummary[neuronName]['infoTstWorst']['sort'] )
      plotObjects['allBestOpSorts'].set_best_index(   csummary[neuronName]['infoOpBest']['sort']   )
      plotObjects['allBestOpSorts'].set_worst_index(  csummary[neuronName]['infoOpWorst']['sort']  )

      # Hold the information from the best and worst discriminator for this neuron
      infoObjects['infoOpBest_' + neuronName]  = copy.deepcopy(csummary[neuronName]['infoOpBest'])
      infoObjects['infoOpWorst_' + neuronName] = copy.deepcopy(csummary[neuronName]['infoOpWorst'])

      # Best and worst neuron sort for this configuration
      plotObjects['allBestTstNeurons'].append(  copy.deepcopy(plotObjects['allBestTstSorts'].get_best())  )
      plotObjects['allBestOpNeurons'].append(   copy.deepcopy(plotObjects['allBestOpSorts'].get_best())   )
      plotObjects['allWorstTstNeurons'].append( copy.deepcopy(plotObjects['allBestTstSorts'].get_worst()) )
      plotObjects['allWorstOpNeurons'].append(  copy.deepcopy(plotObjects['allBestOpSorts'].get_worst())  )

      # Create perf (tables) objects for test and operation (Table)
      perfObjects[neuronName] = MonitoringPerfInfo(benchmarkName, reference,
                                                   csummary[neuronName]['summaryInfoTst'],
                                                   csummary[neuronName]['infoOpBest'],
                                                   cbenchmark)
      # Debug information
      self._logger.debug(('Crossval indexes: (bestSort = %d, bestInit = %d) (worstSort = %d, bestInit = %d)')%\
                         (plotObjects['allBestTstSorts'].best, plotObjects['allBestTstSorts'].get_best()['bestInit'],
                          plotObjects['allBestTstSorts'].worst, plotObjects['allBestTstSorts'].get_worst()['bestInit']))
      self._logger.debug(('Operation indexes: (bestSort = %d, bestInit = %d) (worstSort = %d, bestInit = %d)')%\
                         (plotObjects['allBestOpSorts'].best, plotObjects['allBestOpSorts'].get_best()['bestInit'],
                          plotObjects['allBestOpSorts'].worst, plotObjects['allBestOpSorts'].get_worst()['bestInit']))

      # Figure 1: Plot all validation/test curves for all crossval sorts tested during
      # the training. The best sort is painted black and the worst sort red. A label is
      # drawn on the figure showing the current location (neuron, sort, init) of the
      # best and the worst network.
      args['label'] = ('#splitline{#splitline{Total sorts: %d}{etaBin: %d, etBin: %d}}'+\
                       '{#splitline{sBestIdx: %d iBestIdx: %d}{sWorstIdx: %d iBestIdx: %d}}') % \
                      (plotObjects['allBestTstSorts'].size(), etabinidx, etbinidx, plotObjects['allBestTstSorts'].best, \
                       plotObjects['allBestTstSorts'].get_best()['bestInit'], plotObjects['allBestTstSorts'].worst,\
                       plotObjects['allBestTstSorts'].get_worst()['bestInit'])
      args['cname'] = ('%s/plot_%s_neuron_%s_sorts_val') % (currentPath, benchmarkName, neuron)
      args['set'] = 'val'
      args['operation'] = False
      args['paintListIdx'] = [plotObjects['allBestTstSorts'].best, plotObjects['allBestTstSorts'].worst]
      pname1 = plot_4c(plotObjects['allBestTstSorts'], args)

      # Figure 2: Plot all validation/test curves for all crossval sorts tested during
      # the training. The best sort is painted black and the worst sort red, but here the
      # painted curves represent the best and the worst curves from the operation dataset.
      # In other words, we pass all events through the networks, get the efficiencies, then
      # choose the best and the worst operation networks and paint the validation curves
      # that correspond to those sorts. A label is drawn on the figure showing the current
      # location (neuron, sort, init) of the best and the worst network.
      args['label'] = ('#splitline{#splitline{Total sorts: %d (operation)}{etaBin: %d, etBin: %d}}'+\
                       '{#splitline{sBestIdx: %d iBestIdx: %d}{sWorstIdx: %d iBestIdx: %d}}') % \
                      (plotObjects['allBestOpSorts'].size(), etabinidx, etbinidx, plotObjects['allBestOpSorts'].best, \
                       plotObjects['allBestOpSorts'].get_best()['bestInit'], plotObjects['allBestOpSorts'].worst,\
                       plotObjects['allBestOpSorts'].get_worst()['bestInit'])
      args['cname'] = ('%s/plot_%s_neuron_%s_sorts_op') % (currentPath, benchmarkName, neuron)
      args['set'] = 'val'
      args['operation'] = True
      args['paintListIdx'] = [plotObjects['allBestOpSorts'].best, plotObjects['allBestOpSorts'].worst]
      pname2 = plot_4c(plotObjects['allBestOpSorts'], args)

      # Figure 3: This figure shows in detail the best operation network for the current
      # hidden layer and benchmark analysis. Depending on the benchmark, we draw lines
      # that represent the stops for each curve. The current neuron is the last position
      # of the plotObjects.
      splotObject = PlotHolder()
      args['label'] = ('#splitline{#splitline{Best network neuron: %d}{etaBin: %d, etBin: %d}}'+\
                       '{#splitline{sBestIdx: %d iBestIdx: %d}{}}') % \
                      (neuron, etabinidx, etbinidx, plotObjects['allBestOpSorts'].best,
                       plotObjects['allBestOpSorts'].get_best()['bestInit'])
      args['cname'] = ('%s/plot_%s_neuron_%s_best_op') % (currentPath, benchmarkName, neuron)
      args['set'] = 'val'
      args['operation'] = True
      splotObject.append(plotObjects['allBestOpNeurons'][-1])
      pname3 = plot_4c(splotObject, args)

      # Figure 4: Here we have a plot of the discriminator output for the whole dataset.
      # The black histogram represents the signal and the red one the background.
      # TODO: Apply these outputs using the manual feedforward method to generate the
      # network outputs and create the histograms.
      args['cname'] = ('%s/plot_%s_neuron_%s_best_op_output') % (currentPath, benchmarkName, neuron)
      args['nsignal'] = self._data[0].shape[0]
      args['nbackground'] = self._data[1].shape[0]
      sbest = plotObjects['allBestOpNeurons'][-1]['bestSort']
      args['cut'] = csummary[neuronName]['sort_' + str(sbest).zfill(3)]['infoOpBest']['cut']
      args['rocname'] = 'roc_operation'
      pname4 = plot_nnoutput(splotObject, args)

      # Figure 5: The receiver operating characteristic curve for all sorts using the test
      # dataset as base. Here we draw the current tunnel and the reference value used to set
      # the discriminator threshold when the benchmark is the Pd or Pf case. When we use the
      # SP case, this tunnel is not plotted. The black curve represents the best sort and
      # the red one the worst sort. TODO: Put the SP point for the best and worst when the
      # benchmark case is SP.
      args['cname'] = ('%s/plot_%s_neuron_%s_sorts_roc_tst') % (currentPath, benchmarkName, neuron)
      args['set'] = 'tst'
      args['paintListIdx'] = [plotObjects['allBestTstSorts'].best, plotObjects['allBestTstSorts'].worst]
      pname5 = plot_rocs(plotObjects['allBestTstSorts'], args)

      # Figure 6: The receiver operating characteristic curve for all sorts using the
      # operation dataset (train+test) as base. Here we draw the current tunnel and the
      # reference value used to set the discriminator threshold when the benchmark is the
      # Pd or Pf case. When we use the SP case, this tunnel is not plotted. The black curve
      # represents the best sort and the red one the worst sort. TODO: Put the SP point for
      # the best and worst when the benchmark case is SP.
      args['cname'] = ('%s/plot_%s_neuron_%s_sorts_roc_op') % (currentPath, benchmarkName, neuron)
      args['set'] = 'operation'
      args['paintListIdx'] = [plotObjects['allBestOpSorts'].best, plotObjects['allBestOpSorts'].worst]
      pname6 = plot_rocs(plotObjects['allBestOpSorts'], args)

      # Map names for beamer; if you add a plot, you must add it into
      # the path objects holder
      pathObjects['neuron_' + str(neuron) + '_sorts_val']      = pname1
      pathObjects['neuron_' + str(neuron) + '_sort_op']        = pname2
      pathObjects['neuron_' + str(neuron) + '_best_op']        = pname3
      pathObjects['neuron_' + str(neuron) + '_best_op_output'] = pname4
      pathObjects['neuron_' + str(neuron) + '_sorts_roc_tst']  = pname5
      pathObjects['neuron_' + str(neuron) + '_sorts_roc_op']   = pname6

      if choicesfile:
        break
    # Loop over neurons

    # External
    pathBenchmarks[benchmarkName] = pathObjects
    perfBenchmarks[benchmarkName] = perfObjects

    # Release memory
    for xname in plotObjects.keys():
      del plotObjects[xname]
    gc.collect()
    #if debug: break
  # Loop over benchmarks

  # Eta bin
  # etabinidx = self._infoObjs[0].etabinidx()
  # Et bin
  binBounds = dict()
  if len(etbin) > 0:
    binBounds['etbinstr'] = r'$%d < E_{T} \text{[GeV]}<%d$' % etbin
  else:
    binBounds['etbinstr'] = r'\text{etBin[%d]}' % etbinidx
  if len(etabin) > 0:
    binBounds['etabinstr'] = r'$%.2f<\eta<%.2f$' % etabin
  else:
    binBounds['etabinstr'] = r'\text{etaBin[%d]}' % etabinidx

  perfBounds = dict()
  perfBounds['bounds'] = binBounds
  perfBounds['perf'] = perfBenchmarks
  fname = basepath + '/' + 'perfBounds'
  save(perfBounds, fname)

  # Start beamer presentation
  if doBeamer:
    from BeamerTemplates import BeamerReport, BeamerTables, BeamerFigure, BeamerBlocks
    # Eta bin
    etabin = self._infoObjs[0].etabin()
    etabinidx = self._infoObjs[0].etabinidx()
    # Et bin
    etbin = self._infoObjs[0].etbin()
    etbinidx = self._infoObjs[0].etbinidx()
    # Create the beamer manager
    reportname = ('%s_et%d_eta%d') % (output, etbinidx, etabinidx)
    beamer = BeamerReport(basepath + '/' + reportname,
                          title=('Tuning Report (et=%d, eta=%d)') % (etbinidx, etabinidx))
    neuronBounds = self._infoObjs[0].neuronBounds()

    for neuron in neuronBounds:
      # Make the tables for cross validation
      ptableCross = BeamerTables(frametitle=['Neuron ' + str(neuron) + ': Cross Validation Performance',
                                             'Neuron ' + str(neuron) + ": Operation Best Network"],
                                 caption=['Efficiencies from each benchmark.',
                                          'Efficiencies for the best operation network'])
      block = BeamerBlocks('Neuron ' + str(neuron) + ' Analysis',
                           [('All sorts (validation)', 'All sort evolutions are plotted; each sort represents the best init;'),
                            ('All sorts (operation)', 'All sort evolutions only for the operation set;'),
                            ('Best operation', 'Detailed analysis from the best sort discriminator.'),
                            ('Tables', 'Cross validation performance')])
      if not shortSlides:
        block.tolatex(beamer.file())

      for info in self._infoObjs:
        # If we produce a short presentation, we do not draw all plots
        if not shortSlides:
          bname = info.name().replace('OperationPoint_', '')
          fig1 = BeamerFigure( pathBenchmarks[info.name()]['neuron_' + str(neuron) + '_sorts_val'].replace(basepath + '/', ''), 0.7,
                               frametitle=bname + ', Neuron ' + str(neuron) + ': All sorts (validation)')
          fig2 = BeamerFigure( pathBenchmarks[info.name()]['neuron_' + str(neuron) + '_sorts_roc_tst'].replace(basepath + '/', ''), 0.8,
                               frametitle=bname + ', Neuron ' + str(neuron) + ': All ROC sorts (validation)')
          fig3 = BeamerFigure( pathBenchmarks[info.name()]['neuron_' + str(neuron) + '_sort_op'].replace(basepath + '/', ''), 0.7,
                               frametitle=bname + ', Neuron ' + str(neuron) + ': All sorts (operation)')
          fig4 = BeamerFigure( pathBenchmarks[info.name()]['neuron_' + str(neuron) + '_sorts_roc_op'].replace(basepath + '/', ''), 0.8,
                               frametitle=bname + ', Neuron ' + str(neuron) + ': All ROC sorts (operation)')
          fig5 = BeamerFigure( pathBenchmarks[info.name()]['neuron_' + str(neuron) + '_best_op'].replace(basepath + '/', ''), 0.7,
                               frametitle=bname + ', Neuron ' + str(neuron) + ': Best Network')
          fig6 = BeamerFigure( pathBenchmarks[info.name()]['neuron_' + str(neuron) + '_best_op_output'].replace(basepath + '/', ''), 0.8,
                               frametitle=bname + ', Neuron ' + str(neuron) + ': Best Network output')
          # Draw figures into the tex file
          fig1.tolatex(beamer.file())
          fig2.tolatex(beamer.file())
          fig3.tolatex(beamer.file())
          fig4.tolatex(beamer.file())
          fig5.tolatex(beamer.file())
          fig6.tolatex(beamer.file())
        # Concatenate performance tables; each line will be a benchmark
        # e.g.: det, sp and fa
        ptableCross.add(perfBenchmarks[info.name()]['config_' + str(neuron).zfill(3)])
        #if debug: break

      ptableCross.tolatex(beamer.file())  # internal switch is false to true: test
      ptableCross.tolatex(beamer.file())  # internal switch is true to false: operation
      if debug:
        break

    beamer.close()

  self._logger.info('Done! ')
]

pidnames = [
    ['Medium', 'VeryLoose'],
    ['Medium'],
    ['Medium', 'VeryLoose'],
    ['Tight', 'Medium', 'Loose', 'VeryLoose'],
    ['Medium', 'VeryLoose'],
]

####################### Extract Ringer Configuration #########################
for idx, cv in enumerate(crossval):
  tpath = os.getcwd() + '/' + tuningdirs[idx]
  mkdir_p(tpath)
  for jdx, pid in enumerate(pidnames[idx]):
    files = expandFolders(basepath + '/' + cv[jdx])
    crossValGrid = []
    for path in files:
      if path.endswith('.pic.gz'):
        crossValGrid.append(path)
    d = CrossValidStatAnalysis.exportDiscrFilesToOnlineFormat(
        crossValGrid,
        refBenchCol=ref,
        discrFilename='%s/ElectronRinger%sConstants' % (tpath, pid),
        thresFilename='%s/ElectronRinger%sThresholds' % (tpath, pid),
        version=4,
parser.add_argument('--triggerList', nargs='+', default=defaultTrigList)
parser.add_argument('--numberOfSamplesPerPackage', type=int, default=50)

args = parser.parse_args()
mainLogger = Logger.getModuleLogger(__name__, LoggingLevel.INFO)

if os.path.exists('dq2_ls.txt'):
  os.system('rm dq2_ls.txt')
if args.inDS[-1] != '/':
  args.inDS += '/'
if args.outFolder[-1] != '/':
  args.outFolder += '/'

mkdir_p(args.outFolder)
mkdir_p('tmpDir')

os.system('dq2-ls -fH ' + args.inDS + ' >& dq2_ls.txt')
with open('dq2_ls.txt', 'r') as f:
  lines = f.readlines()

samples = []
dataset = ''
fileLine = re.compile(r'\[ \]\s+(\S+)\s+\S+\s+\S+\s+\S+\s+\S+')
for s in lines:
  m = fileLine.match(s)
  if m:
    samples.append(m.group(1))

package = []