parser = ArgumentParser( description= 'Retrieve performance information from the Cross-Validation method on the GRID.', parents=[crossValStatsJobParser, parentParser, ioGridParser, loggerParser], conflict_handler='resolve') parser.make_adjustments() emptyArgumentsPrintHelp(parser) # Retrieve parser args: args = parser.parse_args(namespace=TuningToolGridNamespace('prun')) args.setBExec( 'source ./buildthis.sh --grid --with-scipy --no-color || source ./buildthis.sh --grid --with-scipy --no-color' ) mainLogger = Logger.getModuleLogger(__name__, args.output_level) printArgs(args, mainLogger.debug) # Set primary dataset number of files: try: # The input files can be send via a text file to avoid very large command lines? mainLogger.info(("Retrieving files on the data container to separate " "the jobs accordingly to each tunned bin reagion.")) from rucio.client import DIDClient from rucio.common.exception import DataIdentifierNotFound didClient = DIDClient() parsedDataDS = args.grid__inDS.split(':') did = parsedDataDS[-1] if len(parsedDataDS) > 1: scope = parsedDataDS else: import re
parser.add_argument('--mergeOutput', action='store_const', required = False, default = True, const = True, dest = 'grid_mergeOutput', help = argparse.SUPPRESS) mainLogger = Logger.getModuleLogger(__name__) import sys if len(sys.argv)==1: parser.print_help() sys.exit(1) args = parser.parse_args( namespace = TuningToolGridNamespace('prun') ) mainLogger = Logger.getModuleLogger( __name__, args.output_level ) printArgs( args, mainLogger.debug ) args.grid_allowTaskDuplication = True # Set primary dataset number of files: import os.path user_scope = 'user.%s' % os.path.expandvars('$USER') try: # The input files can be send via a text file to avoid very large command lines? mainLogger.info(("Retrieving files on the data container to separate " "the jobs accordingly to each tunned bin region.")) from rucio.client import DIDClient from rucio.common.exception import DataIdentifierNotFound didClient = DIDClient() parsedDataDS = args.grid_inDS.split(':') did = parsedDataDS[-1]
# Treat special argument if len(args.reference) > 2: raise ValueError("--reference set to multiple values: %r", args.reference) if len(args.reference) is 1: args.reference.append(args.reference[0]) from RingerCore import Logger, LoggingLevel, printArgs, NotSet logger = Logger.getModuleLogger(__name__, args.output_level) from TuningTools import RingerOperation if RingerOperation.retrieve(args.operation) < 0 and not args.treePath: ValueError( "If operation is not set to Offline, it is needed to set the TreePath manually." ) printArgs(args, logger.debug) crossVal = NotSet if args.crossFile not in (None, NotSet): from TuningTools import CrossValidArchieve with CrossValidArchieve(args.crossFile) as CVArchieve: crossVal = CVArchieve del CVArchieve from TuningTools import createData createData( args.sgnInputFiles, args.bkgInputFiles, ringerOperation=args.operation, referenceSgn=args.reference[0],
def __init__(self, **kw):
    """Configure the cross-validation box sorts from keyword arguments.

    Keyword arguments read in this method:
      method  -- value handed to ``CrossValidMethod.retrieve``
                 (default ``CrossValidMethod.Standard``).
      nSorts  -- Standard only: number of sorts to generate (default 50).
      nBoxes  -- number of boxes the data is divided into (default 10).
      nTrain  -- Standard only: boxes used for training (default 6).
      nValid  -- Standard only: boxes used for validation (default 4).
      nTest   -- Standard only: boxes used for testing
                 (default ``nBoxes - (nTrain + nValid)``).
      seed    -- Standard only: value passed to ``np.random.seed``.
      shuffle -- StratifiedKFold only (default False).

    The remaining content of ``kw`` is handed to ``Logger.__init__`` and to
    ``checkForUnusedVars``.
    NOTE(review): presumably ``retrieve_kw`` removes the key it reads from
    ``kw`` so that ``checkForUnusedVars`` only reports leftovers -- confirm
    against the helpers' implementation.

    For the Standard and JackKnife methods the generated sorts are stored in
    ``self._sort_boxes_list``.
    """
    Logger.__init__(self, kw)
    printArgs(kw, self._debug)
    self._nSorts = None
    self._nBoxes = None
    self._nTrain = None
    self._nValid = None
    self._nTest = None
    self._seed = None
    self._method = CrossValidMethod.retrieve(
        retrieve_kw(kw, 'method', CrossValidMethod.Standard))

    if self._method is CrossValidMethod.Standard:
        self._nSorts = retrieve_kw(kw, 'nSorts', 50)
        self._nBoxes = retrieve_kw(kw, 'nBoxes', 10)
        self._nTrain = retrieve_kw(kw, 'nTrain', 6)
        self._nValid = retrieve_kw(kw, 'nValid', 4)
        self._nTest = retrieve_kw(
            kw, 'nTest', self._nBoxes - (self._nTrain + self._nValid))
        self._seed = retrieve_kw(kw, 'seed', None)
        checkForUnusedVars(kw, self._warning)

        # Check if variables are ok:
        if self._nTest is not None and self._nTest < 0:
            self._fatal("Number of test clusters is lesser than zero",
                        ValueError)
        # A falsy nTest (0 or None) contributes nothing to the sum.
        totalSum = self._nTrain + self._nValid + (self._nTest or 0)
        if totalSum != self._nBoxes:
            self._fatal(
                "Sum of train, validation and test boxes doesn't match.",
                ValueError)
        np.random.seed(self._seed)

        # Test whether the number of possible combinations,
        # N! / (nTrain! * nValid! * nTest!), is comfortably greater than the
        # required number of sorts. If more than half of the possible cases
        # are requested, generate every combination and randomly discard the
        # surplus instead of drawing at random until enough distinct sorts
        # show up. As calculating factorials can be heavy, this is not done
        # when the number of boxes is large.
        self._sort_boxes_list = []
        useRandomCreation = True
        from math import factorial
        if self._nBoxes < 201:
            # Floor division keeps the (exact) multinomial coefficient an
            # integer on Python 3 as well.
            totalPossibilities = factorial(self._nBoxes) // (
                factorial(self._nTrain) *
                factorial(self._nValid) *
                factorial(self._nTest))
            # Same test as nSorts > totalPossibilities / 2, in exact
            # integer arithmetic.
            if 2 * self._nSorts > totalPossibilities:
                useRandomCreation = False

        if useRandomCreation:
            # The set mirrors the list content so that the duplicate check
            # does not need to scan the whole list for every draw.
            seen = set()
            while len(self._sort_boxes_list) < self._nSorts:
                random_boxes = np.random.permutation(self._nBoxes)
                trainEnd = self._nTrain
                validEnd = self._nTrain + self._nValid
                # Box order inside each group is irrelevant, so sort every
                # group to obtain a canonical representation.
                random_boxes = tuple(chain(
                    sorted(random_boxes[0:trainEnd]),
                    sorted(random_boxes[trainEnd:validEnd]),
                    sorted(random_boxes[validEnd:])))
                # Make sure we are not appending same sort again:
                if random_boxes not in seen:
                    seen.add(random_boxes)
                    self._sort_boxes_list.append(random_boxes)
        else:
            self._sort_boxes_list = list(
                combinations_taken_by_multiple_groups(
                    range(self._nBoxes),
                    (self._nTrain, self._nValid, self._nTest)))
            # Randomly discard sorts until only nSorts remain. The index is
            # drawn from the *current* list length, so it is always valid.
            # NOTE(review): if nSorts exceeds the number of combinations the
            # list silently ends up shorter than nSorts.
            while len(self._sort_boxes_list) > self._nSorts:
                self._sort_boxes_list.pop(
                    np.random.randint(len(self._sort_boxes_list)))

    elif self._method is CrossValidMethod.JackKnife:
        self._nBoxes = retrieve_kw(kw, 'nBoxes', 10)
        checkForUnusedVars(kw, self._warning)
        self._nSorts = self._nBoxes
        self._nTrain = self._nBoxes - 1
        self._nValid = 1
        self._nTest = 0
        # Group sizes follow nBoxes instead of being fixed to (9, 1), which
        # only matched the default of 10 boxes.
        self._sort_boxes_list = list(
            combinations_taken_by_multiple_groups(
                range(self._nBoxes), (self._nTrain, self._nValid)))

    elif self._method is CrossValidMethod.StratifiedKFold:
        self._nBoxes = retrieve_kw(kw, 'nBoxes', 10)
        self._shuffle = retrieve_kw(kw, 'shuffle', False)
        # Use the same warning shortcut as the other branches.
        checkForUnusedVars(kw, self._warning)
        self._nSorts = self._nBoxes
        self._nTrain = self._nBoxes - 1
        self._nValid = 1
        self._nTest = 0
# Remaining command line options of the job-checking script.
parser.add_argument(
    '--overrideOutputPlace', default=None,
    help="If the job is submitted by another user, then it is needed to override the output place.")
parser.add_argument(
    '-i', '--inputFolder', default=None, metavar='InputFolder',
    help="Folder to loop upon files to retrieve configuration (only needed if using checkForMissingJobs is set.")
parser.add_argument(
    '-b', '--bsubJobsQueue', default=None, metavar='JOBS-QUEUE',
    help="Jobs from --logFile which pending or running on bsub and shouldn't be searched for the output file.")

# Called without any argument: show the usage text and leave.
import sys
if len(sys.argv) == 1:
    parser.print_help()
    sys.exit(1)

# Retrieve parser args:
args = parser.parse_args()

from RingerCore import printArgs, Logger
logger = Logger.getModuleLogger(__name__)
printArgs(args, logger.info)

if args.checkForMissingJobs and not args.inputFolder:
    logger.fatal("--checkForMissingJobs is set, please specify --inputFolder.")

import os
#os.system('rcSetup -u')
# Absolute paths of every regular file directly inside the permanent output
# place.
oPlace = os.path.abspath(args.permanentOutputPlace)
ofiles = [
    path
    for path in (os.path.join(oPlace, f) for f in os.listdir(oPlace))
    if os.path.isfile(path)
]

# Patterns applied to the submission log. They are written as raw strings so
# that \s, \S and \d reach the regex engine without relying on Python keeping
# unknown string escapes; each compiled pattern is unchanged. The trailing
# "\\" matches the literal backslash that ends a continued command line.
import re
executeLine = re.compile('.*Executing following command:')
submissionLine = re.compile(r'(\s+env -i bsub -q )(\S*)( -u "" -J .+\\)')
commandLine = re.compile(r'\s+\S+\s+\\')
jobLine = re.compile(
    r'\s+--jobConfig\s+(\S+.n(\d+).sl(\d+).su(\d+).il(\d+).iu(\d+).pic)\s+\\')
dataPlaceLine = re.compile(r'\s+--datasetPlace\s+(\S+)\s+\\')
## Treating special args: # Configuration conf_kw = {} if args.neuronBounds is not None: conf_kw['neuronBoundsCol'] = args.neuronBounds if args.sortBounds is not None: conf_kw['sortBoundsCol'] = args.sortBounds if args.initBounds is not None: conf_kw['initBoundsCol'] = args.initBounds if args.confFileList is not None: conf_kw['confFileList'] = args.confFileList # Binning from RingerCore import printArgs, NotSet, Logger, LoggingLevel if not(args.et_bins is NotSet) and len(args.et_bins) == 1: args.et_bins = args.et_bins[0] if not(args.eta_bins is NotSet) and len(args.eta_bins) == 1: args.eta_bins = args.eta_bins[0] logger = Logger.getModuleLogger( __name__, args.output_level ) printArgs( args, logger.debug ) compress = False if args.no_compress else True # Submit job: from TuningTools import TuningJob tuningJob = TuningJob() tuningJob( args.data, level = args.output_level, compress = compress, outputFileBase = args.outputFileBase, operationPoint = args.operation, # Cross validation args crossValidFile = args.crossFile, # Pre Processing
'-b', '--bsubJobsQueue', default=None, metavar='JOBS-QUEUE', help= "Jobs from --logFile which pending or running on bsub and shouldn't be searched for the output file." ) emptyArgumentsPrintHelp(parser) # Retrieve parser args: args = parser.parse_args() from RingerCore import printArgs, Logger logger = Logger.getModuleLogger(__name__) printArgs(args, logger.info) if args.checkForMissingJobs and not args.inputFolder: logger.fatal("--checkForMissingJobs is set, please specify --inputFolder.") import os #os.system('rcSetup -u') oPlace = os.path.abspath(args.permanentOutputPlace) ofiles = [ os.path.join(oPlace, f) for f in os.listdir(oPlace) if os.path.isfile(os.path.join(oPlace, f)) ] import re executeLine = re.compile('.*Executing following command:') submissionLine = re.compile('(\s+env -i bsub -q )(\S*)( -u "" -J .+\\\\)')
def __init__(self, **kw):
    """Configure the cross-validation box sorts from keyword arguments.

    Keyword arguments read in this method:
      method -- value handed to ``CrossValidMethod.retrieve``
                (default ``CrossValidMethod.Standard``).
      nSorts -- Standard only: number of sorts to generate (default 50).
      nBoxes -- number of boxes the data is divided into (default 10).
      nTrain -- Standard only: boxes used for training (default 6).
      nValid -- Standard only: boxes used for validation (default 4).
      nTest  -- Standard only: boxes used for testing
                (default ``nBoxes - (nTrain + nValid)``).
      seed   -- Standard only: value passed to ``np.random.seed``.

    The remaining content of ``kw`` is handed to ``Logger.__init__`` and to
    ``checkForUnusedVars``.
    NOTE(review): presumably ``retrieve_kw`` removes the key it reads from
    ``kw`` so that ``checkForUnusedVars`` only reports leftovers -- confirm
    against the helpers' implementation.

    The generated sorts are stored in ``self._sort_boxes_list``.
    """
    Logger.__init__(self, kw)
    printArgs(kw, self._logger.debug)
    self._nSorts = None
    self._nBoxes = None
    self._nTrain = None
    self._nValid = None
    self._nTest = None
    self._seed = None
    self._method = CrossValidMethod.retrieve(
        retrieve_kw(kw, 'method', CrossValidMethod.Standard))

    if self._method is CrossValidMethod.Standard:
        self._nSorts = retrieve_kw(kw, 'nSorts', 50)
        self._nBoxes = retrieve_kw(kw, 'nBoxes', 10)
        self._nTrain = retrieve_kw(kw, 'nTrain', 6)
        self._nValid = retrieve_kw(kw, 'nValid', 4)
        self._nTest = retrieve_kw(
            kw, 'nTest', self._nBoxes - (self._nTrain + self._nValid))
        self._seed = retrieve_kw(kw, 'seed', None)
        checkForUnusedVars(kw, self._logger.warning)

        # Check if variables are ok:
        if self._nTest is not None and self._nTest < 0:
            self._logger.fatal(
                "Number of test clusters is lesser than zero", ValueError)
        # A falsy nTest (0 or None) contributes nothing to the sum.
        totalSum = self._nTrain + self._nValid + (self._nTest or 0)
        if totalSum != self._nBoxes:
            self._logger.fatal(
                "Sum of train, validation and test boxes doesn't match.",
                ValueError)
        np.random.seed(self._seed)

        # Test whether the number of possible combinations,
        # N! / (nTrain! * nValid! * nTest!), is comfortably greater than the
        # required number of sorts. If more than half of the possible cases
        # are requested, generate every combination and randomly discard the
        # surplus instead of drawing at random until enough distinct sorts
        # show up. As calculating factorials can be heavy, this is not done
        # when the number of boxes is large.
        self._sort_boxes_list = []
        useRandomCreation = True
        from math import factorial
        if self._nBoxes < 201:
            # Floor division keeps the (exact) multinomial coefficient an
            # integer on Python 3 as well.
            totalPossibilities = factorial(self._nBoxes) // (
                factorial(self._nTrain) *
                factorial(self._nValid) *
                factorial(self._nTest))
            # Same test as nSorts > totalPossibilities / 2, in exact
            # integer arithmetic.
            if 2 * self._nSorts > totalPossibilities:
                useRandomCreation = False

        if useRandomCreation:
            # The set mirrors the list content so that the duplicate check
            # does not need to scan the whole list for every draw.
            seen = set()
            while len(self._sort_boxes_list) < self._nSorts:
                random_boxes = np.random.permutation(self._nBoxes)
                trainEnd = self._nTrain
                validEnd = self._nTrain + self._nValid
                # Box order inside each group is irrelevant, so sort every
                # group to obtain a canonical representation.
                random_boxes = tuple(chain(
                    sorted(random_boxes[0:trainEnd]),
                    sorted(random_boxes[trainEnd:validEnd]),
                    sorted(random_boxes[validEnd:])))
                # Make sure we are not appending same sort again:
                if random_boxes not in seen:
                    seen.add(random_boxes)
                    self._sort_boxes_list.append(random_boxes)
        else:
            # The validation group size lives in self._nValid; there is no
            # self._nVal attribute.
            self._sort_boxes_list = list(
                combinations_taken_by_multiple_groups(
                    range(self._nBoxes),
                    (self._nTrain, self._nValid, self._nTest)))
            # Randomly discard sorts until only nSorts remain. The index is
            # drawn with np.random.randint from the *current* list length,
            # so it is always valid.
            # NOTE(review): if nSorts exceeds the number of combinations the
            # list silently ends up shorter than nSorts.
            while len(self._sort_boxes_list) > self._nSorts:
                self._sort_boxes_list.pop(
                    np.random.randint(len(self._sort_boxes_list)))

    elif self._method is CrossValidMethod.JackKnife:
        self._nBoxes = retrieve_kw(kw, 'nBoxes', 10)
        checkForUnusedVars(kw, self._logger.warning)
        self._nSorts = self._nBoxes
        self._nTrain = self._nBoxes - 1
        self._nValid = 1
        self._nTest = 0
        # Group sizes follow nBoxes instead of being fixed to (9, 1), which
        # only matched the default of 10 boxes.
        self._sort_boxes_list = list(
            combinations_taken_by_multiple_groups(
                range(self._nBoxes), (self._nTrain, self._nValid)))