# Command-line interface: the option definitions come from the parent parsers;
# clashing definitions between parents are settled by conflict_handler='resolve'.
parser = ArgumentParser(
    description=('Retrieve performance information from the '
                 'Cross-Validation method on the GRID.'),
    conflict_handler='resolve',
    parents=[
        crossValStatsJobParser,
        parentParser,
        ioGridParser,
        loggerParser,
    ],
)
parser.make_adjustments()

# NOTE(review): presumably prints the help text and exits when the script is
# called without arguments -- confirm against the helper's definition.
emptyArgumentsPrintHelp(parser)

# Parse the command line into the grid-aware namespace:
args = parser.parse_args(namespace=TuningToolGridNamespace('prun'))

# The build command is chained to itself with `||`, so the shell runs it a
# second time only if the first attempt fails.
args.setBExec('source ./buildthis.sh --grid --with-scipy --no-color'
              ' || '
              'source ./buildthis.sh --grid --with-scipy --no-color')

# The logger can only be created now because its level comes from the
# parsed arguments.
mainLogger = Logger.getModuleLogger(__name__, args.output_level)
printArgs(args, mainLogger.debug)

# Set primary dataset number of files:
try:
    # Could the input files be sent via a text file to avoid very large command lines?
    mainLogger.info(("Retrieving files on the data container to separate "
                     "the jobs accordingly to each tunned bin reagion."))
    from rucio.client import DIDClient
    from rucio.common.exception import DataIdentifierNotFound
    didClient = DIDClient()
    parsedDataDS = args.grid__inDS.split(':')
    did = parsedDataDS[-1]
    if len(parsedDataDS) > 1:
        scope = parsedDataDS
    else:
        import re
Esempio n. 2
0
# Hidden option (help is suppressed): it stores True in `grid_mergeOutput`
# whether or not the flag is given, since default and const are both True.
parser.add_argument(
    '--mergeOutput',
    dest='grid_mergeOutput',
    action='store_const',
    const=True,
    default=True,
    required=False,
    help=argparse.SUPPRESS,
)

# Provisional logger; it is rebuilt below once the output level is known.
mainLogger = Logger.getModuleLogger(__name__)

import sys
# Only the script name on the command line: show the usage and exit with 1.
if len(sys.argv) == 1:
  parser.print_help()
  sys.exit(1)

args = parser.parse_args(namespace=TuningToolGridNamespace('prun'))

mainLogger = Logger.getModuleLogger(__name__, args.output_level)
printArgs(args, mainLogger.debug)

args.grid_allowTaskDuplication = True

# Set primary dataset number of files:
import os.path
# Scope string built from the $USER environment variable.
user_scope = 'user.' + os.path.expandvars('$USER')
try:
  # Could the input files be sent via a text file to avoid very large command lines?
  mainLogger.info(("Retrieving files on the data container to separate "
                  "the jobs accordingly to each tunned bin region."))
  from rucio.client import DIDClient
  from rucio.common.exception import DataIdentifierNotFound
  didClient = DIDClient()
  parsedDataDS = args.grid_inDS.split(':')
  did = parsedDataDS[-1]
Esempio n. 3
0
# Treat special argument
if len(args.reference) > 2:
    raise ValueError("--reference set to multiple values: %r", args.reference)
if len(args.reference) is 1:
    args.reference.append(args.reference[0])
from RingerCore import Logger, LoggingLevel, printArgs, NotSet

logger = Logger.getModuleLogger(__name__, args.output_level)

from TuningTools import RingerOperation
if RingerOperation.retrieve(args.operation) < 0 and not args.treePath:
    ValueError(
        "If operation is not set to Offline, it is needed to set the TreePath manually."
    )

printArgs(args, logger.debug)

crossVal = NotSet
if args.crossFile not in (None, NotSet):
    from TuningTools import CrossValidArchieve
    with CrossValidArchieve(args.crossFile) as CVArchieve:
        crossVal = CVArchieve
    del CVArchieve

from TuningTools import createData

createData(
    args.sgnInputFiles,
    args.bkgInputFiles,
    ringerOperation=args.operation,
    referenceSgn=args.reference[0],
Esempio n. 4
0
    def __init__(self, **kw):
        """
        Configure the cross-validation and build the list of box sorts.

        Keyword arguments (defaults are the ones given to retrieve_kw below):
          - method [CrossValidMethod.Standard]: cross-validation method.
          - nSorts [50]: number of sorts to generate (Standard only).
          - nBoxes [10]: number of boxes the data is split into.
          - nTrain [6], nValid [4]: boxes used for training and validation
            (Standard only).
          - nTest [nBoxes - (nTrain + nValid)]: boxes used for testing
            (Standard only); None is treated as zero.
          - seed [None]: value handed to np.random.seed (Standard only).
          - shuffle [False]: stored as-is (StratifiedKFold only).

        For the Standard and JackKnife methods the resulting sorts are stored
        in self._sort_boxes_list. Inconsistent box counts are reported through
        self._fatal with ValueError.
        """
        Logger.__init__(self, kw)
        printArgs(kw, self._debug)
        self._nSorts = None
        self._nBoxes = None
        self._nTrain = None
        self._nValid = None
        self._nTest = None
        self._seed = None
        self._method = CrossValidMethod.retrieve(
            retrieve_kw(kw, 'method', CrossValidMethod.Standard))

        if self._method is CrossValidMethod.Standard:
            self._nSorts = retrieve_kw(kw, 'nSorts', 50)
            self._nBoxes = retrieve_kw(kw, 'nBoxes', 10)
            self._nTrain = retrieve_kw(kw, 'nTrain', 6)
            self._nValid = retrieve_kw(kw, 'nValid', 4)
            self._nTest = retrieve_kw(
                kw, 'nTest', self._nBoxes - (self._nTrain + self._nValid))
            self._seed = retrieve_kw(kw, 'seed', None)
            checkForUnusedVars(kw, self._warning)
            # Check if variables are ok:
            if self._nTest is not None and self._nTest < 0:
                self._fatal("Number of test clusters is lesser than zero",
                            ValueError)
            # A missing (None) test group counts as zero boxes in every
            # computation below; self._nTest itself is left untouched.
            nTest = self._nTest or 0
            if self._nTrain + self._nValid + nTest != self._nBoxes:
                self._fatal(
                    "Sum of train, validation and test boxes doesn't match.",
                    ValueError)

            np.random.seed(self._seed)

            # The number of distinct sorts is the multinomial coefficient
            # nBoxes! / (nTrain! * nValid! * nTest!). When more than half of
            # them are requested, drawing random permutations until enough
            # distinct ones show up is wasteful, so all combinations are
            # generated and the surplus is discarded at random instead.
            # As calculating factorials can be heavy, this is not attempted
            # when the number of boxes is large.
            self._sort_boxes_list = []
            useRandomCreation = True
            from math import factorial
            if self._nBoxes < 201:
                # Floor division keeps the (exact) result an integer on
                # Python 3 as well.
                totalPossibilities = factorial(self._nBoxes) // (
                    factorial(self._nTrain) * factorial(self._nValid) *
                    factorial(nTest))
                if 2 * self._nSorts > totalPossibilities:
                    useRandomCreation = False
            if useRandomCreation:
                # Looping on the list length (rather than counting up to an
                # exact match) also terminates when nSorts is zero.
                while len(self._sort_boxes_list) < self._nSorts:
                    random_boxes = np.random.permutation(self._nBoxes)
                    # Sort inside each group so that the same group contents
                    # always produce the same tuple.
                    random_boxes = tuple(
                        chain(
                            sorted(random_boxes[0:self._nTrain]),
                            sorted(random_boxes[self._nTrain:self._nTrain +
                                                self._nValid]),
                            sorted(random_boxes[self._nTrain +
                                                self._nValid:])))
                    # Make sure we are not appending same sort again:
                    if random_boxes not in self._sort_boxes_list:
                        self._sort_boxes_list.append(random_boxes)
            else:
                self._sort_boxes_list = list(
                    combinations_taken_by_multiple_groups(
                        range(self._nBoxes),
                        (self._nTrain, self._nValid, nTest)))
                # Drop random entries until only nSorts remain. The index is
                # drawn from the current list length so it is always valid.
                while len(self._sort_boxes_list) > self._nSorts:
                    self._sort_boxes_list.pop(
                        np.random.randint(len(self._sort_boxes_list)))
        elif self._method is CrossValidMethod.JackKnife:
            self._nBoxes = retrieve_kw(kw, 'nBoxes', 10)
            checkForUnusedVars(kw, self._warning)
            self._nSorts = self._nBoxes
            self._nTrain = self._nBoxes - 1
            self._nValid = 1
            self._nTest = 0
            # Group sizes follow nBoxes instead of the hard-coded (9, 1),
            # which was only right for the default of 10 boxes.
            self._sort_boxes_list = list(
                combinations_taken_by_multiple_groups(
                    range(self._nBoxes), (self._nTrain, self._nValid)))
        elif self._method is CrossValidMethod.StratifiedKFold:
            self._nBoxes = retrieve_kw(kw, 'nBoxes', 10)
            self._shuffle = retrieve_kw(kw, 'shuffle', False)
            checkForUnusedVars(kw, self._warning)
            self._nSorts = self._nBoxes
            self._nTrain = self._nBoxes - 1
            self._nValid = 1
            self._nTest = 0
Esempio n. 5
0
# Remaining command-line options of the script.
parser.add_argument(
    '--overrideOutputPlace',
    default=None,
    help="If the job is submitted by another user, then it is needed to override the output place.")
parser.add_argument(
    '-i', '--inputFolder',
    metavar='InputFolder',
    default=None,
    help="Folder to loop upon files to retrieve configuration (only needed if using checkForMissingJobs is set.")
parser.add_argument(
    '-b', '--bsubJobsQueue',
    metavar='JOBS-QUEUE',
    default=None,
    help="Jobs from --logFile which pending or running on bsub and shouldn't be searched for the output file.")

import sys
# Only the script name on the command line: show the usage and exit with 1.
if len(sys.argv) == 1:
  parser.print_help()
  sys.exit(1)

# Retrieve parser args:
args = parser.parse_args()

from RingerCore import printArgs, Logger
logger = Logger.getModuleLogger(__name__)
printArgs(args, logger.info)

# Checking for missing jobs needs a folder to read the configurations from.
if args.checkForMissingJobs and not args.inputFolder:
  logger.fatal("--checkForMissingJobs is set, please specify --inputFolder.")

import os
#os.system('rcSetup -u')
oPlace = os.path.abspath(args.permanentOutputPlace)
# Full paths of the regular files sitting directly inside the output place
# (sub-directories are left out).
ofiles = [
  f
  for f in (os.path.join(oPlace, entry) for entry in os.listdir(oPlace))
  if os.path.isfile(f)
]

import re
# Patterns used to recognise lines of the submission log. They are written
# as raw strings so that regex escapes such as \s, \S and \d are not treated
# as (invalid) string escapes; the compiled patterns are unchanged.
# Each trailing `\\` matches one literal backslash at the end of the line.
executeLine = re.compile(r'.*Executing following command:')
# Groups: (1) text up to the queue name, (2) queue name, (3) rest of the line.
submissionLine = re.compile(r'(\s+env -i bsub -q )(\S*)( -u "" -J .+\\)')
commandLine = re.compile(r'\s+\S+\s+\\')
# Groups: (1) job configuration file name, (2)-(6) the numbers following
# 'n', 'sl', 'su', 'il' and 'iu' in that name.
# NOTE(review): the dots between the fields are unescaped and match any
# character -- kept as in the original; confirm whether literal dots were meant.
jobLine = re.compile(r'\s+--jobConfig\s+(\S+.n(\d+).sl(\d+).su(\d+).il(\d+).iu(\d+).pic)\s+\\')
# Group (1): value given to --datasetPlace.
dataPlaceLine = re.compile(r'\s+--datasetPlace\s+(\S+)\s+\\')
Esempio n. 6
0
## Treating special args:
# Configuration: only the options that were actually given (not None) are
# forwarded, under the keyword names listed here.
conf_kw = {
  key: value
  for key, value in (
    ('neuronBoundsCol', args.neuronBounds),
    ('sortBoundsCol', args.sortBounds),
    ('initBoundsCol', args.initBounds),
    ('confFileList', args.confFileList),
  )
  if value is not None
}
# Binning: a one-element list is replaced by its single element.
from RingerCore import printArgs, NotSet, Logger, LoggingLevel
if args.et_bins is not NotSet and len(args.et_bins) == 1:
  args.et_bins = args.et_bins[0]
if args.eta_bins is not NotSet and len(args.eta_bins) == 1:
  args.eta_bins = args.eta_bins[0]

logger = Logger.getModuleLogger(__name__, args.output_level)

printArgs(args, logger.debug)

# Compression is on unless --no_compress was requested.
compress = not args.no_compress

# Submit job:
from TuningTools import TuningJob
tuningJob = TuningJob()
tuningJob( 
           args.data, 
           level          = args.output_level,
					 compress       = compress,
					 outputFileBase = args.outputFileBase,
           operationPoint = args.operation,
           # Cross validation args
					 crossValidFile = args.crossFile,
           # Pre Processing
Esempio n. 7
0
    '-b',
    '--bsubJobsQueue',
    default=None,
    metavar='JOBS-QUEUE',
    help=
    "Jobs from --logFile which pending or running on bsub and shouldn't be searched for the output file."
)

# NOTE(review): presumably prints the help text and exits when the script is
# called without arguments -- confirm against the helper's definition.
emptyArgumentsPrintHelp(parser)

# Retrieve parser args:
args = parser.parse_args()

from RingerCore import printArgs, Logger
logger = Logger.getModuleLogger(__name__)
printArgs(args, logger.info)

# Checking for missing jobs needs a folder to read the configurations from.
if args.checkForMissingJobs and not args.inputFolder:
    logger.fatal("--checkForMissingJobs is set, please specify --inputFolder.")

import os
#os.system('rcSetup -u')
oPlace = os.path.abspath(args.permanentOutputPlace)
# Full paths of the regular files sitting directly inside the output place
# (sub-directories are filtered out).
ofiles = list(
    filter(os.path.isfile,
           (os.path.join(oPlace, entry) for entry in os.listdir(oPlace))))

import re
# Patterns used to recognise lines of the submission log. They are written
# as raw strings so that regex escapes such as \s and \S are not treated as
# (invalid) string escapes; the compiled patterns are unchanged.
executeLine = re.compile(r'.*Executing following command:')
# Groups: (1) text up to the queue name, (2) queue name, (3) rest of the
# line, which must end with a literal backslash.
submissionLine = re.compile(r'(\s+env -i bsub -q )(\S*)( -u "" -J .+\\)')
Esempio n. 8
0
  def __init__(self, **kw ):
    """
    Configure the cross-validation and build the list of box sorts.

    Keyword arguments (defaults are the ones given to retrieve_kw below):
      - method [CrossValidMethod.Standard]: cross-validation method.
      - nSorts [50]: number of sorts to generate (Standard only).
      - nBoxes [10]: number of boxes the data is split into.
      - nTrain [6], nValid [4]: boxes used for training and validation
        (Standard only).
      - nTest [nBoxes - (nTrain + nValid)]: boxes used for testing
        (Standard only); None is treated as zero.
      - seed [None]: value handed to np.random.seed (Standard only).

    The resulting sorts are stored in self._sort_boxes_list. Inconsistent box
    counts are reported through self._logger.fatal with ValueError.
    """
    Logger.__init__( self, kw  )
    printArgs( kw, self._logger.debug  )
    self._nSorts = None
    self._nBoxes = None
    self._nTrain = None
    self._nValid = None
    self._nTest  = None
    self._seed   = None
    self._method = CrossValidMethod.retrieve( retrieve_kw( kw, 'method', CrossValidMethod.Standard ) )

    if self._method is CrossValidMethod.Standard:
      self._nSorts = retrieve_kw( kw, 'nSorts', 50 )
      self._nBoxes = retrieve_kw( kw, 'nBoxes', 10 )
      self._nTrain = retrieve_kw( kw, 'nTrain', 6  )
      self._nValid = retrieve_kw( kw, 'nValid', 4  )
      self._nTest  = retrieve_kw( kw, 'nTest',  self._nBoxes - ( self._nTrain + self._nValid ) )
      self._seed   = retrieve_kw( kw, 'seed',   None )
      checkForUnusedVars( kw, self._logger.warning )
      # Check if variables are ok:
      if self._nTest is not None and self._nTest < 0:
        self._logger.fatal("Number of test clusters is lesser than zero", ValueError)
      # A missing (None) test group counts as zero boxes in every computation
      # below; self._nTest itself is left untouched.
      nTest = self._nTest or 0
      if self._nTrain + self._nValid + nTest != self._nBoxes:
        self._logger.fatal("Sum of train, validation and test boxes doesn't match.", ValueError)

      np.random.seed(self._seed)

      # The number of distinct sorts is the multinomial coefficient
      # nBoxes! / (nTrain! * nValid! * nTest!). When more than half of them
      # are requested, drawing random permutations until enough distinct ones
      # show up is wasteful, so all combinations are generated and the
      # surplus is discarded at random instead.
      # As calculating factorials can be heavy, this is not attempted when
      # the number of boxes is large.
      self._sort_boxes_list = []
      useRandomCreation = True
      from math import factorial
      if self._nBoxes < 201:
        # Floor division keeps the (exact) result an integer on Python 3 too.
        totalPossibilities = factorial( self._nBoxes ) // \
            ( factorial( self._nTrain ) * \
              factorial( self._nValid ) * \
              factorial( nTest ) )
        if 2 * self._nSorts > totalPossibilities:
          useRandomCreation = False
      if useRandomCreation:
        # Looping on the list length (rather than counting up to an exact
        # match) also terminates when nSorts is zero.
        while len(self._sort_boxes_list) < self._nSorts:
          random_boxes = np.random.permutation(self._nBoxes)
          # Sort inside each group so that the same group contents always
          # produce the same tuple.
          random_boxes = tuple(chain(sorted(random_boxes[0:self._nTrain]),
                          sorted(random_boxes[self._nTrain:self._nTrain+self._nValid]),
                          sorted(random_boxes[self._nTrain+self._nValid:])))
          # Make sure we are not appending same sort again:
          if random_boxes not in self._sort_boxes_list:
            self._sort_boxes_list.append( random_boxes )
      else:
        # self._nValid replaces the undefined attribute self._nVal.
        self._sort_boxes_list = list(
            combinations_taken_by_multiple_groups(range(self._nBoxes),
                                                  (self._nTrain,
                                                   self._nValid,
                                                   nTest)))
        # Drop random entries until only nSorts remain. The index is drawn
        # from the current list length so it is always valid.
        while len(self._sort_boxes_list) > self._nSorts:
          self._sort_boxes_list.pop( np.random.randint( len(self._sort_boxes_list) ) )
    elif self._method is CrossValidMethod.JackKnife:
      self._nBoxes = retrieve_kw( kw, 'nBoxes', 10 )
      checkForUnusedVars( kw, self._logger.warning )
      self._nSorts = self._nBoxes
      self._nTrain = self._nBoxes - 1
      self._nValid = 1
      self._nTest  = 0
      # Group sizes follow nBoxes instead of the hard-coded (9, 1), which was
      # only right for the default of 10 boxes.
      self._sort_boxes_list = list(
          combinations_taken_by_multiple_groups(range(self._nBoxes),
                                                (self._nTrain, self._nValid)) )