def testUpload(self):
    """
    Test uploading to a crab server
    """
    tb = UserTarball(name='default.tgz', logger=self.logger, config=testWMConfig)
    result = tb.upload()

    self.assertTrue(result['size'] > 0)
    self.assertTrue(len(result['hashkey']) > 0)
def testContext(self):
    """
    Test the object out of context (after TarFile is closed)
    """
    with UserTarball(name='default.tgz', logger=self.logger) as tb:
        self.tarBalls.append(tb.name)

    # The underlying TarFile is closed once the with-block exits
    self.assertRaises(IOError, tb.addFiles)
    self.assertEqual(tarfile.GNU_FORMAT, tb.tarfile.format)
def testInit(self):
    """
    Test constructor
    """
    tb = UserTarball(name='default.tgz', logger=self.logger)
    self.assertEqual(os.path.basename(tb.name), 'default.tgz')
    self.tarBalls.append(tb.name)
def testGlob(self):
    """
    Test globbing and extra files
    """
    userFiles = ['%s/src/Module/Submodule/extra_*.txt' % (self.base),
                 '%s/src/Module/Submodule/additional_file.txt' % (self.base)]
    tb = UserTarball(name='default.tgz', logger=self.logger)
    tb.addFiles(userFiles=userFiles)
    members = ['lib', 'lib/slc5_ia32_gcc434',
               'lib/slc5_ia32_gcc434/libSomewhere.so',
               'lib/slc5_ia32_gcc434/libSomething.so',
               'src/Module/Submodule/data',
               'src/Module/Submodule/data/datafile.txt',
               'extra_file2.txt', 'extra_file.txt',
               'additional_file.txt']
    self.assertEqual(sorted(tb.getnames()), sorted(members))
    self.tarBalls.append(tb.name)
def testAccess(self):
    """
    Test accesses with __getattr__ to the underlying TarFile.
    This test really should be done with assertRaises as a context
    manager, which is only available in python 2.7
    """
    tb = UserTarball(name='default.tgz', logger=self.logger)
    try:
        tb.doesNotExist()
        self.fail('Did not raise AttributeError')
    except AttributeError:
        pass

    try:
        x = tb.doesNotExistEither
        self.fail('Did not raise AttributeError')
    except AttributeError:
        pass
def testMissingGlob(self):
    """
    Test globbing when one of the extra files does not exist
    """
    userFiles = ['%s/src/Module/Submodule/extra_*.txt' % (self.base),
                 '%s/src/Module/Submodule/missing_file.txt' % (self.base)]
    tb = UserTarball(name='default.tgz', logger=self.logger)
    self.assertRaises(InputFileNotFoundException, tb.addFiles, userFiles=userFiles)
    self.tarBalls.append(tb.name)
def testAddFiles(self):
    """
    Test the basic tarball, no userfiles
    """
    members = ['lib', 'lib/slc5_ia32_gcc434',
               'lib/slc5_ia32_gcc434/libSomewhere.so',
               'lib/slc5_ia32_gcc434/libSomething.so',
               'src/Module/Submodule/data',
               'src/Module/Submodule/data/datafile.txt']
    with UserTarball(name='default.tgz', logger=self.logger) as tb:
        self.tarBalls.append(tb.name)
        tb.addFiles()
        self.assertEqual(sorted(tb.getnames()), sorted(members))
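# The tests above cover the whole UserTarball surface exercised in this file:
# construction, addFiles() with glob patterns, getnames(), upload(), and
# context-manager use. A minimal usage sketch, assuming the same constructor
# signature; `config`, `logger` and the file paths below are hypothetical
# placeholders, not values from this code base.
with UserTarball(name='sandbox.tgz', logger=logger, config=config) as tb:
    tb.addFiles(userFiles=['extra_inputs/*.txt'])  # glob patterns are expanded
    print(tb.getnames())                           # list the tarball members
# After the with-block the underlying TarFile is closed; further addFiles()
# calls raise IOError, as testContext verifies.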
def run(self, filecacheurl=None):
    """
    Override run() for JobType
    """
    configArguments = {'addoutputfiles': [],
                       'adduserfiles': [],
                       'tfileoutfiles': [],
                       'edmoutfiles': [],
                      }

    if getattr(self.config.Data, 'useParent', False) and getattr(self.config.Data, 'secondaryInputDataset', None):
        msg = "Invalid CRAB configuration: Parameters Data.useParent and Data.secondaryInputDataset cannot be used together."
        raise ConfigurationException(msg)

    # Get SCRAM environment
    scram = ScramEnvironment(logger=self.logger)
    configArguments.update({'jobarch': scram.getScramArch(),
                            'jobsw': scram.getCmsswVersion()})

    # Build tarball
    if self.workdir:
        tarUUID = PandaInterface.wrappedUuidGen()
        self.logger.debug('UNIQUE NAME: tarUUID %s ' % tarUUID)
        if len(tarUUID):
            tarFilename = os.path.join(self.workdir, tarUUID + 'default.tgz')
            cfgOutputName = os.path.join(self.workdir, BOOTSTRAP_CFGFILE)
        else:
            raise EnvironmentException('Problem with uuidgen while preparing for Sandbox upload.')
    else:
        _dummy, tarFilename = tempfile.mkstemp(suffix='.tgz')
        _dummy, cfgOutputName = tempfile.mkstemp(suffix='_cfg.py')

    if getattr(self.config.Data, 'inputDataset', None):
        configArguments['inputdata'] = self.config.Data.inputDataset

    ## Create CMSSW config.
    self.logger.debug("self.config: %s" % (self.config))
    self.logger.debug("self.config.JobType.psetName: %s" % (self.config.JobType.psetName))
    ## The loading of a CMSSW pset in the CMSSWConfig constructor is not idempotent
    ## in the sense that a second loading of the same pset may not produce the same
    ## result. Therefore there is a cache in CMSSWConfig to avoid loading any CMSSW
    ## pset twice. However, some "complicated" psets seem to evade the caching.
    ## Thus, to be safe, keep the CMSSWConfig instance in a class variable, so that
    ## it can be reused later if wanted (for example, in PrivateMC when checking if
    ## the pset has an LHE source) instead of having to load the pset again.
    ## As for what "complicated" psets means, Daniel Riley said that there are
    ## some psets where one module modifies the configuration from another module.
    self.cmsswCfg = CMSSWConfig(config=self.config, logger=self.logger,
                                userConfig=self.config.JobType.psetName)

    ## If there is a CMSSW pset, do a basic validation of it.
    if not bootstrapDone() and self.config.JobType.psetName:
        valid, msg = self.cmsswCfg.validateConfig()
        if not valid:
            raise ConfigurationException(msg)

    ## We need to put the pickled CMSSW configuration in the right place.
    ## Here we determine whether the bootstrap script has already run and prepared
    ## everything for us. In that case we move the file; otherwise we pickle.dump the pset.
    if not bootstrapDone():
        # Write out CMSSW config
        self.cmsswCfg.writeFile(cfgOutputName)
    else:
        # Move the pickled and the configuration files created by the bootstrap script
        self.moveCfgFile(cfgOutputName)

    ## Interrogate the CMSSW pset for output files (only output files produced by
    ## PoolOutputModule or TFileService are identified automatically). Do this
    ## automatic detection even if JobType.disableAutomaticOutputCollection = True,
    ## so that we can still classify the output files in EDM, TFile and additional
    ## output files in the Task DB (and the job ad).
    ## TODO: Do we really need this classification at all? cmscp and PostJob read
    ## the FJR to know if an output file is EDM, TFile or other.
    edmfiles, tfiles = self.cmsswCfg.outputFiles()
    ## If JobType.disableAutomaticOutputCollection = True, ignore the EDM and TFile
    ## output files that are not listed in JobType.outputFiles.
    if getattr(self.config.JobType, 'disableAutomaticOutputCollection',
               getParamDefaultValue('JobType.disableAutomaticOutputCollection')):
        outputFiles = [re.sub(r'^file:', '', file) for file in getattr(self.config.JobType, 'outputFiles', [])]
        edmfiles = [file for file in edmfiles if file in outputFiles]
        tfiles = [file for file in tfiles if file in outputFiles]
    ## Get the list of additional output files that have to be collected as given
    ## in JobType.outputFiles, but remove duplicates listed already as EDM files or
    ## TFiles.
    addoutputFiles = [re.sub(r'^file:', '', file) for file in getattr(self.config.JobType, 'outputFiles', [])
                      if re.sub(r'^file:', '', file) not in edmfiles + tfiles]
    self.logger.debug("The following EDM output files will be collected: %s" % edmfiles)
    self.logger.debug("The following TFile output files will be collected: %s" % tfiles)
    self.logger.debug("The following user output files will be collected: %s" % addoutputFiles)
    configArguments['edmoutfiles'] = edmfiles
    configArguments['tfileoutfiles'] = tfiles
    configArguments['addoutputfiles'].extend(addoutputFiles)
    ## Give a warning message in case no output file was detected in the CMSSW pset
    ## nor was any specified in the CRAB configuration.
    if not configArguments['edmoutfiles'] and not configArguments['tfileoutfiles'] and not configArguments['addoutputfiles']:
        msg = "%sWarning%s:" % (colors.RED, colors.NORMAL)
        if getattr(self.config.JobType, 'disableAutomaticOutputCollection',
                   getParamDefaultValue('JobType.disableAutomaticOutputCollection')):
            msg += " Automatic detection of output files in the CMSSW configuration is disabled from the CRAB configuration"
            msg += " and no output file was explicitly specified in the CRAB configuration."
        else:
            msg += " CRAB could not detect any output file in the CMSSW configuration"
            msg += " nor was any explicitly specified in the CRAB configuration."
        msg += " Hence CRAB will not collect any output file from this task."
        self.logger.warning(msg)

    ## UserTarball calls ScramEnvironment which can raise EnvironmentException.
    ## Since ScramEnvironment is already called above and the exception is not
    ## handled, we are sure that if we reached this point it will not raise EnvironmentException.
    ## But otherwise we should take this into account.
    with UserTarball(name=tarFilename, logger=self.logger, config=self.config) as tb:
        inputFiles = [re.sub(r'^file:', '', file) for file in getattr(self.config.JobType, 'inputFiles', [])]
        tb.addFiles(userFiles=inputFiles, cfgOutputName=cfgOutputName)
        configArguments['adduserfiles'] = [os.path.basename(f) for f in inputFiles]
        try:
            uploadResult = tb.upload(filecacheurl=filecacheurl)
        except HTTPException as hte:
            if 'X-Error-Info' in hte.headers:
                reason = hte.headers['X-Error-Info']
                reason_re = re.compile(r'\AFile size is ([0-9]*)B\. This is bigger than the maximum allowed size of ([0-9]*)B\.$')
                re_match = reason_re.match(reason)
                if re_match:
                    ISBSize = int(re_match.group(1))
                    ISBSizeLimit = int(re_match.group(2))
                    reason = "%sError%s:" % (colors.RED, colors.NORMAL)
                    reason += " Input sandbox size is ~%sMB. This is bigger than the maximum allowed size of %sMB." \
                              % (ISBSize / 1024 / 1024, ISBSizeLimit / 1024 / 1024)
                    ISBContent = sorted(tb.content, reverse=True)
                    biggestFileSize = ISBContent[0][0]
                    ndigits = int(math.ceil(math.log(biggestFileSize + 1, 10)))
                    reason += "\nInput sandbox content sorted by size [Bytes]:"
                    for (size, name) in ISBContent:
                        reason += ("\n%" + str(ndigits) + "s\t%s") % (size, name)
                    raise ClientException(reason)
            raise hte
        except Exception as e:
            msg = ("Impossible to calculate the checksum of the sandbox tarball.\nError message: %s.\n"
                   "More details can be found in %s" % (e, self.logger.logfile))
            LOGGERS['CRAB3'].exception(msg)  # the traceback is only printed into the logfile
            raise ClientException(msg)

    configArguments['cacheurl'] = filecacheurl
    configArguments['cachefilename'] = "%s.tar.gz" % uploadResult
    self.logger.debug("Result uploading input files: %(cachefilename)s " % configArguments)

    # Upload list of user-defined input files to process as the primary input
    userFilesList = getattr(self.config.Data, 'userInputFiles', None)
    if userFilesList:
        self.logger.debug("Attaching list of user-specified primary input files.")
        userFilesList = map(string.strip, userFilesList)
        userFilesList = [file for file in userFilesList if file]
        if len(userFilesList) != len(set(userFilesList)):
            msg = "%sWarning%s:" % (colors.RED, colors.NORMAL)
            msg += " CRAB configuration parameter Data.userInputFiles contains duplicated entries."
            msg += " Duplicated entries will be removed."
            self.logger.warning(msg)
        configArguments['userfiles'] = set(userFilesList)
        configArguments['primarydataset'] = getattr(self.config.Data, 'outputPrimaryDataset', 'CRAB_UserFiles')

    lumi_mask_name = getattr(self.config.Data, 'lumiMask', None)
    lumi_list = None
    if lumi_mask_name:
        self.logger.debug("Attaching lumi mask %s to the request" % (lumi_mask_name))
        try:
            lumi_list = getLumiList(lumi_mask_name, logger=self.logger)
        except ValueError as ex:
            msg = "%sError%s:" % (colors.RED, colors.NORMAL)
            msg += " Failed to load lumi mask %s : %s" % (lumi_mask_name, ex)
            raise ConfigurationException(msg)
    run_ranges = getattr(self.config.Data, 'runRange', None)
    if run_ranges:
        run_ranges_is_valid = re.match(r'^\d+((?!(-\d+-))(\,|\-)\d+)*$', run_ranges)
        if run_ranges_is_valid:
            run_list = getRunList(run_ranges)
            if lumi_list:
                lumi_list.selectRuns(run_list)
                if not lumi_list:
                    msg = "Invalid CRAB configuration: The intersection between the lumi mask and the run range is null."
                    raise ConfigurationException(msg)
            else:
                if len(run_list) > 50000:
                    msg = "CRAB configuration parameter Data.runRange includes %s runs." % str(len(run_list))
                    msg += " When Data.lumiMask is not specified, Data.runRange can not include more than 50000 runs."
                    raise ConfigurationException(msg)
                lumi_list = LumiList(runs=run_list)
        else:
            msg = "Invalid CRAB configuration: Parameter Data.runRange should be a comma separated list of integers or (inclusive) ranges. Example: '12345,99900-99910'"
            raise ConfigurationException(msg)
    if lumi_list:
        configArguments['runs'] = lumi_list.getRuns()
        ## For each run we encode the lumis as a string representing a list of integers: [[1,2],[5,5]] ==> '1,2,5,5'
        lumi_mask = lumi_list.getCompactList()
        configArguments['lumis'] = [str(reduce(lambda x, y: x + y, lumi_mask[run]))[1:-1].replace(' ', '')
                                    for run in configArguments['runs']]

    configArguments['jobtype'] = 'Analysis'

    return tarFilename, configArguments
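# A small standalone sketch of the lumi encoding used above, so the
# "[[1,2],[5,5]] ==> '1,2,5,5'" comment is easy to verify in isolation.
# The run number and ranges below are illustrative values only; the encoding
# expression itself is the same one used in configArguments['lumis'].
from functools import reduce  # built-in in Python 2, imported in Python 3

lumi_mask = {'123456': [[1, 2], [5, 5]]}   # compact list: [[first, last], ...] per run
encoded = dict((run, str(reduce(lambda x, y: x + y, ranges))[1:-1].replace(' ', ''))
               for run, ranges in lumi_mask.items())
print(encoded)  # {'123456': '1,2,5,5'}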
# Horrible hack to be able to import this from anywhere
template_filename = os.path.join(os.environ['CMSSW_BASE'],
                                 'src/UHH2/scripts/crab/crab_template.py')
sys.path.append(os.path.dirname(template_filename))
from crab_template import config as dummy_config

# # Create dummy config file for testing - only needs JobType info
# dummy_config = config()
# dummy_config.JobType.psetName = os.path.join(os.environ['CMSSW_BASE'], 'src/UHH2/core/python/ntuplewriter_data_2018.py')
# dummy_config.JobType.inputFiles = [os.path.join(os.environ['CMSSW_BASE'], 'src/L1Prefiring/EventWeightProducer/files/L1PrefiringMaps_new.root')]
# dummy_config.JobType.sendExternalFolder = True  # needed for fastjet setup

# Create tarball
tarball_name = args.output
with UserTarball(name=tarball_name, config=dummy_config, logger=logger) as tb:
    # We ignore the pset and pickle file as they are only a few MB, account for later
    inputFiles = [re.sub(r'^file:', '', file)
                  for file in getattr(dummy_config.JobType, 'inputFiles', [])]
    tb.addFiles(userFiles=inputFiles)

if not os.path.isfile(tarball_name):
    raise IOError("Tarball file %s was not produced" % tarball_name)

if args.noSizeCheck:
    sys.exit(0)

# Check size
size_bytes = os.path.getsize(tarball_name)
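# The script above stops right after measuring the tarball size. A minimal
# sketch of the kind of check that could follow; SIZE_LIMIT_MB is a
# hypothetical illustrative value, not the actual CRAB sandbox limit (which
# is enforced server-side and may differ).
SIZE_LIMIT_MB = 100
size_mb = size_bytes / (1024.0 * 1024.0)
print("Tarball %s is %.1f MB" % (tarball_name, size_mb))
if size_mb > SIZE_LIMIT_MB:
    print("Tarball exceeds the %d MB limit - consider trimming JobType.inputFiles" % SIZE_LIMIT_MB)
    sys.exit(1)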
def run(self, filecacheurl=None):
    """
    Override run() for JobType
    """
    configArguments = {'addoutputfiles': [],
                       'adduserfiles': [],
                       'tfileoutfiles': [],
                       'edmoutfiles': [],
                      }

    # Get SCRAM environment
    scram = ScramEnvironment(logger=self.logger)

    configArguments.update({'jobarch': scram.scramArch,
                            'jobsw': scram.cmsswVersion,
                           })

    # Build tarball
    if self.workdir:
        tarUUID = PandaInterface.wrappedUuidGen()
        self.logger.debug('UNIQUE NAME: tarUUID %s ' % tarUUID)
        if len(tarUUID):
            tarFilename = os.path.join(self.workdir, tarUUID + 'default.tgz')
            cfgOutputName = os.path.join(self.workdir, 'CMSSW_cfg.py')
        else:
            raise EnvironmentException('Problem with uuidgen while preparing for Sandbox upload.')
    else:
        _dummy, tarFilename = tempfile.mkstemp(suffix='.tgz')
        _dummy, cfgOutputName = tempfile.mkstemp(suffix='_cfg.py')

    if getattr(self.config.Data, 'inputDataset', None):
        configArguments['inputdata'] = self.config.Data.inputDataset
    # configArguments['ProcessingVersion'] = getattr(self.config.Data, 'processingVersion', None)

    # Create CMSSW config
    self.logger.debug("self.config: %s" % self.config)
    self.logger.debug("self.config.JobType.psetName: %s" % self.config.JobType.psetName)
    cmsswCfg = CMSSWConfig(config=self.config, logger=self.logger,
                           userConfig=self.config.JobType.psetName)

    ## Interrogate the CMSSW config and the user config for output file names. For now no use for EDM files or TFiles here.
    edmfiles, tfiles = cmsswCfg.outputFiles()
    addoutputFiles = [re.sub(r'^file:', '', file) for file in getattr(self.config.JobType, 'outputFiles', [])
                      if re.sub(r'^file:', '', file) not in edmfiles + tfiles]
    self.logger.debug("The following EDM output files will be collected: %s" % edmfiles)
    self.logger.debug("The following TFile output files will be collected: %s" % tfiles)
    self.logger.debug("The following user output files will be collected: %s" % addoutputFiles)
    configArguments['edmoutfiles'] = edmfiles
    configArguments['tfileoutfiles'] = tfiles
    configArguments['addoutputfiles'].extend(addoutputFiles)

    # Write out CMSSW config
    cmsswCfg.writeFile(cfgOutputName)

    ## UserTarball calls ScramEnvironment which can raise EnvironmentException.
    ## Since ScramEnvironment is already called above and the exception is not
    ## handled, we are sure that if we reached this point it will not raise EnvironmentException.
    ## But otherwise we should take this into account.
    with UserTarball(name=tarFilename, logger=self.logger, config=self.config) as tb:
        inputFiles = [re.sub(r'^file:', '', file) for file in getattr(self.config.JobType, 'inputFiles', [])]
        tb.addFiles(userFiles=inputFiles, cfgOutputName=cfgOutputName)
        configArguments['adduserfiles'] = [os.path.basename(f) for f in inputFiles]
        uploadResults = tb.upload(filecacheurl=filecacheurl)

    self.logger.debug("Result uploading input files: %s " % str(uploadResults))
    configArguments['cacheurl'] = filecacheurl
    configArguments['cachefilename'] = uploadResults[0]
    isbchecksum = uploadResults[1]

    # Upload list of user-defined input files to process as the primary input
    userFilesList = getattr(self.config.Data, 'userInputFiles', None)
    if userFilesList:
        self.logger.debug("Attaching list of user-specified primary input files.")
        userFilesList = map(string.strip, userFilesList)
        userFilesList = [file for file in userFilesList if file]
        if len(userFilesList) != len(set(userFilesList)):
            msg = "%sWarning%s: CRAB configuration parameter Data.userInputFiles contains duplicated entries." % (colors.RED, colors.NORMAL)
            msg += " Duplicated entries will be removed."
            self.logger.warning(msg)
        configArguments['userfiles'] = set(userFilesList)
        ## Get the user-specified primary dataset name.
        primaryDataset = getattr(self.config.Data, 'primaryDataset', 'CRAB_UserFiles')
        # Normalizes "foo/bar" and "/foo/bar" to "/foo/bar"
        primaryDataset = "/" + os.path.join(*primaryDataset.split("/"))
        if not re.match("/%(primDS)s.*" % (lfnParts), primaryDataset):
            self.logger.warning("Invalid primary dataset name %s; publication may fail." % (primaryDataset))
        configArguments['inputdata'] = primaryDataset

    lumi_mask_name = getattr(self.config.Data, 'lumiMask', None)
    lumi_list = None
    if lumi_mask_name:
        self.logger.debug("Attaching lumi mask %s to the request" % lumi_mask_name)
        lumi_list = getLumiList(lumi_mask_name, logger=self.logger)
    run_ranges = getattr(self.config.Data, 'runRange', None)
    run_ranges_is_valid = run_ranges is not None and isinstance(run_ranges, str) and \
                          re.match(r'^\d+((?!(-\d+-))(\,|\-)\d+)*$', run_ranges)
    if run_ranges_is_valid:
        run_list = getRunList(run_ranges)
        if lumi_list:
            lumi_list.selectRuns(run_list)
        else:
            if len(run_list) > 50000:
                msg = "Data.runRange includes %s runs." % str(len(run_list))
                msg += " When Data.lumiMask is not specified, Data.runRange can not include more than 50000 runs."
                raise ConfigurationException(msg)
            lumi_list = LumiList(runs=run_list)
    if lumi_list:
        configArguments['runs'] = lumi_list.getRuns()
        ## For each run we encode the lumis as a string representing a list of integers: [[1,2],[5,5]] ==> '1,2,5,5'
        lumi_mask = lumi_list.getCompactList()
        configArguments['lumis'] = [str(reduce(lambda x, y: x + y, lumi_mask[run]))[1:-1].replace(' ', '')
                                    for run in configArguments['runs']]

    configArguments['jobtype'] = 'Analysis'

    return tarFilename, configArguments, isbchecksum
def run(self, requestConfig):
    """
    Override run() for JobType
    """
    configArguments = {'addoutputfiles': [],
                       'adduserfiles': [],
                       'tfileoutfiles': [],
                       'edmoutfiles': [],
                      }

    # Get SCRAM environment
    scram = ScramEnvironment(logger=self.logger)

    configArguments.update({'jobarch': scram.scramArch,
                            'jobsw': scram.cmsswVersion,
                           })

    # Build tarball
    if self.workdir:
        tarFilename = os.path.join(self.workdir, PandaInterface.wrappedUuidGen() + 'default.tgz')
        cfgOutputName = os.path.join(self.workdir, 'CMSSW_cfg.py')
    else:
        _dummy, tarFilename = tempfile.mkstemp(suffix='.tgz')
        _dummy, cfgOutputName = tempfile.mkstemp(suffix='_cfg.py')

    #configArguments['userisburl'] = 'https://' + self.config.General.ufccacheUrl + '/crabcache/file?hashkey=' + uploadResults['hashkey']  #XXX hardcoded
    #configArguments['userisburl'] = 'INSERTuserisburl'  #XXX hardcoded
    if getattr(self.config.Data, 'inputDataset', None):
        configArguments['inputdata'] = self.config.Data.inputDataset
    # configArguments['ProcessingVersion'] = getattr(self.config.Data, 'processingVersion', None)

    # Create CMSSW config
    self.logger.debug("self.config: %s" % self.config)
    self.logger.debug("self.config.JobType.psetName: %s" % self.config.JobType.psetName)
    cmsswCfg = CMSSWConfig(config=self.config, logger=self.logger,
                           userConfig=self.config.JobType.psetName)

    # Interrogate the CMSSW config and the user config for output file names; for now no use for edmFiles or TFiles here.
    analysisFiles, edmFiles = cmsswCfg.outputFiles()
    self.logger.debug("TFiles %s and EDM Files %s will be collected" % (analysisFiles, edmFiles))
    configArguments['tfileoutfiles'] = analysisFiles
    configArguments['edmoutfiles'] = edmFiles

    outputFiles = getattr(self.config.JobType, 'outputFiles', [])
    self.logger.debug("User files %s will be collected" % outputFiles)
    configArguments['addoutputfiles'].extend(outputFiles)

    # Write out CMSSW config
    cmsswCfg.writeFile(cfgOutputName)

    with UserTarball(name=tarFilename, logger=self.logger, config=self.config) as tb:
        inputFiles = getattr(self.config.JobType, 'inputFiles', [])
        tb.addFiles(userFiles=inputFiles, cfgOutputName=cfgOutputName)
        configArguments['adduserfiles'] = [os.path.basename(f) for f in inputFiles]
        uploadResults = tb.upload()

    self.logger.debug("Result uploading input files: %s " % str(uploadResults))
    configArguments['cachefilename'] = uploadResults[1]
    configArguments['cacheurl'] = uploadResults[0]
    isbchecksum = uploadResults[2]

    # Upload lumi mask if it exists
    lumiMaskName = getattr(self.config.Data, 'lumiMask', None)
    if lumiMaskName:
        self.logger.debug("Attaching lumi mask %s to the request" % lumiMaskName)
        lumiDict = getLumiMask(config=self.config, logger=self.logger)
        configArguments['runs'] = lumiDict.keys()
        # For each run we encode the lumis as a string representing a list of integers:
        # [[1,2],[5,5]] ==> '1,2,5,5'
        configArguments['lumis'] = [str(reduce(lambda x, y: x + y, lumiDict[run]))[1:-1].replace(' ', '')
                                    for run in configArguments['runs']]

    configArguments['jobtype'] = 'Analysis'

    return tarFilename, configArguments, isbchecksum
def run(self, requestConfig):
    """
    Override run() for JobType
    """
    configArguments = {'addoutputfiles': [],
                       'adduserfiles': [],
                       'tfileoutfiles': [],
                       'edmoutfiles': [],
                      }

    # Get SCRAM environment
    scram = ScramEnvironment(logger=self.logger)

    configArguments.update({'jobarch': scram.scramArch,
                            'jobsw': scram.cmsswVersion,
                           })

    # Build tarball
    if self.workdir:
        tarUUID = PandaInterface.wrappedUuidGen()
        self.logger.debug('UNIQUE NAME: tarUUID %s ' % tarUUID)
        if len(tarUUID):
            tarFilename = os.path.join(self.workdir, tarUUID + 'default.tgz')
            cfgOutputName = os.path.join(self.workdir, 'CMSSW_cfg.py')
        else:
            raise EnvironmentException('Problem with uuidgen while preparing for Sandbox upload.')
    else:
        _dummy, tarFilename = tempfile.mkstemp(suffix='.tgz')
        _dummy, cfgOutputName = tempfile.mkstemp(suffix='_cfg.py')

    #configArguments['userisburl'] = 'https://' + self.config.General.ufccacheUrl + '/crabcache/file?hashkey=' + uploadResults['hashkey']  #XXX hardcoded
    #configArguments['userisburl'] = 'INSERTuserisburl'  #XXX hardcoded
    if getattr(self.config.Data, 'inputDataset', None):
        configArguments['inputdata'] = self.config.Data.inputDataset
    # configArguments['ProcessingVersion'] = getattr(self.config.Data, 'processingVersion', None)

    # Create CMSSW config
    self.logger.debug("self.config: %s" % self.config)
    self.logger.debug("self.config.JobType.psetName: %s" % self.config.JobType.psetName)
    cmsswCfg = CMSSWConfig(config=self.config, logger=self.logger,
                           userConfig=self.config.JobType.psetName)

    ## Interrogate the CMSSW config and the user config for output file names. For now no use for EDM files or TFiles here.
    edmfiles, tfiles = cmsswCfg.outputFiles()
    addoutputFiles = [re.sub(r'^file:', '', file) for file in getattr(self.config.JobType, 'outputFiles', [])
                      if re.sub(r'^file:', '', file) not in edmfiles + tfiles]
    self.logger.debug("The following EDM output files will be collected: %s" % edmfiles)
    self.logger.debug("The following TFile output files will be collected: %s" % tfiles)
    self.logger.debug("The following user output files will be collected: %s" % addoutputFiles)
    configArguments['edmoutfiles'] = edmfiles
    configArguments['tfileoutfiles'] = tfiles
    configArguments['addoutputfiles'].extend(addoutputFiles)

    # Write out CMSSW config
    cmsswCfg.writeFile(cfgOutputName)

    with UserTarball(name=tarFilename, logger=self.logger, config=self.config) as tb:
        inputFiles = [re.sub(r'^file:', '', file) for file in getattr(self.config.JobType, 'inputFiles', [])]
        tb.addFiles(userFiles=inputFiles, cfgOutputName=cfgOutputName)
        configArguments['adduserfiles'] = [os.path.basename(f) for f in inputFiles]
        uploadResults = tb.upload()

    self.logger.debug("Result uploading input files: %s " % str(uploadResults))
    configArguments['cachefilename'] = uploadResults[1]
    configArguments['cacheurl'] = uploadResults[0]
    isbchecksum = uploadResults[2]

    # Upload list of user-defined input files to process as the primary input
    userFileName = getattr(self.config.Data, 'userInputFile', None)
    if userFileName:
        self.logger.debug("Attaching a list of user-specified primary input files from %s." % userFileName)
        fnames = []
        for fname in open(userFileName).readlines():
            fnames.append(fname.strip())
        configArguments['userfiles'] = filter(lambda x: x, fnames)  # removing whitelines and empty objects

        primDS = getattr(self.config.Data, 'primaryDataset', None)
        if primDS:
            # Normalizes "foo/bar" and "/foo/bar" to "/foo/bar"
            primDS = "/" + os.path.join(*primDS.split("/"))
            if not re.match("/%(primDS)s.*" % lfnParts, primDS):
                self.logger.warning("Invalid primary dataset name %s for private MC; publishing may fail" % primDS)
            configArguments['inputdata'] = primDS
        else:
            configArguments['inputdata'] = getattr(self.config.Data, 'inputDataset', '/CRAB_UserFiles')

    lumi_mask_name = getattr(self.config.Data, 'lumiMask', None)
    lumi_list = None
    if lumi_mask_name:
        self.logger.debug("Attaching lumi mask %s to the request" % lumi_mask_name)
        lumi_list = getLumiList(lumi_mask_name, logger=self.logger)
    run_ranges = getattr(self.config.Data, 'runRange', None)
    run_ranges_is_valid = run_ranges is not None and isinstance(run_ranges, str) and \
                          re.match(r'^\d+((?!(-\d+-))(\,|\-)\d+)*$', run_ranges)
    if run_ranges_is_valid:
        run_list = getRunList(run_ranges)
        if lumi_list:
            lumi_list.selectRuns(run_list)
        else:
            if len(run_list) > 50000:
                msg = "Data.runRange includes %s runs." % str(len(run_list))
                msg += " When Data.lumiMask is not specified, Data.runRange can not include more than 50000 runs."
                raise ConfigurationException(msg)
            lumi_list = LumiList(runs=run_list)
    if lumi_list:
        configArguments['runs'] = lumi_list.getRuns()
        ## For each run we encode the lumis as a string representing a list of integers: [[1,2],[5,5]] ==> '1,2,5,5'
        lumi_mask = lumi_list.getCompactList()
        configArguments['lumis'] = [str(reduce(lambda x, y: x + y, lumi_mask[run]))[1:-1].replace(' ', '')
                                    for run in configArguments['runs']]

    configArguments['jobtype'] = 'Analysis'

    return tarFilename, configArguments, isbchecksum
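# A short standalone sketch of the Data.runRange validation pattern shared by
# the run() variants above. The pattern is copied from the code; the sample
# values are illustrative only, except '12345,99900-99910', which is the
# example quoted in the CRAB error message.
import re

run_range_re = re.compile(r'^\d+((?!(-\d+-))(\,|\-)\d+)*$')

print(bool(run_range_re.match('12345,99900-99910')))  # True:  list of runs and an inclusive range
print(bool(run_range_re.match('254231')))             # True:  a single run number
print(bool(run_range_re.match('1-2-3')))              # False: chained ranges are rejected by the lookahead
print(bool(run_range_re.match('12345,')))             # False: trailing separator is rejected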