"CMSSW_8_0_0" : "CMSSW_8_0_6", "CMSSW_8_0_1" : "CMSSW_8_0_6", "CMSSW_8_0_2" : "CMSSW_8_0_6", "CMSSW_8_0_3" : "CMSSW_8_0_6", "CMSSW_8_0_4" : "CMSSW_8_0_6", "CMSSW_8_0_5" : "CMSSW_8_0_6" } #set default repack settings for bulk streams addRepackConfig(tier0Config, "Default", proc_ver = defaultProcVersion, maxSizeSingleLumi = 12 * 1024 * 1024 * 1024, maxSizeMultiLumi = 8 * 1024 * 1024 * 1024, minInputSize = 2.1 * 1024 * 1024 * 1024, maxInputSize = 4 * 1024 * 1024 * 1024, maxEdmSize = 12 * 1024 * 1024 * 1024, maxOverSize = 8 * 1024 * 1024 * 1024, maxInputEvents = 250 * 1000, maxInputFiles = 1000, maxLatency = 24 * 3600, blockCloseDelay = 1200, versionOverride = repackVersionOverride) addDataset(tier0Config, "Default", do_reco = False, write_reco = True, write_aod = True, write_miniaod = True, write_dqm = False, reco_delay = defaultRecoTimeout, reco_delay_offset = defaultRecoLockTimeout, reco_split = defaultRecoSplitting, proc_version = defaultProcVersion, cmssw_version = defaultCMSSWVersion,
# setup repack and express version mappings repackVersionOverride = { } expressVersionOverride = { "CMSSW_5_2_7" : "CMSSW_5_3_14", } hltmonVersionOverride = { "CMSSW_5_2_7" : "CMSSW_5_3_8", } addRepackConfig(tier0Config, "Default", proc_ver = 1, maxSizeSingleLumi = 1234, maxSizeMultiLumi = 1122, minInputSize = 210, maxInputSize = 400, maxEdmSize = 1233, maxOverSize = 1133, maxInputEvents = 500, maxInputFiles = 1111, versionOverride = repackVersionOverride) addExpressConfig(tier0Config, "Express", scenario = "pp", multicore = 4, data_tiers = [ "FEVT", "ALCARECO", "DQM" ], maxInputRate = 1234, maxInputEvents = 123, maxInputSize = 123456789, maxInputFiles = 1234, maxLatency = 12 * 23,
"CMSSW_5_2_9" : "CMSSW_5_3_20" } expressVersionOverride = { "CMSSW_5_2_7" : "CMSSW_5_3_20", "CMSSW_5_2_8" : "CMSSW_5_3_20", "CMSSW_5_2_9" : "CMSSW_5_3_20" } #set default repack settings for bulk streams addRepackConfig(tier0Config, "Default", proc_ver = defaultProcVersion, maxSizeSingleLumi = 10 * 1024 * 1024 * 1024, maxSizeMultiLumi = 8 * 1024 * 1024 * 1024, minInputSize = 2.1 * 1024 * 1024 * 1024, maxInputSize = 4 * 1024 * 1024 * 1024, maxEdmSize = 10 * 1024 * 1024 * 1024, maxOverSize = 8 * 1024 * 1024 * 1024, maxInputEvents = 3 * 1000 * 1000, maxInputFiles = 1000, maxLatency = 24 * 3600, blockCloseDelay = 1200, versionOverride = repackVersionOverride) addDataset(tier0Config, "Default", do_reco = False, write_reco = True, write_aod = True, write_miniaod = True, write_dqm = True, reco_delay = defaultRecoTimeout, reco_delay_offset = defaultRecoLockTimeout, reco_split = defaultRecoSplitting, proc_version = defaultProcVersion, cmssw_version = defaultCMSSWVersion,
# setup repack and express version mappings repackVersionOverride = {} expressVersionOverride = { "CMSSW_5_2_7": "CMSSW_5_3_14", } hltmonVersionOverride = { "CMSSW_5_2_7": "CMSSW_5_3_8", } addRepackConfig(tier0Config, "Default", proc_ver=1, maxSizeSingleLumi=1234, maxSizeMultiLumi=1122, minInputSize=210, maxInputSize=400, maxEdmSize=1233, maxOverSize=1133, maxInputEvents=500, maxInputFiles=1111, versionOverride=repackVersionOverride) addExpressConfig(tier0Config, "Express", scenario="pp", multicore=4, data_tiers=["FEVT"], write_dqm=True, maxInputRate=1234, maxInputEvents=123, maxInputSize=123456789,
def configureRunStream(tier0Config, run, stream, specDirectory, dqmUploadProxy):
    """
    _configureRunStream_

    Called by Tier0Feeder for new run/streams.

    Retrieve global run settings and build the part of the
    configuration relevant to run/stream and write it to the
    database. Create workflows, filesets and subscriptions
    for the processing of runs/streams.

    :param tier0Config: loaded Tier0 configuration object (provides
                        Streams, Global settings, version overrides)
    :param run: run number (int)
    :param stream: stream name (str)
    :param specDirectory: cache path handed to WMBSHelper for the spec
    :param dqmUploadProxy: proxy passed through to the Express spec for
                           DQM uploads
    :raises RuntimeError: if the stream has no datasets in the HLT menu,
                          or if the database transaction fails
    """
    logging.debug("configureRunStream() : %d , %s" % (run, stream))
    myThread = threading.currentThread()

    daoFactory = DAOFactory(package="T0.WMBS",
                            logger=logging,
                            dbinterface=myThread.dbi)

    # retrieve some basic run information
    getRunInfoDAO = daoFactory(classname="RunConfig.GetRunInfo")
    runInfo = getRunInfoDAO.execute(run, transaction=False)[0]

    # treat centralDAQ or miniDAQ runs (have an HLT key) different from local runs
    if runInfo['hltkey'] != None:

        # streams not explicitely configured are repacked
        if stream not in tier0Config.Streams.dictionary_().keys():
            addRepackConfig(tier0Config, stream)

        streamConfig = tier0Config.Streams.dictionary_()[stream]

        # consistency check to make sure stream exists and has datasets defined
        # only run if we don't ignore the stream
        if streamConfig.ProcessingStyle != "Ignore":
            getStreamDatasetsDAO = daoFactory(classname="RunConfig.GetStreamDatasets")
            datasets = getStreamDatasetsDAO.execute(run, stream, transaction=False)
            if len(datasets) == 0:
                raise RuntimeError("Stream is not defined in HLT menu or has no datasets !")

        # write stream/dataset mapping (for special express and error datasets)
        insertDatasetDAO = daoFactory(classname="RunConfig.InsertPrimaryDataset")
        insertStreamDatasetDAO = daoFactory(classname="RunConfig.InsertStreamDataset")

        # write stream configuration
        insertCMSSWVersionDAO = daoFactory(classname="RunConfig.InsertCMSSWVersion")
        insertStreamStyleDAO = daoFactory(classname="RunConfig.InsertStreamStyle")
        insertRepackConfigDAO = daoFactory(classname="RunConfig.InsertRepackConfig")
        insertPromptCalibrationDAO = daoFactory(classname="RunConfig.InsertPromptCalibration")
        insertExpressConfigDAO = daoFactory(classname="RunConfig.InsertExpressConfig")
        insertSpecialDatasetDAO = daoFactory(classname="RunConfig.InsertSpecialDataset")
        insertDatasetScenarioDAO = daoFactory(classname="RunConfig.InsertDatasetScenario")
        insertStreamFilesetDAO = daoFactory(classname="RunConfig.InsertStreamFileset")
        insertRecoReleaseConfigDAO = daoFactory(classname="RunConfig.InsertRecoReleaseConfig")
        insertWorkflowMonitoringDAO = daoFactory(classname="RunConfig.InsertWorkflowMonitoring")
        insertStorageNodeDAO = daoFactory(classname="RunConfig.InsertStorageNode")
        insertPhEDExConfigDAO = daoFactory(classname="RunConfig.InsertPhEDExConfig")

        # bind collections for the single transactional insert at the end;
        # lists hold multi-row binds, dicts hold single-row binds ({} == nothing to insert)
        bindsCMSSWVersion = []
        bindsDataset = []
        bindsStreamDataset = []
        bindsStreamStyle = {'RUN': run,
                            'STREAM': stream,
                            'STYLE': streamConfig.ProcessingStyle}
        bindsRepackConfig = {}
        bindsPromptCalibration = {}
        bindsExpressConfig = {}
        bindsSpecialDataset = {}
        bindsDatasetScenario = []
        bindsStorageNode = []
        bindsPhEDExConfig = []

        # mark workflows as injected
        wmbsDaoFactory = DAOFactory(package="WMCore.WMBS",
                                    logger=logging,
                                    dbinterface=myThread.dbi)
        markWorkflowsInjectedDAO = wmbsDaoFactory(classname="Workflow.MarkInjectedWorkflows")

        #
        # for spec creation, details for all outputs
        #
        outputModuleDetails = []

        #
        # special dataset for some express output
        #
        specialDataset = None

        #
        # for PromptReco delay settings
        #
        promptRecoDelay = {}
        promptRecoDelayOffset = {}

        #
        # for PhEDEx subscription settings
        #
        subscriptions = []

        # some hardcoded PhEDEx defaults
        expressPhEDExInjectNode = "T2_CH_CERN"
        expressPhEDExSubscribeNode = "T2_CH_CERN"

        #
        # first take care of all stream settings
        #
        getStreamOnlineVersionDAO = daoFactory(classname="RunConfig.GetStreamOnlineVersion")
        onlineVersion = getStreamOnlineVersionDAO.execute(run, stream, transaction=False)

        if streamConfig.ProcessingStyle == "Bulk":

            # online CMSSW version can be overridden per-stream (falls back to itself)
            streamConfig.Repack.CMSSWVersion = streamConfig.VersionOverride.get(onlineVersion, onlineVersion)

            bindsCMSSWVersion.append({'VERSION': streamConfig.Repack.CMSSWVersion})

            streamConfig.Repack.ScramArch = tier0Config.Global.ScramArches.get(streamConfig.Repack.CMSSWVersion,
                                                                               tier0Config.Global.DefaultScramArch)

            bindsRepackConfig = {'RUN': run,
                                 'STREAM': stream,
                                 'PROC_VER': streamConfig.Repack.ProcessingVersion,
                                 'MAX_SIZE_SINGLE_LUMI': streamConfig.Repack.MaxSizeSingleLumi,
                                 'MAX_SIZE_MULTI_LUMI': streamConfig.Repack.MaxSizeMultiLumi,
                                 'MIN_SIZE': streamConfig.Repack.MinInputSize,
                                 'MAX_SIZE': streamConfig.Repack.MaxInputSize,
                                 'MAX_EDM_SIZE': streamConfig.Repack.MaxEdmSize,
                                 'MAX_OVER_SIZE': streamConfig.Repack.MaxOverSize,
                                 'MAX_EVENTS': streamConfig.Repack.MaxInputEvents,
                                 'MAX_FILES': streamConfig.Repack.MaxInputFiles,
                                 'BLOCK_DELAY': streamConfig.Repack.BlockCloseDelay,
                                 'CMSSW': streamConfig.Repack.CMSSWVersion,
                                 'SCRAM_ARCH': streamConfig.Repack.ScramArch}

        elif streamConfig.ProcessingStyle == "Express":

            # Express output that is not per-dataset goes into a synthetic
            # "Stream<name>" primary dataset
            specialDataset = "Stream%s" % stream
            bindsDataset.append({'PRIMDS': specialDataset})
            bindsStreamDataset.append({'RUN': run,
                                       'PRIMDS': specialDataset,
                                       'STREAM': stream})
            bindsSpecialDataset = {'STREAM': stream,
                                   'PRIMDS': specialDataset}
            bindsDatasetScenario.append({'RUN': run,
                                         'PRIMDS': specialDataset,
                                         'SCENARIO': streamConfig.Express.Scenario})

            if streamConfig.Express.WriteDQM:
                outputModuleDetails.append({'dataTier': tier0Config.Global.DQMDataTier,
                                            'eventContent': tier0Config.Global.DQMDataTier,
                                            'primaryDataset': specialDataset})

            bindsStorageNode.append({'NODE': expressPhEDExSubscribeNode})

            bindsPhEDExConfig.append({'RUN': run,
                                      'PRIMDS': specialDataset,
                                      'ARCHIVAL_NODE': None,
                                      'TAPE_NODE': None,
                                      'DISK_NODE': expressPhEDExSubscribeNode})

            subscriptions.append({'custodialSites': [],
                                  'nonCustodialSites': [expressPhEDExSubscribeNode],
                                  'autoApproveSites': [expressPhEDExSubscribeNode],
                                  'priority': "high",
                                  'primaryDataset': specialDataset})

            alcaSkim = None
            if len(streamConfig.Express.AlcaSkims) > 0:
                outputModuleDetails.append({'dataTier': "ALCARECO",
                                            'eventContent': "ALCARECO",
                                            'primaryDataset': specialDataset})
                alcaSkim = ",".join(streamConfig.Express.AlcaSkims)

                # count PromptCalibProd* producers, they need conditions handling
                numPromptCalibProd = 0
                for producer in streamConfig.Express.AlcaSkims:
                    if producer.startswith("PromptCalibProd"):
                        numPromptCalibProd += 1

                if numPromptCalibProd > 0:
                    bindsPromptCalibration = {'RUN': run,
                                              'STREAM': stream,
                                              'NUM_PRODUCER': numPromptCalibProd}

            dqmSeq = None
            if len(streamConfig.Express.DqmSequences) > 0:
                dqmSeq = ",".join(streamConfig.Express.DqmSequences)

            streamConfig.Express.CMSSWVersion = streamConfig.VersionOverride.get(onlineVersion, onlineVersion)

            bindsCMSSWVersion.append({'VERSION': streamConfig.Express.CMSSWVersion})

            streamConfig.Express.ScramArch = tier0Config.Global.ScramArches.get(streamConfig.Express.CMSSWVersion,
                                                                                tier0Config.Global.DefaultScramArch)

            # optional separate CMSSW version for the reconstruction step
            streamConfig.Express.RecoScramArch = None
            if streamConfig.Express.RecoCMSSWVersion != None:
                bindsCMSSWVersion.append({'VERSION': streamConfig.Express.RecoCMSSWVersion})
                streamConfig.Express.RecoScramArch = tier0Config.Global.ScramArches.get(streamConfig.Express.RecoCMSSWVersion,
                                                                                        tier0Config.Global.DefaultScramArch)

            bindsExpressConfig = {'RUN': run,
                                  'STREAM': stream,
                                  'PROC_VER': streamConfig.Express.ProcessingVersion,
                                  'WRITE_TIERS': ",".join(streamConfig.Express.DataTiers),
                                  'WRITE_DQM': streamConfig.Express.WriteDQM,
                                  'GLOBAL_TAG': streamConfig.Express.GlobalTag,
                                  'MAX_RATE': streamConfig.Express.MaxInputRate,
                                  'MAX_EVENTS': streamConfig.Express.MaxInputEvents,
                                  'MAX_SIZE': streamConfig.Express.MaxInputSize,
                                  'MAX_FILES': streamConfig.Express.MaxInputFiles,
                                  'MAX_LATENCY': streamConfig.Express.MaxLatency,
                                  'DQM_INTERVAL': streamConfig.Express.PeriodicHarvestInterval,
                                  'BLOCK_DELAY': streamConfig.Express.BlockCloseDelay,
                                  'CMSSW': streamConfig.Express.CMSSWVersion,
                                  'SCRAM_ARCH': streamConfig.Express.ScramArch,
                                  'RECO_CMSSW': streamConfig.Express.RecoCMSSWVersion,
                                  'RECO_SCRAM_ARCH': streamConfig.Express.RecoScramArch,
                                  'MULTICORE': streamConfig.Express.Multicore,
                                  'ALCA_SKIM': alcaSkim,
                                  'DQM_SEQ': dqmSeq}

        #
        # then configure datasets
        #
        getStreamDatasetTriggersDAO = daoFactory(classname="RunConfig.GetStreamDatasetTriggers")
        datasetTriggers = getStreamDatasetTriggersDAO.execute(run, stream, transaction=False)

        for dataset, paths in datasetTriggers.items():

            if dataset == "Unassigned path":
                # hardcoded exceptions: known runs where the unassigned-path
                # pseudo-dataset should simply be skipped
                if stream == "Express" and run in [210114, 210116, 210120, 210121, 210178]:
                    continue
                if stream == "A" and run in [216120, 216125, 216130]:
                    continue

            datasetConfig = retrieveDatasetConfig(tier0Config, dataset)

            # trigger path selection for this dataset's output module
            selectEvents = []
            for path in sorted(paths):
                selectEvents.append("%s:%s" % (path, runInfo['process']))

            if streamConfig.ProcessingStyle == "Bulk":

                promptRecoDelay[datasetConfig.Name] = datasetConfig.RecoDelay
                promptRecoDelayOffset[datasetConfig.Name] = datasetConfig.RecoDelayOffset

                outputModuleDetails.append({'dataTier': "RAW",
                                            'eventContent': "ALL",
                                            'selectEvents': selectEvents,
                                            'primaryDataset': dataset})

                bindsPhEDExConfig.append({'RUN': run,
                                          'PRIMDS': dataset,
                                          'ARCHIVAL_NODE': datasetConfig.ArchivalNode,
                                          'TAPE_NODE': datasetConfig.TapeNode,
                                          'DISK_NODE': datasetConfig.DiskNode})

                # archival and tape nodes get custodial copies, the disk node
                # gets a non-custodial copy; all are auto-approved
                custodialSites = []
                nonCustodialSites = []
                autoApproveSites = []

                if datasetConfig.ArchivalNode != None:
                    bindsStorageNode.append({'NODE': datasetConfig.ArchivalNode})
                    custodialSites.append(datasetConfig.ArchivalNode)
                    autoApproveSites.append(datasetConfig.ArchivalNode)

                if datasetConfig.TapeNode != None:
                    bindsStorageNode.append({'NODE': datasetConfig.TapeNode})
                    custodialSites.append(datasetConfig.TapeNode)

                if datasetConfig.DiskNode != None:
                    bindsStorageNode.append({'NODE': datasetConfig.DiskNode})
                    nonCustodialSites.append(datasetConfig.DiskNode)
                    autoApproveSites.append(datasetConfig.DiskNode)

                if len(custodialSites) > 0 or len(nonCustodialSites) > 0:
                    subscriptions.append({'custodialSites': custodialSites,
                                          'custodialSubType': "Replica",
                                          'nonCustodialSites': nonCustodialSites,
                                          'autoApproveSites': autoApproveSites,
                                          'priority': "high",
                                          'primaryDataset': dataset,
                                          'dataTier': "RAW"})

                #
                # set subscriptions for error dataset
                #
                custodialSites = []
                nonCustodialSites = []
                autoApproveSites = []

                if datasetConfig.ArchivalNode != None:
                    custodialSites.append(datasetConfig.ArchivalNode)
                    autoApproveSites.append(datasetConfig.ArchivalNode)

                if datasetConfig.ArchivalNode != expressPhEDExInjectNode:
                    nonCustodialSites.append(expressPhEDExInjectNode)
                    autoApproveSites.append(expressPhEDExInjectNode)

                if len(custodialSites) > 0 or len(nonCustodialSites) > 0:
                    subscriptions.append({'custodialSites': custodialSites,
                                          'custodialSubType': "Replica",
                                          'nonCustodialSites': nonCustodialSites,
                                          'autoApproveSites': autoApproveSites,
                                          'priority': "high",
                                          'primaryDataset': "%s-Error" % dataset,
                                          'dataTier': "RAW"})

            elif streamConfig.ProcessingStyle == "Express":

                # ALCARECO/DQM/DQMIO tiers for Express go to the special
                # stream dataset handled above, not per-dataset
                for dataTier in streamConfig.Express.DataTiers:
                    if dataTier not in ["ALCARECO", "DQM", "DQMIO"]:
                        outputModuleDetails.append({'dataTier': dataTier,
                                                    'eventContent': dataTier,
                                                    'selectEvents': selectEvents,
                                                    'primaryDataset': dataset})

                bindsPhEDExConfig.append({'RUN': run,
                                          'PRIMDS': dataset,
                                          'ARCHIVAL_NODE': None,
                                          'TAPE_NODE': None,
                                          'DISK_NODE': expressPhEDExSubscribeNode})

                subscriptions.append({'custodialSites': [],
                                      'nonCustodialSites': [expressPhEDExSubscribeNode],
                                      'autoApproveSites': [expressPhEDExSubscribeNode],
                                      'priority': "high",
                                      'primaryDataset': dataset})

        #
        # finally create WMSpec
        #
        # NOTE(review): 'outputs' is assigned but never used below —
        # looks like dead code, candidate for removal
        outputs = {}
        if streamConfig.ProcessingStyle == "Bulk":

            taskName = "Repack"
            workflowName = "Repack_Run%d_Stream%s" % (run, stream)

            specArguments = {}

            specArguments['TimePerEvent'] = 1
            specArguments['SizePerEvent'] = 200
            specArguments['Memory'] = 1800

            specArguments['RequestPriority'] = 0

            specArguments['CMSSWVersion'] = streamConfig.Repack.CMSSWVersion
            specArguments['ScramArch'] = streamConfig.Repack.ScramArch

            specArguments['ProcessingVersion'] = streamConfig.Repack.ProcessingVersion
            specArguments['MaxSizeSingleLumi'] = streamConfig.Repack.MaxSizeSingleLumi
            specArguments['MaxSizeMultiLumi'] = streamConfig.Repack.MaxSizeMultiLumi
            specArguments['MinInputSize'] = streamConfig.Repack.MinInputSize
            specArguments['MaxInputSize'] = streamConfig.Repack.MaxInputSize
            specArguments['MaxEdmSize'] = streamConfig.Repack.MaxEdmSize
            specArguments['MaxOverSize'] = streamConfig.Repack.MaxOverSize
            specArguments['MaxInputEvents'] = streamConfig.Repack.MaxInputEvents
            specArguments['MaxInputFiles'] = streamConfig.Repack.MaxInputFiles

            specArguments['UnmergedLFNBase'] = "/store/unmerged/%s" % runInfo['bulk_data_type']
            if runInfo['backfill']:
                specArguments['MergedLFNBase'] = "/store/backfill/%s/%s" % (runInfo['backfill'],
                                                                            runInfo['bulk_data_type'])
            else:
                specArguments['MergedLFNBase'] = "/store/%s" % runInfo['bulk_data_type']

            specArguments['BlockCloseDelay'] = streamConfig.Repack.BlockCloseDelay

        elif streamConfig.ProcessingStyle == "Express":

            taskName = "Express"
            workflowName = "Express_Run%d_Stream%s" % (run, stream)

            specArguments = {}

            specArguments['TimePerEvent'] = 12
            specArguments['SizePerEvent'] = 512
            specArguments['Memory'] = 1800

            # memory budget scales linearly with requested cores
            if streamConfig.Express.Multicore:
                specArguments['Multicore'] = streamConfig.Express.Multicore
                specArguments['Memory'] = 1800 * streamConfig.Express.Multicore

            specArguments['RequestPriority'] = 0

            specArguments['ProcessingString'] = "Express"
            specArguments['ProcessingVersion'] = streamConfig.Express.ProcessingVersion
            specArguments['Scenario'] = streamConfig.Express.Scenario

            specArguments['CMSSWVersion'] = streamConfig.Express.CMSSWVersion
            specArguments['ScramArch'] = streamConfig.Express.ScramArch
            specArguments['RecoCMSSWVersion'] = streamConfig.Express.RecoCMSSWVersion
            specArguments['RecoScramArch'] = streamConfig.Express.RecoScramArch

            specArguments['GlobalTag'] = streamConfig.Express.GlobalTag
            specArguments['GlobalTagTransaction'] = "Express_%d" % run
            specArguments['GlobalTagConnect'] = streamConfig.Express.GlobalTagConnect

            specArguments['MaxInputRate'] = streamConfig.Express.MaxInputRate
            specArguments['MaxInputEvents'] = streamConfig.Express.MaxInputEvents
            specArguments['MaxInputSize'] = streamConfig.Express.MaxInputSize
            specArguments['MaxInputFiles'] = streamConfig.Express.MaxInputFiles
            specArguments['MaxLatency'] = streamConfig.Express.MaxLatency
            specArguments['AlcaSkims'] = streamConfig.Express.AlcaSkims
            specArguments['DqmSequences'] = streamConfig.Express.DqmSequences
            specArguments['AlcaHarvestTimeout'] = runInfo['ah_timeout']
            specArguments['AlcaHarvestDir'] = runInfo['ah_dir']
            specArguments['DQMUploadProxy'] = dqmUploadProxy
            specArguments['DQMUploadUrl'] = runInfo['dqmuploadurl']
            specArguments['StreamName'] = stream
            specArguments['SpecialDataset'] = specialDataset

            specArguments['UnmergedLFNBase'] = "/store/unmerged/express"
            # NOTE(review): this assignment is immediately overwritten by
            # both branches of the if/else below — redundant
            specArguments['MergedLFNBase'] = "/store/express"
            if runInfo['backfill']:
                specArguments['MergedLFNBase'] = "/store/backfill/%s/express" % runInfo['backfill']
            else:
                specArguments['MergedLFNBase'] = "/store/express"

            specArguments['PeriodicHarvestInterval'] = streamConfig.Express.PeriodicHarvestInterval

            specArguments['BlockCloseDelay'] = streamConfig.Express.BlockCloseDelay

        # arguments shared by Repack and Express specs
        if streamConfig.ProcessingStyle in ['Bulk', 'Express']:
            specArguments['RunNumber'] = run
            specArguments['AcquisitionEra'] = tier0Config.Global.AcquisitionEra
            specArguments['Outputs'] = outputModuleDetails
            specArguments['OverrideCatalog'] = "trivialcatalog_file:/cvmfs/cms.cern.ch/SITECONF/T2_CH_CERN/Tier0/override_catalog.xml?protocol=override"
            specArguments['ValidStatus'] = "VALID"

            specArguments['SiteWhitelist'] = ["T2_CH_CERN_T0"]
            specArguments['SiteBlacklist'] = []

        if streamConfig.ProcessingStyle == "Bulk":
            factory = RepackWorkloadFactory()
            wmSpec = factory.factoryWorkloadConstruction(workflowName, specArguments)
            wmSpec.setPhEDExInjectionOverride(runInfo['bulk_data_loc'])
            for subscription in subscriptions:
                wmSpec.setSubscriptionInformation(**subscription)
        elif streamConfig.ProcessingStyle == "Express":
            factory = ExpressWorkloadFactory()
            wmSpec = factory.factoryWorkloadConstruction(workflowName, specArguments)
            wmSpec.setPhEDExInjectionOverride(expressPhEDExInjectNode)
            for subscription in subscriptions:
                wmSpec.setSubscriptionInformation(**subscription)

        if streamConfig.ProcessingStyle in ['Bulk', 'Express']:
            wmSpec.setOwnerDetails("*****@*****.**", "T0",
                                   {'vogroup': 'DEFAULT',
                                    'vorole': 'DEFAULT',
                                    'dn': "*****@*****.**"})

            wmSpec.setupPerformanceMonitoring(maxRSS=10485760,
                                              maxVSize=10485760,
                                              softTimeout=604800,
                                              gracePeriod=3600)

            wmbsHelper = WMBSHelper(wmSpec, taskName, cachepath=specDirectory)

        filesetName = "Run%d_Stream%s" % (run, stream)
        fileset = Fileset(filesetName)

        #
        # create workflow (currently either repack or express)
        #
        # everything below is written in a single database transaction:
        # any failure rolls back all inserts for this run/stream
        try:
            myThread.transaction.begin()
            if len(bindsCMSSWVersion) > 0:
                insertCMSSWVersionDAO.execute(bindsCMSSWVersion,
                                              conn=myThread.transaction.conn,
                                              transaction=True)
            if len(bindsDataset) > 0:
                insertDatasetDAO.execute(bindsDataset,
                                         conn=myThread.transaction.conn,
                                         transaction=True)
            if len(bindsStreamDataset) > 0:
                insertStreamDatasetDAO.execute(bindsStreamDataset,
                                               conn=myThread.transaction.conn,
                                               transaction=True)
            if len(bindsRepackConfig) > 0:
                insertRepackConfigDAO.execute(bindsRepackConfig,
                                              conn=myThread.transaction.conn,
                                              transaction=True)
            if len(bindsPromptCalibration) > 0:
                insertPromptCalibrationDAO.execute(bindsPromptCalibration,
                                                   conn=myThread.transaction.conn,
                                                   transaction=True)
            if len(bindsExpressConfig) > 0:
                insertExpressConfigDAO.execute(bindsExpressConfig,
                                               conn=myThread.transaction.conn,
                                               transaction=True)
            if len(bindsSpecialDataset) > 0:
                insertSpecialDatasetDAO.execute(bindsSpecialDataset,
                                                conn=myThread.transaction.conn,
                                                transaction=True)
            if len(bindsDatasetScenario) > 0:
                insertDatasetScenarioDAO.execute(bindsDatasetScenario,
                                                 conn=myThread.transaction.conn,
                                                 transaction=True)
            if len(bindsStorageNode) > 0:
                insertStorageNodeDAO.execute(bindsStorageNode,
                                             conn=myThread.transaction.conn,
                                             transaction=True)
            if len(bindsPhEDExConfig) > 0:
                insertPhEDExConfigDAO.execute(bindsPhEDExConfig,
                                              conn=myThread.transaction.conn,
                                              transaction=True)

            insertStreamStyleDAO.execute(bindsStreamStyle,
                                         conn=myThread.transaction.conn,
                                         transaction=True)

            if streamConfig.ProcessingStyle in ['Bulk', 'Express']:
                insertStreamFilesetDAO.execute(run, stream, filesetName,
                                               conn=myThread.transaction.conn,
                                               transaction=True)
                fileset.load()
                wmbsHelper.createSubscription(wmSpec.getTask(taskName), fileset, alternativeFilesetClose=True)
                insertWorkflowMonitoringDAO.execute([fileset.id],
                                                    conn=myThread.transaction.conn,
                                                    transaction=True)
                if streamConfig.ProcessingStyle == "Bulk":
                    bindsRecoReleaseConfig = []
                    # NOTE(review): the loop variable 'fileset' shadows the
                    # Fileset instance created above — confirm nothing after
                    # this point relies on the original 'fileset' binding
                    for fileset, primds in wmbsHelper.getMergeOutputMapping().items():
                        bindsRecoReleaseConfig.append({'RUN': run,
                                                       'PRIMDS': primds,
                                                       'FILESET': fileset,
                                                       'RECODELAY': promptRecoDelay[primds],
                                                       'RECODELAYOFFSET': promptRecoDelayOffset[primds]})
                    insertRecoReleaseConfigDAO.execute(bindsRecoReleaseConfig,
                                                       conn=myThread.transaction.conn,
                                                       transaction=True)
                elif streamConfig.ProcessingStyle == "Express":
                    markWorkflowsInjectedDAO.execute([workflowName],
                                                     injected=True,
                                                     conn=myThread.transaction.conn,
                                                     transaction=True)
        except Exception as ex:
            logging.exception(ex)
            myThread.transaction.rollback()
            raise RuntimeError("Problem in configureRunStream() database transaction !")
        else:
            myThread.transaction.commit()

    else:

        # should we do anything for local runs ?
        pass

    return
def configureRunStream(tier0Config, run, stream, specDirectory, dqmUploadProxy): """ _configureRunStream_ Called by Tier0Feeder for new run/streams. Retrieve global run settings and build the part of the configuration relevant to run/stream and write it to the database. Create workflows, filesets and subscriptions for the processing of runs/streams. """ logging.debug("configureRunStream() : %d , %s" % (run, stream)) myThread = threading.currentThread() daoFactory = DAOFactory(package = "T0.WMBS", logger = logging, dbinterface = myThread.dbi) # retrieve some basic run information getRunInfoDAO = daoFactory(classname = "RunConfig.GetRunInfo") runInfo = getRunInfoDAO.execute(run, transaction = False)[0] # # treat centralDAQ or miniDAQ runs (have an HLT key) different from local runs # if runInfo['hltkey'] != None: # streams not explicitely configured are repacked if stream not in tier0Config.Streams.dictionary_().keys(): addRepackConfig(tier0Config, stream) streamConfig = tier0Config.Streams.dictionary_()[stream] # consistency check to make sure stream exists and has datasets defined # only run if we don't ignore the stream if streamConfig.ProcessingStyle != "Ignore": getStreamDatasetsDAO = daoFactory(classname = "RunConfig.GetStreamDatasets") datasets = getStreamDatasetsDAO.execute(run, stream, transaction = False) if len(datasets) == 0: raise RuntimeError("Stream is not defined in HLT menu or has no datasets !") # write stream/dataset mapping (for special express and error datasets) insertDatasetDAO = daoFactory(classname = "RunConfig.InsertPrimaryDataset") insertStreamDatasetDAO = daoFactory(classname = "RunConfig.InsertStreamDataset") # write stream configuration insertCMSSWVersionDAO = daoFactory(classname = "RunConfig.InsertCMSSWVersion") insertStreamStyleDAO = daoFactory(classname = "RunConfig.InsertStreamStyle") insertRepackConfigDAO = daoFactory(classname = "RunConfig.InsertRepackConfig") insertPromptCalibrationDAO = daoFactory(classname = 
"RunConfig.InsertPromptCalibration") insertExpressConfigDAO = daoFactory(classname = "RunConfig.InsertExpressConfig") insertSpecialDatasetDAO = daoFactory(classname = "RunConfig.InsertSpecialDataset") insertDatasetScenarioDAO = daoFactory(classname = "RunConfig.InsertDatasetScenario") insertStreamFilesetDAO = daoFactory(classname = "RunConfig.InsertStreamFileset") insertRecoReleaseConfigDAO = daoFactory(classname = "RunConfig.InsertRecoReleaseConfig") insertWorkflowMonitoringDAO = daoFactory(classname = "RunConfig.InsertWorkflowMonitoring") insertStorageNodeDAO = daoFactory(classname = "RunConfig.InsertStorageNode") insertPhEDExConfigDAO = daoFactory(classname = "RunConfig.InsertPhEDExConfig") bindsCMSSWVersion = [] bindsDataset = [] bindsStreamDataset = [] bindsStreamStyle = {'RUN' : run, 'STREAM' : stream, 'STYLE': streamConfig.ProcessingStyle } bindsRepackConfig = {} bindsPromptCalibration = {} bindsExpressConfig = {} bindsSpecialDataset = {} bindsDatasetScenario = [] bindsStorageNode = [] bindsPhEDExConfig = [] # mark workflows as injected wmbsDaoFactory = DAOFactory(package = "WMCore.WMBS", logger = logging, dbinterface = myThread.dbi) markWorkflowsInjectedDAO = wmbsDaoFactory(classname = "Workflow.MarkInjectedWorkflows") # # for spec creation, details for all outputs # outputModuleDetails = [] # # special dataset for some express output # specialDataset = None # # for PromptReco delay settings # promptRecoDelay = {} promptRecoDelayOffset = {} # # for PhEDEx subscription settings # subscriptions = [] # some hardcoded PhEDEx defaults expressPhEDExInjectNode = "T2_CH_CERN" expressPhEDExSubscribeNode = "T2_CH_CERN" # # first take care of all stream settings # getStreamOnlineVersionDAO = daoFactory(classname = "RunConfig.GetStreamOnlineVersion") onlineVersion = getStreamOnlineVersionDAO.execute(run, stream, transaction = False) if streamConfig.ProcessingStyle == "Bulk": streamConfig.Repack.CMSSWVersion = streamConfig.VersionOverride.get(onlineVersion, 
onlineVersion) bindsCMSSWVersion.append( { 'VERSION' : streamConfig.Repack.CMSSWVersion } ) streamConfig.Repack.ScramArch = tier0Config.Global.ScramArches.get(streamConfig.Repack.CMSSWVersion, tier0Config.Global.DefaultScramArch) bindsRepackConfig = { 'RUN' : run, 'STREAM' : stream, 'PROC_VER': streamConfig.Repack.ProcessingVersion, 'MAX_SIZE_SINGLE_LUMI' : streamConfig.Repack.MaxSizeSingleLumi, 'MAX_SIZE_MULTI_LUMI' : streamConfig.Repack.MaxSizeMultiLumi, 'MIN_SIZE' : streamConfig.Repack.MinInputSize, 'MAX_SIZE' : streamConfig.Repack.MaxInputSize, 'MAX_EDM_SIZE' : streamConfig.Repack.MaxEdmSize, 'MAX_OVER_SIZE' : streamConfig.Repack.MaxOverSize, 'MAX_EVENTS' : streamConfig.Repack.MaxInputEvents, 'MAX_FILES' : streamConfig.Repack.MaxInputFiles, 'BLOCK_DELAY' : streamConfig.Repack.BlockCloseDelay, 'CMSSW' : streamConfig.Repack.CMSSWVersion, 'SCRAM_ARCH' : streamConfig.Repack.ScramArch } elif streamConfig.ProcessingStyle == "Express": specialDataset = "Stream%s" % stream bindsDataset.append( { 'PRIMDS' : specialDataset } ) bindsStreamDataset.append( { 'RUN' : run, 'PRIMDS' : specialDataset, 'STREAM' : stream } ) bindsSpecialDataset = { 'STREAM' : stream, 'PRIMDS' : specialDataset } bindsDatasetScenario.append( { 'RUN' : run, 'PRIMDS' : specialDataset, 'SCENARIO' : streamConfig.Express.Scenario } ) if "DQM" in streamConfig.Express.DataTiers: outputModuleDetails.append( { 'dataTier' : "DQM", 'eventContent' : "DQM", 'primaryDataset' : specialDataset } ) bindsStorageNode.append( { 'NODE' : expressPhEDExSubscribeNode } ) bindsPhEDExConfig.append( { 'RUN' : run, 'PRIMDS' : specialDataset, 'ARCHIVAL_NODE' : None, 'TAPE_NODE' : None, 'DISK_NODE' : expressPhEDExSubscribeNode } ) subscriptions.append( { 'custodialSites' : [], 'nonCustodialSites' : [ expressPhEDExSubscribeNode ], 'autoApproveSites' : [ expressPhEDExSubscribeNode ], 'priority' : "high", 'primaryDataset' : specialDataset } ) alcaSkim = None if "ALCARECO" in streamConfig.Express.DataTiers: if 
len(streamConfig.Express.AlcaSkims) > 0: outputModuleDetails.append( { 'dataTier' : "ALCARECO", 'eventContent' : "ALCARECO", 'primaryDataset' : specialDataset } ) alcaSkim = ",".join(streamConfig.Express.AlcaSkims) numPromptCalibProd = 0 for producer in streamConfig.Express.AlcaSkims: if producer.startswith("PromptCalibProd"): numPromptCalibProd += 1 if numPromptCalibProd > 0: bindsPromptCalibration = { 'RUN' : run, 'STREAM' : stream, 'NUM_PRODUCER' : numPromptCalibProd } dqmSeq = None if len(streamConfig.Express.DqmSequences) > 0: dqmSeq = ",".join(streamConfig.Express.DqmSequences) streamConfig.Express.CMSSWVersion = streamConfig.VersionOverride.get(onlineVersion, onlineVersion) bindsCMSSWVersion.append( { 'VERSION' : streamConfig.Express.CMSSWVersion } ) streamConfig.Express.ScramArch = tier0Config.Global.ScramArches.get(streamConfig.Express.CMSSWVersion, tier0Config.Global.DefaultScramArch) streamConfig.Express.RecoScramArch = None if streamConfig.Express.RecoCMSSWVersion != None: bindsCMSSWVersion.append( { 'VERSION' : streamConfig.Express.RecoCMSSWVersion } ) streamConfig.Express.RecoScramArch = tier0Config.Global.ScramArches.get(streamConfig.Express.RecoCMSSWVersion, tier0Config.Global.DefaultScramArch) bindsExpressConfig = { 'RUN' : run, 'STREAM' : stream, 'PROC_VER' : streamConfig.Express.ProcessingVersion, 'WRITE_TIERS' : ",".join(streamConfig.Express.DataTiers), 'GLOBAL_TAG' : streamConfig.Express.GlobalTag, 'MAX_RATE' : streamConfig.Express.MaxInputRate, 'MAX_EVENTS' : streamConfig.Express.MaxInputEvents, 'MAX_SIZE' : streamConfig.Express.MaxInputSize, 'MAX_FILES' : streamConfig.Express.MaxInputFiles, 'MAX_LATENCY' : streamConfig.Express.MaxLatency, 'DQM_INTERVAL' : streamConfig.Express.PeriodicHarvestInterval, 'BLOCK_DELAY' : streamConfig.Express.BlockCloseDelay, 'CMSSW' : streamConfig.Express.CMSSWVersion, 'SCRAM_ARCH' : streamConfig.Express.ScramArch, 'RECO_CMSSW' : streamConfig.Express.RecoCMSSWVersion, 'RECO_SCRAM_ARCH' : 
streamConfig.Express.RecoScramArch, 'MULTICORE' : streamConfig.Express.Multicore, 'ALCA_SKIM' : alcaSkim, 'DQM_SEQ' : dqmSeq } # # then configure datasets # getStreamDatasetTriggersDAO = daoFactory(classname = "RunConfig.GetStreamDatasetTriggers") datasetTriggers = getStreamDatasetTriggersDAO.execute(run, stream, transaction = False) for dataset, paths in datasetTriggers.items(): if dataset == "Unassigned path": if stream == "Express" and run in [ 210114, 210116, 210120, 210121, 210178 ]: continue if stream == "A" and run in [ 216120, 216125, 216130 ]: continue datasetConfig = retrieveDatasetConfig(tier0Config, dataset) selectEvents = [] for path in sorted(paths): selectEvents.append("%s:%s" % (path, runInfo['process'])) if streamConfig.ProcessingStyle == "Bulk": promptRecoDelay[datasetConfig.Name] = datasetConfig.RecoDelay promptRecoDelayOffset[datasetConfig.Name] = datasetConfig.RecoDelayOffset outputModuleDetails.append( { 'dataTier' : "RAW", 'eventContent' : "ALL", 'selectEvents' : selectEvents, 'primaryDataset' : dataset } ) bindsPhEDExConfig.append( { 'RUN' : run, 'PRIMDS' : dataset, 'ARCHIVAL_NODE' : datasetConfig.ArchivalNode, 'TAPE_NODE' : datasetConfig.TapeNode, 'DISK_NODE' : datasetConfig.DiskNode } ) custodialSites = [] nonCustodialSites = [] autoApproveSites = [] if datasetConfig.ArchivalNode != None: bindsStorageNode.append( { 'NODE' : datasetConfig.ArchivalNode } ) custodialSites.append(datasetConfig.ArchivalNode) autoApproveSites.append(datasetConfig.ArchivalNode) if datasetConfig.TapeNode != None: bindsStorageNode.append( { 'NODE' : datasetConfig.TapeNode } ) custodialSites.append(datasetConfig.TapeNode) if datasetConfig.DiskNode != None: bindsStorageNode.append( { 'NODE' : datasetConfig.DiskNode } ) nonCustodialSites.append(datasetConfig.DiskNode) autoApproveSites.append(datasetConfig.DiskNode) if len(custodialSites) > 0 or len(nonCustodialSites) > 0: subscriptions.append( { 'custodialSites' : custodialSites, 'custodialSubType' : "Replica", 
'nonCustodialSites' : nonCustodialSites, 'autoApproveSites' : autoApproveSites, 'priority' : "high", 'primaryDataset' : dataset, 'dataTier' : "RAW" } ) # # set subscriptions for error dataset # custodialSites = [] nonCustodialSites = [] autoApproveSites = [] if datasetConfig.ArchivalNode != None: custodialSites.append(datasetConfig.ArchivalNode) autoApproveSites.append(datasetConfig.ArchivalNode) if datasetConfig.ArchivalNode != expressPhEDExInjectNode: nonCustodialSites.append(expressPhEDExInjectNode) autoApproveSites.append(expressPhEDExInjectNode) if len(custodialSites) > 0 or len(nonCustodialSites) > 0: subscriptions.append( { 'custodialSites' : custodialSites, 'custodialSubType' : "Replica", 'nonCustodialSites' : nonCustodialSites, 'autoApproveSites' : autoApproveSites, 'priority' : "high", 'primaryDataset' : "%s-Error" % dataset, 'dataTier' : "RAW" } ) elif streamConfig.ProcessingStyle == "Express": for dataTier in streamConfig.Express.DataTiers: if dataTier not in [ "ALCARECO", "DQM" ]: outputModuleDetails.append( { 'dataTier' : dataTier, 'eventContent' : dataTier, 'selectEvents' : selectEvents, 'primaryDataset' : dataset } ) bindsPhEDExConfig.append( { 'RUN' : run, 'PRIMDS' : dataset, 'ARCHIVAL_NODE' : None, 'TAPE_NODE' : None, 'DISK_NODE' : expressPhEDExSubscribeNode } ) subscriptions.append( { 'custodialSites' : [], 'nonCustodialSites' : [ expressPhEDExSubscribeNode ], 'autoApproveSites' : [ expressPhEDExSubscribeNode ], 'priority' : "high", 'primaryDataset' : dataset } ) # # finally create WMSpec # outputs = {} if streamConfig.ProcessingStyle == "Bulk": taskName = "Repack" workflowName = "Repack_Run%d_Stream%s" % (run, stream) specArguments = {} specArguments['Group'] = "unknown" specArguments['Requestor'] = "unknown" specArguments['RequestorDN'] = "unknown" specArguments['TimePerEvent'] = 1 specArguments['SizePerEvent'] = 200 specArguments['Memory'] = 1800 specArguments['RequestPriority'] = 0 specArguments['CMSSWVersion'] = 
streamConfig.Repack.CMSSWVersion specArguments['ScramArch'] = streamConfig.Repack.ScramArch specArguments['ProcessingVersion'] = streamConfig.Repack.ProcessingVersion specArguments['MaxSizeSingleLumi'] = streamConfig.Repack.MaxSizeSingleLumi specArguments['MaxSizeMultiLumi'] = streamConfig.Repack.MaxSizeMultiLumi specArguments['MinInputSize'] = streamConfig.Repack.MinInputSize specArguments['MaxInputSize'] = streamConfig.Repack.MaxInputSize specArguments['MaxEdmSize'] = streamConfig.Repack.MaxEdmSize specArguments['MaxOverSize'] = streamConfig.Repack.MaxOverSize specArguments['MaxInputEvents'] = streamConfig.Repack.MaxInputEvents specArguments['MaxInputFiles'] = streamConfig.Repack.MaxInputFiles specArguments['UnmergedLFNBase'] = "/store/unmerged/%s" % runInfo['bulk_data_type'] if runInfo['backfill']: specArguments['MergedLFNBase'] = "/store/backfill/%s/%s" % (runInfo['backfill'], runInfo['bulk_data_type']) else: specArguments['MergedLFNBase'] = "/store/%s" % runInfo['bulk_data_type'] specArguments['BlockCloseDelay'] = streamConfig.Repack.BlockCloseDelay elif streamConfig.ProcessingStyle == "Express": taskName = "Express" workflowName = "Express_Run%d_Stream%s" % (run, stream) specArguments = {} specArguments['Group'] = "unknown" specArguments['Requestor'] = "unknown" specArguments['RequestorDN'] = "unknown" specArguments['TimePerEvent'] = 12 specArguments['SizePerEvent'] = 512 specArguments['Memory'] = 1800 if streamConfig.Express.Multicore: specArguments['Multicore'] = streamConfig.Express.Multicore specArguments['Memory'] = 1800 * streamConfig.Express.Multicore specArguments['RequestPriority'] = 0 specArguments['ProcessingString'] = "Express" specArguments['ProcessingVersion'] = streamConfig.Express.ProcessingVersion specArguments['Scenario'] = streamConfig.Express.Scenario specArguments['CMSSWVersion'] = streamConfig.Express.CMSSWVersion specArguments['ScramArch'] = streamConfig.Express.ScramArch specArguments['RecoCMSSWVersion'] = 
streamConfig.Express.RecoCMSSWVersion specArguments['RecoScramArch'] = streamConfig.Express.RecoScramArch specArguments['GlobalTag'] = streamConfig.Express.GlobalTag specArguments['GlobalTagTransaction'] = "Express_%d" % run specArguments['GlobalTagConnect'] = streamConfig.Express.GlobalTagConnect specArguments['MaxInputRate'] = streamConfig.Express.MaxInputRate specArguments['MaxInputEvents'] = streamConfig.Express.MaxInputEvents specArguments['MaxInputSize'] = streamConfig.Express.MaxInputSize specArguments['MaxInputFiles'] = streamConfig.Express.MaxInputFiles specArguments['MaxLatency'] = streamConfig.Express.MaxLatency specArguments['AlcaSkims'] = streamConfig.Express.AlcaSkims specArguments['DqmSequences'] = streamConfig.Express.DqmSequences specArguments['AlcaHarvestTimeout'] = runInfo['ah_timeout'] specArguments['AlcaHarvestDir'] = runInfo['ah_dir'] specArguments['DQMUploadProxy'] = dqmUploadProxy specArguments['DQMUploadUrl'] = runInfo['dqmuploadurl'] specArguments['StreamName'] = stream specArguments['SpecialDataset'] = specialDataset specArguments['UnmergedLFNBase'] = "/store/unmerged/express" specArguments['MergedLFNBase'] = "/store/express" if runInfo['backfill']: specArguments['MergedLFNBase'] = "/store/backfill/%s/express" % runInfo['backfill'] else: specArguments['MergedLFNBase'] = "/store/express" specArguments['PeriodicHarvestInterval'] = streamConfig.Express.PeriodicHarvestInterval specArguments['BlockCloseDelay'] = streamConfig.Express.BlockCloseDelay if streamConfig.ProcessingStyle in [ 'Bulk', 'Express' ]: specArguments['RunNumber'] = run specArguments['AcquisitionEra'] = tier0Config.Global.AcquisitionEra specArguments['Outputs'] = outputModuleDetails specArguments['OverrideCatalog'] = "trivialcatalog_file:/cvmfs/cms.cern.ch/SITECONF/T2_CH_CERN/Tier0/override_catalog.xml?protocol=override" specArguments['ValidStatus'] = "VALID" specArguments['SiteWhitelist'] = [ "T2_CH_CERN_T0" ] specArguments['SiteBlacklist'] = [] if 
streamConfig.ProcessingStyle == "Bulk": factory = RepackWorkloadFactory() wmSpec = factory.factoryWorkloadConstruction(workflowName, specArguments) wmSpec.setPhEDExInjectionOverride(runInfo['bulk_data_loc']) for subscription in subscriptions: wmSpec.setSubscriptionInformation(**subscription) elif streamConfig.ProcessingStyle == "Express": factory = ExpressWorkloadFactory() wmSpec = factory.factoryWorkloadConstruction(workflowName, specArguments) wmSpec.setPhEDExInjectionOverride(expressPhEDExInjectNode) for subscription in subscriptions: wmSpec.setSubscriptionInformation(**subscription) if streamConfig.ProcessingStyle in [ 'Bulk', 'Express' ]: wmSpec.setOwnerDetails("*****@*****.**", "T0", { 'vogroup': 'DEFAULT', 'vorole': 'DEFAULT', 'dn' : "*****@*****.**" } ) wmSpec.setupPerformanceMonitoring(maxRSS = 10485760, maxVSize = 10485760, softTimeout = 604800, gracePeriod = 3600) wmbsHelper = WMBSHelper(wmSpec, taskName, cachepath = specDirectory) filesetName = "Run%d_Stream%s" % (run, stream) fileset = Fileset(filesetName) # # create workflow (currently either repack or express) # try: myThread.transaction.begin() if len(bindsCMSSWVersion) > 0: insertCMSSWVersionDAO.execute(bindsCMSSWVersion, conn = myThread.transaction.conn, transaction = True) if len(bindsDataset) > 0: insertDatasetDAO.execute(bindsDataset, conn = myThread.transaction.conn, transaction = True) if len(bindsStreamDataset) > 0: insertStreamDatasetDAO.execute(bindsStreamDataset, conn = myThread.transaction.conn, transaction = True) if len(bindsRepackConfig) > 0: insertRepackConfigDAO.execute(bindsRepackConfig, conn = myThread.transaction.conn, transaction = True) if len(bindsPromptCalibration) > 0: insertPromptCalibrationDAO.execute(bindsPromptCalibration, conn = myThread.transaction.conn, transaction = True) if len(bindsExpressConfig) > 0: insertExpressConfigDAO.execute(bindsExpressConfig, conn = myThread.transaction.conn, transaction = True) if len(bindsSpecialDataset) > 0: 
insertSpecialDatasetDAO.execute(bindsSpecialDataset, conn = myThread.transaction.conn, transaction = True) if len(bindsDatasetScenario) > 0: insertDatasetScenarioDAO.execute(bindsDatasetScenario, conn = myThread.transaction.conn, transaction = True) if len(bindsStorageNode) > 0: insertStorageNodeDAO.execute(bindsStorageNode, conn = myThread.transaction.conn, transaction = True) if len(bindsPhEDExConfig) > 0: insertPhEDExConfigDAO.execute(bindsPhEDExConfig, conn = myThread.transaction.conn, transaction = True) insertStreamStyleDAO.execute(bindsStreamStyle, conn = myThread.transaction.conn, transaction = True) if streamConfig.ProcessingStyle in [ 'Bulk', 'Express' ]: insertStreamFilesetDAO.execute(run, stream, filesetName, conn = myThread.transaction.conn, transaction = True) fileset.load() wmbsHelper.createSubscription(wmSpec.getTask(taskName), fileset, alternativeFilesetClose = True) insertWorkflowMonitoringDAO.execute([fileset.id], conn = myThread.transaction.conn, transaction = True) if streamConfig.ProcessingStyle == "Bulk": bindsRecoReleaseConfig = [] for fileset, primds in wmbsHelper.getMergeOutputMapping().items(): bindsRecoReleaseConfig.append( { 'RUN' : run, 'PRIMDS' : primds, 'FILESET' : fileset, 'RECODELAY' : promptRecoDelay[primds], 'RECODELAYOFFSET' : promptRecoDelayOffset[primds] } ) insertRecoReleaseConfigDAO.execute(bindsRecoReleaseConfig, conn = myThread.transaction.conn, transaction = True) elif streamConfig.ProcessingStyle == "Express": markWorkflowsInjectedDAO.execute([workflowName], injected = True, conn = myThread.transaction.conn, transaction = True) except Exception as ex: logging.exception(ex) myThread.transaction.rollback() raise RuntimeError("Problem in configureRunStream() database transaction !") else: myThread.transaction.commit() else: # should we do anything for local runs ? pass return
conditionUploadTimeout = 18*3600, dropboxHost = "webcondvm.cern.ch", validationMode = True) # setup repack and express version mappings repackVersionOverride = { } expressVersionOverride = { "CMSSW_4_2_7" : "CMSSW_4_2_8_patch6", } hltmonVersionOverride = { "CMSSW_4_2_7" : "CMSSW_4_2_8_patch7", } addRepackConfig(tier0Config, "Default", proc_ver = 1, versionOverride = repackVersionOverride) addExpressConfig(tier0Config, "Express", scenario = "pp", data_tiers = [ "FEVT", "ALCARECO", "DQM" ], alca_producers = [ "SiStripCalZeroBias", "PromptCalibProd" ], global_tag = "GlobalTag1", proc_ver = 2, versionOverride = expressVersionOverride) addExpressConfig(tier0Config, "HLTMON", scenario = "cosmics", data_tiers = [ "FEVTHLTALL" ], global_tag = "GlobalTag2", proc_ver = 3,
def configureRunStream(tier0Config, run, stream, specDirectory, dqmUploadProxy):
    """
    _configureRunStream_

    Called by Tier0Feeder for new run/streams.

    Retrieve global run settings and build the part
    of the configuration relevant to run/stream
    and write it to the database.

    Create workflows, filesets and subscriptions
    for the processing of runs/streams.

    :param tier0Config: loaded Tier0 configuration object (provides
                        Streams, Global settings and version overrides)
    :param run: run number (int)
    :param stream: stream name (str)
    :param specDirectory: cache path handed to WMBSHelper for the spec
    :param dqmUploadProxy: proxy passed through to the Express spec arguments
    """
    logging.debug("configureRunStream() : %d , %s" % (run, stream))
    myThread = threading.currentThread()

    daoFactory = DAOFactory(package = "T0.WMBS",
                            logger = logging,
                            dbinterface = myThread.dbi)

    # retrieve some basic run information
    getRunInfoDAO = daoFactory(classname = "RunConfig.GetRunInfo")
    runInfo = getRunInfoDAO.execute(run, transaction = False)[0]

    #
    # treat centralDAQ or miniDAQ runs (have an HLT key) different from local runs
    #
    if runInfo['hltkey'] != None:

        # streams not explicitly configured are repacked
        # (adds a default repack config for this stream on the fly)
        if stream not in tier0Config.Streams.dictionary_().keys():
            addRepackConfig(tier0Config, stream)

        streamConfig = tier0Config.Streams.dictionary_()[stream]

        # write stream/dataset mapping (for special express and error datasets)
        insertDatasetDAO = daoFactory(classname = "RunConfig.InsertPrimaryDataset")
        insertStreamDatasetDAO = daoFactory(classname = "RunConfig.InsertStreamDataset")

        # write stream configuration
        insertStreamStyleDAO = daoFactory(classname = "RunConfig.InsertStreamStyle")
        insertRepackConfigDAO = daoFactory(classname = "RunConfig.InsertRepackConfig")
        insertPromptCalibrationDAO = daoFactory(classname = "RunConfig.InsertPromptCalibration")
        insertExpressConfigDAO = daoFactory(classname = "RunConfig.InsertExpressConfig")
        insertSpecialDatasetDAO = daoFactory(classname = "RunConfig.InsertSpecialDataset")
        insertDatasetScenarioDAO = daoFactory(classname = "RunConfig.InsertDatasetScenario")
        insertCMSSWVersionDAO = daoFactory(classname = "RunConfig.InsertCMSSWVersion")
        updateStreamOverrideDAO = daoFactory(classname = "RunConfig.UpdateStreamOverride")
        insertStreamFilesetDAO = daoFactory(classname = "RunConfig.InsertStreamFileset")
        insertRecoReleaseConfigDAO = daoFactory(classname = "RunConfig.InsertRecoReleaseConfig")
        insertWorkflowMonitoringDAO = daoFactory(classname = "RunConfig.InsertWorkflowMonitoring")
        insertStorageNodeDAO = daoFactory(classname = "RunConfig.InsertStorageNode")
        insertPhEDExConfigDAO = daoFactory(classname = "RunConfig.InsertPhEDExConfig")

        # bind containers collected below and flushed in one DB transaction
        # at the end of this function (lists for multi-row inserts, dicts
        # for single-row inserts)
        bindsDataset = []
        bindsStreamDataset = []
        bindsStreamStyle = {'RUN' : run,
                            'STREAM' : stream,
                            'STYLE': streamConfig.ProcessingStyle }
        bindsRepackConfig = {}
        bindsPromptCalibration = {}
        bindsExpressConfig = {}
        bindsSpecialDataset = {}
        bindsDatasetScenario = []
        bindsCMSSWVersion = []
        bindsStreamOverride = {}
        bindsStorageNode = []
        bindsPhEDExConfig = []

        # mark workflows as injected
        wmbsDaoFactory = DAOFactory(package = "WMCore.WMBS",
                                    logger = logging,
                                    dbinterface = myThread.dbi)
        markWorkflowsInjectedDAO = wmbsDaoFactory(classname = "Workflow.MarkInjectedWorkflows")

        #
        # for spec creation, details for all outputs
        #
        outputModuleDetails = []

        #
        # for PromptReco delay settings
        #
        promptRecoDelay = {}
        promptRecoDelayOffset = {}

        #
        # for PhEDEx subscription settings
        #
        subscriptions = { 'Express' : [],
                          'Bulk' : [] }

        # some hardcoded PhEDEx defaults
        expressPhEDExInjectNode = "T2_CH_CERN"
        expressPhEDExSubscribeNode = "T2_CH_CERN"

        #
        # first take care of all stream settings
        #
        getStreamOnlineVersionDAO = daoFactory(classname = "RunConfig.GetStreamOnlineVersion")
        onlineVersion = getStreamOnlineVersionDAO.execute(run, stream, transaction = False)

        if streamConfig.ProcessingStyle == "Bulk":

            # repack configuration thresholds for this run/stream
            bindsRepackConfig = { 'RUN' : run,
                                  'STREAM' : stream,
                                  'PROC_VER': streamConfig.Repack.ProcessingVersion,
                                  'MAX_SIZE_SINGLE_LUMI' : streamConfig.Repack.MaxSizeSingleLumi,
                                  'MAX_SIZE_MULTI_LUMI' : streamConfig.Repack.MaxSizeMultiLumi,
                                  'MIN_SIZE' : streamConfig.Repack.MinInputSize,
                                  'MAX_SIZE' : streamConfig.Repack.MaxInputSize,
                                  'MAX_EDM_SIZE' : streamConfig.Repack.MaxEdmSize,
                                  'MAX_OVER_SIZE' : streamConfig.Repack.MaxOverSize,
                                  'MAX_EVENTS' : streamConfig.Repack.MaxInputEvents,
                                  'MAX_FILES' : streamConfig.Repack.MaxInputFiles }

        elif streamConfig.ProcessingStyle == "Express":

            # every Express stream gets a special "Stream<name>" dataset
            specialDataset = "Stream%s" % stream
            bindsDataset.append( { 'PRIMDS' : specialDataset } )
            bindsStreamDataset.append( { 'RUN' : run,
                                         'PRIMDS' : specialDataset,
                                         'STREAM' : stream } )
            bindsSpecialDataset = { 'STREAM' : stream,
                                    'PRIMDS' : specialDataset }
            bindsDatasetScenario.append( { 'RUN' : run,
                                           'PRIMDS' : specialDataset,
                                           'SCENARIO' : streamConfig.Express.Scenario } )

            # DQM output (if configured) goes to the special dataset
            if "DQM" in streamConfig.Express.DataTiers:
                outputModuleDetails.append( { 'dataTier' : "DQM",
                                              'eventContent' : "DQM",
                                              'primaryDataset' : specialDataset } )

            bindsStorageNode.append( { 'NODE' : expressPhEDExSubscribeNode } )

            bindsPhEDExConfig.append( { 'RUN' : run,
                                        'PRIMDS' : specialDataset,
                                        'NODE' : expressPhEDExSubscribeNode,
                                        'CUSTODIAL' : 1,
                                        'REQ_ONLY' : "n",
                                        'PRIO' : "high" } )

            subscriptions['Express'].append( { 'custodialSites' : [],
                                               'nonCustodialSites' : [expressPhEDExSubscribeNode],
                                               'autoApproveSites' : [expressPhEDExSubscribeNode],
                                               'priority' : "high",
                                               'primaryDataset' : specialDataset } )

            # ALCARECO output and comma-separated skim list; a prompt
            # calibration record is only created when the PromptCalibProd
            # skim is configured
            alcaSkim = None
            if "ALCARECO" in streamConfig.Express.DataTiers:
                if len(streamConfig.Express.AlcaSkims) > 0:
                    outputModuleDetails.append( { 'dataTier' : "ALCARECO",
                                                  'eventContent' : "ALCARECO",
                                                  'primaryDataset' : specialDataset } )
                    alcaSkim = ",".join(streamConfig.Express.AlcaSkims)

                    if "PromptCalibProd" in streamConfig.Express.AlcaSkims:
                        bindsPromptCalibration = { 'RUN' : run,
                                                   'STREAM' : stream }

            # comma-separated DQM sequence list (None when not configured)
            dqmSeq = None
            if len(streamConfig.Express.DqmSequences) > 0:
                dqmSeq = ",".join(streamConfig.Express.DqmSequences)

            bindsExpressConfig = { 'RUN' : run,
                                   'STREAM' : stream,
                                   'PROC_VER' : streamConfig.Express.ProcessingVersion,
                                   'WRITE_TIERS' : ",".join(streamConfig.Express.DataTiers),
                                   'GLOBAL_TAG' : streamConfig.Express.GlobalTag,
                                   'MAX_EVENTS' : streamConfig.Express.MaxInputEvents,
                                   'MAX_SIZE' : streamConfig.Express.MaxInputSize,
                                   'MAX_FILES' : streamConfig.Express.MaxInputFiles,
                                   'MAX_LATENCY' : streamConfig.Express.MaxLatency,
                                   'ALCA_SKIM' : alcaSkim,
                                   'DQM_SEQ' : dqmSeq }

        # record a CMSSW version override for this stream if the config
        # maps the online version to a different one
        overrideVersion = streamConfig.VersionOverride.get(onlineVersion, None)
        if overrideVersion != None:
            bindsCMSSWVersion.append( { 'VERSION' : overrideVersion } )
            bindsStreamOverride = { "RUN" : run,
                                    "STREAM" : stream,
                                    "OVERRIDE" : overrideVersion }

        #
        # then configure datasets
        #
        getStreamDatasetTriggersDAO = daoFactory(classname = "RunConfig.GetStreamDatasetTriggers")
        datasetTriggers = getStreamDatasetTriggersDAO.execute(run, stream, transaction = False)

        for dataset, paths in datasetTriggers.items():

            # hardcoded skip of known problematic runs with unassigned paths
            if dataset == "Unassigned path":
                if stream == "Express" and run in [ 210114, 210116, 210120, 210121, 210178 ]:
                    continue

            datasetConfig = retrieveDatasetConfig(tier0Config, dataset)

            # trigger selection strings of the form "<path>:<process>"
            selectEvents = []
            for path in sorted(paths):
                selectEvents.append("%s:%s" % (path, runInfo['process']))

            if streamConfig.ProcessingStyle == "Bulk":

                promptRecoDelay[datasetConfig.Name] = datasetConfig.RecoDelay
                promptRecoDelayOffset[datasetConfig.Name] = datasetConfig.RecoDelayOffset

                outputModuleDetails.append( { 'dataTier' : "RAW",
                                              'eventContent' : "ALL",
                                              'selectEvents' : selectEvents,
                                              'primaryDataset' : dataset } )

                # build PhEDEx subscription site lists for the RAW data
                custodialSites = []
                nonCustodialSites = []
                autoApproveSites = []

                if datasetConfig.CustodialNode != None:

                    custodialSites.append(datasetConfig.CustodialNode)

                    # custodial requests are request-only unless
                    # auto-approval is configured for the node
                    requestOnly = "y"
                    if datasetConfig.CustodialAutoApprove:
                        requestOnly = "n"
                        autoApproveSites.append(datasetConfig.CustodialNode)

                    bindsStorageNode.append( { 'NODE' : datasetConfig.CustodialNode } )

                    bindsPhEDExConfig.append( { 'RUN' : run,
                                                'PRIMDS' : dataset,
                                                'NODE' : datasetConfig.CustodialNode,
                                                'CUSTODIAL' : 1,
                                                'REQ_ONLY' : requestOnly,
                                                'PRIO' : datasetConfig.CustodialPriority } )

                if datasetConfig.ArchivalNode != None:

                    # archival node is always custodial and auto-approved
                    custodialSites.append(datasetConfig.ArchivalNode)
                    autoApproveSites.append(datasetConfig.ArchivalNode)

                    bindsStorageNode.append( { 'NODE' : datasetConfig.ArchivalNode } )

                    bindsPhEDExConfig.append( { 'RUN' : run,
                                                'PRIMDS' : dataset,
                                                'NODE' : datasetConfig.ArchivalNode,
                                                'CUSTODIAL' : 1,
                                                'REQ_ONLY' : "n",
                                                'PRIO' : datasetConfig.CustodialPriority } )

                if len(custodialSites) + len(nonCustodialSites) > 0:
                    subscriptions['Bulk'].append( { 'custodialSites' : custodialSites,
                                                    'nonCustodialSites' : nonCustodialSites,
                                                    'autoApproveSites' : autoApproveSites,
                                                    'priority' : datasetConfig.CustodialPriority,
                                                    'primaryDataset' : dataset } )

            elif streamConfig.ProcessingStyle == "Express":

                # ALCARECO and DQM tiers were already attached to the
                # special dataset above, so only the remaining tiers are
                # written per trigger-based dataset here
                for dataTier in streamConfig.Express.DataTiers:
                    if dataTier not in [ "ALCARECO", "DQM" ]:
                        outputModuleDetails.append( { 'dataTier' : dataTier,
                                                      'eventContent' : dataTier,
                                                      'selectEvents' : selectEvents,
                                                      'primaryDataset' : dataset } )

                bindsPhEDExConfig.append( { 'RUN' : run,
                                            'PRIMDS' : dataset,
                                            'NODE' : expressPhEDExSubscribeNode,
                                            'CUSTODIAL' : 1,
                                            'REQ_ONLY' : "n",
                                            'PRIO' : "high" } )

                subscriptions['Express'].append( { 'custodialSites' : [],
                                                   'nonCustodialSites' : [expressPhEDExSubscribeNode],
                                                   'autoApproveSites' : [expressPhEDExSubscribeNode],
                                                   'priority' : "high",
                                                   'primaryDataset' : dataset } )

        #
        # finally create WMSpec
        #
        outputs = {}
        if streamConfig.ProcessingStyle == "Bulk":
            taskName = "Repack"
            workflowName = "Repack_Run%d_Stream%s" % (run, stream)
            specArguments = getRepackArguments()
            specArguments['ProcessingVersion'] = streamConfig.Repack.ProcessingVersion
            specArguments['MaxSizeSingleLumi'] = streamConfig.Repack.MaxSizeSingleLumi
            specArguments['MaxSizeMultiLumi'] = streamConfig.Repack.MaxSizeMultiLumi
            specArguments['MinInputSize'] = streamConfig.Repack.MinInputSize
            specArguments['MaxInputSize'] = streamConfig.Repack.MaxInputSize
            specArguments['MaxEdmSize'] = streamConfig.Repack.MaxEdmSize
            specArguments['MaxOverSize'] = streamConfig.Repack.MaxOverSize
            specArguments['MaxInputEvents'] = streamConfig.Repack.MaxInputEvents
            specArguments['MaxInputFiles'] = streamConfig.Repack.MaxInputFiles
            specArguments['UnmergedLFNBase'] = "%s/t0temp/%s" % (runInfo['lfn_prefix'], runInfo['bulk_data_type'])
            specArguments['MergedLFNBase'] = "%s/%s" % (runInfo['lfn_prefix'], runInfo['bulk_data_type'])
        elif streamConfig.ProcessingStyle == "Express":
            taskName = "Express"
            workflowName = "Express_Run%d_Stream%s" % (run, stream)
            specArguments = getExpressArguments()
            specArguments['ProcessingString'] = "Express"
            specArguments['ProcessingVersion'] = streamConfig.Express.ProcessingVersion
            specArguments['ProcScenario'] = streamConfig.Express.Scenario
            specArguments['GlobalTag'] = streamConfig.Express.GlobalTag
            specArguments['GlobalTagTransaction'] = "Express_%d" % run
            specArguments['MaxInputEvents'] = streamConfig.Express.MaxInputEvents
            specArguments['MaxInputSize'] = streamConfig.Express.MaxInputSize
            specArguments['MaxInputFiles'] = streamConfig.Express.MaxInputFiles
            specArguments['MaxLatency'] = streamConfig.Express.MaxLatency
            specArguments['AlcaSkims'] = streamConfig.Express.AlcaSkims
            specArguments['DqmSequences'] = streamConfig.Express.DqmSequences
            specArguments['UnmergedLFNBase'] = "%s/t0temp/express" % runInfo['lfn_prefix']
            specArguments['MergedLFNBase'] = "%s/express" % runInfo['lfn_prefix']
            specArguments['AlcaHarvestTimeout'] = runInfo['ah_timeout']
            specArguments['AlcaHarvestDir'] = runInfo['ah_dir']
            specArguments['DQMUploadProxy'] = dqmUploadProxy
            specArguments['DQMUploadUrl'] = runInfo['dqmuploadurl']
            specArguments['StreamName'] = stream

        # arguments common to both Repack and Express specs
        if streamConfig.ProcessingStyle in [ 'Bulk', 'Express' ]:
            specArguments['RunNumber'] = run
            specArguments['AcquisitionEra'] = tier0Config.Global.AcquisitionEra
            specArguments['CMSSWVersion'] = streamConfig.VersionOverride.get(onlineVersion, onlineVersion)
            specArguments['Outputs'] = outputModuleDetails
            specArguments['OverrideCatalog'] = "trivialcatalog_file:/afs/cern.ch/cms/SITECONF/T0_CH_CERN/Tier0/override_catalog.xml?protocol=override"
            specArguments['ValidStatus'] = "VALID"

        if streamConfig.ProcessingStyle == "Bulk":
            wmSpec = repackWorkload(workflowName, specArguments)
            wmSpec.setPhEDExInjectionOverride(runInfo['bulk_data_loc'])
            for subscription in subscriptions['Bulk']:
                wmSpec.setSubscriptionInformation(**subscription)
        elif streamConfig.ProcessingStyle == "Express":
            wmSpec = expressWorkload(workflowName, specArguments)
            wmSpec.setPhEDExInjectionOverride(expressPhEDExInjectNode)
            for subscription in subscriptions['Express']:
                wmSpec.setSubscriptionInformation(**subscription)

        # NOTE(review): wmSpec/wmbsHelper only exist for Bulk/Express
        # styles; the transaction below guards their use with the same
        # style check
        if streamConfig.ProcessingStyle in [ 'Bulk', 'Express' ]:
            wmSpec.setOwnerDetails("*****@*****.**", "T0",
                                   { 'vogroup': 'DEFAULT',
                                     'vorole': 'DEFAULT',
                                     'dn' : "*****@*****.**" } )
            wmSpec.setupPerformanceMonitoring(maxRSS = 10485760,
                                              maxVSize = 10485760,
                                              softTimeout = 604800,
                                              gracePeriod = 3600)
            wmbsHelper = WMBSHelper(wmSpec, taskName, cachepath = specDirectory)

        filesetName = "Run%d_Stream%s" % (run, stream)
        fileset = Fileset(filesetName)

        #
        # create workflow (currently either repack or express)
        #
        # all collected binds are written in a single transaction;
        # any failure rolls everything back and re-raises
        try:
            myThread.transaction.begin()
            if len(bindsDataset) > 0:
                insertDatasetDAO.execute(bindsDataset, conn = myThread.transaction.conn, transaction = True)
            if len(bindsStreamDataset) > 0:
                insertStreamDatasetDAO.execute(bindsStreamDataset, conn = myThread.transaction.conn, transaction = True)
            if len(bindsRepackConfig) > 0:
                insertRepackConfigDAO.execute(bindsRepackConfig, conn = myThread.transaction.conn, transaction = True)
            if len(bindsPromptCalibration) > 0:
                insertPromptCalibrationDAO.execute(bindsPromptCalibration, conn = myThread.transaction.conn, transaction = True)
            if len(bindsExpressConfig) > 0:
                insertExpressConfigDAO.execute(bindsExpressConfig, conn = myThread.transaction.conn, transaction = True)
            if len(bindsSpecialDataset) > 0:
                insertSpecialDatasetDAO.execute(bindsSpecialDataset, conn = myThread.transaction.conn, transaction = True)
            if len(bindsDatasetScenario) > 0:
                insertDatasetScenarioDAO.execute(bindsDatasetScenario, conn = myThread.transaction.conn, transaction = True)
            if len(bindsCMSSWVersion) > 0:
                insertCMSSWVersionDAO.execute(bindsCMSSWVersion, conn = myThread.transaction.conn, transaction = True)
            if len(bindsStreamOverride) > 0:
                updateStreamOverrideDAO.execute(bindsStreamOverride, conn = myThread.transaction.conn, transaction = True)
            if len(bindsStorageNode) > 0:
                insertStorageNodeDAO.execute(bindsStorageNode, conn = myThread.transaction.conn, transaction = True)
            if len(bindsPhEDExConfig) > 0:
                insertPhEDExConfigDAO.execute(bindsPhEDExConfig, conn = myThread.transaction.conn, transaction = True)
            insertStreamStyleDAO.execute(bindsStreamStyle, conn = myThread.transaction.conn, transaction = True)
            if streamConfig.ProcessingStyle in [ 'Bulk', 'Express' ]:
                insertStreamFilesetDAO.execute(run, stream, filesetName, conn = myThread.transaction.conn, transaction = True)
                fileset.load()
                wmbsHelper.createSubscription(wmSpec.getTask(taskName), fileset, alternativeFilesetClose = True)
                insertWorkflowMonitoringDAO.execute([fileset.id], conn = myThread.transaction.conn, transaction = True)
            if streamConfig.ProcessingStyle == "Bulk":
                # record PromptReco release settings per merged output fileset
                bindsRecoReleaseConfig = []
                for fileset, primds in wmbsHelper.getMergeOutputMapping().items():
                    bindsRecoReleaseConfig.append( { 'RUN' : run,
                                                     'PRIMDS' : primds,
                                                     'FILESET' : fileset,
                                                     'RECODELAY' : promptRecoDelay[primds],
                                                     'RECODELAYOFFSET' : promptRecoDelayOffset[primds] } )
                insertRecoReleaseConfigDAO.execute(bindsRecoReleaseConfig, conn = myThread.transaction.conn, transaction = True)
            elif streamConfig.ProcessingStyle == "Express":
                # Express workflows are injected immediately
                markWorkflowsInjectedDAO.execute([workflowName], injected = True, conn = myThread.transaction.conn, transaction = True)
        except:
            myThread.transaction.rollback()
            raise
        else:
            myThread.transaction.commit()

    else:

        # should we do anything for local runs ?
        pass

    return