Example #1
def wmbsSubscriptionStatus(logger, dbi, conn, transaction):
    """Function to return status of wmbs subscriptions
    """
    action = DAOFactory(package = 'WMBS',
                        logger = logger,
                        dbinterface = dbi)('Monitoring.SubscriptionStatus')
    return action.execute(conn = conn,
                          transaction = transaction)
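
Each example on this page uses the same DAOFactory pattern: a factory bound to a package, a logger, and a database interface, which, when called with a dotted classname, loads and instantiates the matching DAO class. A minimal, self-contained sketch of that pattern (the module layout and class names below are illustrative, not the actual WMCore internals):

import importlib

class MiniDAOFactory(object):
    """Illustrative stand-in for WMCore's DAOFactory."""

    def __init__(self, package, logger, dbinterface):
        self.package = package
        self.logger = logger
        self.dbi = dbinterface

    def __call__(self, classname):
        # e.g. "Monitoring.SubscriptionStatus" resolves to the module
        # <package>.Monitoring.SubscriptionStatus and the class
        # SubscriptionStatus defined inside it
        module = importlib.import_module("%s.%s" % (self.package, classname))
        daoClass = getattr(module, classname.split('.')[-1])
        return daoClass(self.logger, self.dbi)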
Example #2
def availableScheddSlots(dbi, logger=logging, condorFraction=1):
    """
    check executing jobs and compare with condor limit.
    return the difference -- executing jobs - (condor limit * condorFraction)
    """
    action = DAOFactory(package='WMCore.WMBS',
                        logger=logger,
                        dbinterface=dbi)(classname="Jobs.GetCountByState")
    executingJobs = int(action.execute("executing"))

    maxScheddJobs = getScheddParamValue("MAX_JOBS_PER_OWNER")

    if maxScheddJobs is None:
        logger.warning("Failed to retrieve 'MAX_JOBS_PER_OWNER' from HTCondor")
        return 0

    freeSubmitSlots = int(int(maxScheddJobs) * condorFraction - executingJobs)
    return freeSubmitSlots
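
A worked illustration of the slot arithmetic above, with invented numbers:

# All values below are made up for illustration.
maxScheddJobs = 10000      # as if returned by getScheddParamValue
condorFraction = 0.9
executingJobs = 8500

freeSubmitSlots = int(int(maxScheddJobs) * condorFraction - executingJobs)
assert freeSubmitSlots == 500   # 10000 * 0.9 - 8500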
Example #3
    def algorithm(self, groupInstance = None, jobInstance = None,
                  *args, **kwargs):
        """
        _algorithm_

        A file-based splitting algorithm

        """
        # extract some global scheduling parameters
        self.jobNamePrefix = kwargs.get('jobNamePrefix', "RepackMerge")
        self.minInputSize = kwargs['minInputSize']
        self.maxInputSize = kwargs['maxInputSize']
        self.maxInputEvents = kwargs['maxInputEvents']
        self.maxInputFiles = kwargs['maxInputFiles']
        self.maxEdmSize = kwargs['maxEdmSize']
        self.maxOverSize = kwargs['maxOverSize']
        self.maxLatency = kwargs['maxLatency']

        # catch configuration errors
        if self.maxOverSize > self.maxEdmSize:
            self.maxOverSize = self.maxEdmSize

        self.currentTime = time.time()

        self.createdGroup = False

        myThread = threading.currentThread()
        daoFactory = DAOFactory(package = "T0.WMBS",
                                logger = logging,
                                dbinterface = myThread.dbi)

        # data discovery
        getAvailableFilesDAO = daoFactory(classname = "Subscriptions.GetAvailableRepackMergeFiles")
        availableFiles = getAvailableFilesDAO.execute(self.subscription["id"])

        # nothing to do, stop immediately
        if len(availableFiles) == 0:
            return

        # data discovery for already used lumis
        getUsedLumisDAO = daoFactory(classname = "Subscriptions.GetUsedLumis")
        usedLumis = getUsedLumisDAO.execute(self.subscription["id"], True)

        # empty lumis (as declared by StorageManager) are treated the
        # same way as used lumis, i.e. we process around them
        getEmptyLumisDAO = daoFactory(classname = "Subscriptions.GetLumiHolesForRepackMerge")
        usedLumis |= getEmptyLumisDAO.execute(self.subscription["id"])

        # sort available files by lumi
        availableFileLumiDict = {}
        for result in availableFiles:
            for lumi in range(result['first_lumi'], 1+result['last_lumi']):
                if lumi not in availableFileLumiDict:
                    availableFileLumiDict[lumi] = []
                if lumi == result['first_lumi']:
                    availableFileLumiDict[lumi].append(result)

        # loop through lumis in order
        haveLumiHole = False
        filesByLumi = {}
        maxUsedLumi = max(usedLumis) if usedLumis else 0
        for lumi in range(1, 1 + max(maxUsedLumi, max(availableFileLumiDict.keys()))):

            # lumi contains data => remember it for potential processing
            if lumi in availableFileLumiDict:

                filesByLumi[lumi] = availableFileLumiDict[lumi]

            # lumi is used and we have data => trigger processing
            elif lumi in usedLumis:

                if len(filesByLumi) > 0:

                    if haveLumiHole:
                        # if lumi hole check for maxLatency first
                        if self.getDataAge(filesByLumi) > self.maxLatency:
                            self.defineJobs(filesByLumi, True)
                            filesByLumi = {}
                        # if maxLatency not met ignore data for now
                        else:
                            filesByLumi = {}
                    else:
                        self.defineJobs(filesByLumi, True)
                        filesByLumi = {}

                # if we had a lumi hole it is now not relevant anymore
                # the next data will have a used lumi in front of it
                haveLumiHole = False

            # lumi has no data and isn't used, i.e. we have a lumi hole
            # also has an impact on how to handle later data
            else:

                if len(filesByLumi) > 0:

                    # forceClose if maxLatency trigger is met
                    if self.getDataAge(filesByLumi) > self.maxLatency:
                        self.defineJobs(filesByLumi, True)
                        filesByLumi = {}
                    # follow the normal thresholds, but only if
                    # there is no lumi hole in front of the data
                    elif not haveLumiHole:
                        self.defineJobs(filesByLumi, False)
                        filesByLumi = {}
                    # otherwise ignore the data for now
                    else:
                        filesByLumi = {}

                haveLumiHole = True

        # now handle whatever data is still left (at the high end of the lumi range)
        if haveLumiHole:
            if self.getDataAge(filesByLumi) > self.maxLatency:
                self.defineJobs(filesByLumi, True)
        else:
            fileset = self.subscription.getFileset()
            fileset.load()
            self.defineJobs(filesByLumi, not fileset.open)

        return
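
Stripped of the latency and lumi-hole bookkeeping, the core of the loop above accumulates contiguous lumis with data and flushes a group whenever a used lumi is reached. A self-contained sketch with made-up data:

availableFileLumiDict = {1: ['f1'], 2: ['f2'], 5: ['f5']}   # invented
usedLumis = {3}

groups = []          # each entry would go to defineJobs in the real code
filesByLumi = {}
maxLumi = max(max(usedLumis), max(availableFileLumiDict))
for lumi in range(1, maxLumi + 1):
    if lumi in availableFileLumiDict:
        filesByLumi[lumi] = availableFileLumiDict[lumi]
    elif lumi in usedLumis and filesByLumi:
        groups.append(filesByLumi)
        filesByLumi = {}
if filesByLumi:
    groups.append(filesByLumi)   # leftover data at the high end

assert groups == [{1: ['f1'], 2: ['f2']}, {5: ['f5']}]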
Example #4
    def __init__(self, config):
        BaseWorkerThread.__init__(self)
        myThread = threading.currentThread()

        #DAO factory for WMBS objects
        self.daoFactory = DAOFactory(package = "WMCore.WMBS", \
                                     logger = logging,
                                     dbinterface = myThread.dbi)
        self.config = config

        #Libraries
        self.resourceControl = ResourceControl()

        self.changeState = ChangeState(self.config)
        self.repollCount = getattr(self.config.JobSubmitter, 'repollCount',
                                   10000)

        # BossAir
        self.bossAir = BossAirAPI(config=self.config)

        # Additions for caching-based JobSubmitter
        self.workflowTimestamps = {}
        self.workflowPrios = {}
        self.cachedJobIDs = set()
        self.cachedJobs = {}
        self.jobDataCache = {}
        self.jobsToPackage = {}
        self.sandboxPackage = {}
        self.siteKeys = {}
        self.locationDict = {}
        self.cmsNames = {}
        self.drainSites = set()
        self.abortSites = set()
        self.sortedSites = []
        self.packageSize = getattr(self.config.JobSubmitter, 'packageSize',
                                   500)
        self.collSize = getattr(self.config.JobSubmitter, 'collectionSize',
                                self.packageSize * 1000)

        # initialize the alert framework (if available)
        self.initAlerts(compName="JobSubmitter")

        try:
            if not getattr(self.config.JobSubmitter, 'submitDir', None):
                self.config.JobSubmitter.submitDir = self.config.JobSubmitter.componentDir
            self.packageDir = os.path.join(self.config.JobSubmitter.submitDir,
                                           'packages')

            if not os.path.exists(self.packageDir):
                os.makedirs(self.packageDir)
        except Exception as ex:
            msg = "Error while trying to create packageDir!\n"
            msg += str(ex)
            logging.error(msg)
            self.sendAlert(6, msg=msg)
            try:
                logging.debug("PackageDir: %s" % self.packageDir)
                logging.debug("Config: %s" % config)
            except Exception:
                pass
            raise JobSubmitterPollerException(msg)
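
The getattr(section, name, default) idiom used throughout this constructor lets the component run with a sparse configuration. A tiny stand-in config (the _Section class is hypothetical, not WMAgent's ConfigSection) shows the effect:

class _Section(object):
    """Hypothetical stand-in for a configuration section."""
    pass

config = _Section()
config.JobSubmitter = _Section()   # packageSize deliberately not set

packageSize = getattr(config.JobSubmitter, 'packageSize', 500)
assert packageSize == 500          # falls back to the default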
Example #5
    def setUp(self):
        """
        _setUp_

        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()

        self.testInit.setSchema(customModules = ["T0.WMBS"])

        self.splitterFactory = SplitterFactory(package = "T0.JobSplitting")

        myThread = threading.currentThread()

        daoFactory = DAOFactory(package = "T0.WMBS",
                                logger = logging,
                                dbinterface = myThread.dbi)

        wmbsDaoFactory = DAOFactory(package = "WMCore.WMBS",
                                    logger = logging,
                                    dbinterface = myThread.dbi)

        myThread.dbi.processData("""INSERT INTO wmbs_location
                                    (id, site_name, state)
                                    VALUES (1, 'SomeSite', 1)
                                    """, transaction = False)
        myThread.dbi.processData("""INSERT INTO wmbs_location_pnns
                                    (location, pnn)
                                    VALUES (1, 'SomePNN')
                                    """, transaction = False)
        
        myThread.dbi.processData("""INSERT INTO wmbs_location_pnns
                                    (location, pnn)
                                    VALUES (1, 'SomePNN2')
                                    """, transaction = False)


        insertRunDAO = daoFactory(classname = "RunConfig.InsertRun")
        insertRunDAO.execute(binds = { 'RUN' : 1,
                                       'HLTKEY' : "someHLTKey" },
                             transaction = False)

        insertLumiDAO = daoFactory(classname = "RunConfig.InsertLumiSection")
        insertLumiDAO.execute(binds = { 'RUN' : 1,
                                        'LUMI' : 1 },
                              transaction = False)
        insertLumiDAO.execute(binds = { 'RUN' : 1,
                                        'LUMI' : 2 },
                              transaction = False)
        insertLumiDAO.execute(binds = { 'RUN' : 1,
                                        'LUMI' : 3 },
                              transaction = False)
        insertLumiDAO.execute(binds = { 'RUN' : 1,
                                        'LUMI' : 4 },
                              transaction = False)
        insertLumiDAO.execute(binds = { 'RUN' : 1,
                                        'LUMI' : 5 },
                              transaction = False)

        insertStreamDAO = daoFactory(classname = "RunConfig.InsertStream")
        insertStreamDAO.execute(binds = { 'STREAM' : "A" },
                                transaction = False)

        insertCMSSWVersionDAO = daoFactory(classname = "RunConfig.InsertCMSSWVersion")
        insertCMSSWVersionDAO.execute(binds = { 'VERSION' : "CMSSW_4_2_7" },
                                      transaction = False)

        insertStreamCMSSWVersionDAO = daoFactory(classname = "RunConfig.InsertStreamCMSSWVersion")
        insertStreamCMSSWVersionDAO.execute(binds = { 'RUN' : 1,
                                                      'STREAM' : 'A',
                                                      'VERSION' : "CMSSW_4_2_7" },
                                            transaction = False)

        insertStreamerDAO = daoFactory(classname = "RunConfig.InsertStreamer")
        insertStreamerDAO.execute(binds = { 'RUN' : 1,
                                            'LUMI' : 4,
                                            'STREAM' : "A",
                                            'LFN' : "/testLFN/A",
                                            'FILESIZE' : 100,
                                            'EVENTS' : 100,
                                            'TIME' : int(time.time()) },
                                  transaction = False)

        insertStreamFilesetDAO = daoFactory(classname = "RunConfig.InsertStreamFileset")
        insertStreamFilesetDAO.execute(1, "A", "TestFileset1")

        self.fileset1 = Fileset(name = "TestFileset1")
        self.fileset2 = Fileset(name = "TestFileset2")
        self.fileset1.load()
        self.fileset2.create()

        workflow1 = Workflow(spec = "spec.xml", owner = "hufnagel", name = "TestWorkflow1", task="Test")
        workflow2 = Workflow(spec = "spec.xml", owner = "hufnagel", name = "TestWorkflow2", task="Test")
        workflow1.create()
        workflow2.create()

        self.subscription1  = Subscription(fileset = self.fileset1,
                                           workflow = workflow1,
                                           split_algo = "Repack",
                                           type = "Repack")
        self.subscription2  = Subscription(fileset = self.fileset2,
                                           workflow = workflow2,
                                           split_algo = "RepackMerge",
                                           type = "RepackMerge")
        self.subscription1.create()
        self.subscription2.create()

        myThread.dbi.processData("""INSERT INTO wmbs_workflow_output
                                    (WORKFLOW_ID, OUTPUT_IDENTIFIER, OUTPUT_FILESET)
                                    VALUES (%d, 'SOMEOUTPUT', %d)
                                    """ % (workflow1.id, self.fileset2.id),
                                 transaction = False)

        # keep for later
        self.insertSplitLumisDAO = daoFactory(classname = "JobSplitting.InsertSplitLumis")
        self.insertClosedLumiDAO = daoFactory(classname = "RunLumiCloseout.InsertClosedLumi")
        self.feedStreamersDAO = daoFactory(classname = "Tier0Feeder.FeedStreamers")                                                      
        self.acquireFilesDAO = wmbsDaoFactory(classname = "Subscriptions.AcquireFiles")
        self.completeFilesDAO = wmbsDaoFactory(classname = "Subscriptions.CompleteFiles")
        self.currentTime = int(time.time())

        # default split parameters
        self.splitArgs = {}
        self.splitArgs['minInputSize'] = 2.1 * 1024 * 1024 * 1024
        self.splitArgs['maxInputSize'] = 4.0 * 1024 * 1024 * 1024
        self.splitArgs['maxInputEvents'] = 100000000
        self.splitArgs['maxInputFiles'] = 1000
        self.splitArgs['maxEdmSize'] = 20 * 1024 * 1024 * 1024
        self.splitArgs['maxOverSize'] = 10 * 1024 * 1024 * 1024

        return
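
The five single-bind InsertLumiSection calls above could also be issued as one batched call, the style Example #10 uses for the same DAO; sketching just the bind list:

# Build one list of bind dictionaries instead of five separate calls.
bindLumi = [{'RUN': 1, 'LUMI': lumi} for lumi in range(1, 6)]
assert len(bindLumi) == 5 and bindLumi[0] == {'RUN': 1, 'LUMI': 1}
# insertLumiDAO.execute(binds=bindLumi, transaction=False)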
Example #6
    def __init__(self, config):

        self.config = config

        BasePlugin.__init__(self, config)

        self.locationDict = {}

        myThread = threading.currentThread()
        daoFactory = DAOFactory(package="WMCore.WMBS", logger = myThread.logger,
                                dbinterface = myThread.dbi)
        self.locationAction = daoFactory(classname = "Locations.GetSiteInfo")


        self.packageDir = None

        if os.path.exists(os.path.join(getWMBASE(),
                                       'src/python/WMCore/WMRuntime/Unpacker.py')):
            self.unpacker = os.path.join(getWMBASE(),
                                         'src/python/WMCore/WMRuntime/Unpacker.py')
        else:
            self.unpacker = os.path.join(getWMBASE(),
                                         'WMCore/WMRuntime/Unpacker.py')

        self.agent         = getattr(config.Agent, 'agentName', 'WMAgent')
        self.sandbox       = None
        self.scriptFile    = None
        self.submitDir     = None
        self.removeTime    = getattr(config.BossAir, 'removeTime', 60)
        self.useGSite      = getattr(config.BossAir, 'useGLIDEINSites', False)
        self.submitWMSMode = getattr(config.BossAir, 'submitWMSMode', False)
        self.errorThreshold= getattr(config.BossAir, 'submitErrorThreshold', 10)
        self.errorCount    = 0
        self.defaultTaskPriority = getattr(config.BossAir, 'defaultTaskPriority', 0)
        self.maxTaskPriority     = getattr(config.BossAir, 'maxTaskPriority', 1e7)

        # Required for global pool accounting
        self.acctGroup = getattr(config.BossAir, 'acctGroup', "production")
        self.acctGroupUser = getattr(config.BossAir, 'acctGroupUser', "cmsdataops")

        # Build ourselves a pool
        self.pool     = []
        self.input    = None
        self.result   = None
        self.nProcess = getattr(self.config.BossAir, 'nCondorProcesses', 4)

        # Set up my proxy and glexec stuff
        self.setupScript = getattr(config.BossAir, 'UISetupScript', None)
        self.proxy       = None
        self.serverCert  = getattr(config.BossAir, 'delegatedServerCert', None)
        self.serverKey   = getattr(config.BossAir, 'delegatedServerKey', None)
        self.myproxySrv  = getattr(config.BossAir, 'myproxyServer', None)
        self.proxyDir    = getattr(config.BossAir, 'proxyDir', '/tmp/')
        self.serverHash  = getattr(config.BossAir, 'delegatedServerHash', None)
        self.glexecPath  = getattr(config.BossAir, 'glexecPath', None)
        self.glexecWrapScript = getattr(config.BossAir, 'glexecWrapScript', None)
        self.glexecUnwrapScript = getattr(config.BossAir, 'glexecUnwrapScript', None)
        self.jdlProxyFile    = None # Proxy name to put in JDL (owned by submit user)
        self.glexecProxyFile = None # Copy of same file owned by submit user

        if self.glexecPath:
            if not (self.myproxySrv and self.proxyDir):
                raise WMException('glexec requires myproxyServer and proxyDir to be set.')
        if self.myproxySrv:
            if not (self.serverCert and self.serverKey):
                raise WMException('MyProxy server requires serverCert and serverKey to be set.')

        # Make the directory for the proxies
        if self.proxyDir and not os.path.exists(self.proxyDir):
            logging.debug("proxyDir not found: creating it.")
            try:
                os.makedirs(self.proxyDir, 0o1777)
            except Exception as ex:
                msg = "Error: problem when creating proxyDir directory - '%s'" % str(ex)
                raise BossAirPluginException(msg)
        elif not os.path.isdir(self.proxyDir):
            msg = "Error: proxyDir '%s' is not a directory" % self.proxyDir
            raise BossAirPluginException(msg)

        if self.serverCert and self.serverKey and self.myproxySrv:
            self.proxy = self.setupMyProxy()

        # Build a request string
        self.reqStr = "(Memory >= 1 && OpSys == \"LINUX\" ) && (Arch == \"INTEL\" || Arch == \"X86_64\") && stringListMember(GLIDEIN_CMSSite, DESIRED_Sites) && ((REQUIRED_OS==\"any\") || (GLIDEIN_REQUIRED_OS==REQUIRED_OS))"
        if hasattr(config.BossAir, 'condorRequirementsString'):
            self.reqStr = config.BossAir.condorRequirementsString

        return
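
The requirements string at the end follows a default-unless-configured pattern: hasattr decides whether the configuration overrides the built-in ClassAd expression. A self-contained sketch (the _Section class is hypothetical):

class _Section(object):
    pass

config = _Section()
config.BossAir = _Section()

reqStr = '(Memory >= 1 && OpSys == "LINUX")'   # shortened stand-in default
if hasattr(config.BossAir, 'condorRequirementsString'):
    reqStr = config.BossAir.condorRequirementsString
assert reqStr.startswith('(Memory')            # default kept

config.BossAir.condorRequirementsString = 'True'
if hasattr(config.BossAir, 'condorRequirementsString'):
    reqStr = config.BossAir.condorRequirementsString
assert reqStr == 'True'                        # override wins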
Example #7
    def algorithm(self, groupInstance = None, jobInstance = None,
                  *args, **kwargs):
        """
        _algorithm_

        A file-based splitting algorithm

        """
        # extract some global scheduling parameters
        self.jobNamePrefix = kwargs.get('jobNamePrefix', "Repack")

        self.maxSizeSingleLumi = kwargs['maxSizeSingleLumi']
        self.maxSizeMultiLumi = kwargs['maxSizeMultiLumi']

        self.maxInputEvents = kwargs['maxInputEvents']
        self.maxInputFiles = kwargs['maxInputFiles']

        self.createdGroup = False

        timePerEvent, sizePerEvent, memoryRequirement = \
                    self.getPerformanceParameters(kwargs.get('performance', {}))
        
        myThread = threading.currentThread()
        daoFactory = DAOFactory(package = "T0.WMBS",
                                logger = logging,
                                dbinterface = myThread.dbi)

        maxLumiWithJobDAO = daoFactory(classname = "Subscriptions.MaxLumiWithJob")
        getClosedEmptyLumisDAO = daoFactory(classname = "JobSplitting.GetClosedEmptyLumis")

        # keep for later
        self.insertSplitLumisDAO = daoFactory(classname = "JobSplitting.InsertSplitLumis")

        # data discovery
        getFilesDAO = daoFactory(classname = "Subscriptions.GetAvailableRepackFiles")
        availableFiles = getFilesDAO.execute(self.subscription["id"])

        # nothing to do, stop immediately
        if len(availableFiles) == 0:
            return

        # lumis we have data for
        lumiList = set([])
        for result in availableFiles:
            lumiList.add(result['lumi'])
        lumiList = sorted(list(lumiList))

        # highest lumi with a job
        maxLumiWithJob = 0
        if lumiList[0] > 1:
            maxLumiWithJob = maxLumiWithJobDAO.execute(self.subscription["id"])

        # consistency check
        if lumiList[0] <= maxLumiWithJob:
            logging.error("ERROR: finding data that can't be there, bailing out...")
            return

        # do we have lumi holes?
        detectEmptyLumis = False
        lumi = maxLumiWithJob + 1
        while lumi in lumiList:
            lumi += 1
        if lumi < lumiList[-1]:
            detectEmptyLumis = True

        # empty and closed lumis
        emptyLumis = []
        if detectEmptyLumis:
            emptyLumis = getClosedEmptyLumisDAO.execute(self.subscription["id"], maxLumiWithJob)

        # figure out lumi range to create jobs for
        streamersByLumi = {}
        firstLumi = maxLumiWithJob + 1
        lastLumi = lumiList[-1]
        for lumi in range(firstLumi, lastLumi + 1):
            if (lumi in lumiList) or (lumi in emptyLumis):
                streamersByLumi[lumi] = []
            else:
                break

        # figure out what data to create jobs for
        for fileInfo in availableFiles:
            lumi = fileInfo['lumi']
            if lumi in streamersByLumi:
                streamersByLumi[lumi].append(fileInfo)

        # check if fileset is closed
        fileset = self.subscription.getFileset()
        fileset.load()

        self.defineJobs(streamersByLumi, fileset.open, memoryRequirement)

        return
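
The lumi-hole detection above walks upward from maxLumiWithJob + 1 through the contiguous lumis that have data; stopping short of the highest lumi means there is a hole. The same logic with made-up data:

lumiList = [3, 4, 7]         # sorted lumis with data (invented)
maxLumiWithJob = 2

detectEmptyLumis = False
lumi = maxLumiWithJob + 1
while lumi in lumiList:
    lumi += 1
if lumi < lumiList[-1]:
    detectEmptyLumis = True  # lumis 5 and 6 are missing

assert detectEmptyLumis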
Example #8
    def stuffDatabase(self):
        """
        _stuffDatabase_

        Fill the dbsbuffer with some files and blocks.  We'll insert a total
        of 5 files spanning two blocks.  There will be a total of two datasets
        inserted into the database.

        All files will already be in GLOBAL and in_phedex
        """
        myThread = threading.currentThread()

        buffer3Factory = DAOFactory(package="WMComponent.DBS3Buffer",
                                    logger=myThread.logger,
                                    dbinterface=myThread.dbi)
        insertWorkflow = buffer3Factory(classname="InsertWorkflow")
        insertWorkflow.execute("BogusRequestA", "BogusTask",
                               0, 0, 0, 0)
        insertWorkflow.execute("BogusRequestB", "BogusTask",
                               0, 0, 0, 0)

        checksums = {"adler32": "1234", "cksum": "5678"}
        testFileA = DBSBufferFile(lfn=makeUUID(), size=1024, events=10,
                                  checksums=checksums,
                                  locations=set(["srm-cms.cern.ch"]))
        testFileA.setAlgorithm(appName="cmsRun", appVer="CMSSW_2_1_8",
                               appFam="RECO", psetHash="GIBBERISH",
                               configContent="MOREGIBBERISH")
        testFileA.setDatasetPath(self.testDatasetA)
        testFileA.addRun(Run(2, *[45]))
        testFileA.create()

        testFileB = DBSBufferFile(lfn=makeUUID(), size=1024, events=10,
                                  checksums=checksums,
                                  locations=set(["srm-cms.cern.ch"]))
        testFileB.setAlgorithm(appName="cmsRun", appVer="CMSSW_2_1_8",
                               appFam="RECO", psetHash="GIBBERISH",
                               configContent="MOREGIBBERISH")
        testFileB.setDatasetPath(self.testDatasetA)
        testFileB.addRun(Run(2, *[45]))
        testFileB.create()

        testFileC = DBSBufferFile(lfn=makeUUID(), size=1024, events=10,
                                  checksums=checksums,
                                  locations=set(["srm-cms.cern.ch"]))
        testFileC.setAlgorithm(appName="cmsRun", appVer="CMSSW_2_1_8",
                               appFam="RECO", psetHash="GIBBERISH",
                               configContent="MOREGIBBERISH")
        testFileC.setDatasetPath(self.testDatasetA)
        testFileC.addRun(Run(2, *[45]))
        testFileC.create()

        self.testFilesA.append(testFileA)
        self.testFilesA.append(testFileB)
        self.testFilesA.append(testFileC)

        testFileD = DBSBufferFile(lfn=makeUUID(), size=1024, events=10,
                                  checksums=checksums,
                                  locations=set(["srm-cms.cern.ch"]))
        testFileD.setAlgorithm(appName="cmsRun", appVer="CMSSW_2_1_8",
                               appFam="RECO", psetHash="GIBBERISH",
                               configContent="MOREGIBBERISH")
        testFileD.setDatasetPath(self.testDatasetB)
        testFileD.addRun(Run(2, *[45]))
        testFileD.create()

        testFileE = DBSBufferFile(lfn=makeUUID(), size=1024, events=10,
                                  checksums=checksums,
                                  locations=set(["srm-cms.cern.ch"]))
        testFileE.setAlgorithm(appName="cmsRun", appVer="CMSSW_2_1_8",
                               appFam="RECO", psetHash="GIBBERISH",
                               configContent="MOREGIBBERISH")
        testFileE.setDatasetPath(self.testDatasetB)
        testFileE.addRun(Run(2, *[45]))
        testFileE.create()

        self.testFilesB.append(testFileD)
        self.testFilesB.append(testFileE)

        uploadFactory = DAOFactory(package="WMComponent.DBS3Buffer",
                                   logger=myThread.logger,
                                   dbinterface=myThread.dbi)
        datasetAction = uploadFactory(classname="NewDataset")
        createAction = uploadFactory(classname="CreateBlocks")

        datasetAction.execute(datasetPath=self.testDatasetA)
        datasetAction.execute(datasetPath=self.testDatasetB)

        self.blockAName = self.testDatasetA + "#" + makeUUID()
        self.blockBName = self.testDatasetB + "#" + makeUUID()

        newBlockA = DBSBufferBlock(name=self.blockAName,
                                   location="srm-cms.cern.ch",
                                   datasetpath=None)
        newBlockA.setDataset(self.testDatasetA, 'data', 'VALID')
        newBlockA.status = 'Closed'

        newBlockB = DBSBufferBlock(name=self.blockBName,
                                   location="srm-cms.cern.ch",
                                   datasetpath=None)
        newBlockB.setDataset(self.testDatasetB, 'data', 'VALID')
        newBlockB.status = 'Closed'

        createAction.execute(blocks=[newBlockA, newBlockB])

        bufferFactory = DAOFactory(package="WMComponent.DBS3Buffer",
                                   logger=myThread.logger,
                                   dbinterface=myThread.dbi)

        setBlock = bufferFactory(classname="DBSBufferFiles.SetBlock")
        setBlock.execute(testFileA["lfn"], self.blockAName)
        setBlock.execute(testFileB["lfn"], self.blockAName)
        setBlock.execute(testFileC["lfn"], self.blockAName)
        setBlock.execute(testFileD["lfn"], self.blockBName)
        setBlock.execute(testFileE["lfn"], self.blockBName)

        fileStatus = bufferFactory(classname="DBSBufferFiles.SetStatus")
        fileStatus.execute(testFileA["lfn"], "GLOBAL")
        fileStatus.execute(testFileB["lfn"], "GLOBAL")
        fileStatus.execute(testFileC["lfn"], "GLOBAL")
        fileStatus.execute(testFileD["lfn"], "GLOBAL")
        fileStatus.execute(testFileE["lfn"], "GLOBAL")

        phedexStatus = bufferFactory(classname="DBSBufferFiles.SetPhEDExStatus")
        phedexStatus.execute(testFileA["lfn"], 1)
        phedexStatus.execute(testFileB["lfn"], 1)
        phedexStatus.execute(testFileC["lfn"], 1)
        phedexStatus.execute(testFileD["lfn"], 1)
        phedexStatus.execute(testFileE["lfn"], 1)

        associateWorkflow = buffer3Factory(classname="DBSBufferFiles.AssociateWorkflowToFile")
        associateWorkflow.execute(testFileA["lfn"], "BogusRequestA", "BogusTask")
        associateWorkflow.execute(testFileB["lfn"], "BogusRequestA", "BogusTask")
        associateWorkflow.execute(testFileC["lfn"], "BogusRequestA", "BogusTask")
        associateWorkflow.execute(testFileD["lfn"], "BogusRequestB", "BogusTask")
        associateWorkflow.execute(testFileE["lfn"], "BogusRequestB", "BogusTask")

        # Make the desired subscriptions
        insertSubAction = buffer3Factory(classname="NewSubscription")
        datasetA = DBSBufferDataset(path=self.testDatasetA)
        datasetB = DBSBufferDataset(path=self.testDatasetB)
        workload = WMWorkloadHelper()
        workload.load(os.path.join(getTestBase(), 'WMComponent_t/PhEDExInjector_t/specs/TestWorkload.pkl'))
        insertSubAction.execute(datasetA.exists(), workload.getSubscriptionInformation()[self.testDatasetA])
        insertSubAction.execute(datasetB.exists(), workload.getSubscriptionInformation()[self.testDatasetB])

        return
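
Block names in this fixture follow the dataset-path-plus-'#'-plus-unique-suffix convention. A sketch using the standard library's uuid module in place of WMCore's makeUUID (the dataset path is invented):

import uuid

testDatasetA = '/Cosmics/SomeEra-v1/RECO'
blockAName = testDatasetA + "#" + str(uuid.uuid4())
assert blockAName.startswith('/Cosmics/SomeEra-v1/RECO#')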
Example #9
    def __init__(self,
                 wmSpec,
                 taskName,
                 blockName=None,
                 mask=None,
                 cachepath='.'):
        """
        _init_

        Initialize DAOs and other things needed.
        """
        self.block = blockName
        self.mask = mask
        self.wmSpec = wmSpec
        self.topLevelTask = wmSpec.getTask(taskName)
        self.cachepath = cachepath
        self.isDBS = True

        self.topLevelFileset = None
        self.topLevelSubscription = None
        self.topLevelTaskDBSBufferId = None

        self.mergeOutputMapping = {}

        # Initiate the pieces you need to run your own DAOs
        WMConnectionBase.__init__(self, "WMCore.WMBS")
        myThread = threading.currentThread()
        self.dbsDaoFactory = DAOFactory(package="WMComponent.DBS3Buffer",
                                        logger=myThread.logger,
                                        dbinterface=myThread.dbi)

        # DAOs from WMBS for file commit
        self.setParentage = self.daofactory(classname="Files.SetParentage")
        self.setFileRunLumi = self.daofactory(classname="Files.AddRunLumi")
        self.setFileLocation = self.daofactory(
            classname="Files.SetLocationForWorkQueue")
        self.setFileAddChecksum = self.daofactory(
            classname="Files.AddChecksumByLFN")
        self.addFileAction = self.daofactory(classname="Files.Add")
        self.addToFileset = self.daofactory(classname="Files.AddDupsToFileset")
        self.getLocations = self.daofactory(classname="Locations.ListSites")
        self.getLocationInfo = self.daofactory(
            classname="Locations.GetSiteInfo")

        # DAOs from DBSBuffer
        self.dbsCreateFiles = self.dbsDaoFactory(
            classname="DBSBufferFiles.Add")
        self.dbsSetLocation = self.dbsDaoFactory(
            classname="DBSBufferFiles.SetLocationByLFN")
        self.dbsInsertLocation = self.dbsDaoFactory(
            classname="DBSBufferFiles.AddLocation")
        self.dbsSetChecksum = self.dbsDaoFactory(
            classname="DBSBufferFiles.AddChecksumByLFN")
        self.dbsInsertWorkflow = self.dbsDaoFactory(classname="InsertWorkflow")

        # Added for file creation bookkeeping
        self.dbsFilesToCreate = []
        self.addedLocations = []
        self.wmbsFilesToCreate = []
        self.insertedBogusDataset = -1

        return
Example #10
def injectNewData(dbInterfaceStorageManager,
                  dbInterfaceHltConf,
                  dbInterfaceSMNotify,
                  streamerPNN,
                  minRun=None,
                  maxRun=None,
                  injectRun=None):
    """
    _injectNewData_

    Replaces the old-style file notification injection into the Tier0.

    Queries the StorageManager database for new data and injects it into the Tier0.

    These queries will find duplicates, i.e. data that was already found and
    processed in a previous polling cycle. The code has to be robust against
    that.

    Needs to be passed the PNN on which the streamer files are located.

    """
    logging.debug("injectNewData()")
    myThread = threading.currentThread()

    daoFactory = DAOFactory(package="T0.WMBS",
                            logger=logging,
                            dbinterface=myThread.dbi)

    daoFactoryStorageManager = DAOFactory(
        package="T0.WMBS",
        logger=logging,
        dbinterface=dbInterfaceStorageManager)

    daoFactoryHltConf = DAOFactory(package="T0.WMBS",
                                   logger=logging,
                                   dbinterface=dbInterfaceHltConf)

    if dbInterfaceSMNotify:
        daoFactorySMNotify = DAOFactory(package="T0.WMBS",
                                        logger=logging,
                                        dbinterface=dbInterfaceSMNotify)
        insertFileStatusDAO = daoFactorySMNotify(
            classname="SMNotification.InsertOfflineFileStatus")

    getNewDataDAO = daoFactoryStorageManager(
        classname="StorageManager.GetNewData")
    getRunInfoDAO = daoFactoryHltConf(classname="StorageManager.GetRunInfo")
    insertRunDAO = daoFactory(classname="RunConfig.InsertRun")
    insertStreamDAO = daoFactory(classname="RunConfig.InsertStream")
    insertCMSSWVersionDAO = daoFactory(
        classname="RunConfig.InsertCMSSWVersion")
    insertStreamCMSSWVersionDAO = daoFactory(
        classname="RunConfig.InsertStreamCMSSWVersion")
    insertLumiDAO = daoFactory(classname="RunConfig.InsertLumiSection")
    insertStreamerDAO = daoFactory(classname="RunConfig.InsertStreamer")

    newData = getNewDataDAO.execute(minRun=minRun,
                                    maxRun=maxRun,
                                    injectRun=injectRun,
                                    transaction=False)

    # remove already processed files
    newData[:] = [
        newFile for newFile in newData
        if newFile['p5_id'] not in knownStreamers
    ]

    logging.debug("StoragemanagerAPI: found %d new files", len(newData))

    newRuns = set()
    newRunStreams = {}
    for newFile in newData:

        run = newFile['run']
        stream = newFile['stream']

        newRuns.add(newFile['run'])

        if run not in newRunStreams:
            newRunStreams[run] = set()
        if stream not in newRunStreams[run]:
            newRunStreams[run].add(stream)

    logging.debug("StoragemanagerAPI: found %d new runs", len(newRuns))

    cmsswVersions = set()
    streams = set()
    bindRunHltKey = []
    bindRunStreamCMSSW = []
    for run in sorted(list(newRuns)):
        (hltkey, cmssw) = getRunInfoDAO.execute(run=run, transaction=False)
        logging.debug("StorageManagerAPI: run = %d, hltkey = %s, cmssw = %s",
                      run, hltkey, cmssw)
        if hltkey and cmssw:
            cmssw = '_'.join(
                cmssw.split('_')[0:4])  # only consider base release
            cmsswVersions.add(cmssw)
            bindRunHltKey.append({'RUN': run, 'HLTKEY': hltkey})
            for stream in newRunStreams[run]:
                streams.add(stream)
                bindRunStreamCMSSW.append({
                    'RUN': run,
                    'STREAM': stream,
                    'VERSION': cmssw
                })
        else:
            # can't retrieve hltkey and cmssw for run, ignore any data for it
            newRuns.remove(run)

    if len(bindRunHltKey) > 0:
        insertRunDAO.execute(binds=bindRunHltKey, transaction=False)

    bindStream = []
    for stream in streams:
        bindStream.append({'STREAM': stream})
    if len(bindStream) > 0:
        insertStreamDAO.execute(binds=bindStream, transaction=False)

    bindCMSSW = []
    for cmssw in cmsswVersions:
        bindCMSSW.append({'VERSION': cmssw})
    if len(bindCMSSW) > 0:
        insertCMSSWVersionDAO.execute(binds=bindCMSSW, transaction=False)

    if len(bindRunStreamCMSSW) > 0:
        insertStreamCMSSWVersionDAO.execute(binds=bindRunStreamCMSSW,
                                            transaction=False)

    lumis = set()
    bindStreamer = []
    bindInsertFileStatus = []
    for newFile in newData:

        run = newFile['run']

        if run not in newRuns:
            continue

        lumi = newFile['lumi']
        lumis.add((run, lumi))

        if newFile['filename'] == 'run289461_ls0020_streamExpressCosmics_StorageManager.dat':
            newFile['path'] = '/store/t0streamer/Data/ExpressCosmics/000/289/461'

        bindStreamer.append({
            'LFN': newFile['path'] + '/' + newFile['filename'],
            'P5_ID': newFile['p5_id'],
            'RUN': run,
            'LUMI': lumi,
            'STREAM': newFile['stream'],
            'FILESIZE': newFile['filesize'],
            'EVENTS': newFile['events'],
            'TIME': int(time.time())
        })

        if dbInterfaceSMNotify:
            bindInsertFileStatus.append({
                'P5_ID': newFile['p5_id'],
                'FILENAME': newFile['filename']
            })

    bindLumi = []
    for lumi in lumis:
        bindLumi.append({'RUN': lumi[0], 'LUMI': lumi[1]})
    if len(bindLumi) > 0:
        insertLumiDAO.execute(binds=bindLumi, transaction=False)

    if len(bindStreamer) > 0:
        insertStreamerDAO.execute(streamerPNN,
                                  binds=bindStreamer,
                                  transaction=False)

    if len(bindInsertFileStatus) > 0:
        insertFileStatusDAO.execute(bindInsertFileStatus, transaction=False)

    for x in bindStreamer:
        knownStreamers.add(x['P5_ID'])

    return
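
The duplicate filter near the top uses slice assignment, so newData is modified in place rather than rebound to a new list (callers holding a reference see the filtered result). A self-contained sketch with invented ids:

knownStreamers = {101, 102}
newData = [{'p5_id': 101}, {'p5_id': 103}]

newData[:] = [f for f in newData
              if f['p5_id'] not in knownStreamers]
assert newData == [{'p5_id': 103}]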
Example #11
    def __call__(self, filesetToProcess):
        """
        The algorithm itself
        """
        global LOCK

        # Get configuration
        initObj = WMInit()
        initObj.setLogging()
        initObj.setDatabaseConnection(os.getenv("DATABASE"), \
            os.getenv('DIALECT'), os.getenv("DBSOCK"))

        myThread = threading.currentThread()

        daofactory = DAOFactory(package = "WMCore.WMBS" , \
              logger = myThread.logger, \
              dbinterface = myThread.dbi)

        locationNew = daofactory(classname="Locations.New")
        getFileLoc = daofactory(classname="Files.GetLocation")


        logging.debug("the T0Feeder is processing %s" % \
                 filesetToProcess.name)
        logging.debug("the fileset name %s" % \
         (filesetToProcess.name).split(":")[0])

        startRun = (filesetToProcess.name).split(":")[3]
        fileType = (filesetToProcess.name).split(":")[2]

        # url builder
        primaryDataset = ((filesetToProcess.name).split(":")[0]).split('/')[1]
        processedDataset = ((
            filesetToProcess.name).split(":")[0]).split('/')[2]
        dataTier = (((filesetToProcess.name\
            ).split(":")[0]).split('/')[3]).split('-')[0]

        # First call to the T0 DB for this fileset
        # Here add test for the closed fileset
        LASTIME = filesetToProcess.lastUpdate

        url = "/tier0/listfilesoverinterval/%s/%s/%s/%s/%s" % \
              (fileType, LASTIME, primaryDataset,processedDataset, dataTier)

        tries = 1
        while True:

            try:

                myRequester = JSONRequests(url="vocms52.cern.ch:8889")
                requestResult = myRequester.get(
                    url + "/" + "?return_type=text/json%2Bdas")
                newFilesList = requestResult[0]["results"]

            except Exception:

                logging.debug("T0Reader call error...")
                if tries == self.maxRetries:
                    return
                else:
                    tries += 1
                    continue

            logging.debug("T0ASTRun queries done ...")
            now = time.time()
            filesetToProcess.last_update = now
            LASTIME = int(newFilesList['end_time']) + 1

            break

        # process all files
        if len(newFilesList['files']):

            LOCK.acquire()

            try:
                locationNew.execute(siteName="caf.cern.ch",
                                    seName="caf.cern.ch")
            except Exception as e:
                logging.debug("Error when adding new location...")
                logging.debug(e)
                logging.debug(format_exc())

            for files in newFilesList['files']:

                # Assume parents aren't asked
                newfile = File(str(files['lfn']), size=files['file_size'],
                               events=files['events'])

                try:
                    if not newfile.exists():
                        newfile.create()

                    else:
                        newfile.loadData()

                    #Add run test if already exist
                    for run in files['runs']:

                        if startRun != 'None' and int(startRun) <= int(run):

                            # ToDo: Distinguish between
                            # filestA-RunX and filesetA-Run[0-9]*
                            parts = filesetToProcess.name.split(':')[0].split('/')
                            filesetRun = Fileset(name="%s/%s/%s/%s-Run%s:%s" % (
                                parts[0], parts[1], parts[2],
                                parts[3].split('-')[0], run,
                                ":".join(filesetToProcess.name.split(':')[1:])))

                            if not filesetRun.exists():
                                filesetRun.create()

                            else:
                                filesetRun.loadData()

                            # Add test runs already there
                            # (for growing dataset) -
                            # to support file with different runs and lumi
                            if not newfile['runs']:

                                runSet = set()
                                runSet.add(Run(run, *files['runs'][run]))
                                newfile.addRunSet(runSet)

                            fileLoc = getFileLoc.execute(file=files['lfn'])

                            if 'caf.cern.ch' not in fileLoc:
                                newfile.setLocation("caf.cern.ch")

                            filesetRun.addFile(newfile)
                            logging.debug(
                                "new file created/loaded added by T0ASTRun...")
                            filesetRun.commit()

                except Exception as e:

                    logging.debug("Error when adding new files in T0ASTRun...")
                    logging.debug(e)
                    logging.debug(format_exc())



                filesetToProcess.setLastUpdate(int(newFilesList['end_time']) + 1)
                filesetToProcess.commit()

            LOCK.release()

        else:

            logging.debug("nothing to do...")
            # For re-opened or empty filesets, try until the purge time
            if (int(now) / 3600 - LASTIME / 3600) > self.reopenTime:

                filesetToProcess.setLastUpdate(time.time())
                filesetToProcess.commit()

        if LASTIME:

            myRequester = JSONRequests(url="vocms52.cern.ch:8889")
            requestResult = myRequester.get("/tier0/runs")

            for listRun in requestResult[0]:

                if int(startRun) <= int(listRun['run']):

                    if listRun['status'] in ('CloseOutExport', 'Complete',
                                             'CloseOutT1Skimming'):

                        parts = filesetToProcess.name.split(':')[0].split('/')
                        closeFileset = Fileset(name="%s/%s/%s/%s-Run%s:%s" % (
                            parts[0], parts[1], parts[2],
                            parts[3].split('-')[0], listRun['run'],
                            ":".join(filesetToProcess.name.split(':')[1:])))

                        if closeFileset.exists():

                            closeFileset = Fileset(id=closeFileset.exists())
                            closeFileset.loadData()

                            if closeFileset.open:
                                closeFileset.markOpen(False)

        # Commit the fileset
        filesetToProcess.commit()

        # Check whether the fileset should be purged
        logging.debug("Test purge in T0ASTRun ...")
        filesetToProcess.load()
        LASTIME = filesetToProcess.lastUpdate

        if (int(now) / 3600 - LASTIME / 3600) > self.purgeTime:

            filesetToProcess.markOpen(False)
            logging.debug("Purge Done...")

        filesetToProcess.commit()
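
A worked illustration of the per-run fileset name rebuild used twice above, with a made-up fileset name:

name = '/Prim/Proc/TIER-Extra:type:stream:42'   # invented
parts = name.split(':')[0].split('/')
runName = "%s/%s/%s/%s-Run%s:%s" % (
    parts[0], parts[1], parts[2],
    parts[3].split('-')[0], 7,
    ":".join(name.split(':')[1:]))
assert runName == '/Prim/Proc/TIER-Run7:type:stream:42'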
Example #12
    def testPublishJSONCreate(self):
        """
        Re-run testA_BasicFunctionTest with data in DBSBuffer
        Make sure files are generated
        """

        # Set up uploading and write the files elsewhere, since the test deletes them.
        self.uploadPublishInfo = True
        self.uploadPublishDir = self.testDir

        # Insert some DBSFiles
        testFileChildA = DBSBufferFile(lfn="/this/is/a/child/lfnA",
                                       size=1024,
                                       events=20)
        testFileChildA.setAlgorithm(appName="cmsRun",
                                    appVer="CMSSW_2_1_8",
                                    appFam="RECO",
                                    psetHash="GIBBERISH",
                                    configContent="MOREGIBBERISH")
        testFileChildB = DBSBufferFile(lfn="/this/is/a/child/lfnB",
                                       size=1024,
                                       events=20)
        testFileChildB.setAlgorithm(appName="cmsRun",
                                    appVer="CMSSW_2_1_8",
                                    appFam="RECO",
                                    psetHash="GIBBERISH",
                                    configContent="MOREGIBBERISH")
        testFileChildC = DBSBufferFile(lfn="/this/is/a/child/lfnC",
                                       size=1024,
                                       events=20)
        testFileChildC.setAlgorithm(appName="cmsRun",
                                    appVer="CMSSW_2_1_8",
                                    appFam="RECO",
                                    psetHash="GIBBERISH",
                                    configContent="MOREGIBBERISH")

        testFileChildA.setDatasetPath("/Cosmics/USER-DATASET1-v1/USER")
        testFileChildB.setDatasetPath("/Cosmics/USER-DATASET1-v1/USER")
        testFileChildC.setDatasetPath("/Cosmics/USER-DATASET2-v1/USER")

        testFileChildA.create()
        testFileChildB.create()
        testFileChildC.create()

        testFile = DBSBufferFile(lfn="/this/is/a/lfn", size=1024, events=10)
        testFile.setAlgorithm(appName="cmsRun",
                              appVer="CMSSW_2_1_8",
                              appFam="RECO",
                              psetHash="GIBBERISH",
                              configContent="MOREGIBBERISH")
        testFile.setDatasetPath("/Cosmics/CRUZET09-PromptReco-v1/RECO")
        testFile.create()

        testFileChildA.addParents([testFile["lfn"]])
        testFileChildB.addParents([testFile["lfn"]])
        testFileChildC.addParents([testFile["lfn"]])

        myThread = threading.currentThread()
        self.dbsDaoFactory = DAOFactory(package="WMComponent.DBS3Buffer",
                                        logger=myThread.logger,
                                        dbinterface=myThread.dbi)
        self.insertWorkflow = self.dbsDaoFactory(classname="InsertWorkflow")
        workflowID = self.insertWorkflow.execute(
            requestName='TestWorkload',
            taskPath='TestWorkload/Analysis',
            blockMaxCloseTime=100,
            blockMaxFiles=100,
            blockMaxEvents=100,
            blockMaxSize=100)
        myThread.dbi.processData(
            "update dbsbuffer_file set workflow=1 where id < 4")

        # Run the test again
        self.testA_BasicFunctionTest()

        # Reset default values
        self.uploadPublishInfo = False
        self.uploadPublishDir = None

        # Make sure the files are there
        self.assertTrue(
            os.path.exists(
                os.path.join(self.testDir, 'TestWorkload_publish.json')))
        self.assertTrue(
            os.path.getsize(
                os.path.join(self.testDir, 'TestWorkload_publish.json')) > 100)
        self.assertTrue(
            os.path.exists(
                os.path.join(self.testDir, 'TestWorkload_publish.tgz')))

        return
Example #13
def uploadConditions(username, password, serviceProxy):
    """
    _uploadConditions_

    Called by Tier0Feeder in every polling cycle

    Determine the PCL status, incl. files for upload, for all
    run/stream combos that are not finished yet.

    Loop through the runs, uploading files for all
    streams. If the run/stream upload subscription
    is finished, mark that run/stream PCL as finished.

    Terminate the loop on the first run whose streams have
    not all finished, but only within a certain timeout
    based on the run's end time (either from the EoR record
    or based on the insertion time of the last streamer file).

    """
    logging.debug("uploadConditions()")
    myThread = threading.currentThread()

    daoFactory = DAOFactory(package="T0.WMBS",
                            logger=logging,
                            dbinterface=myThread.dbi)

    getConditionsDAO = daoFactory(classname="ConditionUpload.GetConditions")
    completeFilesDAO = daoFactory(classname="ConditionUpload.CompleteFiles")

    finishPCLforEmptyExpressDAO = daoFactory(
        classname="ConditionUpload.FinishPCLforEmptyExpress")

    isPromptCalibrationFinishedDAO = daoFactory(
        classname="ConditionUpload.IsPromptCalibrationFinished")
    markPromptCalibrationFinishedDAO = daoFactory(
        classname="ConditionUpload.MarkPromptCalibrationFinished")

    # look at all runs which are finished with conditions uploads
    # check for late arriving payloads and upload them
    conditions = getConditionsDAO.execute(finished=True, transaction=False)

    for (index, run) in enumerate(sorted(conditions.keys()), 1):

        dropboxHost = conditions[run]['dropboxHost']
        validationMode = conditions[run]['validationMode']

        for streamid, uploadableFiles in conditions[run]['streams'].items():

            if len(uploadableFiles) > 0:

                uploadedFiles = uploadToDropbox(uploadableFiles, dropboxHost,
                                                validationMode, username,
                                                password, serviceProxy)

                if len(uploadedFiles) > 0:

                    bindVarList = []
                    for uploadedFile in uploadedFiles:
                        bindVarList.append({
                            'FILEID':
                            uploadedFile['fileid'],
                            'SUBSCRIPTION':
                            uploadedFile['subscription']
                        })

                    # need a transaction here so we don't have files in
                    # state acquired and complete at the same time
                    try:
                        myThread.transaction.begin()
                        completeFilesDAO.execute(
                            bindVarList,
                            conn=myThread.transaction.conn,
                            transaction=True)
                    except:
                        myThread.transaction.rollback()
                        raise
                    else:
                        myThread.transaction.commit()

    # check for pathological runs with no express data that will never
    # create conditions for upload and set them to finished
    finishPCLforEmptyExpressDAO.execute(transaction=False)

    # look at all runs not completely finished with condition uploads
    # return acquired (to be uploaded) files for them
    conditions = getConditionsDAO.execute(finished=False, transaction=False)

    for (index, run) in enumerate(sorted(conditions.keys()), 1):

        advanceToNextRun = True

        timeout = conditions[run]['condUploadTimeout']
        dropboxHost = conditions[run]['dropboxHost']
        validationMode = conditions[run]['validationMode']

        for streamid, uploadableFiles in conditions[run]['streams'].items():

            if len(uploadableFiles) > 0:

                uploadedFiles = uploadToDropbox(uploadableFiles, dropboxHost,
                                                validationMode, username,
                                                password, serviceProxy)

                if len(uploadedFiles) > 0:

                    bindVarList = []
                    for uploadedFile in uploadedFiles:
                        bindVarList.append({
                            'FILEID':
                            uploadedFile['fileid'],
                            'SUBSCRIPTION':
                            uploadedFile['subscription']
                        })

                    # need a transaction here so we don't have files in
                    # state acquired and complete at the same time
                    try:
                        myThread.transaction.begin()
                        completeFilesDAO.execute(
                            bindVarList,
                            conn=myThread.transaction.conn,
                            transaction=True)
                    except:
                        myThread.transaction.rollback()
                        raise
                    else:
                        myThread.transaction.commit()

                    # check if all files for run/stream uploaded (that means only complete
                    # files for same number of subscriptions as number of producers)
                    markPromptCalibrationFinishedDAO.execute(run,
                                                             streamid,
                                                             transaction=False)

                else:
                    # upload failed
                    advanceToNextRun = False

            else:
                # no files available for upload yet
                advanceToNextRun = False

        # check if all streams for run finished
        if advanceToNextRun:
            finished = isPromptCalibrationFinishedDAO.execute(
                run, transaction=False)
            if not finished:
                advanceToNextRun = False

        # check for timeout, but only if there is a next run
        if not advanceToNextRun and index < len(conditions.keys()):

            getRunStopTimeDAO = daoFactory(
                classname="ConditionUpload.GetRunStopTime")
            stopTime = getRunStopTimeDAO.execute(run, transaction=False)

            if time.time() < stopTime + timeout:
                break

    return
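
The commit-or-rollback shape around completeFilesDAO is the standard try/except/else transaction pattern; a minimal sketch with a dummy object standing in for myThread.transaction:

class _Txn(object):
    """Hypothetical stand-in for myThread.transaction."""
    def begin(self):
        print("BEGIN")
    def commit(self):
        print("COMMIT")
    def rollback(self):
        print("ROLLBACK")

txn = _Txn()
try:
    txn.begin()
    pass              # DAO work goes here; an exception triggers rollback
except Exception:
    txn.rollback()
    raise
else:
    txn.commit()      # only reached if the try body did not raise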
Example #14
    def stuffDatabase(self):
        """
        _stuffDatabase_

        Fill the dbsbuffer with some files and blocks.  We'll insert a total
        of 5 files spanning two blocks.  There will be a total of two datasets
        inserted into the database.

        We'll inject files with the location set to both an SE name and a
        PhEDEx node name.
        """
        myThread = threading.currentThread()

        buffer3Factory = DAOFactory(package="WMComponent.DBS3Buffer",
                                    logger=myThread.logger,
                                    dbinterface=myThread.dbi)
        insertWorkflow = buffer3Factory(classname="InsertWorkflow")
        insertWorkflow.execute("BogusRequest", "BogusTask", 0, 0, 0, 0)

        checksums = {"adler32": "1234", "cksum": "5678"}
        testFileA = DBSBufferFile(lfn=makeUUID(),
                                  size=1024,
                                  events=10,
                                  checksums=checksums,
                                  locations=set(["srm-cms.cern.ch"]))
        testFileA.setAlgorithm(appName="cmsRun",
                               appVer="CMSSW_2_1_8",
                               appFam="RECO",
                               psetHash="GIBBERISH",
                               configContent="MOREGIBBERISH")
        testFileA.setDatasetPath(self.testDatasetA)
        testFileA.addRun(Run(2, *[45]))
        testFileA.create()

        testFileB = DBSBufferFile(lfn=makeUUID(),
                                  size=1024,
                                  events=10,
                                  checksums=checksums,
                                  locations=set(["srm-cms.cern.ch"]))
        testFileB.setAlgorithm(appName="cmsRun",
                               appVer="CMSSW_2_1_8",
                               appFam="RECO",
                               psetHash="GIBBERISH",
                               configContent="MOREGIBBERISH")
        testFileB.setDatasetPath(self.testDatasetA)
        testFileB.addRun(Run(2, *[45]))
        testFileB.create()

        testFileC = DBSBufferFile(lfn=makeUUID(),
                                  size=1024,
                                  events=10,
                                  checksums=checksums,
                                  locations=set(["srm-cms.cern.ch"]))
        testFileC.setAlgorithm(appName="cmsRun",
                               appVer="CMSSW_2_1_8",
                               appFam="RECO",
                               psetHash="GIBBERISH",
                               configContent="MOREGIBBERISH")
        testFileC.setDatasetPath(self.testDatasetA)
        testFileC.addRun(Run(2, *[45]))
        testFileC.create()

        self.testFilesA.append(testFileA)
        self.testFilesA.append(testFileB)
        self.testFilesA.append(testFileC)

        testFileD = DBSBufferFile(lfn=makeUUID(),
                                  size=1024,
                                  events=10,
                                  checksums=checksums,
                                  locations=set(["srm-cms.cern.ch"]))
        testFileD.setAlgorithm(appName="cmsRun",
                               appVer="CMSSW_2_1_8",
                               appFam="RECO",
                               psetHash="GIBBERISH",
                               configContent="MOREGIBBERISH")
        testFileD.setDatasetPath(self.testDatasetB)
        testFileD.addRun(Run(2, *[45]))
        testFileD.create()

        testFileE = DBSBufferFile(lfn=makeUUID(),
                                  size=1024,
                                  events=10,
                                  checksums=checksums,
                                  locations=set(["srm-cms.cern.ch"]))
        testFileE.setAlgorithm(appName="cmsRun",
                               appVer="CMSSW_2_1_8",
                               appFam="RECO",
                               psetHash="GIBBERISH",
                               configContent="MOREGIBBERISH")
        testFileE.setDatasetPath(self.testDatasetB)
        testFileE.addRun(Run(2, *[45]))
        testFileE.create()

        self.testFilesB.append(testFileD)
        self.testFilesB.append(testFileE)

        uploadFactory = DAOFactory(package="WMComponent.DBS3Buffer",
                                   logger=myThread.logger,
                                   dbinterface=myThread.dbi)
        datasetAction = uploadFactory(classname="NewDataset")
        createAction = uploadFactory(classname="CreateBlocks")

        datasetAction.execute(datasetPath=self.testDatasetA)
        datasetAction.execute(datasetPath=self.testDatasetB)

        self.blockAName = self.testDatasetA + "#" + makeUUID()
        self.blockBName = self.testDatasetB + "#" + makeUUID()

        newBlockA = DBSBufferBlock(name=self.blockAName,
                                   location="srm-cms.cern.ch",
                                   datasetpath=None)
        newBlockA.setDataset(self.testDatasetA, 'data', 'VALID')
        newBlockA.status = 'Closed'

        newBlockB = DBSBufferBlock(name=self.blockBName,
                                   location="srm-cms.cern.ch",
                                   datasetpath=None)
        newBlockB.setDataset(self.testDatasetB, 'data', 'VALID')
        newBlockB.status = 'Closed'

        createAction.execute(blocks=[newBlockA, newBlockB])

        bufferFactory = DAOFactory(package="WMComponent.DBSBuffer.Database",
                                   logger=myThread.logger,
                                   dbinterface=myThread.dbi)

        setBlock = bufferFactory(classname="DBSBufferFiles.SetBlock")
        setBlock.execute(testFileA["lfn"], self.blockAName)
        setBlock.execute(testFileB["lfn"], self.blockAName)
        setBlock.execute(testFileC["lfn"], self.blockAName)
        setBlock.execute(testFileD["lfn"], self.blockBName)
        setBlock.execute(testFileE["lfn"], self.blockBName)

        fileStatus = bufferFactory(classname="DBSBufferFiles.SetStatus")
        fileStatus.execute(testFileA["lfn"], "LOCAL")
        fileStatus.execute(testFileB["lfn"], "LOCAL")
        fileStatus.execute(testFileC["lfn"], "LOCAL")
        fileStatus.execute(testFileD["lfn"], "LOCAL")
        fileStatus.execute(testFileE["lfn"], "LOCAL")

        associateWorkflow = buffer3Factory(
            classname="DBSBufferFiles.AssociateWorkflowToFile")
        associateWorkflow.execute(testFileA["lfn"], "BogusRequest",
                                  "BogusTask")
        associateWorkflow.execute(testFileB["lfn"], "BogusRequest",
                                  "BogusTask")
        associateWorkflow.execute(testFileC["lfn"], "BogusRequest",
                                  "BogusTask")
        associateWorkflow.execute(testFileD["lfn"], "BogusRequest",
                                  "BogusTask")
        associateWorkflow.execute(testFileE["lfn"], "BogusRequest",
                                  "BogusTask")

        return
Exemple #15
0
    def stuffDatabase(self):
        """
        Fill the dbsbuffer tables with some files and blocks.  We'll insert a total
        of 5 files spanning two blocks.  There will be a total of two datasets
        inserted into the database.
        We'll inject files with the location set as both an SE name and a
        PhEDEx node name.
        """
        myThread = threading.currentThread()

        # Create the DAOs factory and the relevant instances
        buffer3Factory = DAOFactory(package="WMComponent.DBS3Buffer",
                                    logger=myThread.logger,
                                    dbinterface=myThread.dbi)
        setBlock = buffer3Factory(classname="DBSBufferFiles.SetBlock")
        fileStatus = buffer3Factory(classname="DBSBufferFiles.SetStatus")
        associateWorkflow = buffer3Factory(classname="DBSBufferFiles.AssociateWorkflowToFile")
        insertWorkflow = buffer3Factory(classname="InsertWorkflow")
        datasetAction = buffer3Factory(classname="NewDataset")
        createAction = buffer3Factory(classname="CreateBlocks")

        # Create workflow in the database
        insertWorkflow.execute("BogusRequest", "BogusTask", 0, 0, 0, 0)

        # First file on first block
        checksums = {"adler32": "1234", "cksum": "5678"}
        testFileA = DBSBufferFile(lfn=makeUUID(), size=1024, events=10,
                                  checksums=checksums,
                                  locations=set(["T2_CH_CERN"]))
        testFileA.setAlgorithm(appName="cmsRun", appVer="CMSSW_2_1_8",
                               appFam="RECO", psetHash="GIBBERISH",
                               configContent="MOREGIBBERISH")
        testFileA.setDatasetPath(self.testDatasetA)
        testFileA.addRun(Run(2, *[45]))
        testFileA.create()

        # Second file on first block
        testFileB = DBSBufferFile(lfn=makeUUID(), size=1024, events=10,
                                  checksums=checksums,
                                  locations=set(["T2_CH_CERN"]))
        testFileB.setAlgorithm(appName="cmsRun", appVer="CMSSW_2_1_8",
                               appFam="RECO", psetHash="GIBBERISH",
                               configContent="MOREGIBBERISH")
        testFileB.setDatasetPath(self.testDatasetA)
        testFileB.addRun(Run(2, *[45]))
        testFileB.create()

        # Third file on first block
        testFileC = DBSBufferFile(lfn=makeUUID(), size=1024, events=10,
                                  checksums=checksums,
                                  locations=set(["T2_CH_CERN"]))
        testFileC.setAlgorithm(appName="cmsRun", appVer="CMSSW_2_1_8",
                               appFam="RECO", psetHash="GIBBERISH",
                               configContent="MOREGIBBERISH")
        testFileC.setDatasetPath(self.testDatasetA)
        testFileC.addRun(Run(2, *[45]))
        testFileC.create()

        self.testFilesA.append(testFileA)
        self.testFilesA.append(testFileB)
        self.testFilesA.append(testFileC)

        # First file on second block
        testFileD = DBSBufferFile(lfn=makeUUID(), size=1024, events=10,
                                  checksums=checksums,
                                  locations=set(["T1_US_FNAL_Disk"]))
        testFileD.setAlgorithm(appName="cmsRun", appVer="CMSSW_2_1_8",
                               appFam="RECO", psetHash="GIBBERISH",
                               configContent="MOREGIBBERISH")
        testFileD.setDatasetPath(self.testDatasetB)
        testFileD.addRun(Run(2, *[45]))
        testFileD.create()

        # Second file on second block
        testFileE = DBSBufferFile(lfn=makeUUID(), size=1024, events=10,
                                  checksums=checksums,
                                  locations=set(["T1_US_FNAL_Disk"]))
        testFileE.setAlgorithm(appName="cmsRun", appVer="CMSSW_2_1_8",
                               appFam="RECO", psetHash="GIBBERISH",
                               configContent="MOREGIBBERISH")
        testFileE.setDatasetPath(self.testDatasetB)
        testFileE.addRun(Run(2, *[45]))
        testFileE.create()

        self.testFilesB.append(testFileD)
        self.testFilesB.append(testFileE)

        # insert datasets in the dbsbuffer table
        datasetAction.execute(datasetPath=self.testDatasetA)
        datasetAction.execute(datasetPath=self.testDatasetB)

        self.blockAName = self.testDatasetA + "#" + makeUUID()
        self.blockBName = self.testDatasetB + "#" + makeUUID()

        # create and insert blocks into dbsbuffer table
        newBlockA = DBSBufferBlock(name=self.blockAName,
                                   location="T2_CH_CERN",
                                   datasetpath=None)
        newBlockA.setDataset(self.testDatasetA, 'data', 'VALID')
        newBlockA.status = 'Closed'

        newBlockB = DBSBufferBlock(name=self.blockBName,
                                   location="T1_US_FNAL_Disk",
                                   datasetpath=None)
        newBlockB.setDataset(self.testDatasetB, 'data', 'VALID')
        newBlockB.status = 'Closed'

        createAction.execute(blocks=[newBlockA, newBlockB])

        # associate files to their correspondent block id
        setBlock.execute(testFileA["lfn"], self.blockAName)
        setBlock.execute(testFileB["lfn"], self.blockAName)
        setBlock.execute(testFileC["lfn"], self.blockAName)
        setBlock.execute(testFileD["lfn"], self.blockBName)
        setBlock.execute(testFileE["lfn"], self.blockBName)

        # set file status to LOCAL
        fileStatus.execute(testFileA["lfn"], "LOCAL")
        fileStatus.execute(testFileB["lfn"], "LOCAL")
        fileStatus.execute(testFileC["lfn"], "LOCAL")
        fileStatus.execute(testFileD["lfn"], "LOCAL")
        fileStatus.execute(testFileE["lfn"], "LOCAL")

        # associate files to a given workflow
        associateWorkflow.execute(testFileA["lfn"], "BogusRequest", "BogusTask")
        associateWorkflow.execute(testFileB["lfn"], "BogusRequest", "BogusTask")
        associateWorkflow.execute(testFileC["lfn"], "BogusRequest", "BogusTask")
        associateWorkflow.execute(testFileD["lfn"], "BogusRequest", "BogusTask")
        associateWorkflow.execute(testFileE["lfn"], "BogusRequest", "BogusTask")

        return
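The five file insertions in both stuffDatabase variants differ only in dataset path and location; a hedged refactoring sketch under that observation (makeFile is hypothetical, not part of the original tests):

def makeFile(datasetPath, location, checksums):
    # Create one dbsbuffer test file; mirrors the repeated blocks above.
    testFile = DBSBufferFile(lfn=makeUUID(), size=1024, events=10,
                             checksums=checksums,
                             locations=set([location]))
    testFile.setAlgorithm(appName="cmsRun", appVer="CMSSW_2_1_8",
                          appFam="RECO", psetHash="GIBBERISH",
                          configContent="MOREGIBBERISH")
    testFile.setDatasetPath(datasetPath)
    testFile.addRun(Run(2, *[45]))
    testFile.create()
    return testFile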
Exemple #16
0
    def setUp(self):
        """
        _setUp_

        Create two subscriptions: One that contains a single file and one that
        contains multiple files.
        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules=["WMCore.WMBS"],
                                useDefault=False)

        myThread = threading.currentThread()
        daofactory = DAOFactory(package="WMCore.WMBS",
                                logger=myThread.logger,
                                dbinterface=myThread.dbi)

        locationAction = daofactory(classname="Locations.New")
        locationAction.execute(siteName="site1", pnn="T2_CH_CERN")
        locationAction.execute(siteName="site2", pnn="T1_US_FNAL_Disk")

        self.multipleFileFileset = Fileset(name="TestFileset1")
        self.multipleFileFileset.create()
        for i in range(10):
            newFile = File(makeUUID(),
                           size=1000,
                           events=100,
                           locations=set(["T2_CH_CERN"]))
            newFile.create()
            self.multipleFileFileset.addFile(newFile)
        self.multipleFileFileset.commit()

        self.singleFileFileset = Fileset(name="TestFileset2")
        self.singleFileFileset.create()
        newFile = File("/some/file/name",
                       size=1000,
                       events=100,
                       locations=set(["T2_CH_CERN"]))
        newFile.create()
        self.singleFileFileset.addFile(newFile)
        self.singleFileFileset.commit()

        self.multipleSiteFileset = Fileset(name="TestFileset3")
        self.multipleSiteFileset.create()
        for i in range(5):
            newFile = File(makeUUID(), size=1000, events=100)
            newFile.setLocation("T2_CH_CERN")
            newFile.create()
            self.multipleSiteFileset.addFile(newFile)
        for i in range(5):
            newFile = File(makeUUID(), size=1000, events=100)
            newFile.setLocation(["T2_CH_CERN", "T1_US_FNAL_Disk"])
            newFile.create()
            self.multipleSiteFileset.addFile(newFile)
        self.multipleSiteFileset.commit()

        testWorkflow = Workflow(spec="spec.xml",
                                owner="Steve",
                                name="wf001",
                                task="Test")
        testWorkflow.create()
        self.multipleFileSubscription = Subscription(
            fileset=self.multipleFileFileset,
            workflow=testWorkflow,
            split_algo="SizeBased",
            type="Processing")
        self.multipleFileSubscription.create()
        self.singleFileSubscription = Subscription(
            fileset=self.singleFileFileset,
            workflow=testWorkflow,
            split_algo="SizeBased",
            type="Processing")
        self.singleFileSubscription.create()
        self.multipleSiteSubscription = Subscription(
            fileset=self.multipleSiteFileset,
            workflow=testWorkflow,
            split_algo="SizeBased",
            type="Processing")
        self.multipleSiteSubscription.create()
        return
Exemple #17
0
    def __init__(self, config):
        """
        Initialise class members
        """
        logging.info("Running __init__ for DBS3 Uploader")
        BaseWorkerThread.__init__(self)
        self.config = config

        # This is slightly dangerous, but DBSUpload depends
        # on DBSInterface anyway
        self.dbsUrl = self.config.DBS3Upload.dbsUrl

        # Tier0 Agent doesn't need this
        if hasattr(self.config, "Tier0Feeder"):
            self.wmstatsServerSvc = None
        else:
            wmstatsSvcURL = self.config.General.centralWMStatsURL.replace("couchdb/wmstats",
                                                                          "wmstatsserver")
            self.wmstatsServerSvc = WMStatsServer(wmstatsSvcURL)

        self.dbsUtil = DBSBufferUtil()

        myThread = threading.currentThread()
        daoFactory = DAOFactory(package="WMComponent.DBS3Buffer",
                                logger=myThread.logger,
                                dbinterface=myThread.dbi)
        self.updateBlocksDAO = daoFactory(classname="UpdateBlocks")
        self.updateFilesDAO = daoFactory(classname="UpdateFiles")
        self.createBlocksDAO = daoFactory(classname="CreateBlocks")
        self.setBlockFilesDAO = daoFactory(classname="SetBlockFiles")

        self.pool = []
        self.blocksToCheck = []
        self.workInput = None
        self.workResult = None
        self.nProc = getattr(self.config.DBS3Upload, 'nProcesses', 4)
        self.wait = getattr(self.config.DBS3Upload, 'dbsWaitTime', 2)
        self.nTries = getattr(self.config.DBS3Upload, 'dbsNTries', 300)
        self.physicsGroup = getattr(self.config.DBS3Upload, "physicsGroup", "NoGroup")
        self.datasetType = getattr(self.config.DBS3Upload, "datasetType", "PRODUCTION")
        self.primaryDatasetType = getattr(self.config.DBS3Upload, "primaryDatasetType", "mc")
        self.blockCount = 0
        self.dbsApi = DbsApi(url=self.dbsUrl)

        # List of blocks currently in processing
        self.queuedBlocks = []

        # Set up the pool of worker processes
        self.setupPool()

        # Setting up any cache objects
        self.blockCache = {}

        self.filesToUpdate = []

        self.produceCopy = getattr(self.config.DBS3Upload, 'dumpBlock', False)

        self.copyPath = os.path.join(getattr(self.config.DBS3Upload, 'componentDir', '/data/srv/'),
                                     'dbsuploader_block.json')

        self.timeoutWaiver = 1

        self.datasetParentageCache = {}

        return
Exemple #18
0
def wmbsSubscriptionStatus(logger, dbi, conn, transaction):
    """Function to return status of wmbs subscriptions
    """
    action = DAOFactory(package='WMBS', logger=logger,
                        dbinterface=dbi)('Monitoring.SubscriptionStatus')
    return action.execute(conn=conn, transaction=transaction)
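A minimal usage sketch, assuming the caller runs inside a thread where the agent harness has attached dbi and transaction objects (as the other examples here do):

import logging
import threading

myThread = threading.currentThread()
subStatus = wmbsSubscriptionStatus(logger=logging,
                                   dbi=myThread.dbi,
                                   conn=myThread.transaction.conn,
                                   transaction=True)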
Exemple #19
0
    def addParents(self, parentLFNs):
        """
        _addParents_

        Associate this file with its parents.  If the parents do not exist in
        the buffer, bogus placeholder files will be created so that the
        parentage information can be tracked and correctly inserted into DBS.
        """
        newAlgoAction = self.daoFactory(classname = "NewAlgo")
        newDatasetAction = self.daoFactory(classname = "NewDataset")
        assocAction = self.daoFactory(classname = "AlgoDatasetAssoc")
        existsAction = self.daoFactory(classname = "DBSBufferFiles.Exists")

        uploadFactory = DAOFactory(package = "WMComponent.DBS3Buffer",
                                   logger = self.logger,
                                   dbinterface = self.dbi)
        setDatasetAlgoAction = uploadFactory(classname = "SetDatasetAlgo")

        existingTransaction = self.beginTransaction()

        toBeCreated = []
        for parentLFN in parentLFNs:
            self["parents"].add(DBSBufferFile(lfn = parentLFN))
            if not existsAction.execute(lfn = parentLFN,
                                        conn = self.getDBConn(),
                                        transaction = True):
                toBeCreated.append(parentLFN)

        if len(toBeCreated) > 0:
            newAlgoAction.execute(appName = "cmsRun", appVer = "UNKNOWN",
                                  appFam = "UNKNOWN", psetHash = "NOT_SET",
                                  configContent = "NOT_SET",
                                  conn = self.getDBConn(),
                                  transaction = True)

            newDatasetAction.execute(datasetPath = "bogus",
                                     conn = self.getDBConn(),
                                     transaction = True)

            assocID = assocAction.execute(appName = "cmsRun", appVer = "UNKNOWN",
                                          appFam = "UNKNOWN", psetHash = "NOT_SET",
                                          datasetPath = "bogus",
                                          conn = self.getDBConn(),
                                          transaction = True)

            setDatasetAlgoAction.execute(datasetAlgo = assocID, inDBS = 1,
                                         conn = self.getDBConn(),
                                         transaction = True)

            action = self.daoFactory(classname = "DBSBufferFiles.AddIgnore")
            action.execute(lfns = toBeCreated, datasetAlgo = assocID,
                           status = "GLOBAL",
                           conn = self.getDBConn(),
                           transaction = True)

        action = self.daoFactory(classname = "DBSBufferFiles.HeritageLFNParent")
        action.execute(parentLFNs = parentLFNs, childLFN = self["lfn"],
                       conn = self.getDBConn(),
                       transaction = self.existingTransaction())
        self.commitTransaction(existingTransaction)
        return
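Hypothetical usage of addParents, assuming the child file already exists in the buffer and that DBSBufferFile supports load() as WMBS files do; the LFNs are illustrative only:

childFile = DBSBufferFile(lfn="/store/unmerged/child.root")
childFile.load()
childFile.addParents(["/store/data/parentA.root",
                      "/store/data/parentB.root"])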
Exemple #20
0
def killWorkflow(workflowName, jobCouchConfig, bossAirConfig=None):
    """
    _killWorkflow_

    Kill a workflow that is already executing inside the agent.  This will
    mark all incomplete jobs as failed and files that belong to all
    non-cleanup and non-logcollect subscriptions as failed.  The name of the
    JSM couch database and the URL to the database must be passed in as well
    so the state transitions are logged.
    """
    myThread = threading.currentThread()
    daoFactory = DAOFactory(package="WMCore.WMBS",
                            logger=myThread.logger,
                            dbinterface=myThread.dbi)
    killFilesAction = daoFactory(classname="Subscriptions.KillWorkflow")
    killJobsAction = daoFactory(classname="Jobs.KillWorkflow")

    killFilesAction.execute(workflowName=workflowName,
                            conn=myThread.transaction.conn)

    liveJobs = killJobsAction.execute(workflowName=workflowName,
                                      conn=myThread.transaction.conn)

    changeState = ChangeState(jobCouchConfig)

    # Deal with any jobs that are running in the batch system
    # only works if we can start the API
    if bossAirConfig:
        bossAir = BossAirAPI(config=bossAirConfig, noSetup=True)
        killableJobs = []
        for liveJob in liveJobs:
            if liveJob["state"].lower() == 'executing':
                # Then we need to kill this on the batch system
                liveWMBSJob = Job(id=liveJob["id"])
                liveWMBSJob.update(liveJob)
                killableJobs.append(liveJob)
        # Now kill them
        try:
            bossAir.kill(jobs=killableJobs)
        except BossAirException as ex:
            # Something's gone wrong
            # Jobs not killed!
            logging.error(
                "Error while trying to kill running jobs in workflow!\n")
            logging.error(str(ex))
            trace = getattr(ex, 'traceback', '')
            logging.error(trace)
            # But continue; we need to kill the jobs in the master
            # the batch system will have to take care of itself.
            pass

    liveWMBSJobs = defaultdict(list)
    for liveJob in liveJobs:
        if liveJob["state"] == "killed":
            # Then we've killed it already
            continue
        liveWMBSJob = Job(id=liveJob["id"])
        liveWMBSJob.update(liveJob)
        liveWMBSJobs[liveJob["state"]].append(liveWMBSJob)

    for state, jobsByState in liveWMBSJobs.items():
        if len(jobsByState) > 100 and state != "executing":
            # if there are too many jobs, skip the couch and dashboard updates
            # TODO: couch and dashboard need to be updated or run in parallel.
            changeState.check("killed", state)
            changeState.persist(jobsByState, "killed", state)
        else:
            changeState.propagate(jobsByState, "killed", state)
    return
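A hedged invocation sketch: the config path and workflow name are assumptions, and the full agent configuration is passed for both couch and BossAir settings, matching how the function body uses them:

from WMCore.Configuration import loadConfigurationFile

config = loadConfigurationFile("/path/to/wmagent/config.py")  # assumed path
killWorkflow("SomeWorkflowName", jobCouchConfig=config, bossAirConfig=config)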
Exemple #21
0
    def algorithm(self, *args, **kwargs):
        """
        _algorithm_

        Run the discovery query and generate jobs if we find enough files.
        """
        # This doesn't use a proxy
        self.grabByProxy = False

        filesPerJob = int(kwargs.get("files_per_job", 10))

        myThread = threading.currentThread()
        daoFactory = DAOFactory(package="WMCore.WMBS",
                                logger=myThread.logger,
                                dbinterface=myThread.dbi)

        fileAvail = daoFactory(
            classname="Subscriptions.SiblingSubscriptionsComplete")
        completeFiles = fileAvail.execute(self.subscription["id"],
                                          conn=myThread.transaction.conn,
                                          transaction=True)

        self.subscription["fileset"].load()
        if self.subscription["fileset"].open == True:
            filesetClosed = False
        else:
            fileFailed = daoFactory(
                classname="Subscriptions.SiblingSubscriptionsFailed")
            fileFailed.execute(self.subscription["id"],
                               self.subscription["fileset"].id,
                               conn=myThread.transaction.conn,
                               transaction=True)
            filesetClosed = True

        fileSites = {}
        foundFiles = []
        for completeFile in completeFiles:
            if completeFile["lfn"] not in foundFiles:
                foundFiles.append(completeFile["lfn"])
            else:
                continue

            if not fileSites.has_key(completeFile["se_name"]):
                fileSites[completeFile["se_name"]] = []

            fileSites[completeFile["se_name"]].append(completeFile)

        for siteName in fileSites.keys():
            if len(fileSites[siteName]) < filesPerJob and not filesetClosed:
                continue

            self.newGroup()
            while len(fileSites[siteName]) >= filesPerJob:
                self.newJob(name=makeUUID())
                for jobFile in fileSites[siteName][0:filesPerJob]:
                    newFile = File(id=jobFile["id"],
                                   lfn=jobFile["lfn"],
                                   events=jobFile["events"])
                    newFile["locations"] = set([jobFile["se_name"]])
                    self.currentJob.addFile(newFile)

                fileSites[siteName] = fileSites[siteName][filesPerJob:]

            if filesetClosed and len(fileSites[siteName]) > 0:
                self.newJob(name=makeUUID())
                for jobFile in fileSites[siteName]:
                    newFile = File(id=jobFile["id"],
                                   lfn=jobFile["lfn"],
                                   events=jobFile["events"])
                    newFile["locations"] = set([jobFile["se_name"]])
                    self.currentJob.addFile(newFile)

        return
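The job creation above slices each site's file list into fixed-size chunks and flushes a short remainder only once the fileset is closed; a standalone sketch of that idiom (chunkFiles is hypothetical):

def chunkFiles(files, filesPerJob, flushRemainder=False):
    # Yield lists of exactly filesPerJob files; a short final chunk is
    # yielded only when flushRemainder is True (fileset closed).
    while len(files) >= filesPerJob:
        yield files[:filesPerJob]
        files = files[filesPerJob:]
    if flushRemainder and files:
        yield files

# list(chunkFiles(list(range(7)), 3, flushRemainder=True))
# -> [[0, 1, 2], [3, 4, 5], [6]]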
Exemple #22
0
    def algorithm(self, groupInstance=None, jobInstance=None, *args, **kwargs):
        """
        _algorithm_

        A file based splitting algorithm

        """
        # extract some global scheduling parameters
        self.jobNamePrefix = kwargs.get('jobNamePrefix', "RepackMerge")

        self.minInputSize = kwargs['minInputSize']
        self.maxInputSize = kwargs['maxInputSize']

        self.maxInputEvents = kwargs['maxInputEvents']
        self.maxInputFiles = kwargs['maxInputFiles']

        self.maxEdmSize = kwargs['maxEdmSize']
        self.maxOverSize = kwargs['maxOverSize']

        # catch configuration errors
        if self.maxOverSize > self.maxEdmSize:
            self.maxOverSize = self.maxEdmSize

        self.createdGroup = False

        myThread = threading.currentThread()
        daoFactory = DAOFactory(package="T0.WMBS",
                                logger=logging,
                                dbinterface=myThread.dbi)

        maxLumiWithJobDAO = daoFactory(
            classname="Subscriptions.MaxLumiWithJob")
        getGoodLumiHolesDAO = daoFactory(
            classname="JobSplitting.GetGoodLumiHoles")

        # highest lumi with a job
        maxLumiWithJob = maxLumiWithJobDAO.execute(self.subscription["id"])

        logging.debug("DEBUG Sub %d, maxLumiWithJob = %d" %
                      (self.subscription["id"], maxLumiWithJob))

        # find good lumi holes (needs to be done before data discovery)
        goodLumiHoles = getGoodLumiHolesDAO.execute(self.subscription["id"],
                                                    maxLumiWithJob)

        logging.debug("DEBUG Sub %d, goodLumiHoles = %s" %
                      (self.subscription["id"], sorted(goodLumiHoles)))

        # data discovery
        getFilesDAO = daoFactory(
            classname="Subscriptions.GetAvailableRepackMergeFiles")
        availableFiles = getFilesDAO.execute(self.subscription["id"])

        # nothing to do, stop immediately
        if len(availableFiles) == 0:
            return

        # lumis we have data for
        lumiList = set([])
        for result in availableFiles:
            for lumi in range(result['first_lumi'], result['last_lumi'] + 1):
                lumiList.add(lumi)
        lumiList = sorted(list(lumiList))

        logging.debug("DEBUG Sub %d, lumiList = %s" %
                      (self.subscription["id"], lumiList))

        # check if fileset is closed
        fileset = self.subscription.getFileset()
        fileset.load()

        # extended lumi range for job creation
        firstLumi = maxLumiWithJob + 1
        lastLumi = lumiList[-1]

        # consistency check (ignore at end of run)
        if lumiList[0] <= maxLumiWithJob:
            if fileset.open:
                logging.error(
                    "ERROR: finding data that can't be there, bailing out...")
                return
            else:
                logging.info(
                    "WARNING: finding data that can't be there, fileset is closed, merge anyway..."
                )
                firstLumi = lumiList[0]

        # narrow down lumi range for job creation
        filesByLumi = {}
        for lumi in range(firstLumi, lastLumi + 1):
            if (lumi in lumiList) or (lumi in goodLumiHoles):
                filesByLumi[lumi] = []
            else:
                break

        # figure out what data to create jobs for
        for fileInfo in availableFiles:
            lumi = fileInfo['first_lumi']
            if lumi in filesByLumi:
                filesByLumi[lumi].append(fileInfo)

        logging.debug("DEBUG Sub %d, create jobs for lumis = %s" %
                      (self.subscription["id"], sorted(filesByLumi.keys())))

        self.defineJobs(filesByLumi, fileset.open)

        return
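The narrowing loop above extends the job-creation range lumi by lumi and stops at the first lumi that is neither covered by data nor declared a good hole; the same logic in isolation (contiguousLumis is hypothetical):

def contiguousLumis(firstLumi, lastLumi, lumisWithData, goodHoles):
    # Walk the extended range and stop at the first unexplained gap.
    covered = []
    for lumi in range(firstLumi, lastLumi + 1):
        if lumi in lumisWithData or lumi in goodHoles:
            covered.append(lumi)
        else:
            break
    return covered

# contiguousLumis(4, 10, {4, 5, 8}, {6}) -> [4, 5, 6]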
Exemple #23
0
    def __init__(self, config):
        BasePlugin.__init__(self, config)

        self.locationDict = {}

        myThread = threading.currentThread()
        daoFactory = DAOFactory(package="WMCore.WMBS",
                                logger=myThread.logger,
                                dbinterface=myThread.dbi)
        self.locationAction = daoFactory(classname="Locations.GetSiteInfo")

        self.packageDir = None

        # if agent is running in a container, Unpacker.py must come from a directory
        # on the host so the condor schedd can see it
        # config.General.workDir should always be bind mounted to the container
        if getattr(config.Agent, "isDocker", False):
            unpackerPath = os.path.join(config.General.workDir,
                                        "Docker/WMRuntime/Unpacker.py")
        else:
            unpackerPath = os.path.join(
                getWMBASE(), 'src/python/WMCore/WMRuntime/Unpacker.py')

        if os.path.exists(unpackerPath):
            self.unpacker = unpackerPath
        else:
            self.unpacker = os.path.join(getWMBASE(),
                                         'WMCore/WMRuntime/Unpacker.py')

        self.agent = getattr(config.Agent, 'agentName', 'WMAgent')
        self.sandbox = None

        self.scriptFile = config.JobSubmitter.submitScript

        self.defaultTaskPriority = getattr(config.BossAir,
                                           'defaultTaskPriority', 0)
        self.maxTaskPriority = getattr(config.BossAir, 'maxTaskPriority', 1e7)
        self.jobsPerSubmit = getattr(config.JobSubmitter, 'jobsPerSubmit', 200)
        self.extraMem = getattr(config.JobSubmitter, 'extraMemoryPerCore', 500)

        # Required for global pool accounting
        self.acctGroup = getattr(config.BossAir, 'acctGroup', "production")
        self.acctGroupUser = getattr(config.BossAir, 'acctGroupUser',
                                     "cmsdataops")

        # Build a requirement string.  All CMS resources match DESIRED_Sites on the START
        # expression side; however, there are currently some resources (T2_CH_CERN_HLT)
        # that are missing the REQUIRED_OS logic.  Hence, we duplicate it here.
        # TODO(bbockelm): Remove reqStr once HLT has upgraded.
        self.reqStr = (
            '((REQUIRED_OS=?="any") || '
            '(GLIDEIN_REQUIRED_OS =?= "any") || '
            'stringListMember(GLIDEIN_REQUIRED_OS, REQUIRED_OS)) && '
            '(AuthenticatedIdentity =!= "*****@*****.**")')
        if hasattr(config.BossAir, 'condorRequirementsString'):
            self.reqStr = config.BossAir.condorRequirementsString

        # x509 proxy handling
        proxy = Proxy({'logger': myThread.logger})
        self.x509userproxy = proxy.getProxyFilename()

        # These are added now by the condor client
        #self.x509userproxysubject = proxy.getSubject()
        #self.x509userproxyfqan = proxy.getAttributeFromProxy(self.x509userproxy)

        return
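The block of getattr calls above is the usual defaults-with-override pattern for agent config sections; a tiny self-contained illustration (the _Section stand-in is hypothetical):

class _Section(object):
    pass

config = _Section()
config.JobSubmitter = _Section()
config.JobSubmitter.jobsPerSubmit = 100

jobsPerSubmit = getattr(config.JobSubmitter, 'jobsPerSubmit', 200)  # 100, overridden
extraMem = getattr(config.JobSubmitter, 'extraMemoryPerCore', 500)  # 500, default used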
Exemple #24
0
    def __init__(self, config):
        """
        _init_

        """
        BaseWorkerThread.__init__(self)

        myThread = threading.currentThread()

        self.daoFactory = DAOFactory(package="T0.WMBS",
                                     logger=logging,
                                     dbinterface=myThread.dbi)

        self.tier0ConfigFile = config.Tier0Feeder.tier0ConfigFile
        self.specDirectory = config.Tier0Feeder.specDirectory
        self.dropboxuser = getattr(config.Tier0Feeder, "dropboxuser", None)
        self.dropboxpass = getattr(config.Tier0Feeder, "dropboxpass", None)

        self.transferSystemBaseDir = getattr(config.Tier0Feeder,
                                             "transferSystemBaseDir", None)
        if self.transferSystemBaseDir is not None:
            if not os.path.exists(self.transferSystemBaseDir):
                self.transferSystemBaseDir = None

        self.dqmUploadProxy = getattr(config.Tier0Feeder, "dqmUploadProxy",
                                      None)
        self.serviceProxy = getattr(config.Tier0Feeder, "serviceProxy", None)

        self.localRequestCouchDB = RequestDBWriter(
            config.AnalyticsDataCollector.localT0RequestDBURL,
            couchapp=config.AnalyticsDataCollector.RequestCouchApp)

        hltConfConnectUrl = config.HLTConfDatabase.connectUrl
        dbFactoryHltConf = DBFactory(logging,
                                     dburl=hltConfConnectUrl,
                                     options={})
        dbInterfaceHltConf = dbFactoryHltConf.connect()
        daoFactoryHltConf = DAOFactory(package="T0.WMBS",
                                       logger=logging,
                                       dbinterface=dbInterfaceHltConf)
        self.getHLTConfigDAO = daoFactoryHltConf(
            classname="RunConfig.GetHLTConfig")

        storageManagerConnectUrl = config.StorageManagerDatabase.connectUrl
        dbFactoryStorageManager = DBFactory(logging,
                                            dburl=storageManagerConnectUrl,
                                            options={})
        self.dbInterfaceStorageManager = dbFactoryStorageManager.connect()

        self.getExpressReadyRunsDAO = None
        if hasattr(config, "PopConLogDatabase"):
            popConLogConnectUrl = getattr(config.PopConLogDatabase,
                                          "connectUrl", None)
            if popConLogConnectUrl is not None:
                dbFactoryPopConLog = DBFactory(logging,
                                               dburl=popConLogConnectUrl,
                                               options={})
                dbInterfacePopConLog = dbFactoryPopConLog.connect()
                daoFactoryPopConLog = DAOFactory(
                    package="T0.WMBS",
                    logger=logging,
                    dbinterface=dbInterfacePopConLog)
                self.getExpressReadyRunsDAO = daoFactoryPopConLog(
                    classname="Tier0Feeder.GetExpressReadyRuns")

        self.haveT0DataSvc = False
        if hasattr(config, "T0DataSvcDatabase"):
            t0datasvcConnectUrl = getattr(config.T0DataSvcDatabase,
                                          "connectUrl", None)
            if t0datasvcConnectUrl is not None:
                self.haveT0DataSvc = True
                dbFactoryT0DataSvc = DBFactory(logging,
                                               dburl=t0datasvcConnectUrl,
                                               options={})
                dbInterfaceT0DataSvc = dbFactoryT0DataSvc.connect()
                self.daoFactoryT0DataSvc = DAOFactory(
                    package="T0.WMBS",
                    logger=logging,
                    dbinterface=dbInterfaceT0DataSvc)

        return
Exemple #25
0
    def __init__(self, config):
        """
        __init__

        Create all DAO objects that are used by this class.
        """
        WMConnectionBase.__init__(self, "WMCore.WMBS")
        myThread = threading.currentThread()
        self.dbsDaoFactory = DAOFactory(package="WMComponent.DBS3Buffer",
                                        logger=myThread.logger,
                                        dbinterface=myThread.dbi)

        self.getOutputMapAction = self.daofactory(
            classname="Jobs.GetOutputMap")
        self.bulkAddToFilesetAction = self.daofactory(
            classname="Fileset.BulkAddByLFN")
        self.bulkParentageAction = self.daofactory(
            classname="Files.AddBulkParentage")
        self.getJobTypeAction = self.daofactory(classname="Jobs.GetType")
        self.getParentInfoAction = self.daofactory(
            classname="Files.GetParentInfo")
        self.setParentageByJob = self.daofactory(
            classname="Files.SetParentageByJob")
        self.setParentageByMergeJob = self.daofactory(
            classname="Files.SetParentageByMergeJob")
        self.setFileRunLumi = self.daofactory(classname="Files.AddRunLumi")
        self.setFileLocation = self.daofactory(
            classname="Files.SetLocationByLFN")
        self.setFileAddChecksum = self.daofactory(
            classname="Files.AddChecksumByLFN")
        self.addFileAction = self.daofactory(classname="Files.Add")
        self.jobCompleteInput = self.daofactory(classname="Jobs.CompleteInput")
        self.setBulkOutcome = self.daofactory(classname="Jobs.SetOutcomeBulk")
        self.getWorkflowSpec = self.daofactory(
            classname="Workflow.GetSpecAndNameFromTask")
        self.getJobInfoByID = self.daofactory(classname="Jobs.LoadFromID")
        self.getFullJobInfo = self.daofactory(
            classname="Jobs.LoadForErrorHandler")
        self.getJobTaskNameAction = self.daofactory(
            classname="Jobs.GetFWJRTaskName")

        self.dbsStatusAction = self.dbsDaoFactory(
            classname="DBSBufferFiles.SetStatus")
        self.dbsParentStatusAction = self.dbsDaoFactory(
            classname="DBSBufferFiles.GetParentStatus")
        self.dbsChildrenAction = self.dbsDaoFactory(
            classname="DBSBufferFiles.GetChildren")
        self.dbsCreateFiles = self.dbsDaoFactory(
            classname="DBSBufferFiles.Add")
        self.dbsSetLocation = self.dbsDaoFactory(
            classname="DBSBufferFiles.SetLocationByLFN")
        self.dbsInsertLocation = self.dbsDaoFactory(
            classname="DBSBufferFiles.AddLocation")
        self.dbsSetChecksum = self.dbsDaoFactory(
            classname="DBSBufferFiles.AddChecksumByLFN")
        self.dbsSetRunLumi = self.dbsDaoFactory(
            classname="DBSBufferFiles.AddRunLumi")
        self.dbsGetWorkflow = self.dbsDaoFactory(classname="ListWorkflow")

        self.dbsLFNHeritage = self.dbsDaoFactory(
            classname="DBSBufferFiles.BulkHeritageParent")

        self.stateChanger = ChangeState(config)

        # Decide whether or not to attach jobReport to returned value
        self.returnJobReport = getattr(config.JobAccountant,
                                       'returnReportFromWorker', False)

        # Store location for the specs for DBS
        self.specDir = getattr(config.JobAccountant, 'specDir', None)

        # ACDC service
        self.dataCollection = DataCollectionService(
            url=config.ACDC.couchurl, database=config.ACDC.database)

        jobDBurl = sanitizeURL(config.JobStateMachine.couchurl)['url']
        jobDBName = config.JobStateMachine.couchDBName
        jobCouchdb = CouchServer(jobDBurl)
        self.fwjrCouchDB = jobCouchdb.connectDatabase("%s/fwjrs" % jobDBName)
        self.localWMStats = WMStatsWriter(config.TaskArchiver.localWMStatsURL,
                                          "WMStatsAgent")

        # Hold data for a later commit
        self.dbsFilesToCreate = []
        self.wmbsFilesToBuild = []
        self.wmbsMergeFilesToBuild = []
        self.fileLocation = None
        self.mergedOutputFiles = []
        self.listOfJobsToSave = []
        self.listOfJobsToFail = []
        self.filesetAssoc = []
        self.parentageBinds = []
        self.parentageBindsForMerge = []
        self.jobsWithSkippedFiles = {}
        self.count = 0
        self.datasetAlgoID = collections.deque(maxlen=1000)
        self.datasetAlgoPaths = collections.deque(maxlen=1000)
        self.dbsLocations = set()
        self.workflowIDs = collections.deque(maxlen=1000)
        self.workflowPaths = collections.deque(maxlen=1000)

        self.phedex = PhEDEx()
        self.locLists = self.phedex.getNodeMap()

        return
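The collections.deque(maxlen=1000) members above act as bounded recently-seen caches: appending past maxlen silently evicts the oldest entry. A minimal illustration:

import collections

seen = collections.deque(maxlen=3)
for dataset in ["/A", "/B", "/C", "/D"]:
    if dataset not in seen:
        seen.append(dataset)
print(list(seen))  # ['/B', '/C', '/D']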
Exemple #26
0
    def setUp(self):
        """
        _setUp_

        """
        import WMQuality.TestInit
        WMQuality.TestInit.deleteDatabaseAfterEveryTest("I'm Serious")
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(
            customModules=["WMComponent.DBS3Buffer", "T0.WMBS"])

        self.splitterFactory = SplitterFactory(package="T0.JobSplitting")

        myThread = threading.currentThread()
        daoFactory = DAOFactory(package="T0.WMBS",
                                logger=logging,
                                dbinterface=myThread.dbi)

        myThread.dbi.processData("""INSERT INTO wmbs_location
                                    (id, site_name, state, state_time)
                                    VALUES (1, 'SomeSite', 1, 1)
                                    """,
                                 transaction=False)
        myThread.dbi.processData("""INSERT INTO wmbs_pnns
                                    (id, pnn)
                                    VALUES (2, 'SomePNN')
                                    """,
                                 transaction=False)

        myThread.dbi.processData("""INSERT INTO wmbs_location_pnns
                                    (location, pnn)
                                    VALUES (1, 2)
                                    """,
                                 transaction=False)

        insertRunDAO = daoFactory(classname="RunConfig.InsertRun")
        insertRunDAO.execute(binds={
            'RUN': 1,
            'HLTKEY': "someHLTKey"
        },
                             transaction=False)

        insertLumiDAO = daoFactory(classname="RunConfig.InsertLumiSection")
        for lumi in [1, 2, 3, 4]:
            insertLumiDAO.execute(binds={
                'RUN': 1,
                'LUMI': lumi
            },
                                  transaction=False)

        insertStreamDAO = daoFactory(classname="RunConfig.InsertStream")
        insertStreamDAO.execute(binds={'STREAM': "A"}, transaction=False)

        insertStreamFilesetDAO = daoFactory(
            classname="RunConfig.InsertStreamFileset")
        insertStreamFilesetDAO.execute(1, "A", "TestFileset1")

        self.fileset1 = Fileset(name="TestFileset1")
        self.fileset1.load()

        workflow1 = Workflow(spec="spec.xml",
                             owner="hufnagel",
                             name="TestWorkflow1",
                             task="Test")
        workflow1.create()

        self.subscription1 = Subscription(fileset=self.fileset1,
                                          workflow=workflow1,
                                          split_algo="Repack",
                                          type="Repack")
        self.subscription1.create()

        # keep for later
        self.insertClosedLumiDAO = daoFactory(
            classname="RunLumiCloseout.InsertClosedLumi")
        self.currentTime = int(time.time())

        # default split parameters
        self.splitArgs = {}
        self.splitArgs['maxSizeSingleLumi'] = 20 * 1024 * 1024 * 1024
        self.splitArgs['maxSizeMultiLumi'] = 10 * 1024 * 1024 * 1024
        self.splitArgs['maxInputEvents'] = 500000
        self.splitArgs['maxInputFiles'] = 1000
        self.splitArgs['maxLatency'] = 50000

        return
Exemple #27
0
    def testGetFinishedWorkflows(self):
        """
        _testGetFinishedWorkflows_

        Test that we get only those workflows which are finished, that is,
        workflows where all their subscriptions are finished and all other
        workflows with the same spec are finished too.

        """

        owner = "no-one"

        #Create a bunch of workflows with "different" specs and tasks
        workflows = []
        for i in range(0, 100):
            scaledIndex = i % 10
            testWorkflow = Workflow(spec="sp00%i" % scaledIndex,
                                    owner=owner,
                                    name="wf00%i" % scaledIndex,
                                    task="task%i" % i)
            testWorkflow.create()
            workflows.append(testWorkflow)

        #Everyone will use this fileset
        testFileset = Fileset(name="TestFileset")
        testFileset.create()

        #Create subscriptions!
        subscriptions = []
        for workflow in workflows:
            subscription = Subscription(fileset=testFileset, workflow=workflow)
            subscription.create()
            subscriptions.append(subscription)

        #Check that all workflows are NOT finished
        myThread = threading.currentThread()
        daoFactory = DAOFactory(package="WMCore.WMBS",
                                logger=myThread.logger,
                                dbinterface=myThread.dbi)

        getFinishedDAO = daoFactory(classname="Workflow.GetFinishedWorkflows")
        result = getFinishedDAO.execute()
        self.assertEqual(
            len(result), 0,
            "A workflow is incorrectly flagged as finished: %s" % str(result))

        #Mark the first 50 subscriptions as finished
        for idx, sub in enumerate(subscriptions):
            if idx > 49:
                break
            sub.markFinished()

        #No workflow is finished, none of them has all the subscriptions completed
        result = getFinishedDAO.execute()
        self.assertEqual(
            len(result), 0,
            "A workflow is incorrectly flagged as finished: %s" % str(result))

        #Now finish all workflows in wf{000-5}
        for idx, sub in enumerate(subscriptions):
            if idx < 50 or idx % 10 > 5:
                continue
            sub.markFinished()

        #Check the workflows
        result = getFinishedDAO.execute()
        self.assertEqual(
            len(result), 6,
            "A workflow is incorrectly flagged as finished: %s" % str(result))

        #Check the overall structure of the workflows
        for wf in result:
            #Sanity checks on the results
            # These are very specific checks and depends heavily on the names of task, spec and workflow
            self.assertEqual(
                wf[2:], result[wf]['spec'][2:],
                "A workflow has the wrong spec-name combination: %s" % str(wf))
            self.assertTrue(
                int(wf[2:]) < 6,
                "A workflow is incorrectly flagged as finished: %s" % str(wf))
            self.assertEqual(
                len(result[wf]['workflows']), 10,
                "A workflow has more tasks than it should: %s" %
                str(result[wf]))
            for task in result[wf]['workflows']:
                self.assertEqual(
                    len(result[wf]['workflows'][task]), 1,
                    "A workflow has more subscriptions than it should: %s" %
                    str(result[wf]))

        return
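The test relies on tasks 0..99 collapsing onto ten workflows via i % 10, ten tasks apiece; a quick standalone check of that mapping:

from collections import Counter

counts = Counter("wf00%i" % (i % 10) for i in range(100))
assert sorted(counts) == ["wf00%i" % n for n in range(10)]
assert all(v == 10 for v in counts.values())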
Exemple #28
0
    def setUp(self):
        """
        _setUp_

        Setup the database and WMBS for the test.
        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(
            customModules=["WMComponent.DBS3Buffer", "WMCore.WMBS"],
            useDefault=False)

        myThread = threading.currentThread()
        self.daofactory = DAOFactory(package="WMCore.WMBS",
                                     logger=myThread.logger,
                                     dbinterface=myThread.dbi)
        self.dbsfactory = DAOFactory(package="WMComponent.DBS3Buffer",
                                     logger=myThread.logger,
                                     dbinterface=myThread.dbi)
        locationAction = self.daofactory(classname="Locations.New")
        locationAction.execute(siteName="site1", pnn="T1_US_FNAL_Disk")

        inputFile = File(lfn="/path/to/some/lfn",
                         size=10,
                         events=10,
                         locations="T1_US_FNAL_Disk")
        inputFile.create()

        inputFileset = Fileset(name="InputFileset")
        inputFileset.create()
        inputFileset.addFile(inputFile)
        inputFileset.commit()

        unmergedFileset = Fileset(name="UnmergedFileset")
        unmergedFileset.create()

        mergedFileset = Fileset(name="MergedFileset")
        mergedFileset.create()

        procWorkflow = Workflow(spec="wf001.xml",
                                owner="Steve",
                                name="TestWF",
                                task="/TestWF/None")
        procWorkflow.create()
        procWorkflow.addOutput("outputRECORECO", unmergedFileset)

        mergeWorkflow = Workflow(spec="wf002.xml",
                                 owner="Steve",
                                 name="MergeWF",
                                 task="/MergeWF/None")
        mergeWorkflow.create()
        mergeWorkflow.addOutput("Merged", mergedFileset)

        insertWorkflow = self.dbsfactory(classname="InsertWorkflow")
        insertWorkflow.execute("TestWF", "/TestWF/None", 0, 0, 0, 0)
        insertWorkflow.execute("MergeWF", "/MergeWF/None", 0, 0, 0, 0)

        self.procSubscription = Subscription(fileset=inputFileset,
                                             workflow=procWorkflow,
                                             split_algo="FileBased",
                                             type="Processing")
        self.procSubscription.create()
        self.procSubscription.acquireFiles()

        self.mergeSubscription = Subscription(fileset=unmergedFileset,
                                              workflow=mergeWorkflow,
                                              split_algo="WMBSMergeBySize",
                                              type="Merge")
        self.mergeSubscription.create()

        self.procJobGroup = JobGroup(subscription=self.procSubscription)
        self.procJobGroup.create()
        self.mergeJobGroup = JobGroup(subscription=self.mergeSubscription)
        self.mergeJobGroup.create()

        self.testJob = Job(name="testJob", files=[inputFile])
        self.testJob.create(group=self.procJobGroup)
        self.testJob["state"] = "complete"

        myThread = threading.currentThread()
        self.daofactory = DAOFactory(package="WMCore.WMBS",
                                     logger=myThread.logger,
                                     dbinterface=myThread.dbi)
        self.stateChangeAction = self.daofactory(classname="Jobs.ChangeState")
        self.setFWJRAction = self.daofactory(classname="Jobs.SetFWJRPath")
        self.getJobTypeAction = self.daofactory(classname="Jobs.GetType")
        locationAction = self.daofactory(classname="Locations.New")
        locationAction.execute(siteName="cmssrm.fnal.gov")

        self.stateChangeAction.execute(jobs=[self.testJob])

        self.tempDir = tempfile.mkdtemp()
        return
Exemple #29
0
    def setUp(self):
        """
        _setUp_

        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()

        self.testInit.setSchema(
            customModules=["WMComponent.DBS3Buffer", "T0.WMBS"])

        self.splitterFactory = SplitterFactory(package="T0.JobSplitting")

        myThread = threading.currentThread()
        daoFactory = DAOFactory(package="T0.WMBS",
                                logger=logging,
                                dbinterface=myThread.dbi)

        wmbsDaoFactory = DAOFactory(package="WMCore.WMBS",
                                    logger=logging,
                                    dbinterface=myThread.dbi)

        myThread.dbi.processData("""INSERT INTO wmbs_location
                                    (id, site_name, state, state_time)
                                    VALUES (1, 'SomeSite', 1, 1)
                                    """,
                                 transaction=False)

        myThread.dbi.processData("""INSERT INTO wmbs_pnns
                                    (id, pnn) 
                                    VALUES (2, 'SomePNN')
                                    """,
                                 transaction=False)

        myThread.dbi.processData("""INSERT INTO wmbs_location_pnn
                                    (location, pnn)
                                    VALUES (1, 'SomePNN')
                                    """,
                                 transaction=False)

        insertRunDAO = daoFactory(classname="RunConfig.InsertRun")
        insertRunDAO.execute(binds={
            'RUN': 1,
            'HLTKEY': "someHLTKey"
        },
                             transaction=False)

        insertLumiDAO = daoFactory(classname="RunConfig.InsertLumiSection")
        insertLumiDAO.execute(binds={'RUN': 1, 'LUMI': 1}, transaction=False)

        insertStreamDAO = daoFactory(classname="RunConfig.InsertStream")
        insertStreamDAO.execute(binds={'STREAM': "Express"}, transaction=False)

        insertStreamFilesetDAO = daoFactory(
            classname="RunConfig.InsertStreamFileset")
        insertStreamFilesetDAO.execute(1, "Express", "TestFileset1")

        insertStreamerDAO = daoFactory(classname="RunConfig.InsertStreamer")
        insertStreamerDAO.execute(streamerPNN="SomePNN",
                                  binds={
                                      'RUN': 1,
                                      'P5_ID': 1,
                                      'LUMI': 1,
                                      'STREAM': "Express",
                                      'TIME': int(time.time()),
                                      'LFN': "/streamer",
                                      'FILESIZE': 0,
                                      'EVENTS': 0
                                  },
                                  transaction=False)

        insertPromptCalibrationDAO = daoFactory(
            classname="RunConfig.InsertPromptCalibration")
        insertPromptCalibrationDAO.execute(
            {
                'RUN': 1,
                'STREAM': "Express",
                'NUM_PRODUCER': 1
            },
            transaction=False)

        self.markPromptCalibrationFinishedDAO = daoFactory(
            classname="ConditionUpload.MarkPromptCalibrationFinished")

        self.fileset1 = Fileset(name="TestFileset1")
        self.fileset1.create()

        workflow1 = Workflow(spec="spec.xml",
                             owner="hufnagel",
                             name="TestWorkflow1",
                             task="Test")
        workflow1.create()

        self.subscription1 = Subscription(fileset=self.fileset1,
                                          workflow=workflow1,
                                          split_algo="Condition",
                                          type="Condition")
        self.subscription1.create()

        # set parentage chain and sqlite fileset
        alcaRecoFile = File("/alcareco", size=0, events=0)
        alcaRecoFile.addRun(Run(1, *[1]))
        alcaRecoFile.setLocation("SomePNN", immediateSave=False)
        alcaRecoFile.create()
        alcaPromptFile = File("/alcaprompt", size=0, events=0)
        alcaPromptFile.addRun(Run(1, *[1]))
        alcaPromptFile.setLocation("SomePNN", immediateSave=False)
        alcaPromptFile.create()
        sqliteFile = File("/sqlite", size=0, events=0)
        sqliteFile.create()
        self.fileset1.addFile(sqliteFile)
        self.fileset1.commit()

        # list the file LFNs registered so far (the result is not used
        # further in this snippet)
        results = myThread.dbi.processData("""SELECT lfn FROM wmbs_file_details
                                              """,
                                           transaction=False)[0].fetchall()

        setParentageDAO = wmbsDaoFactory(classname="Files.SetParentage")
        setParentageDAO.execute(binds=[{
            'parent': "/streamer",
            'child': "/alcareco"
        }, {
            'parent': "/alcareco",
            'child': "/alcaprompt"
        }, {
            'parent': "/alcaprompt",
            'child': "/sqlite"
        }],
                                transaction=False)

        # default split parameters
        self.splitArgs = {}
        self.splitArgs['runNumber'] = 1
        self.splitArgs['streamName'] = "Express"

        return
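With this fixture in place, a test would typically pull a job factory out of the SplitterFactory and invoke it with the split arguments prepared above; a minimal sketch, with the call pattern assumed from the usual T0 job-splitting tests rather than taken from this snippet:

    def testConditionSplitting(self):
        """
        _testConditionSplitting_

        Run the Condition splitter against the fixture subscription.
        """
        jobFactory = self.splitterFactory(package="WMCore.WMBS",
                                          subscription=self.subscription1)
        jobGroups = jobFactory(**self.splitArgs)
        # the expected job count depends on whether the prompt calibration
        # has been marked finished (see markPromptCalibrationFinishedDAO)
        return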
Example #30
    def populateWMBS(self):
        """
        _populateWMBS_

        Create files and subscriptions in WMBS
        """
        myThread = threading.currentThread()
        daofactory = DAOFactory(package="WMCore.WMBS",
                                logger=myThread.logger,
                                dbinterface=myThread.dbi)

        locationAction = daofactory(classname="Locations.New")
        locationAction.execute(siteName='s1', pnn="T1_US_FNAL_Disk")
        locationAction.execute(siteName='s2', pnn="T2_CH_CERN")
        self.validLocations = ["T1_US_FNAL_Disk", "T2_CH_CERN"]

        self.multipleFileFileset = Fileset(name="TestFileset1")
        self.multipleFileFileset.create()
        parentFile = File('/parent/lfn/',
                          size=1000,
                          events=100,
                          locations=set(["T1_US_FNAL_Disk"]))
        parentFile.create()
        for _ in range(10):
            newFile = File(makeUUID(),
                           size=1000,
                           events=100,
                           locations=set(["T1_US_FNAL_Disk"]))
            newFile.create()
            newFile.addParent(lfn=parentFile['lfn'])
            self.multipleFileFileset.addFile(newFile)
        self.multipleFileFileset.commit()

        self.singleFileFileset = Fileset(name="TestFileset2")
        self.singleFileFileset.create()
        newFile = File("/some/file/name",
                       size=1000,
                       events=100,
                       locations=set(["T1_US_FNAL_Disk"]))
        newFile.create()
        self.singleFileFileset.addFile(newFile)
        self.singleFileFileset.commit()

        self.multipleSiteFileset = Fileset(name="TestFileset3")
        self.multipleSiteFileset.create()
        for _ in range(5):
            newFile = File(makeUUID(), size=1000, events=100)
            newFile.setLocation("T1_US_FNAL_Disk")
            newFile.create()
            self.multipleSiteFileset.addFile(newFile)
        for _ in range(5):
            newFile = File(makeUUID(), size=1000, events=100)
            newFile.setLocation(["T1_US_FNAL_Disk", "T2_CH_CERN"])
            newFile.create()
            self.multipleSiteFileset.addFile(newFile)
        self.multipleSiteFileset.commit()

        testWorkflow = Workflow(spec="spec.xml",
                                owner="Steve",
                                name="wf001",
                                task="Test")
        testWorkflow.create()
        self.multipleFileSubscription = Subscription(
            fileset=self.multipleFileFileset,
            workflow=testWorkflow,
            split_algo="EventBased",
            type="Processing")
        self.multipleFileSubscription.create()
        self.singleFileSubscription = Subscription(
            fileset=self.singleFileFileset,
            workflow=testWorkflow,
            split_algo="EventBased",
            type="Processing")
        self.singleFileSubscription.create()
        self.multipleSiteSubscription = Subscription(
            fileset=self.multipleSiteFileset,
            workflow=testWorkflow,
            split_algo="EventBased",
            type="Processing")
        self.multipleSiteSubscription.create()

        return
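Any of these subscriptions can then feed a splitter; a minimal sketch for the single-file case, with the SplitterFactory import, the events_per_job parameter name and the expected counts assumed from the usual WMCore EventBased tests:

    def testEventBasedSplitting(self):
        """
        _testEventBasedSplitting_

        Split the 100-event file into 50-event jobs.
        """
        self.populateWMBS()
        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=self.singleFileSubscription)
        jobGroups = jobFactory(events_per_job=50)
        # one job group holding two 50-event jobs is the expected outcome
        self.assertEqual(len(jobGroups), 1)
        self.assertEqual(len(jobGroups[0].jobs), 2)
        return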
Example #31
    def __init__(self, config):
        """
        _init_

        """
        BaseWorkerThread.__init__(self)

        myThread = threading.currentThread()

        self.daoFactory = DAOFactory(package="T0.WMBS",
                                     logger=logging,
                                     dbinterface=myThread.dbi)

        self.tier0ConfigFile = config.Tier0Feeder.tier0ConfigFile
        self.specDirectory = config.Tier0Feeder.specDirectory
        self.dropboxuser = getattr(config.Tier0Feeder, "dropboxuser", None)
        self.dropboxpass = getattr(config.Tier0Feeder, "dropboxpass", None)

        self.dqmUploadProxy = getattr(config.Tier0Feeder, "dqmUploadProxy",
                                      None)
        self.serviceProxy = getattr(config.Tier0Feeder, "serviceProxy", None)

        self.localRequestCouchDB = RequestDBWriter(
            config.AnalyticsDataCollector.localT0RequestDBURL,
            couchapp=config.AnalyticsDataCollector.RequestCouchApp)

        self.injectedRuns = set()

        hltConfConnectUrl = config.HLTConfDatabase.connectUrl
        dbFactoryHltConf = DBFactory(logging,
                                     dburl=hltConfConnectUrl,
                                     options={})
        self.dbInterfaceHltConf = dbFactoryHltConf.connect()
        daoFactoryHltConf = DAOFactory(package="T0.WMBS",
                                       logger=logging,
                                       dbinterface=self.dbInterfaceHltConf)
        self.getHLTConfigDAO = daoFactoryHltConf(
            classname="RunConfig.GetHLTConfig")

        storageManagerConnectUrl = config.StorageManagerDatabase.connectUrl
        dbFactoryStorageManager = DBFactory(logging,
                                            dburl=storageManagerConnectUrl,
                                            options={})
        self.dbInterfaceStorageManager = dbFactoryStorageManager.connect()

        self.dbInterfaceSMNotify = None
        if hasattr(config, "SMNotifyDatabase"):
            smNotifyConnectUrl = config.SMNotifyDatabase.connectUrl
            dbFactorySMNotify = DBFactory(logging,
                                          dburl=smNotifyConnectUrl,
                                          options={})
            self.dbInterfaceSMNotify = dbFactorySMNotify.connect()

        self.getExpressReadyRunsDAO = None
        if hasattr(config, "PopConLogDatabase"):
            popConLogConnectUrl = getattr(config.PopConLogDatabase,
                                          "connectUrl", None)
            if popConLogConnectUrl is not None:
                dbFactoryPopConLog = DBFactory(logging,
                                               dburl=popConLogConnectUrl,
                                               options={})
                dbInterfacePopConLog = dbFactoryPopConLog.connect()
                daoFactoryPopConLog = DAOFactory(
                    package="T0.WMBS",
                    logger=logging,
                    dbinterface=dbInterfacePopConLog)
                self.getExpressReadyRunsDAO = daoFactoryPopConLog(
                    classname="Tier0Feeder.GetExpressReadyRuns")

        self.haveT0DataSvc = False
        if hasattr(config, "T0DataSvcDatabase"):
            t0datasvcConnectUrl = getattr(config.T0DataSvcDatabase,
                                          "connectUrl", None)
            if t0datasvcConnectUrl is not None:
                self.haveT0DataSvc = True
                dbFactoryT0DataSvc = DBFactory(logging,
                                               dburl=t0datasvcConnectUrl,
                                               options={})
                dbInterfaceT0DataSvc = dbFactoryT0DataSvc.connect()
                self.daoFactoryT0DataSvc = DAOFactory(
                    package="T0.WMBS",
                    logger=logging,
                    dbinterface=dbInterfaceT0DataSvc)

        #
        # Set deployment ID
        #

        setDeploymentIdDAO = self.daoFactory(
            classname="Tier0Feeder.SetDeploymentID")
        getDeploymentIdDAO = self.daoFactory(
            classname="Tier0Feeder.GetDeploymentID")
        try:
            self.deployID = getDeploymentIdDAO.execute()
            if self.deployID == 0:
                self.deployID = int(
                    datetime.datetime.now().strftime("%y%m%d%H%M%S"))
                setDeploymentIdDAO.execute(self.deployID)
        except Exception:
            logging.exception(
                "Something went wrong with setting the deployment ID")
            raise

        return
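The deployment-ID fallback above just encodes the current wall-clock time as a sortable integer; the same pattern in isolation:

import datetime

# e.g. 2024-03-05 14:07:09 becomes the integer 240305140709
deployID = int(datetime.datetime.now().strftime("%y%m%d%H%M%S"))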
    def setUp(self):
        """
        _setUp_

        Setup the database connections and schema.
        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules=["WMCore.WMBS"],
                                useDefault=False)

        myThread = threading.currentThread()
        daofactory = DAOFactory(package="WMCore.WMBS",
                                logger=myThread.logger,
                                dbinterface=myThread.dbi)

        locationAction = daofactory(classname="Locations.New")
        locationAction.execute("T2_CH_CERN", pnn="T2_CH_CERN")
        locationAction.execute("T1_US_FNAL", pnn="T1_US_FNAL_Disk")

        self.testFilesetA = Fileset(name="FilesetA")
        self.testFilesetA.create()
        self.testFilesetB = Fileset(name="FilesetB")
        self.testFilesetB.create()

        self.testFileA = File("testFileA",
                              size=1000,
                              events=100,
                              locations=set(["T2_CH_CERN"]))
        self.testFileA.create()
        self.testFileB = File("testFileB",
                              size=1000,
                              events=100,
                              locations=set(["T2_CH_CERN"]))
        self.testFileB.create()
        self.testFileC = File("testFileC",
                              size=1000,
                              events=100,
                              locations=set(["T2_CH_CERN"]))
        self.testFileC.create()

        self.testFilesetA.addFile(self.testFileA)
        self.testFilesetA.addFile(self.testFileB)
        self.testFilesetA.addFile(self.testFileC)
        self.testFilesetA.commit()

        self.testFileD = File("testFileD",
                              size=1000,
                              events=100,
                              locations=set(["T2_CH_CERN"]))
        self.testFileD.create()
        self.testFileE = File("testFileE",
                              size=1000,
                              events=100,
                              locations=set(["T2_CH_CERN"]))
        self.testFileE.create()
        self.testFileF = File("testFileF",
                              size=1000,
                              events=100,
                              locations=set(["T2_CH_CERN"]))
        self.testFileF.create()

        self.testFilesetB.addFile(self.testFileD)
        self.testFilesetB.addFile(self.testFileE)
        self.testFilesetB.addFile(self.testFileF)
        self.testFilesetB.commit()

        testWorkflowA = Workflow(spec="specA.xml",
                                 owner="Steve",
                                 name="wfA",
                                 task="Test")
        testWorkflowA.create()
        testWorkflowB = Workflow(spec="specB.xml",
                                 owner="Steve",
                                 name="wfB",
                                 task="Test")
        testWorkflowB.create()
        testWorkflowC = Workflow(spec="specC.xml",
                                 owner="Steve",
                                 name="wfC",
                                 task="Test")
        testWorkflowC.create()
        testWorkflowD = Workflow(spec="specD.xml",
                                 owner="Steve",
                                 name="wfD",
                                 task="Test")
        testWorkflowD.create()

        self.testSubscriptionA = Subscription(fileset=self.testFilesetA,
                                              workflow=testWorkflowA,
                                              split_algo="FileBased",
                                              type="Processing")
        self.testSubscriptionA.create()
        self.testSubscriptionB = Subscription(fileset=self.testFilesetB,
                                              workflow=testWorkflowB,
                                              split_algo="FileBased",
                                              type="Processing")
        self.testSubscriptionB.create()
        self.testSubscriptionC = Subscription(fileset=self.testFilesetB,
                                              workflow=testWorkflowC,
                                              split_algo="FileBased",
                                              type="Processing")
        self.testSubscriptionC.create()
        self.testSubscriptionD = Subscription(fileset=self.testFilesetB,
                                              workflow=testWorkflowD,
                                              split_algo="FileBased",
                                              type="Processing")
        self.testSubscriptionD.create()

        deleteWorkflow = Workflow(spec="specE.xml",
                                  owner="Steve",
                                  name="wfE",
                                  task="Test")
        deleteWorkflow.create()

        self.deleteSubscriptionA = Subscription(
            fileset=self.testFilesetA,
            workflow=deleteWorkflow,
            split_algo="SiblingProcessingBased",
            type="Cleanup")
        self.deleteSubscriptionA.create()
        self.deleteSubscriptionB = Subscription(
            fileset=self.testFilesetB,
            workflow=deleteWorkflow,
            split_algo="SiblingProcessingBased",
            type="Cleanup")
        self.deleteSubscriptionB.create()
        return
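A test on top of this fixture would mark files complete under the processing subscriptions and then run the cleanup splitter; a minimal sketch, with the SplitterFactory import and the files_per_job parameter assumed from the usual WMCore SiblingProcessingBased tests:

    def testSiblingProcessing(self):
        """
        _testSiblingProcessing_

        Run the SiblingProcessingBased splitter on FilesetA's cleanup
        subscription.
        """
        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=self.deleteSubscriptionA)
        # files only become deletable once every sibling subscription has
        # finished with them, so this may legitimately return no job groups
        jobGroups = jobFactory(files_per_job=10)
        return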