Example 1
    def testC_FailTest(self):
        """
        _FailTest_

        THIS TEST IS DANGEROUS!
        Figure out what happens when we trigger rollbacks
        """
        myThread = threading.currentThread()
        config = self.createConfig()
        config.DBSUpload.abortStepTwo = True

        originalOut = sys.stdout
        originalErr = sys.stderr

        dbsInterface = DBSInterface(config = config)
        localAPI     = dbsInterface.getAPIRef()
        globeAPI     = dbsInterface.getAPIRef(globalRef = True)

        name = "ThisIsATest_%s" % (makeUUID())
        tier = "RECO"
        nFiles = 12
        files = self.getFiles(name = name, tier = tier, nFiles = nFiles)
        datasetPath = '/%s/%s/%s' % (name, name, tier)

        testDBSUpload = DBSUploadPoller(config = config)

        try:
            testDBSUpload.algorithm()
        except Exception as ex:
            pass
Example 2
    def testF_DBSUploadQueueSizeCheckForAlerts(self):
        """
        This test will not trigger a real alert unless some of the methods
        called from DBSUploadPoller.algorithm() -> DBSUploadPoller.uploadBlocks()
        are mocked out. As written it probably cannot be deterministic, but the
        feature should still be exercised (see the sketch after this example for
        one way to mock it).

        """
        sizeLevelToTest = 1
        myThread = threading.currentThread()
        config = self.createConfig()
        # threshold / value to check
        config.DBSUpload.alertUploadQueueSize = sizeLevelToTest

        # without this uploadBlocks method returns immediately
        name = "ThisIsATest_%s" % (makeUUID())
        tier = "RECO"
        nFiles = sizeLevelToTest + 1
        files = self.getFiles(name = name, tier = tier, nFiles = nFiles)
        datasetPath = '/%s/%s/%s' % (name, name, tier)

        # load components that are necessary to check status
        # (this seems necessary, else some previous tests started failing)
        factory = WMFactory("dbsUpload", "WMComponent.DBSUpload.Database.Interface")
        dbinterface = factory.loadObject("UploadToDBS")

        dbsInterface = DBSInterface(config = config)
        localAPI = dbsInterface.getAPIRef()
        globeAPI = dbsInterface.getAPIRef(globalRef = True)
        testDBSUpload = DBSUploadPoller(config)
        # this is finally where the action (alert) should be triggered from
        testDBSUpload.algorithm()

        return
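The docstring above notes that the alert cannot be asserted deterministically without mocking. Below is a minimal standalone sketch of that idea; it uses a stand-in class rather than the real DBSUploadPoller (the threshold logic and the sendAlert(level, msg=...) call mirror the snippets in this listing, everything else is stubbed so the sketch runs on its own with Python 3's unittest.mock, or the back-ported mock package on Python 2):

# Illustrative stand-in, NOT the real WMCore classes: it only shows how
# patching sendAlert makes the queue-size alert check deterministic.
from unittest import mock


class FakePoller(object):
    def __init__(self, alertUploadQueueSize):
        self.alertUploadQueueSize = alertUploadQueueSize

    def sendAlert(self, level, msg=None):
        # The real component forwards this to the alert framework.
        pass

    def algorithm(self, queuedFiles):
        # Mirrors the threshold check in DBSUploadPoller.uploadBlocks()
        if self.alertUploadQueueSize and queuedFiles >= int(self.alertUploadQueueSize):
            self.sendAlert(6, msg="DBS upload queue size (%s) exceeded configured "
                                  "threshold (%s)." % (queuedFiles, self.alertUploadQueueSize))


poller = FakePoller(alertUploadQueueSize=1)
with mock.patch.object(poller, 'sendAlert') as alertMock:
    poller.algorithm(queuedFiles=2)   # sizeLevelToTest + 1, as in the test above
assert alertMock.called, "expected the queue-size alert to fire"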
Example 3
    def testE_NoMigration(self):
        """
        _NoMigration_

        Test the DBSUpload system with no global migration
        """
        myThread = threading.currentThread()
        config = self.createConfig()
        self.injectWorkflow(MaxWaitTime=3)
        config.DBSInterface.doGlobalMigration = False
        config.DBSUpload.pollInterval = 4

        name = "ThisIsATest_%s" % (makeUUID())
        tier = "RECO"
        nFiles = 12
        files = self.getFiles(name=name, tier=tier, nFiles=nFiles)
        datasetPath = '/%s/%s/%s' % (name, name, tier)

        # Load components that are necessary to check status
        factory = WMFactory("dbsUpload",
                            "WMComponent.DBSUpload.Database.Interface")
        dbinterface = factory.loadObject("UploadToDBS")

        dbsInterface = DBSInterface(config=config)
        localAPI = dbsInterface.getAPIRef()
        globeAPI = dbsInterface.getAPIRef(globalRef=True)

        # In the first round we should create blocks for the first dataset
        # The child dataset should not be handled until the parent is uploaded
        testDBSUpload = DBSUploadPoller(config=config)
        testDBSUpload.algorithm()

        # First, see if there are any blocks
        # One in DBS, one not in DBS
        result = myThread.dbi.processData(
            "SELECT status FROM dbsbuffer_block")[0].fetchall()
        self.assertEqual(len(result), 2)
        self.assertEqual(result, [('InGlobalDBS', ), ('Open', )])

        result = myThread.dbi.processData(
            "SELECT status FROM dbsbuffer_file WHERE dataset_algo = 1"
        )[0].fetchall()
        for r in result:
            self.assertEqual(r[0], 'GLOBAL')

        return
Example 4
    def __init__(self, config, dbsconfig=None):
        """
        Initialise class members

        """
        myThread = threading.currentThread()

        BaseWorkerThread.__init__(self)
        self.config = config

        # This is slightly dangerous, but DBSUpload depends
        # on DBSInterface anyway
        self.bufferFactory = DAOFactory(
            package="WMComponent.DBSBuffer.Database",
            logger=myThread.logger,
            dbinterface=myThread.dbi)

        factory = WMFactory("dbsUpload",
                            "WMComponent.DBSUpload.Database.Interface")
        self.uploadToDBS = factory.loadObject("UploadToDBS")

        addFactory = WMFactory("dbsBuffer",
                               "WMComponent.DBSBuffer.Database.Interface")
        self.addToBuffer = addFactory.loadObject("AddToBuffer")

        # Set DBSInterface
        self.dbsInterface = DBSInterface(config=config)

        # Set DAOs
        self.setBlock = self.bufferFactory(classname="DBSBufferFiles.SetBlock")
        self.setStatus = self.bufferFactory(
            classname="DBSBufferFiles.SetStatus")

        # Set config parameters
        self.doMigration = getattr(self.config.DBSInterface,
                                   'doGlobalMigration', True)

        # Fall back to the component config when no separate DBS config is given
        if dbsconfig is None:
            dbsconfig = config
        self.dbsconfig = dbsconfig

        # initialize the alert framework (if available - config.Alert present)
        #    self.sendAlert will then be available
        self.initAlerts(compName="DBSUpload")

        return
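For orientation, here is a bare configuration stub listing the attributes that this constructor and the polling code read. In the tests these come from self.createConfig(); the stub below is only meant to show which knobs appear in these snippets and is not a working WMCore configuration:

# Hypothetical stub -- the real tests build their config with self.createConfig().
class _Section(object):
    pass


config = _Section()

config.DBSInterface = _Section()
config.DBSInterface.doGlobalMigration = True    # when False, block files go straight to 'GLOBAL'

config.DBSUpload = _Section()
config.DBSUpload.pollInterval = 4               # seconds between algorithm() passes
config.DBSUpload.alertUploadQueueSize = None    # queue-size alert threshold; unset disables the check
config.DBSUpload.abortStepTwo = False           # test hooks that deliberately raise inside
config.DBSUpload.abortStepThree = False         # createBlocksInDBSBuffer() / uploadBlocks()

# config.Alert, when present, is what initAlerts() uses to enable self.sendAlert.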
Example 5
class DBSUploadPoller(BaseWorkerThread):
    """
    Handles poll-based DBSUpload

    """


    def __init__(self, config, dbsconfig = None):
        """
        Initialise class members

        """
        myThread = threading.currentThread()

        BaseWorkerThread.__init__(self)
        self.config     = config

        # This is slightly dangerous, but DBSUpload depends
        # on DBSInterface anyway
        self.bufferFactory = DAOFactory(package = "WMComponent.DBSBuffer.Database",
                                        logger = myThread.logger,
                                        dbinterface = myThread.dbi)

        factory = WMFactory("dbsUpload",
                            "WMComponent.DBSUpload.Database.Interface")
        self.uploadToDBS = factory.loadObject("UploadToDBS")

        addFactory = WMFactory("dbsBuffer",
                               "WMComponent.DBSBuffer.Database.Interface")
        self.addToBuffer = addFactory.loadObject("AddToBuffer")

        # Set DBSInterface
        self.dbsInterface = DBSInterface(config = config)

        # Set DAOs
        self.setBlock  = self.bufferFactory(classname = "DBSBufferFiles.SetBlock")
        self.setStatus = self.bufferFactory(classname = "DBSBufferFiles.SetStatus")

        # Set config parameters
        self.doMigration      = getattr(self.config.DBSInterface, 'doGlobalMigration', True)

        # Fall back to the component config when no separate DBS config is given
        if dbsconfig is None:
            dbsconfig = config
        self.dbsconfig = dbsconfig

        # initialize the alert framework (if available - config.Alert present)
        #    self.sendAlert will then be available
        self.initAlerts(compName = "DBSUpload")

        return


    def algorithm(self, parameters = None):
        """
        Runs over all available DBSBuffer filesets/algos
        Commits them using DBSInterface
        Then checks blocks for timeout
        """
        logging.debug("Running subscription / fileset matching algorithm")
        try:
            self.sortBlocks()
            self.uploadBlocks()
        except WMException:
            raise
        except Exception as ex:
            msg =  "Unhandled exception in DBSUploadPoller\n"
            msg += str(ex)
            msg += str(traceback.format_exc())
            logging.error(msg)
            self.sendAlert(6, msg = msg)
            raise DBSUploadPollerException(msg)
        return

    def terminate(self, params):
        """
        Do one more pass, then terminate

        """
        logging.debug("Terminating. Doing one more pass before we die")
        self.algorithm(params)
        return


    def sortBlocks(self):
        """
        _sortBlocks_

        Find new files to upload, sort them into blocks
        Save the blocks in DBSBuffer
        """
        myThread = threading.currentThread()

        # Grab all the Dataset-Algo combinations
        dasList = self.uploadToDBS.findUploadableDAS()
        logging.debug("Recovered %i DAS to upload" % len(dasList))

        for dasInfo in dasList:
            # Go one DAS at a time
            dasID = dasInfo['DAS_ID']
            logging.info("Processing DAS %i" % dasID)

            # Initial values
            readyBlocks = []
            fileLFNs    = []

            # Get the dataset-algo information
            #algo    = createAlgoFromInfo(info = dasInfo)
            dataset = createDatasetFromInfo(info = dasInfo)

            # Get the files for the DAS
            files  = self.uploadToDBS.findUploadableFilesByDAS(das = dasID)
            if len(files) < 1:
                # Then we have no files for this DAS
                logging.debug("DAS %i has no available files.  Continuing." % dasID)
                continue

            # Load the blocks for the DAS
            blocks = self.uploadToDBS.loadBlocksByDAS(das = dasID)
            logging.debug("Retrieved %i files and %i blocks from DB." % (len(files), len(blocks)))

            # Sort the files and blocks by location
            locationDict = sortListByKey(files, 'locations')
            blockDict    = sortListByKey(blocks, 'location')
            logging.debug("Active DAS file locations: %s" % locationDict.keys())
            logging.debug("Active Block file locations: %s" % blockDict.keys())

            try:
                # Sort files that are already in blocks
                # back into those blocks
                # pass by reference
                blockDict, locationDict = preassignBlocks(files = locationDict, blocks = blockDict)

                # Now go over all the files
                for location in locationDict.keys():
                    # Split files into blocks
                    locFiles  = locationDict.get(location, [])
                    locBlocks = blockDict.get(location, [])
                    locBlocks = self.splitFilesIntoBlocks(files = locFiles,
                                                          blocks = locBlocks,
                                                          dataset = dataset,
                                                          location = location)
                    readyBlocks.extend(locBlocks)
            except WMException:
                raise
            except Exception as ex:
                msg =  "Unhandled exception while sorting files into blocks for DAS %i\n" % dasID
                msg += str(ex)
                msg += str(traceback.format_exc())
                logging.error(msg)
                self.sendAlert(6, msg = msg)
                logging.debug("BlockDictionary: %s" % blockDict)
                logging.debug("FileDictionary: %s" % locationDict)
                raise DBSUploadPollerException(msg)


            # At this point, all blocks should be in readyBlocks
            # STEP TWO: Commit blocks to DBSBuffer
            fileLFNs = self.createBlocksInDBSBuffer(readyBlocks = readyBlocks)

            # Now we should have all the blocks in DBSBuffer
            # Time to set the status of the files

            lfnList = [x['lfn'] for x in files]
            self.setStatus.execute(lfns = lfnList, status = "READY",
                                   conn = myThread.transaction.conn,
                                   transaction = myThread.transaction)

        # All files that were in NOTUPLOADED
        # And had uploaded parents
        # Should now be assigned to blocks in DBSBuffer, and in the READY status
        return


    def uploadBlocks(self):
        """
        _uploadBlocks_

        Load all OPEN blocks out of the database with all their necessary files
        Once we have the blocks, determine which ones are ready to be uploaded
        Also determine which ones are ready to be migrated
        Upload them
        """
        myThread = threading.currentThread()


        # Get the blocks
        # This should grab all Pending and Open blocks
        blockInfo = self.uploadToDBS.loadBlocks()
        blocks = []

        if len(blockInfo) < 1:
            # Then we have no block, and probably no files
            logging.info("No blocks in this iteration.  Returning")
            return

        # Assemble the blocks
        for info in blockInfo:
            block = createBlock(datasetPath = 'blank', location = 'blank')
            block['id']           = info['id']
            block['das']          = info['das']
            block['Name']         = info['blockname']
            block['CreationDate'] = info['create_time']
            block['open']         = info['open']
            block['MaxCloseTime'] = info['block_close_max_wait_time']
            blocks.append(block)


        dasIDs = []
        for block in blocks:
            if block['das'] not in dasIDs:
                dasIDs.append(block['das'])

        dasAlgoDataset = {}
        dasAlgoInfo = self.uploadToDBS.loadDASInfoByID(ids = dasIDs)
        for dasInfo in dasAlgoInfo:
            algo    = createAlgoFromInfo(info = dasInfo)
            dataset = createDatasetFromInfo(info = dasInfo)
            dasAlgoDataset[dasInfo['DAS_ID']] = {'dataset': dataset,
                                                 'algo': algo}

        # At this point we should have the dataset and algo information
        # The blocks
        # And the files
        # Time to sort the files into blocks

        # the counter / watcher of the alertUploadQueueSize to possibly send alerts
        alertUploadQueueSize = getattr(self.config.DBSUpload, "alertUploadQueueSize", None)
        alertUploadQueueSizeCounter = 0
        for block in blocks:
            files = self.uploadToDBS.loadFilesFromBlocks(blockID = block['id'])
            for f in files:
                if f['blockID'] == block['id']:
                    # Put file in this block
                    logging.debug("Setting file %s to block %s" % (f['lfn'], block['Name']))
                    block['newFiles'].append(f)
                    alertUploadQueueSizeCounter += 1

        # check alertUploadQueueSize threshold (alert condition)
        if alertUploadQueueSize:
            if alertUploadQueueSizeCounter >= int(alertUploadQueueSize):
                msg = ("DBS upload queue size (%s) exceeded configured "
                       "threshold (%s)." % (alertUploadQueueSizeCounter, alertUploadQueueSize))
                self.sendAlert(6, msg = msg)

        # Check for block timeout
        for block in blocks:
            if time.time() - block['CreationDate'] > block['MaxCloseTime']:
                logging.info("Setting status to Pending due to timeout for block %s" % block['Name'])
                block['open'] = 'Pending'

        # Should have files in blocks, now assign them to DAS
        for dasID in dasAlgoDataset.keys():
            readyBlocks = []
            dataset = dasAlgoDataset[dasID]['dataset']
            algo    = dasAlgoDataset[dasID]['algo']
            for block in blocks:
                if len(block['newFiles']) > 0:
                    # Assign a location from the files
                    logging.debug("Block %s has %i files" % (block['Name'], len(block['newFiles'])))
                    block['location'] = list(block['newFiles'][0]['locations'])[0]
                if block['das'] == dasID:
                    if block['open'] == 'Pending':
                        # Always attach pending blocks
                        logging.debug("Attaching block %s" % block['Name'])
                        readyBlocks.append(block)
                    elif len(block['newFiles']) > 0:
                        # Else you only deal with blocks if they have new files
                        logging.debug("Attaching block %s" % block['Name'])
                        readyBlocks.append(block)

            if len(readyBlocks) < 1:
                # Nothing to do
                logging.debug("Nothing to do for DAS %i in uploadBlocks" % dasID)
                continue

            try:
                # Now do the real work of transferring the blocks.
                # DBS prints directly to stdout/stderr, so both are silenced
                # during the upload and restored afterwards.

                for singleBlock in readyBlocks:
                    originalOut = sys.stdout
                    originalErr = sys.stderr
                    sys.stdout = open(os.devnull, 'w')
                    sys.stderr = open(os.devnull, 'w')

                    if getattr(self.config.DBSUpload, 'abortStepThree', False):
                        # Blow the stack for testing purposes
                        raise DBSUploadPollerException('None')

                    logging.info("About to upload to DBS for DAS %i with %i blocks" % (dasID, len(readyBlocks)))
                    affBlocks = self.dbsInterface.runDBSBuffer(algo = algo,
                                                               dataset = dataset,
                                                               blocks = [singleBlock])

                    sys.stdout = originalOut
                    sys.stderr = originalErr


                    # Update DBSBuffer with current information
                    myThread.transaction.begin()

                    for block in affBlocks:
                        logging.info("Successfully inserted %i files for block %s." % (len(block['insertedFiles']),
                                                                                       block['Name']))
                        self.uploadToDBS.setBlockStatus(block = block['Name'],
                                                        locations = [block['location']],
                                                        openStatus = block['open'])
                        if block['open'] == 'InGlobalDBS' or not self.doMigration:
                            # Set block files as in global if they've been migrated.
                            # If we aren't doing global migrations, all files are in global
                            logging.debug("Block %s now listed in global DBS" % block['Name'])
                            self.uploadToDBS.closeBlockFiles(blockname = block['Name'], status = 'GLOBAL')
                        else:
                            logging.debug("Block %s now uploaded to local DBS" % block['Name'])
                            self.uploadToDBS.closeBlockFiles(blockname = block['Name'], status = 'LOCAL')

                    logging.debug("About to do post-upload DBS commit for DAS %i" % dasID)
                    myThread.transaction.commit()

            # New plan: If we get an error in trying to commit a block to DBS
            # then we just rollback the transaction and continue to the next
            # block - ignoring the exception
            except WMException:
                if getattr(myThread, 'transaction', None) != None:
                    myThread.transaction.rollbackForError()
                pass
                #raise
            except Exception as ex:
                msg =  'Error in committing files to DBS\n'
                msg += str(ex)
                msg += str(traceback.format_exc())
                logging.error(msg)
                self.sendAlert(6, msg = msg)
                if getattr(myThread, 'transaction', None) != None:
                    myThread.transaction.rollbackForError()
                pass
                #raise DBSUploadPollerException(msg)


        return



    def splitFilesIntoBlocks(self, files, blocks, dataset, location):
        """
        Break the files into blocks based on config params

        Create a new block when necessary.
        """

        blocksToHandle = []

        if len(blocks) > 1:
            # Well, then we have a bit of a problem
            # Decide what to do about this later
            logging.error("More then one open block for this DAS")

        if len(blocks) == 0:
            currentBlock = createBlock(datasetPath = dataset['Path'],
                                       location = location)
        else:
            currentBlock = blocks[0]

        for newFile in files:
            # Check to see if blocks are full
            if not self.isBlockOpen(block = currentBlock):
                # Add old block to return list
                # Create a new block
                currentBlock['open'] = 'Pending'
                blocksToHandle.append(currentBlock)
                currentBlock = createBlock(datasetPath = dataset['Path'],
                                           location = location)

            # Check if the file has the closing settings, otherwise plug in the settings from the newFile
            if currentBlock['MaxCloseTime'] is None or currentBlock['MaxCloseFiles'] is None \
                or currentBlock['MaxCloseSize'] is None or currentBlock['MaxCloseEvents'] is None:
                currentBlock['MaxCloseTime'] = newFile['block_close_max_wait_time']
                currentBlock['MaxCloseEvents'] = newFile['block_close_max_events']
                currentBlock['MaxCloseFiles'] = newFile['block_close_max_files']
                currentBlock['MaxCloseSize'] = newFile['block_close_max_size']

            # Now process the file
            currentBlock['newFiles'].append(newFile)
            currentBlock['BlockSize'] += newFile['size']
            currentBlock['NumberOfFiles'] += 1
            currentBlock['NumberOfEvents'] += newFile['events']

        if currentBlock['NumberOfFiles'] > 0:
            blocksToHandle.append(currentBlock)


        return blocksToHandle



    def isBlockOpen(self, block):
        """
        _isBlockOpen_

        Tells you whether the block should stay open
        """
        if block['MaxCloseTime'] is None or block['MaxCloseFiles'] is None \
            or block['MaxCloseSize'] is None or block['MaxCloseEvents'] is None:
            return True

        if time.time() - int(block.get('CreationDate', 0)) >= block['MaxCloseTime']:
            # We've timed out on this block
            return False
        if block['NumberOfFiles'] >= block['MaxCloseFiles']:
            # We've got too many files
            return False
        if float(block.get('BlockSize')) >= block['MaxCloseSize']:
            # Block is too big
            return False
        if block['NumberOfEvents'] >= block['MaxCloseEvents']:
            # Too many events in the block
            return False

        return True

    def createBlocksInDBSBuffer(self, readyBlocks):
        """
        _createBlocksInDBSBuffer_

        Create the blocks in the local database in
        their initial states.
        """
        myThread = threading.currentThread()
        fileLFNs = []
        try:
            # Do this in its own transaction
            myThread.transaction.begin()

            for block in readyBlocks:
                # First insert each block
                logging.info("Prepping block %s for DBS with status %s" % (block['Name'], block['open']))
                self.uploadToDBS.setBlockStatus(block = block['Name'],
                                                locations = [block['location']],
                                                openStatus = block['open'],
                                                time = int(block['CreationDate']))

                # Then insert files from each block
                blockFileList = []
                for f in block.get('newFiles', []):
                    blockFileList.append(f['lfn'])

                if len(blockFileList) > 0:
                    self.setBlock.execute(lfn = blockFileList,
                                          blockName = block['Name'],
                                          conn = myThread.transaction.conn,
                                          transaction = myThread.transaction)
                    fileLFNs.extend(blockFileList)

            if getattr(self.config.DBSUpload, 'abortStepTwo', False):
                # Blow the stack for testing purposes
                raise DBSUploadPollerException('None')

            logging.debug("Committing transaction at the end of DBSBuffer insertion.")
            myThread.transaction.commit()

        except WMException as ex:
            if getattr(myThread, 'transaction', None) != None:
                myThread.transaction.rollback()
            raise
        except Exception as ex:
            msg =  'Error in committing blocks to DBSBuffer\n'
            msg += str(ex)
            msg += str(traceback.format_exc())
            logging.error(msg)
            self.sendAlert(6, msg = msg)
            if getattr(myThread, 'transaction', None) != None:
                myThread.transaction.rollback()
            raise DBSUploadPollerException(msg)

        return fileLFNs
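isBlockOpen() above keeps a block open until any one of four limits is hit: age, file count, size, or event count. Below is a standalone restatement of that rule on plain dicts, purely for illustration; the field names follow the block dictionaries used in these snippets:

import time


def block_is_open(block):
    """Illustrative copy of the isBlockOpen() rule, operating on a plain dict."""
    # Blocks that have not been given closing settings yet always stay open.
    if (block['MaxCloseTime'] is None or block['MaxCloseFiles'] is None
            or block['MaxCloseSize'] is None or block['MaxCloseEvents'] is None):
        return True
    if time.time() - int(block.get('CreationDate', 0)) >= block['MaxCloseTime']:
        return False   # block has timed out
    if block['NumberOfFiles'] >= block['MaxCloseFiles']:
        return False   # too many files
    if float(block.get('BlockSize')) >= block['MaxCloseSize']:
        return False   # block is too big
    if block['NumberOfEvents'] >= block['MaxCloseEvents']:
        return False   # too many events
    return True


# A freshly created block well under every limit stays open.
freshBlock = {'CreationDate': int(time.time()), 'MaxCloseTime': 3600,
              'NumberOfFiles': 2, 'MaxCloseFiles': 500,
              'BlockSize': 2048, 'MaxCloseSize': 5 * 1024 ** 3,
              'NumberOfEvents': 40, 'MaxCloseEvents': 250000}
assert block_is_open(freshBlock)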
Example 6
    def testC_FailTest(self):
        """
        _FailTest_

        THIS TEST IS DANGEROUS!
        Figure out what happens when we trigger rollbacks
        """
        myThread = threading.currentThread()
        config = self.createConfig()
        config.DBSUpload.abortStepTwo = True

        originalOut = sys.stdout
        originalErr = sys.stderr

        dbsInterface = DBSInterface(config=config)
        localAPI = dbsInterface.getAPIRef()
        globeAPI = dbsInterface.getAPIRef(globalRef=True)

        name = "ThisIsATest_%s" % (makeUUID())
        tier = "RECO"
        nFiles = 12
        files = self.getFiles(name=name, tier=tier, nFiles=nFiles)
        datasetPath = '/%s/%s/%s' % (name, name, tier)

        testDBSUpload = DBSUploadPoller(config=config)

        try:
            testDBSUpload.algorithm()
        except Exception as ex:
            pass

        # Aborting in step two should leave nothing in dbsbuffer_block
        result = myThread.dbi.processData(
            "SELECT status FROM dbsbuffer_block")[0].fetchall()
        self.assertEqual(len(result), 0)

        config.DBSUpload.abortStepTwo = False
        config.DBSUpload.abortStepThree = True
        testDBSUpload = DBSUploadPoller(config=config)

        try:
            testDBSUpload.algorithm()
        except Exception as ex:
            pass

        result = myThread.dbi.processData(
            "SELECT status FROM dbsbuffer_block")[0].fetchall()
        self.assertEqual(result, [('Pending', ), ('Open', )])
        result = myThread.dbi.processData(
            "SELECT status FROM dbsbuffer_file WHERE dataset_algo = 1"
        )[0].fetchall()
        for res in result:
            self.assertEqual(res[0], 'READY')

        config.DBSUpload.abortStepThree = False
        self.injectWorkflow(MaxWaitTime=300)
        testDBSUpload = DBSUploadPoller(config=config)
        testDBSUpload.algorithm()

        # After this, one block should have been uploaded, one should still be open
        # This is the result of the pending block updating, and the open block staying open
        result = myThread.dbi.processData(
            "SELECT status, id FROM dbsbuffer_block")[0].fetchall()
        self.assertEqual(result, [('InGlobalDBS', 3), ('Open', 4)])

        # Check that one block got there
        result = listBlocks(apiRef=globeAPI, datasetPath=datasetPath)
        self.assertEqual(len(result), 1)
        self.assertEqual(result[0]['NumberOfFiles'], 10)
        self.assertEqual(result[0]['NumberOfEvents'], 200)
        self.assertEqual(result[0]['BlockSize'], 10240)

        # Check that ten files got there
        result = listDatasetFiles(apiRef=globeAPI, datasetPath=datasetPath)
        self.assertEqual(len(result), 10)

        myThread.dbi.processData(
            "UPDATE dbsbuffer_workflow SET block_close_max_wait_time = 1")
        testDBSUpload = DBSUploadPoller(config=config)
        time.sleep(3)
        testDBSUpload.algorithm()

        result = myThread.dbi.processData(
            "SELECT status FROM dbsbuffer_block")[0].fetchall()
        self.assertEqual(result, [('InGlobalDBS', ), ('InGlobalDBS', )])

        result = listDatasetFiles(apiRef=globeAPI, datasetPath=datasetPath)
        self.assertEqual(len(result), 12)

        fileLFNs = [x['lfn'] for x in files]
        for lfn in fileLFNs:
            self.assertTrue(lfn in result)

        testDBSUpload.algorithm()
        result = myThread.dbi.processData(
            "SELECT status FROM dbsbuffer_block")[0].fetchall()
        self.assertEqual(result, [('InGlobalDBS', ), ('InGlobalDBS', ),
                                  ('Open', )])

        time.sleep(5)
        testDBSUpload.algorithm()
        time.sleep(2)
        result = myThread.dbi.processData(
            "SELECT status FROM dbsbuffer_block")[0].fetchall()
        self.assertEqual(result, [('InGlobalDBS', ), ('InGlobalDBS', ),
                                  ('InGlobalDBS', )])

        result = listDatasetFiles(apiRef=globeAPI,
                                  datasetPath='/%s/%s_2/%s' %
                                  (name, name, tier))
        self.assertEqual(len(result), 1)

        sys.stdout = originalOut
        sys.stderr = originalErr

        return
Example 7
    def testB_AlgoMigration(self):
        """
        _AlgoMigration_

        Test our ability to migrate multiple algos to global

        Do this by creating, mid-poll, two separate batches of files
        One with the same dataset but a different algo
        One with the same algo, but a different dataset
        See that they both get to global
        """
        #raise nose.SkipTest
        myThread = threading.currentThread()
        config = self.createConfig()
        self.injectWorkflow(MaxWaitTime=20)
        name = "ThisIsATest_%s" % (makeUUID())
        tier = "RECO"
        nFiles = 12
        files = self.getFiles(name=name, tier=tier, nFiles=nFiles)
        datasetPath = '/%s/%s/%s' % (name, name, tier)

        # Load components that are necessary to check status
        factory = WMFactory("dbsUpload",
                            "WMComponent.DBSUpload.Database.Interface")
        dbinterface = factory.loadObject("UploadToDBS")

        dbsInterface = DBSInterface(config=config)
        localAPI = dbsInterface.getAPIRef()
        globeAPI = dbsInterface.getAPIRef(globalRef=True)

        testDBSUpload = DBSUploadPoller(config=config)
        testDBSUpload.algorithm()

        # There should now be one block
        result = listBlocks(apiRef=globeAPI, datasetPath=datasetPath)
        self.assertEqual(len(result), 1)

        # Okay, by now, the first migration should have gone through.
        # Now create a second batch of files with the same dataset
        # but a different algo.
        for i in range(0, nFiles):
            testFile = DBSBufferFile(lfn='%s-batch2-%i' % (name, i),
                                     size=1024,
                                     events=20,
                                     checksums={'cksum': 1},
                                     locations="malpaquet")
            testFile.setAlgorithm(appName="cmsRun",
                                  appVer="CMSSW_3_1_1",
                                  appFam=tier,
                                  psetHash="GIBBERISH_PART2",
                                  configContent=self.configURL)
            testFile.setDatasetPath(datasetPath)
            testFile.addRun(Run(1, *[46]))
            testFile.create()

        # Have to do things twice to get parents
        testDBSUpload.algorithm()
        testDBSUpload.algorithm()

        # There should now be two blocks
        result = listBlocks(apiRef=globeAPI, datasetPath=datasetPath)
        self.assertEqual(len(result), 2)

        # Now create another batch of files with the original algo
        # But in a different dataset
        for i in range(0, nFiles):
            testFile = DBSBufferFile(lfn='%s-batch3-%i' % (name, i),
                                     size=1024,
                                     events=20,
                                     checksums={'cksum': 1},
                                     locations="malpaquet")
            testFile.setAlgorithm(appName=name,
                                  appVer="CMSSW_3_1_1",
                                  appFam=tier,
                                  psetHash="GIBBERISH",
                                  configContent=self.configURL)
            testFile.setDatasetPath('/%s/%s_3/%s' % (name, name, tier))
            testFile.addRun(Run(1, *[46]))
            testFile.create()

        # Do it twice for parentage.
        testDBSUpload.algorithm()
        testDBSUpload.algorithm()

        # There should now be one block
        result = listBlocks(apiRef=globeAPI,
                            datasetPath='/%s/%s_3/%s' % (name, name, tier))
        self.assertEqual(len(result), 1)

        # Well, all the blocks got there, so we're done
        return
Example 8
    def testA_basicUploadTest(self):
        """
        _basicUploadTest_

        Do everything simply once
        Create dataset, algo, files, blocks,
        upload them,
        mark as done, finish them, migrate them
        Also check the timeout
        """
        myThread = threading.currentThread()
        config = self.createConfig()
        self.injectWorkflow(MaxWaitTime=3)
        config.DBSUpload.pollInterval = 4

        name = "ThisIsATest_%s" % (makeUUID())
        tier = "RECO"
        nFiles = 12
        files = self.getFiles(name=name, tier=tier, nFiles=nFiles)
        datasetPath = '/%s/%s/%s' % (name, name, tier)

        # Load components that are necessary to check status
        factory = WMFactory("dbsUpload",
                            "WMComponent.DBSUpload.Database.Interface")
        dbinterface = factory.loadObject("UploadToDBS")

        dbsInterface = DBSInterface(config=config)
        localAPI = dbsInterface.getAPIRef()
        globeAPI = dbsInterface.getAPIRef(globalRef=True)

        # In the first round we should create blocks for the first dataset
        # The child dataset should not be handled until the parent is uploaded
        testDBSUpload = DBSUploadPoller(config=config)
        testDBSUpload.algorithm()

        # First, see if there are any blocks
        # One in DBS, one not in DBS
        result = myThread.dbi.processData(
            "SELECT status FROM dbsbuffer_block")[0].fetchall()
        self.assertEqual(len(result), 2)
        self.assertEqual(result, [('InGlobalDBS', ), ('Open', )])

        # Check to see if datasets and algos are in local DBS
        result = listAlgorithms(apiRef=localAPI, patternExe=name)
        self.assertEqual(len(result), 1)
        self.assertEqual(result[0]['ExecutableName'], name)
        result = listPrimaryDatasets(apiRef=localAPI, match=name)
        self.assertEqual(result, [name])
        result = listProcessedDatasets(apiRef=localAPI,
                                       primary=name,
                                       dataTier="*")

        # Then check and see that the closed block made it into local DBS
        affectedBlocks = listBlocks(apiRef=localAPI, datasetPath=datasetPath)
        if affectedBlocks[0]['OpenForWriting'] == '0':
            self.assertEqual(affectedBlocks[1]['OpenForWriting'], '1')
            self.assertEqual(affectedBlocks[0]['NumberOfFiles'], 10)
            self.assertEqual(affectedBlocks[1]['NumberOfFiles'], 2)
        else:
            self.assertEqual(affectedBlocks[0]['OpenForWriting'], '1')
            self.assertEqual(affectedBlocks[1]['NumberOfFiles'], 10)
            self.assertEqual(affectedBlocks[0]['NumberOfFiles'], 2)

        # Check to make sure all the files are in local
        result = listDatasetFiles(apiRef=localAPI, datasetPath=datasetPath)
        fileLFNs = [x['lfn'] for x in files]
        for lfn in fileLFNs:
            self.assertTrue(lfn in result)

        # Make sure the child files aren't there
        flag = False
        try:
            listDatasetFiles(apiRef=localAPI,
                             datasetPath='/%s/%s_2/%s' % (name, name, tier))
        except Exception as ex:
            flag = True
        self.assertTrue(flag)

        # There should be one block in global
        # It should have ten files and be closed
        result = listBlocks(apiRef=globeAPI, datasetPath=datasetPath)
        self.assertEqual(len(result), 1)
        for block in result:
            self.assertEqual(block['OpenForWriting'], '0')
            self.assertTrue(block['NumberOfFiles'] in [2, 10])

        # Okay, deep breath.  First round done
        # In the second round, the second block of the parent fileset should transfer
        # Make sure that the timeout functions work
        time.sleep(10)
        testDBSUpload.algorithm()

        result = myThread.dbi.processData(
            "SELECT status FROM dbsbuffer_block")[0].fetchall()
        self.assertEqual(len(result), 2)
        self.assertEqual(result, [('InGlobalDBS', ), ('InGlobalDBS', )])

        # Check to make sure all the files are in global
        result = listDatasetFiles(apiRef=globeAPI, datasetPath=datasetPath)
        for lfn in fileLFNs:
            self.assertTrue(lfn in result)

        # Make sure the child files aren't there
        flag = False
        try:
            listDatasetFiles(apiRef=localAPI,
                             datasetPath='/%s/%s_2/%s' % (name, name, tier))
        except Exception as ex:
            flag = True
        self.assertTrue(flag)

        # Third round
        # Both of the parent blocks should have transferred
        # So the child block should now transfer
        testDBSUpload.algorithm()

        result = myThread.dbi.processData(
            "SELECT status FROM dbsbuffer_block")[0].fetchall()
        self.assertEqual(result, [('InGlobalDBS', ), ('InGlobalDBS', ),
                                  ('Open', )])

        flag = False
        try:
            result = listDatasetFiles(apiRef=localAPI,
                                      datasetPath='/%s/%s_2/%s' %
                                      (name, name, tier))
        except Exception as ex:
            flag = True
        self.assertFalse(flag)

        self.assertEqual(len(result), 1)

        return
Example 9
class DBSUploadPoller(BaseWorkerThread):
    """
    Handles poll-based DBSUpload

    """
    def __init__(self, config, dbsconfig=None):
        """
        Initialise class members

        """
        myThread = threading.currentThread()

        BaseWorkerThread.__init__(self)
        self.config = config

        # This is slightly dangerous, but DBSUpload depends
        # on DBSInterface anyway
        self.bufferFactory = DAOFactory(
            package="WMComponent.DBSBuffer.Database",
            logger=myThread.logger,
            dbinterface=myThread.dbi)

        factory = WMFactory("dbsUpload",
                            "WMComponent.DBSUpload.Database.Interface")
        self.uploadToDBS = factory.loadObject("UploadToDBS")

        addFactory = WMFactory("dbsBuffer",
                               "WMComponent.DBSBuffer.Database.Interface")
        self.addToBuffer = addFactory.loadObject("AddToBuffer")

        # Set DBSInterface
        self.dbsInterface = DBSInterface(config=config)

        # Set DAOs
        self.setBlock = self.bufferFactory(classname="DBSBufferFiles.SetBlock")
        self.setStatus = self.bufferFactory(
            classname="DBSBufferFiles.SetStatus")

        # Set config parameters
        self.doMigration = getattr(self.config.DBSInterface,
                                   'doGlobalMigration', True)

        # Fall back to the component config when no separate DBS config is given
        if dbsconfig is None:
            dbsconfig = config
        self.dbsconfig = dbsconfig

        # initialize the alert framework (if available - config.Alert present)
        #    self.sendAlert will then be available
        self.initAlerts(compName="DBSUpload")

        return

    def algorithm(self, parameters=None):
        """
        Runs over all available DBSBuffer filesets/algos
        Commits them using DBSInterface
        Then checks blocks for timeout
        """
        logging.debug("Running subscription / fileset matching algorithm")
        try:
            self.sortBlocks()
            self.uploadBlocks()
        except WMException:
            raise
        except Exception as ex:
            msg = "Unhandled exception in DBSUploadPoller\n"
            msg += str(ex)
            msg += str(traceback.format_exc())
            logging.error(msg)
            self.sendAlert(6, msg=msg)
            raise DBSUploadPollerException(msg)
        return

    def terminate(self, params):
        """
        Do one more pass, then terminate

        """
        logging.debug("Terminating. Doing one more pass before we die")
        self.algorithm(params)
        return

    def sortBlocks(self):
        """
        _sortBlocks_

        Find new files to upload, sort them into blocks
        Save the blocks in DBSBuffer
        """
        myThread = threading.currentThread()

        # Grab all the Dataset-Algo combinations
        dasList = self.uploadToDBS.findUploadableDAS()
        logging.debug("Recovered %i DAS to upload" % len(dasList))

        for dasInfo in dasList:
            # Go one DAS at a time
            dasID = dasInfo['DAS_ID']
            logging.info("Processing DAS %i" % dasID)

            # Initial values
            readyBlocks = []
            fileLFNs = []

            # Get the dataset-algo information
            #algo    = createAlgoFromInfo(info = dasInfo)
            dataset = createDatasetFromInfo(info=dasInfo)

            # Get the files for the DAS
            files = self.uploadToDBS.findUploadableFilesByDAS(das=dasID)
            if len(files) < 1:
                # Then we have no files for this DAS
                logging.debug("DAS %i has no available files.  Continuing." %
                              dasID)
                continue

            # Load the blocks for the DAS
            blocks = self.uploadToDBS.loadBlocksByDAS(das=dasID)
            logging.debug("Retrieved %i files and %i blocks from DB." %
                          (len(files), len(blocks)))

            # Sort the files and blocks by location
            locationDict = sortListByKey(files, 'locations')
            blockDict = sortListByKey(blocks, 'location')
            logging.debug("Active DAS file locations: %s" %
                          locationDict.keys())
            logging.debug("Active Block file locations: %s" % blockDict.keys())

            try:
                # Sort files that are already in blocks
                # back into those blocks
                # pass by reference
                blockDict, locationDict = preassignBlocks(files=locationDict,
                                                          blocks=blockDict)

                # Now go over all the files
                for location in locationDict.keys():
                    # Split files into blocks
                    locFiles = locationDict.get(location, [])
                    locBlocks = blockDict.get(location, [])
                    locBlocks = self.splitFilesIntoBlocks(files=locFiles,
                                                          blocks=locBlocks,
                                                          dataset=dataset,
                                                          location=location)
                    readyBlocks.extend(locBlocks)
            except WMException:
                raise
            except Exception as ex:
                msg = "Unhandled exception while sorting files into blocks for DAS %i\n" % dasID
                msg += str(ex)
                msg += str(traceback.format_exc())
                logging.error(msg)
                self.sendAlert(6, msg=msg)
                logging.debug("BlockDictionary: %s" % blockDict)
                logging.debug("FileDictionary: %s" % locationDict)
                raise DBSUploadPollerException(msg)

            # At this point, all blocks should be in readyBlocks
            # STEP TWO: Commit blocks to DBSBuffer
            fileLFNs = self.createBlocksInDBSBuffer(readyBlocks=readyBlocks)

            # Now we should have all the blocks in DBSBuffer
            # Time to set the status of the files

            lfnList = [x['lfn'] for x in files]
            self.setStatus.execute(lfns=lfnList,
                                   status="READY",
                                   conn=myThread.transaction.conn,
                                   transaction=myThread.transaction)

        # All files that were in NOTUPLOADED
        # And had uploaded parents
        # Should now be assigned to blocks in DBSBuffer, and in the READY status
        return

    def uploadBlocks(self):
        """
        _uploadBlocks_

        Load all OPEN blocks out of the database with all their necessary files
        Once we have the blocks, determine which ones are ready to be uploaded
        Also determine which ones are ready to be migrated
        Upload them
        """
        myThread = threading.currentThread()

        # Get the blocks
        # This should grab all Pending and Open blocks
        blockInfo = self.uploadToDBS.loadBlocks()
        blocks = []

        if len(blockInfo) < 1:
            # Then we have no block, and probably no files
            logging.info("No blocks in this iteration.  Returning")
            return

        # Assemble the blocks
        for info in blockInfo:
            block = createBlock(datasetPath='blank', location='blank')
            block['id'] = info['id']
            block['das'] = info['das']
            block['Name'] = info['blockname']
            block['CreationDate'] = info['create_time']
            block['open'] = info['open']
            block['MaxCloseTime'] = info['block_close_max_wait_time']
            blocks.append(block)

        dasIDs = []
        for block in blocks:
            if block['das'] not in dasIDs:
                dasIDs.append(block['das'])

        dasAlgoDataset = {}
        dasAlgoInfo = self.uploadToDBS.loadDASInfoByID(ids=dasIDs)
        for dasInfo in dasAlgoInfo:
            algo = createAlgoFromInfo(info=dasInfo)
            dataset = createDatasetFromInfo(info=dasInfo)
            dasAlgoDataset[dasInfo['DAS_ID']] = {
                'dataset': dataset,
                'algo': algo
            }

        # At this point we should have the dataset and algo information
        # The blocks
        # And the files
        # Time to sort the files into blocks

        # the counter / watcher of the alertUploadQueueSize to possibly send alerts
        alertUploadQueueSize = getattr(self.config.DBSUpload,
                                       "alertUploadQueueSize", None)
        alertUploadQueueSizeCounter = 0
        for block in blocks:
            files = self.uploadToDBS.loadFilesFromBlocks(blockID=block['id'])
            for f in files:
                if f['blockID'] == block['id']:
                    # Put file in this block
                    logging.debug("Setting file %s to block %s" %
                                  (f['lfn'], block['Name']))
                    block['newFiles'].append(f)
                    alertUploadQueueSizeCounter += 1

        # check alertUploadQueueSize threshold (alert condition)
        if alertUploadQueueSize:
            if alertUploadQueueSizeCounter >= int(alertUploadQueueSize):
                msg = ("DBS upload queue size (%s) exceeded configured "
                       "threshold (%s)." %
                       (alertUploadQueueSizeCounter, alertUploadQueueSize))
                self.sendAlert(6, msg=msg)

        # Check for block timeout
        for block in blocks:
            if time.time() - block['CreationDate'] > block['MaxCloseTime']:
                logging.info(
                    "Setting status to Pending due to timeout for block %s" %
                    block['Name'])
                block['open'] = 'Pending'

        # Should have files in blocks, now assign them to DAS
        for dasID in dasAlgoDataset.keys():
            readyBlocks = []
            dataset = dasAlgoDataset[dasID]['dataset']
            algo = dasAlgoDataset[dasID]['algo']
            for block in blocks:
                if len(block['newFiles']) > 0:
                    # Assign a location from the files
                    logging.debug("Block %s has %i files" %
                                  (block['Name'], len(block['newFiles'])))
                    block['location'] = list(
                        block['newFiles'][0]['locations'])[0]
                if block['das'] == dasID:
                    if block['open'] == 'Pending':
                        # Always attach pending blocks
                        logging.debug("Attaching block %s" % block['Name'])
                        readyBlocks.append(block)
                    elif len(block['newFiles']) > 0:
                        # Else you only deal with blocks if they have new files
                        logging.debug("Attaching block %s" % block['Name'])
                        readyBlocks.append(block)

            if len(readyBlocks) < 1:
                # Nothing to do
                logging.debug("Nothing to do for DAS %i in uploadBlocks" %
                              dasID)
                continue

            try:
                # Now do the real work of transferring the blocks.
                # DBS prints directly to stdout/stderr, so both are silenced
                # during the upload and restored afterwards.

                for singleBlock in readyBlocks:
                    originalOut = sys.stdout
                    originalErr = sys.stderr
                    sys.stdout = open(os.devnull, 'w')
                    sys.stderr = open(os.devnull, 'w')

                    if getattr(self.config.DBSUpload, 'abortStepThree', False):
                        # Blow the stack for testing purposes
                        raise DBSUploadPollerException('None')

                    logging.info(
                        "About to upload to DBS for DAS %i with %i blocks" %
                        (dasID, len(readyBlocks)))
                    affBlocks = self.dbsInterface.runDBSBuffer(
                        algo=algo, dataset=dataset, blocks=[singleBlock])

                    sys.stdout = originalOut
                    sys.stderr = originalErr

                    # Update DBSBuffer with current information
                    myThread.transaction.begin()

                    for block in affBlocks:
                        logging.info(
                            "Successfully inserted %i files for block %s." %
                            (len(block['insertedFiles']), block['Name']))
                        self.uploadToDBS.setBlockStatus(
                            block=block['Name'],
                            locations=[block['location']],
                            openStatus=block['open'])
                        if block['open'] == 'InGlobalDBS' or not self.doMigration:
                            # Set block files as in global if they've been migrated.
                            # If we aren't doing global migrations, all files are in global
                            logging.debug("Block %s now listed in global DBS" %
                                          block['Name'])
                            self.uploadToDBS.closeBlockFiles(
                                blockname=block['Name'], status='GLOBAL')
                        else:
                            logging.debug(
                                "Block %s now uploaded to local DBS" %
                                block['Name'])
                            self.uploadToDBS.closeBlockFiles(
                                blockname=block['Name'], status='LOCAL')

                    logging.debug(
                        "About to do post-upload DBS commit for DAS %i" %
                        dasID)
                    myThread.transaction.commit()

            # If committing a block to DBS fails, roll back the transaction
            # and move on to the next block, ignoring the exception.
            except WMException:
                if getattr(myThread, 'transaction', None) is not None:
                    myThread.transaction.rollbackForError()
            except Exception as ex:
                msg = 'Error in committing files to DBS\n'
                msg += str(ex)
                msg += str(traceback.format_exc())
                logging.error(msg)
                self.sendAlert(6, msg=msg)
                if getattr(myThread, 'transaction', None) is not None:
                    myThread.transaction.rollbackForError()

        return
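
    # Illustrative sketch (assumption, not original code): the stdout/stderr
    # swap in uploadBlocks() above could also be written with a context
    # manager, so the streams are restored even if runDBSBuffer() raises:
    #
    #   import contextlib
    #   with open(os.devnull, 'w') as null, \
    #           contextlib.redirect_stdout(null), \
    #           contextlib.redirect_stderr(null):
    #       affBlocks = self.dbsInterface.runDBSBuffer(algo=algo,
    #                                                  dataset=dataset,
    #                                                  blocks=[singleBlock])
    #
    # contextlib.redirect_stdout/redirect_stderr require Python 3.4/3.5+,
    # whereas this module targets older Python, hence the manual swap.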

    def splitFilesIntoBlocks(self, files, blocks, dataset, location):
        """
        Break the files into blocks based on config params

        Create a new block when necessary.
        """

        blocksToHandle = []

        if len(blocks) > 1:
            # More than one open block for a DAS is unexpected;
            # fall back to the first one and log the problem.
            logging.error("More than one open block for this DAS")

        if len(blocks) == 0:
            currentBlock = createBlock(datasetPath=dataset['Path'],
                                       location=location)
        else:
            currentBlock = blocks[0]

        for newFile in files:
            # Check to see if blocks are full
            if not self.isBlockOpen(block=currentBlock):
                # Add old block to return list
                # Create a new block
                currentBlock['open'] = 'Pending'
                blocksToHandle.append(currentBlock)
                currentBlock = createBlock(datasetPath=dataset['Path'],
                                           location=location)

            # If the block doesn't have its closing settings yet,
            # take them from the current file
            if currentBlock['MaxCloseTime'] is None or currentBlock['MaxCloseFiles'] is None \
                or currentBlock['MaxCloseSize'] is None or currentBlock['MaxCloseEvents'] is None:
                currentBlock['MaxCloseTime'] = newFile['block_close_max_wait_time']
                currentBlock['MaxCloseEvents'] = newFile['block_close_max_events']
                currentBlock['MaxCloseFiles'] = newFile['block_close_max_files']
                currentBlock['MaxCloseSize'] = newFile['block_close_max_size']

            # Now process the file
            currentBlock['newFiles'].append(newFile)
            currentBlock['BlockSize'] += newFile['size']
            currentBlock['NumberOfFiles'] += 1
            currentBlock['NumberOfEvents'] += newFile['events']

        if currentBlock['NumberOfFiles'] > 0:
            blocksToHandle.append(currentBlock)

        return blocksToHandle
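
    # Illustrative sketch (assumption, not original code): with files that
    # carry block_close_max_files = 2 (and generous time/size/event limits),
    # three files for the same dataset and location would come back as two
    # blocks: one full block marked 'Pending' and one still-open block
    # holding the remainder, roughly:
    #
    #   newBlocks = poller.splitFilesIntoBlocks(files=threeFiles, blocks=[],
    #                                           dataset={'Path': datasetPath},
    #                                           location='T2_XX_SiteA')
    #   # newBlocks[0]['open'] == 'Pending', newBlocks[0]['NumberOfFiles'] == 2
    #   # newBlocks[1]['NumberOfFiles'] == 1
    #
    # 'poller', 'threeFiles', 'datasetPath' and the site name are
    # hypothetical names used only for this illustration.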

    def isBlockOpen(self, block):
        """
        _isBlockOpen_

        Tells you if the block should be closed
        """
        if block['MaxCloseTime'] is None or block['MaxCloseFiles'] is None \
            or block['MaxCloseSize'] is None or block['MaxCloseEvents'] is None:
            return True

        if time.time() - int(block.get('CreationDate', 0)) >= block['MaxCloseTime']:
            # We've timed out on this block
            return False
        if block['NumberOfFiles'] >= block['MaxCloseFiles']:
            # We've got too many files
            return False
        if float(block.get('BlockSize')) >= block['MaxCloseSize']:
            # Block is too big
            return False
        if block['NumberOfEvents'] >= block['MaxCloseEvents']:
            # Too many events in the block
            return False

        return True
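
    # Illustrative sketch (assumption, not original code): a block is
    # reported as closed as soon as any configured limit is reached.
    #
    #   block = {'CreationDate': time.time(), 'MaxCloseTime': 3600,
    #            'MaxCloseFiles': 500, 'MaxCloseSize': 5000000000,
    #            'MaxCloseEvents': 250000, 'NumberOfFiles': 500,
    #            'BlockSize': 1024, 'NumberOfEvents': 10}
    #   poller.isBlockOpen(block)   # False: the file limit has been hit
    #
    # 'poller' is a hypothetical DBSUploadPoller instance; the limit values
    # are made up for this example.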

    def createBlocksInDBSBuffer(self, readyBlocks):
        """
        _createBlocksInDBSBuffer_

        Create the blocks in the local database in
        their initial states.
        """
        myThread = threading.currentThread()
        fileLFNs = []
        try:
            # Do this in its own transaction
            myThread.transaction.begin()

            for block in readyBlocks:
                # First insert each block
                logging.info("Prepping block %s for DBS with status %s" %
                             (block['Name'], block['open']))
                self.uploadToDBS.setBlockStatus(block=block['Name'],
                                                locations=[block['location']],
                                                openStatus=block['open'],
                                                time=int(block['CreationDate']))

                # Then insert files from each block
                blockFileList = []
                for f in block.get('newFiles', []):
                    blockFileList.append(f['lfn'])

                if len(blockFileList) > 0:
                    self.setBlock.execute(lfn=blockFileList,
                                          blockName=block['Name'],
                                          conn=myThread.transaction.conn,
                                          transaction=myThread.transaction)
                    fileLFNs.extend(blockFileList)

            if getattr(self.config.DBSUpload, 'abortStepTwo', False):
                # Blow the stack for testing purposes
                raise DBSUploadPollerException('None')

            logging.debug(
                "Committing transaction at the end of DBSBuffer insertion.")
            myThread.transaction.commit()

        except WMException as ex:
            if getattr(myThread, 'transaction', None) is not None:
                myThread.transaction.rollback()
            raise
        except Exception as ex:
            msg = 'Error in committing blocks to DBSBuffer\n'
            msg += str(ex)
            msg += str(traceback.format_exc())
            logging.error(msg)
            self.sendAlert(6, msg=msg)
            if getattr(myThread, 'transaction', None) is not None:
                myThread.transaction.rollback()
            raise DBSUploadPollerException(msg)

        return fileLFNs
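
    # Illustrative sketch (assumption, not original code): in a poll cycle
    # the blocks produced by splitFilesIntoBlocks() would first be recorded
    # locally with this method and then handed to the DBS upload step,
    # roughly:
    #
    #   readyBlocks = poller.splitFilesIntoBlocks(files=newFiles, blocks=[],
    #                                             dataset=dataset,
    #                                             location=location)
    #   attachedLFNs = poller.createBlocksInDBSBuffer(readyBlocks=readyBlocks)
    #
    # 'poller', 'newFiles', 'dataset' and 'location' are hypothetical names
    # used only for this illustration.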
Esempio n. 14
0
    def testB_AlgoMigration(self):
        """
        _AlgoMigration_

        Test our ability to migrate multiple algos to global

        Do this by creating, mid-poll, two separate batches of files
        One with the same dataset but a different algo
        One with the same algo, but a different dataset
        See that they both get to global
        """
        #raise nose.SkipTest
        myThread = threading.currentThread()
        config = self.createConfig()
        self.injectWorkflow(MaxWaitTime = 20)
        name = "ThisIsATest_%s" % (makeUUID())
        tier = "RECO"
        nFiles = 12
        files = self.getFiles(name = name, tier = tier, nFiles = nFiles)
        datasetPath = '/%s/%s/%s' % (name, name, tier)


        # Load components that are necessary to check status
        factory     = WMFactory("dbsUpload", "WMComponent.DBSUpload.Database.Interface")
        dbinterface = factory.loadObject("UploadToDBS")

        dbsInterface = DBSInterface(config = config)
        localAPI     = dbsInterface.getAPIRef()
        globeAPI     = dbsInterface.getAPIRef(globalRef = True)


        testDBSUpload = DBSUploadPoller(config = config)
        testDBSUpload.algorithm()

        # There should now be one block
        result    = listBlocks(apiRef = globeAPI, datasetPath = datasetPath)
        self.assertEqual(len(result), 1)

        # Okay, by now, the first migration should have gone through.
        # Now create a second batch of files with the same dataset
        # but a different algo.
        for i in range(0, nFiles):
            testFile = DBSBufferFile(lfn = '%s-batch2-%i' %(name, i), size = 1024,
                                     events = 20, checksums = {'cksum': 1},
                                     locations = "malpaquet")
            testFile.setAlgorithm(appName = "cmsRun", appVer = "CMSSW_3_1_1",
                                  appFam = tier, psetHash = "GIBBERISH_PART2",
                                  configContent = self.configURL)
            testFile.setDatasetPath(datasetPath)
            testFile.addRun(Run( 1, *[46]))
            testFile.create()


        # Have to do things twice to get parents
        testDBSUpload.algorithm()
        testDBSUpload.algorithm()

        # There should now be two blocks
        result    = listBlocks(apiRef = globeAPI, datasetPath = datasetPath)
        self.assertEqual(len(result), 2)


        # Now create another batch of files with the original algo
        # But in a different dataset
        for i in range(0, nFiles):
            testFile = DBSBufferFile(lfn = '%s-batch3-%i' %(name, i), size = 1024,
                                     events = 20, checksums = {'cksum': 1},
                                     locations = "malpaquet")
            testFile.setAlgorithm(appName = name, appVer = "CMSSW_3_1_1",
                                  appFam = tier, psetHash = "GIBBERISH",
                                  configContent = self.configURL)
            testFile.setDatasetPath('/%s/%s_3/%s' % (name, name, tier))
            testFile.addRun(Run( 1, *[46]))
            testFile.create()

        # Do it twice for parentage.
        testDBSUpload.algorithm()
        testDBSUpload.algorithm()


        # There should now be one block
        result    = listBlocks(apiRef = globeAPI, datasetPath = '/%s/%s_3/%s' % (name, name, tier))
        self.assertEqual(len(result), 1)


        # Well, all the blocks got there, so we're done
        return
Esempio n. 15
0
    def testA_basicUploadTest(self):
        """
        _basicUploadTest_

        Do everything simply once
        Create dataset, algo, files, blocks,
        upload them,
        mark as done, finish them, migrate them
        Also check the timeout
        """
        myThread = threading.currentThread()
        config = self.createConfig()
        self.injectWorkflow(MaxWaitTime = 3)
        config.DBSUpload.pollInterval  = 4

        name = "ThisIsATest_%s" % (makeUUID())
        tier = "RECO"
        nFiles = 12
        files = self.getFiles(name = name, tier = tier, nFiles = nFiles)
        datasetPath = '/%s/%s/%s' % (name, name, tier)


        # Load components that are necessary to check status
        factory     = WMFactory("dbsUpload", "WMComponent.DBSUpload.Database.Interface")
        dbinterface = factory.loadObject("UploadToDBS")

        dbsInterface = DBSInterface(config = config)
        localAPI     = dbsInterface.getAPIRef()
        globeAPI     = dbsInterface.getAPIRef(globalRef = True)

        # In the first round we should create blocks for the first dataset
        # The child dataset should not be handled until the parent is uploaded
        testDBSUpload = DBSUploadPoller(config = config)
        testDBSUpload.algorithm()

        # First, see if there are any blocks
        # One in DBS, one not in DBS
        result = myThread.dbi.processData("SELECT status FROM dbsbuffer_block")[0].fetchall()
        self.assertEqual(len(result), 2)
        self.assertEqual(result, [('InGlobalDBS',), ('Open',)])

        # Check to see if datasets and algos are in local DBS
        result  = listAlgorithms(apiRef = localAPI, patternExe = name)
        self.assertEqual(len(result), 1)
        self.assertEqual(result[0]['ExecutableName'], name)
        result  = listPrimaryDatasets(apiRef = localAPI, match = name)
        self.assertEqual(result, [name])
        result    = listProcessedDatasets(apiRef = localAPI, primary = name, dataTier = "*")

        # Then check and see that the closed block made it into local DBS
        affectedBlocks = listBlocks(apiRef = localAPI, datasetPath = datasetPath)
        if affectedBlocks[0]['OpenForWriting'] == '0':
            self.assertEqual(affectedBlocks[1]['OpenForWriting'], '1')
            self.assertEqual(affectedBlocks[0]['NumberOfFiles'], 10)
            self.assertEqual(affectedBlocks[1]['NumberOfFiles'], 2)
        else:
            self.assertEqual(affectedBlocks[0]['OpenForWriting'], '1')
            self.assertEqual(affectedBlocks[1]['NumberOfFiles'], 10)
            self.assertEqual(affectedBlocks[0]['NumberOfFiles'], 2)

        # Check to make sure all the files are in local
        result = listDatasetFiles(apiRef = localAPI, datasetPath = datasetPath)
        fileLFNs = [x['lfn'] for x in files]
        for lfn in fileLFNs:
            self.assertTrue(lfn in result)

        # Make sure the child files aren't there
        flag = False
        try:
            listDatasetFiles(apiRef = localAPI,
                             datasetPath = '/%s/%s_2/%s' % (name, name, tier))
        except Exception as ex:
            flag = True
        self.assertTrue(flag)
Esempio n. 16
0
    def testC_FailTest(self):
        """
        _FailTest_

        THIS TEST IS DANGEROUS!
        Figure out what happens when we trigger rollbacks
        """
        myThread = threading.currentThread()
        config = self.createConfig()
        config.DBSUpload.abortStepTwo = True

        originalOut = sys.stdout
        originalErr = sys.stderr

        dbsInterface = DBSInterface(config = config)
        localAPI     = dbsInterface.getAPIRef()
        globeAPI     = dbsInterface.getAPIRef(globalRef = True)

        name = "ThisIsATest_%s" % (makeUUID())
        tier = "RECO"
        nFiles = 12
        files = self.getFiles(name = name, tier = tier, nFiles = nFiles)
        datasetPath = '/%s/%s/%s' % (name, name, tier)

        testDBSUpload = DBSUploadPoller(config = config)

        try:
            testDBSUpload.algorithm()
        except Exception as ex:
            pass

        # Aborting in step two should result in no results
        result = myThread.dbi.processData("SELECT status FROM dbsbuffer_block")[0].fetchall()
        self.assertEqual(len(result), 0)

        config.DBSUpload.abortStepTwo   = False
        config.DBSUpload.abortStepThree = True
        testDBSUpload = DBSUploadPoller(config = config)

        try:
            testDBSUpload.algorithm()
        except Exception as ex:
            pass


        result = myThread.dbi.processData("SELECT status FROM dbsbuffer_block")[0].fetchall()
        self.assertEqual(result, [('Pending',), ('Open',)])
        result = myThread.dbi.processData("SELECT status FROM dbsbuffer_file WHERE dataset_algo = 1")[0].fetchall()
        for res in result:
            self.assertEqual(res[0], 'READY')

        config.DBSUpload.abortStepThree     = False
        self.injectWorkflow(MaxWaitTime = 300)
        testDBSUpload = DBSUploadPoller(config = config)
        testDBSUpload.algorithm()

        # After this, one block should have been uploaded, one should still be open
        # This is the result of the pending block updating, and the open block staying open
        result = myThread.dbi.processData("SELECT status, id FROM dbsbuffer_block")[0].fetchall()
        self.assertEqual(result, [('InGlobalDBS', 3L), ('Open', 4L)])

        # Check that one block got there
        result    = listBlocks(apiRef = globeAPI, datasetPath = datasetPath)
        self.assertEqual(len(result), 1)
        self.assertEqual(result[0]['NumberOfFiles'], 10)
        self.assertEqual(result[0]['NumberOfEvents'], 200)
        self.assertEqual(result[0]['BlockSize'], 10240)

        # Check that ten files got there
        result = listDatasetFiles(apiRef = globeAPI, datasetPath = datasetPath)
        self.assertEqual(len(result), 10)

        myThread.dbi.processData("UPDATE dbsbuffer_workflow SET block_close_max_wait_time = 1")
        testDBSUpload = DBSUploadPoller(config = config)
        time.sleep(3)
        testDBSUpload.algorithm()

        result = myThread.dbi.processData("SELECT status FROM dbsbuffer_block")[0].fetchall()
        self.assertEqual(result, [('InGlobalDBS',), ('InGlobalDBS',)])

        result = listDatasetFiles(apiRef = globeAPI, datasetPath = datasetPath)
        self.assertEqual(len(result), 12)

        fileLFNs = [x['lfn'] for x in files]
        for lfn in fileLFNs:
            self.assertTrue(lfn in result)

        testDBSUpload.algorithm()
        result = myThread.dbi.processData("SELECT status FROM dbsbuffer_block")[0].fetchall()
        self.assertEqual(result, [('InGlobalDBS',), ('InGlobalDBS',), ('Open',)])

        time.sleep(5)
        testDBSUpload.algorithm()
        time.sleep(2)
        result = myThread.dbi.processData("SELECT status FROM dbsbuffer_block")[0].fetchall()
        self.assertEqual(result, [('InGlobalDBS',), ('InGlobalDBS',), ('InGlobalDBS',)])

        result = listDatasetFiles(apiRef = globeAPI,
                                  datasetPath = '/%s/%s_2/%s' % (name, name, tier))
        self.assertEqual(len(result), 1)

        sys.stdout = originalOut
        sys.stderr = originalErr

        return