Exemple #1
0
def _genFileKey(fileInfo):
    """
    Build the dictionary key identifying a file (File ID + File Version)
    from a File Info, which may come in one of two shapes.

    fileInfo:       Either a row as read from the ngas_files table (indexed
                    with the ngamsDbCore.NGAS_FILES_* constants) or an
                    instance of ngamsFileInfo (list|tuple|ngamsFileInfo).

    Returns:        File key as produced by ngamsLib.genFileKey() when
                    called without a Disk ID (string).
    """
    # Object form: query the accessors of the File Info object.
    if not isinstance(fileInfo, (list, tuple)):
        return ngamsLib.genFileKey(None,
                                   fileInfo.getFileId(),
                                   fileInfo.getFileVersion())

    # Row form: pick the columns out of the DB result.
    return ngamsLib.genFileKey(None,
                               fileInfo[ngamsDbCore.NGAS_FILES_FILE_ID],
                               fileInfo[ngamsDbCore.NGAS_FILES_FILE_VER])
Exemple #2
0
def checkFileCopiesAndReg(srvObj,
                          minReqCopies,
                          dbFilePat,
                          fileListDbmName=None,
                          diskId=None,
                          ignoreMounted=0):
    """
    The function checks for each file referenced if there are at least
    'minReqCopies' copies (counted as entries on distinct Disk IDs) known in
    the NGAS DB. For the files where this is not the case, an ngamsFileInfo
    object is added in the 'missing copies' DBM, tagged with the number of
    independent copies found.

    If an entire disk is analysed (a Disk ID is given and the disk scan is
    not skipped via 'ignoreMounted'), it is also checked if each file found
    below the mount point of the disk is registered in the DB. The files
    which are not registered remain in the 'files on disk' DBM.

    The files to check are taken from the DB (if 'diskId' is given) and/or
    from the DBM referred to by 'fileListDbmName'. At least one of the two
    must be specified.

    srvObj:          Instance of the NG/AMS Server Class (ngamsServer).

    minReqCopies:    Minimum number of copies required (integer).

    dbFilePat:       Filename pattern used to build the names of the DBMs
                     created by this function; a suffix is appended for
                     each DBM (string).

    fileListDbmName: Name of DBM DB containing explicit references to files
                     to be checked if they can be deleted. The entries are
                     indexed with the ngamsDbCore.SUM1_* constants, i.e.
                     have the lay-out of a row returned by
                     ngamsDb.getFileSummary1() (string).

    diskId:          Used to refer to all files stored on a disk (string|None).

    ignoreMounted:   Carry out the check also if the disk is not mounted.
                     Note that when set, the scan of the files physically
                     stored on the disk is skipped (integer/0|1).

    Returns:         Tuple containing the filenames of three DBM DBs with the
                     following information:

                       o Files not having the specified number of copies.
                         The keys of this DB are generated with
                         ngamsLib.genFileKey(None, File ID, File Version),
                         pointing to ngamsFileInfo objects.

                       o Files found on the disk but not registered.
                         This contains the complete filenames of files found
                         on the disk, which are not registered. These filenames
                         are the keys of this DBM DB.

                       o Complete list of files available in the system
                         having a File ID/File Version referenced by the
                         query. The entries are added with addIncKey() and
                         contain the information as returned by
                         ngamsDb.getFileSummary1().

                     (tuple/string).

    Raises:          Exception if neither 'fileListDbmName' nor 'diskId' is
                     given, or if the disk is not mounted while
                     'ignoreMounted' is not set.
    """
    T = TRACE()

    if ((not fileListDbmName) and (not diskId)):
        errMsg = "ngamsSrvUtils.checkFileCopiesAndReg(): Must specify " +\
                 "either a DBM with files to be checked or a Disk ID"
        logger.warning(errMsg)
        raise Exception(errMsg)

    # Create DBMs:

    # - DB containing information about files having fewer than the
    # specified number of copies.
    fileMisCopyDbmName = os.path.normpath(dbFilePat + "_MISSING_COPIES")
    fileMisCopyDbm = ngamsDbm.ngamsDbm(fileMisCopyDbmName, writePerm=1)

    # - DB that contains information about files stored on the disk.
    # It is first filled with all files found on the disk; the registered
    # ones are then removed, so that at the end of the function this will
    # contain information about files found on the disk but not registered
    # in the NGAS DB.
    filesOnDiskDicDbmName = os.path.normpath(dbFilePat + "_FILES_ON_DISK")
    filesOnDiskDicDbm = ngamsDbm.ngamsDbm(filesOnDiskDicDbmName, writePerm=1)

    # - DB with information about files referenced by the query. A local
    # (writable) DBM is created only if the caller did not provide one;
    # a DBM provided by the caller is opened without write permission.
    if (not fileListDbmName):
        locFileListDbmName = os.path.normpath(dbFilePat + "_FILE_LIST")
        fileListDbm = ngamsDbm.ngamsDbm(locFileListDbmName, writePerm=1)
    else:
        fileListDbm = ngamsDbm.ngamsDbm(fileListDbmName, writePerm=0)

    # - Temporary DBM containing information about all File IDs defined
    # by the query.
    fileIdDbmName = os.path.normpath(dbFilePat + "_FILE_IDS")
    fileIdDbm = ngamsDbm.ngamsDbm(fileIdDbmName, writePerm=1)

    # - Temporary DBM containing information about all files available in
    # the system with the File ID/File Version defined by the query.
    complFileListDbmName = os.path.normpath(dbFilePat + "_COMPL_FILE_LIST")
    complFileListDbm = ngamsDbm.ngamsDbm(complFileListDbmName, writePerm=1)

    # - Temporary DBM that is used to figure out the number of independent
    # copies of each file concerned by the query.
    checkDicDbmName = os.path.normpath(dbFilePat + "_CHECK_DIC")
    checkDicDbm = ngamsDbm.ngamsDbm(checkDicDbmName, writePerm=1)

    # A Disk ID is given. Retrieve information about the files concerned
    # from the DB.
    if (diskId):
        logger.debug("Retrieving information about files on disk with ID: %s",
                     diskId)
        # NOTE(review): if both 'fileListDbmName' and 'diskId' are given,
        # fileListDbm was opened with writePerm=0 above but is written to
        # here - confirm that callers never pass both.
        for f in srvObj.db.getFileSummary1(None, [diskId], [],
                                           ignore=0,
                                           fileStatus=[]):
            fileListDbm.addIncKey(f)
            fileId = f[ngamsDbCore.SUM1_FILE_ID]
            fileIdDbm.add(str(fileId), "")
            fileVersion = f[ngamsDbCore.SUM1_VERSION]
            fileKey = ngamsLib.genFileKey(None, fileId, fileVersion)
            # Start out with an empty dictionary of copies for this
            # File ID/File Version; it is filled in further down.
            checkDicDbm.add(fileKey, {})
        fileListDbm.sync()
        fileIdDbm.sync()
        checkDicDbm.sync()

        # Get the list of files located on the disk. Later on, remove entries
        # from this dictionary as the files are parsed, based on their DB info,
        # further down in this method.
        #
        # Key in this dictionary is the complete filename of the file.
        logger.debug("Get list of files stored on disk ...")
        tmpDiskInfo = srvObj.getDb().getDiskInfoFromDiskId(diskId)
        diskInfoObj = ngamsDiskInfo.ngamsDiskInfo().\
                      unpackSqlResult(tmpDiskInfo)
        if ((not ignoreMounted) and (not diskInfoObj.getMounted())):
            errMsg = "Rejecting request for removing disk with ID: " +\
                     diskId + " - disk not mounted!"
            raise Exception(errMsg)
        if (not ignoreMounted):
            basePath = os.path.normpath(diskInfoObj.getMountPoint())
            pattern = "/*"
            logger.debug(
                "Generating list with files on disk with base path: %s",
                basePath)
            # Scan the directory tree level by level: each iteration appends
            # one more "/*" to the pattern, and the scan stops at the first
            # level for which the pattern matches nothing.
            while (1):
                tmpFileList = glob.glob(basePath + pattern)
                if (len(tmpFileList) == 0):
                    break
                else:
                    for filename in tmpFileList:
                        # Only regular files are recorded; the NG/AMS
                        # administrative files on the volume are left out.
                        if (os.path.isfile(filename) and
                            (os.path.basename(filename) != NGAMS_DISK_INFO)
                                and (os.path.basename(filename) !=
                                     NGAMS_VOLUME_ID_FILE)
                                and (os.path.basename(filename) !=
                                     NGAMS_VOLUME_INFO_FILE)):
                            filesOnDiskDicDbm.add(str(filename), "")
                    pattern += "/*"

    # Generate File ID DBM in case a file list DBM is given.
    if (fileListDbmName):
        logger.debug("Handling file list DBM given in the function call ...")
        fileListDbm.initKeyPtr()
        while (1):
            key, tmpFileInfo = fileListDbm.getNext()
            if (not key): break

            # Update the File ID DBM.
            fileId = tmpFileInfo[ngamsDbCore.SUM1_FILE_ID]
            fileIdDbm.add(str(fileId), "")

            # Update the DBM with references to File ID/Version sets.
            fileVersion = tmpFileInfo[ngamsDbCore.SUM1_VERSION]
            fileKey = ngamsLib.genFileKey(None, fileId, fileVersion)
            checkDicDbm.add(fileKey, {})
        fileIdDbm.sync()
        checkDicDbm.sync()

    # We need to generate a list with all files available in the system
    # with the given File ID/File Version.
    logger.debug("Retrieving information about all files available with the " +\
         "File ID/File Version as defined by the query")

    # Due to the limitation of the size of SQL queries, we have to split up
    # the SQL query in several sub-queries. The max. length of an SQL query
    # is defined by NGAMS_MAX_SQL_QUERY_SZ, we subtract 512 from this for
    # the general part of the query, and for each filename we calculate a
    # length of len(File ID) + 4 as contribution to the SQL query.
    maxQuerySize = (NGAMS_MAX_SQL_QUERY_SZ - 512)
    queryIds = []
    querySize = 0
    noOfFileIds = fileIdDbm.getCount()
    fileIdCount = 0
    fileIdDbm.initKeyPtr()
    # Dummy, non-empty start value, only used to enter the loop.
    fileId = "INIT"
    while (fileId):
        fileId, dummy = fileIdDbm.getNext()
        if (fileId):
            queryIds.append(utils.b2s(fileId))
            fileIdCount += 1
            querySize += (len(fileId) + 4)

        # Issue a sub-query when the accumulated IDs reach the size limit
        # or when the last File ID has been read.
        if queryIds and (querySize >= maxQuerySize
                         or fileIdCount == noOfFileIds):
            for f in srvObj.db.getFileSummary1(fileIds=queryIds,
                                               fileStatus=[]):
                # Take only a sub-result if that File ID + Version
                # is concerned by the query.
                tmpFileId = f[ngamsDbCore.SUM1_FILE_ID]
                tmpFileVersion = f[ngamsDbCore.SUM1_VERSION]
                tmpFileKey = ngamsLib.genFileKey(None, tmpFileId,
                                                 tmpFileVersion)
                if (checkDicDbm.hasKey(tmpFileKey)):
                    complFileListDbm.addIncKey(f)
            complFileListDbm.sync()
            queryIds = []
            querySize = 0

    # Now, go through the files found and order these such that we end up with
    # a Dictionary with "<File ID>_<File Version>" as keys referring
    # to a dictionary with the Disk IDs of the disks hosting the files as
    # keys, and the information for each file on that disk as a tuple.
    #
    # It is the intention to figure out how many copies we have of each file
    # identified by File ID + File Version stored ON DIFFERENT STORAGE MEDIAS
    # + on different hosts.
    logger.debug("Generate DBM DB with info about independent file copies ...")
    complFileListDbm.initKeyPtr()
    while (1):
        fileKey, fileInfo = complFileListDbm.getNext()
        if (not fileKey): break
        checkDicKey = ngamsLib.genFileKey(None,
                                          fileInfo[ngamsDbCore.SUM1_FILE_ID],
                                          fileInfo[ngamsDbCore.SUM1_VERSION])
        # Read-modify-write: the dictionary of copies is fetched, extended
        # with this Disk ID and stored back under the same key.
        tmpDic = checkDicDbm.get(checkDicKey)
        tmpDic[fileInfo[ngamsDbCore.SUM1_DISK_ID]] = fileInfo
        checkDicDbm.add(checkDicKey, tmpDic)

    # Check if there are at least minReqCopies occurrences of the files +
    # check that all files are registered (if a Disk ID is specified).
    logger.debug("Check for files with less copies than: %s",
                 str(minReqCopies))
    checkDicDbm.initKeyPtr()
    while (1):
        checkDicKey, tmpDic = checkDicDbm.getNext()
        if (not checkDicKey): break

        # The number of independent copies is the number of distinct
        # Disk IDs registered for this File ID/File Version.
        tmpDicKeys = list(tmpDic)
        noOfCopies = len(tmpDicKeys)
        if (noOfCopies < minReqCopies):
            # NOTE(review): if no copy at all was found for this entry
            # (tmpDic still empty), tmpDicKeys[0] raises an IndexError -
            # confirm whether this can happen, e.g. for files given via
            # 'fileListDbmName' that are not (any longer) in the DB.
            tmpFileInfo = tmpDic[tmpDicKeys[0]]
            fileId = tmpFileInfo[ngamsDbCore.SUM1_FILE_ID]
            fileVersion = tmpFileInfo[ngamsDbCore.SUM1_VERSION]
            tmpFileObj = ngamsFileInfo.ngamsFileInfo().\
                         setFileId(fileId).\
                         setFileVersion(fileVersion).\
                         setTag("Independent copies: " + str(noOfCopies))
            fileKey = ngamsLib.genFileKey(None, fileId, fileVersion)
            fileMisCopyDbm.add(str(fileKey), tmpFileObj)

        # Remove this file from the Files On Disk DBM - do this only
        # if a Disk ID is specified.
        if (diskId):
            if (diskId in tmpDic):
                # The complete filename is rebuilt from the mount point and
                # the filename stored in the DB, to match the keys used
                # when the disk was scanned.
                fileInfo = tmpDic[diskId]
                filename = os.path.\
                           normpath(fileInfo[ngamsDbCore.SUM1_MT_PT] +\
                                    "/" + fileInfo[ngamsDbCore.SUM1_FILENAME])
                filename = str(filename)
                if (filesOnDiskDicDbm.hasKey(filename)):
                    filesOnDiskDicDbm.rem(filename)

    # Close all DBM objects (presumably ngamsDbm releases the underlying
    # DBM file when the object is destroyed - TODO confirm).
    del fileMisCopyDbm
    del filesOnDiskDicDbm
    del fileListDbm
    del fileIdDbm
    del complFileListDbm
    del checkDicDbm

    # The DBM filesOnDiskDicDbmName now contains references to files,
    # which are found on the disk but not registered in the DB.
    return (fileMisCopyDbmName, filesOnDiskDicDbmName, complFileListDbmName)
Exemple #3
0
def cloneCheckDiskSpace(srvObj, cloneListDbmName, tmpFilePat, targetDiskId=""):
    """
    Check if there is enough disk space available on this NGAS host for
    carrying out the Clone Request.

    The clone job is simulated: for each file in the clone list a Target
    Disk is searched for, and the size of the file is subtracted from the
    space available on the disk selected. Disks for which the remaining
    space drops below the configured threshold are treated as completed
    for the rest of the simulation. Files for which no Target Disk could
    be found (NGAMS_AL_NO_STO_SETS) are recorded in a temporary DBM.

    srvObj:           Reference to instance of Server Object (ngamsServer).

    cloneListDbmName: Name of DBM containing the information about
                      the files to be cloned. This DB has an index number
                      as key pointing to pickled information about each file.
                      This pickled information is

                        [<File Info Object>, <Host ID>, <Mount Point>]

                      (string)

    tmpFilePat:       File pattern to be used for generating temporary
                      files (string).

    targetDiskId:     ID of disk to where the files cloned should be
                      written (string).

    Returns:          Void.
    """
    T = TRACE()

    # Make a dictionary with the available amount of space per disk.
    logger.debug("Generating dictionary with available space per disk")
    availDiskSpaceDic = {}
    mtRootDir = srvObj.getCfg().getRootDirectory()
    if (targetDiskId):
        tmpDiskInfoObjList = [ngamsDiskInfo.ngamsDiskInfo().\
                              read(srvObj.getDb(), targetDiskId)]
    else:
        tmpDiskInfoObjList = ngamsDiskUtils.\
                             getDiskInfoForMountedDisks(srvObj.getDb(),
                                                        srvObj.getHostId(),
                                                        mtRootDir)
    for diskInfoObj in tmpDiskInfoObjList:
        mtPt = diskInfoObj.getMountPoint()
        diskId = diskInfoObj.getDiskId()
        availDiskSpaceDic[diskId] = getDiskSpaceAvail(mtPt, format="B")

    # Now simulate the execution of the clone job by going sequentially
    # through the files selected for cloning and subtract the required
    # disk space from the available amount for each disk.
    #
    # When a disk reaches the threshold for considering the disk as
    # completed, it is removed from the list of available disks.

    cloneListDbm = ngamsDbm.ngamsDbm(cloneListDbmName)

    # This dictionary contains a list of files that have been simulated
    # successfully cloned. Since they are not updated in the DB, we keep
    # track of this locally.
    cloneSucDbmName = tmpFilePat + "_CLONE_SUCCESS_DB"
    rmFile(cloneSucDbmName + "*")
    cloneSucDbm = ngamsDbm.ngamsDbm(cloneSucDbmName,
                                    cleanUpOnDestr=1,
                                    writePerm=1)

    # This is used to keep track of the files that could not be cloned
    # due to lack of space.
    cloneFailDbmName = tmpFilePat + "_CLONE_FAILED_DB"
    rmFile(cloneFailDbmName + "*")
    cloneFailDbm = ngamsDbm.ngamsDbm(cloneFailDbmName,
                                     cleanUpOnDestr=1,
                                     writePerm=1)

    # This is used to simulate disk completion. If a specific target
    # disk is defined, all other disks should be ignored (=we set them to
    # completed).
    cloneSimDiskCompl = []
    if (targetDiskId):
        tmpDiskList = ngamsDiskUtils.\
                      getDiskInfoForMountedDisks(srvObj.getDb(), srvObj.getHostId(),
                                                 mtRootDir)
        for tmpDiskInfoObj in tmpDiskList:
            if (tmpDiskInfoObj.getDiskId() != targetDiskId):
                cloneSimDiskCompl.append(tmpDiskInfoObj.getDiskId())

    # Carry out the simulated clone process. The clone list is indexed
    # with consecutive numbers (as strings) starting from 0; the first
    # missing index marks the end of the list.
    ngamsDiskUtils.findTargetDiskResetCache()
    key = 0
    while (cloneListDbm.hasKey(str(key))):
        fileInfo = cloneListDbm.get(str(key))
        key += 1
        fio = fileInfo[0]
        hostName = fileInfo[1]

        text = "Simulating cloning of file - File ID: %s/%d, on disk " +\
               "with ID: %s on host: %s"
        logger.debug(text, fio.getFileId(), fio.getFileVersion(),
                     fio.getDiskId(), hostName)

        # Never clone onto the disk already hosting the file nor onto
        # disks considered as completed.
        diskExemptList = cloneSimDiskCompl + [fio.getDiskId()]
        trgDiskInfo = None
        while (1):
            try:
                trgDiskInfo = ngamsDiskUtils.\
                              findTargetDisk(srvObj.getHostId(),
                                             srvObj.getDb(), srvObj.getCfg(),
                                             fio.getFormat(), 0,
                                             diskExemptList, 1)
            except Exception as e:
                # No Target Disk was selected. Forget a candidate found in
                # a previous iteration (and rejected since it already hosts
                # the file), otherwise the file size would be subtracted
                # from that disk below.
                trgDiskInfo = None
                if (str(e).find("NGAMS_AL_NO_STO_SETS") != -1):
                    # No more candidate Target Disks for this type
                    # of data - this file cannot be cloned.
                    cloneFailDbm.addIncKey(fio)
                else:
                    logger.warning("Error while looking for Target Disk " +\
                                   "for file with File ID: %s: %s",
                                   fio.getFileId(), str(e))
                break

            # Check if a file with that ID + version is already
            # stored on the selected Target Disk.
            fileInDb = srvObj.getDb().fileInDb(trgDiskInfo.getDiskId(),
                                               fio.getFileId(),
                                               fio.getFileVersion())
            fileKey = ngamsLib.genFileKey(trgDiskInfo.getDiskId(),
                                          fio.getFileId(),
                                          fio.getFileVersion())
            fileSimCloned = cloneSucDbm.hasKey(fileKey)
            if (fileInDb or fileSimCloned):
                # This file is already stored on the given disk.
                # Add to the exempt list.
                diskExemptList.append(trgDiskInfo.getDiskId())
            else:
                # OK, this disk should be OK, stop looking for a
                # suitable Target Disk.
                logger.debug("Found suitable disk with ID: %s/Slot ID: %s",
                             trgDiskInfo.getDiskId(), trgDiskInfo.getSlotId())
                cloneSucDbm.add(fileKey, "")
                break

        # We now subtract the size of the file from the available amount of
        # disk space for the selected Target Disk. When the amount of available
        # space goes below the threshold defined for this NG/AMS system that
        # disk is considered to be completed.
        if (trgDiskInfo):
            diskId = trgDiskInfo.getDiskId()
            availDiskSpaceDic[diskId] -= float(fio.getFileSize())
            # The available space is kept in bytes, the threshold is in MB.
            if ((availDiskSpaceDic[diskId] / 1048576.0) < \
                srvObj.getCfg().getFreeSpaceDiskChangeMb()):
                cloneSimDiskCompl.append(diskId)
Exemple #4
0
def checkUpdateDbSnapShots(srvObj, stopEvt):
    """
    Check if a DB Snapshot exists for the DB connected. If not, it is
    created according to the contents of the NGAS DB (if possible). During
    this creation it is checked whether the files are physically stored on
    the disk.

    srvObj:        Reference to NG/AMS server class object (ngamsServer).

    Returns:       Void.
    """
    snapshotDbm = None
    tmpSnapshotDbm = None

    if (not srvObj.getCfg().getDbSnapshot()):
        logger.debug("NOTE: DB Snapshot Feature is switched off")
        return

    logger.debug("Generate list of disks to check ...")
    tmpDiskIdMtPtList = srvObj.getDb().getDiskIdsMtPtsMountedDisks(
        srvObj.getHostId())
    diskIdMtPtList = []
    for diskId, mtPt in tmpDiskIdMtPtList:
        diskIdMtPtList.append([mtPt, diskId])
    diskIdMtPtList.sort()
    logger.debug("Generated list of disks to check: %s", str(diskIdMtPtList))

    # Generate temporary snapshot filename.
    ngasId = srvObj.getHostId()
    tmpDir = ngamsHighLevelLib.getTmpDir(srvObj.getCfg())

    # Temporary DBM with file info from the DB.
    tmpSnapshotDbmName = os.path.normpath(tmpDir + "/" + ngasId + "_" +\
                                          NGAMS_DB_NGAS_FILES)

    # Temporary DBM to contain information about 'lost files', i.e. files,
    # which are registered in the DB and found in the DB Snapshot, but
    # which are not found on the disk.
    logger.debug("Create DBM to hold information about lost files ...")
    lostFileRefsDbmName = os.path.normpath(tmpDir + "/" + ngasId +\
                                           "_LOST_FILES")
    rmFile(lostFileRefsDbmName + "*")
    lostFileRefsDbm = ngamsDbm.ngamsDbm(lostFileRefsDbmName, writePerm=1)

    # Carry out the check.
    for mtPt, diskId in diskIdMtPtList:

        checkStopJanitorThread(stopEvt)

        logger.debug("Check/create/update DB Snapshot for disk with " +\
             "mount point: %s", mtPt)

        try:
            snapshotDbm = _openDbSnapshot(srvObj.getCfg(), mtPt)
            if (snapshotDbm == None):
                continue

            # The scheme for synchronizing the Snapshot and the DB is:
            #
            # - Loop over file entries in the Snapshot:
            #  - If in DB:
            #    - If file on disk     -> OK, do nothing.
            #    - If file not on disk -> Accumulate + issue collective warning.
            #
            #  - If entry not in DB:
            #    - If file on disk     -> Add entry in DB.
            #    - If file not on disk -> Remove entry from Snapshot.
            #
            # - Loop over entries for that disk in the DB:
            #  - If entry in Snapshot  -> OK, do nothing.
            #  - If entry not in Snapshot:
            #    - If file on disk     -> Add entry in Snapshot.
            #    - If file not on disk -> Remove entry from DB.

            # Create a temporary DB Snapshot with the files from the DB.
            try:
                rmFile(tmpSnapshotDbmName + "*")
                tmpSnapshotDbm = bsddb.hashopen(tmpSnapshotDbmName, "c")

                for fileInfo in srvObj.db.getFileInfoList(diskId, ignore=None):
                    fileKey = _genFileKey(fileInfo)
                    encFileInfoDic = _encFileInfo(srvObj.getDb(),
                                                  tmpSnapshotDbm, fileInfo)
                    _addInDbm(tmpSnapshotDbm, fileKey, encFileInfoDic)
                    checkStopJanitorThread(stopEvt)
                tmpSnapshotDbm.sync()
            except:
                rmFile(tmpSnapshotDbmName)
                raise

            #####################################################################
            # Loop over the possible entries in the DB Snapshot and compare
            # these against the DB.
            #####################################################################
            logger.debug("Loop over file entries in the DB Snapshot - %s ...",
                         diskId)
            count = 0
            try:
                key, pickleValue = snapshotDbm.first()
            except Exception as e:
                msg = "Exception raised accessing DB Snapshot for disk: %s. " +\
                      "Error: %s"
                logger.debug(msg, diskId, str(e))
                key = None
                snapshotDbm.dbc = None

            # Create a DBM which is used to keep the list of files to remove
            # from the DB Snapshot.
            snapshotDelDbmName = ngamsHighLevelLib.\
                                 genTmpFilename(srvObj.getCfg(),
                                                NGAMS_DB_NGAS_FILES)
            snapshotDelDbm = ngamsDbm.ngamsDbm(snapshotDelDbmName,
                                               cleanUpOnDestr=1,
                                               writePerm=1)

            #################################################################################################
            #jagonzal: Replace looping approach to avoid exceptions coming from the next() method underneath
            #          when iterating at the end of the table that are prone to corrupt the hash table object
            #while (key):
            for key, pickleValue in snapshotDbm.iteritems():
                #################################################################################################
                value = cPickle.loads(pickleValue)

                # Check if an administrative element, if yes add it if necessary.
                if b"___" in key:
                    if (not tmpSnapshotDbm.has_key(key)):
                        tmpSnapshotDbm[key] = pickleValue
                else:
                    tmpFileObj = _encFileInfo2Obj(srvObj.getDb(), snapshotDbm,
                                                  value)
                    if (tmpFileObj is None):
                        continue
                    complFilename = os.path.normpath(mtPt + "/" +\
                                                     tmpFileObj.getFilename())

                    # Is the file in the DB?
                    if (tmpSnapshotDbm.has_key(key)):
                        # Is the file on the disk?
                        if (not os.path.exists(complFilename)):
                            fileVer = tmpFileObj.getFileVersion()
                            tmpFileObj.setTag(complFilename)
                            fileKey = ngamsLib.genFileKey(
                                tmpFileObj.getDiskId(), tmpFileObj.getFileId(),
                                fileVer)
                            lostFileRefsDbm.add(fileKey, tmpFileObj)
                            lostFileRefsDbm.sync()
                    elif (not tmpSnapshotDbm.has_key(key)):
                        tmpFileObj = _encFileInfo2Obj(srvObj.getDb(),
                                                      snapshotDbm, value)
                        if (tmpFileObj is None):
                            continue

                        # Is the file on the disk?
                        if (os.path.exists(complFilename)):
                            # Add this entry in the NGAS DB.
                            tmpFileObj.write(srvObj.getHostId(),
                                             srvObj.getDb(), 0, 1)
                            tmpSnapshotDbm[key] = pickleValue
                        else:
                            # Remove this entry from the DB Snapshot.
                            msg = "Scheduling entry: %s in DB Snapshot " +\
                                  "for disk with ID: %s for removal"
                            logger.debug(msg, diskId, key)
                            # Add entry in the DB Snapshot Deletion DBM marking
                            # the entry for deletion.
                            if (_updateSnapshot(srvObj.getCfg())):
                                snapshotDelDbm.add(key, 1)

                        del tmpFileObj

                # Be friendly and sync the DB file every now and then
                count += 1
                if (count % 100) == 0:
                    if _updateSnapshot(srvObj.getCfg()):
                        snapshotDbm.sync()
                    checkStopJanitorThread(stopEvt)
                    tmpSnapshotDbm.sync()

                #################################################################################################
                #jagonzal: Replace looping approach to avoid exceptions coming from the next() method underneath
                #          when iterating at the end of the table that are prone to corrupt the hash table object
                #try:
                #    key, pickleValue = snapshotDbm.next()
                #except:
                #    key = None
                #    snapshotDbm.dbc = None
                #################################################################################################

            # Now, delete entries in the DB Snapshot if there are any scheduled for
            # deletion.

            #################################################################################################
            #jagonzal: Replace looping approach to avoid exceptions coming from the next() method underneath
            #          when iterating at the end of the table that are prone to corrupt the hash table object
            #snapshotDelDbm.initKeyPtr()
            #while (True):
            #    key, value = snapshotDelDbm.getNext()
            #    if (not key): break
            for key, value in snapshotDelDbm.iteritems():
                # jagonzal: We need to reformat the values and skip administrative elements #################
                if b'__' in key:
                    continue
                #############################################################################################
                msg = "Removing entry: %s from DB Snapshot for disk with ID: %s"
                logger.debug(msg, key, diskId)
                del snapshotDbm[key]
            #################################################################################################
            del snapshotDelDbm

            logger.debug("Looped over file entries in the DB Snapshot - %s",
                         diskId)
            # End-Loop: Check DB against DB Snapshot. ###########################
            if (_updateSnapshot(srvObj.getCfg())): snapshotDbm.sync()
            tmpSnapshotDbm.sync()

            logger.info(
                "Checked/created/updated DB Snapshot for disk with mount point: %s",
                mtPt)

            #####################################################################
            # Loop over the entries in the DB and compare these against the
            # DB Snapshot.
            #####################################################################
            logger.debug("Loop over the entries in the DB - %s ...", diskId)
            count = 0
            try:
                key, pickleValue = tmpSnapshotDbm.first()
            except:
                key = None
                tmpSnapshotDbm.dbc = None

            #################################################################################################
            #jagonzal: Replace looping approach to avoid exceptions coming from the next() method underneath
            #          when iterating at the end of the table that are prone to corrupt the hash table object
            #while (key):
            for key, pickleValue in tmpSnapshotDbm.iteritems():
                #################################################################################################
                value = cPickle.loads(pickleValue)

                # Check if it is an administrative element, if yes add it if needed
                if b"___" in key:
                    if (not snapshotDbm.has_key(key)):
                        snapshotDbm[key] = pickleValue
                else:
                    # Is the file in the DB Snapshot?
                    if (not snapshotDbm.has_key(key)):
                        tmpFileObj = _encFileInfo2Obj(srvObj.getDb(),
                                                      tmpSnapshotDbm, value)
                        if (tmpFileObj is None):
                            continue

                        # Is the file on the disk?
                        complFilename = os.path.normpath(mtPt + "/" +\
                                                         tmpFileObj.getFilename())
                        if (os.path.exists(complFilename)):
                            # Add this entry in the DB Snapshot.
                            if (_updateSnapshot(srvObj.getCfg())):
                                snapshotDbm[key] = pickleValue
                        else:
                            # Remove this entry from the DB (if it is there).
                            _delFileEntry(srvObj.getHostId(), srvObj.getDb(),
                                          tmpFileObj)
                        del tmpFileObj
                    else:
                        # We always update the DB Snapshot to ensure it is
                        # in-sync with the DB entry.
                        if (_updateSnapshot(srvObj.getCfg())):
                            snapshotDbm[key] = pickleValue

                # Be friendly and sync the DB file every now and then
                count += 1
                if (count % 100) == 0:
                    if _updateSnapshot(srvObj.getCfg()):
                        snapshotDbm.sync()
                    checkStopJanitorThread(stopEvt)

                #################################################################################################
                #jagonzal: Replace looping aproach to avoid exceptions coming from the next() method underneath
                #          when iterating at the end of the table that are prone to corrupt the hash table object
                #try:
                #    key, pickleValue = tmpSnapshotDbm.next()
                #except:
                #    key = None
                #################################################################################################
            logger.debug("Checked DB Snapshot against DB - %s", diskId)
            # End-Loop: Check DB Snapshot against DB. ###########################
            if (_updateSnapshot(srvObj.getCfg())):
                snapshotDbm.sync()

        finally:
            if snapshotDbm:
                snapshotDbm.close()

            if tmpSnapshotDbm:
                tmpSnapshotDbm.close()

    # Check if lost files found.
    logger.debug("Check if there are Lost Files ...")
    noOfLostFiles = lostFileRefsDbm.getCount()
    if (noOfLostFiles):
        statRep = os.path.normpath(tmpDir + "/" + ngasId +\
                                   "_LOST_FILES_NOTIF_EMAIL.txt")
        fo = open(statRep, "w")
        timeStamp = toiso8601()
        tmpFormat = "JANITOR THREAD - LOST FILES DETECTED:\n\n" +\
                    "==Summary:\n\n" +\
                    "Date:                       %s\n" +\
                    "NGAS Host ID:               %s\n" +\
                    "Lost Files:                 %d\n\n" +\
                    "==File List:\n\n"
        fo.write(tmpFormat % (timeStamp, srvObj.getHostId(), noOfLostFiles))

        tmpFormat = "%-32s %-32s %-12s %-80s\n"
        fo.write(tmpFormat %
                 ("Disk ID", "File ID", "File Version", "Expected Path"))
        fo.write(tmpFormat % (32 * "-", 32 * "-", 12 * "-", 80 * "-"))

        # Loop over the files an generate the report.
        lostFileRefsDbm.initKeyPtr()
        while (1):
            key, fileInfoObj = lostFileRefsDbm.getNext()
            if (not key): break
            diskId = fileInfoObj.getDiskId()
            fileId = fileInfoObj.getFileId()
            fileVersion = fileInfoObj.getFileVersion()
            filename = fileInfoObj.getTag()
            fo.write(tmpFormat % (diskId, fileId, fileVersion, filename))
        fo.write("\n\n==END\n")
        fo.close()
        ngamsNotification.notify(srvObj.getHostId(), srvObj.getCfg(),
                                 NGAMS_NOTIF_DATA_CHECK,
                                 "LOST FILE(S) DETECTED", statRep, [], 1,
                                 NGAMS_TEXT_MT, "JANITOR_THREAD_LOST_FILES", 1)
        rmFile(statRep)
    logger.debug("Number of lost files found: %d", noOfLostFiles)

    # Clean up.
    del lostFileRefsDbm
    rmFile(lostFileRefsDbmName + "*")
Exemple #5
0
def _dumpFileInfo(srvObj, disks_to_check, tmpFilePat, stopEvt):
    """
    Prepare the DBM files and the statistics needed for one checking cycle.

    One Queue DBM and one Error DBM is kept per disk in the cache directory
    found below the configured root directory. They are named:

       <Cache Dir>/<NGAMS_DATA_CHECK_THR>_QUEUE_<Disk ID>.bsddb
       <Cache Dir>/<NGAMS_DATA_CHECK_THR>_ERRORS_<Disk ID>.bsddb

    The function goes through the following steps:

       1. For each Queue DBM file found in the cache directory, check if
          its disk is among the disks to check. If not, the DBM files of
          that disk are removed; otherwise its Queue/Error DBMs are opened.

       2. For each disk to check without DBMs, the file information
          returned by the DB for that disk is dumped into a temporary
          Queue DBM, which is then moved to its final name. The Error DBM
          is opened as well.

       3. Collect the files found on the disks, dropping those the DB
          reports with the ignore flag set.

       4. Sum up the number and volume (MB) of files queued and initialize
          the check status with these figures.

    srvObj:          Reference to server object (ngamsServer).

    disks_to_check:  Collection keyed by Disk ID of the disks to check. It
                     is used for membership tests and via keys()
                     (presumably a dictionary).

    tmpFilePat:      Pattern for temporary files (string).

    stopEvt:         Object handed on to _stopDataCheckThr() and
                     collect_files_on_disk() (presumably the event used
                     to request the thread to stop).

    Returns:         Tuple with the files found on disk, the dictionary
                     mapping Disk ID to (Queue DBM, Error DBM) and the
                     object returned by _initFileCheckStatus() (tuple).
    """
    T = TRACE()

    rootDir = srvObj.getCfg().getRootDirectory()
    cacheDir = os.path.join(rootDir, NGAMS_CACHE_DIR)
    checkCreatePath(os.path.normpath(cacheDir))

    def queueDbmName(dbmDiskId):
        # Final name of the Queue DBM of the given disk.
        return "%s/%s_QUEUE_%s.bsddb" % (cacheDir, NGAMS_DATA_CHECK_THR,
                                         dbmDiskId)

    def errorDbmName(dbmDiskId):
        # Name of the Error DBM of the given disk.
        return "%s/%s_ERRORS_%s.bsddb" % (cacheDir, NGAMS_DATA_CHECK_THR,
                                          dbmDiskId)

    # -- Step 1: Existing Queue/Error DBM files ------------------------------
    logger.debug("Loop over/check existing Queue/Error DBM Files ...")
    queuePat = "/".join((cacheDir, NGAMS_DATA_CHECK_THR + "_QUEUE_*.bsddb"))
    dbmObjDic = {}
    for foundDbmFile in glob.glob(queuePat):
        _stopDataCheckThr(stopEvt)
        # The Disk ID is the text between the last "_" and the next ".".
        trailer = foundDbmFile.split("_")[-1]
        diskId = trailer.split(".")[0]
        if diskId in disks_to_check:
            # Disk still scheduled: keep references to its Queue/Error DBM.
            dbmObjDic[diskId] = (ngamsDbm.ngamsDbm(queueDbmName(diskId), 0, 1),
                                 ngamsDbm.ngamsDbm(errorDbmName(diskId), 0, 1))
            continue
        # Disk not scheduled anymore: get rid of its DBM files.
        rmFile("%s/%s*%s.bsddb" % (cacheDir, NGAMS_DATA_CHECK_THR, diskId))
    logger.debug("Looped over/checked existing Queue/Error DBM Files")

    # -- Step 2: Create the DBMs for disks which do not have them yet --------
    logger.debug("Create DBM files for disks to be checked ...")
    startDbFileRd = time.time()
    for diskId in disks_to_check.keys():
        _stopDataCheckThr(stopEvt)

        if diskId not in dbmObjDic:
            # Fill a temporary Queue DBM first; it only gets its final name
            # once all file entries have been written and synchronized.
            finalQueueName = queueDbmName(diskId)
            tmpQueueName = tmpFilePat + "_" + os.path.basename(finalQueueName)
            queueDbm = ngamsDbm.ngamsDbm(tmpQueueName, 0, 1)

            diskFiles = srvObj.getDb().getFileSummary1(diskIds=[diskId],
                                                       ignore=0,
                                                       fileStatus=[],
                                                       lowLimIngestDate=None,
                                                       order=0)
            for dbRow in diskFiles:
                queueDbm.add(
                    ngamsLib.genFileKey(None,
                                        dbRow[ngamsDbCore.SUM1_FILE_ID],
                                        dbRow[ngamsDbCore.SUM1_VERSION]),
                    dbRow)
            queueDbm.sync()

            # Move the temporary DBM to its final name and re-open it there.
            mvFile(tmpQueueName, finalQueueName)
            queueDbm = ngamsDbm.ngamsDbm(finalQueueName, 0, 1)

            dbmObjDic[diskId] = (queueDbm,
                                 ngamsDbm.ngamsDbm(errorDbmName(diskId), 0, 1))

            _stopDataCheckThr(stopEvt)
    logger.debug("Queried info for files to be checked from DB. Time: %.3fs",
                 time.time() - startDbFileRd)
    logger.debug("Checked that disks scheduled for checking have DBM files")

    # -- Step 3: Files found on the disks -------------------------------------
    # Later on it is checked whether these are registered or not.
    collectStart = time.time()
    files_on_disk = collect_files_on_disk(stopEvt, disks_to_check)
    collectEnd = time.time()
    logger.debug("Collected references to %d files on disks in %.3f [s]",
                 len(files_on_disk), collectEnd - collectStart)

    # Files flagged to be ignored in the DB are not taken into account.
    logger.debug("Retrieving information about files to be ignored ...")
    spuriousFiles = srvObj.getDb().getFileSummarySpuriousFiles1(
        srvObj.getHostId(), fetch_size=1000)
    for dbRow in spuriousFiles:
        if not dbRow[ngamsDbCore.SUM1_FILE_IGNORE]:
            continue
        ignoredName = os.path.normpath(dbRow[ngamsDbCore.SUM1_MT_PT] + "/" +
                                       dbRow[ngamsDbCore.SUM1_FILENAME])
        if ignoredName in files_on_disk:
            del files_on_disk[ignoredName]
    logger.debug("Retrieved information about files to be ignored")

    # -- Step 4: Statistics for the checking cycle ----------------------------
    logger.debug("Initialize the statistics for the checking cycle ...")
    amountMb = 0.0
    noOfFiles = 0
    for diskId in disks_to_check.keys():
        # jagonzal: iteritems() is used rather than initKeyPtr()/getNext()
        # to avoid exceptions from next() at the end of the table, which
        # are prone to corrupt the hash table object.
        for fileKey, dbVal in dbmObjDic[diskId][0].iteritems():
            # Administrative elements (keys containing "__") are skipped.
            if "__" in str(fileKey):
                continue
            queuedInfo = cPickle.loads(dbVal)
            noOfFiles += 1
            amountMb += float(queuedInfo[ngamsDbCore.SUM1_FILE_SIZE]) / 1048576.0

    stats = _initFileCheckStatus(srvObj, amountMb, noOfFiles)

    return files_on_disk, dbmObjDic, stats
Exemple #6
0
def _updateFileCheckStatus(srvObj,
                           fileSize,
                           diskId,
                           fileId,
                           fileVersion,
                           report,
                           stats,
                           dbmObjDic,
                           force=0):
    """
    Update the status of the DCC.

    srvObj:       Reference to instance of ngamsServer object (ngamsServer).

    fileSize:     Size of file (in bytes) that was checked (integer)

    diskId:       ID of disk hosting file checked (string).

    fileId:       ID of file concerned (string).

    fileVersion:  Version of file concered (integer).

    report:       List containing the result of the file checking.
                  Refer to documentation for ngamsFileUtils.checkFile()
                  for futher information (list).

    force:        If set to 1 a DB update will be forced (integer/0|1).

    Returns:      Void.
    """

    now = time.time()
    with stats.lock:

        # Calculate the new values.
        if (fileId):
            stats.mbs_checked += float(fileSize) / 1048576.0
            stats.files_checked += 1

        checkTime = now - stats.time_start
        stats.check_rate = stats.mbs_checked / checkTime
        if stats.check_rate > 0:
            stats.remainding_time = (stats.mbs -
                                     stats.mbs_checked) / stats.check_rate
            statEstimTime = stats.mbs / stats.check_rate
        else:
            stats.remainding_time = 0
            statEstimTime = 0

        # Update DB only every 10s.
        if force or now - stats.last_db_update >= 10:
            srvObj.getDb().updateDataCheckStat(
                srvObj.getHostId(), stats.time_start, stats.time_remaining,
                statEstimTime, stats.check_rate, stats.mbs, stats.mbs_checked,
                stats.files, stats.files_checked)
            stats.last_db_update = now

        # Update report if an error was found.
        if (diskId and report):
            fileKey = ngamsLib.genFileKey(None, fileId, fileVersion)
            dbmObjDic[diskId][1].add(fileKey, report)

        statFormat = "DCC Status: Time Remaining (s): %d, " +\
                     "Rate (MB/s): %.3f, " +\
                     "Volume/Checked (MB): %.3f/%.3f, Files/Checked: %d/%d"
        logger.debug(statFormat, stats.time_remaining, stats.check_rate,
                     stats.mbs_checked, stats.mbs, stats.files,
                     stats.files_checked)
Exemple #7
0
def check_copies(disk_id, notification_email):
    """
    Check the total number of copies of the files found on the referred disk
    in the NGAS DB. Generate a report indicating the number of copies found.

    The report lists, for each File ID/File Version, the total number of
    copies registered and the number of "good" copies (file status starting
    with "0" and ignore flag equal to 0). Entries are ordered by (1) number
    of copies and (2) file key. The report is printed and, if requested,
    sent by email.

    :param disk_id: ID of disk for which to check files (string)
    :param notification_email: Comma separated list of recipients of the report generated (string)
    """
    interface, server, db, user, password = ngasUtilsLib.get_db_parameters()
    # NOTE(review): b64decode() returns bytes on Python 3 - confirm that the
    # DB driver accepts the password in that form.
    password = base64.b64decode(password)
    params = {
        "dsn": db,
        "user": user,
        "password": password,
        "threaded": True
    }
    connection = ngamsDb.ngamsDb(interface, params)

    # Get the information about the files on the referenced disk
    logger.info("Retrieving info about files on disk: %s ...", disk_id)
    disk_file_generator = connection.getFileSummary1(diskIds=[disk_id])

    # Collect the File IDs of the files stored on the disk
    logger.info("Retrieving info about all File IDs/Versions in the system on disk: %s ...", disk_id)
    disk_file_list = [file_info[ngamsDbCore.SUM1_FILE_ID]
                      for file_info in disk_file_generator]
    print("")
    logger.info("Retrieved info about %d files on disk: %s", len(disk_file_list), disk_id)

    # Get all files found in the system with the given File IDs
    logger.info("Retrieving info about all File IDs/Versions in the system on disk: %s ...", disk_id)
    glob_file_list = []
    if disk_file_list:
        # NOTE(review): an empty 'fileIds' list presumably means "no filter"
        # to getFileSummary1() - the query is skipped for an empty disk so
        # that the report cannot end up listing unrelated files. Confirm.
        glob_file_list = list(connection.getFileSummary1(fileIds=disk_file_list))
    print("")
    logger.info("Retrieved info about %d File IDs/Versions in the system on disk: %s",
                len(glob_file_list), disk_id)

    # Now, go through this list, and generate a dictionary with File ID/File Version as keys
    glob_file_dict = {}
    for file_info in glob_file_list:
        file_key = ngamsLib.genFileKey(None, file_info[ngamsDbCore.SUM1_FILE_ID],
                                       file_info[ngamsDbCore.SUM1_VERSION])
        glob_file_dict.setdefault(file_key, []).append(file_info)

    # Order the entries according to (1) Number of copies and (2) Alphabetically.
    # sorted() is used since dict.keys() cannot be sorted in place on Python 3.
    sorted_entries = sorted(glob_file_dict.items(),
                            key=lambda entry: (len(entry[1]), entry[0]))

    # Go through the ordered entries and add one report line per File ID/File Version
    message_format = "{:60.60s} {:7.7s} {:5.5s} {:4.4s}\n"
    report_parts = [
        "FILE COPIES CHECK REPORT:\n\n",
        "Disk ID: " + disk_id + "\n\n",
        message_format.format("File ID", "Version", "Total", "Good"),
        50 * "-" + "\n",
    ]
    for file_key, file_info_list in sorted_entries:
        total_copies = len(file_info_list)
        good_copies = sum(
            1 for file_info in file_info_list
            if file_info[ngamsDbCore.SUM1_FILE_STATUS][0] == "0"
            and file_info[ngamsDbCore.SUM1_FILE_IGNORE] == 0)
        # All entries below one key share the same File ID and File Version
        file_id = file_info_list[-1][ngamsDbCore.SUM1_FILE_ID]
        file_version = file_info_list[-1][ngamsDbCore.SUM1_VERSION]
        report_parts.append(message_format.format(file_id, str(file_version),
                                                  str(total_copies), str(good_copies)))

    if sorted_entries:
        report_parts.append(50 * "-" + "\n\n")
    else:
        report_parts.append("No files found on the given disk!\n\n")
    report = "".join(report_parts)
    print("\n" + report)

    if notification_email:
        # The subject must use a str.format() placeholder; a "%s" would be
        # left verbatim in the subject by format().
        ngasUtilsLib.send_email("ngasCheckFileCopies: FILE COPIES CHECK REPORT ({})".format(disk_id),
                                notification_email, report)