Example 1
def putFile(req, file):
    if not Protocol.isSecure(req):
        return False
    if '/CN=limited proxy' in req.subprocess_env['SSL_CLIENT_S_DN']:
        return False
    _logger.debug("putFile : start %s %s" %
                  (req.subprocess_env['SSL_CLIENT_S_DN'], file.filename))
    # size check
    fullSizeLimit = 768 * 1024 * 1024
    if not file.filename.startswith('sources.'):
        noBuild = True
        sizeLimit = 100 * 1024 * 1024
    else:
        noBuild = False
        sizeLimit = fullSizeLimit
    # get file size
    contentLength = 0
    try:
        contentLength = long(req.headers_in["content-length"])
    except Exception:
        if "content-length" in req.headers_in:
            _logger.error("cannot get CL : %s" %
                          req.headers_in["content-length"])
        else:
            _logger.error("no CL")
    _logger.debug("size %s" % contentLength)
    if contentLength > sizeLimit:
        errStr = "ERROR : Upload failure. Exceeded size limit %s>%s." % (
            contentLength, sizeLimit)
        if noBuild:
            errStr += " Please submit the job without --noBuild/--libDS since those options impose a tighter size limit"
        else:
            errStr += " Please remove redundant files from your workarea"
        _logger.error(errStr)
        _logger.debug("putFile : end")
        return errStr
    try:
        fileFullPath = '%s/%s' % (panda_config.cache_dir,
                                  file.filename.split('/')[-1])
        # avoid overwriting
        if os.path.exists(fileFullPath):
            # touch
            os.utime(fileFullPath, None)
            # send error message
            errStr = "ERROR : Cannot overwrite file"
            _logger.debug('putFile : cannot overwrite file %s' % file.filename)
            _logger.debug("putFile : end")
            return errStr
        # write
        fo = open(fileFullPath, 'wb')
        fileContent = file.file.read()
        fo.write(fileContent)
        fo.close()
    except Exception:
        errStr = "ERROR : Cannot write file"
        _logger.error(errStr)
        _logger.debug("putFile : end")
        return errStr
    # checksum
    try:
        # decode Footer
        footer = fileContent[-8:]
        checkSum, isize = struct.unpack("II", footer)
        _logger.debug("CRC from gzip Footer %s" % checkSum)
    except Exception:
        # calculate on the fly
        """
        import zlib
        checkSum = zlib.adler32(fileContent) & 0xFFFFFFFF
        """
        # use None to avoid delay for now
        checkSum = None
        _logger.debug("CRC calculated %s" % checkSum)
    # file size
    fileSize = len(fileContent)
    # user name
    username = cleanUserID(req.subprocess_env['SSL_CLIENT_S_DN'])
    _logger.debug("putFile : written dn=%s file=%s size=%s crc=%s" % \
                  (username,file.filename,fileSize,checkSum))
    # put file info to DB
    statClient, outClient = Client.insertSandboxFileInfo(
        username, file.filename, fileSize, checkSum)
    if statClient != 0 or outClient.startswith("ERROR"):
        _logger.error("putFile : failed to put sandbox to DB with %s %s" %
                      (statClient, outClient))
        #_logger.debug("putFile : end")
        #return "ERROR : Cannot insert sandbox to DB"
    else:
        _logger.debug("putFile : inserted sandbox to DB with %s" % outClient)
    # store to cassandra
    if getattr(panda_config, 'cacheUseCassandra', False) == True:
        try:
            # time-stamp
            timeNow = datetime.datetime.utcnow()
            creationTime = timeNow.strftime('%Y-%m-%d %H:%M:%S')
            # user name
            username = req.subprocess_env['SSL_CLIENT_S_DN']
            username = username.replace('/CN=proxy', '')
            username = username.replace('/CN=limited proxy', '')
            # file size
            fileSize = len(fileContent)
            # key
            fileKeyName = file.filename.split('/')[-1]
            sizeCheckSum = '%s:%s' % (fileSize, checkSum)
            # insert to cassandra
            import pycassa
            pool = pycassa.ConnectionPool(panda_config.cacheKeySpace)
            filefamily = pycassa.ColumnFamily(pool,
                                              panda_config.cacheFileTable)
            # avoid overwriting
            gotoNextCassa = True
            if filefamily.get_count(fileKeyName) > 0:
                # touch
                touchFlag = touchFileCassa(filefamily, fileKeyName, timeNow)
                if touchFlag:
                    gotoNextCassa = False
                    # send error message
                    errStr = "ERROR : Cannot overwrite file in Cassandra"
                    _logger.error(errStr)
                    if not panda_config.cacheIgnoreCassandraError:
                        _logger.debug("putFile : end")
                        return errStr
            # check uniqueness with size and checksum
            if gotoNextCassa:
                try:
                    uniqExp = pycassa.index.create_index_expression(
                        'uniqID', sizeCheckSum)
                    userExp = pycassa.index.create_index_expression(
                        'user', username)
                    tmpClause = pycassa.index.create_index_clause(
                        [uniqExp, userExp])
                    tmpResults = filefamily.get_indexed_slices(
                        tmpClause, columns=['creationTime'])
                    for oldFileKeyName, tmpDict in tmpResults:
                        _logger.debug('The same size and chksum %s found in old:%s and new:%s' %
                                      (sizeCheckSum, oldFileKeyName, fileKeyName))
                        # touch
                        touchFlag = touchFileCassa(filefamily, oldFileKeyName,
                                                   timeNow)
                        if touchFlag:
                            # make alias
                            _logger.debug('Making alias %s->%s' %
                                          (fileKeyName, oldFileKeyName))
                            insertWithRetryCassa(
                                filefamily, fileKeyName, {
                                    'alias': oldFileKeyName,
                                    'creationTime': creationTime,
                                    'nSplit': 0,
                                },
                                'putFile : make alias for %s' % file.filename)
                            # set time
                            touchFileCassa(filefamily, fileKeyName, timeNow)
                            _logger.debug("putFile : end")
                            return True
                except Exception:
                    gotoNextCassa = False
                    errType, errValue = sys.exc_info()[:2]
                    errStr = "cannot make alias for %s due to %s %s" % (
                        fileKeyName, errType, errValue)
                    _logger.error(errStr)
                    if not panda_config.cacheIgnoreCassandraError:
                        _logger.debug("putFile : end")
                        return errStr
            # insert new record
            if gotoNextCassa:
                splitIdx = 0
                splitSize = 5 * 1024 * 1024
                nSplit, tmpMod = divmod(len(fileContent), splitSize)
                if tmpMod != 0:
                    nSplit += 1
                _logger.debug('Inserting %s with %s blocks' %
                              (fileKeyName, nSplit))
                for splitIdx in range(nSplit):
                    # split to small chunks since cassandra is not good at large files
                    tmpFileContent = fileContent[splitSize * splitIdx:
                                                 splitSize * (splitIdx + 1)]
                    tmpFileKeyName = fileKeyName
                    tmpAttMap = {
                        'file': tmpFileContent,
                        'user': username,
                        'creationTime': creationTime,
                    }
                    if splitIdx == 0:
                        tmpAttMap['size'] = fileSize
                        tmpAttMap['nSplit'] = nSplit
                        tmpAttMap['uniqID'] = sizeCheckSum
                        tmpAttMap['checkSum'] = str(checkSum)
                    else:
                        tmpFileKeyName += '_%s' % splitIdx
                        tmpAttMap['size'] = 0
                        tmpAttMap['nSplit'] = 0
                    # insert with retry
                    insertWithRetryCassa(filefamily, tmpFileKeyName, tmpAttMap,
                                         'putFile : insert %s' % file.filename)
                # set time
                touchFileCassa(filefamily, fileKeyName, timeNow)
        except Exception:
            errType, errValue = sys.exc_info()[:2]
            errStr = "cannot put %s into Cassandra due to %s %s" % (
                fileKeyName, errType, errValue)
            _logger.error(errStr)
            # send error message
            errStr = "ERROR : " + errStr
            if not panda_config.cacheIgnoreCassandraError:
                _logger.debug("putFile : end")
                return errStr
    _logger.debug("putFile : %s end" % file.filename)
    return True
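
Both examples take the CRC from the gzip trailer instead of recomputing it over the whole sandbox: in the gzip format the last 8 bytes of a stream are CRC32 and ISIZE (uncompressed length modulo 2**32), stored little-endian, so "<II" is the portable struct format. A minimal, self-contained sketch of that trick (independent of the PanDA code above):

import gzip
import struct
import zlib

payload = b'example sandbox content' * 1000
gzipped = gzip.compress(payload)

# gzip trailer: CRC32 of the uncompressed data, then ISIZE, both little-endian
crc, isize = struct.unpack("<II", gzipped[-8:])

assert crc == zlib.crc32(payload) & 0xFFFFFFFF
assert isize == len(payload) % 2**32
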
Example 2
def putFile(req, file):
    tmpLog = LogWrapper(_logger, 'putFile-{}'.format(datetime.datetime.utcnow().isoformat('/')))
    if not Protocol.isSecure(req):
        tmpLog.error('No SSL_CLIENT_S_DN')
        return False
    if '/CN=limited proxy' in req.subprocess_env['SSL_CLIENT_S_DN']:
        return False
    # user name
    username = CoreUtils.clean_user_id(req.subprocess_env['SSL_CLIENT_S_DN'])
    tmpLog.debug("start %s %s" % (username, file.filename))
    # size check
    fullSizeLimit = 768*1024*1024
    if not file.filename.startswith('sources.'):
        noBuild = True
        sizeLimit = 100*1024*1024
    else:
        noBuild = False
        sizeLimit = fullSizeLimit
    # get file size
    contentLength = 0
    try:
        contentLength = int(req.headers_in["content-length"])
    except Exception:
        if "content-length" in req.headers_in:
            tmpLog.error("cannot get CL : %s" % req.headers_in["content-length"])
        else:
            tmpLog.error("no CL")
    tmpLog.debug("size %s" % contentLength)
    if contentLength > sizeLimit:
        errStr = "ERROR : Upload failure. Exceeded size limit %s>%s." % (contentLength,sizeLimit)
        if noBuild:
            errStr += " Please submit the job without --noBuild/--libDS since those options impose a tighter size limit"
        else:
            errStr += " Please remove redundant files from your workarea"
        tmpLog.error(errStr)
        tmpLog.debug("end")
        return errStr
    try:
        fileName = file.filename.split('/')[-1]
        fileFullPath = '%s/%s' % (panda_config.cache_dir, fileName)

        # avoid overwriting
        if os.path.exists(fileFullPath):
            # touch
            os.utime(fileFullPath,None)
            # send error message
            errStr = "ERROR : Cannot overwrite file"
            tmpLog.debug('cannot overwrite file %s' % fileName)
            tmpLog.debug("end")
            return errStr
        # write
        fo = open(fileFullPath,'wb')
        fileContent = file.file.read()
        if hasattr(panda_config, 'compress_file_names') and \
                any(re.search(patt, fileName) is not None
                    for patt in panda_config.compress_file_names.split(',')):
            fileContent = gzip.compress(fileContent)
        fo.write(fileContent)
        fo.close()
    except Exception:
        errStr = "ERROR : Cannot write file"
        tmpLog.error(errStr)
        tmpLog.debug("end")
        return errStr
    # checksum
    try:
        # decode Footer
        footer = fileContent[-8:]
        checkSum,isize = struct.unpack("II",footer)
        tmpLog.debug("CRC from gzip Footer %s" % checkSum)
    except Exception:
        # calculate on the fly
        """
        import zlib
        checkSum = zlib.adler32(fileContent) & 0xFFFFFFFF
        """
        # use None to avoid delay for now
        checkSum = None
        tmpLog.debug("CRC calculated %s" % checkSum)
    # file size
    fileSize = len(fileContent)
    tmpLog.debug("written dn=%s file=%s size=%s crc=%s" % \
                  (username, fileFullPath, fileSize, checkSum))
    # put file info to DB
    if panda_config.record_sandbox_info:
        to_insert = True
        for patt in IGNORED_SUFFIX:
            if file.filename.endswith(patt):
                to_insert = False
                break
        if not to_insert:
            tmpLog.debug("skipped to insert to DB")
        else:
            statClient, outClient = Client.insertSandboxFileInfo(
                username, file.filename, fileSize, checkSum)
            if statClient != 0 or outClient.startswith("ERROR"):
                tmpLog.error("failed to put sandbox to DB with %s %s" % (statClient, outClient))
                #_logger.debug("putFile : end")
                #return "ERROR : Cannot insert sandbox to DB"
            else:
                tmpLog.debug("inserted sandbox to DB with %s" % outClient)
    tmpLog.debug("end")
    return True
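
Example 2 additionally gzips the payload when the file name matches any pattern in panda_config.compress_file_names (a comma-separated list of regular expressions) and skips the DB record for names ending in one of the IGNORED_SUFFIX entries. A rough standalone sketch of that selection logic, with made-up pattern and suffix values (the real ones come from the PanDA configuration and module constants, not shown here):

import gzip
import re

# hypothetical stand-ins for panda_config.compress_file_names and IGNORED_SUFFIX
compress_file_names = r'\.log$,\.json$'
IGNORED_SUFFIX = ['.out']

def prepare_upload(fileName, fileContent):
    # gzip the content when the name matches any configured pattern
    patterns = compress_file_names.split(',')
    if any(re.search(patt, fileName) is not None for patt in patterns):
        fileContent = gzip.compress(fileContent)
    # decide whether a sandbox record should be written to the DB
    to_insert = not any(fileName.endswith(suffix) for suffix in IGNORED_SUFFIX)
    return fileContent, to_insert

content, to_insert = prepare_upload('payload.json', b'{"x": 1}')
print(len(content), to_insert)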