def putFile(req, file):
    """Receive an uploaded sandbox file and store it in the local cache dir.

    Legacy implementation (module-level _logger, optional Cassandra backend).

    :param req:  mod_python-style request; provides subprocess_env (SSL DN)
                 and headers_in (content-length).  -- project type
    :param file: uploaded form field; provides .filename and .file (a
                 readable binary stream).  -- project type
    :return: True on success, False when the connection is insecure or a
             limited proxy is used, or an "ERROR : ..." string on failure.
    """
    # reject non-SSL connections and limited proxies outright
    if not Protocol.isSecure(req):
        return False
    if '/CN=limited proxy' in req.subprocess_env['SSL_CLIENT_S_DN']:
        return False
    _logger.debug("putFile : start %s %s" % (req.subprocess_env['SSL_CLIENT_S_DN'], file.filename))
    # size check: only 'sources.*' uploads (builds) get the full limit;
    # everything else (--noBuild/--libDS style uploads) is capped tighter
    fullSizeLimit = 768 * 1024 * 1024
    if not file.filename.startswith('sources.'):
        noBuild = True
        sizeLimit = 100 * 1024 * 1024
    else:
        noBuild = False
        sizeLimit = fullSizeLimit
    # get file size from the Content-Length header; stays 0 if unavailable,
    # which skips the size check below (best-effort by design)
    contentLength = 0
    try:
        # NOTE(review): long() is Python 2 only — under Python 3 this raises
        # NameError, is swallowed here, and contentLength stays 0; confirm a
        # `long = int` compat shim exists at module level
        contentLength = long(req.headers_in["content-length"])
    except Exception:
        if "content-length" in req.headers_in:
            _logger.error("cannot get CL : %s" % req.headers_in["content-length"])
        else:
            _logger.error("no CL")
    _logger.debug("size %s" % contentLength)
    if contentLength > sizeLimit:
        # reject oversized upload with a user-facing hint
        errStr = "ERROR : Upload failure. Exceeded size limit %s>%s." % (contentLength, sizeLimit)
        if noBuild:
            errStr += " Please submit the job without --noBuild/--libDS since those options impose a tighter size limit"
        else:
            errStr += " Please remove redundant files from your workarea"
        _logger.error(errStr)
        _logger.debug("putFile : end")
        return errStr
    try:
        # target path in the cache dir; basename only, dropping any client path
        fileFullPath = '%s/%s' % (panda_config.cache_dir, file.filename.split('/')[-1])
        # avoid overwriting an existing cached file
        if os.path.exists(fileFullPath):
            # touch it so cache cleanup keeps the live copy
            os.utime(fileFullPath, None)
            # send error message
            errStr = "ERROR : Cannot overwrite file"
            _logger.debug('putFile : cannot overwrite file %s' % file.filename)
            _logger.debug("putFile : end")
            return errStr
        # write the whole upload to disk (entire content kept in memory;
        # acceptable given the size limits enforced above)
        fo = open(fileFullPath, 'wb')
        fileContent = file.file.read()
        fo.write(fileContent)
        fo.close()
    except Exception:
        errStr = "ERROR : Cannot write file"
        _logger.error(errStr)
        _logger.debug("putFile : end")
        return errStr
    # checksum
    try:
        # decode Footer: last 8 bytes of a gzip stream are CRC32 + ISIZE
        # (two little-endian uint32s), so the CRC comes for free
        footer = fileContent[-8:]
        checkSum, isize = struct.unpack("II", footer)
        _logger.debug("CRC from gzip Footer %s" % checkSum)
    except Exception:
        # calculate on the fly
        """
        import zlib
        checkSum = zlib.adler32(fileContent) & 0xFFFFFFFF
        """
        # use None to avoid delay for now
        checkSum = None
    _logger.debug("CRC calculated %s" % checkSum)
    # file size
    fileSize = len(fileContent)
    # user name extracted from the client certificate DN
    username = cleanUserID(req.subprocess_env['SSL_CLIENT_S_DN'])
    _logger.debug("putFile : written dn=%s file=%s size=%s crc=%s" % \
                  (username, file.filename, fileSize, checkSum))
    # put file info to DB; failure is logged but deliberately non-fatal
    # (see the commented-out early return below)
    statClient, outClient = Client.insertSandboxFileInfo(username, file.filename,
                                                         fileSize, checkSum)
    if statClient != 0 or outClient.startswith("ERROR"):
        _logger.error("putFile : failed to put sandbox to DB with %s %s" % (statClient, outClient))
        #_logger.debug("putFile : end")
        #return "ERROR : Cannot insert sandbox to DB"
    else:
        _logger.debug("putFile : inserted sandbox to DB with %s" % outClient)
    # store to cassandra (optional backend, enabled via panda_config)
    if hasattr(panda_config, 'cacheUseCassandra') and panda_config.cacheUseCassandra == True:
        try:
            # time-stamp
            timeNow = datetime.datetime.utcnow()
            creationTime = timeNow.strftime('%Y-%m-%d %H:%M:%S')
            # user name: raw DN with proxy suffixes stripped (NOT cleanUserID,
            # unlike the DB insert above — presumably intentional; verify)
            username = req.subprocess_env['SSL_CLIENT_S_DN']
            username = username.replace('/CN=proxy', '')
            username = username.replace('/CN=limited proxy', '')
            # file size
            fileSize = len(fileContent)
            # row key is the basename; uniqID combines size and checksum for
            # the duplicate-detection lookup below
            fileKeyName = file.filename.split('/')[-1]
            sizeCheckSum = '%s:%s' % (fileSize, checkSum)
            # insert to cassandra
            import pycassa
            pool = pycassa.ConnectionPool(panda_config.cacheKeySpace)
            filefamily = pycassa.ColumnFamily(pool, panda_config.cacheFileTable)
            # avoid overwriting an existing row with the same key
            gotoNextCassa = True
            if filefamily.get_count(fileKeyName) > 0:
                # touch (refresh timestamp); only a successful touch counts as
                # "file already there"
                touchFlag = touchFileCassa(filefamily, fileKeyName, timeNow)
                if touchFlag:
                    gotoNextCassa = False
                    # send error message
                    errStr = "ERROR : Cannot overwrite file in Cassandra"
                    _logger.error(errStr)
                    if not panda_config.cacheIgnoreCassandraError:
                        _logger.debug("putFile : end")
                        return errStr
            # check uniqueness with size and checksum: if the same user already
            # uploaded identical content under another key, store an alias
            # instead of duplicating the payload
            if gotoNextCassa:
                try:
                    uniqExp = pycassa.index.create_index_expression('uniqID', sizeCheckSum)
                    userExp = pycassa.index.create_index_expression('user', username)
                    tmpClause = pycassa.index.create_index_clause([uniqExp, userExp])
                    tmpResults = filefamily.get_indexed_slices(tmpClause, columns=['creationTime'])
                    for oldFileKeyName, tmpDict in tmpResults:
                        _logger.debug('The same size and chksum %s found in old:%s and new:%s' % \
                                      (sizeCheckSum, oldFileKeyName, fileKeyName))
                        # touch the original; only alias to a row we could refresh
                        touchFlag = touchFileCassa(filefamily, oldFileKeyName, timeNow)
                        if touchFlag:
                            # make alias row pointing at the existing payload
                            _logger.debug('Making alias %s->%s' % (fileKeyName, oldFileKeyName))
                            insertWithRetryCassa(filefamily, fileKeyName,
                                                 {'alias': oldFileKeyName,
                                                  'creationTime': creationTime,
                                                  'nSplit': 0,
                                                  },
                                                 'putFile : make alias for %s' % file.filename)
                            # set time
                            touchFileCassa(filefamily, fileKeyName, timeNow)
                            _logger.debug("putFile : end")
                            return True
                except Exception:
                    gotoNextCassa = False
                    errType, errValue = sys.exc_info()[:2]
                    errStr = "cannot make alias for %s due to %s %s" % (fileKeyName, errType, errValue)
                    _logger.error(errStr)
                    if not panda_config.cacheIgnoreCassandraError:
                        _logger.debug("putFile : end")
                        return errStr
            # insert new record
            if gotoNextCassa:
                splitIdx = 0
                splitSize = 5 * 1024 * 1024
                # number of 5MB chunks, rounding up for a partial last chunk
                nSplit, tmpMod = divmod(len(fileContent), splitSize)
                if tmpMod != 0:
                    nSplit += 1
                _logger.debug('Inserting %s with %s blocks' % (fileKeyName, nSplit))
                for splitIdx in range(nSplit):
                    # split to small chunks since cassandra is not good at large files
                    tmpFileContent = fileContent[splitSize * splitIdx:splitSize * (splitIdx + 1)]
                    tmpFileKeyName = fileKeyName
                    tmpAttMap = {'file': tmpFileContent,
                                 'user': username,
                                 'creationTime': creationTime,
                                 }
                    if splitIdx == 0:
                        # head chunk carries the metadata; continuation chunks
                        # get suffixed keys and zeroed size/nSplit
                        tmpAttMap['size'] = fileSize
                        tmpAttMap['nSplit'] = nSplit
                        tmpAttMap['uniqID'] = sizeCheckSum
                        tmpAttMap['checkSum'] = str(checkSum)
                    else:
                        tmpFileKeyName += '_%s' % splitIdx
                        tmpAttMap['size'] = 0
                        tmpAttMap['nSplit'] = 0
                    # insert with retry
                    insertWithRetryCassa(filefamily, tmpFileKeyName, tmpAttMap,
                                         'putFile : insert %s' % file.filename)
                # set time
                touchFileCassa(filefamily, fileKeyName, timeNow)
        except Exception:
            errType, errValue = sys.exc_info()[:2]
            errStr = "cannot put %s into Cassandra due to %s %s" % (fileKeyName, errType, errValue)
            _logger.error(errStr)
            # send error message
            errStr = "ERROR : " + errStr
            if not panda_config.cacheIgnoreCassandraError:
                _logger.debug("putFile : end")
                return errStr
    _logger.debug("putFile : %s end" % file.filename)
    return True
def putFile(req, file):
    """Receive an uploaded sandbox file and store it in the local cache dir.

    Newer implementation: per-request LogWrapper, optional gzip compression of
    configured filename patterns, and a config-gated sandbox-info DB record.

    :param req:  mod_python-style request; provides subprocess_env (SSL DN)
                 and headers_in (content-length).  -- project type
    :param file: uploaded form field; provides .filename and .file (a
                 readable binary stream).  -- project type
    :return: True on success, False when the connection is insecure or a
             limited proxy is used, or an "ERROR : ..." string on failure.
    """
    # per-request logger with a unique timestamp token for log correlation
    tmpLog = LogWrapper(_logger, 'putFile-{}'.format(datetime.datetime.utcnow().isoformat('/')))
    # reject non-SSL connections and limited proxies outright
    if not Protocol.isSecure(req):
        tmpLog.error('No SSL_CLIENT_S_DN')
        return False
    if '/CN=limited proxy' in req.subprocess_env['SSL_CLIENT_S_DN']:
        return False
    # user name extracted from the client certificate DN
    username = CoreUtils.clean_user_id(req.subprocess_env['SSL_CLIENT_S_DN'])
    tmpLog.debug("start %s %s" % (username, file.filename))
    # size check: only 'sources.*' uploads (builds) get the full limit;
    # everything else (--noBuild/--libDS style uploads) is capped tighter
    fullSizeLimit = 768*1024*1024
    if not file.filename.startswith('sources.'):
        noBuild = True
        sizeLimit = 100*1024*1024
    else:
        noBuild = False
        sizeLimit = fullSizeLimit
    # get file size from the Content-Length header; stays 0 if unavailable,
    # which skips the size check below (best-effort by design)
    contentLength = 0
    try:
        # NOTE(review): long() is Python 2 only, yet gzip.compress() below is
        # Python 3 only — under py3 this raises NameError, is swallowed, and
        # contentLength stays 0 (size limit never enforced); confirm a
        # `long = int` compat shim exists at module level
        contentLength = long(req.headers_in["content-length"])
    except Exception:
        if "content-length" in req.headers_in:
            tmpLog.error("cannot get CL : %s" % req.headers_in["content-length"])
        else:
            tmpLog.error("no CL")
    tmpLog.debug("size %s" % contentLength)
    if contentLength > sizeLimit:
        # reject oversized upload with a user-facing hint
        errStr = "ERROR : Upload failure. Exceeded size limit %s>%s." % (contentLength, sizeLimit)
        if noBuild:
            errStr += " Please submit the job without --noBuild/--libDS since those options impose a tighter size limit"
        else:
            errStr += " Please remove redundant files from your workarea"
        tmpLog.error(errStr)
        tmpLog.debug("end")
        return errStr
    try:
        # target path in the cache dir; basename only, dropping any client path
        fileName = file.filename.split('/')[-1]
        fileFullPath = '%s/%s' % (panda_config.cache_dir, fileName)
        # avoid overwriting an existing cached file
        if os.path.exists(fileFullPath):
            # touch it so cache cleanup keeps the live copy
            os.utime(fileFullPath, None)
            # send error message
            errStr = "ERROR : Cannot overwrite file"
            tmpLog.debug('cannot overwrite file %s' % fileName)
            tmpLog.debug("end")
            return errStr
        # write the whole upload to disk (entire content kept in memory;
        # acceptable given the size limits enforced above)
        fo = open(fileFullPath, 'wb')
        fileContent = file.file.read()
        # gzip-compress when the name matches any comma-separated regex in
        # panda_config.compress_file_names (non-empty list-comp == any match)
        if hasattr(panda_config, 'compress_file_names') and \
                [True for patt in panda_config.compress_file_names.split(',') if re.search(patt, fileName) is not None]:
            fileContent = gzip.compress(fileContent)
        fo.write(fileContent)
        fo.close()
    except Exception:
        errStr = "ERROR : Cannot write file"
        tmpLog.error(errStr)
        tmpLog.debug("end")
        return errStr
    # checksum
    try:
        # decode Footer: last 8 bytes of a gzip stream are CRC32 + ISIZE
        # (two little-endian uint32s), so the CRC comes for free
        footer = fileContent[-8:]
        checkSum, isize = struct.unpack("II", footer)
        tmpLog.debug("CRC from gzip Footer %s" % checkSum)
    except Exception:
        # calculate on the fly
        """
        import zlib
        checkSum = zlib.adler32(fileContent) & 0xFFFFFFFF
        """
        # use None to avoid delay for now
        checkSum = None
    tmpLog.debug("CRC calculated %s" % checkSum)
    # file size
    fileSize = len(fileContent)
    tmpLog.debug("written dn=%s file=%s size=%s crc=%s" % \
                 (username, fileFullPath, fileSize, checkSum))
    # put file info to DB, unless disabled by config or the filename carries
    # an ignored suffix; failure is logged but deliberately non-fatal
    if panda_config.record_sandbox_info:
        to_insert = True
        for patt in IGNORED_SUFFIX:
            if file.filename.endswith(patt):
                to_insert = False
                break
        if not to_insert:
            tmpLog.debug("skipped to insert to DB")
        else:
            statClient, outClient = Client.insertSandboxFileInfo(username, file.filename,
                                                                 fileSize, checkSum)
            if statClient != 0 or outClient.startswith("ERROR"):
                tmpLog.error("failed to put sandbox to DB with %s %s" % (statClient, outClient))
                #_logger.debug("putFile : end")
                #return "ERROR : Cannot insert sandbox to DB"
            else:
                tmpLog.debug("inserted sandbox to DB with %s" % outClient)
    tmpLog.debug("end")
    return True