def checkFile(fileHash, fileContent, backend): '''Checks that a tar file and its contents are correct. Called from the dropBox to check a file after it was received correctly. The received file is guaranteed to be already checksummed. ''' logging.debug('check::checkFile(%s, %s [len], %s)', fileHash, len(fileContent), backend) logging.info( 'checkFile(): %s: Checking whether the file is a valid tar file...', fileHash) fileObject = cStringIO.StringIO() fileObject.write(fileContent) fileObject.seek(0) try: tarFile = tarfile.open(fileHash, 'r:bz2', fileobj=fileObject) except tarfile.TarError as e: raise dropBox.DropBoxError('The file is not a valid tar file.') with tempfile.NamedTemporaryFile() as temporaryDataFile: try: logging.info( 'checkFile(): %s: Checking whether the tar file contains the and only the expected file names...', fileHash) if tarFile.getnames() != [dataFilename, metadataFilename]: raise dropBox.DropBoxError( 'The file tar file does not contain the and only the expected file names.' ) logging.info( 'checkFile(): %s: Checking whether each file has the expected attributes...', fileHash) for tarInfo in tarFile.getmembers(): if tarInfo.mode != 0400 \ or tarInfo.uid != 0 \ or tarInfo.gid != 0 \ or tarInfo.mtime != 0 \ or tarInfo.uname != 'root' \ or tarInfo.gname != 'root': raise dropBox.DropBoxError( 'The file %s has unexpected attributes.' % tarInfo.name) logging.info('checkFile(): %s: Extracting metadata in memory...', fileHash) metadata = tarFile.extractfile(metadataFilename).read() logging.info('checkFile(): %s: Extracting data in file %s...', fileHash, temporaryDataFile.name) temporaryDataFile.write(tarFile.extractfile(dataFilename).read()) temporaryDataFile.flush() # We do not need os.fsync(temporaryDataFile.fileno()) since # we do not care if the file is in the disk or not if we lose it, # it is a temporary file. Avoiding the fsync() will perform better. # However, we do need flush(), otherwise some files will fail # (e.g. 
small files that do not fill the buffer). finally:
def uploadFile(self, uploadedFile, backend, fileName):
    '''Receives a file from offline and hands it over to the dropBox.

    Validates that the request really carries an uploaded file, that the
    requested backend is allowed on this server, and that the sanitized
    target filename has a sane length, before delegating the actual upload.

    Raises dropBox.DropBoxError on any validation failure.
    '''

    logging.debug('=' * 80)

    # Reject anything that does not look like an uploaded file, i.e. an
    # object exposing both a .file and a .filename attribute.
    isUploadedFile = hasattr(uploadedFile, 'file') and hasattr(uploadedFile, 'filename')
    if not isUploadedFile:
        raise dropBox.DropBoxError(
            'The parameter must be an uploaded file.')

    logging.debug('server::uploadFile(%s, %s, %s)',
                  uploadedFile.filename, backend, fileName)

    logging.info(
        'server::uploadFile(): Checking whether the backend is allowed...')
    permittedBackends = config.allowedBackends[
        service.settings['productionLevel']]
    if backend not in permittedBackends:
        raise dropBox.DropBoxError(
            'The given backend %s is not in the allowed ones for this server: %s.'
            % (backend, permittedBackends))

    # Take out any paths in the fileName.
    fileName = os.path.basename(fileName)

    # Most UNIX filesystems restrict names to 255 bytes, so if we get here
    # someone is trying to do something probably wrong, therefore we just stop.
    logging.info(
        'server::uploadFile(): Checking whether the length of the filename does not exceed 255 bytes...'
    )
    if len(fileName) > 255:
        raise dropBox.DropBoxError(
            'The length of the filename exceeds 255 bytes.')

    dropBox.uploadFile(uploadedFile.filename, uploadedFile.file.read(),
                       getUsername(), backend, fileName)
def checkSynchronization(synchronizeTo, destinationDatabase, tag, gtHandle, productionGTsDict):
    '''Validates a requested synchronization against the production Global Tags.

    When the destination account/tag pair does not appear in any production
    Global Tag, every synchronization is accepted. When it appears in at
    least one, the requested synchronization must be exactly the workflow of
    that Global Tag; as a special case, 'pcl' is accepted where the Global
    Tag's workflow is 'prompt'.

    Raises dropBox.DropBoxError when the synchronization does not match.
    gtHandle.getWorkflowForTagAndDB() raises if the dictionary for the
    production workflows is malformed.
    '''
    workflow = gtHandle.getWorkflowForTagAndDB(destinationDatabase, tag, productionGTsDict)

    # Accept when the pair is not in any production Global Tag, when the
    # requested workflow matches exactly, or for the pcl-over-prompt case.
    notInProduction = workflow is None
    exactMatch = synchronizeTo == workflow
    pclOverPrompt = synchronizeTo == 'pcl' and workflow == 'prompt'

    if notInProduction or exactMatch or pclOverPrompt:
        return

    raise dropBox.DropBoxError(
        'The synchronization "%s" for tag "%s" in database "%s" provided in the metadata does not match the one in the global tag for workflow "%s".'
        % (synchronizeTo, tag, destinationDatabase, workflow))
def checkContents(fileHash, dataPath, metadata, backend):
    '''Checks whether the data and metadata of an upload are correct.

    dataPath is the filename of the sqlite file; metadata is the already
    parsed metadata object.

    Note: Update the wizard on the upload.py script if the structure changes.

    Raises dropBox.DropBoxError on the first problem found.
    '''

    logging.debug('check::checkContents(%s, %s, %s)',
                  fileHash, dataPath, repr(metadata))

    logging.info('checkContents(): %s: Checking metadata structure...',
                 fileHash)

    workflows = (u'offline', u'hlt', u'express', u'prompt', u'pcl')
    structure = {
        u'destinationDatabase': (True, unicode),
        u'inputTag': (True, unicode),
        u'since': (True, (int, type(None))),
        u'emails': (False, [unicode]),
        u'userText': (True, unicode),
        u'destinationTags': (True, {
            unicode: {
                u'synchronizeTo': (True, workflows),
                u'dependencies': (False, {
                    unicode: workflows,
                }),
            },
        }),
    }

    try:
        typeMatch.match(structure, metadata)
    except typeMatch.MatchError as e:
        raise dropBox.DropBoxError('In the metadata, ' + str(e))

    logging.info(
        'checkContents(): %s: Checking data with respect to metadata...',
        fileHash)

    checker = conditionDatabase.ConditionDBChecker(
        'sqlite_file:%s' % dataPath, '')
    try:
        # Corrupted file
        try:
            tagList = checker.getAllTags()
        except conditionError.ConditionError as e:
            raise dropBox.DropBoxError(
                'The file is corrupted, as it was not possible to get the list of tags inside it.'
            )

        # Empty file
        if not tagList:
            raise dropBox.DropBoxError(
                'The file does not contain any tags, so it is likely not hosting any Condition payloads.'
            )

        # Wrong input tag
        if metadata['inputTag'] not in tagList:
            raise dropBox.DropBoxError(
                'The input tag "%s" is not in the input SQLite file.'
                % metadata['inputTag'])

        # Unsupported service
        destinationDatabase = metadata['destinationDatabase']
        if not destinationDatabase.startswith('oracle:'):
            raise dropBox.DropBoxError('Oracle is the only supported service.')

        # Invalid connection string
        connectionInfo = service.getProtocolServiceAndAccountFromConnectionString(
            destinationDatabase)
        if connectionInfo is None:
            raise dropBox.DropBoxError('The connection string is not correct.')

        # Destination database not supported
        serviceWhitelist = config.allowedServices[backend]
        if serviceWhitelist is not None:
            # Scan the whole whitelist without early exit so that a
            # malformed entry still surfaces (via KeyError) as before.
            supported = False
            for allowedService in serviceWhitelist:
                supported |= connectionInfo['service'] in \
                    databaseServices.services[allowedService]['oracle']
            if not supported:
                raise dropBox.DropBoxError(
                    'The destination database is not supported.')

        # Invalid since
        since = metadata['since']
        if since is not None:
            firstSince = checker.iovSequence(metadata['inputTag']).firstSince()
            if since < firstSince:
                raise dropBox.DropBoxError(
                    'The since value "%d" specified in the metadata cannot be smaller than the first IOV since "%d"'
                    % (since, firstSince))

        # Invalid synchronizations
        # NOTE(review): the 'dev' secrets and the samir-wmcore replay URL
        # look like development settings -- confirm they are intended here.
        gtHandle = globalTagHandler.GlobalTagHandler(
            service.getFrontierConnectionString(
                service.secrets['connections']['dev']['global_tag']),
            service.getCxOracleConnectionString(
                service.secrets['connections']['dev']['run_control']),
            service.getFrontierConnectionString(
                service.secrets['connections']['dev']['run_info']),
            'runinfo_start_31X_hlt',
            'runinfo_31X_hlt',
            '',
            'https://samir-wmcore.cern.ch/t0wmadatasvc/replay',
            30,
            3,
            90,
        )

        productionGTsDict = config.productionGlobalTags
        for tag, synchronizationDict in metadata['destinationTags'].items():
            checkSynchronization(synchronizationDict['synchronizeTo'],
                                 destinationDatabase, tag,
                                 gtHandle, productionGTsDict)
            for dependentTag, synchronizeTo in synchronizationDict.get(
                    'dependencies', {}).items():
                checkSynchronization(synchronizeTo,
                                     destinationDatabase, dependentTag,
                                     gtHandle, productionGTsDict)

        # firstConditionSafeRun from Tier-0 not available -- only checked
        # if it is going to be used, i.e. if there is any synchronization
        # to prompt or pcl (either directly or through a dependency).
        usingFcsr = False
        for synchronizationDict in metadata['destinationTags'].values():
            targets = [synchronizationDict['synchronizeTo']]
            targets.extend(
                synchronizationDict.get('dependencies', {}).values())
            if 'prompt' in targets or 'pcl' in targets:
                usingFcsr = True
                break

        if usingFcsr:
            try:
                tier0.Tier0Handler(config.tier0URL,
                                   5, 2, 5, None, False).getFirstSafeRun()
            except ValueError:
                # We got an answer but it is invalid. So far this usually means
                # "None" which is not JSON, when the Tier0 is stopped.
                raise dropBox.DropBoxError(
                    'Impossible to upload to synchronize to prompt or pcl while Tier-0 is returning an invalid First Condition Safe Run. %s'
                    % config.fcsrProblemMessage)
            except tier0.Tier0Error:
                # Impossible to get anything from the server after retries,
                # i.e. unreachable, so no data.
                raise dropBox.DropBoxError(
                    'Impossible to upload to synchronize to prompt or pcl while Tier-0 is unreachable. Try again after a bit. If this does not help: %s'
                    % config.fcsrProblemMessage)
    finally:
        checker.close()
fileHash, temporaryDataFile.name) temporaryDataFile.write(tarFile.extractfile(dataFilename).read()) temporaryDataFile.flush() # We do not need os.fsync(temporaryDataFile.fileno()) since # we do not care if the file is in the disk or not if we lose it, # it is a temporary file. Avoiding the fsync() will perform better. # However, we do need flush(), otherwise some files will fail # (e.g. small files that do not fill the buffer). finally: tarFile.close() try: logging.info('checkFile(): %s: Parsing JSON metadata...', fileHash) metadata = json.loads(metadata) except ValueError: raise dropBox.DropBoxError('The metadata is not valid JSON.') try: logging.info( 'checkFile(): %s: Checking the data is a valid SQLite 3 database...', fileHash) dataConnection = sqlite3.connect(temporaryDataFile.name) dataConnection.execute('select 1 from sqlite_master') except sqlite3.DatabaseError as e: raise dropBox.DropBoxError( 'The data is not a valid SQLite 3 database.') except Exception as e: raise dropBox.DropBoxError('The data could not be read.') finally: dataConnection.close()