class TestClientProductionTestCase(unittest.TestCase): def setUp(self): self.prodClient = ProductionClient() self.transClient = TransformationClient() self.fc = FileCatalog() # ## Add metadata fields to the DFC self.MDFieldDict = { "particle": "VARCHAR(128)", "analysis_prog": "VARCHAR(128)", "tel_sim_prog": "VARCHAR(128)", "outputType": "VARCHAR(128)", "zenith": "int", "data_level": "int", } for MDField in self.MDFieldDict: MDFieldType = self.MDFieldDict[MDField] res = self.fc.addMetadataField(MDField, MDFieldType) self.assert_(res["OK"]) def tearDown(self): # Delete meta data fields for MDField in self.MDFieldDict: res = self.fc.deleteMetadataField(MDField) self.assert_(res["OK"])
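
# The fixture above is a plain unittest.TestCase, so it can be driven with the
# standard loader/runner boilerplate. A minimal sketch, assuming the test
# methods live in this same module:
if __name__ == "__main__":
    suite = unittest.defaultTestLoader.loadTestsFromTestCase(TestClientProductionTestCase)
    unittest.TextTestRunner(verbosity=2).run(suite)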
    prodStep.Inputquery = inputQuery
    prodStep.Outputquery = outputQuery
    return prodStep


# Set metadata fields in the DFC
fc = FileCatalog()
MDFieldDict = {
    "application": "VARCHAR(128)",
    "image_format": "VARCHAR(128)",
    "image_width": "int",
    "image_height": "int",
}
for MDField, MDFieldType in MDFieldDict.items():
    res = fc.addMetadataField(MDField, MDFieldType)
    if not res["OK"]:
        gLogger.error("Failed to add metadata fields", res["Message"])
        exit(-1)

# Instantiate the ProductionClient
prodClient = ProductionClient()

# Create the first production step and add it to the Production
outputquery = {"application": "mandelbrot", "image_format": "ascii", "image_width": 7680, "image_height": 200}
prodStep1 = createProductionStep("ImageProd", "MCSimulation", outputQuery=outputquery)
body = createWorkflowBodyStep1()
prodStep1.Body = body
res = prodClient.addProductionStep(prodStep1)
if not res["OK"]:
    gLogger.error("Failed to add production step", res["Message"])
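
# The excerpt stops once the first step has been added. A minimal sketch of the
# remaining calls, assuming the ProductionClient API shown in the DIRAC
# Production System documentation ("SeqProduction" is a hypothetical name):
import json

prodName = "SeqProduction"  # hypothetical production name, not from the excerpt

# The client accumulates the added steps into a production description ...
prodDescription = prodClient.prodDescription

# ... which is then registered and started.
res = prodClient.addProduction(prodName, json.dumps(prodDescription))
if res["OK"]:
    res = prodClient.startProduction(prodName)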
transformation.setOutputDirectories([
    '/dirac/outConfigName/configVersion/LOG/00000000',
    '/dirac/outConfigName/configVersion/RAW/00000000',
    '/dirac/outConfigName/configVersion/CORE/00000000',
])

# Set directory metadata and create a transformation with a metadata filter
if UseFilter:
    fc = FileCatalog()
    dm = DataManager()
    metaCatalog = 'DIRACFileCatalog'

    # Set metadata fields in the DFC
    MDFieldDict = {'particle': 'VARCHAR(128)', 'timestamp': 'VARCHAR(128)'}
    for MDField, MDFieldType in MDFieldDict.items():
        res = fc.addMetadataField(MDField, MDFieldType)
        if not res['OK']:
            gLogger.error("Failed to add metadata fields", res['Message'])
            exit(-1)

    # Set directory metadata
    timestamp = time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime())
    MDdict1 = {'particle': 'gamma', 'timestamp': timestamp}
    res = fc.setMetadata(directory, MDdict1)
    if not res['OK']:
        gLogger.error("Failed to set metadata", res['Message'])
        exit(-1)

    # Set the transformation metadata filter
    MDdict1b = {'particle': 'gamma', 'timestamp': timestamp}
    mqJson1b = json.dumps(MDdict1b)
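
    # The JSON-encoded dictionary above becomes the transformation's input-data
    # filter. As a quick sanity check, the same dictionary can be run directly
    # against the DFC; a minimal sketch, assuming '/dirac' (taken from the
    # output directories above) as the search root:
    res = fc.findFilesByMetadata(MDdict1b, path='/dirac')
    if res['OK']:
        gLogger.notice('Files currently matching the filter:', res['Value'])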
import collections
import glob
import json
import os
import re
import tarfile

import DIRAC
from DIRAC.ConfigurationSystem.Client.Helpers.Operations import Operations
from DIRAC.Core.Utilities import List
from DIRAC.Core.Utilities.SiteSEMapping import getSEsForSite
from DIRAC.DataManagementSystem.Client.DataManager import DataManager
from DIRAC.Interfaces.API.Dirac import Dirac
from DIRAC.Resources.Catalog.FileCatalog import FileCatalog
from DIRAC.Resources.Catalog.FileCatalogClient import FileCatalogClient


class Prod3DataManager(object):
    """ Manage data and metadata """

    def __init__(self, catalogs=['DIRACFileCatalog']):
        """ Constructor """
        self.setupCatalogClient(catalogs)
        self.printCatalogConfig(catalogs)
        self.setupDataManagerClient(catalogs)

    def setupCatalogClient(self, catalogs):
        """ Init the FileCatalog client.
            Ideally we would like to use only FileCatalog, but it doesn't work
            for setMetadata because the returned value has a wrong format,
            so FileCatalogClient is used instead.
        """
        self.fc = FileCatalog(catalogs)
        self.fcc = FileCatalogClient()

    def printCatalogConfig(self, catalogs):
        """ Dump the catalog configuration """
        for catalog in catalogs:
            res = self.fc._getCatalogConfigDetails(catalog)
            DIRAC.gLogger.info('CatalogConfigDetails:', res)

    def setupDataManagerClient(self, catalogs):
        """ Init the DataManager client """
        self.dm = DataManager(catalogs)

    def _getSEList(self, SEType='ProductionOutputs', DataType='SimtelProd'):
        """ Get from the CS the list of SEs available for data upload """
        opsHelper = Operations()
        optionName = os.path.join(SEType, DataType)
        SEList = opsHelper.getValue(optionName, [])
        SEList = List.randomize(SEList)
        DIRAC.gLogger.notice('List of %s SE: %s' % (SEType, SEList))

        # If the local SE is in the list, try it first by moving it to the front
        localSEList = []
        res = getSEsForSite(DIRAC.siteName())
        if res['OK']:
            localSEList = res['Value']

        retainedlocalSEList = []
        for localSE in localSEList:
            if localSE in SEList:
                DIRAC.gLogger.notice('The local Storage Element is an available SE: ', localSE)
                retainedlocalSEList.append(localSE)
                SEList.remove(localSE)
        SEList = retainedlocalSEList + SEList

        if len(SEList) == 0:
            return DIRAC.S_ERROR('Error in building SEList')
        return DIRAC.S_OK(SEList)

    def _checkemptydir(self, path):
        """ Check that the directory is not empty """
        if len(glob.glob(path)) == 0:
            return DIRAC.S_ERROR('Empty directory: %s' % path)
        return DIRAC.S_OK()

    def _getRunPath(self, filemetadata):
        """ Format the run path, grouping runs by thousands (e.g. 1234 -> '001xxx').
            run_number is taken from filemetadata, which can be a dict
            or the run_number itself.
        """
        if isinstance(filemetadata, dict):
            run_number = int(filemetadata['runNumber'])
        else:
            run_number = int(filemetadata)
        run_numberMod = run_number % 1000
        runpath = '%03dxxx' % ((run_number - run_numberMod) // 1000)
        return runpath

    def _formatPath(self, path):
        """ Format a path element to a string with 1-digit precision """
        if isinstance(path, (float, int)):
            path = '%.1f' % path
        return str(path)

    def createTarLogFiles(self, inputpath, tarname):
        """ Create a tarball of the log and histogram files """
        tar = tarfile.open(tarname, 'w:gz')
        for subdir in ['Log/*', 'Histograms/*']:
            logdir = os.path.join(inputpath, subdir)
            res = self._checkemptydir(logdir)
            if not res['OK']:
                return res
            for localfile in glob.glob(logdir):
                tar.add(localfile, arcname=localfile.split('/')[-1])
        tar.close()
        return DIRAC.S_OK()

    def createMDStructure(self, metadata, metadatafield, basepath, program_category):
        """ Create the metadata directory structure """
        # Add metadata fields to the DFC
        mdfield = json.loads(metadatafield)
        for key, value in mdfield.items():
            res = self.fc.addMetadataField(key, value)
            if not res['OK']:
                return res

        # Create the directory structure
        md = json.loads(metadata, object_pairs_hook=collections.OrderedDict)
        path = basepath
        process_program = program_category + '_prog'
        for key, value in collections.OrderedDict(
                (k, md[k]) for k in ('site', 'particle', process_program) if k in md).items():
            path = os.path.join(path, self._formatPath(value))
            res = self.fc.createDirectory(path)
            if not res['OK']:
                return res
            # Set directory metadata for each subdir: 'site', 'particle', process_program
            res = self.fcc.setMetadata(path, {key: value})
            if not res['OK']:
                return res

        # Create the TransformationID subdir and set its metadata;
        # the TransformationID is read from the job JDL
        TransformationID = '0000'
        if 'JOBID' in os.environ:
            jobID = os.environ['JOBID']
            dirac = Dirac()
            res = dirac.getJobJDL(jobID)
            if 'TransformationID' in res['Value']:
                TransformationID = res['Value']['TransformationID']
        path = os.path.join(path, TransformationID)
        res = self.fc.createDirectory(path)
        if not res['OK']:
            return res
        process_program_version = process_program + '_version'
        res = self.fcc.setMetadata(path, dict((k, md[k]) for k in ('phiP', 'thetaP', 'array_layout', process_program_version) if k in md))
        if not res['OK']:
            return res

        # Create the Data and Log subdirs and set their metadata
        Transformation_path = path
        for subdir in ['Data', 'Log']:
            path = os.path.join(Transformation_path, subdir)
            res = self.fc.createDirectory(path)
            if not res['OK']:
                return res
            # Set metadata only if not already defined
            res = self.fcc.getDirectoryUserMetadata(path)
            if not res['OK']:
                return res
            if 'outputType' not in res['Value']:
                res = self.fcc.setMetadata(path, {'outputType': subdir})
                if not res['OK']:
                    return res

        # Metadata for the Data directory: data_level and configuration_id
        path = os.path.join(Transformation_path, 'Data')
        # Set metadata only if not already defined
        res = self.fcc.getDirectoryUserMetadata(path)
        if not res['OK']:
            return res
        if 'data_level' not in res['Value'] or 'configuration_id' not in res['Value']:
            res = self.fcc.setMetadata(path, {'data_level': md['data_level'],
                                              'configuration_id': md['configuration_id']})
            if not res['OK']:
                return res

        return DIRAC.S_OK(Transformation_path)

    def putAndRegister(self, lfn, localfile, filemetadata, DataType='SimtelProd'):
        """ Put and register one file and set its metadata """
        # Get the list of Production SEs
        res = self._getSEList('ProductionOutputs', DataType)
        if not res['OK']:
            return res
        ProductionSEList = res['Value']

        # Get the list of Failover SEs
        res = self._getSEList('ProductionOutputsFailover', DataType)
        if not res['OK']:
            return res
        FailoverSEList = res['Value']

        # Upload the file to a Production SE, falling back to a Failover SE
        res = self._putAndRegisterToSEList(lfn, localfile, ProductionSEList)
        if not res['OK']:
            DIRAC.gLogger.error('Failed to upload file to any Production SE: %s' % ProductionSEList)
            res = self._putAndRegisterToSEList(lfn, localfile, FailoverSEList)
            if not res['OK']:
                return DIRAC.S_ERROR('Failed to upload file to any Failover SE: %s' % FailoverSEList)

        # Set file metadata: jobID, subarray, sct
        fmd = json.loads(filemetadata)
        if 'JOBID' in os.environ:
            fmd.update({'jobID': os.environ['JOBID']})
        filename = os.path.basename(localfile)
        # Derive the subarray and sct metadata from the file name
        p = re.compile(r'subarray-\d+')
        if p.search(filename) is not None:
            subarray = p.search(filename).group()
            fmd.update({'subarray': subarray})
        sct = 'False'
        p = re.compile(r'nosct')
        psct = re.compile(r'sct')
        if p.search(filename) is None and psct.search(filename) is not None:
            sct = 'True'
        # Set the sct flag only for production data
        res = self.fcc.getFileUserMetadata(lfn)
        if not res['OK']:
            return res
        if DataType == 'SimtelProd' and res['Value']['outputType'] == 'Data':
            fmd.update({'sct': sct})
        res = self.fc.setMetadata(lfn, fmd)
        if not res['OK']:
            return res
        return DIRAC.S_OK()

    def _putAndRegisterToSEList(self, lfn, localfile, SEList):
        """ Put and register one file to the first SE in SEList that accepts it """
        # Try each SE in the list in turn
        for SE in SEList:
            msg = 'Try to upload local file: %s \nwith LFN: %s \nto %s' % (localfile, lfn, SE)
            DIRAC.gLogger.notice(msg)
            res = self.dm.putAndRegister(lfn, localfile, SE)
            DIRAC.gLogger.notice(res)
            # The call itself failed: clean up any partial upload and try the next SE
            if not res['OK']:
                DIRAC.gLogger.error('Failed to putAndRegister %s \nto %s \nwith message: %s' % (lfn, SE, res['Message']))
                DIRAC.gLogger.notice('Trying to clean up %s' % lfn)
                removeRes = self.dm.removeFile(lfn)
                if removeRes['OK']:
                    DIRAC.gLogger.notice('Successfully removed %s \nthat was not supposed to have been uploaded successfully' % lfn)
                continue
            # The call succeeded but this LFN is reported as Failed: same recovery
            elif lfn in res['Value']['Failed']:
                DIRAC.gLogger.error('Failed to putAndRegister %s to %s' % (lfn, SE))
                DIRAC.gLogger.notice('Trying to clean up %s' % lfn)
                removeRes = self.dm.removeFile(lfn)
                if removeRes['OK']:
                    DIRAC.gLogger.notice('Successfully removed %s \nthat was not supposed to have been uploaded successfully' % lfn)
                continue
            else:
                return DIRAC.S_OK()
        return DIRAC.S_ERROR('Failed to upload %s to any SE in %s' % (lfn, SEList))

    def cleanLocalFiles(self, datadir, pattern='*'):
        """ Remove the files matching pattern in datadir """
        for localfile in glob.glob(os.path.join(datadir, pattern)):
            DIRAC.gLogger.notice('Removing local file: ', localfile)
            os.remove(localfile)
        return DIRAC.S_OK()
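
# A minimal usage sketch of the class above. The LFN, file names and metadata
# values are illustrative assumptions, not part of the original module:
if __name__ == '__main__':
    pdm = Prod3DataManager(catalogs=['DIRACFileCatalog'])

    # Hypothetical file metadata, JSON-encoded as putAndRegister expects
    fmd_json = json.dumps({'runNumber': 1234})

    # Upload one local output file and register it with its metadata
    res = pdm.putAndRegister(
        lfn='/vo.example/MC/PROD3/Data/run001xxx/output.simtel.gz',  # hypothetical LFN
        localfile='output.simtel.gz',
        filemetadata=fmd_json,
        DataType='SimtelProd')
    if not res['OK']:
        DIRAC.gLogger.error('Upload failed:', res.get('Message', ''))

    # Once safely stored, remove the local copies
    pdm.cleanLocalFiles('.', pattern='*.simtel.gz')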