]))
Script.parseCommandLine()
args = Script.getPositionalArgs()

if len(args) != 2:
    Script.showHelp()

try:
    prodID = int(args[0])
except ValueError:
    Script.showHelp()
filetype = args[1]

from LHCbDIRAC.BookkeepingSystem.Client.BookkeepingClient import BookkeepingClient

client = BookkeepingClient()
res = client.getProductionFiles(prodID, filetype)
if not res['OK']:
    print 'ERROR: Failed to retrieve production files: %s' % res['Message']
else:
    if not res['Value']:
        print 'No files found for production %s with type %s' % (prodID,
                                                                 filetype)
    else:
        print '%s %s %s %s %s' % ('FileName'.ljust(100), 'Size'.ljust(10),
                                  'GUID'.ljust(40), 'Replica'.ljust(8),
                                  'Visible'.ljust(8))
        for lfn in sorted(res['Value']):
            size = res['Value'][lfn]['FileSize']
            guid = res['Value'][lfn]['GUID']
            hasReplica = res['Value'][lfn]['GotReplica']
            # likely continuation of the truncated loop: print one row per LFN
            # (the 'Visible' metadata key is an assumption)
            visible = res['Value'][lfn].get('Visible', 'na')
            print '%s %s %s %s %s' % (lfn.ljust(100), str(size).ljust(10),
                                      str(guid).ljust(40), str(hasReplica).ljust(8),
                                      str(visible).ljust(8))
Example #2
########################################################################
"""
  List simulation conditions from the Bookkeeping
"""
__RCSID__ = "$Id$"

import DIRAC
from DIRAC.Core.Base import Script
Script.setUsageMessage('\n'.join([
    __doc__.split('\n')[1], 'Usage:',
    '  %s [option|cfgfile] ...' % Script.scriptName
]))
Script.parseCommandLine(ignoreErrors=True)

from LHCbDIRAC.BookkeepingSystem.Client.BookkeepingClient import BookkeepingClient
bk = BookkeepingClient()
exitCode = 0

res = bk.getSimConditions()
if res['OK']:
    dbresult = res['Value']
    for record in dbresult:
        print 'SimId: ' + str(record[0]).ljust(10)
        print '  SimDescription: ' + str(record[1]).ljust(10)
        print '  BeamCond: ' + str(record[2]).ljust(10)
        print '  BeamEnergy: ' + str(record[3]).ljust(10)
        print '  Generator: ' + str(record[4]).ljust(10)
        print '  MagneticField: ' + str(record[5]).ljust(10)
        print '  DetectorCond: ' + str(record[6]).ljust(10)
        print '  Luminosity: ' + str(record[7]).ljust(10)
        print '  G4settings: ' + str(record[8]).ljust(10)
else:
    # presumably the truncated tail of the script: report the failure and exit non-zero
    print 'ERROR: %s' % res['Message']
    exitCode = 2

DIRAC.exit(exitCode)
Example #3
class DiracLHCb(Dirac):

  #############################################################################
  def __init__(self, withRepo=False, repoLocation='', operationsHelperIn=None):
    """Internal initialization of the DIRAC API.
    """

    super(DiracLHCb, self).__init__(withRepo=withRepo, repoLocation=repoLocation)
    self.tier1s = []

    if not operationsHelperIn:
      self.opsH = Operations()
    else:
      self.opsH = operationsHelperIn

    self._bkQueryTemplate = {'SimulationConditions': 'All',
                             'DataTakingConditions': 'All',
                             'ProcessingPass': 'All',
                             'FileType': 'All',
                             'EventType': 'All',
                             'ConfigName': 'All',
                             'ConfigVersion': 'All',
                             'Production': 0,
                             'StartRun': 0,
                             'EndRun': 0,
                             'DataQuality': 'All',
                             'Visible': 'Yes'}
    self._bkClient = BookkeepingClient()  # to expose all BK client methods indirectly

  #############################################################################
  def addRootFile(self, lfn, fullPath, diracSE, printOutput=False):
    """ Add a Root file to Grid storage; an attempt is made to retrieve the
        POOL GUID of the file prior to upload.

       Example Usage:

       >>> print dirac.addFile('/lhcb/user/p/paterson/myRootFile.tar.gz','myFile.tar.gz','CERN-USER')
       {'OK': True, 'Value':{'Failed': {},
        'Successful': {'/lhcb/user/p/paterson/test/myRootFile.tar.gz': {'put': 64.246301889419556,
                                                                    'register': 1.1102778911590576}}}}

       @param lfn: Logical File Name (LFN)
       @type lfn: string
       @param diracSE: DIRAC SE name e.g. CERN-USER
       @type diracSE: string
       @param printOutput: Optional flag to print result
       @type printOutput: boolean
       @return: S_OK,S_ERROR
    """
    return super(DiracLHCb, self).addFile(lfn, fullPath, diracSE,
                                          fileGuid=makeGuid(fullPath)[fullPath],
                                          printOutput=printOutput)

  def addFile(self, lfn, fullPath, diracSE, printOutput=False):  # pylint: disable=arguments-differ
    """ Copy of addRootFile
    """
    return super(DiracLHCb, self).addFile(lfn, fullPath, diracSE,
                                          fileGuid=makeGuid(fullPath)[fullPath],
                                          printOutput=printOutput)

  def getBKAncestors(self, lfns, depth=1, replica=True):
    """ This function retrieves ancestor files from the Bookkeeping.

        Example Usage:

        >>> dirac.getBKAncestors('/lhcb/data/2009/DST/00005727/0000/00005727_00000042_1.dst',2)
        {'OK': True, 'Value': ['/lhcb/data/2009/DST/00005727/0000/00005727_00000042_1.dst',
        '/lhcb/data/2009/RAW/FULL/LHCb/COLLISION09/63807/063807_0000000004.raw']}

       @param lfns: Logical File Name(s) (LFN)
       @type lfns: string or list
       @param depth: Ancestor depth
       @type depth: integer
    """

    result = self._bkClient.getFileAncestors(lfns, depth, replica=replica)
    if not result['OK']:
      self.log.error('Could not get ancestors', result['Message'])
      return result
    ancestors = set(x['FileName'] for ancestors in result['Value']['Successful'].itervalues() for x in ancestors)

    return S_OK(lfns + list(ancestors))

  #############################################################################
  def bkQueryRunsByDate(self, bkPath, startDate, endDate, dqFlag='All', selection='Runs'):
    """ This function creates and performs a BK query given a supplied
        BK path. The following BK path convention is expected:

        /<ConfigurationName>/<Configuration Version>/<Condition Description>/<Processing Pass>/<Event Type>/<File Type>

        so an example for 2016 collisions data would be:

        /LHCb/Collision16/Beam6500GeV-VeloClosed-MagDown/Real Data/Reco16/Stripping26/90000000/EW.DST

        The startDate and endDate must be specified as yyyy-mm-dd.

        Runs can be selected based on their status e.g. the selection parameter
        has the following possible attributes:
         - Runs - data for all runs in the range are queried (default)
         - ProcessedRuns - data is retrieved for runs that are processed
         - NotProcessed - data is retrieved for runs that are not yet processed.

       Example Usage:

       >>> dirac.bkQueryRunsByDate('/LHCb/Collision16//Real Data/90000000/RAW',
                                   '2016-08-20','2016-08-22',dqFlag='OK',selection='Runs')
       {'OK': True, 'Value': [<LFN1>,<LFN2>]}

      dirac.bkQueryRunsByDate('/LHCb/Collision16/Beam6500GeV-VeloClosed-MagDown/Real'
                              'Data/Reco16/Stripping26/90000000/EW.DST',
                              '2016-08-20','2016-08-22',dqFlag='OK',selection='Runs')

       @param bkPath: BK path as described above
       @type bkPath: string
       @param dqFlag: Optional Data Quality flag
       @type dqFlag: string
       @param startDate: Start date  yyyy-mm-dd
       @type startDate: string
       @param endDate: End date  yyyy-mm-dd
       @type endDate: string
       @param selection: Either Runs, ProcessedRuns or NotProcessed
       @type selection: string
       @return: S_OK,S_ERROR
    """
    runSelection = ['Runs', 'ProcessedRuns', 'NotProcessed']
    if selection not in runSelection:
      return S_ERROR('Expected one of %s not "%s" for selection' % (', '.join(runSelection), selection))

    if not isinstance(bkPath, str):
      return S_ERROR('Expected string for bkPath')

    # remove any double slashes, spaces must be preserved
    # remove any empty components from leading and trailing slashes
    bkQuery = BKQuery().buildBKQuery(bkPath)
    if not bkQuery:
      return S_ERROR(
          'Please provide a BK path: '
          '/<ConfigurationName>/<Configuration Version>/<Condition Description>/<Processing Pass>'
          '/<Event Type>/<File Type>')

    if not startDate or not endDate:
      return S_ERROR('Expected both start and end dates to be defined in format: yyyy-mm-dd')

    if not isinstance(startDate, str) or not isinstance(endDate, str):
      return S_ERROR('Expected yyyy-mm-dd string for start and end dates')

    if not len(startDate.split('-')) == 3 or not len(endDate.split('-')) == 3:
      return S_ERROR('Expected yyyy-mm-dd string for start and end dates')

    start = time.time()
    result = self._bkClient.getRunsForAGivenPeriod({'StartDate': startDate, 'EndDate': endDate})
    rtime = time.time() - start
    self.log.info('BK query time: %.2f sec' % rtime)
    if not result['OK']:
      self.log.info('Could not get runs with given dates from BK with result: "%s"' % result)
      return result

    if not result['Value']:
      self.log.info('No runs selected from BK for specified dates')
      return result

    if selection not in result['Value']:
      return S_ERROR('No %s runs for specified dates' % (selection))

    runs = result['Value'][selection]
    self.log.info('Found the following %s runs:\n%s' % (len(runs), ', '.join([str(i) for i in runs])))
    # temporary until we can query for a discrete list of runs
    selectedData = []
    for run in runs:
      query = bkQuery.copy()
      query['StartRun'] = run
      query['EndRun'] = run
      query['CheckRunStatus'] = True if selection in ['ProcessedRuns', 'NotProcessed'] else False
      if dqFlag:
        check = self.__checkDQFlags(dqFlag)
        if not check['OK']:
          return check
        dqFlag = check['Value']
        query['DataQuality'] = dqFlag
      start = time.time()
      result = self._bkClient.getVisibleFilesWithMetadata(query)
      rtime = time.time() - start
      self.log.info('BK query time: %.2f sec' % rtime)
      self.log.verbose(result)
      if not result['OK']:
        return result
      self.log.info('Selected %s files for run %s' % (len(result['Value']), run))
      if result['Value']['LFNs']:
        selectedData += result['Value']['LFNs'].keys()

    self.log.info('Total files selected = %s' % (len(selectedData)))
    return S_OK(selectedData)

  #############################################################################
  def bkQueryRun(self, bkPath, dqFlag='All'):
    """ This function creates and performs a BK query given a supplied
        BK path. The following BK path convention is expected:

        /<Run Number>/<Processing Pass>/<Event Type>/<File Type>

        so an example for 2009 collisions data would be:

       /63566/Real Data + RecoToDST-07/90000000/DST

       In addition users can specify a range of runs using the following convention:

       /<Run Number 1> - <Run Number 2>/<Processing Pass>/<Event Type>/<File Type>

       so extending the above example this would look like:

       /63566-63600/Real Data + RecoToDST-07/90000000/DST

       Example Usage:

       >>> dirac.bkQueryRun('/63566/Real Data/RecoToDST-07/90000000/DST')
       {'OK':True,'Value': ['/lhcb/data/2009/DST/00005842/0000/00005842_00000008_1.dst']}

       @param bkPath: BK path as described above
       @type bkPath: string
       @param dqFlag: Optional Data Quality flag
       @type dqFlag: string
       @return: S_OK,S_ERROR
    """
    if not isinstance(bkPath, str):
      return S_ERROR('Expected string for bkPath')

    # remove any double slashes, spaces must be preserved
    # remove any empty components from leading and trailing slashes
    bkPath = translateBKPath(bkPath, procPassID=1)
    if not len(bkPath) == 4:
      return S_ERROR('Expected 4 components to the BK path: /<Run Number>/<Processing Pass>/<Event Type>/<File Type>')

    runNumberString = bkPath[0].replace('--', '-').replace(' ', '')
    startRun = 0
    endRun = 0
    if '-' in runNumberString:
      runs = runNumberString.split('-')
      if len(runs) != 2:
        return S_ERROR('Could not determine run range from "%s", try "<Run 1> - <Run2>"' % (runNumberString))
      try:
        start = int(runs[0])
        end = int(runs[1])
      except Exception:
        return S_ERROR('Invalid run range: %s' % runNumberString)
      startRun = min(start, end)
      endRun = max(start, end)
    else:
      try:
        startRun = int(runNumberString)
        endRun = startRun
      except Exception:
        return S_ERROR('Invalid run number: %s' % runNumberString)

    query = self._bkQueryTemplate.copy()
    query['StartRun'] = startRun
    query['EndRun'] = endRun
    query['ProcessingPass'] = bkPath[1]
    query['EventType'] = bkPath[2]
    query['FileType'] = bkPath[3]

    if dqFlag:
      check = self.__checkDQFlags(dqFlag)
      if not check['OK']:
        return check
      dqFlag = check['Value']
      query['DataQuality'] = dqFlag

    result = self.bkQuery(query)
    self.log.verbose(result)
    return result

  #############################################################################
  def bkQueryProduction(self, bkPath, dqFlag='All'):
    """ This function creates and performs a BK query given a supplied
        BK path. The following BK path convention is expected:

        /<ProductionID>/[<Processing Pass>/<Event Type>/]<File Type>

        so an example for 2009 collisions data would be:

       /5842/Real Data + RecoToDST-07/90000000/DST

       Note that neither the processing pass nor the event type is strictly required, so either (or both) can be omitted.

       A data quality flag can also optionally be provided; the full list of flags is available
       via the getAllDQFlags() method.

       Example Usage:

       >>> dirac.bkQueryProduction('/5842/Real Data/RecoToDST-07/90000000/DST')
       {'OK': True, 'Value': [<LFN1>,<LFN2>]}

       @param bkPath: BK path as described above
       @type bkPath: string
       @param dqFlag: Optional Data Quality flag
       @type dqFlag: string
       @return: S_OK,S_ERROR
    """
    if not isinstance(bkPath, str):
      return S_ERROR('Expected string for bkPath')

    # remove any double slashes, spaces must be preserved
    # remove any empty components from leading and trailing slashes
    bkPath = translateBKPath(bkPath, procPassID=1)
    if len(bkPath) < 2:
      return S_ERROR('Invalid bkPath: should at least contain /ProductionID/FileType')
    query = self._bkQueryTemplate.copy()
    try:
      query['Production'] = int(bkPath[0])
    except Exception:
      return S_ERROR('Invalid production ID')
    query['FileType'] = bkPath[-1]

    if dqFlag:
      check = self.__checkDQFlags(dqFlag)
      if not check['OK']:
        return check
      dqFlag = check['Value']
      query['DataQuality'] = dqFlag

    for key, val in query.items():
      if isinstance(val, basestring) and val.lower() == 'all':
        query.pop(key)
    result = self.bkQuery(query)
    self.log.verbose(result)
    return result

  #############################################################################
  def bkQueryPath(self, bkPath, dqFlag='All'):
    """ This function creates and performs a BK query given a supplied
        BK path. The following BK path convention is expected:

       /<ConfigurationName>/<Configuration Version>/<Sim or Data Taking Condition>
       /<Processing Pass>/<Event Type>/<File Type>

       so an example for 2009 collisions data would be:

       /LHCb/Collision09/Beam450GeV-VeloOpen-MagDown/Real Data + RecoToDST-07/90000000/DST

       or for MC09 simulated data:

       /MC/2010/Beam3500GeV-VeloClosed-MagDown-Nu1/2010-Sim01Reco01-withTruth/27163001/DST

       A data quality flag can also optionally be provided; the full list of flags is available
       via the getAllDQFlags() method.

       Example Usage:

       >>> dirac.bkQueryPath('/MC/2010/Beam3500GeV-VeloClosed-MagDown-Nu1/Sim07/Reco06-withTruth/10012004/DST')
       {'OK': True, 'Value': [<LFN1>,<LFN2>]}

       @param bkPath: BK path as described above
       @type bkPath: string
       @param dqFlag: Optional Data Quality flag
       @type dqFlag: string
       @return: S_OK,S_ERROR
    """
    if not isinstance(bkPath, str):
      return S_ERROR('Expected string for bkPath')

    # remove any double slashes, spaces must be preserved
    # remove any empty components from leading and trailing slashes
    bkPath = translateBKPath(bkPath, procPassID=3)
    if not len(bkPath) == 6:
      return S_ERROR('Expected 6 components to the BK path: '
                     '/<ConfigurationName>/<Configuration Version>/<Sim or Data Taking Condition>'
                     '/<Processing Pass>/<Event Type>/<File Type>')

    query = self._bkQueryTemplate.copy()
    query['ConfigName'] = bkPath[0]
    query['ConfigVersion'] = bkPath[1]
    query['ProcessingPass'] = bkPath[3]
    query['EventType'] = bkPath[4]
    query['FileType'] = bkPath[5]

    if dqFlag:
      check = self.__checkDQFlags(dqFlag)
      if not check['OK']:
        return check
      dqFlag = check['Value']
      query['DataQuality'] = dqFlag

    # The problem here is that we don't know if it's a sim or data taking condition,
    # assume that if configName=MC this is simulation
    if bkPath[0].lower() == 'mc':
      query['SimulationConditions'] = bkPath[2]
    else:
      query['DataTakingConditions'] = bkPath[2]

    result = self.bkQuery(query)
    self.log.verbose(result)
    return result

  #############################################################################
  def bookkeepingQuery(self, SimulationConditions='All', DataTakingConditions='All',
                       ProcessingPass='All', FileType='All', EventType='All', ConfigName='All',
                       ConfigVersion='All', ProductionID=0, DataQuality='ALL'):
    """ This function will create and perform a BK query using the supplied arguments
        and return a list of LFNs.

        Example Usage:

        >>> dirac.bookkeepingQuery(ConfigName='LHCb',ConfigVersion='Collision09',
        EventType='90000000',ProcessingPass='******',DataTakingConditions='Beam450GeV-VeloOpen-MagDown')
        {'OK':True,'Value':<files>}

       @param  ConfigName: BK ConfigName
       @type ConfigName: string
       @param  EventType: BK EventType
       @type EventType: string
       @param  FileType: BK FileType
       @type FileType: string
       @param  ProcessingPass: BK ProcessingPass
       @type ProcessingPass: string
       @param  ProductionID: BK ProductionID
       @type ProductionID: integer
       @param  DataQuality: BK DataQuality
       @type DataQuality: string
       @param  ConfigVersion: BK ConfigVersion
       @type ConfigVersion: string
       @param  DataTakingConditions: BK DataTakingConditions
       @type DataTakingConditions: string
       @param  SimulationConditions: BK SimulationConditions
       @type SimulationConditions: string
       @return: S_OK,S_ERROR
    """
    query = self._bkQueryTemplate.copy()
    query['SimulationConditions'] = SimulationConditions
    query['DataTakingConditions'] = DataTakingConditions
    query['ProcessingPass'] = ProcessingPass
    query['FileType'] = FileType
    query['EventType'] = EventType
    query['ConfigName'] = ConfigName
    query['ConfigVersion'] = ConfigVersion
    query['Production'] = ProductionID
    query['DataQuality'] = DataQuality
    return self.bkQuery(query)

  #############################################################################
  def bkQuery(self, bkQueryDict):
    """ Developer function. Perform a query to the LHCb Bookkeeping to return
        a list of LFN(s). This method takes a BK query dictionary.

        Example Usage:

        >>> print dirac.bkQuery(query)
        {'OK':True,'Value':<files>}

       @param bkQueryDict: BK query
       @type bkQueryDict: dictionary (see bookkeepingQuery() for keys)
       @return: S_OK,S_ERROR
    """
    problematicFields = []
    # Remove the Visible flag as anyway the method is for visible files ;-)
    # bkQueryDict.setdefault( 'Visible', 'Yes' )
    for name, value in bkQueryDict.items():
      if name not in self._bkQueryTemplate:
        problematicFields.append(name)

    if problematicFields:
      msg = 'The following fields are not valid for a BK query: %s\nValid fields include: %s' % \
            (', '.join(problematicFields), ', '.join(self._bkQueryTemplate.keys()))
      return S_ERROR(msg)

    for name, value in bkQueryDict.items():
      if name == "Production" or name == "EventType" or name == "StartRun" or name == "EndRun":
        if value == 0:
          del bkQueryDict[name]
        else:
          bkQueryDict[name] = str(value)
      elif name == "FileType":
        if value.lower() == "all":
          bkQueryDict[name] = 'ALL'
      else:
        if str(value).lower() == "all":
          del bkQueryDict[name]

    if 'Production' in bkQueryDict or 'StartRun' in bkQueryDict or 'EndRun' in bkQueryDict:
      self.log.verbose('Found a specific query so loosening some restrictions to prevent BK overloading')
    else:
      if 'SimulationConditions' not in bkQueryDict and 'DataTakingConditions' not in bkQueryDict:
        return S_ERROR('A Simulation or DataTaking Condition must be specified for a BK query.')
      if 'EventType' not in bkQueryDict and 'ConfigName' not in bkQueryDict and 'ConfigVersion' not in bkQueryDict:
        return S_ERROR(
            'The minimal set of BK fields for a query is: EventType, ConfigName and ConfigVersion'
            ' in addition to a Simulation or DataTaking Condition')

    self.log.verbose('Final BK query dictionary is:')
    for item in bkQueryDict.iteritems():
      self.log.verbose('%s : %s' % item)

    start = time.time()
    result = self._bkClient.getVisibleFilesWithMetadata(bkQueryDict)
#    result = bk.getFilesWithGivenDataSets(bkQueryDict)
    rtime = time.time() - start
    self.log.info('BK query time: %.2f sec' % rtime)

    if not result['OK']:
      return S_ERROR('BK query returned an error: "%s"' % (result['Message']))

    if not result['Value']:
      return self._errorReport('No BK files selected')

    returnedFiles = len(result['Value'])
    self.log.verbose('%s files selected from the BK' % (returnedFiles))
    return result

  #############################################################################
  def __checkDQFlags(self, flags):
    """ Internal function.  Checks the provided flags against the list of
        possible DQ flag statuses from the Bookkeeping.
    """
    dqFlags = []
    if isinstance(flags, list):
      dqFlags = flags
    else:
      dqFlags = [flags]

    bkFlags = self.getAllDQFlags()
    if not bkFlags['OK']:
      return bkFlags

    final = []
    for flag in dqFlags:
      if flag.lower() == 'all':
        final.append(flag.upper())
      else:
        flag = flag.upper()
        if flag not in bkFlags['Value']:
          msg = 'Specified DQ flag "%s" is not in allowed list: %s' % (flag, ', '.join(bkFlags['Value']))
          self.log.error(msg)
          return S_ERROR(msg)
        else:
          final.append(flag)

    # when first coding this it was not possible to use a list ;)
    if len(final) == 1:
      final = final[0]

    return S_OK(final)

  #############################################################################
  def getAllDQFlags(self, printOutput=False):
    """ Helper function.  Returns the list of possible DQ flag statuses
        from the Bookkeeping.

        Example Usage:

        >>> print dirac.getAllDQFlags()
        {'OK':True,'Value':<flags>}

       @param printOutput: Optional flag to print result
       @type printOutput: boolean
       @return: S_OK,S_ERROR
    """
    result = self._bkClient.getAvailableDataQuality()
    if not result['OK']:
      self.log.error('Could not obtain possible DQ flags from BK with result:\n%s' % (result))
      return result

    if printOutput:
      flags = result['Value']
      self.log.info('Possible DQ flags from BK are: %s' % (', '.join(flags)))

    return result

  #############################################################################
  def getDataByRun(self, lfns, printOutput=False):
    """Sort the supplied lfn list by run. An S_OK object will be returned
       containing a dictionary of runs and the corresponding list of LFN(s)
       associated with them.

       Example usage:

       >>> print dirac.getDataByRun(lfns)
       {'OK': True, 'Value': {<RUN>:['<LFN>','<LFN>',...], <RUN>:['<LFN>',..]}}


       @param lfns: Logical File Name(s)
       @type lfns: list
       @param printOutput: Optional flag to print result
       @type printOutput: boolean
       @return: S_OK,S_ERROR
    """
    if isinstance(lfns, str):
      lfns = [lfns.replace('LFN:', '')]
    elif isinstance(lfns, list):
      try:
        lfns = [str(lfn.replace('LFN:', '')) for lfn in lfns]
      except ValueError as x:
        return self._errorReport(str(x), 'Expected strings for LFNs')
    else:
      return self._errorReport('Expected single string or list of strings for LFN(s)')

    runDict = {}
    start = time.time()
    result = self._bkClient.getFileMetadata(lfns)
    self.log.verbose("Obtained BK file metadata in %.2f seconds" % (time.time() - start))
    if not result['OK']:
      self.log.error('Failed to get bookkeeping metadata with result "%s"' % (result['Message']))
      return result

    for lfn, metadata in result['Value']['Successful'].items():
      if 'RunNumber' in metadata:
        runNumber = metadata['RunNumber']
        runDict.setdefault(runNumber, []).append(lfn)
      else:
        self.log.warn('Could not find run number from BK for %s' % (lfn))

    if printOutput:
      self.log.notice(self.pPrint.pformat(runDict))

    return S_OK(runDict)

  #############################################################################
  def bkMetadata(self, lfns, printOutput=False):
    """Return metadata for the supplied lfn list. An S_OK object will be returned
       containing a dictionary of LFN(s) and the corresponding metadata associated
       with them.

       Example usage:

       >>> print dirac.bkMetadata(lfns)
       {'OK': True, 'Value': {<LFN>:{'<Name>':'<Value>',...},...}}

       @param lfns: Logical File Name(s)
       @type lfns: list
       @param printOutput: Optional flag to print result
       @type printOutput: boolean
       @return: S_OK,S_ERROR
    """
    if isinstance(lfns, str):
      lfns = [lfns.replace('LFN:', '')]
    elif isinstance(lfns, list):
      try:
        lfns = [str(lfn.replace('LFN:', '')) for lfn in lfns]
      except ValueError as x:
        return self._errorReport(str(x), 'Expected strings for LFNs')
    else:
      return self._errorReport('Expected single string or list of strings for LFN(s)')

    start = time.time()
    result = self._bkClient.getFileMetadata(lfns)
    self.log.verbose("Obtained BK file metadata in %.2f seconds" % (time.time() - start))
    if not result['OK']:
      self.log.error('Failed to get bookkeeping metadata with result "%s"' % (result['Message']))
      return result

    if printOutput:
      self.log.notice(self.pPrint.pformat(result['Value']))

    return result

  #############################################################################

  def lhcbProxyInit(self, *args):  # pylint: disable=no-self-use
    """ just calling the dirac-proxy-init script
    """
    os.system("dirac-proxy-init -o LogLevel=NOTICE -t --rfc %s" % "' '".join(args))

  #############################################################################

  def lhcbProxyInfo(self, *args):  # pylint: disable=no-self-use
    """ just calling the dirac-proxy-info script
    """
    os.system("dirac-proxy-info -o LogLevel=NOTICE %s" % "' '".join(args))
  #############################################################################

  def gridWeather(self, printOutput=False):
    """This method gives a snapshot of the current Grid weather from the perspective
       of the DIRAC site and SE masks.  Tier-1 sites are returned with more detailed
       information.

       Example usage:

       >>> print dirac.gridWeather()
       {'OK': True, 'Value': {'Sites':<siteInfo>,'SEs':<seInfo>,'Tier-1s':<tierInfo>}}

       @param printOutput: Optional flag to print result
       @type printOutput: boolean
       @return: S_OK,S_ERROR
    """

    lcgSites = gConfig.getSections('/Resources/Sites/LCG')
    if not lcgSites['OK']:
      return lcgSites

    for lcgSite in lcgSites['Value']:

      tier = gConfig.getValue('/Resources/Sites/LCG/%s/MoUTierLevel' % lcgSite, 2)
      if tier in (0, 1):
        self.tier1s.append(lcgSite)

    siteInfo = self.checkSites()
    if not siteInfo['OK']:
      return siteInfo
    siteInfo = siteInfo['Value']

    seInfo = self.checkSEs()
    if not seInfo['OK']:
      return seInfo
    seInfo = seInfo['Value']

    tierSEs = {}
    for site in self.tier1s:
      tierSEs[site] = getSEsForSite(site)['Value']

    tierInfo = {}
    for site, seList in tierSEs.items():
      tierInfo[site] = {}
      for se in seList:
        if se in seInfo:
          tierSEInfo = seInfo[se]
          tierInfo[site][se] = tierSEInfo
      if site in siteInfo['AllowedSites']:
        tierInfo[site]['MaskStatus'] = 'Allowed'
      else:
        tierInfo[site]['MaskStatus'] = 'Banned'

    if printOutput:
      self.log.notice('========> Tier-1 status in DIRAC site and SE masks')
      for site in sorted(self.tier1s):
        self.log.notice('\n====> %s is %s in site mask\n' % (site, tierInfo[site]['MaskStatus']))
        self.log.notice('%s %s %s' % ('Storage Element'.ljust(25), 'Read Status'.rjust(15), 'Write Status'.rjust(15)))
        for se in sorted(tierSEs[site]):
          if se in tierInfo[site]:
            self.log.notice('%s %s %s' % (se.ljust(25),
                                          tierInfo[site][se]['ReadStatus'].rjust(15),
                                          tierInfo[site][se]['WriteStatus'].rjust(15))
                            )

      self.log.notice('\n========> Tier-2 status in DIRAC site mask\n')
      allowedSites = siteInfo['AllowedSites']
      bannedSites = siteInfo['BannedSites']
      for site in self.tier1s:
        if site in allowedSites:
          allowedSites.remove(site)
        if site in bannedSites:
          bannedSites.remove(site)
      self.log.notice(' %s sites are in the site mask, %s are banned.\n' % (len(allowedSites), len(bannedSites)))

    summary = {'Sites': siteInfo, 'SEs': seInfo, 'Tier-1s': tierInfo}
    return S_OK(summary)

  #############################################################################
  def checkSites(self, printOutput=False):  # pylint: disable=no-self-use
    """Return the list of sites in the DIRAC site mask and those which are banned.

       Example usage:

       >>> print dirac.checkSites()
       {'OK': True, 'Value': {'AllowedSites':['<Site>',...],'BannedSites':[]}}

       @param printOutput: Optional flag to print result
       @type printOutput: boolean
       @return: S_OK,S_ERROR
    """

    res = getSites()
    if not res['OK']:
      self.log.error('Could not get list of sites from CS', res['Message'])
      return res
    totalList = res['Value']

    res = DiracAdmin().getSiteMask()
    if not res['OK']:
      return res

    sites = res['Value']
    bannedSites = []
    for site in totalList:
      if site not in sites:
        bannedSites.append(site)

    if printOutput:
      self.log.notice('\n========> Allowed Sites\n')
      self.log.notice('\n'.join(sites))
      self.log.notice('\n========> Banned Sites\n')
      self.log.notice('\n'.join(bannedSites))
      self.log.notice('\nThere is a total of %s allowed sites and %s banned sites in the system.' % (len(sites),
                                                                                                     len(bannedSites)))

    return S_OK({'AllowedSites': sites, 'BannedSites': bannedSites})

  #############################################################################
  def checkSEs(self, printOutput=False):  # pylint: disable=no-self-use
    """Check the status of read and write operations in the DIRAC SE mask.

       Example usage:

       >>> print dirac.checkSEs()
       {'OK': True, 'Value': {'<SE>': {'ReadStatus': '<Status>', 'WriteStatus': '<Status>'},...}}

       @param printOutput: Optional flag to print result
       @type printOutput: boolean
       @return: S_OK,S_ERROR
    """
    res = gConfig.getSections('/Resources/StorageElements', True)

    if not res['OK']:
      self.log.error('Failed to get storage element information', res['Message'])
      return res

    if printOutput:
      self.log.notice('%s %s %s' % ('Storage Element'.ljust(25), 'Read Status'.rjust(15), 'Write Status'.rjust(15)))

    seList = sorted(res['Value'])
    result = {}
    rss = ResourceStatus()
    for se in seList:
      res = rss.getElementStatus(se, 'StorageElement')
      if not res['OK']:
        self.log.error("Failed to get StorageElement status for %s" % se)
      else:
        readState = res['Value'].get('ReadAccess', 'Active')
        writeState = res['Value'].get('WriteAccess', 'Active')
        result[se] = {'ReadStatus': readState, 'WriteStatus': writeState}
        if printOutput:
          self.log.notice('%s %s %s' % (se.ljust(25), readState.rjust(15), writeState.rjust(15)))

    return S_OK(result)

  def splitInputDataBySize(self, lfns, maxSizePerJob=20, printOutput=False):
    """Split the supplied lfn list by the replicas present at the possible
       destination sites, based on a maximum size.
       An S_OK object will be returned containing a list of
       lists in order to create the jobs.

       Example usage:

       >>> d.splitInputDataBySize(lfns,10)
       {'OK': True, 'Value': [['<LFN>'], ['<LFN>']]}


       @param lfns: Logical File Name(s) to split
       @type lfns: list
       @param maxSizePerJob: Maximum size (in GB) per bunch
       @type maxSizePerJob: integer
       @param printOutput: Optional flag to print result
       @type printOutput: boolean
       @return: S_OK,S_ERROR
    """
    sitesForSE = {}
    if isinstance(lfns, str):
      lfns = [lfns.replace('LFN:', '')]
    elif isinstance(lfns, list):
      try:
        lfns = [str(lfn.replace('LFN:', '')) for lfn in lfns]
      except TypeError as x:
        return self._errorReport(str(x), 'Expected strings for LFNs')
    else:
      return self._errorReport('Expected single string or list of strings for LFN(s)')

    if not isinstance(maxSizePerJob, int):
      try:
        maxSizePerJob = int(maxSizePerJob)
      except ValueError as x:
        return self._errorReport(str(x), 'Expected integer for maxSizePerJob')
    maxSizePerJob *= 1000 * 1000 * 1000

    replicaDict = self.getReplicas(lfns)
    if not replicaDict['OK']:
      return replicaDict
    replicas = replicaDict['Value']['Successful']
    if not replicas:
      return self._errorReport(replicaDict['Value']['Failed'].items()[0],
                               'Failed to get replica information')
    siteLfns = {}
    for lfn, reps in replicas.items():
      possibleSites = set(site
                          for se in reps
                          for site in sitesForSE.setdefault(se, getSitesForSE(se).get('Value', [])))
      siteLfns.setdefault(','.join(sorted(possibleSites)), []).append(lfn)

    if '' in siteLfns:
      # Some files don't have active replicas
      return self._errorReport('No active replica found for', str(siteLfns['']))
    # Get size of files
    metadataDict = self.getLfnMetadata(lfns, printOutput)
    if not metadataDict['OK']:
      return metadataDict
    fileSizes = dict((lfn, metadataDict['Value']['Successful'].get(lfn, {}).get('Size', maxSizePerJob))
                     for lfn in lfns)

    lfnGroups = []
    # maxSize is in GB
    for files in siteLfns.values():
      # Now get bunches of files,
      # Sort in decreasing size
      files.sort(cmp=(lambda f1, f2: fileSizes[f2] - fileSizes[f1]))
      while files:
        # print [( lfn, fileSizes[lfn] ) for lfn in files]
        group = []
        sizeTot = 0
        for lfn in list(files):
          size = fileSizes[lfn]
          if size >= maxSizePerJob:
            # a file larger than the limit forms its own group; remove it so the while loop terminates
            lfnGroups.append([lfn])
            files.remove(lfn)
          elif sizeTot + size < maxSizePerJob:
            sizeTot += size
            group.append(lfn)
            files.remove(lfn)
        if group:
          lfnGroups.append(group)

    if printOutput:
      self.log.notice(self.pPrint.pformat(lfnGroups))
    return S_OK(lfnGroups)

    #############################################################################

  def getAccessURL(self, lfn, storageElement, protocol=None, printOutput=False):
    """Retrieve an access URL for an LFN replica given a valid DIRAC SE
       name. Contacts the file catalog and the site SRM endpoint behind
       the scenes.

       Example Usage:

       >>> print dirac.getAccessURL('/lhcb/data/CCRC08/DST/00000151/0000/00000151_00004848_2.dst','CERN-RAW')
       {'OK': True, 'Value': {'Successful': {'srm://...': {'SRM2': 'rfio://...'}}, 'Failed': {}}}

       :param lfn: Logical File Name (LFN)
       :type lfn: str or python:list
       :param storageElement: DIRAC SE name e.g. CERN-RAW
       :type storageElement: string
       :param printOutput: Optional flag to print result
       :type printOutput: boolean
       :returns: S_OK,S_ERROR
    """
    ret = self._checkFileArgument(lfn, 'LFN')
    if not ret['OK']:
      return ret
    lfn = ret['Value']
    if isinstance(lfn, basestring):
      lfn = [lfn]
    results = getAccessURL(lfn, storageElement, protocol=protocol)
    if printOutput:
      printDMResult(results, empty="File not at SE", script="dirac-dms-lfn-accessURL")
    return results

  #############################################################################

  def _getLocalInputData(self, parameters):
    """ LHCb extension of DIRAC API's _getLocalInputData. Only used for handling ancestors.
    """
    inputData = parameters.get('InputData')
    if inputData:
      self.log.debug("DiracLHCb._getLocalInputData. InputData: %s" % inputData)
      if isinstance(inputData, basestring):
        inputData = inputData.split(';')
      # use replace rather than strip: strip('LFN:') would also remove leading/trailing 'L', 'F', 'N', ':' characters
      inputData = [lfn.replace('LFN:', '') for lfn in inputData]
      ancestorsDepth = int(parameters.get('AncestorDepth', 0))
      if ancestorsDepth:
        self.log.debug("DiracLHCb._getLocalInputData. ancestorsDepth: %d" % ancestorsDepth)
        res = self._bkClient.getFileAncestors(inputData, ancestorsDepth)
        if not res['OK']:
          self.log.error("Can't get ancestors", res['Message'])
          return res
        ancestorsLFNs = []
        for ancestorsLFN in res['Value']['Successful'].itervalues():
          ancestorsLFNs += [i['FileName'] for i in ancestorsLFN]
        self.log.info("DiracLHCb._getLocalInputData: adding %d ancestors" % len(ancestorsLFNs))
        self.log.verbose("%s", ', '.join(ancestorsLFNs))
        inputData += ancestorsLFNs

    return S_OK(inputData)
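
#############################################################################
# A minimal usage sketch for the DiracLHCb API shown above. The import path
# (LHCbDIRAC.Interfaces.API.DiracLHCb) is an assumption; the BK path and the
# LFN are the examples quoted in the bkQueryPath and getBKAncestors docstrings.
from DIRAC.Core.Base import Script
Script.parseCommandLine(ignoreErrors=True)

from LHCbDIRAC.Interfaces.API.DiracLHCb import DiracLHCb

dirac = DiracLHCb()

# Full BK path: /<ConfigName>/<ConfigVersion>/<Conditions>/<Processing Pass>/<Event Type>/<File Type>
res = dirac.bkQueryPath('/MC/2010/Beam3500GeV-VeloClosed-MagDown-Nu1/Sim07/Reco06-withTruth/10012004/DST',
                        dqFlag='OK')
if not res['OK']:
    print 'BK query failed: %s' % res['Message']
else:
    print '%d entries selected from the Bookkeeping' % len(res['Value'])

# Group LFNs by run number; keys of the returned dictionary are run numbers
res = dirac.getDataByRun(['/lhcb/data/2009/DST/00005727/0000/00005727_00000042_1.dst'])
if res['OK']:
    for run, lfns in res['Value'].items():
        print 'Run %s: %d file(s)' % (run, len(lfns))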
Example #4
    transformation.setPlugin(plugin)
    transformation.setBkQuery(bkQueryDict)

    from LHCbDIRAC.TransformationSystem.Agent.TransformationPlugin import TransformationPlugin
    transID = -9999
    pluginParams['TransformationID'] = transID
    pluginParams['Status'] = "Active"
    pluginParams['Type'] = transType
    # Create a fake transformation client
    fakeClient = fakeClient(transformation, transID, lfns, asIfProd)
    from LHCbDIRAC.BookkeepingSystem.Client.BookkeepingClient import BookkeepingClient
    from DIRAC.DataManagementSystem.Client.DataManager import DataManager
    oplugin = TransformationPlugin(plugin,
                                   transClient=fakeClient,
                                   dataManager=DataManager(),
                                   bkClient=BookkeepingClient())
    pluginParams['TransformationID'] = transID
    pluginParams.update(pluginSEParams)
    oplugin.setParameters(pluginParams)
    replicas = fakeClient.getReplicas()
    # Special case of RAW files registered in CERN-RDST...
    if plugin == "AtomicRun":
        for lfn in [lfn for lfn in replicas if "CERN-RDST" in replicas[lfn]]:
            ses = {}
            for se in replicas[lfn]:
                pfn = replicas[lfn][se]
                if se == "CERN-RDST":
                    se = "CERN-RAW"
                ses[se] = pfn
            replicas[lfn] = ses
    files = fakeClient.getFiles()
import DIRAC
from DIRAC.Core.Base import Script

Script.setUsageMessage('\n'.join([
    __doc__.split('\n')[1], 'Usage:',
    '  %s [option|cfgfile] ... LFN|File Flag' % Script.scriptName,
    'Arguments:', '  LFN:      Logical File Name',
    '  File:     Name of the file with a list of LFNs',
    '  Flag:     Quality Flag'
]))
Script.parseCommandLine(ignoreErrors=True)
args = Script.getPositionalArgs()

from LHCbDIRAC.BookkeepingSystem.Client.BookkeepingClient import BookkeepingClient
bk = BookkeepingClient()

if len(args) < 2:
    result = bk.getAvailableDataQuality()
    if not result['OK']:
        print 'ERROR: %s' % (result['Message'])
        DIRAC.exit(2)
    flags = result['Value']
    print "Available Data Quality Flags"
    for flag in flags:
        print flag
    Script.showHelp()

exitCode = 0
filename = args[0]
flag = args[1]
                      " output file name [dirac-dms-chec-dir-cont.out]")
Script.registerSwitch("v", "Verbose",
                      " use this option for verbose output [False]")

Script.setUsageMessage('\n'.join([
    __doc__.split('\n')[1],
    'Usage:',
    '  %s [option|cfgfile] ...' % Script.scriptName,
]))

Script.parseCommandLine(ignoreErrors=False)

from DIRAC.Resources.Catalog.FileCatalog import FileCatalog
from DIRAC.Resources.Storage.StorageElement import StorageElement
from LHCbDIRAC.BookkeepingSystem.Client.BookkeepingClient import BookkeepingClient
bkClient = BookkeepingClient()

outputFileName = 'dirac-dms-chec-dir-cont.out'
verbose = False  # default per the Verbose switch help above

for switch in Script.getUnprocessedSwitches():
    if switch[0].lower() == "u" or switch[0].lower() == "unit":
        unit = switch[1]
    if switch[0] == "D" or switch[0].lower() == "dir":
        dir = switch[1]
    if switch[0] == "f" or switch[0].lower() == "output":
        outputFile = switch[1]
        outputFileName = outputFile
    if switch[0] == "v" or switch[0].lower() == "verbose":
        verbose = True

if verbose:
Example #7
#!/usr/bin/env python

import sys
import random
import time

from DIRAC.Core.Base.Script import parseCommandLine
parseCommandLine()

from LHCbDIRAC.BookkeepingSystem.Client.BookkeepingClient import BookkeepingClient
cl = BookkeepingClient()

res = cl.getAvailableProductions()
if not res['OK']:
  print res['Message']
  sys.exit(0)

allproductions = sorted([i[0] for i in res['Value']], reverse=True)
productions = allproductions[:10000]


class Transaction(object):

  def __init__(self):
    self.custom_timers = {}

  def run(self):
    # print len(datasets)
    i = random.randint(0, len(productions) - 1)
    production = productions[i]
    # print dataset
Example #8
# DIRAC.exit(2)

#
# Processing pass needs to start as "/Real Data" for FULL stream flagging
#

if realData not in processing:
    print 'You forgot /Real Data in the processing pass: ', processing
    DIRAC.exit(2)

# (garbled in the source; `res` is presumably the result of a Bookkeeping run-information query)
if not res['OK']:
    gLogger.error('Cannot load the information for run %s' % (run))
    gLogger.error(res['Message'])
    DIRAC.exit(2)

dtd = res['Value']['DataTakingDescription']
configName = res['Value']['Configuration Name']
configVersion = res['Value']['Configuration Version']

bkDict = {
    'ConfigName': configName,
    'ConfigVersion': configVersion,
    'ConditionDescription': dtd
}
    ]))

    Script.parseCommandLine(ignoreErrors=False)
    dumpList = False
    for switch in Script.getUnprocessedSwitches():
        if switch[0] == 'List':
            dumpList = True

    bkQuery = dmScript.getBKQuery()
    lfns = dmScript.getOption('LFNs', [])
    if not bkQuery and not lfns:
        gLogger.error("No BKQuery and no files given...")
        dExit(1)
    # Invert the visibility flag, as we want to set Invisible the files that are currently Visible and vice-versa
    from LHCbDIRAC.BookkeepingSystem.Client.BookkeepingClient import BookkeepingClient
    bk = BookkeepingClient()

    visibilityFlag = dmScript.getOption('Visibility', None)
    if visibilityFlag is None:
        gLogger.error('Visibility option should be given')
        dExit(2)
    visibilityFlag = str(visibilityFlag).lower() == 'yes'
    if bkQuery:
        # Query with visibility opposite to what is requested to be set ;-)
        bkQuery.setOption('Visible', 'No' if visibilityFlag else 'Yes')
        gLogger.notice("BK query:", bkQuery)
        lfns += bkQuery.getLFNs()
    if not lfns:
        gLogger.notice("No files found...")
    else:
        res = {'OK': True}
Example #10
class ProcessingProgress( object ):

  def __init__( self, cacheFile = None ):
    if not cacheFile:
      self.prodStatFile = os.path.join( os.environ['HOME'], ".dirac/work", "dirac-production-stats.pkl" )
    else:
      self.prodStatFile = cacheFile
    self.cacheVersion = '0.0'
    self.clearCache = []
    self.cachedInfo = {}

    # Retrieve the previously cached information
    self.readCache()

    self.bk = BookkeepingClient()
    self.transClient = TransformationClient()

  def setClearCache( self, clearCache ):
    self.clearCache = clearCache

  def __getProdBkDict( self, prod ):
    res = self.transClient.getBookkeepingQuery( prod )
    if not res['OK']:
      gLogger.error( "Couldn't get BK query on production %d" % prod )
      return {}
    prodBKDict = res['Value']
    return prodBKDict

  def getFullStats( self, bkQuery, printResult = False ):
    processingPass = bkQuery.getProcessingPass()
    if printResult:
      gLogger.info( "\nStatistics for processing %s, condition %s\n" % ( processingPass, bkQuery.getConditions() ) )
    prodStats = []
    processingPass = processingPass.split( '/' )
    if len( processingPass ) != 4 or processingPass[1] != "Real Data":
      gLogger.error( "Processing pass should be /Real Data/<Reco>/<Stripping>" )
      return []

    # Get production numbers for the Reco
    recoBKQuery = BKQuery( bkQuery )
    recoBKQuery.setProcessingPass( '/'.join( processingPass[0:3] ) )
    recoList = recoBKQuery.getBKProductions( visible = False )
    recoRunRanges = {}
    recoDQFlags = []
    for prod in recoList:
      prodBKDict = self.__getProdBkDict( prod )
      if prodBKDict:
        recoRunRanges[prod] = [prodBKDict.get( "StartRun", 0 ), prodBKDict.get( "EndRun", sys.maxint )]
        dqFlags = prodBKDict.get( "DataQualityFlag", ['UNCHECKED', 'EXPRESS_OK', 'OK'] )
        if isinstance( dqFlags, basestring ):
          dqFlags = dqFlags.split( ',' )
        recoDQFlags += [fl for fl in dqFlags if fl not in recoDQFlags]
      else:
        recoRunRanges[prod] = [0, 0]
    # Sort productions by runs
    try:
      recoList.sort( cmp = ( lambda p1, p2: int( recoRunRanges[p1][0] - recoRunRanges[p2][1] ) ) )
    except Exception:
      print "Exception in sorting productions:"
      for p in recoList:
        print p, recoRunRanges[p]
    gLogger.verbose( "Reconstruction productions found (%d): %s" % ( len( recoList ), str( sorted( recoList ) ) ) )
    gLogger.verbose( "Reconstruction DQ flags: %s" % str( recoDQFlags ) )

    # Get productions for merging
    mergeList = []
    mergeStripProds = {}
    # Get stripping productions as parents of merging productions
    stripList = []
    for prod in bkQuery.getBKProductions( visible = False ):
      prodBKDict = self.__getProdBkDict( prod )
      gLogger.verbose( "BK query for production %s: %s" % ( prod, str( prodBKDict ) ) )
      mergedTypes = prodBKDict.get( 'FileType' )
      if type( mergedTypes ) != type( [] ):
        mergedTypes = [mergedTypes]
      if [ft for ft in bkQuery.getFileTypeList() if ft in mergedTypes] and 'ProductionID' in prodBKDict:
        mergeList.append( prod )
        prods = prodBKDict['ProductionID']
        if type( prods ) != type( [] ):
          prods = [prods]
        stripList += prods
        mergeStripProds[prod] = [str( p ) for p in prods]
      else:
        _msgTuple = ( str( bkQuery.getFileTypeList() ), prod, str( prodBKDict ) )
        gLogger.verbose( "Could not find production or filetype %s in BKquery of production %d (%s)" % _msgTuple )
    mergeList.sort( cmp = ( lambda p1, p2: int( mergeStripProds[p1][0] ) - int( mergeStripProds[p2][0] ) ) )
    gLogger.verbose( "Merging productions found: %s" % str( mergeList ) )

    # get list of stripping productions (from merging)
    stripRunRanges = {}
    for prod in stripList:
      prodBKDict = self.__getProdBkDict( prod )
      if prodBKDict:
        stripRunRanges[prod] = [prodBKDict.get( "StartRun", 0 ), prodBKDict.get( "EndRun", sys.maxint )]
      else:
        stripRunRanges[prod] = [0, 0]
    # Sort productions by runs
    try:
      stripList.sort( cmp = ( lambda p1, p2: int( stripRunRanges[p1][0] - stripRunRanges[p2][1] ) ) )
    except Exception:
      print "Error when sorting stripping productions:"
      for prodStrip in stripList:
        print prodStrip, stripRunRanges[prodStrip]
    gLogger.verbose( "Stripping productions found (%d): %s" % ( len( stripList ), str( sorted( stripList ) ) ) )

    # Get all runs corresponding to the run range used by the Reco productions
    rawBKQuery = BKQuery( bkQuery )
    rawBKQuery.setProcessingPass( '/Real Data' )
    rawBKQuery.setFileType( "RAW" )
    # get the list of runs (-prodNum)
    fullRunList = rawBKQuery.getBKRuns()
    gLogger.verbose( "Initial list of runs: %s" % str( fullRunList ) )
    recoRunList = []
    openProd = False
    for prod in [p for p in recoList]:
      # Forget fully openProd productions
      # Don't consider productions without a BK query (these are individual files)
      if recoRunRanges[prod][1] == sys.maxint and recoRunRanges[prod][0] != -sys.maxint:
        openProd = True
        # Try and find if that open production overlaps with a closed one, in which case, remove it
        # Do nothing for derived productions
        for p in [p for p in recoList if p != prod and recoRunRanges[prod] != recoRunRanges[p]]:
          if recoRunRanges[prod][0] < recoRunRanges[p][1] and recoRunRanges[p][1] != sys.maxint:
            openProd = False
            gLogger.verbose( "Production %s was removed as redundant..." % str( prod ) )
            recoList.remove( prod )
            break
        if not openProd: continue
      recoRunList += [run for run in fullRunList if run not in recoRunList and run >= recoRunRanges[prod][0] and run <= recoRunRanges[prod][1]]
    gLogger.verbose( "List of runs matching Reco (%d): %s" % ( len( recoRunList ), str( sorted( recoRunList ) ) ) )

    restrictToStripping = True
    if restrictToStripping and not openProd and stripList:
      runList = []
      for prod in stripList:
        runList += [run for run in recoRunList if run not in runList and run >= stripRunRanges[prod][0] and run <= stripRunRanges[prod][1]]
    else:
      runList = recoRunList
    gLogger.verbose( "Final list of runs matching Reco and Stripping (%d): %s" % ( len( runList ), str( sorted( runList ) ) ) )

    # Now get statistics from the runs
    info, runInfo = self._getStatsFromRuns( int( bkQuery.getEventTypeList()[0] ), runList, recoDQFlags )
    rawInfo = StatInfo( processingPass[1], info )
    prodStats.append( rawInfo )
    if printResult:
      gLogger.info( "%s - All runs in Reco productions" % processingPass[1] )
      for fileInfo in runInfo:
        if runInfo[fileInfo]:
          gLogger.info( "%s runs (%d): %s" % ( fileInfo, len( runInfo[fileInfo] ), str( runInfo[fileInfo] ) ) )
      summStr = "%s files, " % rawInfo.getItemAsString( 'Files' )
      summStr += "%s events in " % rawInfo.getItemAsString( 'Events' )
      summStr += "%s runs, luminosity (pb-1):All=%s, Bad=%s, OK=%s" % ( rawInfo.getItemAsString( 'Runs' ),
                                                                        rawInfo.getItemAsString( 'Lumi' ),
                                                                        rawInfo.getItemAsString( 'BadLumi' ),
                                                                        rawInfo.getItemAsString( 'OKLumi' ) )
      gLogger.info( summStr )

    # Create the info for the 3 sets of productions
    prodSets = []
    fileType = bkQuery.getFileTypeList()[0]
    prodSets.append( {'Name': processingPass[2], 'FileType': ['SDST', 'FULL.DST'], 'List':recoList,
                      'RunRange':recoRunRanges, 'MotherProds':None, 'AllReplicas':False } )
    prodSets.append( {'Name': processingPass[3], 'FileType': fileType, 'List':stripList,
                      'RunRange':stripRunRanges, 'MotherProds':None, 'AllReplicas':True, 'StatForOK':False } )
    prodSets.append( {'Name': "Merging (%s)" % fileType.split( '.' )[0], 'FileType': fileType, 'List':mergeList,
                      'RunRange':None, 'MotherProds':mergeStripProds, 'AllReplicas':False } )

    prevInfo = rawInfo
    for prodSet in prodSets:
      info = StatInfo( prodSet['Name'], self._getProdInfo( prodSet, runList, printResult = printResult ) )
      info.setRawInfo( rawInfo )
      info.setPrevInfo( prevInfo )
      prevInfo = info
      prodStats.append( info )

    self.saveCache()
    return prodStats

  @staticmethod
  def __sumProdInfo( info, totInfo ):
    for inf in info:
      for flag in info[inf]:
        if inf == 'Runs':
          totInfo[inf][flag] = totInfo.setdefault( inf, {} ).setdefault( flag, [] ) + info[inf][flag]
        else:
          totInfo[inf][flag] = totInfo.setdefault( inf, {} ).setdefault( flag, 0 ) + info[inf][flag]
    return totInfo

  def _getProdInfo( self, prodSet, runList, printResult = False ):
    totInfo = {}
    if printResult:
      gLogger.info( "" )
    for prod in prodSet['List']:
      info, runInfo = self._getStatsFromBK( prod, prodSet['FileType'], runList, prodSet['AllReplicas'] )
      if info['Files'][''] == 0:
        continue
      if not prodSet.get( 'StatForOK', True ):
        for item in info:
          for fl in info[item]:
            if fl == 'OK':
              info[item][fl] = 0 if not item == 'Runs' else []
      runRange = prodSet['RunRange']
      if runRange and prod in runRange and runRange[prod][0] == 0 and runRange[prod][1] == 0:
        for flag in info['Runs']:
          info['Runs'][flag] = []
      totInfo = self.__sumProdInfo( info, totInfo )
      if printResult:
        summStr = "%s production %d -" % ( prodSet['Name'], prod )
        if runRange and prod in runRange:
          firstRun = runRange[prod][0]
          lastRun = runRange[prod][1]
          if firstRun:
            summStr += " From run %d" % int( firstRun )
          if lastRun and lastRun != sys.maxint:
            summStr += " Up to run %d" % int( lastRun )
          if firstRun == 0 and lastRun == 0:
            summStr += "No run range specified"
        motherProds = prodSet['MotherProds']
        if motherProds and prod in motherProds:
          summStr += " from productions %s" % motherProds[prod]
        gLogger.info( summStr )
        for inf in runInfo:
          if runInfo[inf]:
            gLogger.info( "%s runs (%d): %s" % ( inf, len( runInfo[inf] ), str( runInfo[inf] ) ) )
        summStr = "%d files, " % info['Files']['']
        if info['Events']:
          summStr += "%d events in " % info['Events']['']
        _msgTuple = ( len( info['Runs'][''] ), info['Lumi'][''], info['Lumi']['Bad'], info['Lumi']['OK'] )
        summStr += "%d runs, luminosity (pb-1): All=%.3f, Bad=%.3f, OK=%.3f" % _msgTuple
        gLogger.info( summStr )
    for flag in totInfo.get( 'Runs', [] ):
      totInfo['Runs'][flag] = len( totInfo['Runs'][flag] )
    return totInfo

  @staticmethod
  def outputResults( conditions, processingPass, prodStats ):
    outputString = ""
    _msgTuple = ( conditions, ",", processingPass, "on", time.ctime( time.time() ) )
    outputString += "\nProduction progress for %s %s %s %s %s\n" % _msgTuple
    if len( prodStats ) < 4:
      outputString += "No statistics found for this BK query"
      return outputString
    for i in xrange( 4 ):
      info = prodStats[i]
      if not info:
        continue
      name = info.getName()
      outputString += "\nSummary for %s\n" % name
      outputString += "%d files, " % info.getItem( 'Files' )
      if info.getItem( 'Events' ):
        outputString += "%d events in " % info.getItem( 'Events' )
      _msgTuple = ( info.getItem( 'Runs' ), info.getItem( 'Lumi' ), info.getItem( 'BadLumi' ), info.getItem( 'OKLumi' ) )
      outputString += "%d runs, luminosity (pb-1): All=%.3f, Bad=%.3f, OK=%.3f\n" % _msgTuple
      prevStats = prodStats[:i]
      prevStats.reverse()
      for prevInfo in prevStats:
        name = prevInfo.getName()
        if prevInfo.getItem( 'Runs' ) == 0:
          outputString += "From %s : - No runs...\n" % name
        else:
          outputString += "From %s : %.1f%% files, " % ( name, 100.*info.getItem( 'Files' ) / prevInfo.getItem( 'Files' ) )
          if info.getItem( 'Events' ) and prevInfo.getItem( 'Events' ):
            outputString += "%.1f%% events\n" % ( 100.*info.getItem( 'Events' ) / prevInfo.getItem( 'Events' ) )
          outputString += "%.1f%% runs, %.1f%% luminosity\n" \
                          % ( 100. * info.getItem( 'Runs' ) / prevInfo.getItem( 'Runs' ),
                              100. * info.getItem( 'Lumi' ) / prevInfo.getItem( 'Lumi' ) )
    return outputString

  def __getRunsDQFlag( self, runList, evtType ):
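    # Ask the BK for the per-file DQ flags of the given runs (for this event type) and
    # reduce them to one flag per run, with priority BAD > OK > EXPRESS_OK > UNCHECKED.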
    res = self.bk.getRunFilesDataQuality( runList )
    runFlags = {}
    if res['OK']:
      dqFlags = res['Value']
      for dq in dqFlags:
        if dq[2] == evtType:
          runFlags.setdefault( dq[0], [] ).append( dq[1] )
    runDQFlags = {}
    flags = ( 'BAD', 'OK', 'EXPRESS_OK', 'UNCHECKED' )
    for run in runFlags:
      for fl in flags:
        if fl in runFlags[run]:
          runDQFlags[run] = fl
          break
    return runDQFlags

  def _getStatsFromRuns( self, evtType, runList, recoDQFlags ):
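    # Build per-flag counters (Runs, Files, Events, Lumi) directly from the RAW runs,
    # refreshing the cached run information for runs not yet cached or started less
    # than two days ago (their DQ flag may still change).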
    info = dict.fromkeys( ( 'Events', 'Runs', 'Files', 'Lumi' ), {} )
    for inf in info:
      info[inf] = dict.fromkeys( ( 'Bad', 'OK', '' ), 0 )
    now = datetime.datetime.utcnow()
    # Set to True to renew the cache
    clearCache = 'RAW' in self.clearCache
    newRuns = [ run for run in runList if clearCache
                or run not in self.cachedInfo
                or 'DQFlag' not in self.cachedInfo[run]
                or ( now - self.cachedInfo[run]['Time'] ) < datetime.timedelta( days = 2 )   ]
    if newRuns:
      runFlags = self.__getRunsDQFlag( newRuns, evtType )
    else:
      runFlags = {}
    runsByDQFlag = {}
    runInfo = {}
    for run in runList:
      cached = self.cachedInfo.get( run, {} )
      cachedTime = cached.get( 'Time', None )
      if run not in newRuns:
        cachedFiles = cached.get( 'Files', 0 )
        cachedEvents = cached.get( 'EventStat', 0 )
        cachedLumi = cached.get( 'Luminosity', 0 )
        dqFlag = cached.get( 'DQFlag', None )
      else:
        res = self.bk.getRunInformations( run )
        if res['OK']:
          val = res['Value']
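          # 90000000 is assumed to be the event type of the full RAW stream: use its
          # index to pick the file/event/luminosity counters from the run summary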
          ind = val['Stream'].index( 90000000 )
          cachedFiles = val['Number of file'][ind]
          cachedEvents = val['Number of events'][ind]
          cachedLumi = val['luminosity'][ind]
          cachedTime = val['RunStart']
        else:
          gLogger.error( "Unable to get run information for run %s" % str( run ) )
          continue
        dqFlag = runFlags[run]
      self.cachedInfo[run] = { 'Time':cachedTime, 'Files':cachedFiles, 'EventStat': cachedEvents,
                              'Luminosity': cachedLumi, 'DQFlag':dqFlag }
      runsByDQFlag[dqFlag] = runsByDQFlag.setdefault( dqFlag, 0 ) + 1
      if dqFlag == "BAD":
        runInfo.setdefault( 'BAD', [] ).append( run )
      elif dqFlag not in recoDQFlags and dqFlag != 'OK' :
        runInfo.setdefault( 'Untagged', [] ).append( run )
      # Now count...
      flags = []
      if dqFlag != 'BAD':
        flags.append( '' )
        # OK in recoDQFlags means we take everything that is not BAD (reprocessing or new convention)
        if dqFlag in recoDQFlags or dqFlag == 'OK':
          flags.append( 'OK' )
      else:
        flags.append( 'Bad' )
      for flag in flags:
        info['Runs'][flag] += 1
        info['Files'][flag] += cachedFiles
        info['Events'][flag] += cachedEvents
        info['Lumi'][flag] += cachedLumi

    # Set lumi in pb-1
    for flag in info['Lumi']:
      info['Lumi'][flag] /= 1000000.
    gLogger.info( "Runs per flag:" )
    for key in runsByDQFlag:
      gLogger.info( "%s : %d" % ( key, runsByDQFlag[key] ) )
    for flag in runInfo:
      runInfo[flag].sort()
    return info, runInfo

  def __getLfnsMetadata( self, lfns ):
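    # Retrieve EventStat, Luminosity, DQFlag and RunNumber for the given LFNs from the
    # BK, in chunks of 1000 files, retrying each chunk until the BK call succeeds.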
    lfnDict = {}
    if len( lfns ):
      gLogger.verbose( "Getting metadata for %d files" % len( lfns ) )
      for lfnChunk in breakListIntoChunks( lfns, 1000 ):
        while True:
          res = self.bk.getFileMetadata( lfnChunk )
          if not res['OK']:
            gLogger.error( "Error getting files metadata, retrying...", res['Message'] )
          else:
            break
        metadata = res['Value']['Successful']
        for lfn in lfnChunk:
          lfnDict[lfn] = {}
          for meta in ( 'EventStat', 'Luminosity', 'DQFlag', 'RunNumber' ):
            lfnDict[lfn][meta] = metadata[lfn][meta]
    return lfnDict

  def _getStatsFromBK( self, prod, fileType, runList, allReplicas ):
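    # Gather per-flag statistics (Files, Events, Lumi, Runs) for one production and file
    # type. A local cache keyed by the BK query is used: only files whose DQ flag is not
    # yet final ('OK' or 'BAD') are re-queried, plus files registered since the cached time.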
    bkQueryDict = { "ProductionID": prod, "FileType": fileType }
    bkStr = str( bkQueryDict )
    bkQuery = BKQuery( bkQueryDict, visible = not allReplicas )
    if allReplicas:
      bkQuery.setOption( 'ReplicaFlag', "All" )
    cached = self.cachedInfo.get( bkStr, {} )
    cachedTime = cached.get( 'Time', None )
    cachedLfns = cached.get( 'Lfns', {} )
    if isinstance( fileType, basestring ):
      fileType = [fileType]
    if set( fileType ).intersection( set( self.clearCache ) ):
      cachedTime = None
      cachedLfns = {}
      gLogger.verbose( "Cleared cache for production %s, file type %s" % ( str( prod ), fileType ) )
    # Update if needed the cached information on LFNs
    if cachedLfns:
      lfns = [lfn for lfn in cachedLfns if cachedLfns[lfn].get( 'DQFlag' ) not in ( 'OK', 'BAD' )]
      for lfnChunk in breakListIntoChunks( lfns, 1000 ):
        #  get the DQFlag of files that are not yet OK
        while True:
          res = self.bk.getFileMetadata( lfnChunk )
          if not res['OK']:
            gLogger.error( "Error getting files metadata for cached files, bkQuery %s: %s" % ( bkStr, res['Message'] ) )
          else:
            metadata = res['Value']['Successful']
            for lfn in lfnChunk:
              cachedLfns[lfn]['DQFlag'] = metadata[lfn]['DQFlag']
            break

    # Now get the new files since last time...
    if cachedTime:
      bkQuery.setOption( 'StartDate', cachedTime.strftime( '%Y-%m-%d %H:%M:%S' ) )
    gLogger.verbose( "Getting files for BKQuery %s" % str( bkQuery ) )
    cachedTime = datetime.datetime.utcnow()
    lfns = [lfn for lfn in bkQuery.getLFNs( printOutput = False ) if lfn not in cachedLfns]
    gLogger.verbose( "Returned %d files" % len( lfns ) )
    cachedLfns.update( self.__getLfnsMetadata( lfns ) )

    self.cachedInfo[bkStr] = { 'Time':cachedTime, 'Lfns':cachedLfns }

    # Now sum up all information for the files
    info = dict.fromkeys( ( 'Events', 'Runs', 'Files', 'Lumi' ), {} )
    for inf in info:
      if inf == 'Runs':
        for flag in ( 'Bad', 'OK', '' ):
          info[inf][flag] = []
      else:
        info[inf] = dict.fromkeys( ( 'Bad', 'OK', '' ), 0 )

    for lfn in cachedLfns:
      lfnInfo = cachedLfns[lfn]
      run = lfnInfo['RunNumber']
      if run in runList and run in self.cachedInfo and self.cachedInfo[run]['DQFlag'] != 'BAD':
        dqFlag = cachedLfns[lfn]['DQFlag']
        flags = []
        if dqFlag != 'BAD':
          flags.append( '' )
        else:
          flags.append( 'Bad' )
        if dqFlag == 'OK':
          flags.append( 'OK' )
        for flag in flags:
          if run not in info['Runs'][flag]:
            info['Runs'][flag].append( run )
          info['Files'][flag] += 1
          info['Events'][flag] += lfnInfo['EventStat']
          info['Lumi'][flag] += lfnInfo['Luminosity']

    runInfo = {}
    if 'Bad' in info['Runs']:
      runInfo['BAD'] = info['Runs']['Bad']
      runInfo['BAD'].sort()
    else:
      runInfo['BAD'] = []
    if '' in info['Runs'] and 'OK' in info['Runs']:
      runInfo['Untagged'] = [run for run in info['Runs'][''] if run not in info['Runs']['OK']]
      runInfo['Untagged'].sort()
    else:
      runInfo['Untagged'] = []
    # for f in info['Runs']:
    #  info['Runs'][f] = len( info['Runs'][f] )
    for flag in info['Lumi']:
      info['Lumi'][flag] /= 1000000.
    return info, runInfo

  def getPreviousStats( self, processingPass ):
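    # Return the cached statistics for this processing pass, or None if they are absent
    # or unusable.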
    prevStats = self.cachedInfo.get( 'ProdStats', {} ).get( processingPass )
    if prevStats:
      try:
        _name = prevStats['ProdStats'][0][0].getName()
      except:
        prevStats = None
    return prevStats

  def setPreviousStats( self, processingPass, prevStats ):
    self.cachedInfo.setdefault( 'ProdStats', {} )[processingPass] = prevStats
    self.saveCache()

  def readCache( self ):
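    # Load the pickled cache from disk under a file lock; the cache is reset if the file
    # does not exist or was written with a different cache version.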
    if not os.path.exists( self.prodStatFile ):
      gLogger.info( "Created cached file %s" % self.prodStatFile )
      self.cachedInfo = {}
      self.saveCache()
      return
    fileRead = False
    while not fileRead:
      try:
        with FileLock( self.prodStatFile ):
          lFile = open( self.prodStatFile, 'r' )
          cachedVersion = pickle.load( lFile )
          startTime = time.time()
          if cachedVersion == self.cacheVersion:
            self.cachedInfo = pickle.load( lFile )
            _msgTuple = ( self.prodStatFile, time.time() - startTime )
            gLogger.info( "Loaded cached information from %s in %.3f seconds" % _msgTuple )
          else:
            _msgTuple = ( cachedVersion, self.cacheVersion )
            gLogger.info( "Incompatible versions of cache, reset information (%s, expect %s)" % _msgTuple )
            self.cachedInfo = {}
          lFile.close()
          fileRead = True
      except FileLockException, error:
        gLogger.error( "Lock exception: %s while reading pickle file %s" % ( error, self.prodStatFile ) )
      except:
Ejemplo n.º 11
        inputFiles = True
        outputFiles = True
    jobidList = []
    for jobid in args:
        if os.path.exists(jobid):
            bkScript.setJobidsFromFile(jobid)
        else:
            jobidList += jobid.split(',')
    jobidList += bkScript.getOption('JobIDs', [])
    if not jobidList:
        print "No jobID provided!"
        Script.showHelp()
        DIRAC.exit(0)

    from LHCbDIRAC.BookkeepingSystem.Client.BookkeepingClient import BookkeepingClient
    retVal = BookkeepingClient().getJobInputOutputFiles(jobidList)
    if retVal['OK']:
        success = retVal['Value']['Successful']
        for job in success:
            # Remove from input the files that are also output! This happens because the output of step 1 can be the input of step 2...
            # only worth if input files are requested though
            if inputFiles:
                success[job]['InputFiles'] = sorted(
                    set(success[job]['InputFiles']) -
                    set(success[job]['OutputFiles']))

            if not inputFiles or not outputFiles:
                success[job].pop(
                    'InputFiles' if not inputFiles else 'OutputFiles')

    printDMResult(retVal, empty="File does not exist in the Bookkeeping")
def execute():
    """
  Parse the options and execute the script
  """
    bkQuery = dmScript.getBKQuery()
    fileType = bkQuery.getFileTypeList()
    if not set(fileType) & {'FULL.DST', 'RDST', 'SDST'}:
        gLogger.error("Please provide a reconstruction BK path")
        DIRAC.exit(1)

    from LHCbDIRAC.TransformationSystem.Client.TransformationClient import TransformationClient
    from DIRAC.DataManagementSystem.Client.DataManager import DataManager
    from DIRAC.Core.Utilities.List import breakListIntoChunks
    from LHCbDIRAC.BookkeepingSystem.Client.BookkeepingClient import BookkeepingClient
    from DIRAC.DataManagementSystem.Utilities.DMSHelpers import DMSHelpers, resolveSEGroup

    bk = BookkeepingClient()
    tr = TransformationClient()
    dm = DataManager()
    dmsHelper = DMSHelpers()

    bkQueryDict = bkQuery.getQueryDict()
    gLogger.notice("For BK Query:", str(bkQueryDict))
    progressBar = ProgressBar(1, title="Running BK query...", step=1)
    res = bk.getFilesWithMetadata(bkQueryDict)
    if not res['OK']:
        gLogger.error("Error getting files from BK", res['Message'])
        DIRAC.exit(2)

    if 'ParameterNames' in res.get('Value', {}):
        parameterNames = res['Value']['ParameterNames']
        info = res['Value']['Records']
        progressBar.endLoop("Obtained %d files" % len(info))
    else:
        gLogger.error('\nNo metadata found')
        DIRAC.exit(3)
    lfns = []
    runLFNs = {}
    for item in info:
        metadata = dict(zip(parameterNames, item))
        lfn = metadata['FileName']
        lfns.append(lfn)
        runLFNs.setdefault(metadata['RunNumber'], []).append(lfn)

    chunkSize = 1000
    progressBar = ProgressBar(len(lfns),
                              title='Getting replicas of %d files' % len(lfns),
                              chunk=chunkSize)
    replicas = {}
    errors = {}
    for lfnChunk in breakListIntoChunks(lfns, chunkSize):
        progressBar.loop()
        res = dm.getReplicas(lfnChunk, getUrl=False)
        if not res['OK']:
            errors.setdefault(res['Message'], []).extend(lfnChunk)
        else:
            replicas.update(res['Value']['Successful'])
            for lfn, error in res['Value']['Failed'].iteritems():
                errors.setdefault(error, []).append(lfn)
    progressBar.endLoop()
    for error, lfns in errors.iteritems():
        gLogger.error(error, 'for %d files' % len(lfns))

    tier1RDST = set(resolveSEGroup('Tier1-RDST'))
    setOK = 0
    errors = {}
    progressBar = ProgressBar(len(runLFNs),
                              title='Defining destination for %d runs' %
                              len(runLFNs),
                              step=10)
    for run, lfns in runLFNs.iteritems():
        progressBar.loop()
        res = tr.getDestinationForRun(run)
        if res.get('Value'):
            errors.setdefault('Destination already set', []).append(str(run))
            continue
        # print 'Run', run, len( lfns ), 'Files', lfns[:3]
        seCounts = {}
        for lfn in lfns:
            for se in tier1RDST.intersection(replicas.get(lfn, [])):
                seCounts[se] = seCounts.setdefault(se, 0) + 1
        # print seCounts
        maxi = 0
        seMax = None
        for se, count in seCounts.iteritems():
            if count > maxi:
                seMax = se
                maxi = count
        if not seMax:
            errors.setdefault('No SE found, use CERN-RDST',
                              []).append(str(run))
            seMax = 'CERN-RDST'
        # SE found, get its site
        res = dmsHelper.getLocalSiteForSE(seMax)
        if res['OK']:
            site = res['Value']
            res = tr.setDestinationForRun(run, site)
            if not res['OK']:
                errors.setdefault(res['Message'], []).append(str(run))
            else:
                setOK += 1
    progressBar.endLoop('Successfully set destination for %d runs' % setOK)
    for error, runs in errors.iteritems():
        gLogger.error(error, 'for runs %s' % ','.join(runs))
Ejemplo n.º 13
# Author :  Zoltan Mathe
########################################################################
"""
  List file types from the Bookkeeping
"""
__RCSID__ = "$Id$"

import DIRAC
from DIRAC.Core.Base import Script

Script.setUsageMessage('\n'.join([
    __doc__.split('\n')[1], 'Usage:',
    '  %s [option|cfgfile]' % Script.scriptName
]))
Script.parseCommandLine(ignoreErrors=True)

from LHCbDIRAC.BookkeepingSystem.Client.BookkeepingClient import BookkeepingClient
bk = BookkeepingClient()
exitCode = 0

mfiletypes = []
res = bk.getAvailableFileTypes()

if res['OK']:
    dbresult = res['Value']
    print 'Filetypes:'
    for record in dbresult['Records']:
        print str(record[0]).ljust(30) + str(record[1])

DIRAC.exit(exitCode)
########################################################################
"""
  Insert a new set of simulation conditions in the Bookkeeping
"""
__RCSID__ = "$Id$"

import DIRAC
from DIRAC.Core.Base import Script
Script.setUsageMessage('\n'.join([
    __doc__.split('\n')[1], 'Usage:',
    '  %s [option|cfgfile] ...' % Script.scriptName
]))
Script.parseCommandLine(ignoreErrors=True)

from LHCbDIRAC.BookkeepingSystem.Client.BookkeepingClient import BookkeepingClient
bk = BookkeepingClient()

exitCode = 0

desc = raw_input("SimDescription: ")
beamcond = raw_input("BeamCond: ")
beamEnergy = raw_input("BeamEnergy: ")
generator = raw_input("Generator: ")
magneticField = raw_input("MagneticField: ")
detectorCond = raw_input("DetectorCond: ")
luminosity = raw_input("Luminosity: ")
g4settings = raw_input("G4settings: ")
print 'Do you want to add these new simulation conditions? (yes or no)'
value = raw_input('Choice:')
choice = value.lower()
if choice in ['yes', 'y']:
Ejemplo n.º 15
    class BookkeepingDB:
    
        def __init__(self):
            self.bkClient = BookkeepingClient()
            self.tfClient = TransformationClient()
#            self.err = err
            
            
        def MakeRunLFN(self, runNmbr, cfgVersion, prodId):
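            # Build the run LFN by zero-padding the production ID to 8 digits and filling
            # LFN_FORMAT_STRING (defined elsewhere) with the configuration version, the
            # run number and the padded production ID.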
            try:
                padding = "%08d" % int(prodId)
    
                lfn = LFN_FORMAT_STRING  %(
                    cfgVersion, runNmbr, runNmbr, padding)
                
                return lfn
            except Exception as inst:
#                self.err.rethrowException(inst)
                return None     
            
        def getTCK(self, runNmbr):
            try:
                print runNmbr
                res = self.getRunsMetadata(runNmbr)
                
                pprint.pprint(res)
                
                if res is not None and 'Value' in res \
                        and runNmbr in res['Value'] \
                        and "TCK" in res['Value'][runNmbr]:
                    return res['Value'][runNmbr]["TCK"]
                else:
                    return None
            except Exception as inst:
#                self.err.rethrowException(inst)
                return None
            
            
        def getRunsMetadata(self, runNmbr):
            try:
                res = self.tfClient.getRunsMetadata(int(runNmbr))
                if res['OK']:
                    return res
                else:
                    return None
            except Exception as inst:
#                self.err.rethrowException(inst)
                return None
    
        def getInformation(self, run):
            try:
                res = self.bkClient.getRunInformations(run)
    
                if res['OK']:
                    result = dict()
                
                    val = res['Value']
            
                    result = {"runstart": val.get('RunStart', 'Unknown'), "runend": val.get('RunEnd', 'Unknown'),
                        "configname": val.get('Configuration Name', 'Unknown'), "configversion": val.get('Configuration Version', 'Unknown'),
                        "fillnb" : val.get('FillNumber', 'Unknown'), "datataking" : val.get('DataTakingDescription', 'Unknown'),
                        "datataking" : val.get('DataTakingDescription', 'Unknown'), "processing" : val.get('ProcessingPass', 'Unknown'),
                        "stream" : val.get('Stream', 'Unknown'), "fullstat" : val.get('FullStat', 'Unknown'), 
                        "nbofe" : val.get('Number of events', 'Unknown'), "nboff" : val.get('Number of file', 'Unknown'),
                        "fsize" : val.get('File size', 'Unknown')
            
                    }
            
                    return result
                else:
                    self.errorMessage("error in bkClient Connection")
                    return None
            except Exception as inst:
#                self.err.rethrowException(inst)
                return None
                
        def getListOfRecos(self, runNmbr):
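            # Return the processing-pass paths registered for this run in the Bookkeeping,
            # keeping only paths with exactly two '/' levels that contain "Reco".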
            try:
                d = {'RunNumber' : runNmbr}
        
                res = self.bkClient.getRunAndProcessingPass(d)
        
                results = list()
        
                if res['OK'] == True:
                    recosList = res["Value"]
            
                    for recoEntry in recosList:
                        recoPath = recoEntry[1]
                
                        if recoEntry[0] == runNmbr \
                            and recoPath.count("/") == 2 \
                            and "Reco" in recoPath :
                    
                            results.append(recoPath)
            
                    return results
                else:
                    pprint.pprint(res)
                    self.errorMessage("error in bkClient Connection")
                    return None
            except Exception as inst:
#                self.err.rethrowException(inst)
                return None
            
        def getProcessId(self, runNmbr, recoVersion):
            try:
                d = {'RunNumber' : runNmbr,
                'ProcessingPass':  recoVersion}
        
                res = self.bkClient.getProductionsFromView(d)
        
                if res["OK"] == True:
                    return res["Value"][0][0]
                else:
                    self.errorMessage("error in bkClient Connection")
                    return None
            except Exception as inst:
#                self.err.rethrowException(inst)
                return None
                
        #recoVersion is just Reco13 and not the full path!!
        def makeReferenceROOTFileName(self, recoVersion, runNmbr):
            try:
                basePath = REFERENCE_BASE_PATH + recoVersion +"/"
            
                #nasty stuff!!
                #the problem is tck retrieved from db
                #0x790038
                #but in the file it looks like
                #TCK_0x00760037_1.root
                #so do padding here
                tck = self.getTCK(runNmbr)
                
                #sometimes no tck set, then take default file
                if tck != None:
                    tckDecimal = int(tck, 0)
                    tckHexPaddedFileName = "TCK_0x" + str(format(tckDecimal, '08x')) + "_"
            
                #if we have multiple files like
                #TCK_0x00790038_1.root
                #TCK_0x00790038_2.root
                #we want the file with the highest subindex, so in this example _2
                possibleTCKList = list()
            
                #store all possible files
                for file in os.listdir(basePath):
                    if tck != None \
                    and file.endswith(".root") \
                    and file != "default_1.root" \
                    and tckHexPaddedFileName in file:
                        possibleTCKList.append(file)
    
                #if we haven't found anything, look for the default files and choose the one with the highest index
                if len(possibleTCKList) == 0:
            
                    #store all possible files
                    for file in os.listdir(basePath):
                        if file.endswith(".root") \
                        and "default_" in file:
                            possibleTCKList.append(file)
            
                #now sort this list, to find the highest subindex               
                possibleTCKList.sort()
    
                return basePath+possibleTCKList.pop()
            except Exception as inst:
#                self.err.rethrowException(inst)
                return None
Ejemplo n.º 16
Script.setUsageMessage('\n'.join([
    __doc__.split('\n')[1], 'Usage:',
    '  %s [option|cfgfile] ... ProdID' % Script.scriptName, 'Arguments:',
    '  ProdID:   Production ID'
]))
Script.parseCommandLine(ignoreErrors=True)
args = Script.getPositionalArgs()

if len(args) < 1:
    Script.showHelp()

exitCode = 0

from LHCbDIRAC.BookkeepingSystem.Client.BookkeepingClient import BookkeepingClient
bk = BookkeepingClient()
try:
    prod = long(args[0])
except:
    Script.showHelp()
    DIRAC.exit(1)

res = bk.getNbOfJobsBySites(prod)

if res['OK']:
    if not res['Value']:
        print "No jobs for production", prod
        DIRAC.exit(0)
    sites = dict([(site, num) for num, site in res['Value']])
    shift = 0
    for site in sites:
Ejemplo n.º 17
    def __init__(self):
        self.bkClient = BookkeepingClient()
        self.tfClient = TransformationClient()
Ejemplo n.º 18
class ProductionRequest(object):
    """ Production request class - objects are usually created starting from a production request
  """
    def __init__(self, bkkClientIn=None, diracProdIn=None):
        """ c'tor

        Some variables are defined here. A production request is made of:
        a list of steps (stepsList), the production types (prodsTypeList), and various parameters of those productions
    """

        if bkkClientIn is None:
            self.bkkClient = BookkeepingClient()
        else:
            self.bkkClient = bkkClientIn

        if diracProdIn is None:
            self.diracProduction = DiracProduction()
        else:
            self.diracProduction = diracProdIn

        self.rpcProductionRequest = RPCClient(
            'ProductionManagement/ProductionRequest')
        self.tc = TransformationClient()

        self.logger = gLogger.getSubLogger('ProductionRequest')

        self.opsH = Operations()

        # parameters of the request
        self.requestID = 0
        self.parentRequestID = 0
        self.appendName = '1'
        self.outConfigName = ''
        self.prodsToLaunch = []  # productions to launch
        self.stepsListDict = []  # list of dict of steps
        self.stepsInProds = []  # a list of lists
        # parameters of the input data
        self.processingPass = ''
        self.dataTakingConditions = ''
        self.eventType = ''
        self.bkFileType = []
        self.dqFlag = ''
        self.startRun = 1
        self.endRun = 2
        self.runsList = ''
        self.configName = 'test'
        self.configVersion = 'certification'
        # parameters of the first production
        self.publishFlag = True
        self.testFlag = False
        self.derivedProduction = 0
        self.previousProdID = 0  # optional prod from which to start
        self.fullListOfOutputFileTypes = []

        # parameters that are the same for each productions
        self.prodGroup = ''  # This ends up being 'ProcessingType', workflow parameter, and it is used for accounting
        self.visibility = ''  # For BKQuery
        self.fractionToProcess = 0
        self.minFilesToProcess = 0
        self.modulesList = None  # Usually:
        # ['GaudiApplication', 'AnalyseXMLSummary',
        # 'ErrorLogging', 'BookkeepingReport', 'StepAccounting' ]

        # parameters of each production (the length of each list has to be the same as the number of productions)
        self.events = []
        self.stepsList = []
        self.extraOptions = {}
        self.prodsTypeList = []
        self.bkQueries = []  # list of bk queries
        self.removeInputsFlags = []
        self.priorities = []
        self.cpus = []
        self.inputs = []  # list of lists
        self.outputModes = []
        self.targets = []
        self.outputFileMasks = []
        self.outputFileSteps = []
        self.groupSizes = []
        self.plugins = []
        self.inputDataPolicies = []
        self.previousProds = [
            None
        ]  # list of productions from which to take the inputs (the first is always None)
        self.multicore = [
        ]  # list of flags to override the multi core flags of the steps

        self.outputSEs = []  # a list of StorageElements
        self.specialOutputSEs = []  # a list of dictionaries - might be empty
        self.outputSEsPerFileType = []  # a list of dictionaries - filled later
        self.ancestorDepths = []
        self.compDict = {
            'HIGH': 'Compression-LZMA-4',
            'LOW': 'Compression-ZLIB-1'
        }
        self.compressionLvl = ['']  # List: one compression level each step
        self.appConfig = '$APPCONFIGOPTS/Persistency/'  # Default location of the compression level configuration files
        #
        # These lists define the visibility of the output files produced by each step. For MC productions, the visibility
        # is tied to the compression level. Visible files are compressed at the highest level.
        #
        self.outputVisFlag = [
        ]  # List of dictionary with default visibility flag of the output files per single step
        self.specialOutputVisFlag = [
        ]  # List of dictionaries with special visibility flag for given file type

    #############################################################################

    def resolveSteps(self):
        """ Given a list of steps in strings, some of which might be missing,
        resolve it into a list of dictionary of steps (self.stepsListDict)
    """
        outputVisFlag = dict(
            [k, v] for el in self.outputVisFlag for k, v in el.iteritems()
        )  # Transform the list of dictionaries in a dictionary
        specialOutputVisFlag = dict([k, v] for el in self.specialOutputVisFlag
                                    for k, v in el.iteritems())
        count = 0  # Needed to correctly add the optionFiles to the list of dictionaries of steps
        for stepID in self.stepsList:

            stepDict = self.bkkClient.getAvailableSteps({'StepId': stepID})
            if not stepDict['OK']:
                raise ValueError(stepDict['Message'])
            else:
                stepDict = stepDict['Value']

            stepsListDictItem = {}

            s_in = self.bkkClient.getStepInputFiles(stepID)
            if not s_in['OK']:
                raise ValueError(s_in['Message'])
            stepsListDictItem['fileTypesIn'] = [
                fileType[0].strip() for fileType in s_in['Value']['Records']
            ]

            s_out = self.bkkClient.getStepOutputFiles(stepID)
            if not s_out['OK']:
                raise ValueError(s_out['Message'])
            fileTypesList = [
                fileType[0].strip() for fileType in s_out['Value']['Records']
            ]
            self.fullListOfOutputFileTypes = self.fullListOfOutputFileTypes + fileTypesList
            stepsListDictItem['fileTypesOut'] = fileTypesList

            for parameter, value in itertools.izip(stepDict['ParameterNames'],
                                                   stepDict['Records'][0]):
                if parameter.lower() in ['conddb', 'dddb', 'dqtag'] and value:
                    if value.lower() == 'frompreviousstep':
                        value = self.stepsListDict[-1][parameter]

                if parameter == 'OptionFiles':  # Modifying the OptionFiles (for setting the compression level)
                    if 'MDF' not in stepsListDictItem[
                            'fileTypesOut']:  # certain MC produce MDF, which shouldn't be compressed
                        #
                        # If the prod manager sets a compression level for a particular step, either we append the option file
                        # or we overwrite the existing one inherited with the step
                        #
                        if len(self.compressionLvl
                               ) > count and self.compressionLvl[count] != '':
                            persist = re.compile('Compression-[A-Z]{4}-[1-9]')
                            # self.compressionLvl[count] = self.appConfig + self.compressionLvl[count] + '.py'
                            self.compressionLvl[
                                count] = self.appConfig + self.compDict[
                                    self.compressionLvl[count].upper()] + '.py'
                            if not persist.search(value):
                                if value == '':
                                    value = self.compressionLvl[count]
                                else:
                                    value = ";".join(
                                        (value, self.compressionLvl[count]))
                            else:
                                value = persist.sub(
                                    persist.search(
                                        self.compressionLvl[count]).group(),
                                    value)
                        #
                        # If instead the prod manager doesn't declare a compression level, e.g. for intermediate steps,
                        # we check if there is one in the options and in case we delete it. This leaves the default zip level
                        # defined inside Gaudi
                        #
                        elif len(
                                self.compressionLvl
                        ) > count and self.compressionLvl[count] == '':
                            persist = re.compile(
                                r'\$\w+/Persistency/Compression-[A-Z]{4}-[1-9].py;?'
                            )
                            if persist.search(value):
                                value = persist.sub('', value)

                if parameter == 'SystemConfig' and value is not None and re.search(
                        'slc5', value):
                    p = re.compile(
                        r'\$\w+/Persistency/Compression-[A-Z]{4}-[1-9].py;?')
                    if p.search(stepsListDictItem['OptionFiles']):
                        stepsListDictItem['OptionFiles'] = p.sub(
                            '', stepsListDictItem['OptionFiles'])

                stepsListDictItem[parameter] = value  # Fixing what decided

            if stepsListDictItem['StepId'] in self.extraOptions:
                stepsListDictItem['ExtraOptions'] = self.extraOptions[
                    stepsListDictItem['StepId']]
            else:
                stepsListDictItem['ExtraOptions'] = ''

            stepsListDictItem['prodStepID'] = str(stepID) + str(
                stepsListDictItem['fileTypesIn'])

            if 'isMulticore' not in stepsListDictItem:
                stepsListDictItem['isMulticore'] = 'N'

            if 'SystemConfig' not in stepsListDictItem:
                stepsListDictItem['SystemConfig'] = ''

            if 'mcTCK' not in stepsListDictItem:
                stepsListDictItem['mcTCK'] = ''

            # Add visibility info during step resolution
            if 'visibilityFlag' not in stepsListDictItem:
                outputVisList = list({
                    'Visible': outputVisFlag[str(count + 1)],
                    'FileType': ftype
                } for ftype in stepsListDictItem['fileTypesOut'])
                if str(count + 1) in specialOutputVisFlag:
                    for it in outputVisList:
                        if it['FileType'] in specialOutputVisFlag[str(count +
                                                                      1)]:
                            it['Visible'] = specialOutputVisFlag[str(
                                count + 1)][it['FileType']]

                stepsListDictItem['visibilityFlag'] = outputVisList

            self.stepsListDict.append(stepsListDictItem)
            count += 1

    #############################################################################

    def buildAndLaunchRequest(self):
        """ uses _applyOptionalCorrections, _getProdsDescriptionDict,
        _buildProduction, and DiracProduction.launchProduction
    """

        if not self.stepsListDict:
            self.resolveSteps()

        self._applyOptionalCorrections()

        self._determineOutputSEs()

        prodsDict = self._getProdsDescriptionDict()

        stepsListDict = list(self.stepsListDict)

        fromProd = self.previousProdID
        prodsLaunched = []

        self.logger.debug(prodsDict)
        # now we build and launch each production
        for prodIndex, prodDict in prodsDict.items():

            if self.prodsToLaunch:
                if prodIndex not in self.prodsToLaunch:
                    continue

            # build the list of steps in a production
            stepsInProd = []
            for stepID in prodDict['stepsInProd-ProdName']:
                for step in stepsListDict:
                    if step['prodStepID'] == stepID:
                        stepsInProd.append(
                            stepsListDict.pop(stepsListDict.index(step)))
            # NOT READY (alternative to previous 5 lines)
            # build the DAG of steps in a production
            # stepsInProdDAG = self._getStepsInProdDAG( prodDict, stepsListDict )
            # Here, for today it is just convert to a list
            # TODO: fix this in order to properly use DAGs (now it's only sequential)
            # FIXME: using getIndexNodes we can't assure the order is respected
            # stepsInProd = stepsInProdDAG.getIndexNodes()

            if prodDict['previousProd'] is not None:
                fromProd = prodsLaunched[prodDict['previousProd'] - 1]
                self.previousProdID = fromProd

            prod = self._buildProduction(
                prodType=prodDict['productionType'],
                stepsInProd=stepsInProd,
                outputSE=prodDict['outputSE'],
                priority=prodDict['priority'],
                cpu=prodDict['cpu'],
                inputDataList=prodDict['input'],
                outputMode=prodDict['outputMode'],
                inputDataPolicy=prodDict['inputDataPolicy'],
                outputFileMask=prodDict['outputFileMask'],
                outputFileStep=prodDict['outputFileStep'],
                target=prodDict['target'],
                removeInputData=prodDict['removeInputsFlag'],
                groupSize=prodDict['groupSize'],
                bkQuery=prodDict['bkQuery'],
                plugin=prodDict['plugin'],
                previousProdID=fromProd,
                derivedProdID=prodDict['derivedProduction'],
                transformationFamily=prodDict['transformationFamily'],
                events=prodDict['events'],
                multicore=prodDict['multicore'],
                ancestorDepth=prodDict['ancestorDepth'])

            # if the production is a simulation production type, submit it to the automated testing
            if prodDict['productionType'] in self.opsH.getValue(
                    'Transformations/ExtendableTransfTypes', ['MCSimulation']):
                prodID = self._mcSpecialCase(prod, prodDict)

            else:
                res = self.diracProduction.launchProduction(
                    prod=prod,
                    publishFlag=self.publishFlag,
                    testFlag=self.testFlag,
                    requestID=self.requestID,
                    tracking=prodDict['tracking'])
                if not res['OK']:
                    raise RuntimeError(res['Message'])

                prodID = res['Value']

            prodsLaunched.append(prodID)

            if self.publishFlag:
                self.logger.notice(
                    "For request %d, submitted Production %d, of type %s, ID = %s"
                    % (self.requestID, prodIndex, prodDict['productionType'],
                       str(prodID)))
        return S_OK(prodsLaunched)

    #############################################################################

    def _getStepsInProdDAG(self,
                           prodDict,
                           stepsListDict,
                           stepsOrder='sequential'):
        """ Builds the DAG of steps in a production

        :param dict prodDict: dictionary representing one production
        :param list stepsListDict: list of steps (which are dictionaries) that should be in the production

        :returns: stepsInProd (DAG)
    """
        stepsInProd = DAG()

        inserted = None
        for stepID in prodDict['stepsInProd-ProdName']:
            for step in stepsListDict:
                if step['prodStepID'] == stepID:
                    ind = stepsListDict.index(step)
                    step = stepsListDict.pop(ind)
                    stepsInProd.addNode(step)
                    if inserted and stepsOrder == 'sequential':
                        stepsInProd.addEdge(inserted, step)
                    inserted = step

        return stepsInProd

    def _mcSpecialCase(self, prod, prodDict):
        """ Treating the MC special case for putting MC productions in status "Testing"
    """

        # save the original xml before it is edited for testing
        prod._lastParameters()  # pylint: disable=protected-access

        # launchProduction adds extra parameters, as we 'hot swap' the xml, we
        # need to get these parameters for the un-edited version
        originalProcessingType = prod.prodGroup
        originalPriority = prod.priority

        prodXML = prod.LHCbJob.workflow.toXML()

        prodID = self._modifyAndLaunchMCXML(prod, prodDict)

        # load a production from the original xml to save the priority and processing type
        workflowToSave = fromXMLString(prodXML)
        prod.LHCbJob.workflow = workflowToSave
        prod.setParameter('ProcessingType', 'JDL', str(originalProcessingType),
                          'ProductionGroupOrType')
        prod.setParameter('Priority', 'JDL', str(originalPriority),
                          'Job Priority')

        # original xml to save
        descriptionToStore = prod.LHCbJob.workflow.toXML()

        # saving the original xml in the StoredJobDescription table.
        res = self.tc.addStoredJobDescription(prodID, descriptionToStore)
        if not res['OK']:
            self.logger.error("Error calling addStoredJobDescription",
                              res['Message'])
            self.logger.info("Cleaning created production and exiting")
            self.diracProduction.production(prodID, 'cleaning')
            raise RuntimeError(res['Message'])

        return prodID

    def _modifyAndLaunchMCXML(self, prod, prodDict):
        """ Apply modifications to the workflow XML for MC testing case

        :param Production prod: Production object
        :param dict prodDict: dictionary with production info

        :returns: the ID of the launched test production (raises RuntimeError on failure)
    """
        # set the destination and number of events for testing
        destination = self.opsH.getValue(
            "Productions/MCTesting/MCTestingDestination", 'DIRAC.Test.ch')
        numberOfEvents = self.opsH.getValue(
            "Productions/MCTesting/numberOfEvents", '500')
        extendBy = self.opsH.getValue("Productions/MCTesting/extendBy", 20)

        prod.setJobParameters({'Destination': destination})
        prod.LHCbJob.workflow.removeParameter('BannedSites')
        prod.setParameter('numberOfEvents', 'string', str(numberOfEvents),
                          'Number of events to test')

        # add '1' to the stepMask and add GAUSSHIST to the fileMask
        fileTypesOutLastStep = prod.LHCbJob.workflow.step_instances[
            -2].findParameter('listoutput').getValue()[0]['outputDataType']
        newFileMask = ['GAUSSHIST'] + [
            ftOut.upper() for ftOut in fileTypesOutLastStep.split(';')
        ]
        stepMaskParameter = prod.LHCbJob.workflow.findParameter(
            'outputDataStep')
        if stepMaskParameter:
            stepMask = stepMaskParameter.getValue().replace(' ', '').split(';')
            newOutputFileStep = ';'.join(
                sorted(list(set(['1']).union(set(stepMask)))))
        else:
            newOutputFileStep = '1'
        prod.setFileMask(newFileMask, newOutputFileStep)

        # find the file types out already built, append GAUSSHIST and set the new listoutput
        fileTypesOut = prod.LHCbJob.workflow.step_instances[0].findParameter(
            'listoutput').getValue()[0]['outputDataType']
        fileTypesOut = fileTypesOut.split(', ')
        fileTypesOut.append('GAUSSHIST')
        outputFilesList = prod._constructOutputFilesList(fileTypesOut)  # pylint: disable=protected-access
        prod.LHCbJob.workflow.step_instances[0].setValue(
            'listoutput', outputFilesList)

        # increase the priority to 10
        prod.priority = 10

        # launch the test production
        res = self.diracProduction.launchProduction(
            prod=prod,
            publishFlag=self.publishFlag,
            testFlag=self.testFlag,
            requestID=self.requestID,
            extend=extendBy,
            tracking=prodDict['tracking'],
            MCsimflag=True)
        if not res['OK']:
            self.logger.error("Error launching production", res['Message'])
            raise RuntimeError(res['Message'])

        return res['Value']

    def _determineOutputSEs(self):
        """ Fill outputSEsPerFileType based on outputSEs, fullListOfOutputFileTypes and specialOutputSEs
    """
        for outputSE, specialOutputSEs in itertools.izip(
                self.outputSEs, self.specialOutputSEs):
            outputSEDict = {}
            if not self.fullListOfOutputFileTypes:
                raise ValueError("No steps defined")
            outputSEDict = dict([(fType, outputSE)
                                 for fType in self.fullListOfOutputFileTypes])
            if specialOutputSEs:
                outputSEDict.update(specialOutputSEs)
            self.outputSEsPerFileType.append(outputSEDict)

    def _applyOptionalCorrections(self):
        """ if needed, calls _splitIntoProductionSteps. It also applies other changes
    """
        if len(self.bkQueries) != len(self.prodsTypeList):
            self.bkQueries += ['fromPreviousProd'] * (len(self.prodsTypeList) -
                                                      len(self.bkQueries))

        if len(self.previousProds) != len(self.prodsTypeList):
            self.previousProds += xrange(1, len(self.prodsTypeList))

        if len(self.events) != len(self.prodsTypeList):
            self.events += ['-1'
                            ] * (len(self.prodsTypeList) - len(self.events))

        if not self.removeInputsFlags:
            removeInputsFlags = []
            for prodType in self.prodsTypeList:
                if prodType.lower() == 'merge':
                    removeInputsFlags.append(True)
                else:
                    removeInputsFlags.append(False)
            self.removeInputsFlags = removeInputsFlags

        if not self.outputFileMasks:
            self.outputFileMasks = [''] * len(self.prodsTypeList)

        if not self.outputFileSteps:
            self.outputFileSteps = [''] * len(self.prodsTypeList)

        if not self.inputs:
            self.inputs = [[]] * len(self.prodsTypeList)

        if not self.outputModes:
            self.outputModes = ['Any'] * len(self.prodsTypeList)

        if not self.targets:
            self.targets = [''] * len(self.prodsTypeList)

        if not self.inputDataPolicies:
            self.inputDataPolicies = ['download'] * len(self.prodsTypeList)

        if not self.multicore:
            self.multicore = ['True'] * len(self.prodsTypeList)

        if not self.specialOutputSEs:
            self.specialOutputSEs = [{}] * len(self.prodsTypeList)

        if not self.ancestorDepths:
            self.ancestorDepths = [0] * len(self.prodsTypeList)

        # Checking if we need to split the merging step into many productions
        if 'merge' in [pt.lower() for pt in self.prodsTypeList]:
            i = 0
            indexes = []
            for pt in self.prodsTypeList:
                if pt.lower() == 'merge':
                    indexes.append(i)
                i += 1

            for index in indexes:
                # In this case and only in this case I have to split the merging in many productions
                plugin = self.plugins[index]
                outputSE = self.outputSEs[index]
                specialOutputSE = self.specialOutputSEs[index]
                priority = self.priorities[index]
                cpu = self.cpus[index]
                bkQuery = self.bkQueries[index]
                groupSize = self.groupSizes[index]
                preProd = self.previousProds[index]
                removeInputsFlag = self.removeInputsFlags[index]
                outputFileMask = self.outputFileMasks[index]
                outputFileStep = self.outputFileSteps[index]
                inputs = self.inputs[index]
                idp = self.inputDataPolicies[index]
                stepID = self.stepsList[index]
                events = self.events[index]
                targets = self.targets[index]
                multicore = self.multicore[index]
                outputMode = self.outputModes[index]
                ancestorDepth = self.ancestorDepths[index]
                if plugin.lower() != 'byrunfiletypesizewithflush' \
                        and 'rootmerging' not in plugin.lower():
                    stepToSplit = self.stepsListDict[index]
                    numberOfProdsToInsert = len(stepToSplit['fileTypesOut'])
                    self.prodsTypeList.remove('Merge')
                    self.plugins.pop(index)
                    self.outputSEs.pop(index)
                    self.specialOutputSEs.pop(index)
                    self.priorities.pop(index)
                    self.cpus.pop(index)
                    self.bkQueries.pop(index)
                    self.previousProds.pop(index)
                    self.groupSizes.pop(index)
                    self.removeInputsFlags.pop(index)
                    self.outputFileMasks.pop(index)
                    self.outputFileSteps.pop(index)
                    self.inputs.pop(index)
                    self.inputDataPolicies.pop(index)
                    self.stepsList.pop(index)
                    self.events.pop(index)
                    self.targets.pop(index)
                    self.multicore.pop(index)
                    self.outputModes.pop(index)
                    self.ancestorDepths.pop(index)
                    newSteps = _splitIntoProductionSteps(stepToSplit)
                    newSteps.reverse()
                    self.stepsListDict.remove(stepToSplit)
                    last = self.stepsInProds.pop(index)[0]
                    for x in xrange(numberOfProdsToInsert):
                        self.prodsTypeList.insert(index, 'Merge')
                        self.plugins.insert(index, plugin)
                        self.outputSEs.insert(index, outputSE)
                        self.specialOutputSEs.insert(index, specialOutputSE)
                        self.priorities.insert(index, priority)
                        self.cpus.insert(index, cpu)
                        self.bkQueries.insert(index, bkQuery)
                        self.groupSizes.insert(index, groupSize)
                        self.removeInputsFlags.insert(index, removeInputsFlag)
                        self.outputFileMasks.insert(index, outputFileMask)
                        self.outputFileSteps.insert(index, outputFileStep)
                        self.inputs.insert(index, inputs)
                        self.inputDataPolicies.insert(index, idp)
                        self.stepsList.insert(index, stepID)
                        self.previousProds.insert(index, preProd)
                        self.stepsListDict.insert(index, newSteps[x])
                        self.stepsInProds.insert(index + x, [last + x])
                        self.events.insert(index, events)
                        self.targets.insert(index, targets)
                        self.multicore.insert(index, multicore)
                        self.outputModes.insert(index, outputMode)
                        self.ancestorDepths.insert(index, ancestorDepth)

        correctedStepsInProds = []
        toInsert = self.stepsInProds[0][0]
        lengths = [len(x) for x in self.stepsInProds]
        for length in lengths:
            li = [toInsert + x for x in xrange(length)]
            toInsert += length
            correctedStepsInProds.append(li)

        self.stepsInProds = correctedStepsInProds

    #############################################################################

    def _getProdsDescriptionDict(self):
        """ Returns a dictionary representing the description of the request (of all the productions in it)
    """

        prodsDict = {}

        prodNumber = 1

        for prodType, stepsInProd, bkQuery, removeInputsFlag, outputSE, priority, \
            cpu, inputD, outputMode, outFileMask, outFileStep, target, groupSize, plugin, idp, \
            previousProd, events, multicore, ancestorDepth in itertools.izip(self.prodsTypeList,
                                                                             self.stepsInProds,
                                                                             self.bkQueries,
                                                                             self.removeInputsFlags,
                                                                             self.outputSEsPerFileType,
                                                                             self.priorities,
                                                                             self.cpus,
                                                                             self.inputs,
                                                                             self.outputModes,
                                                                             self.outputFileMasks,
                                                                             self.outputFileSteps,
                                                                             self.targets,
                                                                             self.groupSizes,
                                                                             self.plugins,
                                                                             self.inputDataPolicies,
                                                                             self.previousProds,
                                                                             self.events,
                                                                             self.multicore,
                                                                             self.ancestorDepths):

            if not self.parentRequestID and self.requestID:
                transformationFamily = self.requestID
            else:
                transformationFamily = self.parentRequestID

            stepsInProdProdNameList = [
                str(self.stepsList[index - 1]) +
                str(self.stepsListDict[index - 1]['fileTypesIn'])
                for index in stepsInProd
            ]
            prodsDict[prodNumber] = {
                'productionType': prodType,
                'stepsInProd':
                [self.stepsList[index - 1] for index in stepsInProd],
                'bkQuery': bkQuery,
                'removeInputsFlag': removeInputsFlag,
                'tracking': 0,
                'outputSE': outputSE,
                'priority': priority,
                'cpu': cpu,
                'input': inputD,
                'outputMode': outputMode,
                'outputFileMask': outFileMask,
                'outputFileStep': outFileStep,
                'target': target,
                'groupSize': groupSize,
                'plugin': plugin,
                'inputDataPolicy': idp,
                'derivedProduction': 0,
                'transformationFamily': transformationFamily,
                'previousProd': previousProd,
                'stepsInProd-ProdName': stepsInProdProdNameList,
                'events': events,
                'multicore': multicore,
                'ancestorDepth': ancestorDepth
            }
            prodNumber += 1

        # tracking the last production(s)
        prodsDict[prodNumber - 1]['tracking'] = 1
        typeOfLastProd = prodsDict[prodNumber - 1]['productionType']
        index = 2
        try:
            while prodsDict[prodNumber -
                            index]['productionType'] == typeOfLastProd:
                prodsDict[prodNumber - index]['tracking'] = 1
                index += 1
        except KeyError:
            pass

        # production derivation, if necessary
        if self.derivedProduction:
            prodsDict[1]['derivedProduction'] = self.derivedProduction

        return prodsDict

    #############################################################################

    def _buildProduction(self,
                         prodType,
                         stepsInProd,
                         outputSE,
                         priority,
                         cpu,
                         inputDataList=None,
                         outputMode='Any',
                         inputDataPolicy='download',
                         outputFileMask='',
                         outputFileStep='',
                         target='',
                         removeInputData=False,
                         groupSize=1,
                         bkQuery=None,
                         plugin='',
                         previousProdID=0,
                         derivedProdID=0,
                         transformationFamily=0,
                         events=-1,
                         multicore='True',
                         ancestorDepth=0):
        """ Wrapper around Production API to build a production, given the needed parameters

        Args:
          prodType (str): production type (e.g. 'DataStripping')
          stepsInProd (list): list of steps in the production
          outputSE (dict): dictionary that holds relation between file type and output SE
          priority (int): production priority
          cpu (int): CPU time, in HS06s for jobs in this production

        Returns:
          prod: a Production object
    """
        prod = Production()

        # non optional parameters
        prod.LHCbJob.setType(prodType)
        try:
            fTypeIn = [ft.upper() for ft in stepsInProd[0]['fileTypesIn']]
        except IndexError:
            fTypeIn = []
        prod.LHCbJob.workflow.setName(
            'Request_%d_%s_%s_EventType_%s_%s_%s' %
            (self.requestID, prodType, self.prodGroup, self.eventType, ''.join(
                [x.split('.')[0] for x in fTypeIn]), self.appendName))
        prod.setBKParameters(configName=self.outConfigName,
                             configVersion=self.configVersion,
                             groupDescriptionOrStepsList=stepsInProd,
                             conditions=self.dataTakingConditions)
        prod.setParameter('eventType', 'string', self.eventType,
                          'Event Type of the production')
        prod.setParameter('numberOfEvents', 'string', str(events),
                          'Number of events requested')

        prod.setParameter('multicore', 'string', multicore,
                          'Flag for enabling gaudi parallel')
        prod.prodGroup = self.prodGroup
        prod.priority = priority
        prod.LHCbJob.workflow.setDescrShort('prodDescription')
        prod.LHCbJob.workflow.setDescription('prodDescription')
        prod.LHCbJob.setCPUTime(cpu)
        prod.plugin = plugin

        # optional parameters
        prod.jobFileGroupSize = groupSize
        if inputDataPolicy:
            prod.LHCbJob.setInputDataPolicy(inputDataPolicy)
        prod.setOutputMode(outputMode)
        if outputFileMask:
            outputFileMask = [
                m.lower() for m in outputFileMask.replace(' ', '').split(',')
            ]
        if outputFileStep:
            if isinstance(outputFileStep, str):
                outputFileStep = [
                    m.lower()
                    for m in outputFileStep.replace(' ', '').split(',')
                ]
        prod.setFileMask(outputFileMask, outputFileStep)
        if target:
            if target == 'Tier2':
                prod.banTier1s()
            elif 'BAN' in target:
                sitesToBan = target.split(':')
                if len(sitesToBan) > 1:
                    prod.banSites(sitesToBan[1:])
            elif target != 'ALL':
                prod.LHCbJob.setDestination(target)
        prod.LHCbJob.setInputData(inputDataList)
        if ancestorDepth:
            prod.LHCbJob.setAncestorDepth(ancestorDepth)
        if derivedProdID:
            prod.ancestorProduction = derivedProdID
        if transformationFamily:
            prod.transformationFamily = transformationFamily
        if self.fractionToProcess:
            prod.setParameter('FractionToProcess', 'string',
                              str(self.fractionToProcess),
                              'Fraction to process')
        if self.minFilesToProcess:
            prod.setParameter('MinFilesToProcess', 'string',
                              str(self.minFilesToProcess),
                              'Min N of Files to process')
        if self.processingPass:
            prod.setParameter('processingPass', 'string', self.processingPass,
                              'Processing pass of input for the request')

        # Adding optional input BK query
        if bkQuery:
            if bkQuery.lower() == 'full':
                prod.inputBKSelection = self._getBKKQuery()
            elif bkQuery.lower() == 'frompreviousprod':
                prod.inputBKSelection = self._getBKKQuery(
                    'frompreviousprod', fTypeIn, previousProdID)

        self.logger.verbose('Launching with BK selection %s' %
                            prod.inputBKSelection)

        prod = self._addStepsToProd(prod,
                                    stepsInProd,
                                    removeInputData=removeInputData)

        for ft, oSE in outputSE.items():
            prod.outputSEs.setdefault(ft, oSE)

        prod.LHCbJob.setDIRACPlatform()

        return prod
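
    # Hypothetical call, for illustration only (argument values are made up and do
    # not come from the original source):
    #
    #   prod = self._buildProduction(prodType='DataStripping',
    #                                stepsInProd=[stepDict1, stepDict2],
    #                                outputSE={'DST': 'Tier1-Buffer'},
    #                                priority=5,
    #                                cpu=1000000,
    #                                bkQuery='Full',
    #                                groupSize=2)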

    #############################################################################

    def _addStepsToProd(self,
                        prod,
                        stepsInProd,
                        stepsSequence='sequential',
                        removeInputData=False):
        """ Given a Production object, add requested steps (application and finalization)

    Args:
      prod (Production): the Production object to which the steps are added
      stepsInProd (DAG): DAG of steps in a production
      stepsSequence (str or dict): applications steps sequence
      removeInputData (bool): flag that indicates if the input data should be removed (for the finalization step)

    Returns:
      prod with steps added

    """
        # Adding the application steps
        firstStep = stepsInProd.pop(0)
        stepName = prod.addApplicationStep(stepDict=firstStep,
                                           modulesList=self.modulesList)
        prod.gaudiSteps.append(stepName)

        for step in stepsInProd:
            stepName = prod.addApplicationStep(stepDict=step,
                                               inputData='previousStep',
                                               modulesList=self.modulesList)
            prod.gaudiSteps.append(stepName)

        # Adding the finalization step
        if removeInputData:
            prod.addFinalizationStep([
                'UploadOutputData', 'RemoveInputData', 'UploadLogFile',
                'UploadMC', 'FailoverRequest'
            ])
        else:
            prod.addFinalizationStep()

        return prod

    def _getBKKQuery(self, mode='full', fileType=None, previousProdID=0):
        """ simply creates the bkk query dictionary
    """

        if fileType is None:
            fileType = []

        if mode.lower() == 'full':
            bkQuery = {
                'FileType': ';;;'.join(self.bkFileType),
                'EventType': str(self.eventType),
                'ConfigName': self.configName,
                'ConfigVersion': self.configVersion
            }

            if self.dataTakingConditions:
                bkQuery['DataTakingConditions'] = self.dataTakingConditions

            if self.processingPass:
                bkQuery['ProcessingPass'] = self.processingPass

            if self.dqFlag:
                bkQuery['DataQualityFlag'] = self.dqFlag.replace(
                    ',', ';;;').replace(' ', '')

            if (self.startRun or self.endRun) and self.runsList:
                raise ValueError("Please don't mix a runs list with start/end run")

            if self.endRun and self.startRun:
                if self.endRun < self.startRun:
                    gLogger.error(
                        "Your end run '%d' should not be smaller than your start run '%d'!"
                        % (self.endRun, self.startRun))
                    raise ValueError("Error setting start or end run")

            if self.startRun:
                bkQuery['StartRun'] = self.startRun
            if self.endRun:
                bkQuery['EndRun'] = self.endRun

            if self.runsList:
                bkQuery['RunNumbers'] = self.runsList.replace(',', ';;;').replace(' ', '')

            if self.visibility:
                bkQuery['Visible'] = self.visibility

        elif mode.lower() == 'frompreviousprod':
            bkQuery = {
                'FileType': ';;;'.join(fileType).replace(' ', ''),
                'ProductionID': int(previousProdID)
            }

            if self.eventType:
                bkQuery['EventType'] = str(self.eventType)

            if self.dqFlag:
                bkQuery['DataQualityFlag'] = self.dqFlag.replace(
                    ',', ';;;').replace(' ', '')

        return bkQuery
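
    # For illustration only (hypothetical values, not from the original source), a
    # 'full'-mode query built above could look like:
    #
    #   {'FileType': 'RAW;;;FULL.DST',
    #    'EventType': '90000000',
    #    'ConfigName': 'LHCb',
    #    'ConfigVersion': 'Collision18',
    #    'DataTakingConditions': 'Beam6500GeV-VeloClosed-MagDown',
    #    'ProcessingPass': '/Real Data',
    #    'DataQualityFlag': 'OK;;;UNCHECKED',
    #    'Visible': 'Yes'}
    #
    # Note the ';;;' separator used for multi-valued fields.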

    ################################################################################
    # properties

    def set_stepsList(self, value):
        listInt = []
        for item in value:
            try:
                listInt.append(int(item))
            except ValueError:
                break
        self._stepsList = listInt

    def get_stepsList(self):
        return self._stepsList

    stepsList = property(get_stepsList, set_stepsList)

    def set_startRun(self, value):
        if isinstance(value, str):
            value = int(value)
        if value < 0:
            raise ValueError("startRun can not be negative")
        self._startRun = value

    def get_startRun(self):
        return self._startRun

    startRun = property(get_startRun, set_startRun)

    def set_endRun(self, value):
        if isinstance(value, str):
            value = int(value)
        if value < 0:
            raise ValueError("endRun can not be negative")
        self._endRun = value

    def get_endRun(self):
        return self._endRun

    endRun = property(get_endRun, set_endRun)

    def set_requestID(self, value):
        if value == '':
            value = 0
        if isinstance(value, str):
            value = int(value)
        if value < 0:
            raise ValueError("requestID can not be negative")
        self._requestID = value

    def get_requestID(self):
        return self._requestID

    requestID = property(get_requestID, set_requestID)

    def set_parentRequestID(self, value):
        if value == '':
            value = 0
        if isinstance(value, str):
            value = int(value)
        if value < 0:
            raise ValueError("parentRequestID can not be negative")
        self._parentRequestID = value

    def get_parentRequestID(self):
        return self._parentRequestID

    parentRequestID = property(get_parentRequestID, set_parentRequestID)

    def set_bkFileType(self, value):
        if isinstance(value, str):
            value = value.replace(' ', '').split(',')
        self._bkFileType = value

    def get_bkFileType(self):
        return self._bkFileType

    bkFileType = property(get_bkFileType, set_bkFileType)
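
# Minimal usage sketch (an assumption, not part of the original file): the
# properties defined above accept string input, convert it to int, and reject
# negative values.
if __name__ == '__main__':
    pr = ProductionRequest()  # hypothetical instantiation; needs a configured DIRAC environment
    pr.startRun = '1234'      # stored as the integer 1234
    try:
        pr.endRun = '-1'      # a negative value raises ValueError
    except ValueError as exc:
        print 'endRun rejected:', exc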
Example 19
"""

This is a very simple bkk performance test: it calls the service with a message, and the
service returns the message.

"""
import time

from DIRAC.Core.Base.Script import parseCommandLine
parseCommandLine()

from LHCbDIRAC.BookkeepingSystem.Client.BookkeepingClient import BookkeepingClient
cl = BookkeepingClient()


class Transaction(object):
    def __init__(self):
        self.custom_timers = {}

    def run(self):
        start_time = time.time()
        retVal = cl.echo("simple test")
        if not retVal['OK']:
            print 'ERROR', retVal['Message']
        end_time = time.time()
        self.custom_timers['Bkk_ResponseTime'] = end_time - start_time
        self.custom_timers['Bkk_Echo'] = end_time - start_time


if __name__ == '__main__':
    trans = Transaction()
    trans.run()
    print trans.custom_timers
Example 20
    def __init__(self, bkkClientIn=None, diracProdIn=None):
        """ c'tor

        Some variables are defined here. A production request is made of:
        a stepsList, the production types, and various parameters of those productions.
    """

        if bkkClientIn is None:
            self.bkkClient = BookkeepingClient()
        else:
            self.bkkClient = bkkClientIn

        if diracProdIn is None:
            self.diracProduction = DiracProduction()
        else:
            self.diracProduction = diracProdIn

        self.rpcProductionRequest = RPCClient(
            'ProductionManagement/ProductionRequest')
        self.tc = TransformationClient()

        self.logger = gLogger.getSubLogger('ProductionRequest')

        self.opsH = Operations()

        # parameters of the request
        self.requestID = 0
        self.parentRequestID = 0
        self.appendName = '1'
        self.outConfigName = ''
        self.prodsToLaunch = []  # productions to launch
        self.stepsListDict = []  # list of dict of steps
        self.stepsInProds = []  # a list of lists
        # parameters of the input data
        self.processingPass = ''
        self.dataTakingConditions = ''
        self.eventType = ''
        self.bkFileType = []
        self.dqFlag = ''
        self.startRun = 1
        self.endRun = 2
        self.runsList = ''
        self.configName = 'test'
        self.configVersion = 'certification'
        # parameters of the first production
        self.publishFlag = True
        self.testFlag = False
        self.derivedProduction = 0
        self.previousProdID = 0  # optional prod from which to start
        self.fullListOfOutputFileTypes = []

        # parameters that are the same for each production
        self.prodGroup = ''  # This ends up being the 'ProcessingType' workflow parameter, and it is used for accounting
        self.visibility = ''  # For BKQuery
        self.fractionToProcess = 0
        self.minFilesToProcess = 0
        self.modulesList = None  # Usually:
        # ['GaudiApplication', 'AnalyseXMLSummary',
        # 'ErrorLogging', 'BookkeepingReport', 'StepAccounting' ]

        # parameters of each production (the length of each list has to be the same as the number of productions)
        self.events = []
        self.stepsList = []
        self.extraOptions = {}
        self.prodsTypeList = []
        self.bkQueries = []  # list of bk queries
        self.removeInputsFlags = []
        self.priorities = []
        self.cpus = []
        self.inputs = []  # list of lists
        self.outputModes = []
        self.targets = []
        self.outputFileMasks = []
        self.outputFileSteps = []
        self.groupSizes = []
        self.plugins = []
        self.inputDataPolicies = []
        self.previousProds = [None]  # list of productions from which to take the inputs (the first is always None)
        self.multicore = []  # list of flags to override the multicore flags of the steps

        self.outputSEs = []  # a list of StorageElements
        self.specialOutputSEs = []  # a list of dictionaries - might be empty
        self.outputSEsPerFileType = []  # a list of dictionaries - filled later
        self.ancestorDepths = []
        self.compDict = {
            'HIGH': 'Compression-LZMA-4',
            'LOW': 'Compression-ZLIB-1'
        }
        self.compressionLvl = ['']  # List: one compression level each step
        self.appConfig = '$APPCONFIGOPTS/Persistency/'  # Default location of the compression level configuration files
        #
        # These lists define the visibility of the output files produced by each step. For MC productions, the visibility
        # is tied to the compression level: visible files are compressed at the highest level.
        #
        self.outputVisFlag = []  # list of dictionaries with the default visibility flag of the output files per step
        self.specialOutputVisFlag = []  # list of dictionaries with a special visibility flag for given file types
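
        # Illustrative example (an assumption, not in the original source): for a
        # request with two productions, the caller fills one entry per production
        # in each of the per-production lists above, e.g.
        #
        #   self.prodsTypeList = ['DataStripping', 'Merge']
        #   self.priorities = [5, 8]
        #   self.cpus = [1000000, 300000]
        #   self.outputSEs = ['Tier1-BUFFER', 'Tier1-DST']
        #   self.stepsInProds = [[1, 2], [3]]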
Example 21
class StorageHistoryAgent( AgentModule ):
  def initialize( self ):
    '''Sets defaults
    '''
    self.am_setOption( 'PollingTime', 43200 )
    if self.am_getOption( 'DirectDB', False ):
      from LHCbDIRAC.DataManagementSystem.DB.StorageUsageDB import StorageUsageDB
      self.__stDB = StorageUsageDB()
    else:
      from DIRAC.Core.DISET.RPCClient import RPCClient
      self.__stDB = RPCClient( 'DataManagement/StorageUsage' )
    self.__workDirectory = self.am_getOption( "WorkDirectory" )
    mkDir( self.__workDirectory )
    self.log.info( "Working directory is %s" % self.__workDirectory )

    self.__ignoreDirsList = self.am_getOption( 'Ignore', [] )
    self.log.info( "List of directories to ignore for the DataStorage accounting: %s " % self.__ignoreDirsList )

    self.__bkClient = BookkeepingClient()
    self.__dataUsageClient = DataUsageClient()
    self.cachedMetadata = {}
    # build a dictionary with Event Type descriptions (to be sent to accounting, instead of the numeric Event Type ID)
    self.eventTypeDescription = { 'na':'na', 'notInBkk':'notInBkk', 'FailedBkkQuery':'FailedBkkQuery', 'None':'None'}
    self.limitForCommit = self.am_getOption( "LimitForCommit", 1000 )
    self.callsToGetSummary = 0
    self.callsToDirectorySummary = 0
    self.callsToDudbForMetadata = 0
    # keep count of calls to Bkk
    self.callsToBkkgetDirectoryMetadata = 0
    self.callsToBkkGetEvtType = 0

    return S_OK()

  def userStorageAccounting( self ):
    self.log.notice( "-------------------------------------------------------------------------------------\n" )
    self.log.notice( "Generate accounting records for user directories " )
    self.log.notice( "-------------------------------------------------------------------------------------\n" )

    result = self.__stDB.getUserSummary()
    if not result[ 'OK' ]:
      return result
    userCatalogData = result[ 'Value' ]
    print userCatalogData
    self.log.notice( "Got summary for %s users" % ( len( userCatalogData ) ) )
    result = self.__stDB.getUserSummaryPerSE()
    if not result[ 'OK' ]:
      return result
    userSEData = result[ 'Value' ]
    self.log.notice( "Got SE summary for %s users" % ( len( userSEData ) ) )

    now = Time.dateTime()
    numRows = 0
    for user in sorted( userSEData ):
      if user not in userCatalogData:
        self.log.error( "User has SE data but not Catalog data!", user )
        continue
      for se in sorted( userSEData[ user ] ):
        seData = userSEData[ user ][ se ]
        usRecord = UserStorage()
        usRecord.setStartTime( now )
        usRecord.setEndTime( now )
        usRecord.setValueByKey( "User", user )
        usRecord.setValueByKey( "StorageElement", se )
        usRecord.setValueByKey( "LogicalSize", userCatalogData[ user ][ 'Size' ] )
        usRecord.setValueByKey( "LogicalFiles", userCatalogData[ user ][ 'Files' ] )
        usRecord.setValueByKey( "PhysicalSize", seData[ 'Size' ] )
        usRecord.setValueByKey( "PhysicalFiles", seData[ 'Files' ] )
        usRecord.setValueByKey( "StorageSize", 0 )
        usRecord.setValueByKey( "StorageFiles", 0 )
        gDataStoreClient.addRegister( usRecord )
        numRows += 1

      self.log.notice( " User %s is using %.2f GiB (%s files)" % ( user,
                                                                   userCatalogData[ user ][ 'Size' ] / ( 1024.0 ** 3 ),
                                                                   userCatalogData[ user ][ 'Files' ] ) )
    self.log.notice( "Sending %s records to accounting for user storage" % numRows )
    res = gDataStoreClient.commit()
    if not res[ 'OK' ]:
      self.log.notice( "ERROR: committing UserStorage records: %s " % res )
      return S_ERROR( res )
    else:
      self.log.notice( "%s records for UserStorage type successfully committed" % numRows )

  def topDirectoryAccounting( self ):
    self.log.notice( "-------------------------------------------------------------------------------------\n" )
    self.log.notice( "Generate accounting records for top directories " )
    self.log.notice( "-------------------------------------------------------------------------------------\n" )

    ftb = 1.0e12

    # get info from the DB about the LOGICAL STORAGE USAGE (from the su_Directory table):
    result = self.__stDB.getSummary( '/lhcb/' )
    if not result[ 'OK' ]:
      return result
    logicalUsage = result['Value']
    topDirLogicalUsage = {}  # build the list of first level directories
    for row in logicalUsage:
      # d, size, files = row
      splitDir = row.split( "/" )
      if len( splitDir ) > 3:  # skip the root directory "/lhcb/"
        firstLevelDir = '/' + splitDir[1] + '/' + splitDir[2] + '/'
        topDirLogicalUsage.setdefault( firstLevelDir, {'Files':0, 'Size':0} )
        topDirLogicalUsage[ firstLevelDir ][ 'Files' ] += logicalUsage[ row ][ 'Files' ]
        topDirLogicalUsage[ firstLevelDir ][ 'Size' ] += logicalUsage[ row ][ 'Size' ]
    self.log.notice( "Summary on logical usage of top directories: " )
    for row in topDirLogicalUsage:
      self.log.notice( "dir: %s size: %.4f TB  files: %d" % ( row, topDirLogicalUsage[row]['Size'] / ftb,
                                                              topDirLogicalUsage[row]['Files'] ) )

    # loop on top level directories (/lhcb/data, /lhcb/user/, /lhcb/MC/, etc..)
    # to get the summary in terms of PHYSICAL usage grouped by SE:
    seData = {}
    for directory in topDirLogicalUsage:
      result = self.__stDB.getDirectorySummaryPerSE( directory )  # retrieve the PHYSICAL usage
      if not result[ 'OK' ]:
        return result
      seData[ directory ] = result[ 'Value' ]
      self.log.notice( "Got SE summary for %s directories " % ( len( seData ) ) )
      self.log.debug( "SEData: %s" % seData )
    # loop on top level directories to send the accounting records
    numRows = 0
    now = Time.dateTime()
    for directory in seData:
      self.log.debug( "dir: %s SEData: %s " % ( directory, seData[ directory ] ) )
      if directory not in topDirLogicalUsage:
        self.log.error( "Dir %s is in the summary per SE, but it is not in the logical files summary!" % directory )
        continue
      for se in sorted( seData[ directory ] ):
        storageRecord = Storage()
        storageRecord.setStartTime( now )
        storageRecord.setEndTime( now )
        storageRecord.setValueByKey( "Directory", directory )
        storageRecord.setValueByKey( "StorageElement", se )
        storageRecord.setValueByKey( "LogicalFiles", topDirLogicalUsage[ directory ][ 'Files' ] )
        storageRecord.setValueByKey( "LogicalSize", topDirLogicalUsage[ directory ][ 'Size' ] )
        try:
          physicalFiles = seData[ directory ][ se ][ 'Files' ]
        except KeyError:
          self.log.error( "WARNING! no file replicas for directory %s on SE %s" % ( directory, se ) )
          physicalFiles = 0
        try:
          physicalSize = seData[ directory ][ se ][ 'Size' ]
        except KeyError:
          self.log.error( "WARNING! no size for replicas for directory %s on SE %s" % ( directory, se ) )
          physicalSize = 0
        storageRecord.setValueByKey( "PhysicalFiles", physicalFiles )
        storageRecord.setValueByKey( "PhysicalSize", physicalSize )
        gDataStoreClient.addRegister( storageRecord )
        numRows += 1
        self.log.debug( "Directory: %s SE: %s  physical size: %.4f TB (%d files)" % ( directory,
                                                                                      se,
                                                                                      physicalSize / ftb,
                                                                                      physicalFiles ) )

    self.log.notice( "Sending %s records to accounting for top level directories storage" % numRows )
    res = gDataStoreClient.commit()
    if not res[ 'OK' ]:
      self.log.notice( "ERROR: committing Storage records: %s " % res )
      return S_ERROR( res )
    else:
      self.log.notice( "%s records for Storage type successfully committed" % numRows )

  def bkPathAccounting( self ):
    self.log.notice( "-------------------------------------------------------------------------------------\n" )
    self.log.notice( "Generate accounting records for DataStorage type " )
    self.log.notice( "-------------------------------------------------------------------------------------\n" )


    # counter for DataStorage records, commit to the accounting in bunches of self.limitForCommit records
    self.totalRecords = 0
    self.recordsToCommit = 0
    self.log.notice( " Call the function to create the StorageUsageDB dump.." )
    res = self.generateStorageUsagePerDir()
    if not res[ 'OK' ]:
      self.log.error( "ERROR generating the StorageUsageDB dump per directory" )
      return S_ERROR()

    # Keep a list of all directories in FC that are not found in the Bkk
    self.directoriesNotInBkk = []
    # for debugging purposes build dictionaries with storage usage to compare with the accounting plots
    self.debug_seUsage = {}
    self.debug_seUsage_acc = {}

    # set the time for the accounting records (same time for all records)
    now = Time.dateTime()
    # Get the directory metadata in a bulk query
    metaForList = self.__getMetadataForAcc( self.dirDict.values() )

    # loop on all directories  to get the bkk metadata
    for dirLfn, fullDirectory in self.dirDict.iteritems():
      if dirLfn not in fullDirectory:
        self.log.error( "ERROR: fullDirectory should include the dirname: %s %s " % ( fullDirectory, dirLfn ) )
        continue
      self.log.info( "Processing directory %s " % dirLfn )
      if dirLfn not in self.pfnUsage:
        self.log.error( "ERROR: directory does not have PFN usage %s " % dirLfn )
        continue
      self.log.verbose( "PFN usage: %s " % self.pfnUsage[ dirLfn ] )
      if dirLfn not in self.lfnUsage:
        self.log.error( "ERROR: directory does not have LFN usage %s " % dirLfn )
        continue
      self.log.verbose( "LFN usage: %s " % self.lfnUsage[ dirLfn ] )

      # for DEBUGGING:
      for se in self.pfnUsage[ dirLfn ]:
        self.debug_seUsage.setdefault( se, {'Files': 0 , 'Size' : 0 } )
        self.debug_seUsage[ se ][ 'Files' ] += self.pfnUsage[ dirLfn ][ se ][ 'Files' ]
        self.debug_seUsage[ se ][ 'Size' ] += self.pfnUsage[ dirLfn ][ se ][ 'Size' ]
      # end of DEBUGGING

      # get metadata for this directory
      metaForDir = metaForList.get( fullDirectory, {} )
      if not metaForDir:
        self.log.warn( "Metadata not available for directory %s" % fullDirectory )
        continue

      # Fill in the accounting record
      self.log.info( "Fill the record for %s and metadata: %s " % ( dirLfn, metaForDir ) )
      res = self.fillAndSendAccountingRecord( dirLfn, metaForDir, now )
      if not res['OK']:
        return res
      for se in self.pfnUsage[ dirLfn ]:
        self.debug_seUsage_acc.setdefault( se, { 'Files': 0 , 'Size': 0 } )
        self.debug_seUsage_acc[ se ][ 'Files' ] += self.pfnUsage[ dirLfn ][ se ][ 'Files' ]
        self.debug_seUsage_acc[ se ][ 'Size' ] += self.pfnUsage[ dirLfn ][ se ][ 'Size' ]

    # Don't forget to commit the remaining records!
    self.__commitRecords()


  def execute( self ):
    if self.am_getOption( "CleanBefore", False ):
      self.log.notice( "Cleaning the DB" )
      result = self.__stDB.purgeOutdatedEntries( "/lhcb/user", self.am_getOption( "OutdatedSeconds", 86400 * 10 ) )
      if not result[ 'OK' ]:
        return result
      self.log.notice( "Purged %s outdated records" % result[ 'Value' ] )

    # User accounting (per user and SE)
    self.userStorageAccounting()
    # Accounting per top directory
    self.topDirectoryAccounting()
    # full production data accounting
    self.bkPathAccounting()


    self.log.notice( "-------------------------------------------------------------------------------------\n" )
    self.log.notice( "------ End of cycle report for DataStorage accounting--------------------------------\n" )
    self.log.notice( "Total directories found in FC:  %d " % len( self.dirDict ) )
    totalCallsToStorageUsage = self.callsToGetSummary + self.callsToDirectorySummary
    self.log.notice( "Total calls to StorageUsage: %d , took: %d s " % ( totalCallsToStorageUsage, self.genTotalTime ) )
    totalCallsToBkk = self.callsToBkkgetDirectoryMetadata + self.callsToBkkGetEvtType
    self.log.notice( "Total calls to DataUsage for cache: %d" % self.callsToDudbForMetadata )
    self.log.notice( "Total calls to Bookkeeping: %d (getDirectoryMetadata: %d, getEventType: %d)" % ( totalCallsToBkk,
                                                                                                       self.callsToBkkgetDirectoryMetadata,
                                                                                                       self.callsToBkkGetEvtType ) )
    self.log.notice( "Total records sent to accounting for DataStorage:  %d " % self.totalRecords )
    self.log.notice( "Directories not found in Bookkeeping: %d " % ( len( self.directoriesNotInBkk ) ) )
    fileName = os.path.join( self.__workDirectory, "directoriesNotInBkk.txt" )
    self.log.notice( "written to file: %s " % fileName )
    f = open( fileName, "w" )
    for d in self.directoriesNotInBkk:
      f.write( "%s\n" % d )
    f.close()
    # for DEBUG only
    self.log.info( "Summary of StorageUsage: files size " )
    for se in sorted( self.debug_seUsage ):
      self.log.info( "all: %s  %d %d Bytes ( %.2f TB ) " % ( se, self.debug_seUsage[ se ]['Files'],
                                                             self.debug_seUsage[ se ]['Size'],
                                                             self.debug_seUsage[ se ]['Size'] / 1.0e12 ) )
      if se in self.debug_seUsage_acc:
        self.log.info( "acc: %s  %d %d Bytes ( %.2f TB ) " % ( se, self.debug_seUsage_acc[ se ]['Files'],
                                                               self.debug_seUsage_acc[ se ]['Size'],
                                                               self.debug_seUsage_acc[ se ]['Size'] / 1.0e12 ) )
      else:
        self.log.info( "SE not in self.debug_seUsage_acc keys" )
    return S_OK()

  def __getMetadataForAcc( self, dirList ):
    """ Get metadata for a directory either from memory, from the storageDB or from BK """
    # Try and get the metadata from memory cache
    notFound = []
    metaForList = {}
    for dirName in dirList:
      metaForList[dirName] = self.cachedMetadata.get( dirName, {} )
      if not metaForList[dirName]:
        notFound.append( dirName )
    notInCache = []
    if notFound:
      self.log.info( "Memory metadata cache missed for %d directories" % len( notFound ) )
      self.log.verbose( "call getDirMetadata for (first 10): %s " % str( notFound[0:10] ) )
      for dirChunk in breakListIntoChunks( notFound, 10000 ):
        self.callsToDudbForMetadata += 1
        res = self.__dataUsageClient.getDirMetadata( dirChunk )  # this could be a bulk query for a list of directories
        if not res[ 'OK' ]:
          self.log.error( "Error retrieving %d directories meta-data %s " % ( len( dirChunk ), res['Message'] ) )
          # this usually happens when directories are removed from FC between the StorageUsageDB dump and this call,
          # if the Db is refreshed exactly in this time interval. Not really a problem.
          ####################### just a try #######################
          notInCache += dirChunk
          continue
        self.log.verbose( "getDirMetadata returned: %s " % str( res['Value'] ) )
        for dirName in dirChunk:
          # Compatibility with old (list for single file) and new (dictionary) service
          if isinstance( res['Value'], dict ):
            metaTuple = res['Value'].get( dirName, () )
          elif len( dirList ) == 1 and res['Value']:
            metaTuple = res['Value'][0]
          else:
            metaTuple = ()
          if metaTuple and metaTuple[3] is not None:
            metaForDir = metaForList[dirName]
            _dirID, metaForDir[ 'DataType' ], metaForDir[ 'Activity' ], metaForDir[ 'Conditions' ], metaForDir[ 'ProcessingPass' ], \
              metaForDir[ 'EventType' ], metaForDir[ 'FileType' ], metaForDir[ 'Production' ], metaForDir['Visibility'] = metaTuple
          else:
            notInCache.append( dirName )

      failedBK = []
      if notInCache:
        cachedFromBK = []
        self.log.info( "Directory metadata cache missed for %d directories => query BK and cache" % len( notInCache ) )
        for dirChunk in breakListIntoChunks( notInCache, 200 ):
          self.callsToBkkgetDirectoryMetadata += 1
          res = self.__bkClient.getDirectoryMetadata( dirChunk )
          if not res[ 'OK' ]:
            self.log.error( "Totally failed to query Bookkeeping", res[ 'Message' ] )
            failedBK += dirChunk
            for dirName in dirChunk:
              metaForDir = metaForList[dirName]
              _fillMetadata( metaForDir, 'FailedBkkQuery' )
          else:
            bkMetadata = res['Value']
            self.log.debug( "Successfully queried Bookkeeping, result: %s " % bkMetadata )
            for dirName in dirChunk:
              metaForDir = metaForList[dirName]
              # BK returns a list of metadata, choose the first one...
              metadata = bkMetadata['Successful'].get( dirName, [{}] )[0]
              if metadata and metadata.get( 'ConditionDescription' ) is not None:
                metadata['Visibility'] = metadata.pop( 'VisibilityFlag', metadata.get( 'Visibility', 'na' ) )
                # All is OK, directory found
                _fillMetadata( metaForDir, metadata )
                self.log.verbose( "Cache entry %s in DirMetadata table.." % dirName )
                resInsert = self.__dataUsageClient.insertToDirMetadata( { dirName: metadata} )
                if not resInsert[ 'OK' ]:
                  self.log.error( "Failed to cache metadata in DirMetadata table! %s " % resInsert[ 'Message' ] )
                else:
                  cachedFromBK.append( dirName )
                  self.log.verbose( "Successfully cached metadata for %s : %s" % ( dirName, str( metadata ) ) )
                  self.log.debug( "result: %s " % str( resInsert ) )
              else:
                # Directory not found
                self.log.verbose( "Directory %s not registered in Bookkeeping!" % dirName )
                _fillMetadata( metaForDir, 'notInBkk' )
                failedBK.append( dirName )
                self.directoriesNotInBkk.append( dirName )
              # Translate a few keys for accounting
              for bkName, accName in ( ( 'ConfigName', 'DataType' ), ( 'ConfigVersion', 'Activity' ), ( 'ConditionDescription', 'Conditions' ) ):
                metaForDir[accName] = metaForDir.pop( bkName, 'na' )
        self.log.info( 'Successfully cached %d directories from BK' % len( cachedFromBK ) )
        if self.directoriesNotInBkk:
          self.log.warn( '%d directories not found in BK' % len( self.directoriesNotInBkk ) )

      # cache locally the metadata
      for dirName in [dn for dn in notFound if dn not in failedBK]:
        metaForDir = metaForList[dirName]
        # Translate the numerical event type to a description string
        metaForDir['EventType'] = self.__getEventTypeDescription( metaForDir.pop( 'EventType', 'na' ) )
        self.cachedMetadata[ dirName] = metaForDir.copy()
    else:
      self.log.info( "Memory metadata cache hit for %d directories" % len( dirList ) )
    return metaForList

  def __commitRecords( self ):
    if self.recordsToCommit:
      res = gDataStoreClient.commit()
      if not res[ 'OK' ]:
        self.log.error( "Accounting ERROR: commit returned %s" % res )
      else:
        self.log.notice( "%d records committed " % self.recordsToCommit )
        self.recordsToCommit = 0
        self.log.notice( "commit for DataStorage returned: %s" % res )

  def generateStorageUsagePerDir( self ):
    '''Generate a dump of the StorageUsageDB and keep it in memory in a dictionary
       (new version of Apr 2012)
    '''

    start = time.time()
    self.log.notice( 'Starting from path: /lhcb/' )
    res = self.__stDB.getStorageDirectories( '/lhcb/' )
    if not res['OK']:
      return S_ERROR()
    totalDirList = res['Value']
    self.log.info( "Total directories retrieved from StorageUsageDB: %d " % len( totalDirList ) )
    # select only terminal directories (directories without sub-directories)
    # mc directory structure: /lhcb/MC/[year]/[file type]/[prod]/0000/ => len = 7
    # raw data:               /lhcb/data/2011/RAW/FULL/LHCb/COLLISION11/99983
    # => len 9 (under /lhcb/data/ from 2011 only RAW, before 2011 also other file types)
    # processed data: under both /lhcb/data and /lhcb/LHCb/
    #                         /lhcb/data/2010/DST/00009300/0000
    # data:                   /lhcb/LHCb/Collision12/ICHEP.DST/00017399/0000/
    self.dirDict = {}
    ignoredDirectories = dict.fromkeys( self.__ignoreDirsList, 0 )
    self.log.info( "Directories to be ignored: %s " % str( sorted( ignoredDirectories ) ) )
    for dirItem in totalDirList:
      # make sure that last character is a '/'
      dirItem = _standardDirectory( dirItem )
      splitDir = dirItem.split( '/' )
      if len( splitDir ) < 4:  # avoid picking up intermediate directories which don't contain files, like /lhcb/
        self.log.warn( "Directory %s skipped, as top directory" % dirItem )
        continue
      secDir = splitDir[ 2 ]
      if secDir in ignoredDirectories:
        self.log.verbose( "Directory to be ignored, skipped: %s " % dirItem )
        ignoredDirectories[ secDir ] += 1
        continue
      # for each type of directory (MC, reconstructed data and runs) check the format, in order not to count more than
      # once the productions with more than one sub-directory
      # for MC directories:
      # example: '/lhcb/MC/MC10/ALLSTREAMS.DST/00010908/0000/',
      # or        /lhcb/MC/2011/DST/00010870/0000
      # one directory for each file type
      # for data
      # /lhcb/LHCb/Collision11/SWIMSTRIPPINGD02KSPIPI.MDST/00019088/0000/
      # for raw data: /lhcb/data/2012/RAW/FULL/LHCb/COLLISION12/133784/
      try:
        dataType = splitDir[ -6 ]
        if dataType == "RAW":
          self.log.verbose( "RAW DATA directory: %s" % splitDir )
          directory = '/'.join( splitDir[:-1] )
          fullDirectory = directory
        else:
          suffix = splitDir[ -2 ]  # is the sub-directory suffix 0000, 0001, etc...
          self.log.verbose( "MC or reconstructed data directory: %s" % splitDir )
          if splitDir[-3] == 'HIST':
            directory = '/'.join( splitDir[:-1] )
            fullDirectory = directory
            self.log.verbose( "histo dir: %s " % directory )
          else:
            directory = '/'.join( splitDir[:-2] )
            fullDirectory = os.path.join( directory, suffix )
        directory = _standardDirectory( directory )
        fullDirectory = _standardDirectory( fullDirectory )
        if directory not in self.dirDict:
          self.dirDict[ directory ] = fullDirectory
        self.log.verbose( "Directory contains production files: %s " % directory )
      except IndexError:
        self.log.warn( "The directory has unexpected format: %s " % splitDir )

    self.lfnUsage = {}
    self.pfnUsage = {}
    totalDiscardedDirs = 0
    self.log.info( "Directories that have been discarded:" )
    for dd in ignoredDirectories:
      self.log.info( "/lhcb/%s - %d " % ( dd, ignoredDirectories[ dd ] ) )
      totalDiscardedDirs += ignoredDirectories[ dd ]
    self.log.info( "Total discarded directories: %d " % totalDiscardedDirs )
    self.log.info( "Retrieved %d dirs from StorageUsageDB containing prod files" % len( self.dirDict ) )
    self.log.info( "Getting the number of files and size from StorageUsage service" )
    for d in self.dirDict:
      self.log.verbose( "Get storage usage for directory %s " % d )
      res = self.__stDB.getDirectorySummaryPerSE( d )
      self.callsToDirectorySummary += 1
      if not res[ 'OK' ]:
        self.log.error( "Cannot retrieve PFN usage %s" % res['Message'] )
        continue
      if d not in self.pfnUsage:
        self.pfnUsage[ d ] = res['Value']
      self.log.verbose( "Get logical usage for directory %s " % d )
      res = self.__stDB.getSummary( d )
      self.callsToGetSummary += 1
      if not res[ 'OK' ]:
        self.log.error( "Cannot retrieve LFN usage %s" % res['Message'] )
        continue
      if not res['Value']:
        self.log.error( "For dir %s getSummary returned an empty value: %s " % ( d, str( res ) ) )
        continue
      self.lfnUsage.setdefault( d, {} )
      for retDir, dirInfo in res['Value'].iteritems():
        if d in retDir:
          self.lfnUsage[ d ][ 'LfnSize' ] = dirInfo['Size']
          self.lfnUsage[ d ][ 'LfnFiles'] = dirInfo['Files']
      self.log.verbose( "PFN usage: %s" % self.pfnUsage[ d ] )
      self.log.verbose( "LFN usage: %s" % self.lfnUsage[ d ] )


    end = time.time()
    self.genTotalTime = end - start
    self.log.info( "StorageUsageDB dump completed in %d s" % self.genTotalTime )

    return S_OK()

  def __getEventTypeDescription( self, eventType ):
    # convert eventType to string:
    try:
      eventType = int( eventType )
    except ( ValueError, TypeError ):
      pass
    # check that the event type description is in the cached dictionary, and otherwise query the Bkk
    if eventType not in self.eventTypeDescription:
      self.log.notice( "Event type description not available for eventTypeID %s, getting from Bkk" % eventType )
      res = self.__bkClient.getAvailableEventTypes()
      self.callsToBkkGetEvtType += 1
      if not res['OK']:
        self.log.error( "Error querying the Bkk: %s" % res['Message'] )
      else:
        self.eventTypeDescription.update( dict( res['Value'] ) )
      self.log.verbose( "Updated  self.eventTypeDescription dict: %s " % str( self.eventTypeDescription ) )
      # If still not found, log it!
      if eventType not in self.eventTypeDescription:
        self.log.error( "EventType %s is not in cached dictionary" % str( eventType ) )

    return self.eventTypeDescription.get( eventType, 'na' )

  def fillAndSendAccountingRecord( self, lfnDir, metadataDict, now ):
    ''' Create, fill and send to accounting a record for the DataStorage type.
    '''
    dataRecord = DataStorage()
    dataRecord.setStartTime( now )
    dataRecord.setEndTime( now )
    logicalSize = self.lfnUsage[ lfnDir ][ 'LfnSize' ]
    logicalFiles = self.lfnUsage[ lfnDir ][ 'LfnFiles' ]
    dataRecord.setValueByKey( "LogicalSize", logicalSize )
    dataRecord.setValueByKey( "LogicalFiles", logicalFiles )
    for key in ( 'DataType', 'Activity', 'FileType', 'Production', 'ProcessingPass', 'Conditions', 'EventType' ):
      dataRecord.setValueByKey( key, metadataDict.get( key, 'na' ) )
    self.log.verbose( ">>> Send DataStorage record to accounting:" )
    self.log.verbose( "\tlfnFiles: %d lfnSize: %d " % ( logicalFiles, logicalSize ) )

    for se in self.pfnUsage[ lfnDir ]:
      self.log.verbose( "Filling accounting record for se %s" % se )
      physicalSize = self.pfnUsage[ lfnDir ][ se ][ 'Size' ]
      physicalFiles = self.pfnUsage[ lfnDir ][ se ][ 'Files' ]

      dataRecord.setValueByKey( "StorageElement", se )
      dataRecord.setValueByKey( "PhysicalSize", physicalSize )
      dataRecord.setValueByKey( "PhysicalFiles", physicalFiles )
      self.log.verbose( "\t\tStorageElement: %s --> physFiles: %d  physSize: %d " % ( se, physicalFiles, physicalSize ) )

      # addRegister is NOT making a copy, therefore all records are otherwise overwritten
      res = gDataStoreClient.addRegister( copy.deepcopy( dataRecord ) )
      if not res[ 'OK']:
        self.log.error( "addRegister returned: %s" % res )
        return S_ERROR( "addRegister returned: %s" % res )
      # Reset logical information to zero in order to send it only once!
      dataRecord.setValueByKey( "LogicalSize", 0 )
      dataRecord.setValueByKey( "LogicalFiles", 0 )
      self.totalRecords += 1
      self.recordsToCommit += 1

    # Commit if necessary
    if self.recordsToCommit > self.limitForCommit:
      self.__commitRecords()

    return S_OK()
Example 22
class TransformationCleaningAgent(DiracTCAgent):
    """ .. class:: TransformationCleaningAgent
  """
    def __init__(self, *args, **kwargs):
        """ c'tor
    """
        DiracTCAgent.__init__(self, *args, **kwargs)

        self.directoryLocations = ['TransformationDB', 'StorageUsage']
        self.archiveAfter = 7
        self.fileTypesToKeep = ['GAUSSHIST']

        self.bkClient = None
        self.transClient = None
        self.storageUsageClient = None

    #############################################################################

    def initialize(self):
        """ Standard initialize method for agents
    """
        DiracTCAgent.initialize(self)

        self.directoryLocations = sorted(
            self.am_getOption('DirectoryLocations', self.directoryLocations))
        self.archiveAfter = self.am_getOption('ArchiveAfter',
                                              self.archiveAfter)  # days

        self.fileTypesToKeep = Operations().getValue(
            'Transformations/FileTypesToKeep', self.fileTypesToKeep)

        self.bkClient = BookkeepingClient()
        self.transClient = TransformationClient()
        self.storageUsageClient = StorageUsageClient()

        return S_OK()

    def cleanMetadataCatalogFiles(self, transID):
        """ clean the metadata using BKK and Data Manager. This method is a replacement of the one from base class

    :param self: self reference
    :param int transID: transformation ID
    """
        res = self.bkClient.getProductionFiles(transID, 'ALL', 'Yes')
        if not res['OK']:
            return res
        bkMetadata = res['Value']
        fileToRemove = []
        yesReplica = []
        self.log.info(
            "Found a total of %d files in the BK for transformation %d" %
            (len(bkMetadata), transID))
        for lfn, metadata in bkMetadata.iteritems():
            if metadata['FileType'] != 'LOG':
                fileToRemove.append(lfn)
                if metadata['GotReplica'] == 'Yes':
                    yesReplica.append(lfn)
        if fileToRemove:
            self.log.info(
                "Attempting to remove %d possible remnants from the catalog and storage"
                % len(fileToRemove))
            # Executing with shifter proxy
            gConfigurationData.setOptionInCFG(
                '/DIRAC/Security/UseServerCertificate', 'false')
            res = DataManager().removeFile(fileToRemove, force=True)
            gConfigurationData.setOptionInCFG(
                '/DIRAC/Security/UseServerCertificate', 'true')
            if not res['OK']:
                return res
            for lfn, reason in res['Value']['Failed'].iteritems():
                self.log.error("Failed to remove file found in BK",
                               "%s %s" % (lfn, reason))
            if res['Value']['Failed']:
                return S_ERROR("Failed to remove all files found in the BK")
            if yesReplica:
                self.log.info(
                    "Ensuring that %d files are removed from the BK" %
                    (len(yesReplica)))
                res = FileCatalog(
                    catalogs=['BookkeepingDB']).removeFile(yesReplica)
                if not res['OK']:
                    return res
                for lfn, reason in res['Value']['Failed'].iteritems():
                    self.log.error("Failed to remove file from BK",
                                   "%s %s" % (lfn, reason))
                if res['Value']['Failed']:
                    return S_ERROR("Failed to remove all files from the BK")
        self.log.info("Successfully removed all files found in the BK")
        return S_OK()

    def getTransformationDirectories(self, transID):
        """ get the directories for the supplied transformation from the transformation system

    :param self: self reference
    :param int transID: transformation ID
    """

        res = DiracTCAgent.getTransformationDirectories(self, transID)

        if not res['OK']:
            return res

        directories = res['Value']
        if isinstance(directories, basestring):  # Check for (stupid) formats
            directories = ast.literal_eval(directories)
            if not isinstance(directories, list):
                return S_ERROR("Wrong format of output directories")

        if 'StorageUsage' in self.directoryLocations:
            res = self.storageUsageClient.getStorageDirectories(
                '', '', transID, [])
            if not res['OK']:
                self.log.error("Failed to obtain storage usage directories",
                               res['Message'])
                return res
            transDirectories = res['Value']
            directories = self._addDirs(transID, transDirectories, directories)

        if not directories:
            self.log.info("No output directories found")

        # We should be removing from the list of directories
        # those directories created for file types that are:
        # - uploaded (as output files)
        # - not merged by subsequent steps
        # but this is pretty difficult to identify at run time, so we'd better remove the "RemovingFiles" production status
        # and replace it with a flush (this applies only to MC).
        # For now we simply rely on the configured list of file types to keep.
        fileTypesToKeepDirs = []
        for fileTypeToKeep in self.fileTypesToKeep:
            fileTypesToKeepDirs.extend(
                [x for x in directories if fileTypeToKeep in x])
        directories = list(
            set(directories).difference(set(fileTypesToKeepDirs)))

        directories = sorted(directories)
        return S_OK(directories)
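
    # Illustrative example (hypothetical paths, not from the original source): with
    # fileTypesToKeep = ['GAUSSHIST'], a directory such as
    #   /lhcb/MC/2018/GAUSSHIST/00012345/0000
    # is dropped from the returned list (so it is not cleaned), while
    #   /lhcb/MC/2018/ALLSTREAMS.DST/00012345/0000
    # stays in the list and remains eligible for cleaning.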
Example 23
class fakeClient:
    def __init__(self, trans, transID, lfns, asIfProd):
        self.trans = trans
        self.transID = transID
        from DIRAC.TransformationSystem.Client.TransformationClient import TransformationClient
        self.transClient = TransformationClient()
        from LHCbDIRAC.BookkeepingSystem.Client.BookkeepingClient import BookkeepingClient
        self.bk = BookkeepingClient()
        from DIRAC.DataManagementSystem.Client.DataManager import DataManager
        self.dm = DataManager()
        self.asIfProd = asIfProd

        (self.transFiles, self.transReplicas) = self.prepareForPlugin(lfns)

    def addFilesToTransformation(self, transID, lfns):
        return S_OK({
            'Failed': {},
            'Successful': dict([(lfn, 'Added') for lfn in lfns])
        })

    def getTransformation(self, transID, extraParams=False):
        if transID == self.transID and self.asIfProd:
            transID = self.asIfProd
        if transID != self.transID:
            return self.transClient.getTransformation(transID)
        res = self.trans.getType()
        return DIRAC.S_OK({'Type': res['Value']})

    def getReplicas(self):
        return self.transReplicas

    def getFiles(self):
        return self.transFiles

    def getCounters(self, table, attrList, condDict):
        if condDict['TransformationID'] == self.transID and self.asIfProd:
            condDict['TransformationID'] = self.asIfProd
        if condDict['TransformationID'] != self.transID:
            return self.transClient.getCounters(table, attrList, condDict)
        possibleTargets = [
            'CERN-RAW', 'CNAF-RAW', 'GRIDKA-RAW', 'IN2P3-RAW', 'SARA-RAW',
            'PIC-RAW', 'RAL-RAW', 'RRCKI-RAW'
        ]
        counters = []
        for se in possibleTargets:
            counters.append(({'UsedSE': se}, 0))
        return DIRAC.S_OK(counters)

    def getBookkeepingQuery(self, transID):
        if transID == self.transID and self.asIfProd:
            return self.transClient.getBookkeepingQuery(self.asIfProd)
        return self.trans.getBkQuery()

    def insertTransformationRun(self, transID, runID, xx):
        return DIRAC.S_OK()

    def getTransformationRuns(self, condDict):
        if condDict['TransformationID'] == self.transID and self.asIfProd:
            condDict['TransformationID'] = self.asIfProd
        if condDict['TransformationID'] == self.transID:
            transRuns = []
            runs = condDict.get('RunNumber', [])
            if not runs and self.transFiles:
                res = self.bk.getFileMetadata(
                    [fileDict['LFN'] for fileDict in self.transFiles])
                if not res['OK']:
                    return res
                runs = list(
                    set(meta['RunNumber']
                        for meta in res['Value']['Successful'].itervalues()))
            for run in runs:
                transRuns.append({
                    'RunNumber': run,
                    'Status': "Active",
                    "SelectedSite": None
                })
            return DIRAC.S_OK(transRuns)
        else:
            return self.transClient.getTransformationRuns(condDict)

    def getTransformationFiles(self, condDict=None):
        if condDict.get('TransformationID') == self.transID and self.asIfProd:
            condDict['TransformationID'] = self.asIfProd
        if condDict.get('TransformationID') == self.transID:
            transFiles = []
            if 'Status' in condDict and 'Unused' not in condDict['Status']:
                return DIRAC.S_OK(transFiles)
            runs = None
            if 'RunNumber' in condDict:
                runs = condDict['RunNumber']
                if not isinstance(runs, list):
                    runs = [runs]
            for fileDict in self.transFiles:
                if not runs or fileDict['RunNumber'] in runs:
                    transFiles.append({
                        'LFN': fileDict['LFN'],
                        'Status': 'Unused',
                        'RunNumber': fileDict['RunNumber']
                    })
            return DIRAC.S_OK(transFiles)
        else:
            return self.transClient.getTransformationFiles(condDict=condDict)

    def setParameterToTransformationFiles(self, transID, lfnDict):
        """
    Update the transFiles with some parameters
    """
        if transID == self.transID:
            for fileDict in self.transFiles:
                fileDict.update(lfnDict.get(fileDict['LFN'], {}))
            return S_OK()
        else:
            return self.transClient.setParameterToTransformationFiles(
                transID, lfnDict)

    def getTransformationFilesCount(self, transID, field, selection=None):
        if selection is None:
            selection = {}
        if transID == self.transID or selection.get(
                'TransformationID') == self.transID:
            runs = selection.get('RunNumber')
            if runs and not isinstance(runs, list):
                runs = [runs]
            if field == 'Status':
                counters = {'Unused': 0}
                for fileDict in self.transFiles:
                    if not runs or fileDict['RunNumber'] in runs:
                        counters['Unused'] += 1
            elif field == 'RunNumber':
                counters = {}
                for fileDict in self.transFiles:
                    runID = fileDict['RunNumber']
                    if not runs or runID in runs:
                        counters.setdefault(runID, 0)
                        counters[runID] += 1
            else:
                return DIRAC.S_ERROR('Not implemented for field ' + field)
            counters['Total'] = sum(count for count in counters.itervalues())
            return DIRAC.S_OK(counters)
        else:
            return self.transClient.getTransformationFilesCount(
                transID, field, selection=selection)

    def getTransformationRunStats(self, transIDs):
        counters = {}
        for transID in transIDs:
            if transID == self.transID:
                for fileDict in self.transFiles:
                    runID = fileDict['RunNumber']
                    counters[transID][runID]['Unused'] = counters.setdefault(
                        transID, {}).setdefault(runID, {}).setdefault(
                            'Unused', 0) + 1
                for runID in counters[transID]:
                    counters[transID][runID]['Total'] = counters[transID][
                        runID]['Unused']
            else:
                res = self.transClient.getTransformationRunStats(transIDs)
                if res['OK']:
                    counters.update(res['Value'])
                else:
                    return res
        return DIRAC.S_OK(counters)

    def addRunsMetadata(self, runID, val):
        return self.transClient.addRunsMetadata(runID, val)

    def getRunsMetadata(self, runID):
        return self.transClient.getRunsMetadata(runID)

    def setTransformationRunStatus(self, transID, runID, status):
        return DIRAC.S_OK()

    def setTransformationRunsSite(self, transID, runID, site):
        return DIRAC.S_OK()

    def setFileStatusForTransformation(self, transID, status, lfns):
        return DIRAC.S_OK()

    def addTransformationRunFiles(self, transID, run, lfns):
        return DIRAC.S_OK()

    def setDestinationForRun(self, runID, site):
        return DIRAC.S_OK()

    def getDestinationForRun(self, runID):
        return self.transClient.getDestinationForRun(runID)

    def prepareForPlugin(self, lfns):
        import time
        print "Preparing the plugin input data (%d files)" % len(lfns)
        transType = self.trans.getType()['Value']
        if not lfns:
            return (None, None)
        res = self.bk.getFileMetadata(lfns)
        if res['OK']:
            files = []
            for lfn, metadata in res['Value']['Successful'].iteritems():
                runID = metadata.get('RunNumber', 0)
                runDict = {"RunNumber": runID, "LFN": lfn}
                files.append(runDict)
        else:
            print "Error getting BK metadata", res['Message']
            return ([], {})
        replicas = {}
        startTime = time.time()
        from DIRAC.Core.Utilities.List import breakListIntoChunks
        for lfnChunk in breakListIntoChunks(lfns, 200):
            # print lfnChunk
            if transType.lower() in ("replication", "removal"):
                res = self.dm.getReplicas(lfnChunk, getUrl=False)
            else:
                res = self.dm.getReplicasForJobs(lfnChunk, getUrl=False)
            # print res
            if res['OK']:
                for lfn, ses in res['Value']['Successful'].iteritems():
                    if ses:
                        replicas[lfn] = sorted(ses)
            else:
                print "Error getting replicas of %d files:" % len(
                    lfns), res['Message']
        print "Obtained replicas of %d files in %.3f seconds" % (
            len(lfns), time.time() - startTime)
        return (files, replicas)
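
    # Hypothetical usage sketch (illustration only, not part of the original class):
    # assuming 'fake' is an instance of this client already filled with transFiles,
    # the plugin input data could be prepared as follows.
    #
    #   lfns = [fileDict['LFN'] for fileDict in fake.transFiles]
    #   files, replicas = fake.prepareForPlugin(lfns)
    #   # 'files' is a list of {'RunNumber': ..., 'LFN': ...} dicts and 'replicas'
    #   # maps each LFN to the sorted list of SEs holding a replica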
Example 24
########################################################################
"""
  Insert new file types in the Bookkeeping
"""
__RCSID__ = "$Id$"

import DIRAC
from DIRAC.Core.Base import Script

Script.setUsageMessage('\n'.join([ __doc__.split('\n')[1],
                                     'Usage:',
                                     '  %s [option|cfgfile]' % Script.scriptName ]))
Script.parseCommandLine(ignoreErrors=True)

from LHCbDIRAC.BookkeepingSystem.Client.BookkeepingClient import BookkeepingClient
bk = BookkeepingClient()

exitCode = 0

ftype = raw_input("FileType: ")
desc = raw_input("Description: ")
version = raw_input("File type version: ")
print 'Do you want to add this new file type? (yes or no)'
value = raw_input('Choice:')
choice = value.lower()
if choice in ['yes', 'y']:
  res = bk.insertFileTypes(ftype.upper(), desc, version)
  if res['OK']:
    print 'The file type was added successfully!'
  else:
    print "Error discovered!", res['Message']
Example 25
Script.setUsageMessage('\n'.join([
    __doc__.split('\n')[1], 'Usage:',
    '  %s [option|cfgfile] ... ProdID' % Script.scriptName, 'Arguments:',
    '  ProdID:   Production ID'
]))
Script.parseCommandLine(ignoreErrors=True)
args = Script.getPositionalArgs()
import types

if len(args) < 1:
    Script.showHelp()

exitCode = 0

from LHCbDIRAC.BookkeepingSystem.Client.BookkeepingClient import BookkeepingClient
bk = BookkeepingClient()
prod = long(args[0])

res = bk.getProductionInformations(prod)

if res['OK']:
    val = res['Value']
    print "Production Info: "

    infs = val["Production informations"]
    if infs is not None:
        for inf in infs:
            if inf[2] is not None:
                print '    Configuration Name:', inf[0]
                print '    Configuration Version:', inf[1]
                print '    Event type:', inf[2]
Example 26
class PopularityAgent(AgentModule):
    """
  .. class:: PopularityAgent

  """
    # # DataUsageClient
    __dataUsageClient = None
    # # StorageUsageDB instance or DMS/DataUsage RPS client
    __stDB = None
    # # BKK Client
    __bkClient = None
    # # work directory
    __workDirectory = None
    # # counter for records to be sent to the accounting
    numPopRows = None

    def initialize(self):
        """ agent initialisation """
        self.am_setOption('PollingTime', 43200)
        if self.am_getOption('DirectDB', False):
            self.__stDB = StorageUsageDB()
            # self.bkClient = BookkeepingClient()#the necessary method is still not available in Bookk. client
        else:
            self.__stDB = RPCClient('DataManagement/DataUsage', timeout=600)
        self.__bkClient = BookkeepingClient()
        self.__dataUsageClient = DataUsageClient()
        self.__workDirectory = self.am_getOption("WorkDirectory")
        mkDir(self.__workDirectory)
        self.log.info("Working directory is %s" % self.__workDirectory)
        # by default, collects raw records from Popularity table inserted in the last day
        self.timeInterval = self.am_getOption(
            'timeIntervalForPopularityRecords', 1)
        self.queryTimeout = self.am_getOption('queryTimeout', 3600)
        self.cacheMetadata = {}
        self.limitForCommit = self.am_getOption("LimitForCommit", 1000)
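        # Configuration summary: the polling time is set to 43200 s (12 hours); DirectDB
        # (default False) selects a direct StorageUsageDB connection instead of the
        # DataManagement/DataUsage RPC service; timeIntervalForPopularityRecords (default
        # 1 day), queryTimeout (default 3600 s) and LimitForCommit (default 1000) tune
        # the query window and the intermediate accounting commits.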

        return S_OK()

# .........................................................................................

    def execute(self):
        """ Main loop of Popularity agent """

        now = datetime.now()
        endTime = datetime(now.year, now.month, now.day, 0, 0, 0)
        startTime = endTime - timedelta(days=self.timeInterval)
        endTimeQuery = endTime.isoformat()
        startTimeQuery = startTime.isoformat()
        # query all traces in the Popularity table in the time range startTime, endTime with status 'New'
        # the condition to get the traces is the AND of the time range and the status 'New'
        self.log.info(
            "Querying Pop db to retrieve entries in time range %s - %s " %
            (startTimeQuery, endTimeQuery))
        status = 'New'
        res = self.__dataUsageClient.getDataUsageSummary(
            startTimeQuery, endTimeQuery, status, timeout=self.queryTimeout)
        if not res['OK']:
            self.log.error("Error querying Popularity table.. %s" %
                           res['Message'])
            return S_ERROR(res['Message'])
        val = res['Value']
        self.log.info("Retrieved %d entries from Popularity table" % len(val))
        # Build the popularity report, and store the Ids in a set:
        idList = set()
        traceDict = {}
        for row in val:
            self.log.debug("row: %s" % str(row))
            rowId, dirLfn, site, count, insertTime = row
            if rowId not in idList:
                idList.add(rowId)
            else:
                self.log.error("Same Id found twice! %d " % rowId)
                continue
            if dirLfn.startswith('/lhcb/user/'):
                self.log.verbose(
                    "Private user directory. No metadata stored in Bkk %s " %
                    dirLfn)
                continue
            # compute the day bin relative to the query start time
            dayBin = (insertTime - startTime).days
            traceDict[dayBin][dirLfn][site] = \
                traceDict.setdefault(dayBin, {}).setdefault(dirLfn, {}).setdefault(site, 0) + count
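        # after the loop above, traceDict is nested as {dayBin: {dirLfn: {site: accessCount}}},
        # where dayBin is the number of whole days between startTime and the trace insertion time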

        # print a summary
        dayList = sorted(traceDict)
        for day in dayList:
            self.log.info(" ###### day %s (starting from %s ) " %
                          (day, startTimeQuery))
            self.log.info("---- %d directories touched:" % len(traceDict[day]))
            for lfn in traceDict[day]:
                self.log.verbose(" ---- lfn %s " % lfn)
                for site in traceDict[day][lfn]:
                    self.log.verbose(" -------- site  %s  count: %d " %
                                     (site, traceDict[day][lfn][site]))

        self.log.info("Retrieve meta-data information for each directory ")
        now = Time.dateTime()
        self.numPopRows = 0  # keep a counter of the records to send to accounting data-store
        for day in traceDict:
            timeForAccounting = self.computeTimeForAccounting(startTime, day)
            self.log.info("Processing day %s - time for accounting %s " %
                          (day, timeForAccounting))
            for dirLfn in traceDict[day]:
                # did = configName = configVersion = conditions = processingPass = eventType = fileType = production = "na"
                # retrieve the directory meta-data from the DirMetadata table
                self.log.info("Processing dir %s " % dirLfn)

                metaForDir = self.cacheMetadata.get(dirLfn)
                if not metaForDir:
                    dirList = [dirLfn]
                    # this could be done in a bulk query for a list of directories... TBD
                    res = self.__dataUsageClient.getDirMetadata(dirList)
                    if not res['OK']:
                        self.log.error(
                            "Error retrieving directory meta-data %s " %
                            res['Message'])
                        continue
                    dirMetadata = res['Value'].get(dirLfn)
                    if not res['Value'] or not dirMetadata:
                        self.log.info(
                            "Cache missed: query the BK to retrieve '%s' metadata and store it in the cache"
                            % dirList)
                        res = self.__bkClient.getDirectoryMetadata(dirList)
                        if not res['OK']:
                            self.log.error("Failed to query Bookkeeping %s" %
                                           res['Message'])
                            metadata = None
                        else:
                            self.log.verbose(
                                "Successfully queried Bookkeeping, result: %s "
                                % res)
                            metadata = res['Value'].get('Successful',
                                                        {}).get(dirLfn,
                                                                [{}])[0]
                        if not metadata:
                            self.log.warn(
                                "Directory is not registered in Bookkeeping! %s "
                                % dirLfn)
                            configName = configVersion = conditions = processingPass = eventType = fileType = production = "na"
                        else:
                            metadata['Visibility'] = metadata.pop(
                                'VisibilityFlag',
                                metadata.get('Visibility', 'na'))
                            configName = metadata['ConfigName']
                            configVersion = metadata['ConfigVersion']
                            conditions = metadata['ConditionDescription']
                            processingPass = metadata['ProcessingPass']
                            eventType = metadata['EventType']
                            fileType = metadata['FileType']
                            production = metadata['Production']

                            self.log.info(
                                "Cache this entry in DirMetadata table..")
                            res = self.__dataUsageClient.insertToDirMetadata(
                                {dirLfn: metadata})
                            if not res['OK']:
                                self.log.error(
                                    "Failed to insert metadata in DirMetadata table! %s "
                                    % res['Message'])
                            else:
                                self.log.info(
                                    "Successfully inserted metadata for directory %s in DirMetadata table "
                                    % dirLfn)
                                self.log.verbose("result: %s " % res)

                    else:
                        self.log.info(
                            "Directory %s was cached in DirMetadata table" %
                            dirLfn)
                        try:
                            __did, configName, configVersion, conditions, \
                                processingPass, eventType, fileType, production = dirMetadata[0:8]
                        except Exception:
                            self.log.error(
                                "Error decoding cached directory information",
                                dirMetadata)
                            continue
                    self.cacheMetadata[dirLfn] = (configName, configVersion,
                                                  conditions, processingPass,
                                                  eventType, fileType,
                                                  production)
                else:
                    configName, configVersion, conditions, processingPass, eventType, fileType, production = metaForDir

                for site in traceDict[day][dirLfn]:
                    usage = traceDict[day][dirLfn][site]
                    # compute the normalized usage, dividing by the number of files in the directory:
                    normUsage = usage  # to be done! after we have decided how to normalize
                    # Build record for the accounting
                    popRecord = Popularity()
                    popRecord.setStartTime(timeForAccounting)
                    popRecord.setEndTime(timeForAccounting)
                    popRecord.setValueByKey("DataType", configName)
                    popRecord.setValueByKey("Activity", configVersion)
                    popRecord.setValueByKey("FileType", fileType)
                    popRecord.setValueByKey("Production", production)
                    popRecord.setValueByKey("ProcessingPass", processingPass)
                    popRecord.setValueByKey("Conditions", conditions)
                    popRecord.setValueByKey("EventType", eventType)
                    popRecord.setValueByKey("StorageElement", site)
                    popRecord.setValueByKey("Usage", usage)
                    popRecord.setValueByKey("NormalizedUsage", normUsage)
                    res = gDataStoreClient.addRegister(popRecord)
                    if not res['OK']:
                        self.log.error("ERROR: addRegister returned: %s" %
                                       res['Message'])
                        continue
                    self.numPopRows += 1
                    self.log.info(
                        ">>> Sending record to accounting for: %s %s %s %s %s %s %s %s %s %d %d "
                        % (timeForAccounting, configName, configVersion,
                           fileType, production, processingPass, conditions,
                           eventType, site, usage, normUsage))
                    if self.numPopRows > self.limitForCommit:
                        res = self.__commitAccounting()
                        if not res['OK']:
                            return res
        # then set the status to Used
        res = self.__commitAccounting()
        if not res['OK']:
            return res
        self.log.info("Set the status to Used for %d entries" % len(idList))
        from DIRAC.Core.Utilities.List import breakListIntoChunks
        for idChunk in breakListIntoChunks(list(idList), 1000):
            res = self.__dataUsageClient.updatePopEntryStatus(
                list(idChunk), 'Used', timeout=self.queryTimeout)
            if not res['OK']:
                self.log.error(
                    "Error updating status in the Popularity table: %s" %
                    res['Message'])
                return res
        self.log.info("Status updated to Used correctly for %s entries " %
                      len(idList))

        return S_OK()


# .........................................................................................

    def __commitAccounting(self):
        res = gDataStoreClient.commit()
        if not res['OK']:
            self.log.error(
                "while committing %d Popularity records" % self.numPopRows,
                res['Message'])
        else:
            self.log.info(
                "%s records for Popularity type successfully committed" %
                self.numPopRows)
            self.numPopRows = 0
        return res

    def computeTimeForAccounting(self, startTime, day):
        """ Compute the time for the accounting record, starting from the start time of the query and the day bin
    """
        self.log.verbose(
            "find time for accounting for startTime: %s + day %s " %
            (startTime, day))
        daysToAdd = timedelta(
            days=day, hours=12
        )  # add 12h just to put the value in the middle of time bin
        self.log.verbose("timedelta to add: %s " % daysToAdd)
        accTime = startTime + daysToAdd
        self.log.verbose("accTime = %s " % accTime)
        return accTime
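        # Illustrative example: with startTime = 2018-01-01 00:00:00 and day = 2, the
        # accounting time is 2018-01-01 + 2 days + 12 hours = 2018-01-03 12:00:00,
        # i.e. the middle of that day's bin.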