]))
Script.parseCommandLine()
args = Script.getPositionalArgs()

if not len(args) == 2:
    Script.showHelp()

try:
    prodID = int(args[0])
except:
    Script.showHelp()
filetype = args[1]

from LHCbDIRAC.BookkeepingSystem.Client.BookkeepingClient import BookkeepingClient

client = BookkeepingClient()
res = client.getProductionFiles(prodID, filetype)
if not res['OK']:
    print 'ERROR: Failed to retrieve production files: %s' % res['Message']
else:
    if not res['Value']:
        print 'No files found for production %s with type %s' % (prodID,
                                                                 filetype)
    else:
        print '%s %s %s %s %s' % ('FileName'.ljust(100), 'Size'.ljust(10),
                                  'GUID'.ljust(40), 'Replica'.ljust(8),
                                  'Visible'.ljust(8))
        for lfn in sorted(res['Value']):
            size = res['Value'][lfn]['FileSize']
            guid = res['Value'][lfn]['GUID']
            hasReplica = res['Value'][lfn]['GotReplica']
Example #2
########################################################################
"""
  List simulation conditions from the Bookkeeping
"""
__RCSID__ = "$Id$"

import DIRAC
from DIRAC.Core.Base import Script
Script.setUsageMessage('\n'.join([
    __doc__.split('\n')[1], 'Usage:',
    '  %s [option|cfgfile] ...' % Script.scriptName
]))
Script.parseCommandLine(ignoreErrors=True)

from LHCbDIRAC.BookkeepingSystem.Client.BookkeepingClient import BookkeepingClient
bk = BookkeepingClient()
exitCode = 0

res = bk.getSimConditions()
if res['OK']:
    dbresult = res['Value']
    for record in dbresult:
        print 'SimId: ' + str(record[0]).ljust(10)
        print '  SimDescription: ' + str(record[1]).ljust(10)
        print '  BeamCond: ' + str(record[2]).ljust(10)
        print '  BeamEnergy: ' + str(record[3]).ljust(10)
        print '  Generator: ' + str(record[4]).ljust(10)
        print '  MagneticField: ' + str(record[5]).ljust(10)
        print '  DetectorCond: ' + str(record[6]).ljust(10)
        print '  Luminosity: ' + str(record[7]).ljust(10)
        print '  G4settings: ' + str(record[8]).ljust(10)
Example #3
class DiracLHCb(Dirac):

  #############################################################################
  def __init__(self, withRepo=False, repoLocation='', operationsHelperIn=None):
    """Internal initialization of the DIRAC API.
    """

    super(DiracLHCb, self).__init__(withRepo=withRepo, repoLocation=repoLocation)
    self.tier1s = []

    if not operationsHelperIn:
      self.opsH = Operations()
    else:
      self.opsH = operationsHelperIn

    self._bkQueryTemplate = {'SimulationConditions': 'All',
                             'DataTakingConditions': 'All',
                             'ProcessingPass': 'All',
                             'FileType': 'All',
                             'EventType': 'All',
                             'ConfigName': 'All',
                             'ConfigVersion': 'All',
                             'Production': 0,
                             'StartRun': 0,
                             'EndRun': 0,
                             'DataQuality': 'All',
                             'Visible': 'Yes'}
    self._bkClient = BookkeepingClient()  # to expose all BK client methods indirectly
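    # The query-building methods below each start from a copy of this template,
    # e.g. query = self._bkQueryTemplate.copy(); query['FileType'] = 'DST'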

  #############################################################################
  def addRootFile(self, lfn, fullPath, diracSE, printOutput=False):
    """ Add a Root file to Grid storage, an attempt is made to retrieve the
        POOL GUID of the file prior to upload.

       Example Usage:

       >>> print dirac.addRootFile('/lhcb/user/p/paterson/myRootFile.tar.gz','myFile.tar.gz','CERN-USER')
       {'OK': True, 'Value':{'Failed': {},
        'Successful': {'/lhcb/user/p/paterson/test/myRootFile.tar.gz': {'put': 64.246301889419556,
                                                                    'register': 1.1102778911590576}}}}

       @param lfn: Logical File Name (LFN)
       @type lfn: string
       @param fullPath: Local path to the file to be uploaded
       @type fullPath: string
       @param diracSE: DIRAC SE name e.g. CERN-USER
       @type diracSE: string
       @param printOutput: Optional flag to print result
       @type printOutput: boolean
       @return: S_OK,S_ERROR
    """
    # makeGuid() returns a dict keyed by the local file path, hence the [fullPath] lookup
    return super(DiracLHCb, self).addFile(lfn, fullPath, diracSE,
                                          fileGuid=makeGuid(fullPath)[fullPath],
                                          printOutput=printOutput)

  def addFile(self, lfn, fullPath, diracSE, printOutput=False):  # pylint: disable=arguments-differ
    """ Copy of addRootFile
    """
    return super(DiracLHCb, self).addFile(lfn, fullPath, diracSE,
                                          fileGuid=makeGuid(fullPath)[fullPath],
                                          printOutput=printOutput)

  def getBKAncestors(self, lfns, depth=1, replica=True):
    """ This function allows to retrieve ancestor files from the Bookkeeping.

        Example Usage:

        >>> dirac.getBKAncestors('/lhcb/data/2009/DST/00005727/0000/00005727_00000042_1.dst',2)
        {'OK': True, 'Value': ['/lhcb/data/2009/DST/00005727/0000/00005727_00000042_1.dst',
        '/lhcb/data/2009/RAW/FULL/LHCb/COLLISION09/63807/063807_0000000004.raw']}

       @param lfns: Logical File Name(s) (LFN)
       @type lfns: string or list
       @param depth: Ancestor depth
       @type depth: integer
    """

    result = self._bkClient.getFileAncestors(lfns, depth, replica=replica)
    if not result['OK']:
      self.log.error('Could not get ancestors', result['Message'])
      return result
    ancestors = set(x['FileName'] for ancestors in result['Value']['Successful'].itervalues() for x in ancestors)

    return S_OK(lfns + list(ancestors))

  #############################################################################
  def bkQueryRunsByDate(self, bkPath, startDate, endDate, dqFlag='All', selection='Runs'):
    """ This function allows to create and perform a BK query given a supplied
        BK path. The following BK path convention is expected:

        /<ConfigurationName>/<Configuration Version>/<Condition Description>/<Processing Pass>/<Event Type>/<File Type>

        so an example for 2016 collisions data would be:

        /LHCb/Collision16/Beam6500GeV-VeloClosed-MagDown/Real Data/Reco16/Stripping26/90000000/EW.DST

        The startDate and endDate must be specified as yyyy-mm-dd.

        Runs can be selected based on their status e.g. the selection parameter
        has the following possible attributes:
         - Runs - data for all runs in the range are queried (default)
         - ProcessedRuns - data is retrieved for runs that are processed
         - NotProcessed - data is retrieved for runs that are not yet processed.

       Example Usage:

       >>> dirac.bkQueryRunsByDate('/LHCb/Collision16//Real Data/90000000/RAW',
                                   '2016-08-20','2016-08-22',dqFlag='OK',selection='Runs')
       {'OK': True, 'Value': [<LFN1>,<LFN2>]}

      dirac.bkQueryRunsByDate('/LHCb/Collision16/Beam6500GeV-VeloClosed-MagDown/Real'
                              'Data/Reco16/Stripping26/90000000/EW.DST',
                              '2016-08-20','2016-08-22',dqFlag='OK',selection='Runs')

       @param bkPath: BK path as described above
       @type bkPath: string
       @param dqFlag: Optional Data Quality flag
       @type dqFlag: string
       @param startDate: Start date  yyyy-mm-dd
       @type startDate: string
       @param endDate: End date  yyyy-mm-dd
       @type endDate: string
       @param selection: Either Runs, ProcessedRuns or NotProcessed
       @type selection: string
       @return: S_OK,S_ERROR
    """
    runSelection = ['Runs', 'ProcessedRuns', 'NotProcessed']
    if selection not in runSelection:
      return S_ERROR('Expected one of %s not "%s" for selection' % (', '.join(runSelection), selection))

    if not isinstance(bkPath, str):
      return S_ERROR('Expected string for bkPath')

    # remove any double slashes, spaces must be preserved
    # remove any empty components from leading and trailing slashes
    bkQuery = BKQuery().buildBKQuery(bkPath)
    if not bkQuery:
      return S_ERROR(
          'Please provide a BK path: '
          '/<ConfigurationName>/<Configuration Version>/<Condition Description>/<Processing Pass>'
          '/<Event Type>/<File Type>')

    if not startDate or not endDate:
      return S_ERROR('Expected both start and end dates to be defined in format: yyyy-mm-dd')

    if not isinstance(startDate, str) or not isinstance(endDate, str):
      return S_ERROR('Expected yyyy-mm-dd string for start and end dates')

    if not len(startDate.split('-')) == 3 or not len(endDate.split('-')) == 3:
      return S_ERROR('Expected yyyy-mm-dd string for start and end dates')

    start = time.time()
    result = self._bkClient.getRunsForAGivenPeriod({'StartDate': startDate, 'EndDate': endDate})
    rtime = time.time() - start
    self.log.info('BK query time: %.2f sec' % rtime)
    if not result['OK']:
      self.log.info('Could not get runs with given dates from BK with result: "%s"' % result)
      return result

    if not result['Value']:
      self.log.info('No runs selected from BK for specified dates')
      return result

    if selection not in result['Value']:
      return S_ERROR('No %s runs for specified dates' % (selection))

    runs = result['Value'][selection]
    self.log.info('Found the following %s runs:\n%s' % (len(runs), ', '.join([str(i) for i in runs])))
    # temporary until we can query for a discrete list of runs
    selectedData = []
    for run in runs:
      query = bkQuery.copy()
      query['StartRun'] = run
      query['EndRun'] = run
      query['CheckRunStatus'] = True if selection in ['ProcessedRuns', 'NotProcessed'] else False
      if dqFlag:
        check = self.__checkDQFlags(dqFlag)
        if not check['OK']:
          return check
        dqFlag = check['Value']
        query['DataQuality'] = dqFlag
      start = time.time()
      result = self._bkClient.getVisibleFilesWithMetadata(query)
      rtime = time.time() - start
      self.log.info('BK query time: %.2f sec' % rtime)
      self.log.verbose(result)
      if not result['OK']:
        return result
      self.log.info('Selected %s files for run %s' % (len(result['Value'].get('LFNs', {})), run))
      if result['Value']['LFNs']:
        selectedData += result['Value']['LFNs'].keys()

    self.log.info('Total files selected = %s' % (len(selectedData)))
    return S_OK(selectedData)

  #############################################################################
  def bkQueryRun(self, bkPath, dqFlag='All'):
    """ This function allows to create and perform a BK query given a supplied
        BK path. The following BK path convention is expected:

        /<Run Number>/<Processing Pass>/<Event Type>/<File Type>

        so an example for 2009 collisions data would be:

       /63566/Real Data + RecoToDST-07/90000000/DST

       In addition users can specify a range of runs using the following convention:

       /<Run Number 1> - <Run Number 2>/<Processing Pass>/<Event Type>/<File Type>

       so extending the above example this would look like:

       /63566-63600/Real Data + RecoToDST-07/90000000/DST

       Example Usage:

       >>> dirac.bkQueryRun('/63566/Real Data/RecoToDST-07/90000000/DST')
       {'OK':True,'Value': ['/lhcb/data/2009/DST/00005842/0000/00005842_00000008_1.dst']}

       @param bkPath: BK path as described above
       @type bkPath: string
       @param dqFlag: Optional Data Quality flag
       @type dqFlag: string
       @return: S_OK,S_ERROR
    """
    if not isinstance(bkPath, str):
      return S_ERROR('Expected string for bkPath')

    # remove any double slashes, spaces must be preserved
    # remove any empty components from leading and trailing slashes
    bkPath = translateBKPath(bkPath, procPassID=1)
    if not len(bkPath) == 4:
      return S_ERROR('Expected 4 components to the BK path: /<Run Number>/<Processing Pass>/<Event Type>/<File Type>')

    runNumberString = bkPath[0].replace('--', '-').replace(' ', '')
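    # accept either a single run number or a '<Run 1>-<Run 2>' range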
    startRun = 0
    endRun = 0
    if '-' in runNumberString:
      runs = runNumberString.split('-')
      if len(runs) != 2:
        return S_ERROR('Could not determine run range from "%s", try "<Run 1> - <Run2>"' % (runNumberString))
      try:
        start = int(runs[0])
        end = int(runs[1])
      except Exception:
        return S_ERROR('Invalid run range: %s' % runNumberString)
      startRun = min(start, end)
      endRun = max(start, end)
    else:
      try:
        startRun = int(runNumberString)
        endRun = startRun
      except Exception:
        return S_ERROR('Invalid run number: %s' % runNumberString)

    query = self._bkQueryTemplate.copy()
    query['StartRun'] = startRun
    query['EndRun'] = endRun
    query['ProcessingPass'] = bkPath[1]
    query['EventType'] = bkPath[2]
    query['FileType'] = bkPath[3]

    if dqFlag:
      check = self.__checkDQFlags(dqFlag)
      if not check['OK']:
        return check
      dqFlag = check['Value']
      query['DataQuality'] = dqFlag

    result = self.bkQuery(query)
    self.log.verbose(result)
    return result

  #############################################################################
  def bkQueryProduction(self, bkPath, dqFlag='All'):
    """ This function allows to create and perform a BK query given a supplied
        BK path. The following BK path convention is expected:

        /<ProductionID>/[<Processing Pass>/<Event Type>/]<File Type>

        so an example for 2009 collisions data would be:

       /5842/Real Data + RecoToDST-07/90000000/DST

       Note that neither the processing pass nor the event type is strictly required, so either (or both) can be omitted.

       A data quality flag can also optionally be provided; the full list of flags is available
       via the getAllDQFlags() method.

       Example Usage:

       >>> dirac.bkQueryProduction('/5842/Real Data/RecoToDST-07/90000000/DST')
       {'OK': True, 'Value': [<LFN1>,<LFN2>]}

       @param bkPath: BK path as described above
       @type bkPath: string
       @param dqFlag: Optional Data Quality flag
       @type dqFlag: string
       @return: S_OK,S_ERROR
    """
    if not isinstance(bkPath, str):
      return S_ERROR('Expected string for bkPath')

    # remove any double slashes, spaces must be preserved
    # remove any empty components from leading and trailing slashes
    bkPath = translateBKPath(bkPath, procPassID=1)
    if len(bkPath) < 2:
      return S_ERROR('Invalid bkPath: should at least contain /ProductionID/FileType')
    query = self._bkQueryTemplate.copy()
    try:
      query['Production'] = int(bkPath[0])
    except Exception:
      return S_ERROR('Invalid production ID')
    query['FileType'] = bkPath[-1]

    if dqFlag:
      check = self.__checkDQFlags(dqFlag)
      if not check['OK']:
        return check
      dqFlag = check['Value']
      query['DataQuality'] = dqFlag

    for key, val in query.items():
      if isinstance(val, basestring) and val.lower() == 'all':
        query.pop(key)
    result = self.bkQuery(query)
    self.log.verbose(result)
    return result

  #############################################################################
  def bkQueryPath(self, bkPath, dqFlag='All'):
    """ This function allows to create and perform a BK query given a supplied
        BK path. The following BK path convention is expected:

       /<ConfigurationName>/<Configuration Version>/<Sim or Data Taking Condition>
       /<Processing Pass>/<Event Type>/<File Type>

       so an example for 2009 collisions data would be:

       /LHCb/Collision09/Beam450GeV-VeloOpen-MagDown/Real Data + RecoToDST-07/90000000/DST

       or for MC09 simulated data:

       /MC/2010/Beam3500GeV-VeloClosed-MagDown-Nu1/2010-Sim01Reco01-withTruth/27163001/DST

       A data quality flag can also optionally be provided; the full list of flags is available
       via the getAllDQFlags() method.

       Example Usage:

       >>> dirac.bkQueryPath('/MC/2010/Beam3500GeV-VeloClosed-MagDown-Nu1/Sim07/Reco06-withTruth/10012004/DST')
       {'OK': True, 'Value': [<LFN1>,<LFN2>]}

       @param bkPath: BK path as described above
       @type bkPath: string
       @param dqFlag: Optional Data Quality flag
       @type dqFlag: string
       @return: S_OK,S_ERROR
    """
    if not isinstance(bkPath, str):
      return S_ERROR('Expected string for bkPath')

    # remove any double slashes, spaces must be preserved
    # remove any empty components from leading and trailing slashes
    bkPath = translateBKPath(bkPath, procPassID=3)
    if not len(bkPath) == 6:
      return S_ERROR('Expected 6 components to the BK path: '
                     '/<ConfigurationName>/<Configuration Version>/<Sim or Data Taking Condition>'
                     '/<Processing Pass>/<Event Type>/<File Type>')

    query = self._bkQueryTemplate.copy()
    query['ConfigName'] = bkPath[0]
    query['ConfigVersion'] = bkPath[1]
    query['ProcessingPass'] = bkPath[3]
    query['EventType'] = bkPath[4]
    query['FileType'] = bkPath[5]

    if dqFlag:
      check = self.__checkDQFlags(dqFlag)
      if not check['OK']:
        return check
      dqFlag = check['Value']
      query['DataQuality'] = dqFlag

    # The problem here is that we don't know if it's a sim or data taking condition,
    # assume that if configName=MC this is simulation
    if bkPath[0].lower() == 'mc':
      query['SimulationConditions'] = bkPath[2]
    else:
      query['DataTakingConditions'] = bkPath[2]

    result = self.bkQuery(query)
    self.log.verbose(result)
    return result

  #############################################################################
  def bookkeepingQuery(self, SimulationConditions='All', DataTakingConditions='All',
                       ProcessingPass='All', FileType='All', EventType='All', ConfigName='All',
                       ConfigVersion='All', ProductionID=0, DataQuality='ALL'):
    """ This function will create and perform a BK query using the supplied arguments
        and return a list of LFNs.

        Example Usage:

        >>> dirac.bookkeepingQuery(ConfigName='LHCb',ConfigVersion='Collision09',
        EventType='90000000',ProcessingPass='******',DataTakingConditions='Beam450GeV-VeloOpen-MagDown')
        {'OK':True,'Value':<files>}

       @param  ConfigName: BK ConfigName
       @type ConfigName: string
       @param  EventType: BK EventType
       @type EventType: string
       @param  FileType: BK FileType
       @type FileType: string
       @param  ProcessingPass: BK ProcessingPass
       @type ProcessingPass: string
       @param  ProductionID: BK ProductionID
       @type ProductionID: integer
       @param  DataQuality: BK DataQuality
       @type DataQuality: string
       @param  ConfigVersion: BK ConfigVersion
       @type ConfigVersion: string
       @param  DataTakingConditions: BK DataTakingConditions
       @type DataTakingConditions: string
       @param  SimulationConditions: BK SimulationConditions
       @type SimulationConditions: string
       @return: S_OK,S_ERROR
    """
    query = self._bkQueryTemplate.copy()
    query['SimulationConditions'] = SimulationConditions
    query['DataTakingConditions'] = DataTakingConditions
    query['ProcessingPass'] = ProcessingPass
    query['FileType'] = FileType
    query['EventType'] = EventType
    query['ConfigName'] = ConfigName
    query['ConfigVersion'] = ConfigVersion
    query['Production'] = ProductionID
    query['DataQuality'] = DataQuality
    return self.bkQuery(query)

  #############################################################################
  def bkQuery(self, bkQueryDict):
    """ Developer function. Perform a query to the LHCb Bookkeeping to return
        a list of LFN(s). This method takes a BK query dictionary.

        Example Usage:

        >>> print dirac.bkQuery(query)
        {'OK':True,'Value':<files>}

       @param bkQueryDict: BK query
       @type bkQueryDict: dictionary (see bookkeepingQuery() for keys)
       @return: S_OK,S_ERROR
    """
    problematicFields = []
    # Remove the Visible flag as anyway the method is for visible files ;-)
    # bkQueryDict.setdefault( 'Visible', 'Yes' )
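    # Illustrative query dictionary (values borrowed from the bkQueryPath() docstring above);
    # keys must be a subset of self._bkQueryTemplate:
    #   {'ConfigName': 'MC', 'ConfigVersion': '2010',
    #    'SimulationConditions': 'Beam3500GeV-VeloClosed-MagDown-Nu1',
    #    'ProcessingPass': '2010-Sim01Reco01-withTruth',
    #    'EventType': '27163001', 'FileType': 'DST'}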
    for name, value in bkQueryDict.items():
      if name not in self._bkQueryTemplate:
        problematicFields.append(name)

    if problematicFields:
      msg = 'The following fields are not valid for a BK query: %s\nValid fields include: %s' % \
            (', '.join(problematicFields), ', '.join(self._bkQueryTemplate.keys()))
      return S_ERROR(msg)

    for name, value in bkQueryDict.items():
      if name == "Production" or name == "EventType" or name == "StartRun" or name == "EndRun":
        if value == 0:
          del bkQueryDict[name]
        else:
          bkQueryDict[name] = str(value)
      elif name == "FileType":
        if value.lower() == "all":
          bkQueryDict[name] = 'ALL'
      else:
        if str(value).lower() == "all":
          del bkQueryDict[name]

    if 'Production' in bkQueryDict or 'StartRun' in bkQueryDict or 'EndRun' in bkQueryDict:
      self.log.verbose('Found a specific query so loosening some restrictions to prevent BK overloading')
    else:
      if 'SimulationConditions' not in bkQueryDict and 'DataTakingConditions' not in bkQueryDict:
        return S_ERROR('A Simulation or DataTaking Condition must be specified for a BK query.')
      if 'EventType' not in bkQueryDict and 'ConfigName' not in bkQueryDict and 'ConfigVersion' not in bkQueryDict:
        return S_ERROR(
            'The minimal set of BK fields for a query is: EventType, ConfigName and ConfigVersion'
            ' in addition to a Simulation or DataTaking Condition')

    self.log.verbose('Final BK query dictionary is:')
    for item in bkQueryDict.iteritems():
      self.log.verbose('%s : %s' % item)

    start = time.time()
    result = self._bkClient.getVisibleFilesWithMetadata(bkQueryDict)
#    result = bk.getFilesWithGivenDataSets(bkQueryDict)
    rtime = time.time() - start
    self.log.info('BK query time: %.2f sec' % rtime)

    if not result['OK']:
      return S_ERROR('BK query returned an error: "%s"' % (result['Message']))

    if not result['Value']:
      return self._errorReport('No BK files selected')

    returnedFiles = len(result['Value'])
    self.log.verbose('%s files selected from the BK' % (returnedFiles))
    return result

  #############################################################################
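  # For illustration (assuming both flags exist in the Bookkeeping):
  #   __checkDQFlags('ok')          -> S_OK('OK')
  #   __checkDQFlags(['OK', 'BAD']) -> S_OK(['OK', 'BAD'])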
  def __checkDQFlags(self, flags):
    """ Internal function.  Checks the provided flags against the list of
        possible DQ flag statuses from the Bookkeeping.
    """
    dqFlags = []
    if isinstance(flags, list):
      dqFlags = flags
    else:
      dqFlags = [flags]

    bkFlags = self.getAllDQFlags()
    if not bkFlags['OK']:
      return bkFlags

    final = []
    for flag in dqFlags:
      if flag.lower() == 'all':
        final.append(flag.upper())
      else:
        flag = flag.upper()
        if flag not in bkFlags['Value']:
          msg = 'Specified DQ flag "%s" is not in allowed list: %s' % (flag, ', '.join(bkFlags['Value']))
          self.log.error(msg)
          return S_ERROR(msg)
        else:
          final.append(flag)

    # when first coding this it was not possible to use a list ;)
    if len(final) == 1:
      final = final[0]

    return S_OK(final)

  #############################################################################
  def getAllDQFlags(self, printOutput=False):
    """ Helper function.  Returns the list of possible DQ flag statuses
        from the Bookkeeping.

        Example Usage:

        >>> print dirac.getAllDQFlags()
        {'OK':True,'Value':<flags>}

       @param printOutput: Optional flag to print result
       @type printOutput: boolean
       @return: S_OK,S_ERROR
    """
    result = self._bkClient.getAvailableDataQuality()
    if not result['OK']:
      self.log.error('Could not obtain possible DQ flags from BK with result:\n%s' % (result))
      return result

    if printOutput:
      flags = result['Value']
      self.log.info('Possible DQ flags from BK are: %s' % (', '.join(flags)))

    return result

  #############################################################################
  def getDataByRun(self, lfns, printOutput=False):
    """Sort the supplied lfn list by run. An S_OK object will be returned
       containing a dictionary of runs and the corresponding list of LFN(s)
       associated with them.

       Example usage:

       >>> print dirac.getDataByRun(lfns)
       {'OK': True, 'Value': {<RUN>:['<LFN>','<LFN>',...], <RUN>:['<LFN>',..]}}


       @param lfns: Logical File Name(s)
       @type lfns: list
       @param printOutput: Optional flag to print result
       @type printOutput: boolean
       @return: S_OK,S_ERROR
    """
    if isinstance(lfns, str):
      lfns = [lfns.replace('LFN:', '')]
    elif isinstance(lfns, list):
      try:
        lfns = [str(lfn.replace('LFN:', '')) for lfn in lfns]
      except ValueError as x:
        return self._errorReport(str(x), 'Expected strings for LFNs')
    else:
      return self._errorReport('Expected single string or list of strings for LFN(s)')

    runDict = {}
    start = time.time()
    result = self._bkClient.getFileMetadata(lfns)
    self.log.verbose("Obtained BK file metadata in %.2f seconds" % (time.time() - start))
    if not result['OK']:
      self.log.error('Failed to get bookkeeping metadata with result "%s"' % (result['Message']))
      return result

    for lfn, metadata in result['Value']['Successful'].items():
      if 'RunNumber' in metadata:
        runNumber = metadata['RunNumber']
        runDict.setdefault(runNumber, []).append(lfn)
      else:
        self.log.warn('Could not find run number from BK for %s' % (lfn))

    if printOutput:
      self.log.notice(self.pPrint.pformat(runDict))

    return S_OK(runDict)

  #############################################################################
  def bkMetadata(self, lfns, printOutput=False):
    """Return metadata for the supplied lfn list. An S_OK object will be returned
       containing a dictionary of LFN(s) and the corresponding metadata associated
       with them.

       Example usage:

       >>> print dirac.bkMetadata(lfns)
       {'OK': True, 'Value': {<LFN>:{'<Name>':'<Value>',...},...}}

       @param lfns: Logical File Name(s)
       @type lfns: list
       @param printOutput: Optional flag to print result
       @type printOutput: boolean
       @return: S_OK,S_ERROR
    """
    if isinstance(lfns, str):
      lfns = [lfns.replace('LFN:', '')]
    elif isinstance(lfns, list):
      try:
        lfns = [str(lfn.replace('LFN:', '')) for lfn in lfns]
      except ValueError as x:
        return self._errorReport(str(x), 'Expected strings for LFNs')
    else:
      return self._errorReport('Expected single string or list of strings for LFN(s)')

    start = time.time()
    result = self._bkClient.getFileMetadata(lfns)
    self.log.verbose("Obtained BK file metadata in %.2f seconds" % (time.time() - start))
    if not result['OK']:
      self.log.error('Failed to get bookkeeping metadata with result "%s"' % (result['Message']))
      return result

    if printOutput:
      self.log.notice(self.pPrint.pformat(result['Value']))

    return result

  #############################################################################

  def lhcbProxyInit(self, *args):  # pylint: disable=no-self-use
    """ just calling the dirac-proxy-init script
    """
    os.system("dirac-proxy-init -o LogLevel=NOTICE -t --rfc %s" % "' '".join(args))

  #############################################################################

  def lhcbProxyInfo(self, *args):  # pylint: disable=no-self-use
    """ just calling the dirac-proxy-info script
    """
    os.system("dirac-proxy-info -o LogLevel=NOTICE %s" % "' '".join(args))
  #############################################################################

  def gridWeather(self, printOutput=False):
    """This method gives a snapshot of the current Grid weather from the perspective
       of the DIRAC site and SE masks.  Tier-1 sites are returned with more detailed
       information.

       Example usage:

       >>> print dirac.gridWeather()
       {'OK': True, 'Value': {'Sites':<siteInfo>,'SEs':<seInfo>,'Tier-1s':<tierInfo>}}

       @param printOutput: Optional flag to print result
       @type printOutput: boolean
       @return: S_OK,S_ERROR
    """

    lcgSites = gConfig.getSections('/Resources/Sites/LCG')
    if not lcgSites['OK']:
      return lcgSites

    for lcgSite in lcgSites['Value']:

      tier = gConfig.getValue('/Resources/Sites/LCG/%s/MoUTierLevel' % lcgSite, 2)
      if tier in (0, 1):
        self.tier1s.append(lcgSite)

    siteInfo = self.checkSites()
    if not siteInfo['OK']:
      return siteInfo
    siteInfo = siteInfo['Value']

    seInfo = self.checkSEs()
    if not seInfo['OK']:
      return seInfo
    seInfo = seInfo['Value']

    tierSEs = {}
    for site in self.tier1s:
      tierSEs[site] = getSEsForSite(site)['Value']

    tierInfo = {}
    for site, seList in tierSEs.items():
      tierInfo[site] = {}
      for se in seList:
        if se in seInfo:
          tierSEInfo = seInfo[se]
          tierInfo[site][se] = tierSEInfo
      if site in siteInfo['AllowedSites']:
        tierInfo[site]['MaskStatus'] = 'Allowed'
      else:
        tierInfo[site]['MaskStatus'] = 'Banned'

    if printOutput:
      self.log.notice('========> Tier-1 status in DIRAC site and SE masks')
      for site in sorted(self.tier1s):
        self.log.notice('\n====> %s is %s in site mask\n' % (site, tierInfo[site]['MaskStatus']))
        self.log.notice('%s %s %s' % ('Storage Element'.ljust(25), 'Read Status'.rjust(15), 'Write Status'.rjust(15)))
        for se in sorted(tierSEs[site]):
          if se in tierInfo[site]:
            self.log.notice('%s %s %s' % (se.ljust(25),
                                          tierInfo[site][se]['ReadStatus'].rjust(15),
                                          tierInfo[site][se]['WriteStatus'].rjust(15))
                            )

      self.log.notice('\n========> Tier-2 status in DIRAC site mask\n')
      allowedSites = siteInfo['AllowedSites']
      bannedSites = siteInfo['BannedSites']
      for site in self.tier1s:
        if site in allowedSites:
          allowedSites.remove(site)
        if site in bannedSites:
          bannedSites.remove(site)
      self.log.notice(' %s sites are in the site mask, %s are banned.\n' % (len(allowedSites), len(bannedSites)))

    summary = {'Sites': siteInfo, 'SEs': seInfo, 'Tier-1s': tierInfo}
    return S_OK(summary)

  #############################################################################
  def checkSites(self, printOutput=False):  # pylint: disable=no-self-use
    """Return the list of sites in the DIRAC site mask and those which are banned.

       Example usage:

       >>> print dirac.checkSites()
       {'OK': True, 'Value': {'AllowedSites':['<Site>',...],'BannedSites':[]}}

       @param printOutput: Optional flag to print result
       @type printOutput: boolean
       @return: S_OK,S_ERROR
    """

    res = getSites()
    if not res['OK']:
      self.log.error('Could not get list of sites from CS', res['Message'])
      return res
    totalList = res['Value']

    res = DiracAdmin().getSiteMask()
    if not res['OK']:
      return res

    sites = res['Value']
    bannedSites = []
    for site in totalList:
      if site not in sites:
        bannedSites.append(site)

    if printOutput:
      self.log.notice('\n========> Allowed Sites\n')
      self.log.notice('\n'.join(sites))
      self.log.notice('\n========> Banned Sites\n')
      self.log.notice('\n'.join(bannedSites))
      self.log.notice('\nThere is a total of %s allowed sites and %s banned sites in the system.' % (len(sites),
                                                                                                     len(bannedSites)))

    return S_OK({'AllowedSites': sites, 'BannedSites': bannedSites})

  #############################################################################
  def checkSEs(self, printOutput=False):  # pylint: disable=no-self-use
    """Check the status of read and write operations in the DIRAC SE mask.

       Example usage:

       >>> print dirac.checkSEs()
       {'OK': True, 'Value': {<SE>: {'ReadStatus': '<Status>', 'WriteStatus': '<Status>'}, ...}}

       @param printOutput: Optional flag to print result
       @type printOutput: boolean
       @return: S_OK,S_ERROR
    """
    res = gConfig.getSections('/Resources/StorageElements', True)

    if not res['OK']:
      self.log.error('Failed to get storage element information', res['Message'])
      return res

    if printOutput:
      self.log.notice('%s %s %s' % ('Storage Element'.ljust(25), 'Read Status'.rjust(15), 'Write Status'.rjust(15)))

    seList = sorted(res['Value'])
    result = {}
    rss = ResourceStatus()
    for se in seList:
      res = rss.getElementStatus(se, 'StorageElement')
      if not res['OK']:
        self.log.error("Failed to get StorageElement status for %s" % se)
      else:
        readState = res['Value'].get('ReadAccess', 'Active')
        writeState = res['Value'].get('WriteAccess', 'Active')
        result[se] = {'ReadStatus': readState, 'WriteStatus': writeState}
        if printOutput:
          self.log.notice('%s %s %s' % (se.ljust(25), readState.rjust(15), writeState.rjust(15)))

    return S_OK(result)

  def splitInputDataBySize(self, lfns, maxSizePerJob=20, printOutput=False):
    """Split the supplied lfn list by the replicas present at the possible
       destination sites, based on a maximum size.
       An S_OK object will be returned containing a list of
       lists in order to create the jobs.

       Example usage:

       >>> d.splitInputDataBySize(lfns,10)
       {'OK': True, 'Value': [['<LFN>'], ['<LFN>']]}


       @param lfns: Logical File Name(s) to split
       @type lfns: list
       @param maxSizePerJob: Maximum size (in GB) per bunch
       @type maxSizePerJob: integer
       @param printOutput: Optional flag to print result
       @type printOutput: boolean
       @return: S_OK,S_ERROR
    """
    sitesForSE = {}
    if isinstance(lfns, str):
      lfns = [lfns.replace('LFN:', '')]
    elif isinstance(lfns, list):
      try:
        lfns = [str(lfn.replace('LFN:', '')) for lfn in lfns]
      except TypeError as x:
        return self._errorReport(str(x), 'Expected strings for LFNs')
    else:
      return self._errorReport('Expected single string or list of strings for LFN(s)')

    if not isinstance(maxSizePerJob, int):
      try:
        maxSizePerJob = int(maxSizePerJob)
      except ValueError as x:
        return self._errorReport(str(x), 'Expected integer for maxSizePerJob')
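    # maxSizePerJob is given in GB; convert it to bytes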
    maxSizePerJob *= 1000 * 1000 * 1000

    replicaDict = self.getReplicas(lfns)
    if not replicaDict['OK']:
      return replicaDict
    replicas = replicaDict['Value']['Successful']
    if not replicas:
      return self._errorReport(replicaDict['Value']['Failed'].items()[0],
                               'Failed to get replica information')
    siteLfns = {}
    for lfn, reps in replicas.items():
      possibleSites = set(site
                          for se in reps
                          for site in sitesForSE.setdefault(se, getSitesForSE(se).get('Value', [])))
      siteLfns.setdefault(','.join(sorted(possibleSites)), []).append(lfn)

    if '' in siteLfns:
      # Some files don't have active replicas
      return self._errorReport('No active replica found for', str(siteLfns['']))
    # Get size of files
    metadataDict = self.getLfnMetadata(lfns, printOutput)
    if not metadataDict['OK']:
      return metadataDict
    fileSizes = dict((lfn, metadataDict['Value']['Successful'].get(lfn, {}).get('Size', maxSizePerJob))
                     for lfn in lfns)

    lfnGroups = []
    # maxSize is in GB
    for files in siteLfns.values():
      # Now get bunches of files,
      # Sort in decreasing size
      files.sort(cmp=(lambda f1, f2: fileSizes[f2] - fileSizes[f1]))
      while files:
        # print [( lfn, fileSizes[lfn] ) for lfn in files]
        group = []
        sizeTot = 0
        for lfn in list(files):
          size = fileSizes[lfn]
          if size >= maxSizePerJob:
            # a single file exceeding the limit gets a group of its own and is dropped from the list
            lfnGroups.append([lfn])
            files.remove(lfn)
          elif sizeTot + size < maxSizePerJob:
            sizeTot += size
            group.append(lfn)
            files.remove(lfn)
        if group:
          lfnGroups.append(group)

    if printOutput:
      self.log.notice(self.pPrint.pformat(lfnGroups))
    return S_OK(lfnGroups)

    #############################################################################

  def getAccessURL(self, lfn, storageElement, protocol=None, printOutput=False):
    """Allows to retrieve an access URL for an LFN replica given a valid DIRAC SE
       name.  Contacts the file catalog and contacts the site SRM endpoint behind
       the scenes.

       Example Usage:

       >>> print dirac.getAccessURL('/lhcb/data/CCRC08/DST/00000151/0000/00000151_00004848_2.dst','CERN-RAW')
       {'OK': True, 'Value': {'Successful': {'srm://...': {'SRM2': 'rfio://...'}}, 'Failed': {}}}

       :param lfn: Logical File Name (LFN)
       :type lfn: str or python:list
       :param storageElement: DIRAC SE name e.g. CERN-RAW
       :type storageElement: string
       :param printOutput: Optional flag to print result
       :type printOutput: boolean
       :returns: S_OK,S_ERROR
    """
    ret = self._checkFileArgument(lfn, 'LFN')
    if not ret['OK']:
      return ret
    lfn = ret['Value']
    if isinstance(lfn, basestring):
      lfn = [lfn]
    results = getAccessURL(lfn, storageElement, protocol=protocol)
    if printOutput:
      printDMResult(results, empty="File not at SE", script="dirac-dms-lfn-accessURL")
    return results

  #############################################################################
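  # For illustration, the 'parameters' dict is expected to look like
  # {'InputData': '<LFN1>;<LFN2>', 'AncestorDepth': 2} (the only keys used here)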

  def _getLocalInputData(self, parameters):
    """ LHCb extension of DIRAC API's _getLocalInputData. Only used for handling ancestors.
    """
    inputData = parameters.get('InputData')
    if inputData:
      self.log.debug("DiracLHCb._getLocalInputData. InputData: %s" % inputData)
      if isinstance(inputData, basestring):
        inputData = inputData.split(';')
      inputData = [lfn.replace('LFN:', '') for lfn in inputData]
      ancestorsDepth = int(parameters.get('AncestorDepth', 0))
      if ancestorsDepth:
        self.log.debug("DiracLHCb._getLocalInputData. ancestorsDepth: %d" % ancestorsDepth)
        res = self._bkClient.getFileAncestors(inputData, ancestorsDepth)
        if not res['OK']:
          self.log.error("Can't get ancestors", res['Message'])
          return res
        ancestorsLFNs = []
        for ancestorsLFN in res['Value']['Successful'].itervalues():
          ancestorsLFNs += [i['FileName'] for i in ancestorsLFN]
        self.log.info("DiracLHCb._getLocalInputData: adding %d ancestors" % len(ancestorsLFNs))
        self.log.verbose("%s", ', '.join(ancestorsLFNs))
        inputData += ancestorsLFNs

    return S_OK(inputData)
Example #4
    transformation.setPlugin(plugin)
    transformation.setBkQuery(bkQueryDict)

    from LHCbDIRAC.TransformationSystem.Agent.TransformationPlugin import TransformationPlugin
    transID = -9999
    pluginParams['TransformationID'] = transID
    pluginParams['Status'] = "Active"
    pluginParams['Type'] = transType
    # Create a fake transformation client
    fakeClient = fakeClient(transformation, transID, lfns, asIfProd)
    from LHCbDIRAC.BookkeepingSystem.Client.BookkeepingClient import BookkeepingClient
    from DIRAC.DataManagementSystem.Client.DataManager import DataManager
    oplugin = TransformationPlugin(plugin,
                                   transClient=fakeClient,
                                   dataManager=DataManager(),
                                   bkClient=BookkeepingClient())
    pluginParams['TransformationID'] = transID
    pluginParams.update(pluginSEParams)
    oplugin.setParameters(pluginParams)
    replicas = fakeClient.getReplicas()
    # Special case of RAW files registered in CERN-RDST...
    if plugin == "AtomicRun":
        for lfn in [lfn for lfn in replicas if "CERN-RDST" in replicas[lfn]]:
            ses = {}
            for se in replicas[lfn]:
                pfn = replicas[lfn][se]
                if se == "CERN-RDST":
                    se = "CERN-RAW"
                ses[se] = pfn
            replicas[lfn] = ses
    files = fakeClient.getFiles()
Example #5
import DIRAC
from DIRAC.Core.Base import Script

Script.setUsageMessage('\n'.join([
    __doc__.split('\n')[1], 'Usage:',
    '  %s [option|cfgfile] ... LFN|File Flag' % Script.scriptName,
    'Arguments:', '  LFN:      Logical File Name',
    '  File:     Name of the file with a list of LFNs',
    '  Flag:     Quality Flag'
]))
Script.parseCommandLine(ignoreErrors=True)
args = Script.getPositionalArgs()

from LHCbDIRAC.BookkeepingSystem.Client.BookkeepingClient import BookkeepingClient
bk = BookkeepingClient()

if len(args) < 2:
    result = bk.getAvailableDataQuality()
    if not result['OK']:
        print 'ERROR: %s' % (result['Message'])
        DIRAC.exit(2)
    flags = result['Value']
    print "Available Data Quality Flags"
    for flag in flags:
        print flag
    Script.showHelp()

exitCode = 0
filename = args[0]
flag = args[1]
Example #6
                      " output file name [dirac-dms-chec-dir-cont.out]")
Script.registerSwitch("v", "Verbose",
                      " use this option for verbose output [False]")

Script.setUsageMessage('\n'.join([
    __doc__.split('\n')[1],
    'Usage:',
    '  %s [option|cfgfile] ...' % Script.scriptName,
]))

Script.parseCommandLine(ignoreErrors=False)

from DIRAC.Resources.Catalog.FileCatalog import FileCatalog
from DIRAC.Resources.Storage.StorageElement import StorageElement
from LHCbDIRAC.BookkeepingSystem.Client.BookkeepingClient import BookkeepingClient
bkClient = BookkeepingClient()

outputFileName = 'dirac-dms-chec-dir-cont.out'
verbose = False  # default when the -v/--Verbose switch is not given

for switch in Script.getUnprocessedSwitches():
    if switch[0].lower() == "u" or switch[0].lower() == "unit":
        unit = switch[1]
    if switch[0] == "D" or switch[0].lower() == "dir":
        dir = switch[1]
    if switch[0] == "f" or switch[0].lower() == "output":
        outputFile = switch[1]
        outputFileName = outputFile
    if switch[0] == "v" or switch[0].lower() == "verbose":
        verbose = True

if verbose:
Example #7
#!/usr/bin/env python

import sys
import random
import time

from DIRAC.Core.Base.Script import parseCommandLine
parseCommandLine()

from LHCbDIRAC.BookkeepingSystem.Client.BookkeepingClient import BookkeepingClient
cl = BookkeepingClient()

res = cl.getAvailableProductions()
if not res['OK']:
  print res['Message']
  sys.exit(0)

allproductions = sorted([i[0] for i in res['Value']], reverse=True)
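# keep the 10000 most recent (highest-numbered) production IDs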
productions = allproductions[:10000]


class Transaction(object):

  def __init__(self):
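    # custom_timers is the hook read by the load-test driver (presumably multi-mechanize)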
    self.custom_timers = {}

  def run(self):
    # print len(datasets)
    i = random.randint(0, len(productions) - 1)
    production = productions[i]
    # print dataset
Example #8
# DIRAC.exit(2)

#
# Processing pass needs to start as "/Real Data" for FULL stream flagging
#

if realData not in processing:
    print 'You forgot /Real Data in the processing pass:  ', processing
    DIRAC.exit(2)

res = bk.getRunInformations(int(run))  # assumed call; this part of the snippet was masked
if not res['OK']:
    gLogger.error('Cannot load the information for run %s' % (run))
    gLogger.error(res['Message'])
    DIRAC.exit(2)

dtd = res['Value']['DataTakingDescription']
configName = res['Value']['Configuration Name']
configVersion = res['Value']['Configuration Version']

bkDict = {
    'ConfigName': configName,
    'ConfigVersion': configVersion,
    'ConditionDescription': dtd
}
Example #9
    ]))

    Script.parseCommandLine(ignoreErrors=False)
    dumpList = False
    for switch in Script.getUnprocessedSwitches():
        if switch[0] == 'List':
            dumpList = True

    bkQuery = dmScript.getBKQuery()
    lfns = dmScript.getOption('LFNs', [])
    if not bkQuery and not lfns:
        gLogger.error("No BKQuery and no files given...")
        dExit(1)
    # Invert the visibility flag as want to set Invisible those that are visible and vice-versa
    from LHCbDIRAC.BookkeepingSystem.Client.BookkeepingClient import BookkeepingClient
    bk = BookkeepingClient()

    visibilityFlag = dmScript.getOption('Visibility', None)
    if visibilityFlag is None:
        gLogger.error('Visibility option should be given')
        dExit(2)
    visibilityFlag = str(visibilityFlag).lower() == 'yes'
    if bkQuery:
        # Query with visibility opposite to what is requested to be set ;-)
        bkQuery.setOption('Visible', 'No' if visibilityFlag else 'Yes')
        gLogger.notice("BK query:", bkQuery)
        lfns += bkQuery.getLFNs()
    if not lfns:
        gLogger.notice("No files found...")
    else:
        res = {'OK': True}
Example #10
class ProcessingProgress( object ):

  def __init__( self, cacheFile = None ):
    if not cacheFile:
      self.prodStatFile = os.path.join( os.environ['HOME'], ".dirac/work", "dirac-production-stats.pkl" )
    else:
      self.prodStatFile = cacheFile
    self.cacheVersion = '0.0'
    self.clearCache = []
    self.cachedInfo = {}

    # Recuperate the previous cached information
    self.readCache()

    self.bk = BookkeepingClient()
    self.transClient = TransformationClient()

  def setClearCache( self, clearCache ):
    self.clearCache = clearCache

  def __getProdBkDict( self, prod ):
    res = self.transClient.getBookkeepingQuery( prod )
    if not res['OK']:
      gLogger.error( "Couldn't get BK query on production %d" % prod )
      return {}
    prodBKDict = res['Value']
    return prodBKDict

  def getFullStats( self, bkQuery, printResult = False ):
    processingPass = bkQuery.getProcessingPass()
    if printResult:
      gLogger.info( "\nStatistics for processing %s, condition %s\n" % ( processingPass, bkQuery.getConditions() ) )
    prodStats = []
    processingPass = processingPass.split( '/' )
    if len( processingPass ) != 4 or processingPass[1] != "Real Data":
      gLogger.error( "Processing pass should be /Real Data/<Reco>/<Stripping>" )
      return []

    # Get production numbers for the Reco
    recoBKQuery = BKQuery( bkQuery )
    recoBKQuery.setProcessingPass( '/'.join( processingPass[0:3] ) )
    recoList = recoBKQuery.getBKProductions( visible = False )
    recoRunRanges = {}
    recoDQFlags = []
    for prod in recoList:
      prodBKDict = self.__getProdBkDict( prod )
      if prodBKDict:
        recoRunRanges[prod] = [prodBKDict.get( "StartRun", 0 ), prodBKDict.get( "EndRun", sys.maxint )]
        dqFlags = prodBKDict.get( "DataQualityFlag", ['UNCHECKED', 'EXPRESS_OK', 'OK'] )
        if isinstance( dqFlags, basestring ):
          dqFlags = dqFlags.split( ',' )
        recoDQFlags += [fl for fl in dqFlags if fl not in recoDQFlags]
      else:
        recoRunRanges[prod] = [0, 0]
    # Sort productions by runs
    try:
      recoList.sort( cmp = ( lambda p1, p2: int( recoRunRanges[p1][0] - recoRunRanges[p2][1] ) ) )
    except:
      print "Exception in sorting productions:"
      for p in recoList:
        print p, recoRunRanges[p]
    gLogger.verbose( "Reconstruction productions found (%d): %s" % ( len( recoList ), str( sorted( recoList ) ) ) )
    gLogger.verbose( "Reconstruction DQ flags: %s" % str( recoDQFlags ) )

    # Get productions for merging
    mergeList = []
    mergeStripProds = {}
    # Get stripping productions as parents of merging productions
    stripList = []
    for prod in bkQuery.getBKProductions( visible = False ):
      prodBKDict = self.__getProdBkDict( prod )
      gLogger.verbose( "BK query for production %s: %s" % ( prod, str( prodBKDict ) ) )
      mergedTypes = prodBKDict.get( 'FileType' )
      if type( mergedTypes ) != type( [] ):
        mergedTypes = [mergedTypes]
      if [ft for ft in bkQuery.getFileTypeList() if ft in mergedTypes] and 'ProductionID' in prodBKDict:
        mergeList.append( prod )
        prods = prodBKDict['ProductionID']
        if type( prods ) != type( [] ):
          prods = [prods]
        stripList += prods
        mergeStripProds[prod] = [str( p ) for p in prods]
      else:
        _msgTuple = ( str( bkQuery.getFileTypeList() ), prod, str( prodBKDict ) )
        gLogger.verbose( "Could not find production or filetype %s in BKquery of production %d (%s)" % _msgTuple )
    mergeList.sort( cmp = ( lambda p1, p2: int( mergeStripProds[p1][0] ) - int( mergeStripProds[p2][0] ) ) )
    gLogger.verbose( "Merging productions found: %s" % str( mergeList ) )

    # get list of stripping productions (from merging)
    stripRunRanges = {}
    for prod in stripList:
      prodBKDict = self.__getProdBkDict( prod )
      if prodBKDict:
        stripRunRanges[prod] = [prodBKDict.get( "StartRun", 0 ), prodBKDict.get( "EndRun", sys.maxint )]
      else:
        stripRunRanges[prod] = [0, 0]
    # Sort productions by runs
    try:
      stripList.sort( cmp = ( lambda p1, p2: int( stripRunRanges[p1][0] - stripRunRanges[p2][1] ) ) )
    except:
      print "Error when sorting stripping productions:"
      for prodStrip in stripList:
        print prodStrip, stripRunRanges[prodStrip]
    gLogger.verbose( "Stripping productions found (%d): %s" % ( len( stripList ), str( sorted( stripList ) ) ) )

    # Get all runs corresponding to the run range used by the Reco productions
    rawBKQuery = BKQuery( bkQuery )
    rawBKQuery.setProcessingPass( '/Real Data' )
    rawBKQuery.setFileType( "RAW" )
    # get the list of runs (-prodNum)
    fullRunList = rawBKQuery.getBKRuns()
    gLogger.verbose( "Initial list of runs: %s" % str( fullRunList ) )
    recoRunList = []
    openProd = False
    for prod in [p for p in recoList]:
      # Forget fully openProd productions
      # Don't consider productions without a BK query (these are individual files)
      if recoRunRanges[prod][1] == sys.maxint and recoRunRanges[prod][0] != -sys.maxint:
        openProd = True
        # Try and find if that open production overlaps with a closed one, in which case, remove it
        # Do nothing for derived productions
        for p in [p for p in recoList if p != prod and recoRunRanges[prod] != recoRunRanges[p]]:
          if recoRunRanges[prod][0] < recoRunRanges[p][1] and recoRunRanges[p][1] != sys.maxint:
            openProd = False
            gLogger.verbose( "Production %s was removed as redundant..." % str( prod ) )
            recoList.remove( prod )
            break
        if not openProd: continue
      recoRunList += [run for run in fullRunList if run not in recoRunList and run >= recoRunRanges[prod][0] and run <= recoRunRanges[prod][1]]
    gLogger.verbose( "List of runs matching Reco (%d): %s" % ( len( recoRunList ), str( sorted( recoRunList ) ) ) )

    restrictToStripping = True
    if restrictToStripping and not openProd and stripList:
      runList = []
      for prod in stripList:
        runList += [run for run in recoRunList if run not in runList and run >= stripRunRanges[prod][0] and run <= stripRunRanges[prod][1]]
    else:
      runList = recoRunList
    gLogger.verbose( "Final list of runs matching Reco and Stripping (%d): %s" % ( len( runList ), str( sorted( runList ) ) ) )

    # Now get statistics from the runs
    info, runInfo = self._getStatsFromRuns( int( bkQuery.getEventTypeList()[0] ), runList, recoDQFlags )
    rawInfo = StatInfo( processingPass[1], info )
    prodStats.append( rawInfo )
    if printResult:
      gLogger.info( "%s - All runs in Reco productions" % processingPass[1] )
      for fileInfo in runInfo:
        if runInfo[fileInfo]:
          gLogger.info( "%s runs (%d): %s" % ( fileInfo, len( runInfo[fileInfo] ), str( runInfo[fileInfo] ) ) )
      summStr = "%s files, " % rawInfo.getItemAsString( 'Files' )
      summStr += "%s events in " % rawInfo.getItemAsString( 'Events' )
      summStr += "%s runs, luminosity (pb-1):All=%s, Bad=%s, OK=%s" % ( rawInfo.getItemAsString( 'Runs' ),
                                                                        rawInfo.getItemAsString( 'Lumi' ),
                                                                        rawInfo.getItemAsString( 'BadLumi' ),
                                                                        rawInfo.getItemAsString( 'OKLumi' ) )
      gLogger.info( summStr )

    # Create the info for the 3 sets of productions
    prodSets = []
    fileType = bkQuery.getFileTypeList()[0]
    prodSets.append( {'Name': processingPass[2], 'FileType': ['SDST', 'FULL.DST'], 'List':recoList,
                      'RunRange':recoRunRanges, 'MotherProds':None, 'AllReplicas':False } )
    prodSets.append( {'Name': processingPass[3], 'FileType': fileType, 'List':stripList,
                      'RunRange':stripRunRanges, 'MotherProds':None, 'AllReplicas':True, 'StatForOK':False } )
    prodSets.append( {'Name': "Merging (%s)" % fileType.split( '.' )[0], 'FileType': fileType, 'List':mergeList,
                      'RunRange':None, 'MotherProds':mergeStripProds, 'AllReplicas':False } )

    prevInfo = rawInfo
    for prodSet in prodSets:
      info = StatInfo( prodSet['Name'], self._getProdInfo( prodSet, runList, printResult = printResult ) )
      info.setRawInfo( rawInfo )
      info.setPrevInfo( prevInfo )
      prevInfo = info
      prodStats.append( info )

    self.saveCache()
    return prodStats

  @staticmethod
  def __sumProdInfo( info, totInfo ):
    for inf in info:
      for flag in info[inf]:
        if inf == 'Runs':
          totInfo[inf][flag] = totInfo.setdefault( inf, {} ).setdefault( flag, [] ) + info[inf][flag]
        else:
          totInfo[inf][flag] = totInfo.setdefault( inf, {} ).setdefault( flag, 0 ) + info[inf][flag]
    return totInfo

  def _getProdInfo( self, prodSet, runList, printResult = False ):
    totInfo = {}
    if printResult:
      gLogger.info( "" )
    for prod in prodSet['List']:
      info, runInfo = self._getStatsFromBK( prod, prodSet['FileType'], runList, prodSet['AllReplicas'] )
      if info['Files'][''] == 0:
        continue
      if not prodSet.get( 'StatForOK', True ):
        for item in info:
          for fl in info[item]:
            if fl == 'OK':
              info[item][fl] = 0 if not item == 'Runs' else []
      runRange = prodSet['RunRange']
      if runRange and prod in runRange and runRange[prod][0] == 0 and runRange[prod][1] == 0:
        for flag in info['Runs']:
          info['Runs'][flag] = []
      totInfo = self.__sumProdInfo( info, totInfo )
      if printResult:
        summStr = "%s production %d -" % ( prodSet['Name'], prod )
        if runRange and prod in runRange:
          firstRun = runRange[prod][0]
          lastRun = runRange[prod][1]
          if firstRun:
            summStr += " From run %d" % int( firstRun )
          if lastRun and lastRun != sys.maxint:
            summStr += " Up to run %d" % int( lastRun )
          if firstRun == 0 and lastRun == 0:
            summStr += "No run range specified"
        motherProds = prodSet['MotherProds']
        if motherProds and prod in motherProds:
          summStr += " from productions %s" % motherProds[prod]
        gLogger.info( summStr )
        for inf in runInfo:
          if runInfo[inf]:
            gLogger.info( "%s runs (%d): %s" % ( inf, len( runInfo[inf] ), str( runInfo[inf] ) ) )
        summStr = "%d files, " % info['Files']['']
        if info['Events']:
          summStr += "%d events in " % info['Events']['']
        _msgTuple = ( len( info['Runs'][''] ), info['Lumi'][''], info['Lumi']['Bad'], info['Lumi']['OK'] )
        summStr += "%d runs, luminosity (pb-1): All=%.3f, Bad=%.3f, OK=%.3f" % _msgTuple
        gLogger.info( summStr )
    for flag in totInfo.get( 'Runs', [] ):
      totInfo['Runs'][flag] = len( totInfo['Runs'][flag] )
    return totInfo

  @staticmethod
  def outputResults( conditions, processingPass, prodStats ):
    outputString = ""
    _msgTuple = ( conditions, ",", processingPass, "on", time.ctime( time.time() ) )
    outputString += "\nProduction progress for %s %s %s %s %s\n" % _msgTuple
    if len( prodStats ) < 4:
      outputString += "No statistics found for this BK query"
      return outputString
    for i in xrange( 4 ):
      info = prodStats[i]
      if not info:
        continue
      name = info.getName()
      outputString += "\nSummary for %s\n" % name
      outputString += "%d files, " % info.getItem( 'Files' )
      if info.getItem( 'Events' ):
        outputString += "%d events in " % info.getItem( 'Events' )
      _msgTuple = ( info.getItem( 'Runs' ), info.getItem( 'Lumi' ), info.getItem( 'BadLumi' ), info.getItem( 'OKLumi' ) )
      outputString += "%d runs, luminosity (pb-1): All=%.3f, Bad=%.3f, OK=%.3f\n" % _msgTuple
      prevStats = prodStats[:i]
      prevStats.reverse()
      for prevInfo in prevStats:
        name = prevInfo.getName()
        if prevInfo.getItem( 'Runs' ) == 0:
          outputString += "From %s : - No runs...\n" % name
        else:
          outputString += "From %s : %.1f%% files, " % ( name, 100.*info.getItem( 'Files' ) / prevInfo.getItem( 'Files' ) )
          if info.getItem( 'Events' ) and prevInfo.getItem( 'Events' ):
            outputString += "%.1f%% events\n" % ( 100.*info.getItem( 'Events' ) / prevInfo.getItem( 'Events' ) )
          outputString += "%.1f%% runs, %.1f%% luminosity\n" \
                          % ( 100. * info.getItem( 'Runs' ) / prevInfo.getItem( 'Runs' ),
                              100. * info.getItem( 'Lumi' ) / prevInfo.getItem( 'Lumi' ) )
    return outputString

  def __getRunsDQFlag( self, runList, evtType ):
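    # Reduce the per-file DQ flags of each run to a single flag for the given event type,
    # using the precedence BAD > OK > EXPRESS_OK > UNCHECKED (first match wins).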
    res = self.bk.getRunFilesDataQuality( runList )
    runFlags = {}
    if res['OK']:
      dqFlags = res['Value']
      for dq in dqFlags:
        if dq[2] == evtType:
          runFlags.setdefault( dq[0], [] ).append( dq[1] )
    runDQFlags = {}
    flags = ( 'BAD', 'OK', 'EXPRESS_OK', 'UNCHECKED' )
    for run in runFlags:
      for fl in flags:
        if fl in runFlags[run]:
          runDQFlags[run] = fl
          break
    return runDQFlags

  def _getStatsFromRuns( self, evtType, runList, recoDQFlags ):
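    # Build per-flag counters (Files, Events, Lumi, Runs) for RAW data from the run information,
    # using the local cache; runs without a cached DQ flag, or started less than 2 days ago, are refreshed.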
    info = dict.fromkeys( ( 'Events', 'Runs', 'Files', 'Lumi' ), {} )
    for inf in info:
      info[inf] = dict.fromkeys( ( 'Bad', 'OK', '' ), 0 )
    now = datetime.datetime.utcnow()
    # Set to True to renew the cache
    clearCache = 'RAW' in self.clearCache
    newRuns = [ run for run in runList if clearCache
                or run not in self.cachedInfo
                or 'DQFlag' not in self.cachedInfo[run]
                or ( now - self.cachedInfo[run]['Time'] ) < datetime.timedelta( days = 2 )   ]
    if newRuns:
      runFlags = self.__getRunsDQFlag( newRuns, evtType )
    else:
      runFlags = {}
    runsByDQFlag = {}
    runInfo = {}
    for run in runList:
      cached = self.cachedInfo.get( run, {} )
      cachedTime = cached.get( 'Time', None )
      if run not in newRuns:
        cachedFiles = cached.get( 'Files', 0 )
        cachedEvents = cached.get( 'EventStat', 0 )
        cachedLumi = cached.get( 'Luminosity', 0 )
        dqFlag = cached.get( 'DQFlag', None )
      else:
        res = self.bk.getRunInformations( run )
        if res['OK']:
          val = res['Value']
          ind = val['Stream'].index( 90000000 )
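          # 90000000 is the event type used in the Bookkeeping for the FULL (raw) stream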
          cachedFiles = val['Number of file'][ind]
          cachedEvents = val['Number of events'][ind]
          cachedLumi = val['luminosity'][ind]
          cachedTime = val['RunStart']
        else:
          gLogger.error( "Unable to get run information for run %s" % str( run ) )
          continue
        dqFlag = runFlags[run]
      self.cachedInfo[run] = { 'Time':cachedTime, 'Files':cachedFiles, 'EventStat': cachedEvents,
                              'Luminosity': cachedLumi, 'DQFlag':dqFlag }
      runsByDQFlag[dqFlag] = runsByDQFlag.setdefault( dqFlag, 0 ) + 1
      if dqFlag == "BAD":
        runInfo.setdefault( 'BAD', [] ).append( run )
      elif dqFlag not in recoDQFlags and dqFlag != 'OK' :
        runInfo.setdefault( 'Untagged', [] ).append( run )
      # Now count...
      flags = []
      if dqFlag != 'BAD':
        flags.append( '' )
        # OK in recoDQFlags means we take everything that is not BAD (reprocessing or new convention)
        if dqFlag in recoDQFlags or dqFlag == 'OK':
          flags.append( 'OK' )
      else:
        flags.append( 'Bad' )
      for flag in flags:
        info['Runs'][flag] += 1
        info['Files'][flag] += cachedFiles
        info['Events'][flag] += cachedEvents
        info['Lumi'][flag] += cachedLumi

    # Set lumi in pb-1
    for flag in info['Lumi']:
      info['Lumi'][flag] /= 1000000.
    gLogger.info( "Runs per flag:" )
    for key in runsByDQFlag:
      gLogger.info( "%s : %d" % ( key, runsByDQFlag[key] ) )
    for flag in runInfo:
      runInfo[flag].sort()
    return info, runInfo

  def __getLfnsMetadata( self, lfns ):
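    # Retrieve EventStat, Luminosity, DQFlag and RunNumber for the given LFNs from the Bookkeeping,
    # in chunks of 1000 files, retrying indefinitely on service errors.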
    lfnDict = {}
    if len( lfns ):
      gLogger.verbose( "Getting metadata for %d files" % len( lfns ) )
      for lfnChunk in breakListIntoChunks( lfns, 1000 ):
        while True:
          res = self.bk.getFileMetadata( lfnChunk )
          if not res['OK']:
            gLogger.error( "Error getting files metadata, retrying...", res['Message'] )
          else:
            break
        metadata = res['Value']['Successful']
        for lfn in lfnChunk:
          lfnDict[lfn] = {}
          for meta in ( 'EventStat', 'Luminosity', 'DQFlag', 'RunNumber' ):
            lfnDict[lfn][meta] = metadata[lfn][meta]
    return lfnDict

  def _getStatsFromBK( self, prod, fileType, runList, allReplicas ):
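    # Per-flag statistics for the output files of one production. Results are cached per BK query:
    # only files whose DQ flag is not yet final (OK or BAD) are re-queried, and only files
    # registered since the cached timestamp are added to the cache.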
    bkQueryDict = { "ProductionID": prod, "FileType": fileType }
    bkStr = str( bkQueryDict )
    bkQuery = BKQuery( bkQueryDict, visible = not allReplicas )
    if allReplicas:
      bkQuery.setOption( 'ReplicaFlag', "All" )
    cached = self.cachedInfo.get( bkStr, {} )
    cachedTime = cached.get( 'Time', None )
    cachedLfns = cached.get( 'Lfns', {} )
    if isinstance( fileType, basestring ):
      fileType = [fileType]
    if set( fileType ).intersection( set( self.clearCache ) ):
      cachedTime = None
      cachedLfns = {}
      gLogger.verbose( "Cleared cache for production %s, file type %s" % ( str( prod ), fileType ) )
    # Update if needed the cached information on LFNs
    if cachedLfns:
      lfns = [lfn for lfn in cachedLfns if cachedLfns[lfn].get( 'DQFlag' ) not in ( 'OK', 'BAD' )]
      for lfnChunk in breakListIntoChunks( lfns, 1000 ):
        #  get the DQFlag of files that are not yet OK
        while True:
          res = self.bk.getFileMetadata( lfnChunk )
          if not res['OK']:
            gLogger.error( "Error getting files metadata for cached files, bkQuery %s: %s" % ( bkStr, res['Message'] ) )
          else:
            metadata = res['Value']['Successful']
            for lfn in lfnChunk:
              cachedLfns[lfn]['DQFlag'] = metadata[lfn]['DQFlag']
            break

    # Now get the new files since last time...
    if cachedTime:
      bkQuery.setOption( 'StartDate', cachedTime.strftime( '%Y-%m-%d %H:%M:%S' ) )
    gLogger.verbose( "Getting files for BKQuery %s" % str( bkQuery ) )
    cachedTime = datetime.datetime.utcnow()
    lfns = [lfn for lfn in bkQuery.getLFNs( printOutput = False ) if lfn not in cachedLfns]
    gLogger.verbose( "Returned %d files" % len( lfns ) )
    cachedLfns.update( self.__getLfnsMetadata( lfns ) )

    self.cachedInfo[bkStr] = { 'Time':cachedTime, 'Lfns':cachedLfns }

    # Now sum up all information for the files
    info = dict.fromkeys( ( 'Events', 'Runs', 'Files', 'Lumi' ), {} )
    for inf in info:
      if inf == 'Runs':
        for flag in ( 'Bad', 'OK', '' ):
          info[inf][flag] = []
      else:
        info[inf] = dict.fromkeys( ( 'Bad', 'OK', '' ), 0 )

    for lfn in cachedLfns:
      lfnInfo = cachedLfns[lfn]
      run = lfnInfo['RunNumber']
      if run in runList and run in self.cachedInfo and self.cachedInfo[run]['DQFlag'] != 'BAD':
        dqFlag = cachedLfns[lfn]['DQFlag']
        flags = []
        if dqFlag != 'BAD':
          flags.append( '' )
        else:
          flags.append( 'Bad' )
        if dqFlag == 'OK':
          flags.append( 'OK' )
        for flag in flags:
          if run not in info['Runs'][flag]:
            info['Runs'][flag].append( run )
          info['Files'][flag] += 1
          info['Events'][flag] += lfnInfo['EventStat']
          info['Lumi'][flag] += lfnInfo['Luminosity']

    runInfo = {}
    if 'Bad' in info['Runs']:
      runInfo['BAD'] = info['Runs']['Bad']
      runInfo['BAD'].sort()
    else:
      runInfo['BAD'] = []
    if '' in info['Runs'] and 'OK' in info['Runs']:
      runInfo['Untagged'] = [run for run in info['Runs'][''] if run not in info['Runs']['OK']]
      runInfo['Untagged'].sort()
    else:
      runInfo['Untagged'] = []
    # for f in info['Runs']:
    #  info['Runs'][f] = len( info['Runs'][f] )
    for flag in info['Lumi']:
      info['Lumi'][flag] /= 1000000.
    return info, runInfo

  def getPreviousStats( self, processingPass ):
    prevStats = self.cachedInfo.get( 'ProdStats', {} ).get( processingPass )
    if prevStats:
      try:
        _name = prevStats['ProdStats'][0][0].getName()
      except:
        prevStats = None
    return prevStats

  def setPreviousStats( self, processingPass, prevStats ):
    self.cachedInfo.setdefault( 'ProdStats', {} )[processingPass] = prevStats
    self.saveCache()

  def readCache( self ):
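    # Load the pickled cache (version tag, then cached info) under a file lock;
    # an incompatible cache version resets the information.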
    if not os.path.exists( self.prodStatFile ):
      gLogger.info( "Created cached file %s" % self.prodStatFile )
      self.cachedInfo = {}
      self.saveCache()
      return
    fileRead = False
    while not fileRead:
      try:
        with FileLock( self.prodStatFile ):
          lFile = open( self.prodStatFile, 'r' )
          cachedVersion = pickle.load( lFile )
          startTime = time.time()
          if cachedVersion == self.cacheVersion:
            self.cachedInfo = pickle.load( lFile )
            _msgTuple = ( self.prodStatFile, time.time() - startTime )
            gLogger.info( "Loaded cached information from %s in %.3f seconds" % _msgTuple )
          else:
            _msgTuple = ( cachedVersion, self.cacheVersion )
            gLogger.info( "Incompatible versions of cache, reset information (%s, expect %s)" % _msgTuple )
            self.cachedInfo = {}
          lFile.close()
          fileRead = True
      except FileLockException, error:
        gLogger.error( "Lock exception: %s while reading pickle file %s" % ( error, self.prodStatFile ) )
      except:
Example #11
0
        inputFiles = True
        outputFiles = True
    jobidList = []
    for jobid in args:
        if os.path.exists(jobid):
            bkScript.setJobidsFromFile(jobid)
        else:
            jobidList += jobid.split(',')
    jobidList += bkScript.getOption('JobIDs', [])
    if not jobidList:
        print "No jobID provided!"
        Script.showHelp()
        DIRAC.exit(0)

    from LHCbDIRAC.BookkeepingSystem.Client.BookkeepingClient import BookkeepingClient
    retVal = BookkeepingClient().getJobInputOutputFiles(jobidList)
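    # The result contains 'Successful' and 'Failed' dictionaries keyed by job ID; each successful
    # entry holds the 'InputFiles' and 'OutputFiles' lists of that job.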
    if retVal['OK']:
        success = retVal['Value']['Successful']
        for job in success:
            # Remove from input the files that are also output! This happens because the output of step 1 can be the input of step 2...
            # only worth if input files are requested though
            if inputFiles:
                success[job]['InputFiles'] = sorted(
                    set(success[job]['InputFiles']) -
                    set(success[job]['OutputFiles']))

            if not inputFiles or not outputFiles:
                success[job].pop(
                    'InputFiles' if not inputFiles else 'OutputFiles')

    printDMResult(retVal, empty="File does not exists in the Bookkeeping")
def execute():
    """
  Parse the options and execute the script
  """
    bkQuery = dmScript.getBKQuery()
    fileType = bkQuery.getFileTypeList()
    if not set(fileType) & {'FULL.DST', 'RDST', 'SDST'}:
        gLogger.error("Please provide a reconstruction BK path")
        DIRAC.exit(1)

    from LHCbDIRAC.TransformationSystem.Client.TransformationClient import TransformationClient
    from DIRAC.DataManagementSystem.Client.DataManager import DataManager
    from DIRAC.Core.Utilities.List import breakListIntoChunks
    from LHCbDIRAC.BookkeepingSystem.Client.BookkeepingClient import BookkeepingClient
    from DIRAC.DataManagementSystem.Utilities.DMSHelpers import DMSHelpers, resolveSEGroup

    bk = BookkeepingClient()
    tr = TransformationClient()
    dm = DataManager()
    dmsHelper = DMSHelpers()

    bkQueryDict = bkQuery.getQueryDict()
    gLogger.notice("For BK Query:", str(bkQueryDict))
    progressBar = ProgressBar(1, title="Running BK query...", step=1)
    res = bk.getFilesWithMetadata(bkQueryDict)
    if not res['OK']:
        gLogger.error("Error getting files from BK", res['Message'])
        DIRAC.exit(2)

    if 'ParameterNames' in res.get('Value', {}):
        parameterNames = res['Value']['ParameterNames']
        info = res['Value']['Records']
        progressBar.endLoop("Obtained %d files" % len(info))
    else:
        gLogger.error('\nNo metadata found')
        DIRAC.exit(3)
    lfns = []
    runLFNs = {}
    for item in info:
        metadata = dict(zip(parameterNames, item))
        lfn = metadata['FileName']
        lfns.append(lfn)
        runLFNs.setdefault(metadata['RunNumber'], []).append(lfn)

    chunkSize = 1000
    progressBar = ProgressBar(len(lfns),
                              title='Getting replicas of %d files' % len(lfns),
                              chunk=chunkSize)
    replicas = {}
    errors = {}
    for lfnChunk in breakListIntoChunks(lfns, chunkSize):
        progressBar.loop()
        res = dm.getReplicas(lfnChunk, getUrl=False)
        if not res['OK']:
            errors.setdefault(res['Message'], []).extend(lfnChunk)
        else:
            replicas.update(res['Value']['Successful'])
            for lfn, error in res['Value']['Failed'].iteritems():
                errors.setdefault(error, []).append(lfn)
    progressBar.endLoop()
    for error, lfns in errors.iteritems():
        gLogger.error(error, 'for %d files' % len(lfns))

    tier1RDST = set(resolveSEGroup('Tier1-RDST'))
    setOK = 0
    errors = {}
    progressBar = ProgressBar(len(runLFNs),
                              title='Defining destination for %d runs' %
                              len(runLFNs),
                              step=10)
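    # For each run, pick the Tier1-RDST storage element already holding the largest number of the
    # run's files (falling back to CERN-RDST), then set the corresponding site as the run destination.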
    for run, lfns in runLFNs.iteritems():
        progressBar.loop()
        res = tr.getDestinationForRun(run)
        if res.get('Value'):
            errors.setdefault('Destination already set', []).append(str(run))
            continue
        # print 'Run', run, len( lfns ), 'Files', lfns[:3]
        seCounts = {}
        for lfn in lfns:
            for se in tier1RDST.intersection(replicas.get(lfn, [])):
                seCounts[se] = seCounts.setdefault(se, 0) + 1
        # print seCounts
        maxi = 0
        seMax = None
        for se, count in seCounts.iteritems():
            if count > maxi:
                seMax = se
                maxi = count
        if not seMax:
            errors.setdefault('No SE found, use CERN-RDST',
                              []).append(str(run))
            seMax = 'CERN-RDST'
        # SE found, get its site
        res = dmsHelper.getLocalSiteForSE(seMax)
        if res['OK']:
            site = res['Value']
            res = tr.setDestinationForRun(run, site)
            if not res['OK']:
                errors.setdefault(res['Message'], []).append(str(run))
            else:
                setOK += 1
    progressBar.endLoop('Successfully set destination for %d runs' % setOK)
    for error, runs in errors.iteritems():
        gLogger.error(error, 'for runs %s' % ','.join(runs))
Example #13
0
# Author :  Zoltan Mathe
########################################################################
"""
  List file types from the Bookkeeping
"""
__RCSID__ = "$Id$"

import DIRAC
from DIRAC.Core.Base import Script

Script.setUsageMessage('\n'.join([
    __doc__.split('\n')[1], 'Usage:',
    '  %s [option|cfgfile]' % Script.scriptName
]))
Script.parseCommandLine(ignoreErrors=True)

from LHCbDIRAC.BookkeepingSystem.Client.BookkeepingClient import BookkeepingClient
bk = BookkeepingClient()
exitCode = 0

mfiletypes = []
res = bk.getAvailableFileTypes()
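# Each returned record holds the file type name and (presumably) its description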

if res['OK']:
    dbresult = res['Value']
    print 'Filetypes:'
    for record in dbresult['Records']:
        print str(record[0]).ljust(30) + str(record[1])

DIRAC.exit(exitCode)
########################################################################
"""
  Insert a new set of simulation conditions in the Bookkeeping
"""
__RCSID__ = "$Id$"

import DIRAC
from DIRAC.Core.Base import Script
Script.setUsageMessage('\n'.join([
    __doc__.split('\n')[1], 'Usage:',
    '  %s [option|cfgfile] ...' % Script.scriptName
]))
Script.parseCommandLine(ignoreErrors=True)

from LHCbDIRAC.BookkeepingSystem.Client.BookkeepingClient import BookkeepingClient
bk = BookkeepingClient()

exitCode = 0

desc = raw_input("SimDescription: ")
beamcond = raw_input("BeamCond: ")
beamEnergy = raw_input("BeamEnergy: ")
generator = raw_input("Generator: ")
magneticField = raw_input("MagneticField: ")
detectorCond = raw_input("DetectorCond: ")
luminosity = raw_input("Luminosity: ")
g4settings = raw_input("G4settings: ")
print 'Do you want to add these new simulation conditions? (yes or no)'
value = raw_input('Choice:')
choice = value.lower()
if choice in ['yes', 'y']:
Example #15
0
    class BookkeepingDB:
    
        def __init__(self):
            self.bkClient = BookkeepingClient()
            self.tfClient = TransformationClient()
#            self.err = err
            
            
        def MakeRunLFN(self, runNmbr, cfgVersion, prodId):
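            # Build the run LFN from the externally defined LFN_FORMAT_STRING,
            # zero-padding the production ID to 8 digits.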
            try:
                padding = "%08d" % int(prodId)
    
                lfn = LFN_FORMAT_STRING  %(
                    cfgVersion, runNmbr, runNmbr, padding)
                
                return lfn
            except Exception as inst:
#                self.err.rethrowException(inst)
                return None     
            
        def getTCK(self, runNmbr):
            try:
                print runNmbr
                res = self.getRunsMetadata(runNmbr)
                
                pprint.pprint(res)
                
                if res is not None and 'Value' in res \
                        and runNmbr in res['Value'] \
                        and "TCK" in res['Value'][runNmbr]:
                    return res['Value'][runNmbr]["TCK"]
                else:
                    return None
            except Exception as inst:
#                self.err.rethrowException(inst)
                return None
            
            
        def getRunsMetadata(self, runNmbr):
            try:
                res = self.tfClient.getRunsMetadata(int(runNmbr))
                if res['OK']:
                    return res
                else:
                    return None
            except Exception as inst:
#                self.err.rethrowException(inst)
                return None
    
        def getInformation(self, run):
            try:
                res = self.bkClient.getRunInformations(run)
    
                if res['OK']:
                    val = res['Value']

                    result = {"runstart": val.get('RunStart', 'Unknown'), "runend": val.get('RunEnd', 'Unknown'),
                              "configname": val.get('Configuration Name', 'Unknown'),
                              "configversion": val.get('Configuration Version', 'Unknown'),
                              "fillnb": val.get('FillNumber', 'Unknown'),
                              "datataking": val.get('DataTakingDescription', 'Unknown'),
                              "processing": val.get('ProcessingPass', 'Unknown'),
                              "stream": val.get('Stream', 'Unknown'),
                              "fullstat": val.get('FullStat', 'Unknown'),
                              "nbofe": val.get('Number of events', 'Unknown'),
                              "nboff": val.get('Number of file', 'Unknown'),
                              "fsize": val.get('File size', 'Unknown')}
            
                    return result
                else:
                    self.errorMessage("error in bkClient Connection")
                    return None
            except Exception as inst:
#                self.err.rethrowException(inst)
                return None
                
        def getListOfRecos(self, runNmbr):
            try:
                d = {'RunNumber' : runNmbr}
        
                res = self.bkClient.getRunAndProcessingPass(d)
        
                results = list()
        
                if res['OK'] == True:
                    recosList = res["Value"]
            
                    for recoEntry in recosList:
                        recoPath = recoEntry[1]
                
                        if recoEntry[0] == runNmbr \
                            and recoPath.count("/") == 2 \
                            and "Reco" in recoPath :
                    
                            results.append(recoPath)
            
                    return results
                else:
                    pprint.pprint(res)
                    self.errorMessage("error in bkClient Connection")
                    return None
            except Exception as inst:
#                self.err.rethrowException(inst)
                return None
            
        def getProcessId(self, runNmbr, recoVersion):
            try:
                d = {'RunNumber' : runNmbr,
                'ProcessingPass':  recoVersion}
        
                res = self.bkClient.getProductionsFromView(d)
        
                if res["OK"] == True:
                    return res["Value"][0][0]
                else:
                    self.errorMessage("error in bkClient Connection")
                    return None
            except Exception as inst:
#                self.err.rethrowException(inst)
                return None
                
        #recoVersion is just Reco13 and not the full path!!
        def makeReferenceROOTFileName(self, recoVersion, runNmbr):
            try:
                basePath = REFERENCE_BASE_PATH + recoVersion +"/"
            
                #nasty stuff!!
                #the problem is tck retrieved from db
                #0x790038
                #but in the file it looks like
                #TCK_0x00760037_1.root
                #so do padding here
                tck = self.getTCK(runNmbr)
                
                #sometimes no tck set, then take default file
                if tck != None:
                    tckDecimal = int(tck, 0)
                    tckHexPaddedFileName = "TCK_0x" + str(format(tckDecimal, '08x')) + "_"
            
                #if we have multiple files like
                #TCK_0x00790038_1.root
                #TCK_0x00790038_2.root
                #we want the file with the highest subindex, so in this example _2
                possibleTCKList = list()
            
                #store all possible files
                for file in os.listdir(basePath):
                    if tck != None \
                    and file.endswith(".root") \
                    and file != "default_1.root" \
                    and tckHexPaddedFileName in file:
                        possibleTCKList.append(file)
    
                #if we haven't found anything, look for the default files and choose the one with the highest index
                if len(possibleTCKList) == 0:
            
                    #store all possible files
                    for file in os.listdir(basePath):
                        if file.endswith(".root") \
                        and "default_" in file:
                            possibleTCKList.append(file)
            
                #now sort this list, to find the highest subindex               
                possibleTCKList.sort()
    
                return basePath+possibleTCKList.pop()
            except Exception as inst:
#                self.err.rethrowException(inst)
                return None
Example #16
0
Script.setUsageMessage('\n'.join([
    __doc__.split('\n')[1], 'Usage:',
    '  %s [option|cfgfile] ... ProdID' % Script.scriptName, 'Arguments:',
    '  ProdID:   Production ID'
]))
Script.parseCommandLine(ignoreErrors=True)
args = Script.getPositionalArgs()

if len(args) < 1:
    Script.showHelp()

exitCode = 0

from LHCbDIRAC.BookkeepingSystem.Client.BookkeepingClient import BookkeepingClient
bk = BookkeepingClient()
try:
    prod = long(args[0])
except:
    Script.showHelp()
    DIRAC.exit(1)

res = bk.getNbOfJobsBySites(prod)
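# getNbOfJobsBySites returns a list of (numberOfJobs, siteName) pairs for the production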

if res['OK']:
    if not res['Value']:
        print "No jobs for production", prod
        DIRAC.exit(0)
    sites = dict([(site, num) for num, site in res['Value']])
    shift = 0
    for site in sites:
Example #17
0
 def __init__(self):
     self.bkClient = BookkeepingClient()
     self.tfClient = TransformationClient()
Example #18
0
class ProductionRequest(object):
    """ Production request class - objects are usually created starting from a production request
  """
    def __init__(self, bkkClientIn=None, diracProdIn=None):
        """ c'tor

        Some variables are defined here. A production request is made of:
        stepsList, productionsTypes, and various parameters of those productions
    """

        if bkkClientIn is None:
            self.bkkClient = BookkeepingClient()
        else:
            self.bkkClient = bkkClientIn

        if diracProdIn is None:
            self.diracProduction = DiracProduction()
        else:
            self.diracProduction = diracProdIn

        self.rpcProductionRequest = RPCClient(
            'ProductionManagement/ProductionRequest')
        self.tc = TransformationClient()

        self.logger = gLogger.getSubLogger('ProductionRequest')

        self.opsH = Operations()

        # parameters of the request
        self.requestID = 0
        self.parentRequestID = 0
        self.appendName = '1'
        self.outConfigName = ''
        self.prodsToLaunch = []  # productions to launch
        self.stepsListDict = []  # list of dict of steps
        self.stepsInProds = []  # a list of lists
        # parameters of the input data
        self.processingPass = ''
        self.dataTakingConditions = ''
        self.eventType = ''
        self.bkFileType = []
        self.dqFlag = ''
        self.startRun = 1
        self.endRun = 2
        self.runsList = ''
        self.configName = 'test'
        self.configVersion = 'certification'
        # parameters of the first production
        self.publishFlag = True
        self.testFlag = False
        self.derivedProduction = 0
        self.previousProdID = 0  # optional prod from which to start
        self.fullListOfOutputFileTypes = []

        # parameters that are the same for each production
        self.prodGroup = ''  # This ends up being the 'ProcessingType' workflow parameter, used for accounting
        self.visibility = ''  # For BKQuery
        self.fractionToProcess = 0
        self.minFilesToProcess = 0
        self.modulesList = None  # Usually:
        # ['GaudiApplication', 'AnalyseXMLSummary',
        # 'ErrorLogging', 'BookkeepingReport', 'StepAccounting' ]

        # parameters of each production (the length of each list has to be the same as the number of productions)
        self.events = []
        self.stepsList = []
        self.extraOptions = {}
        self.prodsTypeList = []
        self.bkQueries = []  # list of bk queries
        self.removeInputsFlags = []
        self.priorities = []
        self.cpus = []
        self.inputs = []  # list of lists
        self.outputModes = []
        self.targets = []
        self.outputFileMasks = []
        self.outputFileSteps = []
        self.groupSizes = []
        self.plugins = []
        self.inputDataPolicies = []
        self.previousProds = [
            None
        ]  # list of productions from which to take the inputs (the first is always None)
        self.multicore = [
        ]  # list of flags to override the multi core flags of the steps

        self.outputSEs = []  # a list of StorageElements
        self.specialOutputSEs = []  # a list of dictionaries - might be empty
        self.outputSEsPerFileType = []  # a list of dictionaries - filled later
        self.ancestorDepths = []
        self.compDict = {
            'HIGH': 'Compression-LZMA-4',
            'LOW': 'Compression-ZLIB-1'
        }
        self.compressionLvl = ['']  # List: one compression level per step
        self.appConfig = '$APPCONFIGOPTS/Persistency/'  # Default location of the compression level configuration files
        #
        # These lists define the visibility of the output files produced by each step. For MC productions, the visibility
        # is tied to the compression level: visible files are compressed at the highest level.
        #
        self.outputVisFlag = [
        ]  # List of dictionary with default visibility flag of the output files per single step
        self.specialOutputVisFlag = [
        ]  # List of dictionaries with special visibility flag for given file type

    #############################################################################

    def resolveSteps(self):
        """ Given a list of steps in strings, some of which might be missing,
        resolve it into a list of dictionary of steps (self.stepsListDict)
    """
        outputVisFlag = dict(
            [k, v] for el in self.outputVisFlag for k, v in el.iteritems()
        )  # Transform the list of dictionaries in a dictionary
        specialOutputVisFlag = dict([k, v] for el in self.specialOutputVisFlag
                                    for k, v in el.iteritems())
        count = 0  # Needed to correctly add the optionFiles to the list of dictionaries of steps
        for stepID in self.stepsList:

            stepDict = self.bkkClient.getAvailableSteps({'StepId': stepID})
            if not stepDict['OK']:
                raise ValueError(stepDict['Message'])
            else:
                stepDict = stepDict['Value']

            stepsListDictItem = {}

            s_in = self.bkkClient.getStepInputFiles(stepID)
            if not s_in['OK']:
                raise ValueError(s_in['Message'])
            stepsListDictItem['fileTypesIn'] = [
                fileType[0].strip() for fileType in s_in['Value']['Records']
            ]

            s_out = self.bkkClient.getStepOutputFiles(stepID)
            if not s_out['OK']:
                raise ValueError(s_out['Message'])
            fileTypesList = [
                fileType[0].strip() for fileType in s_out['Value']['Records']
            ]
            self.fullListOfOutputFileTypes = self.fullListOfOutputFileTypes + fileTypesList
            stepsListDictItem['fileTypesOut'] = fileTypesList

            for parameter, value in itertools.izip(stepDict['ParameterNames'],
                                                   stepDict['Records'][0]):
                if parameter.lower() in ['conddb', 'dddb', 'dqtag'] and value:
                    if value.lower() == 'frompreviousstep':
                        value = self.stepsListDict[-1][parameter]

                if parameter == 'OptionFiles':  # Modifying the OptionFiles (for setting the compression level)
                    if 'MDF' not in stepsListDictItem[
                            'fileTypesOut']:  # certain MC produce MDF, which shouldn't be compressed
                        #
                        # If the prod manager sets a compression level for a particular step, either we append the option file
                        # or we overwrite the existing one inherited with the step
                        #
                        if len(self.compressionLvl
                               ) > count and self.compressionLvl[count] != '':
                            persist = re.compile('Compression-[A-Z]{4}-[1-9]')
                            # self.compressionLvl[count] = self.appConfig + self.compressionLvl[count] + '.py'
                            self.compressionLvl[
                                count] = self.appConfig + self.compDict[
                                    self.compressionLvl[count].upper()] + '.py'
                            if not persist.search(value):
                                if value == '':
                                    value = self.compressionLvl[count]
                                else:
                                    value = ";".join(
                                        (value, self.compressionLvl[count]))
                            else:
                                value = persist.sub(
                                    persist.search(
                                        self.compressionLvl[count]).group(),
                                    value)
                        #
                        # If instead the prod manager doesn't declare a compression level, e.g. for intermediate steps,
                        # we check if there is one in the options and in case we delete it. This leaves the default zip level
                        # defined inside Gaudi
                        #
                        elif len(
                                self.compressionLvl
                        ) > count and self.compressionLvl[count] == '':
                            persist = re.compile(
                                r'\$\w+/Persistency/Compression-[A-Z]{4}-[1-9].py;?'
                            )
                            if persist.search(value):
                                value = persist.sub('', value)

                if parameter == 'SystemConfig' and value is not None and re.search(
                        'slc5', value):
                    p = re.compile(
                        r'\$\w+/Persistency/Compression-[A-Z]{4}-[1-9].py;?')
                    if p.search(stepsListDictItem['OptionFiles']):
                        stepsListDictItem['OptionFiles'] = p.sub(
                            '', stepsListDictItem['OptionFiles'])

                stepsListDictItem[parameter] = value  # Fixing what decided

            if stepsListDictItem['StepId'] in self.extraOptions:
                stepsListDictItem['ExtraOptions'] = self.extraOptions[
                    stepsListDictItem['StepId']]
            else:
                stepsListDictItem['ExtraOptions'] = ''

            stepsListDictItem['prodStepID'] = str(stepID) + str(
                stepsListDictItem['fileTypesIn'])

            if 'isMulticore' not in stepsListDictItem:
                stepsListDictItem['isMulticore'] = 'N'

            if 'SystemConfig' not in stepsListDictItem:
                stepsListDictItem['SystemConfig'] = ''

            if 'mcTCK' not in stepsListDictItem:
                stepsListDictItem['mcTCK'] = ''

            # Add visibility info during step resolution
            if 'visibilityFlag' not in stepsListDictItem:
                outputVisList = list({
                    'Visible': outputVisFlag[str(count + 1)],
                    'FileType': ftype
                } for ftype in stepsListDictItem['fileTypesOut'])
                if str(count + 1) in specialOutputVisFlag:
                    for it in outputVisList:
                        if it['FileType'] in specialOutputVisFlag[str(count +
                                                                      1)]:
                            it['Visible'] = specialOutputVisFlag[str(
                                count + 1)][it['FileType']]

                stepsListDictItem['visibilityFlag'] = outputVisList

            self.stepsListDict.append(stepsListDictItem)
            count += 1

    #############################################################################

    def buildAndLaunchRequest(self):
        """ uses _applyOptionalCorrections, _getProdsDescriptionDict,
        _buildProduction, and DiracProduction.launchProduction
    """

        if not self.stepsListDict:
            self.resolveSteps()

        self._applyOptionalCorrections()

        self._determineOutputSEs()

        prodsDict = self._getProdsDescriptionDict()

        stepsListDict = list(self.stepsListDict)

        fromProd = self.previousProdID
        prodsLaunched = []

        self.logger.debug(prodsDict)
        # now we build and launch each productions
        for prodIndex, prodDict in prodsDict.items():

            if self.prodsToLaunch:
                if prodIndex not in self.prodsToLaunch:
                    continue

            # build the list of steps in a production
            stepsInProd = []
            for stepID in prodDict['stepsInProd-ProdName']:
                for step in stepsListDict:
                    if step['prodStepID'] == stepID:
                        stepsInProd.append(
                            stepsListDict.pop(stepsListDict.index(step)))
            # NOT READY (alternative to previous 5 lines)
            # build the DAG of steps in a production
            # stepsInProdDAG = self._getStepsInProdDAG( prodDict, stepsListDict )
            # Here, for today it is just convert to a list
            # TODO: fix this in order to properly use DAGs (now it's only sequential)
            # FIXME: using getIndexNodes we can't assure the order is respected
            # stepsInProd = stepsInProdDAG.getIndexNodes()

            if prodDict['previousProd'] is not None:
                fromProd = prodsLaunched[prodDict['previousProd'] - 1]
                self.previousProdID = fromProd

            prod = self._buildProduction(
                prodType=prodDict['productionType'],
                stepsInProd=stepsInProd,
                outputSE=prodDict['outputSE'],
                priority=prodDict['priority'],
                cpu=prodDict['cpu'],
                inputDataList=prodDict['input'],
                outputMode=prodDict['outputMode'],
                inputDataPolicy=prodDict['inputDataPolicy'],
                outputFileMask=prodDict['outputFileMask'],
                outputFileStep=prodDict['outputFileStep'],
                target=prodDict['target'],
                removeInputData=prodDict['removeInputsFlag'],
                groupSize=prodDict['groupSize'],
                bkQuery=prodDict['bkQuery'],
                plugin=prodDict['plugin'],
                previousProdID=fromProd,
                derivedProdID=prodDict['derivedProduction'],
                transformationFamily=prodDict['transformationFamily'],
                events=prodDict['events'],
                multicore=prodDict['multicore'],
                ancestorDepth=prodDict['ancestorDepth'])

            # if the production is a simulation production type, submit it to the automated testing
            if prodDict['productionType'] in self.opsH.getValue(
                    'Transformations/ExtendableTransfTypes', ['MCSimulation']):
                prodID = self._mcSpecialCase(prod, prodDict)

            else:
                res = self.diracProduction.launchProduction(
                    prod=prod,
                    publishFlag=self.publishFlag,
                    testFlag=self.testFlag,
                    requestID=self.requestID,
                    tracking=prodDict['tracking'])
                if not res['OK']:
                    raise RuntimeError(res['Message'])

                prodID = res['Value']

            prodsLaunched.append(prodID)

            if self.publishFlag:
                self.logger.notice(
                    "For request %d, submitted Production %d, of type %s, ID = %s"
                    % (self.requestID, prodIndex, prodDict['productionType'],
                       str(prodID)))
        return S_OK(prodsLaunched)

    #############################################################################

    def _getStepsInProdDAG(self,
                           prodDict,
                           stepsListDict,
                           stepsOrder='sequential'):
        """ Builds the DAG of steps in a production

        :param dict prodDict: dictionary representing one production
        :param list stepsListDict: list of steps (which are dictionaries) that should be in the production

        :returns: stepsInProd (DAG)
    """
        stepsInProd = DAG()

        inserted = None
        for stepID in prodDict['stepsInProd-ProdName']:
            for step in stepsListDict:
                if step['prodStepID'] == stepID:
                    ind = stepsListDict.index(step)
                    step = stepsListDict.pop(ind)
                    stepsInProd.addNode(step)
                    if inserted and stepsOrder == 'sequential':
                        stepsInProd.addEdge(inserted, step)
                    inserted = step

        return stepsInProd

    def _mcSpecialCase(self, prod, prodDict):
        """ Treating the MC special case for putting MC productions in status "Testing"
    """

        # save the original xml before it is edited for testing
        prod._lastParameters()  # pylint: disable=protected-access

        # launchProduction adds extra parameters, as we 'hot swap' the xml, we
        # need to get these parameters for the un-edited version
        originalProcessingType = prod.prodGroup
        originalPriority = prod.priority

        prodXML = prod.LHCbJob.workflow.toXML()

        prodID = self._modifyAndLaunchMCXML(prod, prodDict)

        # load a production from the original xml to save the priority and processing type
        workflowToSave = fromXMLString(prodXML)
        prod.LHCbJob.workflow = workflowToSave
        prod.setParameter('ProcessingType', 'JDL', str(originalProcessingType),
                          'ProductionGroupOrType')
        prod.setParameter('Priority', 'JDL', str(originalPriority),
                          'Job Priority')

        # original xml to save
        descriptionToStore = prod.LHCbJob.workflow.toXML()

        # saving the original xml in the StoredJobDescription table.
        res = self.tc.addStoredJobDescription(prodID, descriptionToStore)
        if not res['OK']:
            self.logger.error("Error calling addStoredJobDescription",
                              res['Message'])
            self.logger.info("Cleaning created production and exiting")
            self.diracProduction.production(prodID, 'cleaning')
            raise RuntimeError(res['Message'])

        return prodID

    def _modifyAndLaunchMCXML(self, prod, prodDict):
        """ Apply modifications to the workflow XML for MC testing case

        :param Production prod: Production object
        :param dict prodDict: dictionary with production info

        :returns: the production ID of the launched test production
    """
        # set the destination and number of events for testing
        destination = self.opsH.getValue(
            "Productions/MCTesting/MCTestingDestination", 'DIRAC.Test.ch')
        numberOfEvents = self.opsH.getValue(
            "Productions/MCTesting/numberOfEvents", '500')
        extendBy = self.opsH.getValue("Productions/MCTesting/extendBy", 20)

        prod.setJobParameters({'Destination': destination})
        prod.LHCbJob.workflow.removeParameter('BannedSites')
        prod.setParameter('numberOfEvents', 'string', str(numberOfEvents),
                          'Number of events to test')

        # add '1' to the stepMask and add GAUSSHIST to the fileMask
        fileTypesOutLastStep = prod.LHCbJob.workflow.step_instances[
            -2].findParameter('listoutput').getValue()[0]['outputDataType']
        newFileMask = ['GAUSSHIST'] + [
            ftOut.upper() for ftOut in fileTypesOutLastStep.split(';')
        ]
        stepMaskParameter = prod.LHCbJob.workflow.findParameter(
            'outputDataStep')
        if stepMaskParameter:
            stepMask = stepMaskParameter.getValue().replace(' ', '').split(';')
            newOutputFileStep = ';'.join(
                sorted(list(set(['1']).union(set(stepMask)))))
        else:
            newOutputFileStep = '1'
        prod.setFileMask(newFileMask, newOutputFileStep)

        # find the file types out already built, append GAUSSHIST and set the new listoutput
        fileTypesOut = prod.LHCbJob.workflow.step_instances[0].findParameter(
            'listoutput').getValue()[0]['outputDataType']
        fileTypesOut = fileTypesOut.split(', ')
        fileTypesOut.append('GAUSSHIST')
        outputFilesList = prod._constructOutputFilesList(fileTypesOut)  # pylint: disable=protected-access
        prod.LHCbJob.workflow.step_instances[0].setValue(
            'listoutput', outputFilesList)

        # increase the priority to 10
        prod.priority = 10

        # launch the test production
        res = self.diracProduction.launchProduction(
            prod=prod,
            publishFlag=self.publishFlag,
            testFlag=self.testFlag,
            requestID=self.requestID,
            extend=extendBy,
            tracking=prodDict['tracking'],
            MCsimflag=True)
        if not res['OK']:
            self.logger.error("Error launching production", res['Message'])
            raise RuntimeError(res['Message'])

        return res['Value']

    def _determineOutputSEs(self):
        """ Fill outputSEsPerFileType based on outputSEs, fullListOfOutputFileTypes and specialOutputSEs
    """
        for outputSE, specialOutputSEs in itertools.izip(
                self.outputSEs, self.specialOutputSEs):
            outputSEDict = {}
            if not self.fullListOfOutputFileTypes:
                raise ValueError("No steps defined")
            outputSEDict = dict([(fType, outputSE)
                                 for fType in self.fullListOfOutputFileTypes])
            if specialOutputSEs:
                outputSEDict.update(specialOutputSEs)
            self.outputSEsPerFileType.append(outputSEDict)

    def _applyOptionalCorrections(self):
        """ if needed, calls _splitIntoProductionSteps. It also applies other changes
    """
        if len(self.bkQueries) != len(self.prodsTypeList):
            self.bkQueries += ['fromPreviousProd'] * (len(self.prodsTypeList) -
                                                      len(self.bkQueries))

        if len(self.previousProds) != len(self.prodsTypeList):
            self.previousProds += xrange(1, len(self.prodsTypeList))

        if len(self.events) != len(self.prodsTypeList):
            self.events += ['-1'
                            ] * (len(self.prodsTypeList) - len(self.events))

        if not self.removeInputsFlags:
            removeInputsFlags = []
            for prodType in self.prodsTypeList:
                if prodType.lower() == 'merge':
                    removeInputsFlags.append(True)
                else:
                    removeInputsFlags.append(False)
            self.removeInputsFlags = removeInputsFlags

        if not self.outputFileMasks:
            self.outputFileMasks = [''] * len(self.prodsTypeList)

        if not self.outputFileSteps:
            self.outputFileSteps = [''] * len(self.prodsTypeList)

        if not self.inputs:
            self.inputs = [[]] * len(self.prodsTypeList)

        if not self.outputModes:
            self.outputModes = ['Any'] * len(self.prodsTypeList)

        if not self.targets:
            self.targets = [''] * len(self.prodsTypeList)

        if not self.inputDataPolicies:
            self.inputDataPolicies = ['download'] * len(self.prodsTypeList)

        if not self.multicore:
            self.multicore = ['True'] * len(self.prodsTypeList)

        if not self.specialOutputSEs:
            self.specialOutputSEs = [{}] * len(self.prodsTypeList)

        if not self.ancestorDepths:
            self.ancestorDepths = [0] * len(self.prodsTypeList)

        # Checking if we need to split the merging step into many productions
        if 'merge' in [pt.lower() for pt in self.prodsTypeList]:
            i = 0
            indexes = []
            for pt in self.prodsTypeList:
                if pt.lower() == 'merge':
                    indexes.append(i)
                i += 1

            for index in indexes:
                # In this case and only in this case I have to split the merging in many productions
                plugin = self.plugins[index]
                outputSE = self.outputSEs[index]
                specialOutputSE = self.specialOutputSEs[index]
                priority = self.priorities[index]
                cpu = self.cpus[index]
                bkQuery = self.bkQueries[index]
                groupSize = self.groupSizes[index]
                preProd = self.previousProds[index]
                removeInputsFlag = self.removeInputsFlags[index]
                outputFileMask = self.outputFileMasks[index]
                outputFileStep = self.outputFileSteps[index]
                inputs = self.inputs[index]
                idp = self.inputDataPolicies[index]
                stepID = self.stepsList[index]
                events = self.events[index]
                targets = self.targets[index]
                multicore = self.multicore[index]
                outputMode = self.outputModes[index]
                ancestorDepth = self.ancestorDepths[index]
                if plugin.lower(
                ) != 'byrunfiletypesizewithflush' and 'rootmerging' not in plugin.lower(
                ):
                    stepToSplit = self.stepsListDict[index]
                    numberOfProdsToInsert = len(stepToSplit['fileTypesOut'])
                    self.prodsTypeList.remove('Merge')
                    self.plugins.pop(index)
                    self.outputSEs.pop(index)
                    self.specialOutputSEs.pop(index)
                    self.priorities.pop(index)
                    self.cpus.pop(index)
                    self.bkQueries.pop(index)
                    self.previousProds.pop(index)
                    self.groupSizes.pop(index)
                    self.removeInputsFlags.pop(index)
                    self.outputFileMasks.pop(index)
                    self.outputFileSteps.pop(index)
                    self.inputs.pop(index)
                    self.inputDataPolicies.pop(index)
                    self.stepsList.pop(index)
                    self.events.pop(index)
                    self.targets.pop(index)
                    self.multicore.pop(index)
                    self.outputModes.pop(index)
                    self.ancestorDepths.pop(index)
                    newSteps = _splitIntoProductionSteps(stepToSplit)
                    newSteps.reverse()
                    self.stepsListDict.remove(stepToSplit)
                    last = self.stepsInProds.pop(index)[0]
                    for x in xrange(numberOfProdsToInsert):
                        self.prodsTypeList.insert(index, 'Merge')
                        self.plugins.insert(index, plugin)
                        self.outputSEs.insert(index, outputSE)
                        self.specialOutputSEs.insert(index, specialOutputSE)
                        self.priorities.insert(index, priority)
                        self.cpus.insert(index, cpu)
                        self.bkQueries.insert(index, bkQuery)
                        self.groupSizes.insert(index, groupSize)
                        self.removeInputsFlags.insert(index, removeInputsFlag)
                        self.outputFileMasks.insert(index, outputFileMask)
                        self.outputFileSteps.insert(index, outputFileStep)
                        self.inputs.insert(index, inputs)
                        self.inputDataPolicies.insert(index, idp)
                        self.stepsList.insert(index, stepID)
                        self.previousProds.insert(index, preProd)
                        self.stepsListDict.insert(index, newSteps[x])
                        self.stepsInProds.insert(index + x, [last + x])
                        self.events.insert(index, events)
                        self.targets.insert(index, targets)
                        self.multicore.insert(index, multicore)
                        self.outputModes.insert(index, outputMode)
                        self.ancestorDepths.insert(index, ancestorDepth)

        correctedStepsInProds = []
        toInsert = self.stepsInProds[0][0]
        lengths = [len(x) for x in self.stepsInProds]
        for length in lengths:
            li = [toInsert + x for x in xrange(length)]
            toInsert += length
            correctedStepsInProds.append(li)

        self.stepsInProds = correctedStepsInProds

    #############################################################################

    def _getProdsDescriptionDict(self):
        """ Returns a dictionary representing the description of the request (of all the productions in it)
    """

        prodsDict = {}

        prodNumber = 1

        for prodType, stepsInProd, bkQuery, removeInputsFlag, outputSE, priority, \
            cpu, inputD, outputMode, outFileMask, outFileStep, target, groupSize, plugin, idp, \
            previousProd, events, multicore, ancestorDepth in itertools.izip(self.prodsTypeList,
                                                                             self.stepsInProds,
                                                                             self.bkQueries,
                                                                             self.removeInputsFlags,
                                                                             self.outputSEsPerFileType,
                                                                             self.priorities,
                                                                             self.cpus,
                                                                             self.inputs,
                                                                             self.outputModes,
                                                                             self.outputFileMasks,
                                                                             self.outputFileSteps,
                                                                             self.targets,
                                                                             self.groupSizes,
                                                                             self.plugins,
                                                                             self.inputDataPolicies,
                                                                             self.previousProds,
                                                                             self.events,
                                                                             self.multicore,
                                                                             self.ancestorDepths):

            if not self.parentRequestID and self.requestID:
                transformationFamily = self.requestID
            else:
                transformationFamily = self.parentRequestID

            stepsInProdProdNameList = [
                str(self.stepsList[index - 1]) +
                str(self.stepsListDict[index - 1]['fileTypesIn'])
                for index in stepsInProd
            ]
            prodsDict[prodNumber] = {
                'productionType': prodType,
                'stepsInProd':
                [self.stepsList[index - 1] for index in stepsInProd],
                'bkQuery': bkQuery,
                'removeInputsFlag': removeInputsFlag,
                'tracking': 0,
                'outputSE': outputSE,
                'priority': priority,
                'cpu': cpu,
                'input': inputD,
                'outputMode': outputMode,
                'outputFileMask': outFileMask,
                'outputFileStep': outFileStep,
                'target': target,
                'groupSize': groupSize,
                'plugin': plugin,
                'inputDataPolicy': idp,
                'derivedProduction': 0,
                'transformationFamily': transformationFamily,
                'previousProd': previousProd,
                'stepsInProd-ProdName': stepsInProdProdNameList,
                'events': events,
                'multicore': multicore,
                'ancestorDepth': ancestorDepth
            }
            prodNumber += 1

        # tracking the last production(s)
        prodsDict[prodNumber - 1]['tracking'] = 1
        typeOfLastProd = prodsDict[prodNumber - 1]['productionType']
        index = 2
        try:
            while prodsDict[prodNumber -
                            index]['productionType'] == typeOfLastProd:
                prodsDict[prodNumber - index]['tracking'] = 1
                index += 1
        except KeyError:
            pass

        # production derivation, if necessary
        if self.derivedProduction:
            prodsDict[1]['derivedProduction'] = self.derivedProduction

        return prodsDict

    #############################################################################

    def _buildProduction(self,
                         prodType,
                         stepsInProd,
                         outputSE,
                         priority,
                         cpu,
                         inputDataList=None,
                         outputMode='Any',
                         inputDataPolicy='download',
                         outputFileMask='',
                         outputFileStep='',
                         target='',
                         removeInputData=False,
                         groupSize=1,
                         bkQuery=None,
                         plugin='',
                         previousProdID=0,
                         derivedProdID=0,
                         transformationFamily=0,
                         events=-1,
                         multicore='True',
                         ancestorDepth=0):
        """ Wrapper around Production API to build a production, given the needed parameters

        Args:
          prodType (str): production type (e.g. 'DataStripping')
          stepsInProd (list): list of steps in the production
          outputSE (dict): dictionary that holds relation between file type and output SE
          priority (int): production priority
          cpu (int): CPU time, in HS06s for jobs in this production

        Returns:
          prod: a Production object
    """
        prod = Production()

        # non optional parameters
        prod.LHCbJob.setType(prodType)
        try:
            fTypeIn = [ft.upper() for ft in stepsInProd[0]['fileTypesIn']]
        except IndexError:
            fTypeIn = []
        prod.LHCbJob.workflow.setName(
            'Request_%d_%s_%s_EventType_%s_%s_%s' %
            (self.requestID, prodType, self.prodGroup, self.eventType, ''.join(
                [x.split('.')[0] for x in fTypeIn]), self.appendName))
        prod.setBKParameters(configName=self.outConfigName,
                             configVersion=self.configVersion,
                             groupDescriptionOrStepsList=stepsInProd,
                             conditions=self.dataTakingConditions)
        prod.setParameter('eventType', 'string', self.eventType,
                          'Event Type of the production')
        prod.setParameter('numberOfEvents', 'string', str(events),
                          'Number of events requested')

        prod.setParameter('multicore', 'string', multicore,
                          'Flag for enabling gaudi parallel')
        prod.prodGroup = self.prodGroup
        prod.priority = priority
        prod.LHCbJob.workflow.setDescrShort('prodDescription')
        prod.LHCbJob.workflow.setDescription('prodDescription')
        prod.LHCbJob.setCPUTime(cpu)
        prod.plugin = plugin

        # optional parameters
        prod.jobFileGroupSize = groupSize
        if inputDataPolicy:
            prod.LHCbJob.setInputDataPolicy(inputDataPolicy)
        prod.setOutputMode(outputMode)
        if outputFileMask:
            outputFileMask = [
                m.lower() for m in outputFileMask.replace(' ', '').split(',')
            ]
        if outputFileStep:
            if isinstance(outputFileStep, str):
                outputFileStep = [
                    m.lower()
                    for m in outputFileStep.replace(' ', '').split(',')
                ]
        prod.setFileMask(outputFileMask, outputFileStep)
        if target:
            if target == 'Tier2':
                prod.banTier1s()
            elif 'BAN' in target:
                sitesToBan = target.split(':')
                if len(sitesToBan) > 1:
                    prod.banSites(sitesToBan[1:])
            elif target != 'ALL':
                prod.LHCbJob.setDestination(target)
        prod.LHCbJob.setInputData(inputDataList)
        if ancestorDepth:
            prod.LHCbJob.setAncestorDepth(ancestorDepth)
        if derivedProdID:
            prod.ancestorProduction = derivedProdID
        if transformationFamily:
            prod.transformationFamily = transformationFamily
        if self.fractionToProcess:
            prod.setParameter('FractionToProcess', 'string',
                              str(self.fractionToProcess),
                              'Fraction to process')
        if self.minFilesToProcess:
            prod.setParameter('MinFilesToProcess', 'string',
                              str(self.minFilesToProcess),
                              'Min N of Files to process')
        if self.processingPass:
            prod.setParameter('processingPass', 'string', self.processingPass,
                              'Processing pass of input for the request')

        # Adding optional input BK query
        if bkQuery:
            if bkQuery.lower() == 'full':
                prod.inputBKSelection = self._getBKKQuery()
            elif bkQuery.lower() == 'frompreviousprod':
                prod.inputBKSelection = self._getBKKQuery(
                    'frompreviousprod', fTypeIn, previousProdID)

        self.logger.verbose('Launching with BK selection %s' %
                            prod.inputBKSelection)

        prod = self._addStepsToProd(prod,
                                    stepsInProd,
                                    removeInputData=removeInputData)

        for ft, oSE in outputSE.items():
            prod.outputSEs.setdefault(ft, oSE)

        prod.LHCbJob.setDIRACPlatform()

        return prod
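
    # Illustrative sketch (added; not part of the original source): a typical call to
    # _buildProduction with placeholder values (the step dictionaries, SE name, priority,
    # CPU time and group size below are all hypothetical):
    #
    #   prod = self._buildProduction(prodType='DataStripping',
    #                                stepsInProd=[stepDict1, stepDict2],
    #                                outputSE={'DST': 'Tier1-DST'},
    #                                priority=5,
    #                                cpu=1000000,
    #                                bkQuery='Full',
    #                                groupSize=2)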

    #############################################################################

    def _addStepsToProd(self,
                        prod,
                        stepsInProd,
                        stepsSequence='sequential',
                        removeInputData=False):
        """ Given a Production object, add requested steps (application and finalization)

    Args:
      prod (Production): the Production object to which the steps are added
      stepsInProd (DAG): DAG of steps in a production
      stepsSequence (str or dict): applications steps sequence
      removeInputData (bool): flag that indicates if the input data should be removed (for the finalization step)

    Returns:
      prod with steps added

    """
        # Adding the application steps
        firstStep = stepsInProd.pop(0)
        stepName = prod.addApplicationStep(stepDict=firstStep,
                                           modulesList=self.modulesList)
        prod.gaudiSteps.append(stepName)

        for step in stepsInProd:
            stepName = prod.addApplicationStep(stepDict=step,
                                               inputData='previousStep',
                                               modulesList=self.modulesList)
            prod.gaudiSteps.append(stepName)

        # Adding the finalization step
        if removeInputData:
            prod.addFinalizationStep([
                'UploadOutputData', 'RemoveInputData', 'UploadLogFile',
                'UploadMC', 'FailoverRequest'
            ])
        else:
            prod.addFinalizationStep()

        return prod

    def _getBKKQuery(self, mode='full', fileType=None, previousProdID=0):
        """ simply creates the bkk query dictionary
    """

        if fileType is None:
            fileType = []

        if mode.lower() == 'full':
            bkQuery = {
                'FileType': ';;;'.join(self.bkFileType),
                'EventType': str(self.eventType),
                'ConfigName': self.configName,
                'ConfigVersion': self.configVersion
            }

            if self.dataTakingConditions:
                bkQuery['DataTakingConditions'] = self.dataTakingConditions

            if self.processingPass:
                bkQuery['ProcessingPass'] = self.processingPass

            if self.dqFlag:
                bkQuery['DataQualityFlag'] = self.dqFlag.replace(
                    ',', ';;;').replace(' ', '')

            if (self.startRun and self.runsList) or (self.endRun and self.runsList):
                raise ValueError(
                    "Please don't mix runs list with start/end run")

            if self.endRun and self.startRun:
                if self.endRun < self.startRun:
                    gLogger.error(
                        "End run '%d' cannot be smaller than start run '%d'!"
                        % (self.endRun, self.startRun))
                    raise ValueError("Error setting start or end run")

            if self.startRun:
                bkQuery['StartRun'] = self.startRun
            if self.endRun:
                bkQuery['EndRun'] = self.endRun

            if self.runsList:
                bkQuery['RunNumbers'] = self.runsList.replace(',',
                                                              ';;;').replace(
                                                                  ' ', '')

            if self.visibility:
                bkQuery['Visible'] = self.visibility

        elif mode.lower() == 'frompreviousprod':
            bkQuery = {
                'FileType': ';;;'.join(fileType).replace(' ', ''),
                'ProductionID': int(previousProdID)
            }

            if self.eventType:
                bkQuery['EventType'] = str(self.eventType)

            if self.dqFlag:
                bkQuery['DataQualityFlag'] = self.dqFlag.replace(
                    ',', ';;;').replace(' ', '')

        return bkQuery
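
    # Illustrative sketch (added; not part of the original source) of the dictionary
    # returned in 'full' mode, assuming placeholder values bkFileType = ['DST', 'MDST'],
    # eventType = 90000000, dqFlag = 'OK,UNCHECKED' and visibility = 'Yes':
    #
    #   {'FileType': 'DST;;;MDST',
    #    'EventType': '90000000',
    #    'ConfigName': self.configName,
    #    'ConfigVersion': self.configVersion,
    #    'DataQualityFlag': 'OK;;;UNCHECKED',
    #    'Visible': 'Yes'}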

    ################################################################################
    # properties

    def set_stepsList(self, value):
        listInt = []
        i = 0
        while True:
            try:
                listInt.append(int(value[i]))
                i = i + 1
            except ValueError:
                break
            except IndexError:
                break
        self._stepsList = listInt

    def get_stepsList(self):
        return self._stepsList

    stepsList = property(get_stepsList, set_stepsList)

    def set_startRun(self, value):
        if isinstance(value, str):
            value = int(value)
        if value < 0:
            raise ValueError("startRun can not be negative")
        self._startRun = value

    def get_startRun(self):
        return self._startRun

    startRun = property(get_startRun, set_startRun)

    def set_endRun(self, value):
        if isinstance(value, str):
            value = int(value)
        if value < 0:
            raise ValueError("endRun can not be negative")
        self._endRun = value

    def get_endRun(self):
        return self._endRun

    endRun = property(get_endRun, set_endRun)

    def set_requestID(self, value):
        if value == '':
            value = 0
        if isinstance(value, str):
            value = int(value)
        if value < 0:
            raise ValueError("requestID can not be negative")
        self._requestID = value

    def get_requestID(self):
        return self._requestID

    requestID = property(get_requestID, set_requestID)

    def set_parentRequestID(self, value):
        if value == '':
            value = 0
        if isinstance(value, str):
            value = int(value)
        if value < 0:
            raise ValueError("parentRequestID can not be negative")
        self._parentRequestID = value

    def get_parentRequestID(self):
        return self._parentRequestID

    parentRequestID = property(get_parentRequestID, set_parentRequestID)

    def set_bkFileType(self, value):
        if isinstance(value, str):
            value = value.replace(' ', '').split(',')
        self._bkFileType = value

    def get_bkFileType(self):
        return self._bkFileType

    bkFileType = property(get_bkFileType, set_bkFileType)
Example #19
"""

This is a very simple BKK performance test: it calls the service with a message and the
service echoes the message back.

"""
import time

from DIRAC.Core.Base.Script import parseCommandLine
parseCommandLine()

from LHCbDIRAC.BookkeepingSystem.Client.BookkeepingClient import BookkeepingClient
cl = BookkeepingClient()


class Transaction(object):
    def __init__(self):
        self.custom_timers = {}

    def run(self):
        start_time = time.time()
        retVal = cl.echo("simple test")
        if not retVal['OK']:
            print 'ERROR', retVal['Message']
        end_time = time.time()
        self.custom_timers['Bkk_ResponseTime'] = end_time - start_time
        self.custom_timers['Bkk_Echo'] = end_time - start_time


if __name__ == '__main__':
    trans = Transaction()
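    # Illustrative continuation (added; not in the original source): run the transaction
    # once by hand and print the recorded timers. A load-testing harness would normally
    # instantiate Transaction and call run() repeatedly to collect the performance numbers.
    trans.run()
    for name, elapsed in sorted(trans.custom_timers.items()):
        print '%s: %.3f s' % (name, elapsed)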
Example #20
    def __init__(self, bkkClientIn=None, diracProdIn=None):
        """ c'tor

        Some variables are defined here. A production request is made of a steps list,
        the production types, and various parameters of those productions.
    """

        if bkkClientIn is None:
            self.bkkClient = BookkeepingClient()
        else:
            self.bkkClient = bkkClientIn

        if diracProdIn is None:
            self.diracProduction = DiracProduction()
        else:
            self.diracProduction = diracProdIn

        self.rpcProductionRequest = RPCClient(
            'ProductionManagement/ProductionRequest')
        self.tc = TransformationClient()

        self.logger = gLogger.getSubLogger('ProductionRequest')

        self.opsH = Operations()

        # parameters of the request
        self.requestID = 0
        self.parentRequestID = 0
        self.appendName = '1'
        self.outConfigName = ''
        self.prodsToLaunch = []  # productions to launch
        self.stepsListDict = []  # list of dict of steps
        self.stepsInProds = []  # a list of lists
        # parameters of the input data
        self.processingPass = ''
        self.dataTakingConditions = ''
        self.eventType = ''
        self.bkFileType = []
        self.dqFlag = ''
        self.startRun = 1
        self.endRun = 2
        self.runsList = ''
        self.configName = 'test'
        self.configVersion = 'certification'
        # parameters of the first production
        self.publishFlag = True
        self.testFlag = False
        self.derivedProduction = 0
        self.previousProdID = 0  # optional prod from which to start
        self.fullListOfOutputFileTypes = []

        # parameters that are the same for every production
        self.prodGroup = ''  # This ends up being 'ProcessingType', workflow parameter, and it is used for accounting
        self.visibility = ''  # For BKQuery
        self.fractionToProcess = 0
        self.minFilesToProcess = 0
        self.modulesList = None  # Usually:
        # ['GaudiApplication', 'AnalyseXMLSummary',
        # 'ErrorLogging', 'BookkeepingReport', 'StepAccounting' ]

        # parameters of each production (the length of each list has to be the same as the number of productions)
        self.events = []
        self.stepsList = []
        self.extraOptions = {}
        self.prodsTypeList = []
        self.bkQueries = []  # list of bk queries
        self.removeInputsFlags = []
        self.priorities = []
        self.cpus = []
        self.inputs = []  # list of lists
        self.outputModes = []
        self.targets = []
        self.outputFileMasks = []
        self.outputFileSteps = []
        self.groupSizes = []
        self.plugins = []
        self.inputDataPolicies = []
        self.previousProds = [
            None
        ]  # list of productions from which to take the inputs (the first is always None)
        self.multicore = [
        ]  # list of flags to override the multi core flags of the steps

        self.outputSEs = []  # a list of StorageElements
        self.specialOutputSEs = []  # a list of dictionaries - might be empty
        self.outputSEsPerFileType = []  # a list of dictionaries - filled later
        self.ancestorDepths = []
        self.compDict = {
            'HIGH': 'Compression-LZMA-4',
            'LOW': 'Compression-ZLIB-1'
        }
        self.compressionLvl = ['']  # List: one compression level each step
        self.appConfig = '$APPCONFIGOPTS/Persistency/'  # Default location of the compression level configuration files
        #
        # These lists define the visibility of the output files produced by each step. For MC productions, the
        # visibility is tied to the compression level: visible files are compressed at the highest level.
        #
        self.outputVisFlag = [
        ]  # List of dictionary with default visibility flag of the output files per single step
        self.specialOutputVisFlag = [
        ]  # List of dictionaries with special visibility flag for given file type
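
        # Illustrative sketch (added; not in the original source): for a request with two
        # productions, every per-production list above must have two entries, e.g. (all
        # values are placeholders):
        #
        #   self.prodsTypeList = ['DataStripping', 'Merge']
        #   self.stepsInProds = [[1, 2], [3]]
        #   self.priorities = [5, 8]
        #   self.cpus = [1000000, 300000]
        #   self.outputSEs = ['Tier1-DST', 'Tier1-DST']
        #   self.multicore = ['False', 'False']
        #   self.events = [-1, -1]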
Example #21
class StorageHistoryAgent( AgentModule ):
  def initialize( self ):
    '''Sets defaults
    '''
    self.am_setOption( 'PollingTime', 43200 )
    if self.am_getOption( 'DirectDB', False ):
      from LHCbDIRAC.DataManagementSystem.DB.StorageUsageDB import StorageUsageDB
      self.__stDB = StorageUsageDB()
    else:
      from DIRAC.Core.DISET.RPCClient import RPCClient
      self.__stDB = RPCClient( 'DataManagement/StorageUsage' )
    self.__workDirectory = self.am_getOption( "WorkDirectory" )
    mkDir( self.__workDirectory )
    self.log.info( "Working directory is %s" % self.__workDirectory )

    self.__ignoreDirsList = self.am_getOption( 'Ignore', [] )
    self.log.info( "List of directories to ignore for the DataStorage accounting: %s " % self.__ignoreDirsList )

    self.__bkClient = BookkeepingClient()
    self.__dataUsageClient = DataUsageClient()
    self.cachedMetadata = {}
    # build a dictionary with Event Type descriptions (to be send to accounting, instead of number Event Type ID)
    self.eventTypeDescription = { 'na':'na', 'notInBkk':'notInBkk', 'FailedBkkQuery':'FailedBkkQuery', 'None':'None'}
    self.limitForCommit = self.am_getOption( "LimitForCommit", 1000 )
    self.callsToGetSummary = 0
    self.callsToDirectorySummary = 0
    self.callsToDudbForMetadata = 0
    # keep count of calls to Bkk
    self.callsToBkkgetDirectoryMetadata = 0
    self.callsToBkkGetEvtType = 0

    return S_OK()

  def userStorageAccounting( self ):
    self.log.notice( "-------------------------------------------------------------------------------------\n" )
    self.log.notice( "Generate accounting records for user directories " )
    self.log.notice( "-------------------------------------------------------------------------------------\n" )

    result = self.__stDB.getUserSummary()
    if not result[ 'OK' ]:
      return result
    userCatalogData = result[ 'Value' ]
    print userCatalogData
    self.log.notice( "Got summary for %s users" % ( len( userCatalogData ) ) )
    result = self.__stDB.getUserSummaryPerSE()
    if not result[ 'OK' ]:
      return result
    userSEData = result[ 'Value' ]
    self.log.notice( "Got SE summary for %s users" % ( len( userSEData ) ) )

    now = Time.dateTime()
    numRows = 0
    for user in sorted( userSEData ):
      if user not in userCatalogData:
        self.log.error( "User has SE data but not Catalog data!", user )
        continue
      for se in sorted( userSEData[ user ] ):
        seData = userSEData[ user ][ se ]
        usRecord = UserStorage()
        usRecord.setStartTime( now )
        usRecord.setEndTime( now )
        usRecord.setValueByKey( "User", user )
        usRecord.setValueByKey( "StorageElement", se )
        usRecord.setValueByKey( "LogicalSize", userCatalogData[ user ][ 'Size' ] )
        usRecord.setValueByKey( "LogicalFiles", userCatalogData[ user ][ 'Files' ] )
        usRecord.setValueByKey( "PhysicalSize", seData[ 'Size' ] )
        usRecord.setValueByKey( "PhysicalFiles", seData[ 'Files' ] )
        usRecord.setValueByKey( "StorageSize", 0 )
        usRecord.setValueByKey( "StorageFiles", 0 )
        gDataStoreClient.addRegister( usRecord )
        numRows += 1

      self.log.notice( " User %s is using %.2f GiB (%s files)" % ( user,
                                                                   userCatalogData[ user ][ 'Size' ] / ( 1024.0 ** 3 ),
                                                                   userCatalogData[ user ][ 'Files' ] ) )
    self.log.notice( "Sending %s records to accounting for user storage" % numRows )
    res = gDataStoreClient.commit()
    if not res[ 'OK' ]:
      self.log.notice( "ERROR: committing UserStorage records: %s " % res )
      return S_ERROR( res )
    else:
      self.log.notice( "%s records for UserStorage type successfully committed" % numRows )

  def topDirectoryAccounting( self ):
    self.log.notice( "-------------------------------------------------------------------------------------\n" )
    self.log.notice( "Generate accounting records for top directories " )
    self.log.notice( "-------------------------------------------------------------------------------------\n" )

    ftb = 1.0e12

    # get info from the DB about the LOGICAL STORAGE USAGE (from the su_Directory table):
    result = self.__stDB.getSummary( '/lhcb/' )
    if not result[ 'OK' ]:
      return result
    logicalUsage = result['Value']
    topDirLogicalUsage = {}  # build the list of first level directories
    for row in logicalUsage:
      # d, size, files = row
      splitDir = row.split( "/" )
      if len( splitDir ) > 3:  # skip the root directory "/lhcb/"
        firstLevelDir = '/' + splitDir[1] + '/' + splitDir[2] + '/'
        topDirLogicalUsage.setdefault( firstLevelDir, {'Files':0, 'Size':0} )
        topDirLogicalUsage[ firstLevelDir ][ 'Files' ] += logicalUsage[ row ][ 'Files' ]
        topDirLogicalUsage[ firstLevelDir ][ 'Size' ] += logicalUsage[ row ][ 'Size' ]
    self.log.notice( "Summary on logical usage of top directories: " )
    for row in topDirLogicalUsage:
      self.log.notice( "dir: %s size: %.4f TB  files: %d" % ( row, topDirLogicalUsage[row]['Size'] / ftb,
                                                              topDirLogicalUsage[row]['Files'] ) )

    # loop on top level directories (/lhcb/data, /lhcb/user/, /lhcb/MC/, etc..)
    # to get the summary in terms of PHYSICAL usage grouped by SE:
    seData = {}
    for directory in topDirLogicalUsage:
      result = self.__stDB.getDirectorySummaryPerSE( directory )  # retrieve the PHYSICAL usage
      if not result[ 'OK' ]:
        return result
      seData[ directory ] = result[ 'Value' ]
      self.log.notice( "Got SE summary for %s directories " % ( len( seData ) ) )
      self.log.debug( "SEData: %s" % seData )
    # loop on top level directories to send the accounting records
    numRows = 0
    now = Time.dateTime()
    for directory in seData:
      self.log.debug( "dir: %s SEData: %s " % ( directory, seData[ directory ] ) )
      if directory not in topDirLogicalUsage:
        self.log.error( "Dir %s is in the summary per SE, but it is not in the logical files summary!" % directory )
        continue
      for se in sorted( seData[ directory ] ):
        storageRecord = Storage()
        storageRecord.setStartTime( now )
        storageRecord.setEndTime( now )
        storageRecord.setValueByKey( "Directory", directory )
        storageRecord.setValueByKey( "StorageElement", se )
        storageRecord.setValueByKey( "LogicalFiles", topDirLogicalUsage[ directory ][ 'Files' ] )
        storageRecord.setValueByKey( "LogicalSize", topDirLogicalUsage[ directory ][ 'Size' ] )
        try:
          physicalFiles = seData[ directory ][ se ][ 'Files' ]
        except:
          self.log.error( "WARNING! no files replicas for directory %s on SE %s" % ( directory, se ) )
          physicalFiles = 0
        try:
          physicalSize = seData[ directory ][ se ][ 'Size' ]
        except:
          self.log.error( "WARNING! no size for replicas for directory %s on SE %s" % ( directory, se ) )
          physicalSize = 0
        storageRecord.setValueByKey( "PhysicalFiles", physicalFiles )
        storageRecord.setValueByKey( "PhysicalSize", physicalSize )
        gDataStoreClient.addRegister( storageRecord )
        numRows += 1
        self.log.debug( "Directory: %s SE: %s  physical size: %.4f TB (%d files)" % ( directory,
                                                                                      se,
                                                                                      physicalSize / ftb,
                                                                                      physicalFiles ) )

    self.log.notice( "Sending %s records to accounting for top level directories storage" % numRows )
    res = gDataStoreClient.commit()
    if not res[ 'OK' ]:
      self.log.notice( "ERROR: committing Storage records: %s " % res )
      return S_ERROR( res )
    else:
      self.log.notice( "%s records for Storage type successfully committed" % numRows )

  def bkPathAccounting( self ):
    self.log.notice( "-------------------------------------------------------------------------------------\n" )
    self.log.notice( "Generate accounting records for DataStorage type " )
    self.log.notice( "-------------------------------------------------------------------------------------\n" )


    # counter for DataStorage records, commit to the accounting in bunches of self.limitForCommit records
    self.totalRecords = 0
    self.recordsToCommit = 0
    self.log.notice( " Call the function to create the StorageUsageDB dump.." )
    res = self.generateStorageUsagePerDir()
    if not res[ 'OK' ]:
      self.log.error( "ERROR generating the StorageUsageDB dump per directory" )
      return S_ERROR()

    # Keep a list of all directories in FC that are not found in the Bkk
    self.directoriesNotInBkk = []
    # for debugging purposes build dictionaries with storage usage to compare with the accounting plots
    self.debug_seUsage = {}
    self.debug_seUsage_acc = {}

    # set the time for the accounting records (same time for all records)
    now = Time.dateTime()
    # Get the directory metadata in a bulk query
    metaForList = self.__getMetadataForAcc( self.dirDict.values() )

    # loop on all directories  to get the bkk metadata
    for dirLfn, fullDirectory in self.dirDict.iteritems():
      if dirLfn not in fullDirectory:
        self.log.error( "ERROR: fullDirectory should include the dirname: %s %s " % ( fullDirectory, dirLfn ) )
        continue
      self.log.info( "Processing directory %s " % dirLfn )
      if dirLfn not in self.pfnUsage:
        self.log.error( "ERROR: directory does not have PFN usage %s " % dirLfn )
        continue
      self.log.verbose( "PFN usage: %s " % self.pfnUsage[ dirLfn ] )
      if dirLfn not in self.lfnUsage:
        self.log.error( "ERROR: directory does not have LFN usage %s " % dirLfn )
        continue
      self.log.verbose( "LFN usage: %s " % self.lfnUsage[ dirLfn ] )

      # for DEBUGGING:
      for se in self.pfnUsage[ dirLfn ]:
        self.debug_seUsage.setdefault( se, {'Files': 0 , 'Size' : 0 } )
        self.debug_seUsage[ se ][ 'Files' ] += self.pfnUsage[ dirLfn ][ se ][ 'Files' ]
        self.debug_seUsage[ se ][ 'Size' ] += self.pfnUsage[ dirLfn ][ se ][ 'Size' ]
      # end of DEBUGGING

      # get metadata for this directory
      metaForDir = metaForList.get( fullDirectory, {} )
      if not metaForDir:
        self.log.warn( "Metadata not available for directory %s" % fullDirectory )
        continue

      # Fill in the accounting record
      self.log.info( "Fill the record for %s and metadata: %s " % ( dirLfn, metaForDir ) )
      res = self.fillAndSendAccountingRecord( dirLfn, metaForDir, now )
      if not res['OK']:
        return res
      for se in self.pfnUsage[ dirLfn ]:
        self.debug_seUsage_acc.setdefault( se, { 'Files': 0 , 'Size': 0 } )
        self.debug_seUsage_acc[ se ][ 'Files' ] += self.pfnUsage[ dirLfn ][ se ][ 'Files' ]
        self.debug_seUsage_acc[ se ][ 'Size' ] += self.pfnUsage[ dirLfn ][ se ][ 'Size' ]

    # Don't forget to commit the remaining records!
    self.__commitRecords()


  def execute( self ):
    if self.am_getOption( "CleanBefore", False ):
      self.log.notice( "Cleaning the DB" )
      result = self.__stDB.purgeOutdatedEntries( "/lhcb/user", self.am_getOption( "OutdatedSeconds", 86400 * 10 ) )
      if not result[ 'OK' ]:
        return result
      self.log.notice( "Purged %s outdated records" % result[ 'Value' ] )

    # User accounting (per user and SE)
    self.userStorageAccounting()
    # Accounting per top directory
    self.topDirectoryAccounting()
    # full production data accounting
    self.bkPathAccounting()


    self.log.notice( "-------------------------------------------------------------------------------------\n" )
    self.log.notice( "------ End of cycle report for DataStorage accounting--------------------------------\n" )
    self.log.notice( "Total directories found in FC:  %d " % len( self.dirDict ) )
    totalCallsToStorageUsage = self.callsToGetSummary + self.callsToDirectorySummary
    self.log.notice( "Total calls to StorageUsage: %d , took: %d s " % ( totalCallsToStorageUsage, self.genTotalTime ) )
    totalCallsToBkk = self.callsToBkkgetDirectoryMetadata + self.callsToBkkGetEvtType
    self.log.notice( "Total calls to DataUsage for cache: %d" % self.callsToDudbForMetadata )
    self.log.notice( "Total calls to Bookkeeping: %d (getDirectoryMetadata: %d, getEventType: %d)" % ( totalCallsToBkk,
                                                                                                       self.callsToBkkgetDirectoryMetadata,
                                                                                                       self.callsToBkkGetEvtType ) )
    self.log.notice( "Total records sent to accounting for DataStorage:  %d " % self.totalRecords )
    self.log.notice( "Directories not found in Bookkeeping: %d " % ( len( self.directoriesNotInBkk ) ) )
    fileName = os.path.join( self.__workDirectory, "directoriesNotInBkk.txt" )
    self.log.notice( "written to file: %s " % fileName )
    f = open( fileName, "w" )
    for d in self.directoriesNotInBkk:
      f.write( "%s\n" % d )
    f.close()
    # for DEBUG only
    self.log.info( "Summary of StorageUsage: files size " )
    for se in sorted( self.debug_seUsage ):
      self.log.info( "all: %s  %d %d Bytes ( %.2f TB ) " % ( se, self.debug_seUsage[ se ]['Files'],
                                                             self.debug_seUsage[ se ]['Size'],
                                                             self.debug_seUsage[ se ]['Size'] / 1.0e12 ) )
      if se in self.debug_seUsage_acc:
        self.log.info( "acc: %s  %d %d Bytes ( %.2f TB ) " % ( se, self.debug_seUsage_acc[ se ]['Files'],
                                                               self.debug_seUsage_acc[ se ]['Size'],
                                                               self.debug_seUsage_acc[ se ]['Size'] / 1.0e12 ) )
      else:
        self.log.info( "SE not in self.debug_seUsage_acc keys" )
    return S_OK()

  def __getMetadataForAcc( self, dirList ):
    """ Get metadata for a directory either from memory, from the storageDB or from BK """
    # Try and get the metadata from memory cache
    notFound = []
    metaForList = {}
    for dirName in dirList:
      metaForList[dirName] = self.cachedMetadata.get( dirName, {} )
      if not metaForList[dirName]:
        notFound.append( dirName )
    notInCache = []
    if notFound:
      self.log.info( "Memory metadata cache missed for %d directories" % len( notFound ) )
      self.log.verbose( "call getDirMetadata for (first 10): %s " % str( notFound[0:10] ) )
      for dirChunk in breakListIntoChunks( notFound, 10000 ):
        self.callsToDudbForMetadata += 1
        res = self.__dataUsageClient.getDirMetadata( dirChunk )  # this could be a bulk query for a list of directories
        if not res[ 'OK' ]:
          self.log.error( "Error retrieving %d directories meta-data %s " % ( len( dirChunk ), res['Message'] ) )
          # this usually happens when directories are removed from FC between the StorageUsageDB dump and this call,
          # if the Db is refreshed exactly in this time interval. Not really a problem.
          notInCache += dirChunk
          continue
        self.log.verbose( "getDirMetadata returned: %s " % str( res['Value'] ) )
        for dirName in dirChunk:
          # Compatibility with old (list for single file) and new (dictionary) service
          if isinstance( res['Value'], dict ):
            metaTuple = res['Value'].get( dirName, () )
          elif len( dirList ) == 1 and res['Value']:
            metaTuple = res['Value'][0]
          else:
            metaTuple = ()
          if metaTuple and metaTuple[3] is not None:
            metaForDir = metaForList[dirName]
            _dirID, metaForDir[ 'DataType' ], metaForDir[ 'Activity' ], metaForDir[ 'Conditions' ], metaForDir[ 'ProcessingPass' ], \
              metaForDir[ 'EventType' ], metaForDir[ 'FileType' ], metaForDir[ 'Production' ], metaForDir['Visibility'] = metaTuple
          else:
            notInCache.append( dirName )

      failedBK = []
      if notInCache:
        cachedFromBK = []
        self.log.info( "Directory metadata cache missed for %d directories => query BK and cache" % len( notInCache ) )
        for dirChunk in breakListIntoChunks( notInCache, 200 ):
          self.callsToBkkgetDirectoryMetadata += 1
          res = self.__bkClient.getDirectoryMetadata( dirChunk )
          if not res[ 'OK' ]:
            self.log.error( "Totally failed to query Bookkeeping", res[ 'Message' ] )
            failedBK += dirChunk
            for dirName in dirChunk:
              metaForDir = metaForList[dirName]
              _fillMetadata( metaForDir, 'FailedBkkQuery' )
          else:
            bkMetadata = res['Value']
            self.log.debug( "Successfully queried Bookkeeping, result: %s " % bkMetadata )
            for dirName in dirChunk:
              metaForDir = metaForList[dirName]
              # BK returns a list of metadata, chose the first one...
              metadata = bkMetadata['Successful'].get( dirName, [{}] )[0]
              if metadata and metadata.get( 'ConditionDescription' ) is not None:
                metadata['Visibility'] = metadata.pop( 'VisibilityFlag', metadata.get( 'Visibility', 'na' ) )
                # All is OK, directory found
                _fillMetadata( metaForDir, metadata )
                self.log.verbose( "Cache entry %s in DirMetadata table.." % dirName )
                resInsert = self.__dataUsageClient.insertToDirMetadata( { dirName: metadata} )
                if not resInsert[ 'OK' ]:
                  self.log.error( "Failed to cache metadata in DirMetadata table! %s " % resInsert[ 'Message' ] )
                else:
                  cachedFromBK.append( dirName )
                  self.log.verbose( "Successfully cached metadata for %s : %s" % ( dirName, str( metadata ) ) )
                  self.log.debug( "result: %s " % str( resInsert ) )
              else:
                # Directory not found
                self.log.verbose( "Directory %s not registered in Bookkeeping!" % dirName )
                _fillMetadata( metaForDir, 'notInBkk' )
                failedBK.append( dirName )
                self.directoriesNotInBkk.append( dirName )
              # Translate a few keys for accounting
              for bkName, accName in ( ( 'ConfigName', 'DataType' ), ( 'ConfigVersion', 'Activity' ), ( 'ConditionDescription', 'Conditions' ) ):
                metaForDir[accName] = metaForDir.pop( bkName, 'na' )
        self.log.info( 'Successfully cached %d directories from BK' % len( cachedFromBK ) )
        if self.directoriesNotInBkk:
          self.log.warn( '%d directories not found in BK' % len( self.directoriesNotInBkk ) )

      # cache locally the metadata
      for dirName in [dn for dn in notFound if dn not in failedBK]:
        metaForDir = metaForList[dirName]
        # Translate the numerical event type to a description string
        metaForDir['EventType'] = self.__getEventTypeDescription( metaForDir.pop( 'EventType', 'na' ) )
        self.cachedMetadata[ dirName] = metaForDir.copy()
    else:
      self.log.info( "Memory metadata cache hit for %d directories" % len( dirList ) )
    return metaForList
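
  # Illustrative sketch (added; not in the original source) of the structure returned by
  # __getMetadataForAcc, after the BK keys have been translated for accounting
  # (values are placeholders):
  #
  #   {'/lhcb/MC/MC10/ALLSTREAMS.DST/00010908/0000/': {'DataType': 'MC',
  #                                                    'Activity': 'MC10',
  #                                                    'Conditions': 'SomeBeamCondition',
  #                                                    'ProcessingPass': '/SomePass',
  #                                                    'EventType': 'minimum bias',
  #                                                    'FileType': 'ALLSTREAMS.DST',
  #                                                    'Production': 10908,
  #                                                    'Visibility': 'Y'}}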

  def __commitRecords( self ):
    if self.recordsToCommit:
      res = gDataStoreClient.commit()
      if not res[ 'OK' ]:
        self.log.error( "Accounting ERROR: commit returned %s" % res )
      else:
        self.log.notice( "%d records committed " % self.recordsToCommit )
        self.recordsToCommit = 0
        self.log.notice( "commit for DataStorage returned: %s" % res )

  def generateStorageUsagePerDir( self ):
    '''Generate a dump of the StorageUsageDB and keep it in memory in a dictionary
       (new version of Apr 2012)
    '''

    start = time.time()
    self.log.notice( 'Starting from path: /lhcb/' )
    res = self.__stDB.getStorageDirectories( '/lhcb/' )
    if not res['OK']:
      return S_ERROR()
    totalDirList = res['Value']
    self.log.info( "Total directories retrieved from StorageUsageDB: %d " % len( totalDirList ) )
    # select only terminal directories (directories without sub-directories)
    # mc directory structure: /lhcb/MC/[year]/[file type]/[prod]/0000/ => len = 7
    # raw data:               /lhcb/data/2011/RAW/FULL/LHCb/COLLISION11/99983
    # => len 9 (under /lhcb/data/ from 2011 only RAW, before 2011 also other file types)
    # processed data: under both /lhcb/data and /lhcb/LHCb/
    #                         /lhcb/data/2010/DST/00009300/0000
    # data:                   /lhcb/LHCb/Collision12/ICHEP.DST/00017399/0000/
    self.dirDict = {}
    ignoredDirectories = dict.fromkeys( self.__ignoreDirsList, 0 )
    self.log.info( "Directories to be ignored: %s " % str( sorted( ignoredDirectories ) ) )
    for dirItem in totalDirList:
      # make sure that last character is a '/'
      dirItem = _standardDirectory( dirItem )
      splitDir = dirItem.split( '/' )
      if len( splitDir ) < 4:  # avoid picking up intermediate directories which don't contain files, like /lhcb/
        self.log.warn( "Directory %s skipped, as top directory" % dirItem )
        continue
      secDir = splitDir[ 2 ]
      if secDir in ignoredDirectories:
        self.log.verbose( "Directory to be ignored, skipped: %s " % dirItem )
        ignoredDirectories[ secDir ] += 1
        continue
      # for each type of directory (MC, reconstructed data and runs) check the format, in order not to count more than
      # once the productions with more than one sub-directory
      # for MC directories:
      # example: '/lhcb/MC/MC10/ALLSTREAMS.DST/00010908/0000/',
      # or        /lhcb/MC/2011/DST/00010870/0000
      # one directory for each file type
      # for data
      # /lhcb/LHCb/Collision11/SWIMSTRIPPINGD02KSPIPI.MDST/00019088/0000/
      # for raw data: /lhcb/data/2012/RAW/FULL/LHCb/COLLISION12/133784/
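      # Worked examples (added for illustration) of the mapping computed below:
      #   '/lhcb/data/2012/RAW/FULL/LHCb/COLLISION12/133784/' : splitDir[-6] == 'RAW', so
      #       directory = fullDirectory = '/lhcb/data/2012/RAW/FULL/LHCb/COLLISION12/133784/'
      #   '/lhcb/MC/MC10/ALLSTREAMS.DST/00010908/0000/' : not RAW, suffix = '0000', so
      #       directory     = '/lhcb/MC/MC10/ALLSTREAMS.DST/00010908/'
      #       fullDirectory = '/lhcb/MC/MC10/ALLSTREAMS.DST/00010908/0000/'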
      try:
        dataType = splitDir[ -6 ]
        if dataType == "RAW":
          self.log.verbose( "RAW DATA directory: %s" % splitDir )
          directory = '/'.join( splitDir[:-1] )
          fullDirectory = directory
        else:
          suffix = splitDir[ -2 ]  # is the sub-directory suffix 0000, 0001, etc...
          self.log.verbose( "MC or reconstructed data directory: %s" % splitDir )
          if splitDir[-3] == 'HIST':
            directory = '/'.join( splitDir[:-1] )
            fullDirectory = directory
            self.log.verbose( "histo dir: %s " % directory )
          else:
            directory = '/'.join( splitDir[:-2] )
            fullDirectory = os.path.join( directory, suffix )
        directory = _standardDirectory( directory )
        fullDirectory = _standardDirectory( fullDirectory )
        if directory not in self.dirDict:
          self.dirDict[ directory ] = fullDirectory
        self.log.verbose( "Directory contains production files: %s " % directory )
      except:
        self.log.warn( "The directory has unexpected format: %s " % splitDir )

    self.lfnUsage = {}
    self.pfnUsage = {}
    totalDiscardedDirs = 0
    self.log.info( "Directories that have been discarded:" )
    for dd in ignoredDirectories:
      self.log.info( "/lhcb/%s - %d " % ( dd, ignoredDirectories[ dd ] ) )
      totalDiscardedDirs += ignoredDirectories[ dd ]
    self.log.info( "Total discarded directories: %d " % totalDiscardedDirs )
    self.log.info( "Retrieved %d dirs from StorageUsageDB containing prod files" % len( self.dirDict ) )
    self.log.info( "Getting the number of files and size from StorageUsage service" )
    for d in self.dirDict:
      self.log.verbose( "Get storage usage for directory %s " % d )
      res = self.__stDB.getDirectorySummaryPerSE( d )
      self.callsToDirectorySummary += 1
      if not res[ 'OK' ]:
        self.log.error( "Cannot retrieve PFN usage %s" % res['Message'] )
        continue
      if d not in self.pfnUsage:
        self.pfnUsage[ d ] = res['Value']
      self.log.verbose( "Get logical usage for directory %s " % d )
      res = self.__stDB.getSummary( d )
      self.callsToGetSummary += 1
      if not res[ 'OK' ]:
        self.log.error( "Cannot retrieve LFN usage %s" % res['Message'] )
        continue
      if not res['Value']:
        self.log.error( "For dir %s getSummary returned an empty value: %s " % ( d, str( res ) ) )
        continue
      self.lfnUsage.setdefault( d, {} )
      for retDir, dirInfo in res['Value'].iteritems():
        if d in retDir:
          self.lfnUsage[ d ][ 'LfnSize' ] = dirInfo['Size']
          self.lfnUsage[ d ][ 'LfnFiles'] = dirInfo['Files']
      self.log.verbose( "PFN usage: %s" % self.pfnUsage[ d ] )
      self.log.verbose( "LFN usage: %s" % self.lfnUsage[ d ] )


    end = time.time()
    self.genTotalTime = end - start
    self.log.info( "StorageUsageDB dump completed in %d s" % self.genTotalTime )

    return S_OK()

  def __getEventTypeDescription( self, eventType ):
    # convert eventType to string:
    try:
      eventType = int( eventType )
    except:
      pass
    # check that the event type description is in the cached dictionary, and otherwise query the Bkk
    if eventType not in self.eventTypeDescription:
      self.log.notice( "Event type description not available for eventTypeID %s, getting from Bkk" % eventType )
      res = self.__bkClient.getAvailableEventTypes()
      self.callsToBkkGetEvtType += 1
      if not res['OK']:
        self.log.error( "Error querying the Bkk: %s" % res['Message'] )
      else:
        self.eventTypeDescription.update( dict( res['Value'] ) )
      self.log.verbose( "Updated  self.eventTypeDescription dict: %s " % str( self.eventTypeDescription ) )
      # If still not found, log it!
      if eventType not in self.eventTypeDescription:
        self.log.error( "EventType %s is not in cached dictionary" % str( eventType ) )

    return self.eventTypeDescription.get( eventType, 'na' )

  def fillAndSendAccountingRecord( self, lfnDir, metadataDict, now ):
    ''' Create, fill and send to accounting a record for the DataStorage type.
    '''
    dataRecord = DataStorage()
    dataRecord.setStartTime( now )
    dataRecord.setEndTime( now )
    logicalSize = self.lfnUsage[ lfnDir ][ 'LfnSize' ]
    logicalFiles = self.lfnUsage[ lfnDir ][ 'LfnFiles' ]
    dataRecord.setValueByKey( "LogicalSize", logicalSize )
    dataRecord.setValueByKey( "LogicalFiles", logicalFiles )
    for key in ( 'DataType', 'Activity', 'FileType', 'Production', 'ProcessingPass', 'Conditions', 'EventType' ):
      dataRecord.setValueByKey( key, metadataDict.get( key, 'na' ) )
    self.log.verbose( ">>> Send DataStorage record to accounting:" )
    self.log.verbose( "\tlfnFiles: %d lfnSize: %d " % ( logicalFiles, logicalSize ) )

    for se in self.pfnUsage[ lfnDir ]:
      self.log.verbose( "Filling accounting record for se %s" % se )
      physicalSize = self.pfnUsage[ lfnDir ][ se ][ 'Size' ]
      physicalFiles = self.pfnUsage[ lfnDir ][ se ][ 'Files' ]

      dataRecord.setValueByKey( "StorageElement", se )
      dataRecord.setValueByKey( "PhysicalSize", physicalSize )
      dataRecord.setValueByKey( "PhysicalFiles", physicalFiles )
      self.log.verbose( "\t\tStorageElement: %s --> physFiles: %d  physSize: %d " % ( se, physicalFiles, physicalSize ) )

      # addRegister is NOT making a copy, therefore all records are otherwise overwritten
      res = gDataStoreClient.addRegister( copy.deepcopy( dataRecord ) )
      if not res[ 'OK']:
        self.log.error( "addRegister returned: %s" % res )
        return S_ERROR( "addRegister returned: %s" % res )
      # Reset logical information to zero in order to send it only once!
      dataRecord.setValueByKey( "LogicalSize", 0 )
      dataRecord.setValueByKey( "LogicalFiles", 0 )
      self.totalRecords += 1
      self.recordsToCommit += 1

    # Commit if necessary
    if self.recordsToCommit > self.limitForCommit:
      self.__commitRecords()

    return S_OK()
Example #22
class TransformationCleaningAgent(DiracTCAgent):
    """ .. class:: TransformationCleaningAgent
  """
    def __init__(self, *args, **kwargs):
        """ c'tor
    """
        DiracTCAgent.__init__(self, *args, **kwargs)

        self.directoryLocations = ['TransformationDB', 'StorageUsage']
        self.archiveAfter = 7
        self.fileTypesToKeep = ['GAUSSHIST']

        self.bkClient = None
        self.transClient = None
        self.storageUsageClient = None

    #############################################################################

    def initialize(self):
        """ Standard initialize method for agents
    """
        DiracTCAgent.initialize(self)

        self.directoryLocations = sorted(
            self.am_getOption('DirectoryLocations', self.directoryLocations))
        self.archiveAfter = self.am_getOption('ArchiveAfter',
                                              self.archiveAfter)  # days

        self.fileTypesToKeep = Operations().getValue(
            'Transformations/FileTypesToKeep', self.fileTypesToKeep)

        self.bkClient = BookkeepingClient()
        self.transClient = TransformationClient()
        self.storageUsageClient = StorageUsageClient()

        return S_OK()

    def cleanMetadataCatalogFiles(self, transID):
        """ clean the metadata using BKK and Data Manager. This method is a replacement of the one from base class

    :param self: self reference
    :param int transID: transformation ID
    """
        res = self.bkClient.getProductionFiles(transID, 'ALL', 'Yes')
        if not res['OK']:
            return res
        bkMetadata = res['Value']
        fileToRemove = []
        yesReplica = []
        self.log.info(
            "Found a total of %d files in the BK for transformation %d" %
            (len(bkMetadata), transID))
        for lfn, metadata in bkMetadata.iteritems():
            if metadata['FileType'] != 'LOG':
                fileToRemove.append(lfn)
                if metadata['GotReplica'] == 'Yes':
                    yesReplica.append(lfn)
        if fileToRemove:
            self.log.info(
                "Attempting to remove %d possible remnants from the catalog and storage"
                % len(fileToRemove))
            # Executing with shifter proxy
            gConfigurationData.setOptionInCFG(
                '/DIRAC/Security/UseServerCertificate', 'false')
            res = DataManager().removeFile(fileToRemove, force=True)
            gConfigurationData.setOptionInCFG(
                '/DIRAC/Security/UseServerCertificate', 'true')
            if not res['OK']:
                return res
            for lfn, reason in res['Value']['Failed'].iteritems():
                self.log.error("Failed to remove file found in BK",
                               "%s %s" % (lfn, reason))
            if res['Value']['Failed']:
                return S_ERROR("Failed to remove all files found in the BK")
            if yesReplica:
                self.log.info(
                    "Ensuring that %d files are removed from the BK" %
                    (len(yesReplica)))
                res = FileCatalog(
                    catalogs=['BookkeepingDB']).removeFile(yesReplica)
                if not res['OK']:
                    return res
                for lfn, reason in res['Value']['Failed'].iteritems():
                    self.log.error("Failed to remove file from BK",
                                   "%s %s" % (lfn, reason))
                if res['Value']['Failed']:
                    return S_ERROR("Failed to remove all files from the BK")
        self.log.info("Successfully removed all files found in the BK")
        return S_OK()

    def getTransformationDirectories(self, transID):
        """ get the directories for the supplied transformation from the transformation system

    :param self: self reference
    :param int transID: transformation ID
    """

        res = DiracTCAgent.getTransformationDirectories(self, transID)

        if not res['OK']:
            return res

        directories = res['Value']
        if isinstance(directories, basestring):  # Check for (stupid) formats
            directories = ast.literal_eval(directories)
            if not isinstance(directories, list):
                return S_ERROR("Wrong format of output directories")

        if 'StorageUsage' in self.directoryLocations:
            res = self.storageUsageClient.getStorageDirectories(
                '', '', transID, [])
            if not res['OK']:
                self.log.error("Failed to obtain storage usage directories",
                               res['Message'])
                return res
            transDirectories = res['Value']
            directories = self._addDirs(transID, transDirectories, directories)

        if not directories:
            self.log.info("No output directories found")

        # We should remove from this list the directories created for file types that were
        # uploaded (as output files) but not merged by subsequent steps.
        # However, this is pretty difficult to identify at run time, so it is better to drop the
        # "RemovingFiles" production status and replace it with a flush (this applies only to MC).
        # For now we simply rely on the configured list of file types to keep.
        fileTypesToKeepDirs = []
        for fileTypeToKeep in self.fileTypesToKeep:
            fileTypesToKeepDirs.extend(
                [x for x in directories if fileTypeToKeep in x])
        directories = list(
            set(directories).difference(set(fileTypesToKeepDirs)))

        directories = sorted(directories)
        return S_OK(directories)
Example #23
class fakeClient:
    def __init__(self, trans, transID, lfns, asIfProd):
        self.trans = trans
        self.transID = transID
        from DIRAC.TransformationSystem.Client.TransformationClient import TransformationClient
        self.transClient = TransformationClient()
        from LHCbDIRAC.BookkeepingSystem.Client.BookkeepingClient import BookkeepingClient
        self.bk = BookkeepingClient()
        from DIRAC.DataManagementSystem.Client.DataManager import DataManager
        self.dm = DataManager()
        self.asIfProd = asIfProd

        (self.transFiles, self.transReplicas) = self.prepareForPlugin(lfns)

    def addFilesToTransformation(self, transID, lfns):
        return S_OK({
            'Failed': {},
            'Successful': dict([(lfn, 'Added') for lfn in lfns])
        })

    def getTransformation(self, transID, extraParams=False):
        if transID == self.transID and self.asIfProd:
            transID = self.asIfProd
        if transID != self.transID:
            return self.transClient.getTransformation(transID)
        res = self.trans.getType()
        return DIRAC.S_OK({'Type': res['Value']})

    def getReplicas(self):
        return self.transReplicas

    def getFiles(self):
        return self.transFiles

    def getCounters(self, table, attrList, condDict):
        if condDict['TransformationID'] == self.transID and self.asIfProd:
            condDict['TransformationID'] = self.asIfProd
        if condDict['TransformationID'] != self.transID:
            return self.transClient.getCounters(table, attrList, condDict)
        possibleTargets = [
            'CERN-RAW', 'CNAF-RAW', 'GRIDKA-RAW', 'IN2P3-RAW', 'SARA-RAW',
            'PIC-RAW', 'RAL-RAW', 'RRCKI-RAW'
        ]
        counters = []
        for se in possibleTargets:
            counters.append(({'UsedSE': se}, 0))
        return DIRAC.S_OK(counters)

    def getBookkeepingQuery(self, transID):
        if transID == self.transID and self.asIfProd:
            return self.transClient.getBookkeepingQuery(self.asIfProd)
        return self.trans.getBkQuery()

    def insertTransformationRun(self, transID, runID, xx):
        return DIRAC.S_OK()

    def getTransformationRuns(self, condDict):
        if condDict['TransformationID'] == self.transID and self.asIfProd:
            condDict['TransformationID'] = self.asIfProd
        if condDict['TransformationID'] == self.transID:
            transRuns = []
            runs = condDict.get('RunNumber', [])
            if not runs and self.transFiles:
                res = self.bk.getFileMetadata(
                    [fileDict['LFN'] for fileDict in self.transFiles])
                if not res['OK']:
                    return res
                runs = list(
                    set(meta['RunNumber']
                        for meta in res['Value']['Successful'].itervalues()))
            for run in runs:
                transRuns.append({
                    'RunNumber': run,
                    'Status': "Active",
                    "SelectedSite": None
                })
            return DIRAC.S_OK(transRuns)
        else:
            return self.transClient.getTransformationRuns(condDict)

    def getTransformationFiles(self, condDict=None):
        if condDict.get('TransformationID') == self.transID and self.asIfProd:
            condDict['TransformationID'] = self.asIfProd
        if condDict.get('TransformationID') == self.transID:
            transFiles = []
            if 'Status' in condDict and 'Unused' not in condDict['Status']:
                return DIRAC.S_OK(transFiles)
            runs = None
            if 'RunNumber' in condDict:
                runs = condDict['RunNumber']
                if not isinstance(runs, list):
                    runs = [runs]
            for fileDict in self.transFiles:
                if not runs or fileDict['RunNumber'] in runs:
                    transFiles.append({
                        'LFN': fileDict['LFN'],
                        'Status': 'Unused',
                        'RunNumber': fileDict['RunNumber']
                    })
            return DIRAC.S_OK(transFiles)
        else:
            return self.transClient.getTransformationFiles(condDict=condDict)

    def setParameterToTransformationFiles(self, transID, lfnDict):
        """
    Update the transFiles with some parameters
    """
        if transID == self.transID:
            for fileDict in self.transFiles:
                fileDict.update(lfnDict.get(fileDict['LFN'], {}))
            return S_OK()
        else:
            return self.transClient.setParameterToTransformationFiles(
                transID, lfnDict)

    def getTransformationFilesCount(self, transID, field, selection=None):
        if selection is None:
            selection = {}
        if transID == self.transID or selection.get(
                'TransformationID') == self.transID:
            runs = selection.get('RunNumber')
            if runs and not isinstance(runs, list):
                runs = [runs]
            if field == 'Status':
                counters = {'Unused': 0}
                for fileDict in self.transFiles:
                    if not runs or fileDict['RunNumber'] in runs:
                        counters['Unused'] += 1
            elif field == 'RunNumber':
                counters = {}
                for fileDict in self.transFiles:
                    runID = fileDict['RunNumber']
                    if not runs or runID in runs:
                        counters.setdefault(runID, 0)
                        counters[runID] += 1
            else:
                return DIRAC.S_ERROR('Not implemented for field ' + field)
            counters['Total'] = sum(count for count in counters.itervalues())
            return DIRAC.S_OK(counters)
        else:
            return self.transClient.getTransformationFilesCount(
                transID, field, selection=selection)

    def getTransformationRunStats(self, transIDs):
        counters = {}
        for transID in transIDs:
            if transID == self.transID:
                for fileDict in self.transFiles:
                    runID = fileDict['RunNumber']
                    runCounters = counters.setdefault(transID, {}).setdefault(
                        runID, {'Unused': 0})
                    runCounters['Unused'] += 1
                for runID in counters.get(transID, {}):
                    counters[transID][runID]['Total'] = counters[transID][
                        runID]['Unused']
            else:
                res = self.transClient.getTransformationRunStats([transID])
                if res['OK']:
                    counters.update(res['Value'])
                else:
                    return res
        return DIRAC.S_OK(counters)

    def addRunsMetadata(self, runID, val):
        return self.transClient.addRunsMetadata(runID, val)

    def getRunsMetadata(self, runID):
        return self.transClient.getRunsMetadata(runID)

    def setTransformationRunStatus(self, transID, runID, status):
        return DIRAC.S_OK()

    def setTransformationRunsSite(self, transID, runID, site):
        return DIRAC.S_OK()

    def setFileStatusForTransformation(self, transID, status, lfns):
        return DIRAC.S_OK()

    def addTransformationRunFiles(self, transID, run, lfns):
        return DIRAC.S_OK()

    def setDestinationForRun(self, runID, site):
        return DIRAC.S_OK()

    def getDestinationForRun(self, runID):
        return self.transClient.getDestinationForRun(runID)

    def prepareForPlugin(self, lfns):
        import time
        print "Preparing the plugin input data (%d files)" % len(lfns)
        transType = self.trans.getType()['Value']
        if not lfns:
            return (None, None)
        res = self.bk.getFileMetadata(lfns)
        if res['OK']:
            files = []
            for lfn, metadata in res['Value']['Successful'].iteritems():
                runID = metadata.get('RunNumber', 0)
                runDict = {"RunNumber": runID, "LFN": lfn}
                files.append(runDict)
        else:
            print "Error getting BK metadata", res['Message']
            return ([], {})
        replicas = {}
        startTime = time.time()
        from DIRAC.Core.Utilities.List import breakListIntoChunks
        for lfnChunk in breakListIntoChunks(lfns, 200):
            # print lfnChunk
            if transType.lower() in ("replication", "removal"):
                res = self.dm.getReplicas(lfnChunk, getUrl=False)
            else:
                res = self.dm.getReplicasForJobs(lfnChunk, getUrl=False)
            # print res
            if res['OK']:
                for lfn, ses in res['Value']['Successful'].iteritems():
                    if ses:
                        replicas[lfn] = sorted(ses)
            else:
                print "Error getting replicas of %d files:" % len(
                    lfns), res['Message']
        print "Obtained replicas of %d files in %.3f seconds" % (
            len(lfns), time.time() - startTime)
        return (files, replicas)
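# Illustrative sketch only (not part of the example above): prepareForPlugin
# returns a (files, replicas) pair, where `files` is a list of
# {'RunNumber': ..., 'LFN': ...} dicts built from the Bookkeeping metadata and
# `replicas` maps each LFN to a sorted list of storage elements. The values
# below are hypothetical stand-ins for what the BK and the DataManager return.
files = [{'RunNumber': 189000, 'LFN': '/lhcb/LHCb/Collision17/SOME.DST/00012345_00000001_1.dst'}]
replicas = {'/lhcb/LHCb/Collision17/SOME.DST/00012345_00000001_1.dst': ['CERN-DST-EOS', 'CNAF-DST']}
for fileDict in files:
    print fileDict['RunNumber'], fileDict['LFN'], replicas.get(fileDict['LFN'], [])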
Example #24
########################################################################
"""
  Insert new file types in the Bookkeeping
"""
__RCSID__ = "$Id$"

import DIRAC
from DIRAC.Core.Base import Script

Script.setUsageMessage('\n'.join([ __doc__.split('\n')[1],
                                     'Usage:',
                                     '  %s [option|cfgfile]' % Script.scriptName ]))
Script.parseCommandLine(ignoreErrors=True)

from LHCbDIRAC.BookkeepingSystem.Client.BookkeepingClient import BookkeepingClient
bk = BookkeepingClient()

exitCode = 0

ftype = raw_input("FileType: ")
desc = raw_input("Description: ")
version = raw_input("File type version: ")
print 'Do you want to add this new file type? (yes or no)'
value = raw_input('Choice:')
choice = value.lower()
if choice in ['yes', 'y']:
  res = bk.insertFileTypes(ftype.upper(), desc, version)
  if res['OK']:
    print 'The file type was added successfully!'
  else:
    print 'Error while adding the file type:', res['Message']
Example #25
########################################################################
"""
  Get production information from the Bookkeeping
"""
__RCSID__ = "$Id$"

import DIRAC
from DIRAC.Core.Base import Script

Script.setUsageMessage('\n'.join([
    __doc__.split('\n')[1], 'Usage:',
    '  %s [option|cfgfile] ... ProdID' % Script.scriptName, 'Arguments:',
    '  ProdID:   Production ID'
]))
Script.parseCommandLine(ignoreErrors=True)
args = Script.getPositionalArgs()

if len(args) < 1:
    Script.showHelp()

exitCode = 0

from LHCbDIRAC.BookkeepingSystem.Client.BookkeepingClient import BookkeepingClient
bk = BookkeepingClient()
prod = long(args[0])

res = bk.getProductionInformations(prod)

if res['OK']:
    val = res['Value']
    print "Production Info: "

    infs = val["Production informations"]
    if infs is not None:
        for inf in infs:
            if inf[2] is not None:
                print '    Configuration Name:', inf[0]
                print '    Configuration Version:', inf[1]
                print '    Event type:', inf[2]
Example #26
class PopularityAgent(AgentModule):
    """
  .. class:: PopularityAgent

  """
    # # DataUsageClient
    __dataUsageClient = None
    # # StorageUsageDB instance or DMS/DataUsage RPS client
    __stDB = None
    # # BKK Client
    __bkClient = None
    # # work directory
    __workDirectory = None
    # # counter for records to be sent to the accounting
    numPopRows = None

    def initialize(self):
        """ agent initialisation """
        self.am_setOption('PollingTime', 43200)
        if self.am_getOption('DirectDB', False):
            self.__stDB = StorageUsageDB()
            # self.bkClient = BookkeepingClient()  # the necessary method is not yet available in the Bookkeeping client
        else:
            self.__stDB = RPCClient('DataManagement/DataUsage')
            timeout = 600
        self.__bkClient = BookkeepingClient()
        self.__dataUsageClient = DataUsageClient()
        self.__workDirectory = self.am_getOption("WorkDirectory")
        mkDir(self.__workDirectory)
        self.log.info("Working directory is %s" % self.__workDirectory)
        # by default, collects raw records from Popularity table inserted in the last day
        self.timeInterval = self.am_getOption(
            'timeIntervalForPopularityRecords', 1)
        self.queryTimeout = self.am_getOption('queryTimeout', 3600)
        self.cacheMetadata = {}
        self.limitForCommit = self.am_getOption("LimitForCommit", 1000)

        return S_OK()

# .........................................................................................

    def execute(self):
        """ Main loop of Popularity agent """

        now = datetime.now()
        endTime = datetime(now.year, now.month, now.day, 0, 0, 0)
        startTime = endTime - timedelta(days=self.timeInterval)
        endTimeQuery = endTime.isoformat()
        startTimeQuery = startTime.isoformat()
        # query all traces in the Popularity table in the time range [startTime, endTime] with status 'New'
        # (the selection is the AND of the time range and the 'New' status)
        self.log.info(
            "Querying Pop db to retrieve entries in time range %s - %s " %
            (startTimeQuery, endTimeQuery))
        status = 'New'
        res = self.__dataUsageClient.getDataUsageSummary(
            startTimeQuery, endTimeQuery, status, timeout=self.queryTimeout)
        if not res['OK']:
            self.log.error("Error querying Popularity table.. %s" %
                           res['Message'])
            return S_ERROR(res['Message'])
        val = res['Value']
        self.log.info("Retrieved %d entries from Popularity table" % len(val))
        # Build the popularity report, and store the Ids in a set:
        idList = set()
        traceDict = {}
        for row in val:
            self.log.debug("row: %s" % str(row))
            rowId, dirLfn, site, count, insertTime = row
            if rowId not in idList:
                idList.add(rowId)
            else:
                self.log.error("Same Id found twice! %d " % rowId)
                continue
            if dirLfn.startswith('/lhcb/user/'):
                self.log.verbose(
                    "Private user directory. No metadata stored in Bkk %s " %
                    dirLfn)
                continue
            # get the day bin (number of days since the start of the query window)
            dayBin = (insertTime - startTime).days
            traceDict[dayBin][dirLfn][site] = \
                traceDict.setdefault(dayBin, {}).setdefault(dirLfn, {}).setdefault(site, 0) + count

        # print a summary
        dayList = sorted(traceDict)
        for day in dayList:
            self.log.info(" ###### day %s (starting from %s ) " %
                          (day, startTimeQuery))
            self.log.info("---- %d directories touched:" % len(traceDict[day]))
            for lfn in traceDict[day]:
                self.log.verbose(" ---- lfn %s " % lfn)
                for site in traceDict[day][lfn]:
                    self.log.verbose(" -------- site  %s  count: %d " %
                                     (site, traceDict[day][lfn][site]))

        self.log.info("Retrieve meta-data information for each directory ")
        now = Time.dateTime()
        self.numPopRows = 0  # keep a counter of the records to send to accounting data-store
        for day in traceDict:
            timeForAccounting = self.computeTimeForAccounting(startTime, day)
            self.log.info("Processing day %s - time for accounting %s " %
                          (day, timeForAccounting))
            for dirLfn in traceDict[day]:
                # did = configName = configVersion = conditions = processingPass = eventType = fileType = production = "na"
                # retrieve the directory meta-data from the DirMetadata table
                self.log.info("Processing dir %s " % dirLfn)

                metaForDir = self.cacheMetadata.get(dirLfn)
                if not metaForDir:
                    dirList = [dirLfn]
                    # this could be done in a bulk query for a list of directories... TBD
                    res = self.__dataUsageClient.getDirMetadata(dirList)
                    if not res['OK']:
                        self.log.error(
                            "Error retrieving directory meta-data %s " %
                            res['Message'])
                        continue
                    dirMetadata = res['Value'].get(dirLfn)
                    if not res['Value'] or not dirMetadata:
                        self.log.info(
                            "Cache miss: querying the BK to retrieve '%s' metadata and store it in the cache"
                            % dirList)
                        res = self.__bkClient.getDirectoryMetadata(dirList)
                        if not res['OK']:
                            self.log.error("Failed to query Bookkeeping %s" %
                                           res['Message'])
                            metadata = None
                        else:
                            self.log.verbose(
                                "Successfully queried Bookkeeping, result: %s "
                                % res)
                            metadata = res['Value'].get('Successful',
                                                        {}).get(dirLfn,
                                                                [{}])[0]
                        if not metadata:
                            self.log.warn(
                                "Directory is not registered in Bookkeeping! %s "
                                % dirLfn)
                            configName = configVersion = conditions = processingPass = eventType = fileType = production = "na"
                        else:
                            metadata['Visibility'] = metadata.pop(
                                'VisibilityFlag',
                                metadata.get('Visibility', 'na'))
                            configName = metadata['ConfigName']
                            configVersion = metadata['ConfigVersion']
                            conditions = metadata['ConditionDescription']
                            processingPass = metadata['ProcessingPass']
                            eventType = metadata['EventType']
                            fileType = metadata['FileType']
                            production = metadata['Production']

                            self.log.info(
                                "Caching this entry in the DirMetadata table...")
                            res = self.__dataUsageClient.insertToDirMetadata(
                                {dirLfn: metadata})
                            if not res['OK']:
                                self.log.error(
                                    "Failed to insert metadata in DirMetadata table! %s "
                                    % res['Message'])
                            else:
                                self.log.info(
                                    "Successfully inserted metadata for directory %s in DirMetadata table "
                                    % dirLfn)
                                self.log.verbose("result: %s " % res)

                    else:
                        self.log.info(
                            "Directory %s was cached in DirMetadata table" %
                            dirLfn)
                        try:
                            __did, configName, configVersion, conditions, \
                                processingPass, eventType, fileType, production = dirMetadata[0:8]
                        except BaseException:
                            self.log.error(
                                "Error decoding directory cached information",
                                dirMetadata)
                            continue
                    self.cacheMetadata[dirLfn] = (configName, configVersion,
                                                  conditions, processingPass,
                                                  eventType, fileType,
                                                  production)
                else:
                    configName, configVersion, conditions, processingPass, eventType, fileType, production = metaForDir

                for site in traceDict[day][dirLfn]:
                    usage = traceDict[day][dirLfn][site]
                    # compute the normalized usage, dividing by the number of files in the directory:
                    normUsage = usage  # to be done! after we have decided how to normalize
                    # Build record for the accounting
                    popRecord = Popularity()
                    popRecord.setStartTime(timeForAccounting)
                    popRecord.setEndTime(timeForAccounting)
                    popRecord.setValueByKey("DataType", configName)
                    popRecord.setValueByKey("Activity", configVersion)
                    popRecord.setValueByKey("FileType", fileType)
                    popRecord.setValueByKey("Production", production)
                    popRecord.setValueByKey("ProcessingPass", processingPass)
                    popRecord.setValueByKey("Conditions", conditions)
                    popRecord.setValueByKey("EventType", eventType)
                    popRecord.setValueByKey("StorageElement", site)
                    popRecord.setValueByKey("Usage", usage)
                    popRecord.setValueByKey("NormalizedUsage", normUsage)
                    res = gDataStoreClient.addRegister(popRecord)
                    if not res['OK']:
                        self.log.error("ERROR: addRegister returned: %s" %
                                       res['Message'])
                        continue
                    self.numPopRows += 1
                    self.log.info(
                        ">>> Sending record to accounting for: %s %s %s %s %s %s %s %s %s %d %d "
                        % (timeForAccounting, configName, configVersion,
                           fileType, production, processingPass, conditions,
                           eventType, site, usage, normUsage))
                    if self.numPopRows > self.limitForCommit:
                        res = self.__commitAccounting()
                        if not res['OK']:
                            return res
        # then set the status to Used
        res = self.__commitAccounting()
        if not res['OK']:
            return res
        self.log.info("Set the status to Used for %d entries" % len(idList))
        from DIRAC.Core.Utilities.List import breakListIntoChunks
        for idChunk in breakListIntoChunks(list(idList), 1000):
            res = self.__dataUsageClient.updatePopEntryStatus(
                list(idChunk), 'Used', timeout=self.queryTimeout)
            if not res['OK']:
                self.log.error(
                    "Error updating the status in the Popularity table: %s" %
                    res['Message'])
                return res
        self.log.info("Status updated to Used correctly for %s entries " %
                      len(idList))

        return S_OK()


# .........................................................................................

    def __commitAccounting(self):
        res = gDataStoreClient.commit()
        if not res['OK']:
            self.log.error(
                "while committing %d Popularity records" % self.numPopRows,
                res['Message'])
        else:
            self.log.info(
                "%s records for Popularity type successfully committed" %
                self.numPopRows)
            self.numPopRows = 0
        return res

    def computeTimeForAccounting(self, startTime, day):
        """ Compute the time for the accounting record, starting from the start time of the query and the day bin
    """
        self.log.verbose(
            "find time for accounting for startTime: %s + day %s " %
            (startTime, day))
        daysToAdd = timedelta(
            days=day, hours=12
        )  # add 12h just to put the value in the middle of the time bin
        self.log.verbose("timedelta to add: %s " % daysToAdd)
        accTime = startTime + daysToAdd
        self.log.verbose("accTime = %s " % accTime)
        return accTime
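# A minimal worked example of the arithmetic in computeTimeForAccounting above
# (standalone and purely illustrative; the dates are made up): the record for
# day bin N is stamped at the query start time plus N days plus 12 hours, i.e.
# in the middle of that day's bin.
from datetime import datetime, timedelta

startTime = datetime(2018, 1, 1, 0, 0, 0)  # hypothetical start of the query window
dayBin = 2
accTime = startTime + timedelta(days=dayBin, hours=12)
print accTime  # 2018-01-03 12:00:00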