from DIRAC.Core.Base import Script

Script.setUsageMessage( '\n'.join( [ __doc__.split( '\n' )[1],
                                     'Usage:',
                                     '  %s [option|cfgfile] ... Run Flag' % Script.scriptName,
                                     'Arguments:',
                                     '  Run:      Run number',
                                     '  Flag:     Quality Flag' ] ) )
Script.parseCommandLine( ignoreErrors = True )
args = Script.getPositionalArgs()

from LHCbDIRAC.BookkeepingSystem.Client.BookkeepingClient import BookkeepingClient
bk = BookkeepingClient()

if len( args ) < 2:
  result = bk.getAvailableDataQuality()
  if not result['OK']:
    print 'ERROR: %s' % ( result['Message'] )
    DIRAC.exit( 2 )
  flags = result['Value']
  print "Available Data Quality Flags"
  for flag in flags:
    print flag
  Script.showHelp()

exitCode = 0
rnb = int( args[0] )
flag = args[1]
result = bk.setRunDataQuality( rnb, flag )

if not result['OK']:
Exemple #2
0
class DiracLHCb(Dirac):

  #############################################################################
  def __init__(self, withRepo=False, repoLocation='', operationsHelperIn=None):
    """Internal initialization of the DIRAC API.
    """

    super(DiracLHCb, self).__init__(withRepo=withRepo, repoLocation=repoLocation)
    self.tier1s = []

    if not operationsHelperIn:
      self.opsH = Operations()
    else:
      self.opsH = operationsHelperIn

    self._bkQueryTemplate = {'SimulationConditions': 'All',
                             'DataTakingConditions': 'All',
                             'ProcessingPass': '******',
                             'FileType': 'All',
                             'EventType': 'All',
                             'ConfigName': 'All',
                             'ConfigVersion': 'All',
                             'Production': 0,
                             'StartRun': 0,
                             'EndRun': 0,
                             'DataQuality': 'All',
                             'Visible': 'Yes'}
    self._bkClient = BookkeepingClient()  # to expose all BK client methods indirectly

  #############################################################################
  def addRootFile(self, lfn, fullPath, diracSE, printOutput=False):
    """ Add a Root file to Grid storage, an attempt is made to retrieve the
        POOL GUID of the file prior to upload.

       Example Usage:

       >>> print dirac.addFile('/lhcb/user/p/paterson/myRootFile.tar.gz','myFile.tar.gz','CERN-USER')
       {'OK': True, 'Value':{'Failed': {},
        'Successful': {'/lhcb/user/p/paterson/test/myRootFile.tar.gz': {'put': 64.246301889419556,
                                                                    'register': 1.1102778911590576}}}}

       @param lfn: Logical File Name (LFN)
       @type lfn: string
       @param diracSE: DIRAC SE name e.g. CERN-USER
       @type diracSE: strin
       @param printOutput: Optional flag to print result
       @type printOutput: boolean
       @return: S_OK,S_ERROR
    """
    return super(DiracLHCb, self).addFile(lfn, fullPath, diracSE,
                                          fileGuid=makeGuid(fullPath)[fullPath],
                                          printOutput=printOutput)

  def addFile(self, lfn, fullPath, diracSE, printOutput=False):  # pylint: disable=arguments-differ
    """ Copy of addRootFile
    """
    return super(DiracLHCb, self).addFile(lfn, fullPath, diracSE,
                                          fileGuid=makeGuid(fullPath)[fullPath],
                                          printOutput=printOutput)

  def getBKAncestors(self, lfns, depth=1, replica=True):
    """ This function allows to retrieve ancestor files from the Bookkeeping.

        Example Usage:

        >>> dirac.getBKAncestors('/lhcb/data/2009/DST/00005727/0000/00005727_00000042_1.dst',2)
        {'OK': True, 'Value': ['/lhcb/data/2009/DST/00005727/0000/00005727_00000042_1.dst',
        '/lhcb/data/2009/RAW/FULL/LHCb/COLLISION09/63807/063807_0000000004.raw']}

       @param lfn: Logical File Name (LFN)
       @type lfn: string or list
       @param depth: Ancestor depth
       @type depth: integer
    """

    result = self._bkClient.getFileAncestors(lfns, depth, replica=replica)
    if not result['OK']:
      self.log.error('Could not get ancestors', result['Message'])
      return result
    ancestors = set(x['FileName'] for ancestors in result['Value']['Successful'].itervalues() for x in ancestors)

    return S_OK(lfns + list(ancestors))

  #############################################################################
  def bkQueryRunsByDate(self, bkPath, startDate, endDate, dqFlag='All', selection='Runs'):
    """ This function allows to create and perform a BK query given a supplied
        BK path. The following BK path convention is expected:

        /<ConfigurationName>/<Configuration Version>/<Condition Description><Processing Pass>/<Event Type>/<File Type>

        so an example for 2016 collisions data would be:

        /LHCb/Collision09//LHCb/Collision16/Beam6500GeV-VeloClosed-MagDown/Real Data/Reco16/Stripping26/90000000/EW.DST

        The startDate and endDate must be specified as yyyy-mm-dd.

        Runs can be selected based on their status e.g. the selection parameter
        has the following possible attributes:
         - Runs - data for all runs in the range are queried (default)
         - ProcessedRuns - data is retrieved for runs that are processed
         - NotProcessed - data is retrieved for runs that are not yet processed.

       Example Usage:

       >>> dirac.bkQueryRunsByDate('/LHCb/Collision16//Real Data/90000000/RAW',
                                   '2016-08-20','2016-08-22',dqFlag='OK',selection='Runs')
       {'OK': True, 'Value': [<LFN1>,<LFN2>]}

      dirac.bkQueryRunsByDate('/LHCb/Collision16/Beam6500GeV-VeloClosed-MagDown/Real'
                              'Data/Reco16/Stripping26/90000000/EW.DST',
                              '2016-08-20','2016-08-22',dqFlag='OK',selection='Runs')

       @param bkPath: BK path as described above
       @type bkPath: string
       @param dqFlag: Optional Data Quality flag
       @type dqFlag: string
       @param startDate: Start date  yyyy-mm-dd
       @param startDate: string
       @param endDate: End date  yyyy-mm-dd
       @param endDate: string
       @param selection: Either Runs, ProcessedRuns or NotProcessed
       @param selection: string
       @return: S_OK,S_ERROR
    """
    runSelection = ['Runs', 'ProcessedRuns', 'NotProcessed']
    if selection not in runSelection:
      return S_ERROR('Expected one of %s not "%s" for selection' % (', '.join(runSelection), selection))

    if not isinstance(bkPath, str):
      return S_ERROR('Expected string for bkPath')

    # remove any double slashes, spaces must be preserved
    # remove any empty components from leading and trailing slashes
    bkQuery = BKQuery().buildBKQuery(bkPath)
    if not bkQuery:
      return S_ERROR(
          'Please provide a BK path: '
          '/<ConfigurationName>/<Configuration Version>/<Condition Description>/<Processing Pass>'
          '/<Event Type>/<File Type>')

    if not startDate or not endDate:
      return S_ERROR('Expected both start and end dates to be defined in format: yyyy-mm-dd')

    if not isinstance(startDate, str) or not isinstance(endDate, str):
      return S_ERROR('Expected yyyy-mm-dd string for start and end dates')

    if not len(startDate.split('-')) == 3 or not len(endDate.split('-')) == 3:
      return S_ERROR('Expected yyyy-mm-dd string for start and end dates')

    start = time.time()
    result = self._bkClient.getRunsForAGivenPeriod({'StartDate': startDate, 'EndDate': endDate})
    rtime = time.time() - start
    self.log.info('BK query time: %.2f sec' % rtime)
    if not result['OK']:
      self.log.info('Could not get runs with given dates from BK with result: "%s"' % result)
      return result

    if not result['Value']:
      self.log.info('No runs selected from BK for specified dates')
      return result

    if selection not in result['Value']:
      return S_ERROR('No %s runs for specified dates' % (selection))

    runs = result['Value'][selection]
    self.log.info('Found the following %s runs:\n%s' % (len(runs), ', '.join([str(i) for i in runs])))
    # temporary until we can query for a discrete list of runs
    selectedData = []
    for run in runs:
      query = bkQuery.copy()
      query['StartRun'] = run
      query['EndRun'] = run
      query['CheckRunStatus'] = True if selection in ['ProcessedRuns', 'NotProcessed'] else False
      if dqFlag:
        check = self.__checkDQFlags(dqFlag)
        if not check['OK']:
          return check
        dqFlag = check['Value']
        query['DataQuality'] = dqFlag
      start = time.time()
      result = self._bkClient.getVisibleFilesWithMetadata(query)
      rtime = time.time() - start
      self.log.info('BK query time: %.2f sec' % rtime)
      self.log.verbose(result)
      if not result['OK']:
        return result
      self.log.info('Selected %s files for run %s' % (len(result['Value']), run))
      if result['Value']['LFNs']:
        selectedData += result['Value']['LFNs'].keys()

    self.log.info('Total files selected = %s' % (len(selectedData)))
    return S_OK(selectedData)

  #############################################################################
  def bkQueryRun(self, bkPath, dqFlag='All'):
    """ This function allows to create and perform a BK query given a supplied
        BK path. The following BK path convention is expected:

        /<Run Number>/<Processing Pass>/<Event Type>/<File Type>

        so an example for 2009 collisions data would be:

       /63566/Real Data + RecoToDST-07/90000000/DST

       In addition users can specify a range of runs using the following convention:

       /<Run Number 1> - <Run Number 2>/<Processing Pass>/<Event Type>/<File Type>

       so extending the above example this would look like:

       /63566-63600/Real Data + RecoToDST-07/90000000/DST

       Example Usage:

       >>> dirac.bkQueryRun('/63566/Real Data/RecoToDST-07/90000000/DST')
       {'OK':True,'Value': ['/lhcb/data/2009/DST/00005842/0000/00005842_00000008_1.dst']}

       @param bkPath: BK path as described above
       @type bkPath: string
       @param dqFlag: Optional Data Quality flag
       @type dqFlag: string
       @return: S_OK,S_ERROR
    """
    if not isinstance(bkPath, str):
      return S_ERROR('Expected string for bkPath')

    # remove any double slashes, spaces must be preserved
    # remove any empty components from leading and trailing slashes
    bkPath = translateBKPath(bkPath, procPassID=1)
    if not len(bkPath) == 4:
      return S_ERROR('Expected 4 components to the BK path: /<Run Number>/<Processing Pass>/<Event Type>/<File Type>')

    runNumberString = bkPath[0].replace('--', '-').replace(' ', '')
    startRun = 0
    endRun = 0
    if '-' in runNumberString:
      runs = runNumberString.split('-')
      if len(runs) != 2:
        return S_ERROR('Could not determine run range from "%s", try "<Run 1> - <Run2>"' % (runNumberString))
      try:
        start = int(runs[0])
        end = int(runs[1])
      except Exception:
        return S_ERROR('Invalid run range: %s' % runNumberString)
      startRun = min(start, end)
      endRun = max(start, end)
    else:
      try:
        startRun = int(runNumberString)
        endRun = startRun
      except Exception:
        return S_ERROR('Invalid run number: %s' % runNumberString)

    query = self._bkQueryTemplate.copy()
    query['StartRun'] = startRun
    query['EndRun'] = endRun
    query['ProcessingPass'] = bkPath[1]
    query['EventType'] = bkPath[2]
    query['FileType'] = bkPath[3]

    if dqFlag:
      check = self.__checkDQFlags(dqFlag)
      if not check['OK']:
        return check
      dqFlag = check['Value']
      query['DataQuality'] = dqFlag

    result = self.bkQuery(query)
    self.log.verbose(result)
    return result

  #############################################################################
  def bkQueryProduction(self, bkPath, dqFlag='All'):
    """ This function allows to create and perform a BK query given a supplied
        BK path. The following BK path convention is expected:

        /<ProductionID>/[<Processing Pass>/<Event Type>/]<File Type>

        so an example for 2009 collisions data would be:

       /5842/Real Data + RecoToDST-07/90000000/DST

       Note that neither the processing pass nor the event type should be necessary. So either of them can be omitted

       a data quality flag can also optionally be provided, the full list of these is available
       via the getAllDQFlags() method.

       Example Usage:

       >>> dirac.bkQueryProduction('/5842/Real Data/RecoToDST-07/90000000/DST')
       {'OK': True, 'Value': [<LFN1>,<LFN2>]}

       @param bkPath: BK path as described above
       @type bkPath: string
       @param dqFlag: Optional Data Quality flag
       @type dqFlag: string
       @return: S_OK,S_ERROR
    """
    if not isinstance(bkPath, str):
      return S_ERROR('Expected string for bkPath')

    # remove any double slashes, spaces must be preserved
    # remove any empty components from leading and trailing slashes
    bkPath = translateBKPath(bkPath, procPassID=1)
    if len(bkPath) < 2:
      return S_ERROR('Invalid bkPath: should at least contain /ProductionID/FileType')
    query = self._bkQueryTemplate.copy()
    try:
      query['Production'] = int(bkPath[0])
    except Exception:
      return S_ERROR('Invalid production ID')
    query['FileType'] = bkPath[-1]

    if dqFlag:
      check = self.__checkDQFlags(dqFlag)
      if not check['OK']:
        return check
      dqFlag = check['Value']
      query['DataQuality'] = dqFlag

    for key, val in query.items():
      if isinstance(val, basestring) and val.lower() == 'all':
        query.pop(key)
    result = self.bkQuery(query)
    self.log.verbose(result)
    return result

  #############################################################################
  def bkQueryPath(self, bkPath, dqFlag='All'):
    """ This function allows to create and perform a BK query given a supplied
        BK path. The following BK path convention is expected:

       /<ConfigurationName>/<Configuration Version>/<Sim or Data Taking Condition>
       /<Processing Pass>/<Event Type>/<File Type>

       so an example for 2009 collsions data would be:

       /LHCb/Collision09/Beam450GeV-VeloOpen-MagDown/Real Data + RecoToDST-07/90000000/DST

       or for MC09 simulated data:

       /MC/2010/Beam3500GeV-VeloClosed-MagDown-Nu1/2010-Sim01Reco01-withTruth/27163001/DST

       a data quality flag can also optionally be provided, the full list of these is available
       via the getAllDQFlags() method.

       Example Usage:

       >>> dirac.bkQueryPath('/MC/2010/Beam3500GeV-VeloClosed-MagDown-Nu1/Sim07/Reco06-withTruth/10012004/DST')
       {'OK': True, 'Value': [<LFN1>,<LFN2>]}

       @param bkPath: BK path as described above
       @type bkPath: string
       @param dqFlag: Optional Data Quality flag
       @type dqFlag: string
       @return: S_OK,S_ERROR
    """
    if not isinstance(bkPath, str):
      return S_ERROR('Expected string for bkPath')

    # remove any double slashes, spaces must be preserved
    # remove any empty components from leading and trailing slashes
    bkPath = translateBKPath(bkPath, procPassID=3)
    if not len(bkPath) == 6:
      return S_ERROR('Expected 6 components to the BK path: '
                     '/<ConfigurationName>/<Configuration Version>/<Sim or Data Taking Condition>'
                     '/<Processing Pass>/<Event Type>/<File Type>')

    query = self._bkQueryTemplate.copy()
    query['ConfigName'] = bkPath[0]
    query['ConfigVersion'] = bkPath[1]
    query['ProcessingPass'] = bkPath[3]
    query['EventType'] = bkPath[4]
    query['FileType'] = bkPath[5]

    if dqFlag:
      check = self.__checkDQFlags(dqFlag)
      if not check['OK']:
        return check
      dqFlag = check['Value']
      query['DataQuality'] = dqFlag

    # The problem here is that we don't know if it's a sim or data taking condition,
    # assume that if configName=MC this is simulation
    if bkPath[0].lower() == 'mc':
      query['SimulationConditions'] = bkPath[2]
    else:
      query['DataTakingConditions'] = bkPath[2]

    result = self.bkQuery(query)
    self.log.verbose(result)
    return result

  #############################################################################
  def bookkeepingQuery(self, SimulationConditions='All', DataTakingConditions='All',
                       ProcessingPass='******', FileType='All', EventType='All', ConfigName='All',
                       ConfigVersion='All', ProductionID=0, DataQuality='ALL'):
    """ This function will create and perform a BK query using the supplied arguments
        and return a list of LFNs.

        Example Usage:

        >>> dirac.bookkeepingQuery(ConfigName='LHCb',ConfigVersion='Collision09',
        EventType='90000000',ProcessingPass='******',DataTakingConditions='Beam450GeV-VeloOpen-MagDown')
        {'OK':True,'Value':<files>}

       @param  ConfigName: BK ConfigName
       @type ConfigName: string
       @param  EventType: BK EventType
       @type EventType: string
       @param  FileType: BK FileType
       @type FileType: string
       @param  ProcessingPass: BK ProcessingPass
       @type ProcessingPass: string
       @param  ProductionID: BK ProductionID
       @type ProductionID: integer
       @param  DataQuality: BK DataQuality
       @type DataQuality: string
       @param  ConfigVersion: BK ConfigVersion
       @type ConfigVersion: string
       @param  DataTakingConditions: BK DataTakingConditions
       @type DataTakingConditions: string
       @param  SimulationConditions: BK SimulationConditions
       @type SimulationConditions: string
       @return: S_OK,S_ERROR
    """
    query = self._bkQueryTemplate.copy()
    query['SimulationConditions'] = SimulationConditions
    query['DataTakingConditions'] = DataTakingConditions
    query['ProcessingPass'] = ProcessingPass
    query['FileType'] = FileType
    query['EventType'] = EventType
    query['ConfigName'] = ConfigName
    query['ConfigVersion'] = ConfigVersion
    query['Production'] = ProductionID
    query['DataQuality'] = DataQuality
    return self.bkQuery(query)

  #############################################################################
  def bkQuery(self, bkQueryDict):
    """ Developer function. Perform a query to the LHCb Bookkeeping to return
        a list of LFN(s). This method takes a BK query dictionary.

        Example Usage:

        >>> print dirac.bkQuery(query)
        {'OK':True,'Value':<files>}

       @param bkQueryDict: BK query
       @type bkQueryDict: dictionary (see bookkeepingQuery() for keys)
       @return: S_OK,S_ERROR
    """
    problematicFields = []
    # Remove the Visible flag as anyway the method is for visible files ;-)
    # bkQueryDict.setdefault( 'Visible', 'Yes' )
    for name, value in bkQueryDict.items():
      if name not in self._bkQueryTemplate:
        problematicFields.append(name)

    if problematicFields:
      msg = 'The following fields are not valid for a BK query: %s\nValid fields include: %s' % \
            (', '.join(problematicFields), ', '.join(self._bkQueryTemplate.keys()))
      return S_ERROR(msg)

    for name, value in bkQueryDict.items():
      if name == "Production" or name == "EventType" or name == "StartRun" or name == "EndRun":
        if value == 0:
          del bkQueryDict[name]
        else:
          bkQueryDict[name] = str(value)
      elif name == "FileType":
        if value.lower() == "all":
          bkQueryDict[name] = 'ALL'
      else:
        if str(value).lower() == "all":
          del bkQueryDict[name]

    if 'Production' in bkQueryDict or 'StartRun' in bkQueryDict or 'EndRun' in bkQueryDict:
      self.log.verbose('Found a specific query so loosening some restrictions to prevent BK overloading')
    else:
      if 'SimulationConditions' not in bkQueryDict and 'DataTakingConditions' not in bkQueryDict:
        return S_ERROR('A Simulation or DataTaking Condition must be specified for a BK query.')
      if 'EventType' not in bkQueryDict and 'ConfigName' not in bkQueryDict and 'ConfigVersion' not in bkQueryDict:
        return S_ERROR(
            'The minimal set of BK fields for a query is: EventType, ConfigName and ConfigVersion'
            ' in addition to a Simulation or DataTaking Condition')

    self.log.verbose('Final BK query dictionary is:')
    for item in bkQueryDict.iteritems():
      self.log.verbose('%s : %s' % item)

    start = time.time()
    result = self._bkClient.getVisibleFilesWithMetadata(bkQueryDict)
#    result = bk.getFilesWithGivenDataSets(bkQueryDict)
    rtime = time.time() - start
    self.log.info('BK query time: %.2f sec' % rtime)

    if not result['OK']:
      return S_ERROR('BK query returned an error: "%s"' % (result['Message']))

    if not result['Value']:
      return self._errorReport('No BK files selected')

    returnedFiles = len(result['Value'])
    self.log.verbose('%s files selected from the BK' % (returnedFiles))
    return result

  #############################################################################
  def __checkDQFlags(self, flags):
    """ Internal function.  Checks the provided flags against the list of
        possible DQ flag statuses from the Bookkeeping.
    """
    dqFlags = []
    if isinstance(flags, list):
      dqFlags = flags
    else:
      dqFlags = [flags]

    bkFlags = self.getAllDQFlags()
    if not bkFlags['OK']:
      return bkFlags

    final = []
    for flag in dqFlags:
      if flag.lower() == 'all':
        final.append(flag.upper())
      else:
        flag = flag.upper()
        if flag not in bkFlags['Value']:
          msg = 'Specified DQ flag "%s" is not in allowed list: %s' % (flag, ', '.join(bkFlags['Value']))
          self.log.error(msg)
          return S_ERROR(msg)
        else:
          final.append(flag)

    # when first coding this it was not possible to use a list ;)
    if len(final) == 1:
      final = final[0]

    return S_OK(final)

  #############################################################################
  def getAllDQFlags(self, printOutput=False):
    """ Helper function.  Returns the list of possible DQ flag statuses
        from the Bookkeeping.

        Example Usage:

        >>> print dirac.getAllDQFlags()
        {'OK':True,'Value':<flags>}

       @param printOutput: Optional flag to print result
       @type printOutput: boolean
       @return: S_OK,S_ERROR
    """
    result = self._bkClient.getAvailableDataQuality()
    if not result['OK']:
      self.log.error('Could not obtain possible DQ flags from BK with result:\n%s' % (result))
      return result

    if printOutput:
      flags = result['Value']
      self.log.info('Possible DQ flags from BK are: %s' % (', '.join(flags)))

    return result

  #############################################################################
  def getDataByRun(self, lfns, printOutput=False):
    """Sort the supplied lfn list by run. An S_OK object will be returned
       containing a dictionary of runs and the corresponding list of LFN(s)
       associated with them.

       Example usage:

       >>> print dirac.getDataByRun(lfns)
       {'OK': True, 'Value': {<RUN>:['<LFN>','<LFN>',...], <RUN>:['<LFN>',..]}}


       @param lfns: Logical File Name(s)
       @type lfns: list
       @param printOutput: Optional flag to print result
       @type printOutput: boolean
       @return: S_OK,S_ERROR
    """
    if isinstance(lfns, str):
      lfns = [lfns.replace('LFN:', '')]
    elif isinstance(lfns, list):
      try:
        lfns = [str(lfn.replace('LFN:', '')) for lfn in lfns]
      except ValueError as x:
        return self._errorReport(str(x), 'Expected strings for LFNs')
    else:
      return self._errorReport('Expected single string or list of strings for LFN(s)')

    runDict = {}
    start = time.time()
    result = self._bkClient.getFileMetadata(lfns)
    self.log.verbose("Obtained BK file metadata in %.2f seconds" % (time.time() - start))
    if not result['OK']:
      self.log.error('Failed to get bookkeeping metadata with result "%s"' % (result['Message']))
      return result

    for lfn, metadata in result['Value']['Successful'].items():
      if 'RunNumber' in metadata:
        runNumber = metadata['RunNumber']
        runDict.setdefault(runNumber, []).append(lfn)
      else:
        self.log.warn('Could not find run number from BK for %s' % (lfn))

    if printOutput:
      self.log.notice(self.pPrint.pformat(runDict))

    return S_OK(runDict)

  #############################################################################
  def bkMetadata(self, lfns, printOutput=False):
    """Return metadata for the supplied lfn list. An S_OK object will be returned
       containing a dictionary of LFN(s) and the corresponding metadata associated
       with them.

       Example usage:

       >>> print dirac.bkMetadata(lfns)
       {'OK': True, 'Value': {<LFN>:{'<Name>':'<Value>',...},...}}

       @param lfns: Logical File Name(s)
       @type lfns: list
       @param printOutput: Optional flag to print result
       @type printOutput: boolean
       @return: S_OK,S_ERROR
    """
    if isinstance(lfns, str):
      lfns = [lfns.replace('LFN:', '')]
    elif isinstance(lfns, list):
      try:
        lfns = [str(lfn.replace('LFN:', '')) for lfn in lfns]
      except ValueError as x:
        return self._errorReport(str(x), 'Expected strings for LFNs')
    else:
      return self._errorReport('Expected single string or list of strings for LFN(s)')

    start = time.time()
    result = self._bkClient.getFileMetadata(lfns)
    self.log.verbose("Obtained BK file metadata in %.2f seconds" % (time.time() - start))
    if not result['OK']:
      self.log.error('Failed to get bookkeeping metadata with result "%s"' % (result['Message']))
      return result

    if printOutput:
      self.log.notice(self.pPrint.pformat(result['Value']))

    return result

  #############################################################################

  def lhcbProxyInit(self, *args):  # pylint: disable=no-self-use
    """ just calling the dirac-proxy-init script
    """
    os.system("dirac-proxy-init -o LogLevel=NOTICE -t --rfc %s" % "' '".join(args))

  #############################################################################

  def lhcbProxyInfo(self, *args):  # pylint: disable=no-self-use
    """ just calling the dirac-proxy-info script
    """
    os.system("dirac-proxy-info -o LogLevel=NOTICE %s" % "' '".join(args))
  #############################################################################

  def gridWeather(self, printOutput=False):
    """This method gives a snapshot of the current Grid weather from the perspective
       of the DIRAC site and SE masks.  Tier-1 sites are returned with more detailed
       information.

       Example usage:

       >>> print dirac.gridWeather()
       {'OK': True, 'Value': {{'Sites':<siteInfo>,'SEs':<seInfo>,'Tier-1s':<tierInfo>}}

       @param printOutput: Optional flag to print result
       @type printOutput: boolean
       @return: S_OK,S_ERROR
    """

    lcgSites = gConfig.getSections('/Resources/Sites/LCG')
    if not lcgSites['OK']:
      return lcgSites

    for lcgSite in lcgSites['Value']:

      tier = gConfig.getValue('/Resources/Sites/LCG/%s/MoUTierLevel' % lcgSite, 2)
      if tier in (0, 1):
        self.tier1s.append(lcgSite)

    siteInfo = self.checkSites()
    if not siteInfo['OK']:
      return siteInfo
    siteInfo = siteInfo['Value']

    seInfo = self.checkSEs()
    if not seInfo['OK']:
      return seInfo
    seInfo = seInfo['Value']

    tierSEs = {}
    for site in self.tier1s:
      tierSEs[site] = getSEsForSite(site)['Value']

    tierInfo = {}
    for site, seList in tierSEs.items():
      tierInfo[site] = {}
      for se in seList:
        if se in seInfo:
          tierSEInfo = seInfo[se]
          tierInfo[site][se] = tierSEInfo
      if site in siteInfo['AllowedSites']:
        tierInfo[site]['MaskStatus'] = 'Allowed'
      else:
        tierInfo[site]['MaskStatus'] = 'Banned'

    if printOutput:
      self.log.notice('========> Tier-1 status in DIRAC site and SE masks')
      for site in sorted(self.tier1s):
        self.log.notice('\n====> %s is %s in site mask\n' % (site, tierInfo[site]['MaskStatus']))
        self.log.notice('%s %s %s' % ('Storage Element'.ljust(25), 'Read Status'.rjust(15), 'Write Status'.rjust(15)))
        for se in sorted(tierSEs[site]):
          if se in tierInfo[site]:
            self.log.notice('%s %s %s' % (se.ljust(25),
                                          tierInfo[site][se]['ReadStatus'].rjust(15),
                                          tierInfo[site][se]['WriteStatus'].rjust(15))
                            )

      self.log.notice('\n========> Tier-2 status in DIRAC site mask\n')
      allowedSites = siteInfo['AllowedSites']
      bannedSites = siteInfo['BannedSites']
      for site in self.tier1s:
        if site in allowedSites:
          allowedSites.remove(site)
        if site in bannedSites:
          bannedSites.remove(site)
      self.log.notice(' %s sites are in the site mask, %s are banned.\n' % (len(allowedSites), len(bannedSites)))

    summary = {'Sites': siteInfo, 'SEs': seInfo, 'Tier-1s': tierInfo}
    return S_OK(summary)

  #############################################################################
  def checkSites(self, printOutput=False):  # pylint: disable=no-self-use
    """Return the list of sites in the DIRAC site mask and those which are banned.

       Example usage:

       >>> print dirac.checkSites()
       {'OK': True, 'Value': {'AllowedSites':['<Site>',...],'BannedSites':[]}

       @param printOutput: Optional flag to print result
       @type printOutput: boolean
       @return: S_OK,S_ERROR
    """

    res = getSites()
    if not res['OK']:
      self.log.error('Could not get list of sites from CS', res['Message'])
      return res
    totalList = res['Value']

    res = DiracAdmin().getSiteMask()
    if not res['OK']:
      return res

    sites = res['Value']
    bannedSites = []
    for site in totalList:
      if site not in sites:
        bannedSites.append(site)

    if printOutput:
      self.log.notice('\n========> Allowed Sites\n')
      self.log.notice('\n'.join(sites))
      self.log.notice('\n========> Banned Sites\n')
      self.log.notice('\n'.join(bannedSites))
      self.log.notice('\nThere is a total of %s allowed sites and %s banned sites in the system.' % (len(sites),
                                                                                                     len(bannedSites)))

    return S_OK({'AllowedSites': sites, 'BannedSites': bannedSites})

  #############################################################################
  def checkSEs(self, printOutput=False):  # pylint: disable=no-self-use
    """Check the status of read and write operations in the DIRAC SE mask.

       Example usage:

       >>> print dirac.checkSEs()
       {'OK': True, 'Value': {<LFN>:{'<Name>':'<Value>',...},...}}

       @param printOutput: Optional flag to print result
       @type printOutput: boolean
       @return: S_OK,S_ERROR
    """
    res = gConfig.getSections('/Resources/StorageElements', True)

    if not res['OK']:
      self.log.error('Failed to get storage element information', res['Message'])
      return res

    if printOutput:
      self.log.notice('%s %s %s' % ('Storage Element'.ljust(25), 'Read Status'.rjust(15), 'Write Status'.rjust(15)))

    seList = sorted(res['Value'])
    result = {}
    rss = ResourceStatus()
    for se in seList:
      res = rss.getElementStatus(se, 'StorageElement')
      if not res['OK']:
        self.log.error("Failed to get StorageElement status for %s" % se)
      else:
        readState = res['Value'].get('ReadAccess', 'Active')
        writeState = res['Value'].get('WriteAccess', 'Active')
        result[se] = {'ReadStatus': readState, 'WriteStatus': writeState}
        if printOutput:
          self.log.notice('%s %s %s' % (se.ljust(25), readState.rjust(15), writeState.rjust(15)))

    return S_OK(result)

  def splitInputDataBySize(self, lfns, maxSizePerJob=20, printOutput=False):
    """Split the supplied lfn list by the replicas present at the possible
       destination sites, based on a maximum size.
       An S_OK object will be returned containing a list of
       lists in order to create the jobs.

       Example usage:

       >>> d.splitInputDataBySize(lfns,10)
       {'OK': True, 'Value': [['<LFN>'], ['<LFN>']]}


       @param lfns: Logical File Name(s) to split
       @type lfns: list
       @param maxSizePerJob: Maximum size (in GB) per bunch
       @type maxSizePerJob: integer
       @param printOutput: Optional flag to print result
       @type printOutput: boolean
       @return: S_OK,S_ERROR
    """
    sitesForSE = {}
    if isinstance(lfns, str):
      lfns = [lfns.replace('LFN:', '')]
    elif isinstance(lfns, list):
      try:
        lfns = [str(lfn.replace('LFN:', '')) for lfn in lfns]
      except TypeError as x:
        return self._errorReport(str(x), 'Expected strings for LFNs')
    else:
      return self._errorReport('Expected single string or list of strings for LFN(s)')

    if not isinstance(maxSizePerJob, int):
      try:
        maxSizePerJob = int(maxSizePerJob)
      except ValueError as x:
        return self._errorReport(str(x), 'Expected integer for maxSizePerJob')
    maxSizePerJob *= 1000 * 1000 * 1000

    replicaDict = self.getReplicas(lfns)
    if not replicaDict['OK']:
      return replicaDict
    replicas = replicaDict['Value']['Successful']
    if not replicas:
      return self._errorReport(replicaDict['Value']['Failed'].items()[0],
                               'Failed to get replica information')
    siteLfns = {}
    for lfn, reps in replicas.items():
      possibleSites = set(site
                          for se in reps
                          for site in sitesForSE.setdefault(se, getSitesForSE(se).get('Value', [])))
      siteLfns.setdefault(','.join(sorted(possibleSites)), []).append(lfn)

    if '' in siteLfns:
      # Some files don't have active replicas
      return self._errorReport('No active replica found for', str(siteLfns['']))
    # Get size of files
    metadataDict = self.getLfnMetadata(lfns, printOutput)
    if not metadataDict['OK']:
      return metadataDict
    fileSizes = dict((lfn, metadataDict['Value']['Successful'].get(lfn, {}).get('Size', maxSizePerJob))
                     for lfn in lfns)

    lfnGroups = []
    # maxSize is in GB
    for files in siteLfns.values():
      # Now get bunches of files,
      # Sort in decreasing size
      files.sort(cmp=(lambda f1, f2: fileSizes[f2] - fileSizes[f1]))
      while files:
        # print [( lfn, fileSizes[lfn] ) for lfn in files]
        group = []
        sizeTot = 0
        for lfn in list(files):
          size = fileSizes[lfn]
          if size >= maxSizePerJob:
            lfnGroups.append([lfn])
          elif sizeTot + size < maxSizePerJob:
            sizeTot += size
            group.append(lfn)
            files.remove(lfn)
        if group:
          lfnGroups.append(group)

    if printOutput:
      self.log.notice(self.pPrint.pformat(lfnGroups))
    return S_OK(lfnGroups)

    #############################################################################

  def getAccessURL(self, lfn, storageElement, protocol=None, printOutput=False):
    """Allows to retrieve an access URL for an LFN replica given a valid DIRAC SE
       name.  Contacts the file catalog and contacts the site SRM endpoint behind
       the scenes.

       Example Usage:

       >>> print dirac.getAccessURL('/lhcb/data/CCRC08/DST/00000151/0000/00000151_00004848_2.dst','CERN-RAW')
       {'OK': True, 'Value': {'Successful': {'srm://...': {'SRM2': 'rfio://...'}}, 'Failed': {}}}

       :param lfn: Logical File Name (LFN)
       :type lfn: str or python:list
       :param storageElement: DIRAC SE name e.g. CERN-RAW
       :type storageElement: string
       :param printOutput: Optional flag to print result
       :type printOutput: boolean
       :returns: S_OK,S_ERROR
    """
    ret = self._checkFileArgument(lfn, 'LFN')
    if not ret['OK']:
      return ret
    lfn = ret['Value']
    if isinstance(lfn, basestring):
      lfn = [lfn]
    results = getAccessURL(lfn, storageElement, protocol=protocol)
    if printOutput:
      printDMResult(results, empty="File not at SE", script="dirac-dms-lfn-accessURL")
    return results

  #############################################################################

  def _getLocalInputData(self, parameters):
    """ LHCb extension of DIRAC API's _getLocalInputData. Only used for handling ancestors.
    """
    inputData = parameters.get('InputData')
    if inputData:
      self.log.debug("DiracLHCb._getLocalInputData. InputData: %s" % inputData)
      if isinstance(inputData, basestring):
        inputData = inputData.split(';')
      inputData = [lfn.strip('LFN:') for lfn in inputData]
      ancestorsDepth = int(parameters.get('AncestorDepth', 0))
      if ancestorsDepth:
        self.log.debug("DiracLHCb._getLocalInputData. ancestorsDepth: %d" % ancestorsDepth)
        res = self._bkClient.getFileAncestors(inputData, ancestorsDepth)
        if not res['OK']:
          self.log.error("Can't get ancestors", res['Message'])
          return res
        ancestorsLFNs = []
        for ancestorsLFN in res['Value']['Successful'].itervalues():
          ancestorsLFNs += [i['FileName'] for i in ancestorsLFN]
        self.log.info("DiracLHCb._getLocalInputData: adding %d ancestors" % len(ancestorsLFNs))
        self.log.verbose("%s", ', '.join(ancestorsLFNs))
        inputData += ancestorsLFNs

    return S_OK(inputData)