Esempio n. 1
0
    def testBkQuery(self, bkQuery, printOutput=False, bkClient=None):
        """Run a Bookkeeping query and return the resulting files.

        :param bkQuery: the BK query (dict or BKQuery) to execute
        :param printOutput: when True, pretty-print the raw result
        :param bkClient: optional pre-built BookkeepingClient; a fresh one is
                         created when not supplied
        :return: S_OK(list of files) or the error report on failure
        """
        client = bkClient if bkClient is not None else BookkeepingClient()

        res = client.getFiles(bkQuery)
        if not res['OK']:
            return self._errorReport(res, 'Failed to perform BK query')
        gLogger.info('The supplied query returned %d files' %
                     len(res['Value']))
        if printOutput:
            self._prettyPrint(res)
        return S_OK(res['Value'])
Esempio n. 2
0
class BookkeepingWatchAgent(AgentModule, TransformationAgentsUtilities):
  """ LHCbDIRAC only agent. A threaded agent.

      Watches the Bookkeeping for new files matching the BK queries of
      Active/Idle transformations and adds them (with their metadata and,
      where the plugin needs it, run information) to those transformations.
  """

  def __init__(self, *args, **kwargs):
    """ c'tor
    """
    AgentModule.__init__(self, *args, **kwargs)
    TransformationAgentsUtilities.__init__(self)

    # Queue of transformation IDs to be processed by the worker threads,
    # and the list of IDs currently queued or being processed
    self.bkQueriesToBeChecked = Queue.Queue()
    self.bkQueriesInCheck = []

    # Default periods (seconds): full BK query at most once a day,
    # incremental queries go back 2 hours before the last known time stamp
    self.fullUpdatePeriod = 86400
    self.bkUpdateLatency = 7200
    self.debug = False

    # transID -> thread tag, used both for logging and as "still busy" marker
    self.transInThread = {}

    self.pickleFile = 'BookkeepingWatchAgent.pkl'
    self.chunkSize = 1000

    self.pluginsWithNoRunInfo = ['LHCbStandard', 'ReplicateDataset', 'ArchiveDataset',
                                 'LHCbMCDSTBroadcastRandom', 'ReplicateToLocalSE',
                                 'RemoveReplicas', 'RemoveReplicasWhenProcessed',
                                 'RemoveReplicasWithAncestors', 'ReplicateWithAncestors',
                                 'ReduceReplicas', 'RemoveDatasetFromDisk',
                                 'DestroyDataset', 'DestroyDatasetWhenProcessed',
                                 'BySize', 'Standard']

    # Persistent state (dumped to / loaded from the pickle file)
    self.timeLog = {}
    self.fullTimeLog = {}
    self.bkQueries = {}

    self.transClient = None
    self.bkClient = None

  def initialize(self):
    """ Make the necessary initializations.
        The ThreadPool is created here, the _execute() method is what each thread will execute.
    """

    self.fullUpdatePeriod = self.am_getOption('FullUpdatePeriod', self.fullUpdatePeriod)
    self.bkUpdateLatency = self.am_getOption('BKUpdateLatency', self.bkUpdateLatency)
    self.debug = self.am_getOption('verbose', self.debug)

    self.pickleFile = os.path.join(self.am_getWorkDirectory(), self.pickleFile)
    self.chunkSize = self.am_getOption('maxFilesPerChunk', self.chunkSize)

    self.pluginsWithNoRunInfo = Operations().getValue('TransformationPlugins/PluginsWithNoRunInfo',
                                                      self.pluginsWithNoRunInfo)

    self._logInfo('Full Update Period: %d seconds' % self.fullUpdatePeriod)
    self._logInfo('BK update latency : %d seconds' % self.bkUpdateLatency)
    self._logInfo('Plugins with no run info: %s' % ', '.join(self.pluginsWithNoRunInfo))

    self.transClient = TransformationClient()
    self.bkClient = BookkeepingClient()

    try:
      # Binary mode: pickle data is not text; also catch PickleError so a
      # corrupted pickle file does not abort the agent initialization
      with open(self.pickleFile, 'rb') as pf:
        self.timeLog = pickle.load(pf)
        self.fullTimeLog = pickle.load(pf)
        self.bkQueries = pickle.load(pf)
      self._logInfo("successfully loaded Log from", self.pickleFile, "initialize")
    except (EOFError, IOError, pickle.PickleError):
      self._logInfo("failed loading Log from", self.pickleFile, "initialize")
      self.timeLog = {}
      self.fullTimeLog = {}
      self.bkQueries = {}

    maxNumberOfThreads = self.am_getOption('maxThreadsInPool', 1)
    threadPool = ThreadPool(maxNumberOfThreads, maxNumberOfThreads)

    for i in xrange(maxNumberOfThreads):
      threadPool.generateJobAndQueueIt(self._execute, [i])

    gMonitor.registerActivity("Iteration", "Agent Loops", AGENT_NAME, "Loops/min", gMonitor.OP_SUM)
    return S_OK()

  @gSynchro
  def __dumpLog(self):
    """ dump the log in the pickle file
    """
    if self.pickleFile:
      try:
        # Binary mode for portability of the pickle stream
        with open(self.pickleFile, 'wb') as pf:
          pickle.dump(self.timeLog, pf)
          pickle.dump(self.fullTimeLog, pf)
          pickle.dump(self.bkQueries, pf)
        self._logVerbose("successfully dumped Log into %s" % self.pickleFile)
      except IOError as e:
        self._logError("fail to open %s: %s" % (self.pickleFile, e))
      except pickle.PickleError as e:
        self._logError("fail to dump %s: %s" % (self.pickleFile, e))
      except ValueError as e:
        self._logError("fail to close %s: %s" % (self.pickleFile, e))

  ################################################################################

  def execute(self):
    """ Main execution method. Just fills a list, and a queue, with BKKQueries ID.
    """

    gMonitor.addMark('Iteration', 1)
    # Get all the transformations
    result = self.transClient.getTransformations(condDict={'Status': ['Active', 'Idle']})
    if not result['OK']:
      self._logError("Failed to get transformations.", result['Message'])
      return S_OK()
    transIDsList = [long(transDict['TransformationID']) for transDict in result['Value']]
    res = self.transClient.getTransformationsWithBkQueries(transIDsList)
    if not res['OK']:
      self._logError("Failed to get transformations with Bk Queries.", res['Message'])
      return S_OK()
    transIDsWithBkQueriesList = res['Value']

    _count = 0
    # Process each transformation; skip those already queued or in progress
    for transID in transIDsWithBkQueriesList:
      if transID in self.bkQueriesInCheck:
        continue
      self.bkQueriesInCheck.append(transID)
      self.bkQueriesToBeChecked.put(transID)
      _count += 1

    self._logInfo("Out of %d transformations, %d put in thread queue" % (len(result['Value']), _count))

    self.__dumpLog()
    return S_OK()

  def _execute(self, threadID):
    """ Real executor. This is what is executed by the single threads - so do not return here! Just continue
    """

    while True:  # not self.bkQueriesToBeChecked.empty():

      transID = None
      # startTime stays None until a transformation has been dequeued, so the
      # finally clause below cannot hit an undefined name (NameError) if an
      # exception occurs before the timer is started
      startTime = None

      try:

        transID = self.bkQueriesToBeChecked.get()
        self.transInThread[transID] = ' [Thread%d] [%s] ' % (threadID, str(transID))

        startTime = time.time()
        self._logInfo("Processing transformation %s." % transID, transID=transID)

        res = self.transClient.getTransformation(transID, extraParams=False)
        if not res['OK']:
          self._logError("Failed to get transformation", res['Message'], transID=transID)
          continue
        transPlugin = res['Value']['Plugin']

        res = self.transClient.getBookkeepingQuery(transID)
        if not res['OK']:
          self._logError("Failed to get BkQuery", res['Message'], transID=transID)
          continue
        bkQuery = res['Value']

        # Determine the correct time stamp to use for this transformation
        now = datetime.datetime.utcnow()
        self.__timeStampForTransformation(transID, bkQuery, now)

        try:
          files = self.__getFiles(transID, bkQuery, now)
        except RuntimeError as e:
          # In case we failed a full query, we should retry full query until successful
          if 'StartDate' not in bkQuery:
            self.bkQueries.pop(transID, None)
          self._logError("Failed to get response from the Bookkeeping: %s" % e, "", "__getFiles", transID)
          continue

        runDict = {}
        filesMetadata = {}
        # get the files metadata
        for lfnChunk in breakListIntoChunks(files, self.chunkSize):
          start = time.time()
          res = self.bkClient.getFileMetadata(lfnChunk)
          self._logVerbose("Got metadata from BK for %d files" % len(lfnChunk), transID=transID, reftime=start)
          if not res['OK']:
            self._logError("Failed to get BK metadata for %d files" % len(lfnChunk),
                           res['Message'], transID=transID)
            # No need to return as we only consider files that are successful...
          else:
            filesMetadata.update(res['Value']['Successful'])

        # There is no need to add the run information for a transformation that doesn't need it
        if transPlugin not in self.pluginsWithNoRunInfo:
          for lfn, metadata in filesMetadata.iteritems():
            runID = metadata.get('RunNumber', None)
            if isinstance(runID, (basestring, int, long)):
              runDict.setdefault(int(runID), []).append(lfn)
          try:
            self.__addRunsMetadata(transID, runDict.keys())
          except RuntimeError as e:
            self._logException("Failure adding runs metadata",
                               method="__addRunsMetadata",
                               lException=e,
                               transID=transID)
        else:
          runDict[None] = filesMetadata.keys()

        # Add all new files to the transformation
        for runID in sorted(runDict):
          lfnList = runDict[runID]
          # We enter all files of a run at once, otherwise do it by chunks
          lfnChunks = [lfnList] if runID else breakListIntoChunks(lfnList, self.chunkSize)
          for lfnChunk in lfnChunks:
            # Add the files to the transformation
            self._logVerbose('Adding %d lfns for transformation' % len(lfnChunk), transID=transID)
            result = self.transClient.addFilesToTransformation(transID, lfnChunk)
            if not result['OK']:
              self._logError("Failed to add %d lfns to transformation" % len(lfnChunk), result['Message'],
                             transID=transID)
              return result
            else:
              # Handle errors
              errors = {}
              for lfn, error in result['Value']['Failed'].iteritems():
                errors.setdefault(error, []).append(lfn)
              for error, lfns in errors.iteritems():
                self._logWarn("Failed to add files to transformation", error, transID=transID)
                self._logVerbose("\n\t".join([''] + lfns))
              # Add the metadata and RunNumber to the newly inserted files
              addedLfns = [lfn for (lfn, status) in result['Value']['Successful'].iteritems() if status == 'Added']
              if addedLfns:
                # Add files metadata: size and file type
                lfnDict = dict((lfn, {'Size': filesMetadata[lfn]['FileSize'],
                                      'FileType': filesMetadata[lfn]['FileType']})
                               for lfn in addedLfns)
                res = self.transClient.setParameterToTransformationFiles(transID, lfnDict)
                if not res['OK']:
                  # transID added for consistency with the other error logs
                  self._logError("Failed to set transformation files metadata", res['Message'], transID=transID)
                  return res
                # Add run information if it exists
                if runID:
                  self._logInfo("Added %d files to transformation for run %d, now including run information"
                                % (len(addedLfns), runID), transID=transID)
                  self._logVerbose("Associating %d files to run %d" % (len(addedLfns), runID), transID=transID)
                  res = self.transClient.addTransformationRunFiles(transID, runID, addedLfns)
                  if not res['OK']:
                    self._logError("Failed to associate %d files to run %d" % (len(addedLfns), runID),
                                   res['Message'], transID=transID)
                    return res
                else:
                  self._logInfo("Added %d files to transformation" % len(addedLfns), transID=transID)

      except Exception as x:  # pylint: disable=broad-except
        self._logException('Exception while adding files to transformation',
                           lException=x,
                           method='_execute',
                           transID=transID)
      finally:
        # Only report a processing time when processing actually started
        if startTime is not None:
          self._logInfo("Processed transformation", transID=transID, reftime=startTime)
        if transID in self.bkQueriesInCheck:
          self.bkQueriesInCheck.remove(transID)
        self.transInThread.pop(transID, None)

    return S_OK()

  @gSynchro
  def __timeStampForTransformation(self, transID, bkQuery, now):
    """ Determine the correct time stamp to use for this transformation
    """

    fullTimeLog = self.fullTimeLog.setdefault(transID, now)
    bkQueryLog = self.bkQueries.setdefault(transID, {})

    bkQueryLog.pop('StartDate', None)
    self.bkQueries[transID] = bkQuery.copy()
    if transID in self.timeLog \
            and bkQueryLog == bkQuery \
            and (now - fullTimeLog) < datetime.timedelta(seconds=self.fullUpdatePeriod):
      # If it is more than a day since the last reduced query, make a full query just in case
      timeStamp = self.timeLog[transID]
      delta = datetime.timedelta(seconds=self.bkUpdateLatency)
      bkQuery['StartDate'] = (timeStamp - delta).strftime('%Y-%m-%d %H:%M:%S')
    if 'StartDate' not in bkQuery:
      self.fullTimeLog[transID] = now

  def __getFiles(self, transID, bkQuery, now):
    """ Perform the query to the Bookkeeping

        :raises RuntimeError: when the BK query fails
    """
    self._logInfo("Using BK query for transformation: %s" % str(bkQuery), transID=transID)
    start = time.time()
    result = self.bkClient.getFiles(bkQuery)
    self._logVerbose("BK query time: %.2f seconds." % (time.time() - start), transID=transID)
    if not result['OK']:
      raise RuntimeError(result['Message'])
    else:
      self.__updateTimeStamp(transID, now)
      if result['Value']:
        self._logInfo("Obtained %d files from BK" % len(result['Value']), transID=transID)
      return result['Value']

  @gSynchro
  def __updateTimeStamp(self, transID, now):
    """
    Update time stamp for current transformation to now
    """
    self.timeLog[transID] = now

  def __addRunsMetadata(self, transID, runsList):
    """ Add the run metadata

        :raises RuntimeError: when any BK or Transformation service call fails
    """
    runsInCache = self.transClient.getRunsInCache({'Name': ['TCK', 'CondDb', 'DDDB']})
    if not runsInCache['OK']:
      raise RuntimeError(runsInCache['Message'])
    newRuns = list(set(runsList) - set(runsInCache['Value']))
    if newRuns:
      self._logVerbose("Associating run metadata to %d runs" % len(newRuns), transID=transID)
      res = self.bkClient.getRunInformation({'RunNumber': newRuns, 'Fields': ['TCK', 'CondDb', 'DDDB']})
      if not res['OK']:
        raise RuntimeError(res['Message'])
      else:
        for run, runMeta in res['Value'].iteritems():
          res = self.transClient.addRunsMetadata(run, runMeta)
          if not res['OK']:
            raise RuntimeError(res['Message'])
    # Add run duration to the metadata
    runsInCache = self.transClient.getRunsInCache({'Name': ['Duration']})
    if not runsInCache['OK']:
      raise RuntimeError(runsInCache['Message'])
    newRuns = list(set(runsList) - set(runsInCache['Value']))
    if newRuns:
      self._logVerbose("Associating run duration to %d runs" % len(newRuns), transID=transID)
      res = self.bkClient.getRunInformation({'RunNumber': newRuns, 'Fields': ['JobStart', 'JobEnd']})
      if not res['OK']:
        raise RuntimeError(res['Message'])
      else:
        for run, runMeta in res['Value'].iteritems():
          duration = (runMeta['JobEnd'] - runMeta['JobStart']).seconds
          res = self.transClient.addRunsMetadata(run, {'Duration': duration})
          if not res['OK']:
            raise RuntimeError(res['Message'])

  def finalize(self):
    """ Gracious finalization
    """
    if self.bkQueriesInCheck:
      self._logInfo("Wait for queue to get empty before terminating the agent (%d tasks)" % len(self.transInThread))
      self.bkQueriesInCheck = []
      while self.transInThread:
        time.sleep(2)
      self.log.info("Threads are empty, terminating the agent...")
    return S_OK()
Esempio n. 3
0
class RequestTrackingAgent(AgentModule):
    """ Updates the real number of input events of tracked production
        requests from the Bookkeeping.
    """

    def __init__(self, *args, **kwargs):
        """ c'tor
    """
        AgentModule.__init__(self, *args, **kwargs)

        self.bkClient = None
        self.prodReq = None

    def initialize(self):
        """ Just initializing the clients
    """
        self.bkClient = BookkeepingClient()
        self.prodReq = RPCClient("ProductionManagement/ProductionRequest")

        return S_OK()

    def execute(self):
        """The RequestTrackingAgent execution method.
    """
        result = self.prodReq.getTrackedInput()
        update = []
        if result['OK']:
            gLogger.verbose(
                "Requests tracked: %s" %
                (','.join([str(req['RequestID']) for req in result['Value']])))
            for request in result['Value']:
                # Use a dedicated variable: do not rebind 'result' while
                # iterating over result['Value']
                bkResult = self.bkInputNumberOfEvents(request)
                if bkResult['OK']:
                    update.append({
                        'RequestID': request['RequestID'],
                        'RealNumberOfEvents': bkResult['Value']
                    })
                else:
                    gLogger.error(
                        'Input of %s is not updated: %s' %
                        (str(request['RequestID']), bkResult['Message']))
        else:
            gLogger.error('Request service: %s' % result['Message'])
        if update:
            result = self.prodReq.updateTrackedInput(update)
            if not result['OK']:
                gLogger.error(result['Message'])

        return S_OK('Request Tracking information updated')

    def bkInputNumberOfEvents(self, request):
        """ Extremely dirty way...

        Builds a BK query from the request fields and returns
        S_OK(number of input events) or S_ERROR.
    """
        dq = request.get('inDataQualityFlag', 'ALL')
        if dq != 'ALL':
            dq = [str(idq) for idq in dq.replace(' ', '').split(',')]
        try:
            condition = {
                'ProcessingPass':
                str(request.get('inProPass', '')).strip(),
                'FileType': [
                    str(ift) for ift in request.get('inFileType', '').replace(
                        ' ', '').split(',')
                ],
                'EventType':
                str(request.get('EventType', '')).replace(' ', ''),
                'ConfigName':
                str(request.get('configName', '')).replace(' ', ''),
                'ConfigVersion':
                str(request.get('configVersion', '')).replace(' ', ''),
                'DataQualityFlag':
                dq
            }
            # These direct item accesses are the ones that can actually raise
            # KeyError on an incomplete request, so keep them inside the try
            if 'condType' in request and request['condType'] == 'Run':
                condition['DataTakingConditions'] = str(request['SimCondition'])
            else:
                condition['SimulationConditions'] = str(request['SimCondition'])
            if str(request['inProductionID']) not in ('0', 'ALL'):
                condition['Production'] = [
                    int(x) for x in str(request['inProductionID']).split(',')
                ]
            if 'inTCKs' in request and str(request['inTCKs']) != '':
                condition['TCK'] = [
                    str(x) for x in str(request['inTCKs']).split(',')
                ]
        except KeyError as ke:
            # .get() so a missing RequestID cannot raise inside the handler
            gLogger.error("%s is incomplete: %s" %
                          (request.get('RequestID'), repr(ke)))
            return S_ERROR(repr(ke))

        condition['NbOfEvents'] = True

        gLogger.verbose("Requesting: ", str(condition))
        result = self.bkClient.getFiles(condition)
        if not result['OK']:
            gLogger.error("Error requesting files from BK", result['Message'])
            return result
        if not result['Value'][0]:
            return S_OK(0)
        try:
            sum_nr = long(result['Value'][0])
        except ValueError as e:
            return S_ERROR("Can not convert result from BK call: %s" % str(e))
        return S_OK(sum_nr)
Esempio n. 4
0
class BKQuery():
    """
  It used to build a dictionary using a given Bookkeeping path
  which is used to query the Bookkeeping database.
  """
    def __init__(self,
                 bkQuery=None,
                 prods=None,
                 runs=None,
                 fileTypes=None,
                 visible=True,
                 eventTypes=None):
        """Build the query dictionary from *bkQuery*, which may be another
        BKQuery instance, a ready-made dictionary, or a Bookkeeping path
        string; the remaining arguments refine the query.
        """
        if prods is None:
            prods = []
        if runs is None:
            runs = []
        if fileTypes is None:
            fileTypes = []

        self.extraBKitems = ("StartRun", "EndRun", "Production", "RunNumber")
        self.__bkClient = BookkeepingClient()
        self.__bkFileTypes = set()
        self.__exceptFileTypes = set()
        self.__fakeAllDST = 'ZZZZZZZZALL.DST'
        self.__alreadyWarned = False

        bkPath = ''
        bkQueryDict = {}
        if isinstance(bkQuery, BKQuery):
            bkQueryDict = bkQuery.getQueryDict().copy()
        elif isinstance(bkQuery, dict):
            bkQueryDict = bkQuery.copy()
        elif isinstance(bkQuery, basestring):
            bkPath = bkQuery

        bkQueryDict = self.buildBKQuery(bkPath=bkPath,
                                        bkQueryDict=bkQueryDict,
                                        prods=prods,
                                        runs=runs,
                                        fileTypes=fileTypes,
                                        eventTypes=eventTypes,
                                        visible=visible)
        self.__bkPath = bkPath
        self.__bkQueryDict = bkQueryDict
        if not bkQueryDict.get('Visible'):
            self.setVisible(visible)

    def __str__(self):
        return str(self.__bkQueryDict)

    def buildBKQuery(self,
                     bkPath='',
                     bkQueryDict=None,
                     prods=None,
                     runs=None,
                     fileTypes=None,
                     visible=True,
                     eventTypes=None):
        """ it builds a dictionary using a path

        :param bkPath: path like /ConfigName/ConfigVersion/ConditionDescription/ProcessingPass/EventType/FileType,
                       optionally prefixed with 'evt:', 'pp:', 'prod:', 'runs:', 'sim:', 'daq:' or 'cond:'
        :param bkQueryDict: a pre-existing query dictionary to start from
        :param prods: production number(s)
        :param runs: run number(s) or range(s)
        :param fileTypes: file type(s)
        :param visible: visibility flag
        :param eventTypes: event type(s)
        :return: the BK query dictionary ({} or the partial dict on error)
        """
        bkQueryDict = bkQueryDict if bkQueryDict is not None else {}
        prods = prods if prods is not None else []
        if not isinstance(prods, list):
            prods = [prods]
        runs = runs if runs is not None else []
        fileTypes = fileTypes if fileTypes is not None else []

        gLogger.verbose("BKQUERY.buildBKQuery: Path %s, Dict %s, \
      Prods %s, Runs %s, FileTypes %s, EventTypes %s, Visible %s" %
                        (bkPath, str(bkQueryDict), str(prods), str(runs),
                         str(fileTypes), str(eventTypes), visible))
        self.__bkQueryDict = {}
        if not bkPath and not prods and not bkQueryDict and not runs:
            return {}
        if bkQueryDict:
            bkQuery = bkQueryDict.copy()
        else:
            bkQuery = {}

        ###### Query given as a path /ConfigName/ConfigVersion/ConditionDescription/ProcessingPass/EventType/FileType ######
        # or if prefixed with evt: /ConfigName/ConfigVersion/EventType/ConditionDescription/ProcessingPass/FileType
        if bkPath:
            self.__getAllBKFileTypes()
            bkFields = ("ConfigName", "ConfigVersion", "ConditionDescription",
                        "ProcessingPass", "EventType", "FileType")
            url = bkPath.split(':', 1)
            if len(url) == 1:
                bkPath = url[0]
            else:
                # A prefix selects an alternative field ordering
                if url[0] == 'evt':
                    bkFields = ("ConfigName", "ConfigVersion", "EventType",
                                "ConditionDescription", "ProcessingPass",
                                "FileType")
                elif url[0] == 'pp':
                    bkFields = ("ProcessingPass", "EventType", "FileType")
                elif url[0] == 'prod':
                    bkFields = ("Production", "ProcessingPass", "EventType",
                                "FileType")
                elif url[0] == 'runs':
                    bkFields = ("Runs", "ProcessingPass", "EventType",
                                "FileType")
                elif url[0] not in ('sim', 'daq', 'cond'):
                    gLogger.error('Invalid BK path:%s' % bkPath)
                    return self.__bkQueryDict
                bkPath = url[1]
            if bkPath[0] != '/':
                bkPath = '/' + bkPath
            if bkPath[0:2] == '//':
                bkPath = bkPath[1:]
            bkPath = bkPath.replace("RealData", "Real Data")
            i = 0
            # The processing pass accumulates path elements starting from '/';
            # it must start as '/' for the os.path.join accumulation and the
            # "still empty" test below ('******' was a corrupted placeholder)
            processingPass = '/'
            defaultPP = False
            bk = bkPath.split('/')[1:] + len(bkFields) * ['']
            for bpath in bk:
                gLogger.verbose(
                    'buildBKQuery.1. Item #%d, Field %s, From Path %s, ProcessingPass %s'
                    % (i, bkFields[i], bpath, processingPass))
                if bkFields[i] == 'ProcessingPass':
                    if bpath != '' and bpath.upper() != 'ALL' and \
                        not bpath.split(',')[0].split(' ')[0].isdigit() and \
                            not bpath.upper() in self.__bkFileTypes:
                        processingPass = os.path.join(processingPass, bpath)
                        continue
                    # Set the PP
                    if processingPass != '/':
                        bkQuery['ProcessingPass'] = processingPass
                    else:
                        defaultPP = True
                    i += 1
                gLogger.verbose(
                    'buildBKQuery.2. Item #%d, Field %s, From Path %s, ProcessingPass %s'
                    % (i, bkFields[i], bpath, processingPass))
                if bkFields[i] == 'EventType' and bpath:
                    eventTypeList = []
                    # print b
                    if bpath.upper() == 'ALL':
                        bpath = 'ALL'
                    else:
                        for et in bpath.split(','):
                            try:
                                eventType = int(et.split(' ')[0])
                                eventTypeList.append(eventType)
                            except ValueError:
                                pass
                        if len(eventTypeList) == 1:
                            eventTypeList = eventTypeList[0]
                        bpath = eventTypeList
                        gLogger.verbose('buildBKQuery. Event types %s' %
                                        eventTypeList)
                # Set the BK dictionary item
                if bpath != '':
                    bkQuery[bkFields[i]] = bpath
                if defaultPP:
                    # PP was empty, try once more to get the Event Type
                    defaultPP = False
                else:
                    # Go to next item
                    i += 1
                if i == len(bkFields):
                    break

            gLogger.verbose('buildBKQuery. Query dict %s' % str(bkQuery))
            # Set default event type to real data
            if bkQuery.get('ConfigName') != 'MC' and not bkQuery.get(
                    'EventType'):
                bkQuery['EventType'] = '90000000'
            if bkQuery.get('EventType') == 'ALL':
                bkQuery.pop('EventType')

        # Run limits are given
        runs = bkQuery.pop('Runs', runs)
        if runs:
            try:
                bkQuery = parseRuns(bkQuery, runs)
            except BadRunRange:
                return self.__bkQueryDict

        ###### Query given as a list of production ######
        if prods and str(prods[0]).upper() != 'ALL':
            try:
                bkQuery.setdefault('Production',
                                   []).extend([int(prod) for prod in prods])
            except ValueError as ex:  # The prods list does not contains numbers
                gLogger.warn(ex)
                gLogger.error('Invalid production list', str(prods))
                return self.__bkQueryDict

        # If an event type is specified
        if eventTypes:
            bkQuery['EventType'] = eventTypes

        # Set the file type(s) taking into account excludes file types
        fileTypes = bkQuery.get('FileType', fileTypes)
        bkQuery.pop('FileType', None)
        self.__bkQueryDict = bkQuery.copy()
        fileType = self.__fileType(fileTypes)
        # print fileType
        if fileType:
            bkQuery['FileType'] = fileType

        # Remove all "ALL"'s in the dict, if any
        for i in self.__bkQueryDict:
            if isinstance(bkQuery[i], basestring) and bkQuery[i] == 'ALL':
                bkQuery.pop(i)

        # If there is only one production, make it faster with a single value rather than a list
        prodList = bkQuery.get('Production')
        if isinstance(prodList, list) and len(prodList) == 1:
            bkQuery['Production'] = prodList[0]
        self.__bkQueryDict = bkQuery.copy()
        self.setVisible(visible)

        # Set both event type entries
        # print "Before setEventType", self.__bkQueryDict
        if not self.setEventType(bkQuery.get('EventType')):
            self.__bkQueryDict = {}
            return self.__bkQueryDict
        # Set conditions
        # print "Before setConditions", self.__bkQueryDict
        self.setConditions(
            bkQuery.get(
                'ConditionDescription',
                bkQuery.get('DataTakingConditions',
                            bkQuery.get('SimulationConditions'))))
        # print "Returned value", self.__bkQueryDict
        return self.__bkQueryDict

    def setOption(self, key, val):
        """
    It insert an item to the dictionary. The key is an bookkeeping attribute (condition).
    """
        if val:
            self.__bkQueryDict[key] = val
        else:
            self.__bkQueryDict.pop(key, None)
        return self.__bkQueryDict

    def setConditions(self, cond=None):
        """ Set the dictionary items for a given condition, or remove it (cond=None) """
        if 'ConfigName' not in self.__bkQueryDict and cond:
            gLogger.warn(
                "Impossible to set Conditions to a BK Query without Configuration"
            )
            return self.__bkQueryDict
        # There are two items in the dictionary: ConditionDescription and Simulation/DataTaking-Conditions
        eventType = self.__bkQueryDict.get('EventType', 'ALL')
        if self.__bkQueryDict.get('ConfigName') == 'MC' or \
            (isinstance(eventType, basestring) and eventType.upper() != 'ALL' and
             eventType[0] != '9'):
            conditionsKey = 'SimulationConditions'
        else:
            conditionsKey = 'DataTakingConditions'
        self.setOption('ConditionDescription', cond)
        return self.setOption(conditionsKey, cond)

    def setFileType(self, fileTypes=None):
        """Resolve *fileTypes* and store the result as the query's FileType."""
        resolved = self.__fileType(fileTypes)
        return self.setOption('FileType', resolved)

    def setDQFlag(self, dqFlag='OK'):
        """
    Store the data-quality flag(s), upper-cased, in the query dictionary.
    """
        if isinstance(dqFlag, basestring):
            flag = dqFlag.upper()
        elif isinstance(dqFlag, list):
            flag = [dq.upper() for dq in dqFlag]
        else:
            flag = dqFlag
        return self.setOption('DataQuality', flag)

    def setStartDate(self, startDate):
        """
    Sets the start date.
    """
        return self.setOption('StartDate', startDate)

    def setEndDate(self, endDate):
        """
    Sets the end date
    """
        return self.setOption('EndDate', endDate)

    def setProcessingPass(self, processingPass):
        """
    Sets the processing pass
    """
        return self.setOption('ProcessingPass', processingPass)

    def setEventType(self, eventTypes=None):
        """
    Sets the event type
    """
        if eventTypes:
            if isinstance(eventTypes, basestring):
                eventTypes = eventTypes.split(',')
            elif not isinstance(eventTypes, list):
                eventTypes = [eventTypes]
            try:
                eventTypes = [str(int(et)) for et in eventTypes]
            except ValueError as ex:
                gLogger.warn(ex)
                gLogger.error('Invalid list of event types', eventTypes)
                return {}
            if isinstance(eventTypes, list) and len(eventTypes) == 1:
                eventTypes = eventTypes[0]
        return self.setOption('EventType', eventTypes)

    def setVisible(self, visible=None):
        """Set the visibility flag of the query.

        :param visible: True or a string starting with 'y'/'Y' maps to 'Yes';
                        False maps to 'No'; any other value is stored unchanged.
        """
        # visible[:1] instead of visible[0]: avoids IndexError on an empty
        # string (bug fix); '' falls through and is stored as-is
        if visible is True or (isinstance(visible, basestring)
                               and visible[:1].lower() == 'y'):
            visible = 'Yes'
        if visible is False:
            visible = 'No'
        return self.setOption('Visible', visible)

    def setExceptFileTypes(self, fileTypes):
        """Exclude the given file type(s) from the query.

        The types are added to the internal exclusion set and removed from
        the current 'FileType' selection.

        :param fileTypes: a single file type or a list of file types
        :return: the result of setFileType (previously None was returned
                 implicitly; returning the value is backward-compatible and
                 consistent with the other setters)
        """
        if not isinstance(fileTypes, list):
            fileTypes = [fileTypes]
        self.__exceptFileTypes.update(fileTypes)
        # Re-set the FileType option without the newly excluded types
        return self.setFileType(
            [t for t in self.getFileTypeList() if t not in fileTypes])

    def getExceptFileTypes(self):
        """Return the excluded file types as a list."""
        return list(self.__exceptFileTypes)

    def getQueryDict(self):
        """Return the bookkeeping query dictionary (not a copy)."""
        return self.__bkQueryDict

    def getPath(self):
        """Return the bookkeeping path the query was built from."""
        return self.__bkPath

    def makePath(self):
        """Build a BK path string from the query dictionary.

        Missing intermediate elements appear as empty path components
        ('//'); trailing slashes are stripped.
        """
        query = self.__bkQueryDict
        types = query.get('FileType', '')
        if isinstance(types, list):
            types = ','.join(types)
        # '.' placeholders for missing elements become '//' separators;
        # ProcessingPass loses its leading '/', EventType 90000000 means "any"
        path = os.path.join(
            '/', query.get('ConfigName', ''), query.get('ConfigVersion', ''),
            query.get('ConditionDescription', '.'),
            query.get('ProcessingPass', '.')[1:],
            str(query.get('EventType', '.')).replace('90000000', '.'),
            types).replace('/./', '//')
        return path.rstrip('/')

    def getFileTypeList(self):
        """Return the file types of the query as a list (possibly empty)."""
        fileTypes = self.__bkQueryDict.get('FileType', [])
        return fileTypes if isinstance(fileTypes, list) else [fileTypes]

    def getEventTypeList(self):
        """Return the event types of the query as a list (possibly empty)."""
        eventTypes = self.__bkQueryDict.get('EventType', [])
        if eventTypes and not isinstance(eventTypes, list):
            eventTypes = [eventTypes]
        return eventTypes

    def getProcessingPass(self):
        """Return the processing pass of the query ('' if unset)."""
        return self.__bkQueryDict.get('ProcessingPass', '')

    def getConditions(self):
        """Return the simulation / data-taking conditions ('' if unset)."""
        return self.__bkQueryDict.get('ConditionDescription', '')

    def getConfiguration(self):
        """Return '/<ConfigName>/<ConfigVersion>', or '' if either is unset."""
        name = self.__bkQueryDict.get('ConfigName', '')
        version = self.__bkQueryDict.get('ConfigVersion', '')
        if name and version:
            return os.path.join('/', name, version)
        return ''

    def isVisible(self):
        """Return the visibility flag of the query.

        Despite the name this is not a boolean: it is the stored
        'Visible' value ('Yes'/'No'), defaulting to 'All'.
        """
        return self.__bkQueryDict.get('Visible', 'All')

    def __fileType(self, fileType=None, returnList=False):
        """Expand and validate file types, honouring the excluded types.

        Handles the special values 'ALL' (all BK types except the excluded
        ones) and 'all.<ext>' (all known types ending with that extension,
        e.g. 'all.hist').

        :param fileType: a file type, a comma-separated string or a list
        :param returnList: if True always return a list; otherwise a single
                           resulting type is returned as a scalar
        :return: list of file types (or a single type, see returnList)
        """
        gLogger.verbose("BKQuery.__fileType: %s, fileType: %s" %
                        (self, fileType))
        if not fileType:
            return []
        # Make sure the cache of BK file types is populated
        self.__getAllBKFileTypes()
        if isinstance(fileType, list):
            fileTypes = fileType
        else:
            fileTypes = fileType.split(',')
        allRequested = None
        if fileTypes[0].lower() == "all":
            allRequested = True
            bkTypes = self.getBKFileTypes()
            gLogger.verbose('BKQuery.__fileType: bkTypes %s' % str(bkTypes))
            if bkTypes:
                fileTypes = list(set(bkTypes) - self.__exceptFileTypes)
            else:
                fileTypes = []
        expandedTypes = set()
        for fileType in fileTypes:
            if fileType.lower() == 'all.hist':
                # Histograms requested explicitly: keep them even though
                # HIST types are normally excluded
                allRequested = False
                expandedTypes.update([
                    t for t in self.__exceptFileTypes.union(self.__bkFileTypes)
                    if t.endswith('HIST')
                ])
            elif fileType.lower().find("all.") == 0:
                # 'all.<ext>': every known (non-excluded) type with that extension
                ext = '.' + fileType.split('.')[1]
                # (dead assignment `fileType = []` removed: the variable was
                # reassigned by the loop and never read after that point)
                if allRequested is None:
                    allRequested = True
                expandedTypes.update([
                    t for t in set(self.getBKFileTypes()) -
                    self.__exceptFileTypes if t.endswith(ext)
                ])
            else:
                expandedTypes.add(fileType)
        # Remove __exceptFileTypes only if not explicitly required
        gLogger.verbose(
            "BKQuery.__fileType: requested %s, expanded %s, except %s" %
            (allRequested, expandedTypes, self.__exceptFileTypes))
        if expandedTypes - self.__bkFileTypes and not self.__alreadyWarned:
            # Warn only once per query object about unknown file types
            self.__alreadyWarned = True
            gLogger.always(
                "**** Take care: some requested file types do not exist!!",
                str(sorted(expandedTypes - self.__bkFileTypes)))
        if allRequested or not expandedTypes & self.__exceptFileTypes:
            expandedTypes -= self.__exceptFileTypes
        gLogger.verbose("BKQuery.__fileType: result %s" %
                        sorted(expandedTypes))
        if len(expandedTypes) == 1 and not returnList:
            return list(expandedTypes)[0]
        else:
            return list(expandedTypes)

    def __getAllBKFileTypes(self):
        """
    Populate the cached set of all file types known to the bookkeeping,
    and record "uninteresting" ones (HIST/ETC/LOG/ROOT) as excluded types.
    Retries on BK errors until it succeeds, logging only the first failure.
    """
        if not self.__bkFileTypes:
            # Seed with the internal placeholder used for 'ALL.DST'
            self.__bkFileTypes = set([self.__fakeAllDST])
            warned = False
            while True:
                res = self.__bkClient.getAvailableFileTypes()
                if res['OK']:
                    dbresult = res['Value']
                    for record in dbresult['Records']:
                        # HIST/ETC/LOG/ROOT types are excluded by default
                        if record[0].endswith('HIST') or \
                                record[0].endswith('ETC') or \
                                record[0] == 'LOG' or \
                                record[0].endswith('ROOT'):
                            self.__exceptFileTypes.add(record[0])
                        self.__bkFileTypes.add(record[0])
                    break
                if not warned:
                    # NOTE(review): no delay between retries — this busy-loops
                    # if the BK service stays down; confirm this is intended
                    gLogger.always('Error getting BK file types, retrying',
                                   res['Message'])
                    warned = True

    def __getBKFiles(self, bkQueryDict, retries=5):
        """
    Call BK getFiles() with retries.

    :param bkQueryDict: query dictionary passed verbatim to getFiles()
    :param retries: maximum number of attempts; a falsy value means retry
                    (practically) forever
    :return: the last S_OK/S_ERROR structure returned by getFiles()
    """
        if not retries:
            retries = sys.maxsize
        errorLogged = False
        while retries:
            res = self.__bkClient.getFiles(bkQueryDict)
            if res['OK']:
                break
            retries -= 1
            if not errorLogged:
                # Log the failure only once to avoid flooding the log
                errorLogged = True
                gLogger.warn("Error getting files from BK, retrying...",
                             res['Message'])
        return res

    def getLFNsAndSize(self, getSize=True):
        """
    Return the LFNs (and optionally their total size) selected by the query.

    :param getSize: if True also compute the total size, reported in TB
    :return: dict {'LFNs': <list of LFNs>, 'LFNSize': <float, TB>}
    """
        self.__getAllBKFileTypes()
        res = self.__getBKFiles(self.__bkQueryDict)
        lfns = []
        lfnSize = 0
        if not res['OK']:
            gLogger.error("Error from BK for %s:" % self.__bkQueryDict,
                          res['Message'])
        else:
            lfns = set(res['Value'])
            exceptFiles = list(self.__exceptFileTypes)
            if exceptFiles and not self.__bkQueryDict.get('FileType'):
                # No explicit file type in the query: find and remove files
                # whose type is in the exclusion set
                res = self.__getBKFiles(
                    BKQuery(self.__bkQueryDict).setOption(
                        'FileType', exceptFiles))
                if res['OK']:
                    lfnsExcept = set(res['Value']) & lfns
                else:
                    gLogger.error(
                        "***** ERROR ***** Error in getting dataset from BK for %s files:"
                        % exceptFiles, res['Message'])
                    lfnsExcept = set()
                if lfnsExcept:
                    gLogger.warn(
                        "***** WARNING ***** Found %d files in BK query that will be \
          excluded (file type in %s)!" % (len(lfnsExcept), str(exceptFiles)))
                    gLogger.warn(
                        "                    If creating a transformation, set '--FileType ALL'"
                    )
                    lfns = lfns - lfnsExcept
                else:
                    # Sentinel: nothing was excluded, so skip the size
                    # correction in the getSize branch below
                    exceptFiles = False
            if getSize:
                # Get size only if needed
                query = BKQuery(self.__bkQueryDict)
                query.setOption("FileSize", True)
                res = self.__getBKFiles(query.getQueryDict())
                if res['OK'] and isinstance(res['Value'],
                                            list) and res['Value'][0]:
                    lfnSize = res['Value'][0]
                if exceptFiles and not self.__bkQueryDict.get('FileType'):
                    # Subtract the size of the excluded files
                    res = self.__getBKFiles(
                        query.setOption('FileType', exceptFiles))
                    if res['OK'] and isinstance(res['Value'],
                                                list) and res['Value'][0]:
                        lfnSize -= res['Value'][0]

                # Convert bytes to TB
                lfnSize /= 1000000000000.
            else:
                lfnSize = 0.
        return {'LFNs': list(lfns), 'LFNSize': lfnSize}

    def getLFNSize(self, visible=None):
        """Return the total 'FileSize' of the dataset selected by the query,
        or 0 when it cannot be obtained.

        :param visible: visibility flag; defaults to the query's own flag
        """
        if visible is None:
            visible = self.isVisible()
        sizeQuery = BKQuery(self.__bkQueryDict,
                            visible=visible).setOption('FileSize', True)
        res = self.__getBKFiles(sizeQuery)
        if res['OK'] and isinstance(res['Value'], list) and res['Value'][0]:
            return res['Value'][0]
        return 0

    def getNumberOfLFNs(self, visible=None):
        """
    Return the number of LFNs and their total size for the query.

    :param visible: visibility flag; defaults to the query's own flag
    :return: dict {'NumberOfLFNs': <int>, 'LFNSize': <int>}
    """
        if visible is None:
            visible = self.isVisible()
        if self.isVisible() != visible:
            # Use a copy of the query with the requested visibility
            query = BKQuery(self.__bkQueryDict, visible=visible)
        else:
            query = self
        fileTypes = query.getFileTypeList()
        nbFiles = 0
        size = 0
        # One getFilesSummary call per file type; counts and sizes accumulate
        for fileType in fileTypes:
            if fileType:
                res = self.__bkClient.getFilesSummary(
                    query.setFileType(fileType))
                # print query, res
                if res['OK']:
                    res = res['Value']
                    ind = res['ParameterNames'].index('NbofFiles')
                    if res['Records'][0][ind]:
                        nbFiles += res['Records'][0][ind]
                        ind1 = res['ParameterNames'].index('FileSize')
                        size += res['Records'][0][ind1]
                        # print 'Visible',query.isVisible(),fileType, 'Files:',
                        # res['Records'][0][ind], 'Size:', res['Records'][0][ind1]
        return {'NumberOfLFNs': nbFiles, 'LFNSize': size}

    def getLFNs(self, printSEUsage=False, printOutput=True, visible=None):
        """
    Return the sorted list of LFNs selected by the query, optionally
    printing dataset statistics.

    :param printSEUsage: if True also print per-SE storage usage
    :param printOutput: if True print file counts and sizes
    :param visible: visibility flag; defaults to the query's own flag
    :return: list of LFNs (empty if the query matches no files)
    """
        if visible is None:
            visible = self.isVisible()

        if self.isVisible() != visible:
            query = BKQuery(self.__bkQueryDict, visible=visible)
        else:
            query = self

        # Loop for each production or each event type rather than make a single query
        loopItem = None
        prods = self.__bkQueryDict.get('Production')
        eventTypes = self.__bkQueryDict.get('EventType')
        if prods and isinstance(prods, list):
            loopItem = 'Production'
            loopList = prods
        elif eventTypes and isinstance(eventTypes, list):
            loopItem = 'EventType'
            loopList = eventTypes
        if loopItem:
            # It's faster to loop on a list of prods or event types than query the BK with a list as argument
            lfns = []
            lfnSize = 0
            if query == self:
                # Work on a copy: the loop below modifies the query in place
                query = BKQuery(self.__bkQueryDict, visible=visible)
            for item in loopList:
                query.setOption(loopItem, item)
                lfnsAndSize = query.getLFNsAndSize(getSize=printOutput)
                lfns += lfnsAndSize['LFNs']
                lfnSize += lfnsAndSize['LFNSize']
        else:
            lfnsAndSize = query.getLFNsAndSize(getSize=printOutput)
            lfns = lfnsAndSize['LFNs']
            lfnSize = lfnsAndSize['LFNSize']

        if not lfns:
            gLogger.verbose("No files found for BK query %s" %
                            str(self.__bkQueryDict))
        else:
            lfns.sort()

            # Only for printing
            if printOutput:
                # lfnSize is in TB (see getLFNsAndSize)
                gLogger.notice("\n%d files (%.1f TB) in directories:" %
                               (len(lfns), lfnSize))
                dirs = {}
                # Count files per directory
                for lfn in lfns:
                    directory = os.path.join(os.path.dirname(lfn), '')
                    dirs[directory] = dirs.setdefault(directory, 0) + 1
                for directory in sorted(dirs):
                    gLogger.notice("%s %s files" %
                                   (directory, dirs[directory]))
                if printSEUsage:
                    # Per-SE storage usage, skipping archive SEs
                    rpc = RPCClient('DataManagement/StorageUsage')
                    totalUsage = {}
                    totalSize = 0
                    for directory in dirs:
                        res = rpc.getStorageSummary(directory, '', '', [])
                        if res['OK']:
                            for se in [
                                    se for se in res['Value']
                                    if not se.endswith("-ARCHIVE")
                            ]:
                                totalUsage[se] = totalUsage.setdefault(
                                    se, 0) + res['Value'][se]['Size']
                                totalSize += res['Value'][se]['Size']
                    ses = sorted(totalUsage)
                    totalUsage['Total'] = totalSize
                    ses.append('Total')
                    gLogger.notice("\n%s %s" % ("SE".ljust(20), "Size (TB)"))
                    for se in ses:
                        gLogger.notice("%s %s" %
                                       (se.ljust(20),
                                        ('%.1f' %
                                         (totalUsage[se] / 1000000000000.))))
        return lfns

    def getDirs(self, printOutput=False, visible=None):
        """Return the sorted list of directories containing the query's LFNs.

        :param printOutput: forwarded to getLFNs for statistics printing
        :param visible: visibility flag; defaults to the query's own flag
        """
        if visible is None:
            visible = self.isVisible()
        lfns = self.getLFNs(printSEUsage=True,
                            printOutput=printOutput,
                            visible=visible)
        return sorted(set(os.path.dirname(lfn) for lfn in lfns))

    @staticmethod
    def __getProdStatus(prod):
        """Return the status of transformation *prod*, or None on error."""
        res = TransformationClient().getTransformation(prod, extraParams=False)
        if res['OK']:
            return res['Value']['Status']
        gLogger.error("Couldn't get information on production %d" % prod)
        return None

    def getBKRuns(self):
        """Return the list of runs from the BK for a 'Real Data' processing
        pass; None for any other processing pass."""
        if self.getProcessingPass().replace('/', '') == 'Real Data':
            return self.getBKProductions()
        return None

    def getBKProductions(self, visible=None):
        """
    Return the sorted list of productions matching the query — or, for a
    'Real Data' processing pass, the list of run numbers.

    :param visible: visibility flag; defaults to the query's own flag
    :return: sorted list (empty on error or when no ProcessingPass is set)
    """
        if visible is None:
            visible = self.isVisible()
        prodList = self.__bkQueryDict.get('Production')
        if prodList:
            # Productions explicitly given in the query: just return them
            if not isinstance(prodList, list):
                prodList = [prodList]
            return sorted(prodList)
        if not self.getProcessingPass():
            gLogger.fatal(
                'Impossible to get a list of productions without the Processing Pass'
            )
            return []
        eventTypes = self.__bkQueryDict.get('EventType')
        if not isinstance(eventTypes, list):
            eventTypes = [eventTypes]
        fullList = set()
        # Query the BK once per event type
        for eventType in eventTypes:
            bkQ = BKQuery(self.__bkQueryDict)
            bkQ.setVisible(visible)
            bkDict = bkQ.setEventType(eventType)
            # gLogger.notice( 'Get productions for BK query', str( bkDict ) )
            res = self.__bkClient.getProductions(bkDict)
            if not res['OK']:
                gLogger.error('Error getting productions from BK',
                              res['Message'])
                return []
            if self.getProcessingPass().replace('/', '') != 'Real Data':
                fileTypes = self.getFileTypeList()
                # Keep only productions that are not deleted
                prodList = set(prod for prods in res['Value']['Records']
                               for prod in prods
                               if self.__getProdStatus(prod) != 'Deleted')
                # print '\n', self.__bkQueryDict, res['Value']['Records'], '\nVisible:', visible, prodList
                pList = set()
                if fileTypes:
                    # Restrict to productions whose BK query has a matching file type
                    transClient = TransformationClient()
                    for prod in prodList:
                        res = transClient.getBookkeepingQuery(prod)
                        if res['OK'] and res['Value']['FileType'] in fileTypes:
                            pList.add(prod)
                if not pList:
                    pList = prodList
            else:
                # For real data the records hold negated run numbers; negate
                # back and keep those inside the [StartRun, EndRun] window
                runList = sorted(
                    [-run for r in res['Value']['Records'] for run in r])
                startRun = int(self.__bkQueryDict.get('StartRun', 0))
                endRun = int(self.__bkQueryDict.get('EndRun', sys.maxsize))
                pList = set(run for run in runList
                            if run >= startRun and run <= endRun)
            fullList.update(pList)
        return sorted(fullList)

    def getBKConditions(self):
        """Return the data-taking / simulation conditions of the query,
        asking the BK when they are not already set.

        :return: list of condition descriptions ([] on BK error)
        """
        conditions = self.__bkQueryDict.get('ConditionDescription')
        if conditions:
            # Conditions already in the query: return them as a list
            return conditions if isinstance(conditions, list) else [conditions]
        result = self.__bkClient.getConditions(self.__bkQueryDict)
        if not result['OK']:
            return []
        conditions = []
        # Use the first record set that actually contains records
        for record in result['Value']:
            ind = record['ParameterNames'].index('Description')
            if record['Records']:
                conditions = [par[ind] for par in record['Records']]
                break
        return sorted(conditions)

    def getBKEventTypes(self):
        """Return the event types of the query, asking the BK when they are
        not already set.

        :return: sorted list of event types ([] on BK error)
        """
        eventTypes = self.getEventTypeList()
        if eventTypes:
            return eventTypes
        res = self.__bkClient.getEventTypes(self.__bkQueryDict)
        # Bug fix: the result was previously used without checking 'OK',
        # raising KeyError on a BK error; log and return [] instead
        # (consistent with getBKConditions)
        if not res['OK']:
            gLogger.error('Error getting event types from BK', res['Message'])
            return []
        res = res['Value']
        ind = res['ParameterNames'].index('EventType')
        return sorted([rec[ind] for rec in res['Records']])

    def getBKFileTypes(self, bkDict=None):
        """
    Return the list of file types, querying the BK when the query does not
    already specify them.

    :param bkDict: optional query dictionary (defaults to this query's);
                   always copied, never modified in place
    :return: list of file types
    """
        fileTypes = self.getFileTypeList()
        # print "Call getBKFileTypes:", self, fileTypes
        if not fileTypes:
            if not bkDict:
                bkDict = self.__bkQueryDict.copy()
            else:
                bkDict = bkDict.copy()
            bkDict.setdefault('Visible', 'All')
            bkDict.pop('RunNumber', None)
            fileTypes = []
            eventTypes = bkDict.get('EventType')
            if isinstance(eventTypes, list):
                # Several event types: recurse once per type and merge
                for et in eventTypes:
                    bkDict['EventType'] = et
                    fileTypes += self.getBKFileTypes(bkDict)
            else:
                res = self.__bkClient.getFileTypes(bkDict)
                if res['OK']:
                    res = res['Value']
                    ind = res['ParameterNames'].index('FileTypes')
                    fileTypes = [
                        rec[ind] for rec in res['Records']
                        if rec[ind] not in self.__exceptFileTypes
                    ]
        # 'ALL.DST' is swapped for a placeholder around the __fileType call,
        # as names starting with 'all.' are treated specially there
        if 'ALL.DST' in fileTypes:
            fileTypes.remove('ALL.DST')
            fileTypes.append(self.__fakeAllDST)
        # print 'FileTypes1', fileTypes
        fileTypes = self.__fileType(fileTypes, returnList=True)
        # print 'FileTypes2', fileTypes
        if self.__fakeAllDST in fileTypes:
            fileTypes.remove(self.__fakeAllDST)
            fileTypes.append('ALL.DST')
        # print 'FileTypes3', fileTypes
        return fileTypes

    def getBKProcessingPasses(self, queryDict=None, depth=None):
        """
    Return the processing passes below the query's processing pass.

    Recursively walks the BK processing-pass tree down to *depth* levels.

    :param queryDict: optional query dictionary (defaults to a copy of this
                      query's dictionary); modified in place while recursing
    :param depth: maximum recursion depth (default: unlimited)
    :return: dict {processing pass: [event types]}; the '/Real Data' and '/'
             entries are removed from the result
    """
        if depth is None:
            depth = sys.maxsize
        processingPasses = {}
        if not queryDict:
            queryDict = self.__bkQueryDict.copy()
        initialPP = queryDict.get('ProcessingPass', '/')
        # print "Start", initialPP, queryDict
        res = self.__bkClient.getProcessingPass(queryDict, initialPP)
        if not res['OK']:
            if 'Empty Directory' not in res['Message']:
                gLogger.error(
                    "ERROR getting processing passes for %s" % queryDict,
                    res['Message'])
            return {}
        # res['Value'][0]: sub-passes; res['Value'][1]: event types at this level
        ppRecords = res['Value'][0]
        if 'Name' in ppRecords['ParameterNames']:
            ind = ppRecords['ParameterNames'].index('Name')
            passes = sorted([
                os.path.join(initialPP, rec[ind])
                for rec in ppRecords['Records']
            ])
        else:
            passes = []
        evtRecords = res['Value'][1]
        if 'EventType' in evtRecords['ParameterNames']:
            ind = evtRecords['ParameterNames'].index('EventType')
            eventTypes = [str(rec[ind]) for rec in evtRecords['Records']]
        else:
            eventTypes = []

        if passes and depth:
            # Recurse into each sub-pass, one level deeper
            depth -= 1
            nextProcessingPasses = {}
            for pName in passes:
                processingPasses[pName] = []
                queryDict['ProcessingPass'] = pName
                nextProcessingPasses.update(
                    self.getBKProcessingPasses(queryDict, depth=depth))
            processingPasses.update(nextProcessingPasses)
        if eventTypes:
            processingPasses[initialPP] = eventTypes
        for pName in ('/Real Data', '/'):
            if pName in processingPasses:
                processingPasses.pop(pName)
        # print "End", initialPP, [( key, processingPasses[key] ) for key in sorted( processingPasses.keys() )]
        return processingPasses