Beispiel #1
0
  def __getFilesLocaly(self):
    """ Download the overlay (background) files into a local directory.

    Steps:

    1. Compute how many background files are needed from the number of signal
       events per job and the number of background events per signal event,
       capped by the ``MaxNbFilesToGet`` option below ``/Overlay``.
    2. Poll ``OverlaySystemClient.canRun`` for ``self.site`` once per minute
       until it returns a true value. Give up after more than 10 consecutive
       errors or more than 300 unsuccessful polls.
    3. Create ``./overlayinput_<metaEventType>``, change into it and fetch
       randomly chosen files from ``self.lfns``, each file being tried at most
       once. Site-specific getters are used for a few known sites, with the
       DataManager as fallback.
    4. Remove leftover ``*.sh`` scripts, log the directory content, change
       back to ``self.curdir`` and report ``jobDone`` for the site.

    Side effect: ``self.NbSigEvtsPerJob`` is set if it was not set before.

    :returns: S_OK() if all needed files were obtained, S_ERROR() otherwise
    """
    numberofeventstoget = ceil(self.BXOverlay * self.ggtohadint)
    nbfiles = len(self.lfns)
    availableevents = nbfiles * self.nbofeventsperfile
    if availableevents < numberofeventstoget:
      return S_ERROR("Number of %s events available is less than requested" % ( self.BkgEvtType ))

    if not self.NbSigEvtsPerJob:
      # Compute the number of signal events from the signal input files
      self.NbSigEvtsPerJob = self.nbinputsigfile * self.nbsigeventsperfile
    if not self.NbSigEvtsPerJob:
      return S_ERROR('Could not determine the number of signal events per job')
    LOG.verbose("There are %s signal event" % self.NbSigEvtsPerJob)
    # Now determine how many files are needed to cover all signal events
    totnboffilestoget = int(ceil(self.NbSigEvtsPerJob * numberofeventstoget / self.nbofeventsperfile))

    # Limit ourself to some configuration maximum
    levels = [self.machine, self.energytouse, self.detectormodel, self.BkgEvtType]
    maxNbFilesToGet = getOptionValue(ops=self.ops, basePath="/Overlay", optionName="MaxNbFilesToGet", defaultValue=20,
                                     levels=levels)
    totnboffilestoget = min(totnboffilestoget, maxNbFilesToGet)

    self.__disableWatchDog()
    overlaymon = OverlaySystemClient()
    # Now need to check that there are not that many concurrent jobs getting the overlay at the same time
    error_count = 0
    count = 0
    while True:
      if error_count > 10 :
        LOG.error('OverlayDB returned too many errors')
        return S_ERROR('Failed to get number of concurrent overlay jobs')

      res = overlaymon.canRun(self.site)
      if not res['OK']:
        error_count += 1
        time.sleep(60)
        continue
      # Only consecutive errors count towards the limit
      error_count = 0
      if res['Value']:
        break

      count += 1
      # One poll per minute: more than 300 polls is 5 hours of waiting
      if count > 300:
        return S_ERROR("Waited too long: 5h, so marking job as failed")
      if count % 10 == 0 :
        self.setApplicationStatus("Overlay standby number %s" % count)
      time.sleep(60)

    self.__enableWatchDog()

    self.setApplicationStatus('Getting overlay files')

    LOG.info('Will obtain %s files for overlay' % totnboffilestoget)

    overlaydir = "./overlayinput_" + self.metaEventType
    os.mkdir(overlaydir)
    os.chdir(overlaydir)
    filesobtained = []
    fail_count = 0

    max_fail_allowed = self.ops.getValue("/Overlay/MaxFailedAllowed", 20)

    # Visit the candidate files in random order, each one at most once.
    # Drawing random indices and skipping the already used ones would never
    # terminate once every file has been tried without reaching the target.
    candidates = list(range(nbfiles))
    random.shuffle(candidates)
    for fileindex in candidates:
      if len(filesobtained) == totnboffilestoget:
        break
      if fail_count > max_fail_allowed:
        break

      lfn = self.lfns[fileindex]
      triedDataManager = False

      if self.site == 'LCG.CERN.ch':
        res = self.getEOSFile(lfn)
      elif self.site == 'LCG.IN2P3-CC.fr':
        res = self.getLyonFile(lfn)
      elif self.site == 'LCG.UKI-LT2-IC-HEP.uk':
        res = self.getImperialFile(lfn)
      elif self.site == 'LCG.RAL-LCG2.uk':
        res = self.getRALFile(lfn)
      elif self.site == 'LCG.KEK.jp':
        res = self.getKEKFile(lfn)
      else:
        # NOTE(review): the watchdog is disabled here and not re-enabled in
        # this method, and the fallback below runs without disabling it --
        # confirm this is intended.
        self.__disableWatchDog()
        res = self.datMan.getFile(lfn)
        triedDataManager = True

      # In case the specific copying did not work (mostly because the files do
      # not exist locally) try again to get the file via the DataManager
      if (not res['OK']) and (not triedDataManager):
        res = self.datMan.getFile(lfn)

      if not res['OK']:
        LOG.warn('Could not obtain %s' % lfn)
        fail_count += 1
        # Wait for a random time around 3 minutes
        LOG.verbose("Waste happily some CPU time (on average 3 minutes)")
        resWaste = wasteCPUCycles(60 * random.gauss(3, 0.1))
        if not resWaste['OK']:
          LOG.error("Could not waste as much CPU time as wanted, but whatever!")
        continue

      filesobtained.append(lfn)

    # The job must fail whenever the target was not reached, be it because of
    # too many failed downloads or because there was nothing left to try.
    fail = len(filesobtained) != totnboffilestoget
    if fail and fail_count <= max_fail_allowed:
      LOG.error("Tried all %s available files but obtained only %s of the %s needed"
                % (nbfiles, len(filesobtained), totnboffilestoget))

    # Remove all scripts remaining
    for script in glob.glob("*.sh"):
      os.remove(script)

    # Print the file list
    mylist = os.listdir(os.getcwd())
    LOG.info("List of Overlay files:")
    LOG.info("\n".join(mylist))
    os.chdir(self.curdir)
    res = overlaymon.jobDone(self.site)
    if not res['OK']:
      LOG.error("Could not declare the job as finished getting the files")
    if fail:
      LOG.error("Did not manage to get all files needed, too many errors")
      return S_ERROR("Failed to get files")
    LOG.info('Got all files needed.')
    return S_OK()