Example #1
0
    def getReplicasPresence(self, lfns):
        """Sort the given LFNs by the outcome of FileCatalog.getReplicas().

        The catalog is queried in chunks of 100 LFNs. Each chunk is tried up to
        9 times, pausing 0.1 s after every failed call; a chunk whose calls all
        fail ends up in neither returned list. Replicas that are found are also
        merged into ``self.cachedReplicas``. Progress is reported through
        ``self.__write``.

        :param lfns: sized collection of LFNs to look up
        :return: tuple (list of LFNs reported as Successful, list of LFNs reported as Failed)
        """
        chunkSize = 100
        maxAttempts = 9
        withReplicas, withoutReplicas = set(), set()

        showDots = len(lfns) > chunkSize
        began = time.time()
        suffix = (" (chunks of %d)" % chunkSize) if showDots else "... "
        self.__write("Checking replicas for %d files%s" % (len(lfns), suffix))

        for lfnChunk in breakListIntoChunks(lfns, chunkSize):
            if showDots:
                self.__write(".")
            attempt = 0
            while attempt < maxAttempts:
                attempt += 1
                reply = self.fileCatalog.getReplicas(lfnChunk)
                if not reply["OK"]:
                    # Give the catalog a short break before asking again
                    time.sleep(0.1)
                    continue
                withReplicas.update(reply["Value"]["Successful"])
                self.cachedReplicas.update(reply["Value"]["Successful"])
                withoutReplicas.update(reply["Value"]["Failed"])
                break
        self.__write(" (%.1f seconds)\n" % (time.time() - began))

        if withoutReplicas:
            listing = "\n".join([""] + sorted(withoutReplicas))
            self.__logVerbose("Files without replicas:", listing)
        return list(withReplicas), list(withoutReplicas)
Example #2
0
  def cleanOutputs( self, jobInfo ):
    """Remove the existing output files of a job together with their descendants.

    The files to delete are the job outputs whose status is "Exists" plus whatever
    ``__findAllDescendants`` returns for the outputs. When ``self.enabled`` is false
    the files are only logged. Otherwise they are removed in chunks of 200 under a
    UserProxy for ``self.authorDN`` / ``self.authorGroup``; per-file failures are
    logged grouped by error reason.

    :param jobInfo: object providing ``outputFiles`` and ``outputFileStatus``
    :raises RuntimeError: if no proxy is obtained or a removeFile call fails as a whole
    """
    if not len(jobInfo.outputFiles):
      return
    descendants = self.__findAllDescendants( jobInfo.outputFiles )
    existing = []
    for lfn, status in izip_longest(jobInfo.outputFiles, jobInfo.outputFileStatus):
      if status == "Exists":
        existing.append( lfn )
    targets = existing + descendants
    if not targets:
      return

    if self.enabled:
      self.log.notice( "Remove these files: \n +++ %s " % "\n +++ ".join(targets) )
    else:
      self.log.notice( "Would have removed these files: \n +++ %s " % "\n +++ ".join(targets) )
      return

    failuresByReason = defaultdict(list)
    removedCount = 0

    for lfnChunk in breakListIntoChunks(targets, 200):
      with UserProxy(proxyUserDN=self.authorDN, proxyUserGroup=self.authorGroup) as proxyResult:
        if not proxyResult['OK']:
          raise RuntimeError('Failed to get a proxy: %s' % proxyResult['Message'])
        removal = DataManager().removeFile(lfnChunk)
        if not removal['OK']:
          self.log.error("Failed to remove LFNs", removal['Message'])
          raise RuntimeError("Failed to remove LFNs: %s" % removal['Message'])
        for lfn, err in removal['Value']['Failed'].items():
          failuresByReason[str(err)].append(lfn)
        removedCount += len(removal['Value']['Successful'].keys())
    for reason, failedLfns in failuresByReason.items():
      self.log.error("Failed to remove %d files with error: %s" % (len(failedLfns), reason))
    self.log.notice("Successfully removed %d files" % removedCount)
Example #3
0
    def _getJobStatusOnHost(self, jobIDList, host=None):
        """Ask a host for the status of the given jobs.

        Every job reference is reduced to its stamp (the basename of its URL
        path). The stamps are sent to the "getJobStatus" host command in chunks
        of 100 and the reported statuses are keyed by the original references.

        :param jobIDList: iterable of job references (URLs)
        :param host: passed through to the host command
        :return: S_OK({job reference: status}); the failed command result or an
                 S_ERROR as soon as one chunk fails
        """
        refByStamp = {}
        for jobRef in jobIDList:
            refByStamp[os.path.basename(urlparse(jobRef).path)] = jobRef

        statusByRef = {}
        for stampChunk in breakListIntoChunks(list(refByStamp), 100):
            answer = self.__executeHostCommand("getJobStatus", {"JobIDList": stampChunk}, host=host)
            if not answer["OK"]:
                return answer

            payload = answer["Value"]
            if payload["Status"] != 0:
                return S_ERROR("Failed to get job status: %s" % payload["Message"])

            for stamp in payload["Jobs"]:
                statusByRef[refByStamp[stamp]] = payload["Jobs"][stamp]

        return S_OK(statusByRef)
Example #4
0
    def prepareNewJobs(self, maxFilesPerJob=100, maxAttemptsPerFile=10):
        """Create the 'Staging' jobs for the files that still have to be submitted.

        The files are grouped by target SE. A target for which the 'ReadAccess'
        check fails is logged and skipped. For every other target one job is
        created per chunk of at most ``maxFilesPerJob`` files, using the target
        SE as source SE as well.

        :param int maxFilesPerJob: maximum number of files in one job
        :param int maxAttemptsPerFile: passed on to ``_getFilesToSubmit``
        :return: S_OK(list of the newly created jobs)
        """

        log = gLogger.getSubLogger("_prepareNewJobs", child=True)

        filesToSubmit = self._getFilesToSubmit(
            maxAttemptsPerFile=maxAttemptsPerFile)
        log.debug("%s ftsFiles to submit" % len(filesToSubmit))

        newJobs = []

        # {targetSE : [FTS3Files] }
        filesGroupedByTarget = FTS3Utilities.groupFilesByTarget(filesToSubmit)

        # items() instead of the Python-2-only iteritems(): valid on Python 2 and 3
        for targetSE, ftsFiles in filesGroupedByTarget.items():

            res = self._checkSEAccess(targetSE, 'ReadAccess', vo=self.vo)
            if not res['OK']:
                log.error(res)
                continue

            for ftsFilesChunk in breakListIntoChunks(ftsFiles, maxFilesPerJob):

                newJob = self._createNewJob('Staging',
                                            ftsFilesChunk,
                                            targetSE,
                                            sourceSE=targetSE)
                newJobs.append(newJob)

        return S_OK(newJobs)
Example #5
0
  def killJob(self, jobIDList):
    """ Cancel the given jobs, 100 at a time.

    :param jobIDList: a single job reference (string) or an iterable of references
    :return: S_OK() when every chunk was cancelled; the failed proxy preparation
             result, or an S_ERROR as soon as cancelling one chunk fails
    """

    proxySetup = self._prepareProxy()
    if not proxySetup['OK']:
      self.log.error('ARCComputingElement: failed to set up proxy', proxySetup['Message'])
      return proxySetup
    self.usercfg.ProxyPath(os.environ['X509_USER_PROXY'])

    if isinstance(jobIDList, six.string_types):
      jobRefs = [jobIDList]
    else:
      jobRefs = list(jobIDList)

    self.log.debug("Killing jobs %s" % jobIDList)
    arcJobs = [self.__getARCJob(jobRef) for jobRef in jobRefs]

    # JobSupervisor is able to aggregate jobs to perform bulk operations and thus minimizes the communication overhead
    # We still need to create chunks to avoid timeout in the case there are too many jobs to supervise
    for jobChunk in breakListIntoChunks(arcJobs, 100):
      if arc.JobSupervisor(self.usercfg, jobChunk).Cancel():
        continue
      # The message lists all requested jobs, not only those of the failing chunk
      errorString = ' - '.join(jobRefs).strip()
      return S_ERROR('Failed to kill at least one of these jobs: %s. CE(?) not reachable?' % errorString)

    return S_OK()
Example #6
0
    def getReplicas(self, lfns, allStatus=False):
        """Look up the replicas of the given LFNs, 1000 at a time.

        Empty LFNs are skipped. For every replica entry returned by
        ``self.client.list_replicas`` the first value listed for each RSE is
        stored; names and values are converted with ``str`` when
        ``self.convertUnicode`` is set. An empty entry marks every DID of the
        current chunk as failed with "Error".

        :param lfns: LFNs to look up
        :param allStatus: not used by this implementation
        :return: dict with "OK" True and "Value" holding "Successful"
                 ({lfn: {rse: value}}) and "Failed" ({lfn: "Error"});
                 S_ERROR(str(err)) if anything raises
        """
        successful, failed = {}, {}
        outcome = {"OK": True, "Value": {"Successful": successful, "Failed": failed}}

        for lfnList in breakListIntoChunks(lfns, 1000):
            try:
                didList = [self.__getDidsFromLfn(lfn) for lfn in lfnList if lfn]
                for rep in self.client.list_replicas(didList):
                    if not rep:
                        for did in didList:
                            failed[did["name"]] = "Error"
                        continue
                    lfn = rep["name"]
                    if self.convertUnicode:
                        lfn = str(lfn)
                    if lfn not in successful:
                        successful[lfn] = {}
                    for rse in rep["rses"]:
                        if self.convertUnicode:
                            successful[lfn][str(rse)] = str(rep["rses"][rse][0])
                        else:
                            successful[lfn][rse] = rep["rses"][rse][0]
            except Exception as err:
                return S_ERROR(str(err))
        return outcome
Example #7
0
  def prepareNewJobs(self, maxFilesPerJob=100, maxAttemptsPerFile=10):
    """ Create the 'Staging' jobs for the files that still have to be submitted.

    The files are grouped by target SE. A target for which the 'ReadAccess' check fails is logged and
    skipped. For every other target one job is created per chunk of at most ``maxFilesPerJob`` files,
    using the target SE as source SE as well.

    :param int maxFilesPerJob: maximum number of files in one job
    :param int maxAttemptsPerFile: passed on to ``_getFilesToSubmit``
    :return: S_OK(list of the newly created jobs)
    """

    log = gLogger.getSubLogger("_prepareNewJobs", child=True)

    filesToSubmit = self._getFilesToSubmit(maxAttemptsPerFile=maxAttemptsPerFile)
    log.debug("%s ftsFiles to submit" % len(filesToSubmit))

    newJobs = []

    # {targetSE : [FTS3Files] }
    filesGroupedByTarget = FTS3Utilities.groupFilesByTarget(filesToSubmit)

    # items() instead of the Python-2-only iteritems(): valid on Python 2 and 3
    for targetSE, ftsFiles in filesGroupedByTarget.items():

      res = self._checkSEAccess(targetSE, 'ReadAccess', vo=self.vo)
      if not res['OK']:
        log.error(res)
        continue

      for ftsFilesChunk in breakListIntoChunks(ftsFiles, maxFilesPerJob):

        newJob = self._createNewJob('Staging', ftsFilesChunk, targetSE, sourceSE=targetSE)
        newJobs.append(newJob)

    return S_OK(newJobs)
  def getReplicasPresence(self, lfns):
    """ Sort the given LFNs by the outcome of the standard FileCatalog.getReplicas()

    The catalog is queried in chunks of 100 LFNs. Each chunk is tried up to 9 times, pausing 0.1 s
    after every failed call; a chunk whose calls all fail ends up in neither returned list.
    Replicas that are found are also merged into ``self.cachedReplicas``.

    :param lfns: sized collection of LFNs to look up
    :return: tuple (list of LFNs reported as Successful, list of LFNs reported as Failed)
    """
    present = set()
    notPresent = set()

    chunkSize = 100
    printProgress = (len(lfns) > chunkSize)
    startTime = time.time()
    self.__write("Checking replicas for %d files%s" %
                 (len(lfns), (' (chunks of %d)' % chunkSize) if printProgress else '... '))
    for chunk in breakListIntoChunks(lfns, chunkSize):
      if printProgress:
        self.__write('.')
      # range() instead of the Python-2-only xrange(): same 9 attempts on Python 2 and 3
      for _ in range(1, 10):
        res = self.fileCatalog.getReplicas(chunk)
        if res['OK']:
          present.update(res['Value']['Successful'])
          self.cachedReplicas.update(res['Value']['Successful'])
          notPresent.update(res['Value']['Failed'])
          break
        else:
          time.sleep(0.1)
    self.__write(' (%.1f seconds)\n' % (time.time() - startTime))

    if notPresent:
      self.__logVerbose("Files without replicas:",
                        '\n'.join([''] + sorted(notPresent)))
    return list(present), list(notPresent)
Example #9
0
def __getGlue2ExecutionEnvironmentInfo(host, executionEnvironments):
    """Query the BDII for the given execution environments (Glue2 schema).

    The environments are looked up in chunks of 1000, one ldapsearch per
    chunk. A chunk without any answer is logged and skipped.

    :param str host: BDII host to query
    :param list executionEnvironments: list of the execution environments to get some information from
    :returns: S_OK(list of the ldapsearch results of all chunks); the failed
              ldapsearch result as soon as one search fails; S_ERROR if
              nothing was found at all
    """
    collected = []
    # break up to avoid argument list too long, it started failing at about 1900 entries
    for envChunk in breakListIntoChunks(executionEnvironments, 1000):
        idClauses = ''.join('(GLUE2ResourceID=%s)' % env for env in envChunk)
        filt = "(&(objectClass=GLUE2ExecutionEnvironment)(|%s))" % idClauses
        response = __ldapsearchBDII(filt=filt, attr=None, host=host, base="o=glue", selectionString="GLUE2")
        if not response['OK']:
            return response
        if response['Value']:
            collected += response['Value']
        else:
            sLog.error("No information found for %s" % executionEnvironments)
    if collected:
        return S_OK(collected)
    return S_ERROR("No information found for executionEnvironments")
Example #10
0
    def getReplicasPresence(self, lfns):
        """Sort the given LFNs by the outcome of the standard FileCatalog.getReplicas()

        The catalog (``self.fc``) is queried in chunks of 100 LFNs. Each chunk
        is tried up to 9 times, pausing 0.1 s after every failed call; a chunk
        whose calls all fail ends up in neither returned list. Replicas that
        are found are also merged into ``self.cachedReplicas``.

        :param lfns: sized collection of LFNs to look up
        :return: tuple (list of LFNs reported as Successful, list of LFNs reported as Failed)
        """
        present = set()
        notPresent = set()

        chunkSize = 100
        printProgress = (len(lfns) > chunkSize)
        startTime = time.time()
        self.__write(
            "Checking replicas for %d files%s" %
            (len(lfns),
             (' (chunks of %d)' % chunkSize) if printProgress else '... '))
        for chunk in breakListIntoChunks(lfns, chunkSize):
            if printProgress:
                self.__write('.')
            # range() instead of the Python-2-only xrange(): same 9 attempts on Python 2 and 3
            for _ in range(1, 10):
                res = self.fc.getReplicas(chunk)
                if res['OK']:
                    present.update(res['Value']['Successful'])
                    self.cachedReplicas.update(res['Value']['Successful'])
                    notPresent.update(res['Value']['Failed'])
                    break
                else:
                    time.sleep(0.1)
        self.__write(' (%.1f seconds)\n' % (time.time() - startTime))

        if notPresent:
            self.__logVerbose("Files without replicas:",
                              '\n'.join([''] + sorted(notPresent)))
        return list(present), list(notPresent)
Example #11
0
    def _getJobStatusOnHost(self, jobIDList, host=None):
        """Get the status information for the given list of jobs

        Every job reference is reduced to its stamp (the basename of its URL
        path). The stamps are sent to the 'getJobStatus' host command in chunks
        of 100 and the reported statuses are keyed by the original references.

        :param jobIDList: iterable of job references (URLs)
        :param host: passed through to the host command
        :return: S_OK({job reference: status}); the failed command result or an
                 S_ERROR as soon as one chunk fails
        """

        resultDict = {}
        jobDict = {}
        for job in jobIDList:
            stamp = os.path.basename(urlparse(job).path)
            jobDict[stamp] = job
        # A real list: on Python 3 dict.keys() is a view that cannot be sliced into chunks
        stampList = list(jobDict)

        for jobList in breakListIntoChunks(stampList, 100):
            resultCommand = self.__executeHostCommand('getJobStatus',
                                                      {'JobIDList': jobList},
                                                      host=host)
            if not resultCommand['OK']:
                return resultCommand

            result = resultCommand['Value']
            if result['Status'] != 0:
                return S_ERROR('Failed to get job status: %s' %
                               result['Message'])

            for stamp in result['Jobs']:
                resultDict[jobDict[stamp]] = result['Jobs'][stamp]

        return S_OK(resultDict)
Example #12
0
    def _splitByData(self):
        """Split the job so that each sub-job gets one chunk of the input data.

        ``self.splittingOption`` is reset to None and, on success, ``self._data``
        is replaced by its chunks of ``self.numberOfFilesPerJob`` entries.

        :return: ["InputData", chunked data, 'ParametricInputData'], the
                 parameter name and values for setParameterSequence(); False
                 when no input data is set or ``numberOfFilesPerJob`` exceeds
                 the number of input data
        :rtype: list or bool
        """

        # reset split attribute to avoid infinite loop
        self.splittingOption = None

        self.log.info("Job splitting: Splitting 'byData' method...")

        problem = None
        if not self._data:
            # Ensure that data have been specified by setInputData() method
            problem = "Job splitting: missing input data"
        elif self.numberOfFilesPerJob > len(self._data):
            problem = "Job splitting: 'numberOfFilesPerJob' must be less/equal than the number of input data"
        if problem is not None:
            self.log.error(problem)
            return False

        self._data = breakListIntoChunks(self._data, self.numberOfFilesPerJob)

        jobCount = len(self._data)
        self.log.info("Job splitting: submission consists of %d job(s)" % jobCount)

        return ["InputData", self._data, 'ParametricInputData']
Example #13
0
  def _Broadcast( self ):
    """ This plug-in takes files found at the sourceSE and broadcasts to all (or a selection of) targetSEs.

    Parameters read from self.params:

    * SourceSE: string evaluated to the collection of source SEs; only files with a replica there are treated
    * TargetSE: one SE name, a list of SE names, or a string holding a list literal
    * Destinations: optional integer limiting the number of targets picked at sites that do not hold the
      file yet; ignored unless it is smaller than the number of TargetSEs
    * Status: 'Flush' also allows tasks with fewer files than GroupSize
    * GroupSize: number of files per task

    :return: S_OK( [ ( comma-separated sorted target SEs, [lfns] ), ... ] ), or S_ERROR when no
             parameters are set
    """
    if not self.params:
      return S_ERROR( "TransformationPlugin._Broadcast: The 'Broadcast' plugin requires additional parameters." )

    targetseParam = self.params['TargetSE']
    # NOTE(review): eval() executes arbitrary code. If these parameters can ever come from untrusted
    # input, ast.literal_eval would be the safe replacement - confirm that only literals are expected.
    sourceSEs = eval( self.params['SourceSE'] )
    if targetseParam.count( '[' ):
      targetSEs = eval( targetseParam )
    elif isinstance( targetseParam, list ):
      targetSEs = targetseParam
    else:
      targetSEs = [targetseParam]
    destinations = int( self.params.get( 'Destinations', 0 ) )
    if destinations and ( destinations >= len( targetSEs ) ):
      destinations = 0

    status = self.params['Status']
    groupSize = self.params['GroupSize']  # Number of files per tasks

    fileGroups = getFileGroups( self.data )  # groups by SE
    targetSELfns = {}
    for replicaSE, lfns in fileGroups.items():
      ses = replicaSE.split( ',' )
      # Only treat files that have a replica at one of the source SEs
      if not any( se in sourceSEs for se in ses ):
        continue

      for lfn in lfns:
        targets = []
        sources = self._getSitesForSEs( ses )
        random.shuffle( targetSEs )
        for targetSE in targetSEs:
          site = self._getSiteForSE( targetSE )['Value']
          if site not in sources:
            if destinations and ( len( targets ) >= destinations ):
              continue
            sources.append( site )
          targets.append( targetSE )  # after all, if someone wants to copy to the source, it's his choice
        strTargetSEs = ','.join( sorted( targets ) )
        # setdefault() instead of dict.has_key(), which no longer exists on Python 3
        targetSELfns.setdefault( strTargetSEs, [] ).append( lfn )
    tasks = []
    for ses, lfns in targetSELfns.items():
      for taskLfns in breakListIntoChunks( lfns, groupSize ):
        if ( status == 'Flush' ) or ( len( taskLfns ) >= int( groupSize ) ):
          # do not allow groups smaller than the groupSize, except if transformation is in flush state
          tasks.append( ( ses, taskLfns ) )
    return S_OK( tasks )
Example #14
0
  def _Broadcast( self ):
    """ This plug-in takes files found at the sourceSE and broadcasts to all (or a selection of) targetSEs.

    Parameters read from self.params:

    * SourceSE: string evaluated to the collection of source SEs; only files with a replica there are treated
    * TargetSE: one SE name, a list of SE names, or a string holding a list literal
    * Destinations: optional integer limiting the number of targets picked at sites that do not hold the
      file yet; ignored unless it is smaller than the number of TargetSEs
    * Status: 'Flush' also allows tasks with fewer files than GroupSize
    * GroupSize: number of files per task

    :return: S_OK( [ ( comma-separated sorted target SEs, [lfns] ), ... ] ), or S_ERROR when no
             parameters are set
    """
    if not self.params:
      return S_ERROR( "TransformationPlugin._Broadcast: The 'Broadcast' plugin requires additional parameters." )

    targetseParam = self.params['TargetSE']
    # NOTE(review): eval() executes arbitrary code. If these parameters can ever come from untrusted
    # input, ast.literal_eval would be the safe replacement - confirm that only literals are expected.
    sourceSEs = eval( self.params['SourceSE'] )
    if targetseParam.count( '[' ):
      targetSEs = eval( targetseParam )
    elif isinstance( targetseParam, list ):
      targetSEs = targetseParam
    else:
      targetSEs = [targetseParam]
    destinations = int( self.params.get( 'Destinations', 0 ) )
    if destinations and ( destinations >= len( targetSEs ) ):
      destinations = 0

    status = self.params['Status']
    groupSize = self.params['GroupSize']  # Number of files per tasks

    fileGroups = getFileGroups( self.data )  # groups by SE
    targetSELfns = {}
    for replicaSE, lfns in fileGroups.items():
      ses = replicaSE.split( ',' )
      # Only treat files that have a replica at one of the source SEs
      if not any( se in sourceSEs for se in ses ):
        continue

      for lfn in lfns:
        targets = []
        sources = self._getSitesForSEs( ses )
        random.shuffle( targetSEs )
        for targetSE in targetSEs:
          site = self._getSiteForSE( targetSE )['Value']
          if site not in sources:
            if destinations and ( len( targets ) >= destinations ):
              continue
            sources.append( site )
          targets.append( targetSE )  # after all, if someone wants to copy to the source, it's his choice
        strTargetSEs = ','.join( sorted( targets ) )
        # setdefault() instead of dict.has_key(), which no longer exists on Python 3
        targetSELfns.setdefault( strTargetSEs, [] ).append( lfn )
    tasks = []
    for ses, lfns in targetSELfns.items():
      for taskLfns in breakListIntoChunks( lfns, groupSize ):
        if ( status == 'Flush' ) or ( len( taskLfns ) >= int( groupSize ) ):
          # do not allow groups smaller than the groupSize, except if transformation is in flush state
          tasks.append( ( ses, taskLfns ) )
    return S_OK( tasks )
Example #15
0
  def groupByReplicas( self, files, status ):
    """
    Generates tasks based on the location of the input data

    Two passes are made over the files not yet in a task: first with getFileGroups( groupSE = True ),
    then with groupSE = False. In each pass the files of one location are cut into chunks of
    self.groupSize; a chunk becomes a task when it is complete, or - in the second pass only - when
    the status is 'Flush'. self.groupSize is taken from the 'GroupSize' plugin parameter
    (default 10) when it is not set yet.

    :param files: LFN to replica locations, as a dict or anything dict() accepts, e.g.
              {'/this/is/at.1': ['SE1'],
               '/this/is/at.12': ['SE1', 'SE2'],
               '/this/is/at.2': ['SE2'],
               '/this/is/at_123': ['SE1', 'SE2', 'SE3'],
               '/this/is/at_23': ['SE2', 'SE3'],
               '/this/is/at_4': ['SE4']}
    :param str status: 'Flush' allows incomplete tasks in the second pass
    :return: S_OK( [ ( replicaSE, [lfns] ), ... ] )
    """
    tasks = []
    nTasks = 0

    if not len( files ):
      return S_OK( tasks )

    # Work on a copy: files are removed from it as they are put into tasks
    files = dict( files )

    # Parameters
    if not self.groupSize:
      self.groupSize = self.getPluginParam( 'GroupSize', 10 )
    flush = ( status == 'Flush' )
    self.logVerbose( "groupByReplicas: %d files, groupSize %d, flush %s" % ( len( files ), self.groupSize, flush ) )

    # Consider files by groups of SEs, a file is only in one group
    # Then consider files site by site, but a file can now be at more than one site
    for groupSE in ( True, False ):
      if not files:
        break
      seFiles = getFileGroups( files, groupSE = groupSE )
      self.logDebug( "fileGroups set: ", seFiles )

      for replicaSE in sortSEs( seFiles ):
        lfns = seFiles[replicaSE]
        if not lfns:
          continue
        lfnsInTasks = []
        for taskLfns in breakListIntoChunks( lfns, self.groupSize ):
          if ( flush and not groupSE ) or ( len( taskLfns ) >= self.groupSize ):
            tasks.append( ( replicaSE, taskLfns ) )
            lfnsInTasks += taskLfns
        # Remove files from global list
        for lfn in lfnsInTasks:
          files.pop( lfn )
        if not groupSE:
          # In case the file was at more than one site, remove it from the other sites' list.
          # A set keeps the membership test constant-time instead of scanning a list per file.
          taken = set( lfnsInTasks )
          for se in [se for se in seFiles if se != replicaSE]:
            seFiles[se] = [lfn for lfn in seFiles[se] if lfn not in taken]
      self.logVerbose( "groupByReplicas: %d tasks created (groupSE %s), %d files not included in tasks" % ( len( tasks ) - nTasks,
                                                                                                            str( groupSE ),
                                                                                                            len( files ) ) )
      nTasks = len( tasks )

    return S_OK( tasks )
Example #16
0
  def groupByReplicas( self, files, status ):
    """
    Generates tasks based on the location of the input data

    Two passes are made over the files not yet in a task: first with getFileGroups( groupSE = True ),
    then with groupSE = False. In each pass the files of one location are cut into chunks of
    self.groupSize; a chunk becomes a task when it is complete, or - in the second pass only - when
    the status is 'Flush'. self.groupSize is taken from the 'GroupSize' plugin parameter
    (default 10) when it is not set yet.

    :param files: LFN to replica locations, as a dict or anything dict() accepts, e.g.
              {'/this/is/at.1': ['SE1'],
               '/this/is/at.12': ['SE1', 'SE2'],
               '/this/is/at.2': ['SE2'],
               '/this/is/at_123': ['SE1', 'SE2', 'SE3'],
               '/this/is/at_23': ['SE2', 'SE3'],
               '/this/is/at_4': ['SE4']}
    :param str status: 'Flush' allows incomplete tasks in the second pass
    :return: S_OK( [ ( replicaSE, [lfns] ), ... ] )
    """
    tasks = []
    nTasks = 0

    if not len( files ):
      return S_OK( tasks )

    # Work on a copy: files are removed from it as they are put into tasks
    files = dict( files )

    # Parameters
    if not self.groupSize:
      self.groupSize = self.getPluginParam( 'GroupSize', 10 )
    flush = ( status == 'Flush' )
    self.logVerbose( "groupByReplicas: %d files, groupSize %d, flush %s" % ( len( files ), self.groupSize, flush ) )

    # Consider files by groups of SEs, a file is only in one group
    # Then consider files site by site, but a file can now be at more than one site
    for groupSE in ( True, False ):
      if not files:
        break
      seFiles = getFileGroups( files, groupSE = groupSE )
      self.logDebug( "fileGroups set: ", seFiles )

      for replicaSE in sortSEs( seFiles ):
        lfns = seFiles[replicaSE]
        if not lfns:
          continue
        lfnsInTasks = []
        for taskLfns in breakListIntoChunks( lfns, self.groupSize ):
          if ( flush and not groupSE ) or ( len( taskLfns ) >= self.groupSize ):
            tasks.append( ( replicaSE, taskLfns ) )
            lfnsInTasks += taskLfns
        # Remove files from global list
        for lfn in lfnsInTasks:
          files.pop( lfn )
        if not groupSE:
          # In case the file was at more than one site, remove it from the other sites' list.
          # A set keeps the membership test constant-time instead of scanning a list per file.
          taken = set( lfnsInTasks )
          for se in [se for se in seFiles if se != replicaSE]:
            seFiles[se] = [lfn for lfn in seFiles[se] if lfn not in taken]
      self.logVerbose( "groupByReplicas: %d tasks created (groupSE %s), %d files not included in tasks" % ( len( tasks ) - nTasks,
                                                                                                            str( groupSE ),
                                                                                                            len( files ) ) )
      nTasks = len( tasks )

    return S_OK( tasks )
Example #17
0
    def _Broadcast(self):
        """Create tasks that broadcast files found at the source SEs to all (or a selection of) target SEs.

        Parameters used by this plugin:

        * SourceSE: Optional: only files with a replica at one of these SEs are treated
        * TargetSE: Where to broadcast files to
        * Destinations: Optional: integer limiting the number of targets picked at sites that do not
          hold the file yet; it only takes effect when smaller than the number of TargetSEs
        * Status: "Flush" also allows tasks with fewer files than GroupSize
        * GroupSize: number of files per task

        :return: S_OK([(comma-separated sorted target SEs, [lfns]), ...]), or S_ERROR when no
                 parameters are set
        """
        if not self.params:
            return S_ERROR("TransformationPlugin._Broadcast: The 'Broadcast' plugin requires additional parameters.")

        allowedSources = set(self.util.seParamtoList(self.params.get("SourceSE", [])))
        targetSEs = self.util.seParamtoList(self.params["TargetSE"])
        maxTargets = int(self.params.get("Destinations", 0))
        if maxTargets and maxTargets >= len(targetSEs):
            # A limit that does not restrict anything is dropped
            maxTargets = 0

        status = self.params["Status"]
        groupSize = self.params["GroupSize"]  # Number of files per tasks

        lfnsByTargets = {}
        for replicaSE, lfns in getFileGroups(self.data).items():  # groups by SE
            ses = replicaSE.split(",")
            if allowedSources and not allowedSources.intersection(ses):
                continue

            for lfn in lfns:
                chosen = []
                occupiedSites = self._getSitesForSEs(ses)
                random.shuffle(targetSEs)
                for targetSE in targetSEs:
                    site = self._getSiteForSE(targetSE)["Value"]
                    if site not in occupiedSites:
                        if maxTargets and len(chosen) >= maxTargets:
                            continue
                        occupiedSites.append(site)
                    # after all, if someone wants to copy to the source, it's his choice
                    chosen.append(targetSE)
                key = ",".join(sorted(chosen))
                if key not in lfnsByTargets:
                    lfnsByTargets[key] = []
                lfnsByTargets[key].append(lfn)

        tasks = []
        for ses, lfns in lfnsByTargets.items():
            # do not allow groups smaller than the groupSize, except if transformation is in flush state
            tasks.extend(
                (ses, taskLfns)
                for taskLfns in breakListIntoChunks(lfns, groupSize)
                if status == "Flush" or len(taskLfns) >= int(groupSize)
            )
        return S_OK(tasks)
Example #18
0
    def prepareNewJobs(self, maxFilesPerJob=100, maxAttemptsPerFile=10):
        """Create the 'Transfer' jobs for the files that still have to be submitted.

        The files are grouped by target SE. A target without 'WriteAccess' is
        skipped; unless the failure is an EACCES (SE banned), the attempt
        counter of each of its files is incremented. For the remaining targets
        a unique source is selected per file (restricted to ``self.sourceSEs``
        when set) and one job is created per source and per chunk of at most
        ``maxFilesPerJob`` files.

        :param int maxFilesPerJob: maximum number of files in one job
        :param int maxAttemptsPerFile: passed on to ``_getFilesToSubmit``
        :return: S_OK(list of the newly created jobs), or the failed result of
                 the grouping / source selection
        """

        log = self._log.getSubLogger("_prepareNewJobs", child=True)

        filesToSubmit = self._getFilesToSubmit(
            maxAttemptsPerFile=maxAttemptsPerFile)
        log.debug("%s ftsFiles to submit" % len(filesToSubmit))

        newJobs = []

        # {targetSE : [FTS3Files] }
        res = FTS3Utilities.groupFilesByTarget(filesToSubmit)
        if not res['OK']:
            return res
        filesGroupedByTarget = res['Value']

        # items() instead of the Python-2-only iteritems(): valid on Python 2 and 3
        for targetSE, ftsFiles in filesGroupedByTarget.items():

            res = self._checkSEAccess(targetSE, 'WriteAccess', vo=self.vo)

            if not res['OK']:
                # If the SE is currently banned, we just skip it
                if cmpError(res, errno.EACCES):
                    log.info(
                        "Write access currently not permitted to %s, skipping."
                        % targetSE)
                else:
                    log.error(res)
                    for ftsFile in ftsFiles:
                        ftsFile.attempt += 1
                continue

            sourceSEs = self.sourceSEs.split(
                ',') if self.sourceSEs is not None else []
            # { sourceSE : [FTSFiles] }
            res = FTS3Utilities.selectUniqueRandomSource(
                ftsFiles, allowedSources=sourceSEs)

            if not res['OK']:
                return res

            uniqueTransfersBySource = res['Value']

            # We don't need to check the source, since it is already filtered by the DataManager
            # (own name for the per-source files so the per-target ``ftsFiles`` is not shadowed)
            for sourceSE, sourceFiles in uniqueTransfersBySource.items():

                for ftsFilesChunk in breakListIntoChunks(
                        sourceFiles, maxFilesPerJob):

                    newJob = self._createNewJob('Transfer',
                                                ftsFilesChunk,
                                                targetSE,
                                                sourceSE=sourceSE)

                    newJobs.append(newJob)

        return S_OK(newJobs)
Example #19
0
    def _getFileReplicas(self, fileIDs, fields_input=None, allStatus=False, connection=False):
        """Get replicas for the given list of files specified by their fileIDs

        The replicas are read with the stored procedure
        "ps_get_all_info_of_replicas_bulk", 1000 file IDs per call.

        :param fileIDs: list of file ids
        :param fields_input: metadata of the Replicas we are interested in (default to PFN);
                             "Status" is always added
        :param allStatus: if True, all the Replica statuses will be considered,
                          otherwise, only the db.visibleReplicaStatus
        :param connection: not used by this implementation

        :returns: S_OK with a dict { fileID : { SE name : dict of metadata } }, an empty
                  dict being kept for files without replicas; the failed result of the
                  stored procedure otherwise
        """
        wanted = list(["PFN"] if fields_input is None else fields_input)
        # always add Status in the list of required fields
        if "Status" not in wanted:
            wanted.append("Status")

        # Every file starts with an empty dict, which is what is wanted
        # for non existing replicas
        replicas = dict((fileID, {}) for fileID in fileIDs)

        # Format the status to be used in a IN clause in the stored procedure
        fStatus = stringListToString(self.db.visibleReplicaStatus)

        columns = ("FileID", "SE", "Status", "RepType", "CreationDate", "ModificationDate", "PFN")

        for idChunk in breakListIntoChunks(fileIDs, 1000):
            # Format the FileIDs to be used in a IN clause in the stored procedure
            result = self.db.executeStoredProcedureWithCursor(
                "ps_get_all_info_of_replicas_bulk", (intListToString(idChunk), allStatus, fStatus)
            )
            if not result["OK"]:
                return result

            for row in result["Value"]:
                record = dict(zip(columns, row))
                se = record["SE"]
                fileID = record["FileID"]
                replicas[fileID][se] = {name: record.get(name, "Unknown metadata field") for name in wanted}

        return S_OK(replicas)
Example #20
0
 def getFileMetadata(self, lfns, ownership=False):
     """Returns the file metadata associated to the supplied LFNs

     The metadata is requested from ``self.client.get_metadata_bulk`` in
     chunks of 1000 LFNs. Entries of type DATASET or CONTAINER get the number
     of their content entries as NumberOfLinks; all other entries are reported
     with their adler32 checksum, GUID and size. Every requested LFN for which
     no metadata was obtained is reported as failed.

     :param lfns: iterable of LFNs
     :param ownership: not used by this implementation
     :return: S_OK({"Successful": {lfn: metadata dict},
                    "Failed": {lfn: "No such file or directory"}}), or
              S_ERROR(str(err)) on any exception other than DataIdentifierNotFound
     """
     requested = list(lfns)
     successful = {}
     for chunk in breakListIntoChunks(lfns, 1000):
         try:
             dids = [self.__getDidsFromLfn(lfn) for lfn in chunk]
             for meta in self.client.get_metadata_bulk(dids):
                 lfn = str(meta["name"])
                 if meta["did_type"] in ("DATASET", "CONTAINER"):
                     nlinks = sum(1 for _ in self.client.list_content(meta["scope"], meta["name"]))
                     successful[lfn] = {
                         "Checksum": "",
                         "ChecksumType": "",
                         "CreationDate": meta["created_at"],
                         "GUID": "",
                         "Mode": 509,  # 0o775
                         "ModificationDate": meta["updated_at"],
                         "NumberOfLinks": nlinks,
                         "Size": 0,
                         "Status": "-",
                     }
                 else:
                     guid = meta["guid"]
                     if guid:
                         guid = str(uuid.UUID(guid))
                     successful[lfn] = {
                         "Checksum": str(meta["adler32"]),
                         "ChecksumType": "AD",
                         "CreationDate": meta["created_at"],
                         "GUID": guid,
                         "Mode": 436,  # 0o664
                         "ModificationDate": meta["updated_at"],
                         "NumberOfLinks": 1,
                         "Size": meta["bytes"],
                         "Status": "-",
                     }
         except DataIdentifierNotFound:
             # The exception does not tell here which LFN is missing, and the loop
             # variable ``lfn`` is either unbound or the last LFN that was resolved
             # successfully, so nothing is recorded at this point: every LFN without
             # metadata is reported as failed below.
             continue
         except Exception as err:
             return S_ERROR(str(err))
     failed = {lfn: "No such file or directory" for lfn in requested if lfn not in successful}
     resDict = {"Failed": failed, "Successful": successful}
     return S_OK(resDict)
Example #21
0
    def __insertExistingTransformationFiles(self,
                                            transID,
                                            fileTuplesList,
                                            connection=False):
        """ extends DIRAC.__insertExistingTransformationFiles
        Does not add userSE and adds runNumber

        Files whose status is 'Removed' are not inserted. A status without '-'
        gets the suffix '-inherited' and, when the file has a task ID, that ID is
        rebased to 1000000 * originalID + taskID. The files are inserted in
        chunks of 10000; afterwards the runs of the original transformation are
        copied to ``transID``.

        :param transID: ID of the transformation the files are inserted into
        :param fileTuplesList: list of tuples whose first 14 fields are (lfn, originalID, fileID,
                               status, taskID, targetSE, usedSE, errorCount, lastUpdate, insertTime,
                               runNumber, size, fileType, rawAncestors)
        :param connection: passed on to the DB calls
        :return: S_OK(), or the first failed result of a DB call
        """

        gLogger.info("Inserting %d files in TransformationFiles" %
                     len(fileTuplesList))
        if not fileTuplesList:
            # Nothing to insert and no original transformation to copy runs from
            return S_OK()

        insertHeader = ("INSERT INTO TransformationFiles (TransformationID,Status,TaskID,FileID, "
                        "      TargetSE,LastUpdate,RunNumber,Size,FileType,RAWAncestors) VALUES")
        originalID = None
        # splitting in various chunks, in case it is too big
        for fileTuples in breakListIntoChunks(fileTuplesList, 10000):
            gLogger.verbose(
                "Adding first %d files in TransformationFiles (out of %d)" %
                (len(fileTuples), len(fileTuplesList)))

            # One "(...)" group per file, joined once instead of re-formatting a growing string
            rows = []
            for ft in fileTuples:
                (_lfn, originalID, fileID, status, taskID, targetSE, _usedSE, _errorCount, _lastUpdate,
                 _insertTime, runNumber, size, fileType, rawAncestors) = ft[:14]
                if status == 'Removed':
                    continue
                if '-' not in status:
                    status = "%s-inherited" % status
                    if taskID:
                        taskID = 1000000 * int(originalID) + int(taskID)
                rows.append("(%d,'%s',%s,%d,'%s',UTC_TIMESTAMP(),%s,%s,'%s',%s)" % (
                    transID, status, taskID, fileID, targetSE,
                    runNumber, size, fileType, rawAncestors))
            if not rows:
                continue
            res = self._update("%s %s" % (insertHeader, ", ".join(rows)), connection)
            if not res['OK']:
                return res

        # We must also copy the run table entries if any
        # (originalID is the one carried by the last file tuple)
        result = self.getTransformationRuns({'TransformationID': originalID})
        if not result['OK']:
            return result
        # Iterate the runs that were just fetched, not the result of the last insert
        for runDict in result['Value']:
            res = self.insertTransformationRun(transID,
                                               runDict['RunNumber'],
                                               selectedSite=runDict['SelectedSite'],
                                               status=runDict['Status'],
                                               connection=connection)
            if not res['OK']:
                return res

        return S_OK()
def removeRemoteFiles(dm,lfns):
  """
  Remove files from the catalog, 100 LFNs at a time.

  :param dm: object providing ``removeFile(lfnList)``; the reply is expected to be a
             dictionary with an 'OK' key and, on failure, a 'Message' key
  :param lfns: LFNs to remove
  :return: S_ERROR for the first chunk whose removal fails, S_OK() once every
           chunk has been processed (also when ``lfns`` is empty)
  """
  for lfnList in breakListIntoChunks( lfns, 100 ):
    res = dm.removeFile( lfnList )
    if not res['OK']:
      # lfnList is not a string, so it is formatted rather than concatenated
      return S_ERROR( "Failed to remove files: %s: %s" % ( lfnList, res['Message'] ) )
  # Success is only reported after the last chunk, not after the first one
  return S_OK()
Example #23
0
def removeRemoteFiles(dm, lfns):
    """
    Remove files from the catalog, 100 LFNs at a time.

    :param dm: object providing ``removeFile(lfnList)``; the reply is expected to be a
               dictionary with an "OK" key and, on failure, a "Message" key
    :param lfns: LFNs to remove
    :return: S_ERROR for the first chunk whose removal fails, S_OK() once every
             chunk has been processed (also when ``lfns`` is empty)
    """
    for lfnList in breakListIntoChunks(lfns, 100):
        res = dm.removeFile(lfnList)
        if not res["OK"]:
            # lfnList is not a string, so it is formatted rather than concatenated
            return S_ERROR("Failed to remove files: %s: %s" % (lfnList, res["Message"]))
    # Success is only reported after the last chunk, not after the first one
    return S_OK()
Example #24
0
    def getJobStatus(self, jobIDList):
        """Return the pilot status for each job reference in jobIDList.

        :param jobIDList: a single job reference (string) or a list of them
        :return: S_OK with a dictionary {job: pilotStatus}, or S_ERROR carrying the
                 condor_q output when condor_q exits with a non-zero status
        """
        # With a local schedd the executables have to be cleaned up regularly
        if self.useLocalSchedd:
            self.__cleanup()

        self.log.verbose("Job ID List for status: %s " % jobIDList)
        if isinstance(jobIDList, six.string_types):
            jobIDList = [jobIDList]

        # Collect every condor ID first, so that condor_q and condor_history
        # are called per chunk of IDs and not per job
        condorIDs = {}
        for reference in jobIDList:
            jobKey, _, condorID = condorIDAndPathToResultFromJobRef(reference)
            condorIDs[jobKey] = condorID

        statusLines = []
        for idChunk in breakListIntoChunks(condorIDs.values(), 100):
            joinedIDs = " ".join(idChunk)

            # condor_q prints lines such as "1245.75 3"
            exitCode, queueOutput = commands.getstatusoutput(
                "condor_q %s %s -af:j JobStatus " % (self.remoteScheddOptions, joinedIDs))
            if exitCode != 0:
                return S_ERROR(queueOutput)
            statusLines.extend(queueOutput.strip().split("\n"))

            # FIXME: condor_history only supports "j" for autoformat from 8.5.3 on;
            # "-af" puts whitespace between the fields, so the lines look like
            # "1245 75 3" and the first two fields have to be joined with a dot
            historyCall = "condor_history %s %s -af ClusterId ProcId JobStatus" % (
                self.remoteScheddOptions,
                joinedIDs,
            )
            treatCondorHistory(historyCall, statusLines)

        resultDict = {}
        for jobKey, condorID in condorIDs.items():
            pilotStatus = parseCondorStatus(statusLines, condorID)
            if pilotStatus == "HELD":
                # Make sure the pilot stays dead and is taken out of the condor_q;
                # the outcome of condor_rm is not inspected
                commands.getstatusoutput(
                    "condor_rm %s %s " % (self.remoteScheddOptions, condorID))
                pilotStatus = PilotStatus.ABORTED
            resultDict[jobKey] = pilotStatus

        self.log.verbose("Pilot Statuses: %s " % resultDict)
        return S_OK(resultDict)
Example #25
0
    def getLFNStatus(self, jobs):
        """Get all the LFNs for the jobs and get their status.

        The job information is fetched for every job (retrying for as long as
        RuntimeError is raised), then the existence of all collected input and
        output LFNs is checked in the file catalog in chunks of 200.

        :param jobs: dictionary whose values provide getJobInformation(),
                     inputFiles, outputFiles and jobID
        :return: dictionary filled from the 'Successful' part of the
                 ``fcClient.exists`` replies
        """
        self.log.notice('Collecting LFNs...')
        lfnExistence = {}
        lfnCache = []
        nJobs = len(jobs)
        jobInfoStart = time.time()
        for counter, job in enumerate(jobs.values()):
            if counter % self.printEveryNJobs == 0:
                self.log.notice(
                    'Getting JobInfo: %d/%d: %3.1fs' %
                    (counter, nJobs, float(time.time() - jobInfoStart)))
            while True:
                try:
                    job.getJobInformation(self.diracAPI,
                                          self.jobMon,
                                          jdlOnly=self.getJobInfoFromJDLOnly)
                    lfnCache.extend(job.inputFiles)
                    lfnCache.extend(job.outputFiles)
                    break
                except RuntimeError as e:  # try again
                    self.log.error('+++++ Failure for job:', job.jobID)
                    self.log.error('+++++ Exception: ', str(e))

        timeSpent = float(time.time() - jobInfoStart)
        # Average over the number of jobs, not over the last loop index; the
        # max() keeps an empty job dictionary from dividing by zero
        self.log.notice('Getting JobInfo Done: %3.1fs (%3.3fs per job)' %
                        (timeSpent, timeSpent / max(nJobs, 1)))

        nLFNs = len(lfnCache)
        counter = 0
        fileInfoStart = time.time()
        for lfnChunk in breakListIntoChunks(lfnCache, 200):
            counter += 200
            if counter % 1000 == 0:
                # The last chunk may be short, so never report more than the total
                self.log.notice('Getting FileInfo: %d/%d: %3.1fs' %
                                (min(counter, nLFNs), nLFNs,
                                 float(time.time() - fileInfoStart)))
            while True:
                try:
                    reps = self.fcClient.exists(lfnChunk)
                except RuntimeError:  # try again
                    continue
                if reps['OK']:
                    break
                self.log.error(
                    'Failed to check file existence, try again...',
                    reps['Message'])
            lfnExistence.update(reps['Value']['Successful'])
        self.log.notice('Getting FileInfo Done: %3.1fs' %
                        (float(time.time() - fileInfoStart)))

        return lfnExistence
  def __removeWMSTasks( self, jobIDs ):
    """ Kill and delete the given jobs from the WMS, then delete the failover
        requests associated to them.

        The jobs are handled in chunks of 500; each chunk is first killed and then deleted.

        :param jobIDs: job IDs to remove
        :return: S_OK() when everything was removed; S_ERROR when a kill/delete call
                 failed for a reason other than non-existing jobs, or when a request
                 could not be deleted; the failed reply of getRequestForJobs as is
    """
    allRemove = True
    for jobList in breakListIntoChunks( jobIDs, 500 ):
      # ( client call, word for the success message, word for the failure messages )
      for wmsCall, done, verb in ( ( self.wmsClient.killJob, 'killed', 'kill' ),
                                   ( self.wmsClient.deleteJob, 'removed', 'remove' ) ):
        res = wmsCall( jobList )
        if res['OK']:
          gLogger.info( "Successfully %s %d jobs from WMS" % ( done, len( jobList ) ) )
        elif 'NonauthorizedJobIDs' in res:
          gLogger.error( "Failed to %s %s jobs because not authorized" % ( verb, len( res['NonauthorizedJobIDs'] ) ) )
          allRemove = False
        elif 'FailedJobIDs' in res:
          gLogger.error( "Failed to %s %s jobs" % ( verb, len( res['FailedJobIDs'] ) ) )
          allRemove = False
        elif 'InvalidJobIDs' in res:
          # Only jobs unknown to the WMS: nothing left to remove for them
          gLogger.info( "Found %s jobs which did not exist in the WMS" % len( res['InvalidJobIDs'] ) )
        else:
          # A failure without any job ID list must not be mistaken for a success
          gLogger.error( "Failed to %s jobs from WMS" % verb, res.get( 'Message', 'Unknown error' ) )
          allRemove = False

    if not allRemove:
      return S_ERROR( "Failed to remove all remnants from WMS" )
    gLogger.info( "Successfully removed all tasks from the WMS" )
    res = self.requestClient.getRequestForJobs( jobIDs )
    if not res['OK']:
      gLogger.error( "Failed to get requestID for jobs.", res['Message'] )
      return res
    failoverRequests = res['Value']
    gLogger.info( "Found %d jobs with associated failover requests" % len( failoverRequests ) )
    if not failoverRequests:
      return S_OK()
    failed = 0
    for jobID, requestName in failoverRequests.items():
      res = self.requestClient.deleteRequest( requestName )
      if not res['OK']:
        gLogger.error( "Failed to remove request from RequestDB", res['Message'] )
        failed += 1
      else:
        gLogger.verbose( "Removed request %s associated to job %d." % ( requestName, jobID ) )
    if failed:
      gLogger.info( "Successfully removed %s requests" % ( len( failoverRequests ) - failed ) )
      gLogger.info( "Failed to remove %s requests" % failed )
      return S_ERROR( "Failed to remove all the request from RequestDB" )
    gLogger.info( "Successfully removed all the associated failover requests" )
    return S_OK()
Example #27
0
 def removeFile(self, path):
     """Physically remove the file(s) specified by path.

     The URLs are processed in chunks of 100: a chunk is first handed to
     ``stager_rm`` and, when that returns 0 or 1, to ``nsrm -f``.

     :param path: argument accepted by ``__checkArgumentFormat``
     :return: the failed reply of the argument check, otherwise S_OK with
              {'Failed': {url: error string}, 'Successful': {url: True}}
     """
     res = self.__checkArgumentFormat(path)
     if not res['OK']:
         return res
     successful = {}
     failed = {}
     stagerErr = "RFIOStorage.removeFile. Completely failed to remove files from the stager."
     nameserverErr = "RFIOStorage.removeFile. Completely failed to remove files from the nameserver."
     for urlChunk in breakListIntoChunks(res['Value'], 100):
         gLogger.debug(
             "RFIOStorage.removeFile: Attempting to remove %s files." %
             len(urlChunk))

         # Step 1: remove the files from the stager
         stagerCmd = " ".join(['stager_rm -S %s' % self.spaceToken] +
                              ["-M %s" % (url,) for url in urlChunk])
         res = shellCall(100, stagerCmd)
         if not res['OK']:
             gLogger.error(stagerErr, res['Message'])
             failed.update(dict.fromkeys(urlChunk, stagerErr))
             continue
         returncode, _stdout, stderr = res['Value']
         if returncode not in [0, 1]:
             gLogger.error(stagerErr, stderr)
             failed.update(dict.fromkeys(urlChunk, stagerErr))
             continue

         # Step 2: remove the files from the nameserver
         nsCmd = " ".join(['nsrm -f'] + ["%s" % (url,) for url in urlChunk])
         res = shellCall(100, nsCmd)
         if not res['OK']:
             gLogger.error(nameserverErr, res['Message'])
             failed.update(dict.fromkeys(urlChunk, nameserverErr))
             continue
         returncode, _stdout, stderr = res['Value']
         if returncode not in [0, 1]:
             gLogger.error(nameserverErr, stderr)
             failed.update(dict.fromkeys(urlChunk, nameserverErr))
             continue

         successful.update(dict.fromkeys(urlChunk, True))
     return S_OK({'Failed': failed, 'Successful': successful})
 def __verifyPfns( self, pfnSizes, storageElements ):
   """ Check whether the PFNs found on storage are known to the catalog.

       Only the keys of pfnSizes are used. They are processed in sorted chunks of 100.

       NOTE(review): the statements that would fill the 'Remove' and 'ReRegister'
       lists are commented out, so both lists are always returned empty; only
       'AllDone' carries information. Confirm whether this is intended.

       :param pfnSizes: dictionary keyed by PFN
       :param storageElements: storage element names; the first one is used to build
                               the PFNs as they should be registered
       :return: S_OK( {'Remove': [], 'ReRegister': [], 'AllDone': bool} ); 'AllDone' is
                False as soon as one call fails or reports entries in 'Failed'
   """
   gLogger.info( 'Checking %s storage files exist in the catalog' % len( pfnSizes ) )
   pfnsToRemove = []
   incorrectlyRegistered = []
   allDone = True
   # First get all the PFNs as they should be registered in the catalog
   for pfns in breakListIntoChunks( sortList( pfnSizes.keys() ), 100 ):
     res = self.replicaManager.getPfnForProtocol( pfns, storageElements[0], withPort = False )
     if not res['OK']:
       # The whole chunk is skipped; the failure is only recorded in allDone
       allDone = False
       continue
     for pfn, error in res['Value']['Failed'].items():
       gLogger.error( 'Failed to obtain registered PFN for physical file', '%s %s' % ( pfn, error ) )
     if res['Value']['Failed']:
       allDone = False
     catalogStoragePfns = res['Value']['Successful']
     # Determine whether these PFNs are registered and if so obtain the LFN
     res = self.replicaManager.getCatalogLFNForPFN( catalogStoragePfns.values() )
     if not res['OK']:
       allDone = False
       continue
     for surl in sortList( res['Value']['Failed'].keys() ):
       if res['Value']['Failed'][surl] == 'No such file or directory':
         # Unregistered SURLs are only printed; adding them to pfnsToRemove is disabled
         #pfnsToRemove.append(surl)
         print surl
       else:
         gLogger.error( 'Failed to get LFN for PFN', '%s %s' % ( surl, res['Value']['Failed'][surl] ) )
     existingLFNs = res['Value']['Successful'].values()
     if existingLFNs:
       # Look up the replicas of the LFNs that are registered
       res = self.replicaManager.getCatalogReplicas( existingLFNs )
       if not res['OK']:
         allDone = False
         continue
       for lfn, error in res['Value']['Failed'].items():
         gLogger.error( 'Failed to obtain registered replicas for LFN', '%s %s' % ( lfn, error ) )
       if res['Value']['Failed']:
         allDone = False
       for lfn, replicas in res['Value']['Successful'].items():
         # match is True when the LFN has a replica at one of the given storage elements
         match = False
         for storageElement in storageElements:
           if storageElement in replicas.keys():
             match = True
         if not match:
           # Recording the LFN as incorrectly registered is disabled
           pass#incorrectlyRegistered.append(lfn)
           #print lfn
   gLogger.info( "Verification of PFNs complete" )
   if incorrectlyRegistered:
     gLogger.info( "Found %d files incorrectly registered" % len( incorrectlyRegistered ) )
   if pfnsToRemove:
     gLogger.info( "Found %d files to be removed" % len( pfnsToRemove ) )
   resDict = {'Remove':pfnsToRemove, 'ReRegister':incorrectlyRegistered, 'AllDone':allDone}
   return S_OK( resDict )
Example #29
0
 def removeFile( self, lfns ):
   """ Remove the given LFNs through the RPC client, 100 at a time.

       :param lfns: LFNs to remove
       :return: the first failed RPC reply as is, otherwise S_OK with the merged
                'Successful' and 'Failed' dictionaries of all chunks
   """
   client = self._getRPC()
   merged = {'Successful': {}, 'Failed': {}}
   for lfnChunk in breakListIntoChunks( lfns, 100 ):
     reply = client.removeFile( lfnChunk )
     if not reply['OK']:
       # Results of the chunks handled so far are not returned in this case
       return reply
     for outcome in ( 'Successful', 'Failed' ):
       merged[outcome].update( reply['Value'][outcome] )
   return S_OK( merged )
Example #30
0
 def removeFile(self, lfns):
     """Remove the given LFNs through the RPC client, 100 at a time.

     :param lfns: LFNs to remove
     :return: the first failed RPC reply as is, otherwise S_OK with the merged
              'Successful' and 'Failed' dictionaries of all chunks
     """
     client = self._getRPC()
     merged = {'Successful': {}, 'Failed': {}}
     for lfnChunk in breakListIntoChunks(lfns, 100):
         reply = client.removeFile(lfnChunk)
         if not reply['OK']:
             # Results of the chunks handled so far are not returned in this case
             return reply
         for outcome in ('Successful', 'Failed'):
             merged[outcome].update(reply['Value'][outcome])
     return S_OK(merged)
Example #31
0
 def removeFile(self, lfns):
     """Remove the given LFNs through the RPC client, 100 at a time.

     :param lfns: LFNs to remove
     :return: the first failed RPC reply as is, otherwise S_OK with the merged
              "Successful" and "Failed" dictionaries of all chunks
     """
     client = self._getRPC()
     removed = {}
     notRemoved = {}
     for lfnChunk in breakListIntoChunks(lfns, 100):
         reply = client.removeFile(lfnChunk)
         if not reply["OK"]:
             # Results of the chunks handled so far are not returned in this case
             return reply
         chunkResult = reply["Value"]
         removed.update(chunkResult["Successful"])
         notRemoved.update(chunkResult["Failed"])
     return S_OK({"Successful": removed, "Failed": notRemoved})
Example #32
0
 def __removeWMSTasks(self, jobIDs):
     """Delete the given jobs from the WMS, then delete the failover requests
     associated to them.

     The jobs are deleted in chunks of 500.

     :param jobIDs: job IDs to remove
     :return: S_OK() when everything was removed; S_ERROR when a delete call failed
              for a reason other than non-existing jobs, or when a request could
              not be deleted; the failed reply of getRequestForJobs as is
     """
     allRemove = True
     for jobList in breakListIntoChunks(jobIDs, 500):
         res = self.wmsClient.deleteJob(jobList)
         if res['OK']:
             gLogger.info("Successfully removed %d jobs from WMS" %
                          len(jobList))
         elif 'NonauthorizedJobIDs' in res:
             gLogger.error(
                 "Failed to remove %s jobs because not authorized" %
                 len(res['NonauthorizedJobIDs']))
             allRemove = False
         elif 'FailedJobIDs' in res:
             gLogger.error("Failed to remove %s jobs" %
                           len(res['FailedJobIDs']))
             allRemove = False
         elif 'InvalidJobIDs' in res:
             # Only jobs unknown to the WMS: nothing left to remove for them
             gLogger.info("Found %s jobs which did not exist in the WMS" %
                          len(res['InvalidJobIDs']))
         else:
             # A failure without any job ID list must not be mistaken for a success
             gLogger.error("Failed to remove jobs from WMS",
                           res.get('Message', 'Unknown error'))
             allRemove = False
     if not allRemove:
         return S_ERROR("Failed to remove all remnants from WMS")
     gLogger.info("Successfully removed all tasks from the WMS")
     res = self.requestClient.getRequestForJobs(jobIDs)
     if not res['OK']:
         gLogger.error("Failed to get requestID for jobs.", res['Message'])
         return res
     failoverRequests = res['Value']
     gLogger.info("Found %d jobs with associated failover requests" %
                  len(failoverRequests))
     if not failoverRequests:
         return S_OK()
     failed = 0
     for jobID, requestName in failoverRequests.items():
         res = self.requestClient.deleteRequest(requestName)
         if not res['OK']:
             gLogger.error("Failed to remove request from RequestDB",
                           res['Message'])
             failed += 1
         else:
             gLogger.verbose("Removed request %s associated to job %d." %
                             (requestName, jobID))
     if failed:
         gLogger.info("Successfully removed %s requests" %
                      (len(failoverRequests) - failed))
         gLogger.info("Failed to remove %s requests" % failed)
         return S_ERROR("Failed to remove all the request from RequestDB")
     gLogger.info(
         "Successfully removed all the associated failover requests")
     return S_OK()
Example #33
0
 def __exists(self, lfns):
     """Query the server for the existence of the given LFNs.

     The LFNs are sent in chunks of ``self.splitSize``.

     :param lfns: LFNs to check
     :return: S_OK with {'Successful': {lfn: value returned by the server},
              'Failed': {lfn: error message of the failed call}}
     """
     server = RPCClient(self.url, timeout=120)
     successful = {}
     failed = {}
     for lfnList in breakListIntoChunks(lfns, self.splitSize):
         res = server.exists(lfnList)
         if not res['OK']:
             # Every LFN of the chunk inherits the error of the failed call
             failed.update(dict.fromkeys(lfnList, res['Message']))
         else:
             successful.update(res['Value'])
     # Report the collected failures instead of an always-empty dictionary
     return S_OK({'Successful': successful, 'Failed': failed})
Example #34
0
 def __exists(self, lfns):
     """Query the server for the existence of the given LFNs.

     The LFNs are sent in chunks of ``self.splitSize``.

     :param lfns: LFNs to check
     :return: S_OK with {"Successful": {lfn: value returned by the server},
              "Failed": {lfn: error message of the failed call}}
     """
     server = RPCClient(self.url, timeout=120)
     successful = {}
     failed = {}
     for lfnList in breakListIntoChunks(lfns, self.splitSize):
         res = server.exists(lfnList)
         if not res["OK"]:
             # Every LFN of the chunk inherits the error of the failed call
             failed.update(dict.fromkeys(lfnList, res["Message"]))
         else:
             successful.update(res["Value"])
     # Report the collected failures instead of an always-empty dictionary
     return S_OK({"Successful": successful, "Failed": failed})
 def __exists( self, lfns ):
   """ Query the server for the existence of the given LFNs.

       The LFNs are sent in chunks of ``self.splitSize``.

       :param lfns: LFNs to check
       :return: S_OK with {'Successful': {lfn: value returned by the server},
                'Failed': {lfn: error message of the failed call}}
   """
   server = RPCClient( self.url, timeout = 120 )
   successful = {}
   failed = {}
   for lfnList in breakListIntoChunks( lfns, self.splitSize ):
     res = server.exists( lfnList )
     if not res['OK']:
       # Every LFN of the chunk inherits the error of the failed call
       failed.update( dict.fromkeys( lfnList, res['Message'] ) )
     else:
       successful.update( res['Value'] )
   # Report the collected failures instead of an always-empty dictionary
   return S_OK( {'Successful':successful, 'Failed':failed} )
Example #36
0
 def removeFile( self, path ):
   """ Physically remove the file(s) specified by path.

       The URLs are processed in chunks of 100: a chunk is first handed to
       ``stager_rm`` and, when that returns 0 or 1, to ``nsrm -f``.

       :param path: argument accepted by ``__checkArgumentFormat``
       :return: the failed reply of the argument check, otherwise S_OK with
                {'Failed': {url: error string}, 'Successful': {url: True}}
   """
   res = self.__checkArgumentFormat( path )
   if not res['OK']:
     return res
   successful = {}
   failed = {}
   stagerErr = "RFIOStorage.removeFile. Completely failed to remove files from the stager."
   nameserverErr = "RFIOStorage.removeFile. Completely failed to remove files from the nameserver."
   for urlChunk in breakListIntoChunks( res['Value'], 100 ):
     gLogger.debug( "RFIOStorage.removeFile: Attempting to remove %s files." % len( urlChunk ) )

     # Step 1: remove the files from the stager
     stagerCmd = " ".join( ['stager_rm -S %s' % self.spaceToken] + ["-M %s" % ( url, ) for url in urlChunk] )
     res = shellCall( 100, stagerCmd )
     if not res['OK']:
       gLogger.error( stagerErr, res['Message'] )
       failed.update( dict.fromkeys( urlChunk, stagerErr ) )
       continue
     returncode, _stdout, stderr = res['Value']
     if returncode not in [0, 1]:
       gLogger.error( stagerErr, stderr )
       failed.update( dict.fromkeys( urlChunk, stagerErr ) )
       continue

     # Step 2: remove the files from the nameserver
     nsCmd = " ".join( ['nsrm -f'] + ["%s" % ( url, ) for url in urlChunk] )
     res = shellCall( 100, nsCmd )
     if not res['OK']:
       gLogger.error( nameserverErr, res['Message'] )
       failed.update( dict.fromkeys( urlChunk, nameserverErr ) )
       continue
     returncode, _stdout, stderr = res['Value']
     if returncode not in [0, 1]:
       gLogger.error( nameserverErr, stderr )
       failed.update( dict.fromkeys( urlChunk, nameserverErr ) )
       continue

     successful.update( dict.fromkeys( urlChunk, True ) )
   return S_OK( {'Failed':failed, 'Successful':successful} )
Example #37
0
 def addFile(self, lfns):
     """Register the supplied files.

     The files are registered in bulk, 100 at a time; when a bulk call raises,
     the files of that chunk are registered one by one.

     :param lfns: dictionary {lfn: info}; "SE", "Size" and "Checksum" are read from
                  info, "PFN" only when the RSE is not deterministic, "GUID" if present
     :return: S_OK with {"Failed": {lfn: error string}, "Successful": {lfn: True}}
     """
     failed = {}
     successful = {}
     # Cache of the "deterministic" flag per SE, to query each RSE only once
     deterministicBySE = {}
     for lfnChunk in breakListIntoChunks(lfns, 100):
         replicas = []
         for lfn in lfnChunk:
             info = lfns[lfn]
             se = info["SE"]
             if se not in deterministicBySE:
                 deterministicBySE[se] = self.client.get_rse(se)["deterministic"]
             # The PFN is only passed along for non deterministic RSEs
             pfn = None if deterministicBySE[se] else info["PFN"]
             replica = {
                 "lfn": lfn,
                 "bytes": info["Size"],
                 "adler32": info["Checksum"],
                 "rse": se
             }
             guid = info.get("GUID", None)
             if pfn:
                 replica["pfn"] = pfn
             if guid:
                 replica["guid"] = guid
             replicas.append(replica)

         try:
             self.client.add_files(lfns=replicas, ignore_availability=True)
         except Exception as err:
             # Try inserting one by one
             sLog.warn("Cannot bulk insert files", "error : %s" % repr(err))
             for replica in replicas:
                 name = replica["lfn"]
                 try:
                     self.client.add_files(lfns=[replica],
                                           ignore_availability=True)
                 except FileReplicaAlreadyExists:
                     # An already registered replica counts as a success
                     successful[name] = True
                 except Exception as singleErr:
                     failed[name] = str(singleErr)
                 else:
                     successful[name] = True
         else:
             successful.update(dict.fromkeys(lfnChunk, True))

     resDict = {"Failed": failed, "Successful": successful}
     sLog.debug(resDict)
     return S_OK(resDict)
  def getJobStatus( self, jobIDList ):
    """ Get the status information for the given list of jobs.

        The jobs are handled in chunks of 100; ``qstat`` is run over SSH for the
        stamps of each chunk and the 5th column of the matching lines is mapped
        to 'Done', 'Running' or 'Waiting'. Jobs without a usable line stay 'Unknown'.

        :param jobIDList: job references, parsed with ``pfnparse``
        :return: S_OK( {job reference: status} ), the failed SSH reply, or S_ERROR
                 when the SSH call times out or returns a non-zero status
    """
    # Torque state letter -> status reported to the caller
    torqueStates = {}
    torqueStates.update( dict.fromkeys( ['E', 'C'], 'Done' ) )
    torqueStates.update( dict.fromkeys( ['R'], 'Running' ) )
    torqueStates.update( dict.fromkeys( ['S', 'W', 'Q', 'H', 'T'], 'Waiting' ) )

    resultDict = {}
    ssh = SSH( parameters = self.ceParameters )

    for jobChunk in breakListIntoChunks( jobIDList, 100 ):

      # stamp (file name up to the first dot) -> job reference
      jobByStamp = {}
      for jobRef in jobChunk:
        parsed = pfnparse( jobRef )
        if not parsed['OK']:
          self.log.error( 'Invalid job id', jobRef )
          continue
        jobByStamp[parsed['Value']['FileName'].split( '.' )[0]] = jobRef

      result = ssh.sshCall( 10, [ 'qstat', ' '.join( jobByStamp.keys() ) ] )
      if not result['OK']:
        return result

      sshStatus = result['Value'][0]
      if sshStatus == -1:
        return S_ERROR( 'Timeout while SSH call' )
      if sshStatus != 0:
        return S_ERROR( 'Error while SSH call' )

      qstatLines = result['Value'][1].replace( '\r', '' ).split( '\n' )
      for stamp, jobRef in jobByStamp.items():
        resultDict[jobRef] = 'Unknown'
        for line in qstatLines:
          if stamp not in line:
            continue
          if 'Unknown' in line:
            resultDict[jobRef] = 'Unknown'
            continue
          torqueStatus = line.split()[4]
          if torqueStatus in torqueStates:
            resultDict[jobRef] = torqueStates[torqueStatus]

    return S_OK( resultDict )
Example #39
0
    def getJobStatus(self, jobIDList):
        """Get the status information for the given list of jobs.

        The jobs are handled in chunks of 100; ``qstat`` is run over SSH for the
        stamps of each chunk and the 5th column of the matching lines is mapped
        to 'Done', 'Running' or 'Waiting'. Jobs without a usable line stay 'Unknown'.

        :param jobIDList: job references, parsed with ``pfnparse``
        :return: S_OK({job reference: status}), the failed SSH reply, or S_ERROR
                 when the SSH call times out or returns a non-zero status
        """
        # Torque state letter -> status reported to the caller
        torqueStates = {}
        torqueStates.update(dict.fromkeys(['E', 'C'], 'Done'))
        torqueStates.update(dict.fromkeys(['R'], 'Running'))
        torqueStates.update(dict.fromkeys(['S', 'W', 'Q', 'H', 'T'], 'Waiting'))

        resultDict = {}
        ssh = SSH(parameters=self.ceParameters)

        for jobChunk in breakListIntoChunks(jobIDList, 100):

            # stamp (file name up to the first dot) -> job reference
            jobByStamp = {}
            for jobRef in jobChunk:
                parsed = pfnparse(jobRef)
                if not parsed['OK']:
                    self.log.error('Invalid job id', jobRef)
                    continue
                jobByStamp[parsed['Value']['FileName'].split('.')[0]] = jobRef

            result = ssh.sshCall(10, ['qstat', ' '.join(jobByStamp.keys())])
            if not result['OK']:
                return result

            sshStatus = result['Value'][0]
            if sshStatus == -1:
                return S_ERROR('Timeout while SSH call')
            if sshStatus != 0:
                return S_ERROR('Error while SSH call')

            qstatLines = result['Value'][1].replace('\r', '').split('\n')
            for stamp, jobRef in jobByStamp.items():
                resultDict[jobRef] = 'Unknown'
                for line in qstatLines:
                    if stamp not in line:
                        continue
                    if 'Unknown' in line:
                        resultDict[jobRef] = 'Unknown'
                        continue
                    torqueStatus = line.split()[4]
                    if torqueStatus in torqueStates:
                        resultDict[jobRef] = torqueStates[torqueStatus]

        return S_OK(resultDict)
Example #40
0
    def getJobStatus(self, jobIDList):
        """Get the status information for the given list of jobs.

        The jobs are handled in chunks of 100. For each chunk ``qstat`` is run over
        SSH and the 5th column of the lines mentioning a job number is mapped to
        'Done', 'Running' or 'Waiting'. A job that is still 'Unknown' after that is
        looked up once in the batch output directory: it is reported as 'Done'
        when the ``ls`` output does not say "No such file or directory".

        :param jobIDList: job identifiers; the part before the first dot is the job number
        :return: S_OK({job identifier: status}) or the failed reply of the qstat SSH call
        """
        # Batch system state -> status reported to the caller
        states = {}
        states.update(dict.fromkeys(['Tt', 'Tr'], 'Done'))
        states.update(dict.fromkeys(['Rr', 'r'], 'Running'))
        states.update(dict.fromkeys(['qw', 'h'], 'Waiting'))

        resultDict = {}
        ssh = SSH(self.sshUser, self.sshHost, self.sshPassword)
        for jobList in breakListIntoChunks(jobIDList, 100):
            jobDict = {}
            for job in jobList:
                jobNumber = job.split('.')[0]
                if jobNumber:
                    jobDict[jobNumber] = job
            cmd = ("source %s; qstat") % (self.geEnv)
            result = ssh.sshCall(10, cmd)
            if not result['OK']:
                return result

            output = result['Value'][1].replace('\r', '')
            lines = output.split('\n')
            for jobNumber, jobRef in jobDict.items():
                status = 'Unknown'
                for line in lines:
                    if line.find(jobNumber) == -1:
                        continue
                    if line.find('Unknown') != -1:
                        status = 'Unknown'
                        continue
                    fields = line.split()
                    # Lines with fewer than 5 columns carry no state
                    if len(fields) > 4:
                        status = states.get(fields[4], status)

                if status == 'Unknown':
                    # Checked once per job, after all qstat lines have been read,
                    # instead of once per line that does not mention the job
                    cmd = ("ls -la  %s/*%s*") % (self.batchOutput, jobNumber)
                    lsResult = ssh.sshCall(10, cmd)
                    if lsResult['OK']:
                        # Search the text returned by the call (everything after
                        # the status code), not the result tuple itself
                        lsText = ' '.join(str(part) for part in lsResult['Value'][1:])
                        if "No such file or directory" in lsText:
                            self.log.debug("Output no ready")
                        else:
                            status = 'Done'
                resultDict[jobRef] = status

        self.log.debug("Result dict: ")
        self.log.debug(resultDict)
        return S_OK(resultDict)
Example #41
0
  def registerCopiedFiles(self, filesNewlyCopied, copiedFiles, allUnmigratedFilesMeta):
    """
      Register in the file catalog the files that were copied successfully, i.e. the
      newly copied ones and those that were already in Copied status.

      Files that fail to register are set back to 'Copied' in the RawIntegrityDB.
      Note that copiedFiles is updated in place with the newly copied files.

      :param filesNewlyCopied: [lfns] of files newly copied
      :param copiedFiles: {lfn:RIDb metadata} of files that were in Copied state.
      :param allUnmigratedFilesMeta: {lfn:RI Db metadata} for all lfns non migrated at
                                    the beginning of the loop.

      :return: {lfn:True} for successfuly registered lfns
    """
    if not (filesNewlyCopied or copiedFiles):
      self.log.info("No files to be registered")
    else:
      self.log.info("Attempting to register %s newly copied and %s previously copied files" %
                    (len(filesNewlyCopied), len(copiedFiles)))

    # From here on copiedFiles also holds the newly copied files
    copiedFiles.update({lfn: allUnmigratedFilesMeta[lfn] for lfn in filesNewlyCopied})

    successfulRegister = {}
    failedRegister = {}

    # Registration is attempted in batches of 100 files
    for lfnChunk in breakListIntoChunks(copiedFiles, 100):
      # The catalog needs the metadata along with each LFN
      chunkWithMeta = {lfn: copiedFiles[lfn] for lfn in lfnChunk}
      res = self.fileCatalog.addFile(chunkWithMeta)

      if res['OK']:
        successfulRegister.update(res['Value']['Successful'])
        failedRegister.update(res['Value']['Failed'])
        continue

      self.log.error("Completely failed to register some successfully copied file.",
                     res['Message'])
      failedRegister.update(dict.fromkeys(chunkWithMeta, res['Message']))

    gMonitor.addMark("ErrorRegister", len(failedRegister))
    for lfn, reason in failedRegister.iteritems():
      self.log.error("Failed to register lfn. Setting to Copied", "%s: %s" % (lfn, reason))
      res = self.rawIntegrityDB.setFileStatus(lfn, 'Copied')
      if res['OK']:
        continue
      self.log.error("Error setting file status to Copied", "%s: %s" % (lfn, res['Message']))

    for lfn in successfulRegister:
      self.log.info("Successfully registered %s in the File Catalog." % lfn)

    return successfulRegister
Example #42
0
  def getJobStatus( self, jobIDList ):
    """ Get the status information for the given list of jobs.

        The jobs are handled in chunks of 100. For each chunk ``qstat`` is run over
        SSH and the 5th column of the lines mentioning a job number is mapped to
        'Done', 'Running' or 'Waiting'. A job that is still 'Unknown' after that is
        looked up once in the batch output directory: it is reported as 'Done'
        when the ``ls`` output does not say "No such file or directory".

        :param jobIDList: job identifiers; the part before the first dot is the job number
        :return: S_OK( {job identifier: status} ) or the failed reply of the qstat SSH call
    """
    # Batch system state -> status reported to the caller
    states = {}
    states.update( dict.fromkeys( ['Tt', 'Tr'], 'Done' ) )
    states.update( dict.fromkeys( ['Rr', 'r'], 'Running' ) )
    states.update( dict.fromkeys( ['qw', 'h'], 'Waiting' ) )

    resultDict = {}
    ssh = SSH( self.sshUser, self.sshHost, self.sshPassword )
    for jobList in breakListIntoChunks(jobIDList,100):
      jobDict = {}
      for job in jobList:
        jobNumber = job.split('.')[0]
        if jobNumber:
          jobDict[jobNumber] = job
      cmd = ("source %s; qstat")%(self.geEnv)
      result = ssh.sshCall( 10, cmd )
      if not result['OK']:
        return result

      output = result['Value'][1].replace( '\r', '' )
      lines = output.split( '\n' )
      for jobNumber, jobRef in jobDict.items():
        status = 'Unknown'
        for line in lines:
          if line.find( jobNumber ) == -1:
            continue
          if line.find( 'Unknown' ) != -1:
            status = 'Unknown'
            continue
          fields = line.split()
          # Lines with fewer than 5 columns carry no state
          if len( fields ) > 4:
            status = states.get( fields[4], status )

        if status == 'Unknown':
          # Checked once per job, after all qstat lines have been read,
          # instead of once per line that does not mention the job
          cmd = ("ls -la  %s/*%s*")%(self.batchOutput,jobNumber)
          lsResult = ssh.sshCall( 10, cmd )
          if lsResult['OK']:
            # Search the text returned by the call (everything after
            # the status code), not the result tuple itself
            lsText = ' '.join( str( part ) for part in lsResult['Value'][1:] )
            if "No such file or directory" in lsText:
              self.log.debug ("Output no ready")
            else:
              status = 'Done'
        resultDict[jobRef] = status

    self.log.debug("Result dict: ")
    self.log.debug(resultDict)
    return S_OK( resultDict )
Example #43
0
  def prepareNewJobs(self, maxFilesPerJob=100, maxAttemptsPerFile=10):
    """ Build the new 'Transfer' jobs for the files waiting to be submitted.

        The files are grouped by target SE, then by source SE, and every group is
        split into jobs of at most maxFilesPerJob files.

        :param maxFilesPerJob: maximum number of files in one job
        :param maxAttemptsPerFile: passed on to ``_getFilesToSubmit``
        :return: S_OK( list of new jobs ), or the failed reply of the grouping or
                 source selection step
    """

    log = self._log.getSubLogger("_prepareNewJobs", child=True)

    filesToSubmit = self._getFilesToSubmit(maxAttemptsPerFile=maxAttemptsPerFile)
    log.debug("%s ftsFiles to submit" % len(filesToSubmit))

    # {targetSE : [FTS3Files] }
    res = FTS3Utilities.groupFilesByTarget(filesToSubmit)
    if not res['OK']:
      return res
    filesByTarget = res['Value']

    newJobs = []
    for targetSE, targetFiles in filesByTarget.iteritems():

      accessRes = self._checkSEAccess(targetSE, 'WriteAccess', vo=self.vo)
      if not accessRes['OK']:
        if not cmpError(accessRes, errno.EACCES):
          log.error(accessRes)
          for ftsFile in targetFiles:
            ftsFile.attempt += 1
        else:
          # If the SE is currently banned, we just skip it
          log.info("Write access currently not permitted to %s, skipping." % targetSE)
        continue

      if self.sourceSEs is None:
        sourceSEs = []
      else:
        sourceSEs = self.sourceSEs.split(',')

      # { sourceSE : [FTSFiles] }
      res = FTS3Utilities.selectUniqueRandomSource(targetFiles, allowedSources=sourceSEs)
      if not res['OK']:
        return res

      # We don't need to check the source, since it is already filtered by the DataManager
      for sourceSE, sourceFiles in res['Value'].iteritems():
        newJobs.extend(self._createNewJob('Transfer', ftsFilesChunk, targetSE, sourceSE=sourceSE)
                       for ftsFilesChunk in breakListIntoChunks(sourceFiles, maxFilesPerJob))

    return S_OK(newJobs)
Example #44
0
    def cleanOutputs(self, jobInfo):
        """Delete the output files of the job described by jobInfo.

        The files removed are the outputs whose status is "Exists" plus
        whatever self.__findAllDescendants returns for the job outputs.
        When self.enabled is false the files are only listed in the log.

        :raises RuntimeError: if no proxy can be obtained or a removal call fails
        """
        if not len(jobInfo.outputFiles):
            return
        descendants = self.__findAllDescendants(jobInfo.outputFiles)
        stillThere = []
        for lfn, status in izip_longest(jobInfo.outputFiles,
                                        jobInfo.outputFileStatus):
            if status == "Exists":
                stillThere.append(lfn)
        targets = stillThere + descendants
        if not targets:
            return

        if not self.enabled:
            # Dry run: report what would be done and stop here
            self.log.notice("Would have removed these files: \n +++ %s " %
                            "\n +++ ".join(targets))
            return
        self.log.notice("Remove these files: \n +++ %s " %
                        "\n +++ ".join(targets))

        # error text -> LFNs that failed with that error
        failuresByReason = defaultdict(list)
        removedCount = 0

        for chunk in breakListIntoChunks(targets, 200):
            # The removal is executed inside the proxy context of the author
            with UserProxy(proxyUserDN=self.authorDN,
                           proxyUserGroup=self.authorGroup) as proxyResult:
                if not proxyResult['OK']:
                    raise RuntimeError('Failed to get a proxy: %s' %
                                       proxyResult['Message'])
                removal = DataManager().removeFile(chunk)
                if not removal['OK']:
                    self.log.error("Failed to remove LFNs", removal['Message'])
                    raise RuntimeError("Failed to remove LFNs: %s" %
                                       removal['Message'])
                outcome = removal['Value']
                for lfn, err in outcome['Failed'].items():
                    failuresByReason[str(err)].append(lfn)
                removedCount += len(outcome['Successful'].keys())

        for reason, lfns in failuresByReason.items():
            self.log.error("Failed to remove %d files with error: %s" %
                           (len(lfns), reason))
        self.log.notice("Successfully removed %d files" % removedCount)
Example #45
0
    def cleanOutputs(self, jobInfo):
        """Remove all job outputs.

        The files removed are the outputs whose status is "Exists" plus
        whatever self.__findAllDescendants returns for the job outputs.
        When self.enabled is false the files are only listed in the log.

        :param jobInfo: object providing outputFiles and outputFileStatus
        :raises RuntimeError: if a removal call reports a failure
        """
        if not jobInfo.outputFiles:
            return
        descendants = self.__findAllDescendants(jobInfo.outputFiles)
        existingOutputFiles = [
            lfn for lfn, status in izip_longest(jobInfo.outputFiles,
                                                jobInfo.outputFileStatus)
            if status == "Exists"
        ]
        filesToDelete = existingOutputFiles + descendants

        if not filesToDelete:
            return

        if not self.enabled:
            self.log.notice("Would have removed these files: \n +++ %s " %
                            "\n +++ ".join(filesToDelete))
            return
        self.log.notice("Remove these files: \n +++ %s " %
                        "\n +++ ".join(filesToDelete))

        # error text -> LFNs that failed with that error
        errorReasons = {}
        successfullyRemoved = 0

        for lfnList in breakListIntoChunks(filesToDelete, 200):
            # This is needed to remove the file with the Shifter credentials
            # and not with the server credentials.
            gConfigurationData.setOptionInCFG(
                '/DIRAC/Security/UseServerCertificate', 'false')
            try:
                result = DataManager().removeFile(lfnList)
            finally:
                # Restore the option even if removeFile raises, otherwise the
                # process would keep running without the server certificate.
                gConfigurationData.setOptionInCFG(
                    '/DIRAC/Security/UseServerCertificate', 'true')
            if not result['OK']:
                self.log.error("Failed to remove LFNs", result['Message'])
                raise RuntimeError("Failed to remove LFNs: %s" %
                                   result['Message'])
            for lfn, err in result['Value']['Failed'].items():
                errorReasons.setdefault(str(err), []).append(lfn)
            successfullyRemoved += len(result['Value']['Successful'])

        for reason, lfns in errorReasons.items():
            self.log.error("Failed to remove %d files with error: %s" %
                           (len(lfns), reason))
        self.log.notice("Successfully removed %d files" % successfullyRemoved)
Example #46
0
 def removeFile( self, lfn, rpc = '', url = '', timeout = None ):
   """ Remove the given LFN(s) through the RPC client, 100 entries per call.

       :return: S_OK( {'Successful': {...}, 'Failed': {...}} ) merged over all
                calls, or the first failing result (argument check or RPC call)
   """
   checked = self.__checkArgumentFormat( lfn )
   if not checked['OK']:
     return checked
   lfnChunks = breakListIntoChunks( checked['Value'].keys(), 100 )
   client = self._getRPC( rpc = rpc, url = url, timeout = timeout )
   merged = {'Successful': {}, 'Failed':{}}
   for chunk in lfnChunks:
     reply = client.removeFile( chunk )
     if not reply['OK']:
       # A failed call aborts the whole operation, earlier chunks are not reported
       return reply
     for outcome in ( 'Successful', 'Failed' ):
       merged[outcome].update( reply['Value'][outcome] )
   return S_OK( merged )
 def _groupByReplicas( self ):
   """ Create ( SE, [lfns] ) tasks from the input data grouped by replica location.

       A chunk of files becomes a task when it holds at least GroupSize files,
       or unconditionally when the 'Status' parameter is 'Flush'.

       :return: S_OK( list of ( replicaSE, lfns ) tuples ), S_ERROR when no
                parameters are set
   """
   if not self.params:
     return S_ERROR( "TransformationPlugin._Standard: The 'Standard' plug-in requires parameters." )
   flushing = ( self.params['Status'] == 'Flush' )
   groupSize = self.params['GroupSize']
   # Files grouped by SE
   filesBySE = self._getFileGroups( self.data )
   tasks = []
   for replicaSE in sortList( filesBySE.keys() ):
     for lfnChunk in breakListIntoChunks( filesBySE[replicaSE], groupSize ):
       # Incomplete chunks are only kept when flushing
       if flushing or len( lfnChunk ) >= int( groupSize ):
         tasks.append( ( replicaSE, lfnChunk ) )
   return S_OK( tasks )
Example #48
0
 def __setHasReplicaFlag(self, lfns):
     """Call addFiles on the server for the LFNs, self.splitSize at a time.

     :param lfns: list of LFNs
     :return: S_OK({"Successful": {lfn: True}, "Failed": {lfn: reason}});
              a failed call marks every LFN of that chunk as failed with the
              call's error message
     """
     server = RPCClient(self.url, timeout=120)
     successful = {}
     failed = {}
     for lfnList in breakListIntoChunks(lfns, self.splitSize):
         res = server.addFiles(lfnList)
         if not res["OK"]:
             for lfn in lfnList:
                 failed[lfn] = res["Message"]
             continue
         # The returned value only holds the LFNs that could not be processed
         rejected = res["Value"]
         for lfn in lfnList:
             # "in" replaces the deprecated dict.has_key()
             if lfn in rejected:
                 failed[lfn] = rejected[lfn]
             else:
                 successful[lfn] = True
     resDict = {"Successful": successful, "Failed": failed}
     return S_OK(resDict)
 def __unsetHasReplicaFlag( self, lfns ):
   """ Call removeFiles on the server for the LFNs, self.splitSize at a time.

       :param lfns: list of LFNs
       :return: S_OK( {'Successful': {lfn: True}, 'Failed': {lfn: reason}} );
                a failed call marks every LFN of that chunk as failed with the
                call's error message
   """
   server = RPCClient( self.url, timeout = 120 )
   successful = {}
   failed = {}
   for lfnList in breakListIntoChunks( lfns, self.splitSize ):
     res = server.removeFiles( lfnList )
     if not res['OK']:
       for lfn in lfnList:
         failed[lfn] = res['Message']
       continue
     # The returned value only holds the LFNs that could not be processed
     rejected = res['Value']
     for lfn in lfnList:
       # "in" replaces the deprecated dict.has_key()
       if lfn in rejected:
         failed[lfn] = rejected[lfn]
       else:
         successful[lfn] = True
   resDict = {'Successful':successful, 'Failed':failed}
   return S_OK( resDict )
Example #50
0
  def _findFileIDs( self, lfns, connection=False ):
    """ Look up the FileID of every LFN.

        :return: S_OK( {"Successful": {lfn: FileID}, "Failed": {lfn: reason}} ),
                 or the failing result of the directory lookup / file query
    """
    connection = self._getConnection(connection)
    filesByDir = self._getFileDirectories(lfns)
    successful = {}
    failed = {}
    result = self.db.dtree.findDirs( filesByDir.keys() )
    if not result['OK']:
      return result
    dirIDByPath = result['Value']

    # Reverse map DirID -> path, used to rebuild the LFNs from the query rows
    pathByDirID = {}
    for dirPath in filesByDir:
      if dirPath in dirIDByPath:
        pathByDirID[dirIDByPath[dirPath]] = dirPath
        continue
      # Unknown directory: none of its files can exist
      for fileName in filesByDir[dirPath]:
        missing = ( '%s/%s' % (dirPath,fileName) ).replace('//','/')
        failed[missing] = 'No such file or directory'

    # One query per chunk of at most 1000 directories
    for pathChunk in breakListIntoChunks( dirIDByPath.keys(), 1000 ):

      conditions = [ "( DirID=%d AND FileName IN (%s) )" % ( dirIDByPath[chunkPath],
                                                             stringListToString( filesByDir[chunkPath] ) )
                     for chunkPath in pathChunk ]

      req = "SELECT FileName,DirID,FileID FROM FC_Files WHERE %s" % " OR ".join( conditions )
      result = self.db._query(req,connection)
      if not result['OK']:
        return result
      for fileName, dirID, fileID in result['Value']:
        found = ( '%s/%s' % (pathByDirID[dirID],fileName) ).replace('//','/')
        successful[found] = fileID

    # Whatever was not returned by the queries does not exist
    for lfn in lfns:
      if lfn not in successful:
        failed[lfn] = "No such file or directory"

    return S_OK({"Successful":successful,"Failed":failed})
Example #51
0
 def __getFileMetadata(self, lfns):
     """Fetch the metadata of the LFNs from the server, self.splitSize at a time.

     :param lfns: list of LFNs
     :return: S_OK({"Successful": {lfn: metadata}, "Failed": {lfn: reason}}).
              An LFN fails when the call fails, when it is absent from the
              reply, or when its entry in the reply is a string (the string is
              then used as the failure reason).
     """
     server = RPCClient(self.url, timeout=120)
     successful = {}
     failed = {}
     for lfnList in breakListIntoChunks(lfns, self.splitSize):
         res = server.getFileMetadata(lfnList)
         if not res["OK"]:
             for lfn in lfnList:
                 failed[lfn] = res["Message"]
             continue
         metadata = res["Value"]
         for lfn in lfnList:
             if lfn not in metadata:
                 failed[lfn] = "File does not exist"
             # isinstance, not "in": types.StringTypes is a tuple of types, so
             # a membership test on the value itself can never match a string.
             elif isinstance(metadata[lfn], types.StringTypes):
                 failed[lfn] = metadata[lfn]
             else:
                 successful[lfn] = metadata[lfn]
     resDict = {"Successful": successful, "Failed": failed}
     return S_OK(resDict)
Example #52
0
 def removeReplica( self, lfn, rpc = '', url = '', timeout = 120 ):
   """ Remove the given replicas through the RPC client, 100 entries per call.

       Each entry of the checked argument must provide 'PFN' and 'SE'; the
       client receives ( lfn, PFN, SE ) tuples.

       :return: S_OK( {'Successful': {...}, 'Failed': {...}} ) merged over all
                calls, or the first failing result (argument check or RPC call)
   """
   checked = self.__checkArgumentFormat( lfn )
   if not checked['OK']:
     return checked
   replicaTuples = [ ( name, info['PFN'], info['SE'] ) for name, info in checked['Value'].items() ]
   client = self._getRPC( rpc = rpc, url = url, timeout = timeout )
   merged = {'Successful': {}, 'Failed':{}}
   for chunk in breakListIntoChunks( replicaTuples, 100 ):
     reply = client.removeReplica( chunk )
     if not reply['OK']:
       # A failed call aborts the whole operation, earlier chunks are not reported
       return reply
     for outcome in ( 'Successful', 'Failed' ):
       merged[outcome].update( reply['Value'][outcome] )
   return S_OK( merged )
 def __getFileMetadata( self, lfns ):
   """ Fetch the metadata of the LFNs from the server, self.splitSize at a time.

       :param lfns: list of LFNs
       :return: S_OK( {'Successful': {lfn: metadata}, 'Failed': {lfn: reason}} ).
                An LFN fails when the call fails, when it is absent from the
                reply, or when its entry in the reply is a string (the string
                is then used as the failure reason).
   """
   server = RPCClient( self.url, timeout = 120 )
   successful = {}
   failed = {}
   for lfnList in breakListIntoChunks( lfns, self.splitSize ):
     res = server.getFileMetadata( lfnList )
     if not res['OK']:
       for lfn in lfnList:
         failed[lfn] = res['Message']
       continue
     metadata = res['Value']
     for lfn in lfnList:
       if lfn not in metadata:
         failed[lfn] = 'File does not exist'
       # isinstance, not "in": types.StringTypes is a tuple of types, so a
       # membership test on the value itself can never match a string.
       elif isinstance( metadata[lfn], types.StringTypes ):
         failed[lfn] = metadata[lfn]
       else:
         successful[lfn] = metadata[lfn]
   resDict = {'Successful':successful, 'Failed':failed}
   return S_OK( resDict )
Example #54
0
 def setPendingRequests(self, jobs):
   """Flag every job that has a request with whether that request is still pending.

   The jobs are processed 1000 at a time. Reading the requests is repeated
   until it succeeds. A request counts as pending unless its status is
   'Done' or 'Canceled'.
   """
   for chunkOfJobs in breakListIntoChunks(jobs.values(), 1000):
     idsInChunk = [entry.jobID for entry in chunkOfJobs]
     reply = self.reqClient.readRequestsForJobs(idsInChunk)
     while not reply['OK']:
       self.log.error('Failed to read requests', reply['Message'])
       # repeat
       reply = self.reqClient.readRequestsForJobs(idsInChunk)
     requestsByJob = reply['Value']['Successful']
     for jobID in requestsByJob:
       requestID = requestsByJob[jobID].RequestID
       dbStatus = self.reqClient.getRequestStatus(requestID).get('Value', 'Unknown')
       # Only the first job of the chunk carrying this ID is updated
       owner = next((entry for entry in chunkOfJobs if entry.jobID == jobID), None)
       if owner is None:
         continue
       owner.pendingRequest = dbStatus not in ('Done', 'Canceled')
       self.log.notice('Found %s request for job %d' % ('pending' if owner.pendingRequest else 'finished', jobID))
Example #55
0
  def _getFileLFNs(self, fileIDs):
    """ Resolve a list of file IDs into their LFNs.
        We need to override this method because the base class hard codes the column names

        :return: S_OK( {'Successful': {fileID: lfn}, 'Failed': {fileID: reason}} ),
                 or the failing result of the stored procedure call
    """

    lfnByID = {}
    for idChunk in breakListIntoChunks(fileIDs, 1000):
      # The IDs are formatted so that the stored procedure can use them in an IN clause
      procResult = self.db.executeStoredProcedureWithCursor(
          'ps_get_full_lfn_for_file_ids', (intListToString(idChunk), ))
      if not procResult['OK']:
        return procResult

      # Each row starts with FileID, LFN
      for row in procResult['Value']:
        lfnByID[row[0]] = row[1]

    # Every requested ID that did not come back is reported as failed
    notFound = set(fileIDs).difference(lfnByID)
    failed = dict((fileID, "File ID not found") for fileID in notFound)

    return S_OK({'Successful': lfnByID, 'Failed': failed})
Example #56
0
  def getLFNStatus(self, jobs):
    """Get all the LFNs for the jobs and get their status.

    First the job information of every job is loaded to collect its input
    and output LFNs, then the existence of the LFNs is checked in the file
    catalog, 200 at a time. Both steps are repeated until they succeed.

    :param jobs: dictionary whose values are the job objects
    :return: dictionary built from the 'Successful' part of the exists() replies
    """
    self.log.notice('Collecting LFNs...')
    lfnExistence = {}
    lfnCache = []
    for counter, job in enumerate(jobs.values()):
      if counter % self.printEveryNJobs == 0:
        self.log.notice('Getting JobInfo: %d/%d: %3.1fs' % (counter, len(jobs), float(time.time() - self.startTime)))
      while True:
        try:
          job.getJobInformation(self.diracILC)
          if job.inputFile:
            lfnCache.append(job.inputFile)
          if job.outputFiles:
            lfnCache.extend(job.outputFiles)
          break
        except RuntimeError as e:  # try again
          self.log.error('+++++ Failure for job:', job.jobID)
          self.log.error('+++++ Exception: ', str(e))

    # The progress of this step is measured in LFNs, not in jobs
    totalLFNs = len(lfnCache)
    counter = 0
    for lfnChunk in breakListIntoChunks(lfnCache, 200):
      counter += 200
      if counter % 1000 == 0:
        # The last chunk may be shorter than 200, never report more than the total
        self.log.notice('Getting FileInfo: %d/%d: %3.1fs' %
                        (min(counter, totalLFNs), totalLFNs, float(time.time() - self.startTime)))
      while True:
        try:
          reps = self.fcClient.exists(lfnChunk)
        except RuntimeError as e:  # try again, but leave a trace of the reason
          self.log.error('Exception while checking file existence, try again...', str(e))
          continue
        if reps['OK']:
          break
        self.log.error('Failed to check file existence, try again...', reps['Message'])
      lfnExistence.update(reps['Value']['Successful'])

    return lfnExistence
  def getJobStatus( self, jobIDList ):
    """ Query the status of the given jobs with 'bjobs' over SSH, 100 jobs per call.

        :return: S_OK( { job reference : 'Done' | 'Running' | 'Waiting' | 'Unknown' } ),
                 or the failing result of the SSH call
    """
    # Status reported by bjobs -> status returned to the caller
    statusMap = { 'DONE': 'Done', 'EXIT': 'Done',
                  'RUN': 'Running', 'SSUSP': 'Running',
                  'PEND': 'Waiting', 'PSUSP': 'Waiting' }

    resultDict = {}
    ssh = SSH( parameters = self.ceParameters )

    for chunk in breakListIntoChunks( jobIDList, 100 ):
      # The 'FileName' part of the parsed reference is what is passed to bjobs
      refByStamp = {}
      for jobRef in chunk:
        stamp = pfnparse( jobRef )['Value']['FileName']
        if stamp:
          refByStamp[stamp] = jobRef

      result = ssh.sshCall( 100, [ 'bjobs', ' '.join( refByStamp.keys() ) ] )
      if not result['OK']:
        return result
      lines = result['Value'][1].replace( '\r', '' ).split( '\n' )

      for stamp in refByStamp:
        jobRef = refByStamp[stamp]
        # Stays 'Unknown' unless an output line says otherwise
        resultDict[jobRef] = 'Unknown'
        for line in lines:
          if stamp not in line:
            continue
          if 'UNKWN' in line:
            resultDict[jobRef] = 'Unknown'
            continue
          # The status is read from the third column of the line
          lsfStatus = line.split()[2]
          if lsfStatus in statusMap:
            resultDict[jobRef] = statusMap[lsfStatus]

    return S_OK( resultDict )
  def _getJobStatusOnHost( self, jobIDList, host = None ):
    """ Ask the host for the status of the given jobs, 100 jobs per command.

        :return: S_OK( merged 'Jobs' dictionaries of all replies ), the failing
                 command result, or S_ERROR when a reply has a non-zero 'Status'
    """
    # Only the stamps (last path component of each job reference) are sent to the host
    refByStamp = dict( ( os.path.basename( urlparse( jobRef ).path ), jobRef ) for jobRef in jobIDList )

    statuses = {}
    for stampChunk in breakListIntoChunks( refByStamp.keys(), 100 ):
      reply = self.__executeHostCommand( 'getJobStatus', { 'JobIDList': stampChunk }, host = host )
      if not reply['OK']:
        return reply

      payload = reply['Value']
      if payload['Status'] != 0:
        return S_ERROR( 'Failed to get job status: %s' % payload['Message'] )

      statuses.update( payload['Jobs'] )

    return S_OK( statuses )
Example #59
0
  def getJobStatus( self, jobIDList ):
    """ Query the status of the given jobs with 'qstat' over SSH, 100 jobs per call.

        :return: S_OK( { job ID : 'Done' | 'Running' | 'Waiting' | 'Unknown' } ),
                 or the failing result of the SSH call
    """
    # Status letter reported by qstat -> status returned to the caller
    statusMap = { 'E': 'Done', 'C': 'Done',
                  'R': 'Running',
                  'S': 'Waiting', 'W': 'Waiting', 'Q': 'Waiting', 'H': 'Waiting', 'T': 'Waiting' }

    resultDict = {}
    ssh = SSH( self.sshUser, self.sshHost, self.sshPassword )

    for chunk in breakListIntoChunks(jobIDList,100):
      # The part of the ID before the first dot is what is searched in the output
      idByNumber = {}
      for jobID in chunk:
        number = jobID.split('.')[0]
        if number:
          idByNumber[number] = jobID

      result = ssh.sshCall( 10, [ 'qstat', ' '.join( chunk ) ] )
      if not result['OK']:
        return result

      lines = result['Value'][1].replace( '\r', '' ).split( '\n' )
      for number in idByNumber:
        jobID = idByNumber[number]
        # Stays 'Unknown' unless an output line says otherwise
        resultDict[jobID] = 'Unknown'
        for line in lines:
          if number not in line:
            continue
          if 'Unknown' in line:
            resultDict[jobID] = 'Unknown'
            continue
          # The status is read from the fifth column of the line
          torqueStatus = line.split()[4]
          if torqueStatus in statusMap:
            resultDict[jobID] = statusMap[torqueStatus]

    return S_OK( resultDict )
Example #60
0
  def _BroadcastProcessed( self ):
    """ Run the Broadcast plug-in on the input files that have descendents only.

        Files without descendents (or missing from the successful part of the
        catalog reply) are dropped from the input data first. When the
        transformation status is 'Flush', all the files are passed on unfiltered.

        :return: result of self._Broadcast(), or the failing catalog result
    """
    if self.params['Status'] == 'Flush':
      self.util.logInfo( "Flushing transformation, passing all files on" )
      return self._Broadcast()

    candidates = self.data
    self.util.logInfo( "Number of input files before selection: %d " % len( candidates ) )

    ## query only a maximum of 200 files in one go
    for lfnChunk in breakListIntoChunks( candidates.keys(), 200 ):
      resDesc = self.util.fc.getFileDescendents( lfnChunk, depths=1 )
      self.util.logDebug( "Result from getFileDescendents: %s " % resDesc )
      if not resDesc['OK']:
        return resDesc
      descendents = resDesc['Value']
      withDescendents = descendents['Successful']

      for lfn in lfnChunk:
        if lfn not in withDescendents:
          self.util.logDebug( "Removed: %s, not in succesful " % lfn )
        elif not withDescendents[lfn]:
          self.util.logDebug( "Removed: %s no descendents" % lfn )
        else:
          # The file has descendents: keep it
          continue
        candidates.pop( lfn, None )

      if descendents['Failed']:
        self.util.logWarn("Failed getDescendents: %s " % descendents['Failed'])

    self.util.logInfo( "Number of input files after selection: %d " % len( candidates ) )

    self.data = candidates
    return self._Broadcast()