def requirement(self):
        ssh = ScaleTools.Ssh(host=self.getConfig(self.configCondorServer),
                             username=self.getConfig(self.configCondorUser),
                             key=self.getConfig(self.configCondorKey))

        # Target.Requirements can't be filtered with -constraint, since that would require ClassAd-based regex matching.
        # TODO: Find a more generic way to match resources/requirements (condor_q -slotads ??)
        # cmd_idle = "condor_q -constraint 'JobStatus == 1' -slotads slotads_bwforcluster " \
        #            "-analyze:summary,reverse | tail -n1 | awk -F ' ' " \
        #            "'{print $3 "\n" $4}'| sort -n | head -n1"
        constraint = "( %s ) && ( %s )" % (self._query_constraints,
                                           self.getConfig(
                                               self.configCondorConstraint))

        cmd = ("condor_q -global -constraint '%s' %s" %
               (constraint, self._query_format_string))
        result = ssh.handleSshCall(call=cmd, quiet=True)
        if result[0] != 0:
            self.logger.warning("Could not get HTCondor queue status! %d: %s" %
                                (result[0], result[2]))
            return None
        elif any(error_string in result[1]
                 for error_string in self._CLI_error_strings):
            self.logger.warning("condor_q request timed out.")
            return None

        # Each line is expected to read "<JobStatus>,<RequestCpus>,<Requirements>".
        # maxsplit=2 keeps commas inside the requirement expression in one field.
        queue_line = (entry.split(",", 2)
                      for entry in str(result[1]).splitlines())
        converted_line = ((int(status), int(cores), requirement)
                          for status, cores, requirement in queue_line)
        if self.getConfig(self.configCondorRequirement):
            # TODO: We could use the ClassAd bindings to check the requirement(s)
            filtered_line = (
                (status, cores)
                for status, cores, requirement in converted_line
                if self.getConfig(self.configCondorRequirement) in requirement)
        else:
            filtered_line = ((status, cores)
                             for status, cores, requirement in converted_line)

        required_cpus_total = 0
        required_cpus_idle_jobs = 0
        required_cpus_running_jobs = 0
        try:
            for job_status, requested_cpus in filtered_line:
                required_cpus_total += requested_cpus
                if job_status == self.condorStatusIdle:
                    required_cpus_idle_jobs += requested_cpus
                elif job_status == self.condorStatusRunning:
                    required_cpus_running_jobs += requested_cpus
        except ValueError:
            # Unpacking raises ValueError if a line lacks the three expected
            # fields, e.g. when the output was empty and no CondorRequirement
            # is configured.
            required_cpus_total = 0
            required_cpus_idle_jobs = 0
            required_cpus_running_jobs = 0

        self.logger.debug(
            "HTCondor queue: Idle: %d; Running: %d." %
            (required_cpus_idle_jobs, required_cpus_running_jobs))

        # cores -> machines: machine definition required for RequirementAdapter.
        # Floor division by the negated core count and negating the result is
        # ceiling division, so partially filled machines count as whole machines.
        n_cores = -int(
            self.getConfig(
                self.configMachines)[self.getNeededMachineType()]["cores"])
        self._curRequirement = -(required_cpus_total // n_cores)

        with Logging.JsonLog() as json_log:
            json_log.addItem(self.getNeededMachineType(), "jobs_idle",
                             required_cpus_idle_jobs)
            json_log.addItem(self.getNeededMachineType(), "jobs_running",
                             required_cpus_running_jobs)

        return self._curRequirement
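
A quick aside on the rounding used in the cores -> machines step above: floor
division by a negated divisor is a common Python idiom for ceiling division.
A minimal sketch with made-up numbers:

# 10 pending CPUs on 4-core machines need 3 machines, not 2:
# 10 // -4 == -3 (floor division rounds toward negative infinity),
# so -(10 // -4) == 3.
required_cpus_total = 10
n_cores = -4  # negated core count per machine, as in requirement() above
machines_needed = -(required_cpus_total // n_cores)
assert machines_needed == 3
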
Example #2
    def requirement(self):
        ssh = ScaleTools.Ssh(host=self.getConfig(self.configSlurmServer),
                             username=self.getConfig(self.configSlurmUser),
                             key=self.getConfig(self.configSlurmKey))

        self.logger.info("Checking requirements in partition {}".format(
            self.getConfig(self.configSlurmPartition)))
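        # squeue format specifiers (see the squeue man page): %T = job state,
        # %r = reason the job is pending, %c = minimum CPUs requested per node.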
        cmd = 'squeue -p {} --noheader --format="%T %r %c"'.format(
            self.getConfig(self.configSlurmPartition))
        result = ssh.handleSshCall(call=cmd, quiet=True)
        if result[0] != 0:
            self.logger.warning("Could not get Slurm queue status! %d: %s" %
                                (result[0], result[2]))
            return None
        elif any(error_string in result[1]
                 for error_string in self._CLI_error_strings):
            self.logger.warning("squeue request timed out.")
            return None

        required_cpus_total = 0
        required_cpus_idle_jobs = 0
        required_cpus_running_jobs = 0
        cpus_dependency_jobs = 0

        for line in result[1].splitlines():
            values = line.split()

            if len(values) != 3:
                continue

            if "Dependency" in values[1]:
                cpus_dependency_jobs = cpus_dependency_jobs + int(values[2])
                continue
            if "PartitionTimeLimit" in values[1]:
                continue
            elif "PENDING" in values[0]:
                required_cpus_total = required_cpus_total + int(values[2])
                required_cpus_idle_jobs = required_cpus_idle_jobs + int(
                    values[2])
                continue
            elif "RUNNING" in values[0]:
                required_cpus_total = required_cpus_total + int(values[2])
                required_cpus_running_jobs = required_cpus_running_jobs + int(
                    values[2])
                continue
            else:
                self.logger.warning("unknown job state: %s. Ignoring.",
                                    values[0])

        self.logger.debug(
            "Slurm queue in partition %s: Idle: %d; Running: %d." %
            (self.getConfig(self.configSlurmPartition),
             required_cpus_idle_jobs, required_cpus_running_jobs))

        # cores -> machines: machine definition required for RequirementAdapter.
        # Floor division by the negated core count and negating the result is
        # ceiling division (see the rounding sketch after Example #1).
        n_cores = -int(
            self.getConfig(
                self.configMachines)[self.getNeededMachineType()]["cores"])
        self._curRequirement = -(required_cpus_total // n_cores)

        self.logger.debug("Required CPUs total=%s" % required_cpus_total)
        self.logger.debug("Required CPUs idle Jobs=%s" %
                          required_cpus_idle_jobs)
        self.logger.debug("Required CPUs running Jobs=%s" %
                          required_cpus_running_jobs)
        self.logger.debug("CPUs dependency Jobs=%s" % cpus_dependency_jobs)
        with Logging.JsonLog() as json_log:
            json_log.addItem(self.getNeededMachineType(), "jobs_idle",
                             required_cpus_idle_jobs)
            json_log.addItem(self.getNeededMachineType(), "jobs_running",
                             required_cpus_running_jobs)

        return self._curRequirement
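
For reference, a standalone sketch of the squeue parsing step above, run
against hand-written sample lines (the job lines are made up; real input is
the output of squeue --noheader --format="%T %r %c"):

sample_output = "\n".join([
    "PENDING Resources 4",
    "PENDING Dependency 2",
    "RUNNING None 8",
])

idle = running = dependency = 0
for line in sample_output.splitlines():
    state, reason, cpus = line.split()
    if reason == "Dependency":  # dependency jobs are counted separately
        dependency += int(cpus)
    elif state == "PENDING":
        idle += int(cpus)
    elif state == "RUNNING":
        running += int(cpus)

assert (idle, running, dependency) == (4, 8, 2)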