Example #1
    def get_njobs_in_queue(self, username=None):
        """
        returns the number of jobs currently in the queue for the user

        :param username: (str) the username of the jobs to count (default is to autodetect)
        :return: (int) number of jobs in the queue
        """
        queue_logger = self.get_qlogger('qadapter.{}'.format(self.q_name))

        # initialize username
        if username is None:
            username = getpass.getuser()

        # run qstat
        qstat = Command(self._get_status_cmd(username))
        p = qstat.run(timeout=5)

        # parse the result
        if p[0] == 0:
            njobs = self._parse_njobs(p[1], username)
            queue_logger.info(
                'The number of jobs currently in the queue is: {}'.format(
                    njobs))
            return njobs

        # there's a problem talking to qstat server?
        msgs = [
            'Error trying to get the number of jobs in the queue',
            'The error response reads: {}'.format(p[2])
        ]
        log_fancy(queue_logger, msgs, 'error')
        return None
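A minimal usage sketch for the method above (a hedged illustration, not part of the examples): it assumes FireWorks is installed, that CommonAdapter lives at the import path shown, and that the scheduler's status command (qstat, squeue, etc.) is on PATH. q_type='SLURM' is only an example value.

from fireworks.user_objects.queue_adapters.common_adapter import CommonAdapter

# build an adapter for the local scheduler; q_type may also be 'PBS', 'SGE',
# 'Cobalt' or 'LoadSharingFacility' depending on the cluster (illustrative)
adapter = CommonAdapter(q_type='SLURM')

# count this user's jobs; returns None if the status command failed
njobs = adapter.get_njobs_in_queue()
if njobs is not None:
    print('jobs currently in the queue: {}'.format(njobs))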
Example #2
    def get_njobs_in_queue(self, username=None):
        """
        returns the number of jobs currently in the queue for the user

        :param username: (str) the username of the jobs to count (default is to autodetect)
        :return: (int) number of jobs in the queue
        """
        queue_logger = self.get_qlogger('qadapter.{}'.format(self.q_name))

        # initialize username
        if username is None:
            username = getpass.getuser()

        # run qstat
        qstat = Command(self._get_status_cmd(username))
        p = qstat.run(timeout=5)

        # parse the result
        if p[0] == 0:
            njobs = self._parse_njobs(p[1], username)
            queue_logger.info(
                'The number of jobs currently in the queue is: {}'.format(
                    njobs))
            return njobs

        # there's a problem talking to qstat server?
        msgs = ['Error trying to get the number of jobs in the queue',
                'The error response reads: {}'.format(p[2])]
        log_fancy(queue_logger, msgs, 'error')
        return None
Example #3
    def submit_to_queue(self, script_file):
        """
        submits the job to the queue and returns the job id

        :param script_file: (str) name of the script file to use
        :return: (int) job_id
        """
        if not os.path.exists(script_file):
            raise ValueError(
                'Cannot find script file located at: {}'.format(
                    script_file))

        queue_logger = self.get_qlogger('qadapter.{}'.format(self.q_name))
        submit_cmd = self.q_commands[self.q_type]["submit_cmd"]
        # submit the job
        try:
            if self.q_type == "Cobalt":
                # Cobalt requires scripts to be executable
                os.chmod(script_file, stat.S_IRWXU | stat.S_IRGRP | stat.S_IXGRP)
            cmd = [submit_cmd, script_file]
            # For most of the queues handled by common_adapter, it's best to simply submit the file name
            # as an argument.  LoadSharingFacility doesn't handle the header section (queue name, nodes, etc)
            # when taking file arguments, so the file needs to be passed as stdin to make it work correctly.
            if self.q_type == 'LoadSharingFacility':
                with open(script_file, 'r') as inputFile:
                    p = subprocess.Popen([submit_cmd], stdin=inputFile, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            else:
                p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            p.wait()

            # retrieve the returncode. PBS returns 0 if the job was successful
            if p.returncode == 0:
                try:
                    job_id = self._parse_jobid(p.stdout.read().decode())
                    queue_logger.info(
                        'Job submission was successful and job_id is {}'.format(
                            job_id))
                    return job_id
                except Exception as ex:
                    # probably error parsing job code
                    log_exception(queue_logger,
                                  'Could not parse job id following {} due to error {}...'
                                  .format(submit_cmd, str(ex)))
            else:
                # some qsub error, e.g. maybe wrong queue specified, don't have permission to submit, etc...
                msgs = [
                    'Error in job submission with {n} file {f} and cmd {c}'.format(
                        n=self.q_name, f=script_file, c=cmd),
                    'The error response reads: {}'.format(p.stderr.read())]
                log_fancy(queue_logger, msgs, 'error')

        except Exception as ex:
            # random error, e.g. no qsub on machine!
            log_exception(queue_logger,
                          'Running the command: {} caused an error...'
                          .format(submit_cmd))
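A hedged companion sketch for submit_to_queue: 'launch.sh' is a hypothetical, already-written submission script, and the adapter construction mirrors the sketch above; submit_to_queue returns None when submission or job-id parsing fails.

from fireworks.user_objects.queue_adapters.common_adapter import CommonAdapter

adapter = CommonAdapter(q_type='SLURM')

# 'launch.sh' is a placeholder for a submission script generated beforehand
# from the adapter's template
job_id = adapter.submit_to_queue('launch.sh')
if job_id is not None:
    print('submitted as job {}'.format(job_id))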
Example #4
    def submit_to_queue(self, script_file):
        """
        submits the job to the queue and returns the job id

        :param script_file: (str) name of the script file to use
        :return: (int) job_id
        """
        if not os.path.exists(script_file):
            raise ValueError(
                'Cannot find script file located at: {}'.format(
                    script_file))

        queue_logger = self.get_qlogger('qadapter.{}'.format(self.q_name))
        submit_cmd = self.q_commands[self.q_type]["submit_cmd"]
        # submit the job
        try:
            if self.q_type == "Cobalt":
                # Cobalt requires scripts to be executable
                os.chmod(script_file, stat.S_IRWXU | stat.S_IRGRP | stat.S_IXGRP)
            cmd = [submit_cmd, script_file]
            # For most of the queues handled by common_adapter, it's best to simply submit the file name
            # as an argument.  LoadSharingFacility doesn't handle the header section (queue name, nodes, etc)
            # when taking file arguments, so the file needs to be passed as stdin to make it work correctly.
            if self.q_type == 'LoadSharingFacility':
                with open(script_file, 'r') as inputFile:
                    p = subprocess.Popen([submit_cmd], stdin=inputFile, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            else:
                p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            p.wait()

            # retrieve the returncode. PBS returns 0 if the job was successful
            if p.returncode == 0:
                try:
                    job_id = self._parse_jobid(p.stdout.read().decode())
                    queue_logger.info(
                        'Job submission was successful and job_id is {}'.format(
                            job_id))
                    return job_id
                except Exception as ex:
                    # probably error parsing job code
                    log_exception(queue_logger,
                                  'Could not parse job id following {} due to error {}...'
                                  .format(submit_cmd, str(ex)))
            else:
                # some qsub error, e.g. maybe wrong queue specified, don't have permission to submit, etc...
                msgs = [
                    'Error in job submission with {n} file {f} and cmd {c}'.format(
                        n=self.q_name, f=script_file, c=cmd),
                    'The error response reads: {}'.format(p.stderr.read())]
                log_fancy(queue_logger, msgs, 'error')

        except Exception as ex:
            # random error, e.g. no qsub on machine!
            log_exception(queue_logger,
                          'Running the command: {} caused an error...'
                          .format(submit_cmd))
Example #5
    def submit_to_queue(self, script_file):
        """
        submits the job to the queue and returns the job id

        :param script_file: (str) name of the script file to use
        :return: (int) job_id
        """
        if not os.path.exists(script_file):
            raise ValueError(
                'Cannot find script file located at: {}'.format(script_file))

        queue_logger = self.get_qlogger('qadapter.{}'.format(self.q_name))

        # submit the job
        try:
            cmd = [self.submit_cmd, script_file]
            p = subprocess.Popen(cmd,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE)
            p.wait()

            # grab the returncode. PBS returns 0 if the job was successful
            if p.returncode == 0:
                try:
                    job_id = self._parse_jobid(p.stdout.read())
                    queue_logger.info(
                        'Job submission was successful and job_id is {}'.
                        format(job_id))
                    return job_id
                except:
                    # probably error parsing job code
                    log_exception(
                        queue_logger,
                        'Could not parse job id following {}...'.format(
                            self.submit_cmd))

            else:
                # some qsub error, e.g. maybe wrong queue specified, don't have permission to submit, etc...
                msgs = [
                    'Error in job submission with {n} file {f} and cmd {c}'.
                    format(n=self.q_name, f=script_file, c=cmd),
                    'The error response reads: {}'.format(p.stderr.read())
                ]
                log_fancy(queue_logger, msgs, 'error')

        except:
            # random error, e.g. no qsub on machine!
            log_exception(
                queue_logger,
                'Running the command: {} caused an error...'.format(
                    self.submit_cmd))
Example #6
    def submit_to_queue(self, script_file):
        """
        submits the job to the queue and returns the job id

        :param script_file: (str) name of the script file to use
        :return: (int) job_id
        """
        if not os.path.exists(script_file):
            raise ValueError(
                'Cannot find script file located at: {}'.format(
                    script_file))

        queue_logger = self.get_qlogger('qadapter.{}'.format(self.q_name))

        # submit the job
        try:
            cmd = [self.submit_cmd, script_file]
            p = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE)
            p.wait()

            # grab the returncode. PBS returns 0 if the job was successful
            if p.returncode == 0:
                try:
                    job_id = self._parse_jobid(p.stdout.read())
                    queue_logger.info(
                        'Job submission was successful and job_id is {}'.format(
                            job_id))
                    return job_id
                except:
                    # probably error parsing job code
                    log_exception(queue_logger,
                                  'Could not parse job id following {}...'.format(
                                      self.submit_cmd))

            else:
                # some qsub error, e.g. maybe wrong queue specified, don't have permission to submit, etc...
                msgs = [
                    'Error in job submission with {n} file {f} and cmd {c}'.format(
                        n=self.q_name, f=script_file, c=cmd),
                    'The error response reads: {}'.format(p.stderr.read())]
                log_fancy(queue_logger, msgs, 'error')

        except:
            # random error, e.g. no qsub on machine!
            log_exception(queue_logger,
                          'Running the command: {} caused an error...'.format(
                              self.submit_cmd))
Example #7
    def submit_to_queue(self, queue_params, script_file):
        """
        for documentation, see parent object
        """

        if not os.path.exists(script_file):
            raise ValueError(
                'Cannot find script file located at: {}'.format(script_file))

        # initialize logger
        slurm_logger = get_fw_logger('rocket.slurm', queue_params.logging_dir)

        # submit the job
        try:
            cmd = ['sbatch', script_file]
            p = subprocess.Popen(cmd,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE)
            p.wait()

            # grab the returncode. SLURM returns 0 if the job was successful
            if p.returncode == 0:
                try:
                    # sbatch output is of the form 'Submitted batch job 2561553' - grab the fourth field for the job id
                    job_id = int(p.stdout.read().split()[3])
                    slurm_logger.info(
                        'Job submission was successful and job_id is {}'.
                        format(job_id))
                    return job_id
                except:
                    # probably error parsing job code
                    log_exception(slurm_logger,
                                  'Could not parse job id following slurm...')

            else:
                # some qsub error, e.g. maybe wrong queue specified, don't have permission to submit, etc...
                msgs = [
                    'Error in job submission with SLURM file {f} and cmd {c}'.
                    format(f=script_file, c=cmd)
                ]
                msgs.append('The error response reads: {}'.format(
                    p.stderr.read()))
                log_fancy(slurm_logger, 'error', msgs)

        except:
            # random error, e.g. no qsub on machine!
            log_exception(slurm_logger, 'Running slurm caused an error...')
Example #8
    def get_njobs_in_queue(self, rocket_params, username=None):
        """
        for documentation, see parent object
        """

        # TODO: (low-priority) parse the qstat -x output as an alternate way to get this working
        # tmp_file_name = 'tmp_qstat.xml'
        # cmd = ['qstat', '-x']

        # initialize logger
        pbs_logger = get_fw_logger('rocket.pbs', rocket_params.logging_dir)

        # initialize username
        if username is None:
            username = getpass.getuser()

        # run qstat
        cmd = ['qstat', '-a', '-u', username]
        p = subprocess.Popen(cmd, shell=False, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        p.wait()

        # parse the result
        if p.returncode == 0:
            # lines should have this form
            # '1339044.sdb          username  queuename    2012-02-29-16-43  20460   --   --    --  00:20 C 00:09'
            # count lines that include the username in it

            # TODO: only count running or queued jobs. or rather, *don't* count jobs that are 'C'.
            outs = p.stdout.readlines()
            rx = re.compile(username)
            njobs = len(
                [line.split() for line in outs if rx.search(line) is not None])
            pbs_logger.info(
                'The number of jobs currently in the queue is: {}'.format(
                    njobs))
            return njobs

        # there's a problem talking to qstat server?
        msgs = [
            'Error trying to get the number of jobs in the queue using qstat service'
        ]
        msgs.append('The error response reads: {}'.format(p.stderr.read()))
        log_fancy(pbs_logger, 'error', msgs)
        return None
Example #9
    def get_njobs_in_queue(self, queue_params, username=None):
        """
        for documentation, see parent object
        """

        # TODO: (low-priority) parse the qstat -x output as an alternate way to get this working
        # tmp_file_name = 'tmp_qstat.xml'
        # cmd = ['qstat', '-x']

        # initialize logger
        slurm_logger = get_fw_logger('rocket.slurm', queue_params.logging_dir)

        # initialize username
        if username is None:
            username = getpass.getuser()

        # run squeue
        cmd = ['squeue', '-o "%u"', '-u', username]
        p = subprocess.Popen(cmd, shell=False, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        p.wait()

        # parse the result
        if p.returncode == 0:
            # lines should have this form
            # username
            # count lines that include the username in it

            outs = p.stdout.readlines()
            rx = re.compile(username)
            njobs = len(
                [line.split() for line in outs if rx.search(line) is not None])
            slurm_logger.info(
                'The number of jobs currently in the queue is: {}'.format(
                    njobs))
            return njobs

        # there's a problem talking to the squeue service?
        msgs = [
            'Error trying to get the number of jobs in the queue using squeue service'
        ]
        msgs.append('The error response reads: {}'.format(p.stderr.read()))
        log_fancy(slurm_logger, 'error', msgs)
        return None
Example #10
    def get_njobs_in_queue(self, rocket_params, username=None):
        """
        for documentation, see parent object
        """

        # TODO: (low-priority) parse the qstat -x output as an alternate way to get this working
        # tmp_file_name = 'tmp_qstat.xml'
        # cmd = ['qstat', '-x']

        # initialize logger
        pbs_logger = get_fw_logger('rocket.pbs', rocket_params.logging_dir)

        # initialize username
        if username is None:
            username = getpass.getuser()

        # run qstat
        cmd = ['qstat', '-a', '-u', username]
        p = subprocess.Popen(cmd, shell=False, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        p.wait()

        # parse the result
        if p.returncode == 0:
            # lines should have this form
            # '1339044.sdb          username  queuename    2012-02-29-16-43  20460   --   --    --  00:20 C 00:09'
            # count lines that include the username in it

            # TODO: only count running or queued jobs. or rather, *don't* count jobs that are 'C'.
            outs = p.stdout.readlines()
            rx = re.compile(username)
            njobs = len([line.split() for line in outs if rx.search(line) is not None])
            pbs_logger.info('The number of jobs currently in the queue is: {}'.format(njobs))
            return njobs

        # there's a problem talking to qstat server?
        msgs = ['Error trying to get the number of jobs in the queue using qstat service']
        msgs.append('The error response reads: {}'.format(p.stderr.read()))
        log_fancy(pbs_logger, 'error', msgs)
        return None
Example #11
    def get_njobs_in_queue(self, queue_params, username=None):
        """
        for documentation, see parent object
        """

        # TODO: (low-priority) parse the qstat -x output as an alternate way to get this working
        # tmp_file_name = 'tmp_qstat.xml'
        # cmd = ['qstat', '-x']

        # initialize logger
        slurm_logger = get_fw_logger('rocket.slurm', queue_params.logging_dir)

        # initialize username
        if username is None:
            username = getpass.getuser()

        # run squeue
        cmd = ['squeue', '-o "%u"', '-u', username]
        p = subprocess.Popen(cmd, shell=False, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        p.wait()

        # parse the result
        if p.returncode == 0:
            # lines should have this form
            # username
            # count lines that include the username in it

            outs = p.stdout.readlines()
            rx = re.compile(username)
            njobs = len([line.split() for line in outs if rx.search(line) is not None])
            slurm_logger.info('The number of jobs currently in the queue is: {}'.format(njobs))
            return njobs

        # there's a problem talking to the squeue service?
        msgs = ['Error trying to get the number of jobs in the queue using squeue service']
        msgs.append('The error response reads: {}'.format(p.stderr.read()))
        log_fancy(slurm_logger, 'error', msgs)
        return None
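The older examples above count jobs by shelling out to squeue and matching the username in each output line. A standalone Python 3 sketch of the same idea (assuming only that squeue is on PATH; the -h flag is used so the header line is not counted):

import getpass
import subprocess

def count_my_slurm_jobs(username=None):
    """Count queued/running SLURM jobs for a user by parsing squeue output."""
    username = username or getpass.getuser()
    # -h suppresses the header; -o %u prints only the owning user for each job
    p = subprocess.run(['squeue', '-h', '-o', '%u', '-u', username],
                       stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                       universal_newlines=True)
    if p.returncode != 0:
        raise RuntimeError('squeue failed: {}'.format(p.stderr))
    return sum(1 for line in p.stdout.splitlines() if line.strip() == username)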
Example #12
    def submit_to_queue(self, queue_params, script_file):
        """
        for documentation, see parent object
        """

        if not os.path.exists(script_file):
            raise ValueError('Cannot find script file located at: {}'.format(script_file))

        # initialize logger
        slurm_logger = get_fw_logger('rocket.slurm', queue_params.logging_dir)

        # submit the job
        try:
            cmd = ['sbatch', script_file]
            p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            p.wait()

            # grab the returncode. SLURM returns 0 if the job was successful
            if p.returncode == 0:
                try:
                    # sbatch output is of the form 'Submitted batch job 2561553' - grab the fourth field for the job id
                    job_id = int(p.stdout.read().split()[3])
                    slurm_logger.info('Job submission was successful and job_id is {}'.format(job_id))
                    return job_id
                except:
                    # probably error parsing job code
                    log_exception(slurm_logger, 'Could not parse job id following slurm...')

            else:
                # some qsub error, e.g. maybe wrong queue specified, don't have permission to submit, etc...
                msgs = ['Error in job submission with SLURM file {f} and cmd {c}'.format(f=script_file, c=cmd)]
                msgs.append('The error response reads: {}'.format(p.stderr.read()))
                log_fancy(slurm_logger, 'error', msgs)

        except:
            # random error, e.g. no qsub on machine!
            log_exception(slurm_logger, 'Running slurm caused an error...')