Example no. 1
0
 def recover(self, job, job_wrapper):
     """Recovers jobs stuck in the queued/running state when Galaxy started"""
     job_id = job.get_job_runner_external_id()
     log.debug(f"k8s trying to recover job: {job_id}")
     if job_id is None:
         self.put(job_wrapper)
         return
     ajs = AsynchronousJobState(files_dir=job_wrapper.working_directory,
                                job_wrapper=job_wrapper)
     ajs.job_id = str(job_id)
     ajs.command_line = job.command_line
     ajs.job_wrapper = job_wrapper
     ajs.job_destination = job_wrapper.job_destination
     if job.state in (model.Job.states.RUNNING, model.Job.states.STOPPED):
         log.debug(
             "({}/{}) is still in {} state, adding to the runner monitor queue"
             .format(job.id, job.job_runner_external_id, job.state))
         ajs.old_state = model.Job.states.RUNNING
         ajs.running = True
         self.monitor_queue.put(ajs)
     elif job.state == model.Job.states.QUEUED:
         log.debug(
             "({}/{}) is still in queued state, adding to the runner monitor queue"
             .format(job.id, job.job_runner_external_id))
         ajs.old_state = model.Job.states.QUEUED
         ajs.running = False
         self.monitor_queue.put(ajs)
Example no. 2
0
 def recover(self, job, job_wrapper):
     """Recovers jobs stuck in the queued/running state when Galaxy started"""
     job_id = job.get_job_runner_external_id()
     if job_id is None:
         self.put(job_wrapper)
         return
     ajs = AsynchronousJobState(files_dir=job_wrapper.working_directory,
                                job_wrapper=job_wrapper)
     ajs.job_id = str(job_id)
     ajs.command_line = job.get_command_line()
     ajs.job_wrapper = job_wrapper
     ajs.job_destination = job_wrapper.job_destination
     if job.state == model.Job.states.RUNNING:
         log.debug(
             "(%s/%s) is still in running state, adding to the DRM queue" %
             (job.get_id(), job.get_job_runner_external_id()))
         ajs.old_state = drmaa.JobState.RUNNING
         ajs.running = True
         self.monitor_queue.put(ajs)
     elif job.get_state() == model.Job.states.QUEUED:
         log.debug(
             "(%s/%s) is still in DRM queued state, adding to the DRM queue"
             % (job.get_id(), job.get_job_runner_external_id()))
         ajs.old_state = drmaa.JobState.QUEUED_ACTIVE
         ajs.running = False
         self.monitor_queue.put(ajs)
Example no. 3
0
    def recover(self, job, job_wrapper):
        """ Recovers jobs stuck in the queued/running state when Galaxy started """
        """ This method is called by galaxy at the time of startup.
            Jobs in Running & Queued status in galaxy are put in the monitor_queue by creating an AsynchronousJobState object
        """
        job_id = job_wrapper.job_id
        ajs = AsynchronousJobState(files_dir=job_wrapper.working_directory,
                                   job_wrapper=job_wrapper)
        ajs.job_id = str(job_id)
        ajs.job_destination = job_wrapper.job_destination
        job_wrapper.command_line = job.command_line
        ajs.job_wrapper = job_wrapper
        if job.state in (model.Job.states.RUNNING, model.Job.states.STOPPED):
            log.debug(
                f"({job.id}/{job.get_job_runner_external_id()}) is still in {job.state} state, adding to the god queue"
            )
            ajs.old_state = 'R'
            ajs.running = True
            self.monitor_queue.put(ajs)

        elif job.state == model.Job.states.QUEUED:
            log.debug(
                f"({job.id}/{job.get_job_runner_external_id()}) is still in god queued state, adding to the god queue"
            )
            ajs.old_state = 'Q'
            ajs.running = False
            self.monitor_queue.put(ajs)
Example no. 4
0
 def recover(self, job, job_wrapper):
     """Recovers jobs stuck in the queued/running state when Galaxy started"""
     job_id = job.get_job_runner_external_id()
     pbs_job_state = AsynchronousJobState()
     pbs_job_state.output_file = "%s/%s.o" % (self.app.config.cluster_files_directory, job.id)
     pbs_job_state.error_file = "%s/%s.e" % (self.app.config.cluster_files_directory, job.id)
     pbs_job_state.exit_code_file = "%s/%s.ec" % (self.app.config.cluster_files_directory, job.id)
     pbs_job_state.job_file = "%s/%s.sh" % (self.app.config.cluster_files_directory, job.id)
     pbs_job_state.job_id = str(job_id)
     pbs_job_state.runner_url = job_wrapper.get_job_runner_url()
     pbs_job_state.job_destination = job_wrapper.job_destination
     job_wrapper.command_line = job.command_line
     pbs_job_state.job_wrapper = job_wrapper
     if job.state == model.Job.states.RUNNING:
         log.debug(
             "(%s/%s) is still in running state, adding to the PBS queue"
             % (job.id, job.get_job_runner_external_id())
         )
         pbs_job_state.old_state = "R"
         pbs_job_state.running = True
         self.monitor_queue.put(pbs_job_state)
     elif job.state == model.Job.states.QUEUED:
         log.debug(
             "(%s/%s) is still in PBS queued state, adding to the PBS queue"
             % (job.id, job.get_job_runner_external_id())
         )
         pbs_job_state.old_state = "Q"
         pbs_job_state.running = False
         self.monitor_queue.put(pbs_job_state)
Example no. 5
0
 def recover(self, job, job_wrapper):
     """Recovers jobs stuck in the queued/running state when Galaxy started"""
     # TODO this needs to be implemented to override unimplemented base method
     job_id = job.get_job_runner_external_id()
     log.debug("k8s trying to recover job: " + job_id)
     if job_id is None:
         self.put(job_wrapper)
         return
     ajs = AsynchronousJobState(files_dir=job_wrapper.working_directory, job_wrapper=job_wrapper)
     ajs.job_id = str(job_id)
     ajs.command_line = job.command_line
     ajs.job_wrapper = job_wrapper
     ajs.job_destination = job_wrapper.job_destination
     if job.state == model.Job.states.RUNNING:
         log.debug("(%s/%s) is still in running state, adding to the runner monitor queue" % (
             job.id, job.job_runner_external_id))
         ajs.old_state = model.Job.states.RUNNING
         ajs.running = True
         self.monitor_queue.put(ajs)
     elif job.state == model.Job.states.QUEUED:
         log.debug("(%s/%s) is still in queued state, adding to the runner monitor queue" % (
             job.id, job.job_runner_external_id))
         ajs.old_state = model.Job.states.QUEUED
         ajs.running = False
         self.monitor_queue.put(ajs)
Example no. 6
0
 def recover(self, job, job_wrapper):
     """Recovers jobs stuck in the queued/running state when Galaxy started"""
     # TODO this needs to be implemented to override unimplemented base method
     job_id = job.get_job_runner_external_id()
     log.debug("k8s trying to recover job: " + job_id)
     if job_id is None:
         self.put(job_wrapper)
         return
     ajs = AsynchronousJobState(files_dir=job_wrapper.working_directory,
                                job_wrapper=job_wrapper)
     ajs.job_id = str(job_id)
     ajs.command_line = job.command_line
     ajs.job_wrapper = job_wrapper
     ajs.job_destination = job_wrapper.job_destination
     if job.state == model.Job.states.RUNNING:
         log.debug(
             "(%s/%s) is still in running state, adding to the runner monitor queue"
             % (job.id, job.job_runner_external_id))
         ajs.old_state = model.Job.states.RUNNING
         ajs.running = True
         self.monitor_queue.put(ajs)
     elif job.state == model.Job.states.QUEUED:
         log.debug(
             "(%s/%s) is still in queued state, adding to the runner monitor queue"
             % (job.id, job.job_runner_external_id))
         ajs.old_state = model.Job.states.QUEUED
         ajs.running = False
         self.monitor_queue.put(ajs)
Example no. 7
0
 def recover(self,job,job_wrapper):
     # Recovers jobs in the queued/running state when Galaxy started
     # What is 'job' an instance of???
     # Could be model.Job?
     # Fetch the job id used by JSE-Drop
     job_name = job.get_job_runner_external_id()
     # Get the job destination
     job_destination = job_wrapper.job_destination
     # Fetch the drop dir
     drop_off_dir = self._get_drop_dir()
     log.debug("recover: drop-off dir = %s" % drop_off_dir)
     jse_drop = JSEDrop(drop_off_dir)
     # Store state information for job
     job_state = AsynchronousJobState()
     job_state.job_wrapper = job_wrapper
     job_state.job_id = job_name
     job_state.job_destination = job_destination
     # Sort out the status
     if job.state == model.Job.states.RUNNING:
         job_state.old_state = True
         job_state.running = True
     elif job.get_state() == model.Job.states.QUEUED:
         job_state.old_state = True
         job_state.running = False
     # Add to the queue of jobs to monitor
     self.monitor_queue.put(job_state)
Example no. 8
0
 def recover(self, job, job_wrapper):
     msg = ('({name!r}/{runner!r}) is still in {state!s} state, adding to'
            ' the runner monitor queue')
     job_id = job.get_job_runner_external_id()
     ajs = AsynchronousJobState(files_dir=job_wrapper.working_directory,
                                job_wrapper=job_wrapper)
     ajs.job_id = self.JOB_NAME_PREFIX + str(job_id)
     ajs.command_line = job.command_line
     ajs.job_wrapper = job_wrapper
     ajs.job_destination = job_wrapper.job_destination
     if job.state == model.Job.states.RUNNING:
         LOGGER.debug(
             msg.format(name=job.id,
                        runner=job.job_runner_external_id,
                        state='running'))
         ajs.old_state = model.Job.states.RUNNING
         ajs.running = True
         self.monitor_queue.put(ajs)
     elif job.state == model.Job.states.QUEUED:
         LOGGER.debug(
             msg.format(name=job.id,
                        runner=job.job_runner_external_id,
                        state='queued'))
         ajs.old_state = model.Job.states.QUEUED
         ajs.running = False
         self.monitor_queue.put(ajs)
Example no. 9
0
 def recover(self, job, job_wrapper):
     """Recovers jobs stuck in the queued/running state when Galaxy started"""
     job_id = job.get_job_runner_external_id()
     pbs_job_state = AsynchronousJobState()
     pbs_job_state.output_file = "%s/%s.o" % (
         self.app.config.cluster_files_directory, job.id)
     pbs_job_state.error_file = "%s/%s.e" % (
         self.app.config.cluster_files_directory, job.id)
     pbs_job_state.exit_code_file = "%s/%s.ec" % (
         self.app.config.cluster_files_directory, job.id)
     pbs_job_state.job_file = "%s/%s.sh" % (
         self.app.config.cluster_files_directory, job.id)
     pbs_job_state.job_id = str(job_id)
     pbs_job_state.runner_url = job_wrapper.get_job_runner_url()
     pbs_job_state.job_destination = job_wrapper.job_destination
     job_wrapper.command_line = job.command_line
     pbs_job_state.job_wrapper = job_wrapper
     if job.state == model.Job.states.RUNNING:
         log.debug(
             "(%s/%s) is still in running state, adding to the PBS queue" %
             (job.id, job.get_job_runner_external_id()))
         pbs_job_state.old_state = 'R'
         pbs_job_state.running = True
         self.monitor_queue.put(pbs_job_state)
     elif job.state == model.Job.states.QUEUED:
         log.debug(
             "(%s/%s) is still in PBS queued state, adding to the PBS queue"
             % (job.id, job.get_job_runner_external_id()))
         pbs_job_state.old_state = 'Q'
         pbs_job_state.running = False
         self.monitor_queue.put(pbs_job_state)
Example no. 10
0
    def queue_job(self, job_wrapper):
        job_destination = job_wrapper.job_destination
        self._populate_parameter_defaults(job_destination)

        command_line, client, remote_job_config, compute_environment = self.__prepare_job(
            job_wrapper, job_destination)

        if not command_line:
            return

        try:
            dependencies_description = PulsarJobRunner.__dependencies_description(
                client, job_wrapper)
            rewrite_paths = not PulsarJobRunner.__rewrite_parameters(client)
            unstructured_path_rewrites = {}
            output_names = []
            if compute_environment:
                unstructured_path_rewrites = compute_environment.unstructured_path_rewrites
                output_names = compute_environment.output_names()

            if self.app.config.metadata_strategy == "legacy":
                # Drop this branch in 19.09.
                metadata_directory = job_wrapper.working_directory
            else:
                metadata_directory = os.path.join(
                    job_wrapper.working_directory, "metadata")

            client_job_description = ClientJobDescription(
                command_line=command_line,
                input_files=self.get_input_files(job_wrapper),
                client_outputs=self.__client_outputs(client, job_wrapper),
                working_directory=job_wrapper.tool_working_directory,
                metadata_directory=metadata_directory,
                tool=job_wrapper.tool,
                config_files=job_wrapper.extra_filenames,
                dependencies_description=dependencies_description,
                env=client.env,
                rewrite_paths=rewrite_paths,
                arbitrary_files=unstructured_path_rewrites,
                touch_outputs=output_names,
            )
            job_id = pulsar_submit_job(client, client_job_description,
                                       remote_job_config)
            log.info("Pulsar job submitted with job_id %s" % job_id)
            job_wrapper.set_job_destination(job_destination, job_id)
            job_wrapper.change_state(model.Job.states.QUEUED)
        except Exception:
            job_wrapper.fail("failure running job", exception=True)
            log.exception("failure running job %d", job_wrapper.job_id)
            return

        pulsar_job_state = AsynchronousJobState()
        pulsar_job_state.job_wrapper = job_wrapper
        pulsar_job_state.job_id = job_id
        pulsar_job_state.old_state = True
        pulsar_job_state.running = False
        pulsar_job_state.job_destination = job_destination
        self.monitor_job(pulsar_job_state)
Example no. 11
0
 def _job_state(self, job, job_wrapper):
     job_state = AsynchronousJobState()
     # TODO: Determine why this is set when using normal message queue updates
     # but not CLI submitted MQ updates...
     raw_job_id = job.get_job_runner_external_id() or job_wrapper.job_id
     job_state.job_id = str(raw_job_id)
     job_state.runner_url = job_wrapper.get_job_runner_url()
     job_state.job_destination = job_wrapper.job_destination
     job_state.job_wrapper = job_wrapper
     return job_state
Example no. 12
0
 def _job_state( self, job, job_wrapper ):
     job_state = AsynchronousJobState()
     # TODO: Determine why this is set when using normal message queue updates
     # but not CLI submitted MQ updates...
     raw_job_id = job.get_job_runner_external_id() or job_wrapper.job_id
     job_state.job_id = str( raw_job_id )
     job_state.runner_url = job_wrapper.get_job_runner_url()
     job_state.job_destination = job_wrapper.job_destination
     job_state.job_wrapper = job_wrapper
     return job_state
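A note on examples no. 11 and 12: the _job_state() helper only builds the AsynchronousJobState object, it does not enqueue it. Below is a minimal sketch (an assumption, not code taken from any example above) of how a recover() method in the same runner might reuse it, following the pattern of the other recover() implementations in this listing; model, monitor_queue and AsynchronousJobState are the names used elsewhere here.
 def recover(self, job, job_wrapper):
     """Recovers jobs stuck in the queued/running state when Galaxy started"""
     # Build the monitor state via the _job_state() helper, then classify the
     # job by its persisted Galaxy state before handing it to the monitor thread.
     job_state = self._job_state(job, job_wrapper)
     job_wrapper.command_line = job.command_line
     if job.state == model.Job.states.RUNNING:
         job_state.old_state = model.Job.states.RUNNING
         job_state.running = True
         self.monitor_queue.put(job_state)
     elif job.state == model.Job.states.QUEUED:
         job_state.old_state = model.Job.states.QUEUED
         job_state.running = False
         self.monitor_queue.put(job_state)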
Example no. 13
0
    def queue_job(self, job_wrapper):
        command_line = ''
        job_destination = job_wrapper.job_destination

        try:
            job_wrapper.prepare()
            if hasattr(job_wrapper, 'prepare_input_files_cmds'
                       ) and job_wrapper.prepare_input_files_cmds is not None:
                for cmd in job_wrapper.prepare_input_files_cmds:  # run the commands to stage the input files
                    #log.debug( 'executing: %s' % cmd )
                    if 0 != os.system(cmd):
                        raise Exception(
                            'Error running file staging command: %s' % cmd)
                job_wrapper.prepare_input_files_cmds = None  # prevent them from being used in-line
            command_line = self.build_command_line(
                job_wrapper,
                include_metadata=False,
                include_work_dir_outputs=False)
        except:
            job_wrapper.fail("failure preparing job", exception=True)
            log.exception("failure running job %d" % job_wrapper.job_id)
            return

        # If we were able to get a command line, run the job
        if not command_line:
            job_wrapper.finish('', '')
            return

        try:
            client = self.get_client_from_wrapper(job_wrapper)
            output_files = self.get_output_files(job_wrapper)
            input_files = job_wrapper.get_input_fnames()
            working_directory = job_wrapper.working_directory
            tool = job_wrapper.tool
            file_stager = FileStager(client, tool, command_line,
                                     job_wrapper.extra_filenames, input_files,
                                     output_files, working_directory)
            rebuilt_command_line = file_stager.get_rewritten_command_line()
            job_id = file_stager.job_id
            client.launch(rebuilt_command_line)
            job_wrapper.set_job_destination(job_destination, job_id)
            job_wrapper.change_state(model.Job.states.QUEUED)
        except:
            job_wrapper.fail("failure running job", exception=True)
            log.exception("failure running job %d" % job_wrapper.job_id)
            return

        lwr_job_state = AsynchronousJobState()
        lwr_job_state.job_wrapper = job_wrapper
        lwr_job_state.job_id = job_id
        lwr_job_state.old_state = True
        lwr_job_state.running = False
        lwr_job_state.job_destination = job_destination
        self.monitor_job(lwr_job_state)
Example no. 14
0
 def recover( self, job, job_wrapper ):
     """Recovers jobs stuck in the queued/running state when Galaxy started"""
     job_state = AsynchronousJobState()
     job_state.job_id = str( job.get_job_runner_external_id() )
     job_state.runner_url = job_wrapper.get_job_runner_url()
     job_state.job_destination = job_wrapper.job_destination
     job_wrapper.command_line = job.get_command_line()
     job_state.job_wrapper = job_wrapper
     state = job.get_state()
     if state in [model.Job.states.RUNNING, model.Job.states.QUEUED]:
         log.debug( "(LWR/%s) is still in running state, adding to the LWR queue" % ( job.get_id()) )
         job_state.old_state = True
         job_state.running = state == model.Job.states.RUNNING
         self.monitor_queue.put( job_state )
Example no. 15
0
    def queue_job(self, job_wrapper):
        job_destination = job_wrapper.job_destination
        self._populate_parameter_defaults(job_destination)

        command_line, client, remote_job_config, compute_environment = self.__prepare_job(job_wrapper, job_destination)

        if not command_line:
            return

        try:
            dependencies_description = PulsarJobRunner.__dependencies_description(client, job_wrapper)
            rewrite_paths = not PulsarJobRunner.__rewrite_parameters(client)
            unstructured_path_rewrites = {}
            output_names = []
            if compute_environment:
                unstructured_path_rewrites = compute_environment.unstructured_path_rewrites
                output_names = compute_environment.output_names()

            client_job_description = ClientJobDescription(
                command_line=command_line,
                input_files=self.get_input_files(job_wrapper),
                client_outputs=self.__client_outputs(client, job_wrapper),
                working_directory=job_wrapper.tool_working_directory,
                metadata_directory=job_wrapper.working_directory,
                tool=job_wrapper.tool,
                config_files=job_wrapper.extra_filenames,
                dependencies_description=dependencies_description,
                env=client.env,
                rewrite_paths=rewrite_paths,
                arbitrary_files=unstructured_path_rewrites,
                touch_outputs=output_names,
            )
            job_id = pulsar_submit_job(client, client_job_description, remote_job_config)
            log.info("Pulsar job submitted with job_id %s" % job_id)
            job_wrapper.set_job_destination(job_destination, job_id)
            job_wrapper.change_state(model.Job.states.QUEUED)
        except Exception:
            job_wrapper.fail("failure running job", exception=True)
            log.exception("failure running job %d", job_wrapper.job_id)
            return

        pulsar_job_state = AsynchronousJobState()
        pulsar_job_state.job_wrapper = job_wrapper
        pulsar_job_state.job_id = job_id
        pulsar_job_state.old_state = True
        pulsar_job_state.running = False
        pulsar_job_state.job_destination = job_destination
        self.monitor_job(pulsar_job_state)
Example no. 16
0
 def recover(self, job, job_wrapper):
     """Recovers jobs stuck in the queued/running state when Galaxy started"""
     job_state = AsynchronousJobState()
     job_state.job_id = str(job.get_job_runner_external_id())
     job_state.runner_url = job_wrapper.get_job_runner_url()
     job_state.job_destination = job_wrapper.job_destination
     job_wrapper.command_line = job.get_command_line()
     job_state.job_wrapper = job_wrapper
     state = job.get_state()
     if state in [model.Job.states.RUNNING, model.Job.states.QUEUED]:
         log.debug(
             "(LWR/%s) is still in running state, adding to the LWR queue" %
             (job.get_id()))
         job_state.old_state = True
         job_state.running = state == model.Job.states.RUNNING
         self.monitor_queue.put(job_state)
Example no. 17
0
    def queue_job(self, job_wrapper):
        command_line = ''
        job_destination = job_wrapper.job_destination

        try:
            job_wrapper.prepare()
            if hasattr(job_wrapper, 'prepare_input_files_cmds') and job_wrapper.prepare_input_files_cmds is not None:
                for cmd in job_wrapper.prepare_input_files_cmds:  # run the commands to stage the input files
                    #log.debug( 'executing: %s' % cmd )
                    if 0 != os.system(cmd):
                        raise Exception('Error running file staging command: %s' % cmd)
                job_wrapper.prepare_input_files_cmds = None  # prevent them from being used in-line
            command_line = self.build_command_line( job_wrapper, include_metadata=False, include_work_dir_outputs=False )
        except:
            job_wrapper.fail( "failure preparing job", exception=True )
            log.exception("failure running job %d" % job_wrapper.job_id)
            return

        # If we were able to get a command line, run the job
        if not command_line:
            job_wrapper.finish( '', '' )
            return

        try:
            client = self.get_client_from_wrapper(job_wrapper)
            output_files = self.get_output_files(job_wrapper)
            input_files = job_wrapper.get_input_fnames()
            working_directory = job_wrapper.working_directory
            tool = job_wrapper.tool
            file_stager = FileStager(client, tool, command_line, job_wrapper.extra_filenames, input_files, output_files, working_directory)
            rebuilt_command_line = file_stager.get_rewritten_command_line()
            job_id = file_stager.job_id
            client.launch( rebuilt_command_line )
            job_wrapper.set_job_destination( job_destination, job_id )
            job_wrapper.change_state( model.Job.states.QUEUED )
        except:
            job_wrapper.fail( "failure running job", exception=True )
            log.exception("failure running job %d" % job_wrapper.job_id)
            return

        lwr_job_state = AsynchronousJobState()
        lwr_job_state.job_wrapper = job_wrapper
        lwr_job_state.job_id = job_id
        lwr_job_state.old_state = True
        lwr_job_state.running = False
        lwr_job_state.job_destination = job_destination
        self.monitor_job(lwr_job_state)
Example no. 18
0
    def queue_job(self, job_wrapper):
        job_destination = job_wrapper.job_destination

        command_line, client, remote_job_config, compute_environment = self.__prepare_job(
            job_wrapper, job_destination)

        if not command_line:
            return

        try:
            dependencies_description = LwrJobRunner.__dependencies_description(
                client, job_wrapper)
            rewrite_paths = not LwrJobRunner.__rewrite_parameters(client)
            unstructured_path_rewrites = {}
            if compute_environment:
                unstructured_path_rewrites = compute_environment.unstructured_path_rewrites

            client_job_description = ClientJobDescription(
                command_line=command_line,
                input_files=self.get_input_files(job_wrapper),
                client_outputs=self.__client_outputs(client, job_wrapper),
                working_directory=job_wrapper.working_directory,
                tool=job_wrapper.tool,
                config_files=job_wrapper.extra_filenames,
                dependencies_description=dependencies_description,
                env=client.env,
                rewrite_paths=rewrite_paths,
                arbitrary_files=unstructured_path_rewrites,
            )
            job_id = lwr_submit_job(client, client_job_description,
                                    remote_job_config)
            log.info("lwr job submitted with job_id %s" % job_id)
            job_wrapper.set_job_destination(job_destination, job_id)
            job_wrapper.change_state(model.Job.states.QUEUED)
        except Exception:
            job_wrapper.fail("failure running job", exception=True)
            log.exception("failure running job %d" % job_wrapper.job_id)
            return

        lwr_job_state = AsynchronousJobState()
        lwr_job_state.job_wrapper = job_wrapper
        lwr_job_state.job_id = job_id
        lwr_job_state.old_state = True
        lwr_job_state.running = False
        lwr_job_state.job_destination = job_destination
        self.monitor_job(lwr_job_state)
Example no. 19
0
 def recover( self, job, job_wrapper ):
     """Recovers jobs stuck in the queued/running state when Galaxy started"""
     job_id = job.get_job_runner_external_id()
     if job_id is None:
         self.put( job_wrapper )
         return
     ajs = AsynchronousJobState( files_dir=job_wrapper.working_directory, job_wrapper=job_wrapper )
     ajs.job_id = str( job_id )
     ajs.command_line = job.get_command_line()
     ajs.job_wrapper = job_wrapper
     ajs.job_destination = job_wrapper.job_destination
     if job.state == model.Job.states.RUNNING:
         log.debug( "(%s/%s) is still in running state, adding to the DRM queue" % ( job.get_id(), job.get_job_runner_external_id() ) )
         ajs.old_state = drmaa.JobState.RUNNING
         ajs.running = True
         self.monitor_queue.put( ajs )
     elif job.get_state() == model.Job.states.QUEUED:
         log.debug( "(%s/%s) is still in DRM queued state, adding to the DRM queue" % ( job.get_id(), job.get_job_runner_external_id() ) )
         ajs.old_state = drmaa.JobState.QUEUED_ACTIVE
         ajs.running = False
         self.monitor_queue.put( ajs )
Example no. 20
0
    def recover(self, job, job_wrapper):
        """ Recovers jobs stuck in the queued/running state when Galaxy started """
        """ This method is called by galaxy at the time of startup.
            Jobs in Running & Queued status in galaxy are put in the monitor_queue by creating an AsynchronousJobState object
        """
        job_id = job_wrapper.job_id
        ajs = AsynchronousJobState(files_dir=job_wrapper.working_directory, job_wrapper=job_wrapper)
        ajs.job_id = str(job_id)
        ajs.job_destination = job_wrapper.job_destination
        job_wrapper.command_line = job.command_line
        ajs.job_wrapper = job_wrapper
        if job.state == model.Job.states.RUNNING:
            log.debug("(%s/%s) is still in running state, adding to the god queue" % (job.id, job.get_job_runner_external_id()))
            ajs.old_state = 'R'
            ajs.running = True
            self.monitor_queue.put(ajs)

        elif job.state == model.Job.states.QUEUED:
            log.debug("(%s/%s) is still in god queued state, adding to the god queue" % (job.id, job.get_job_runner_external_id()))
            ajs.old_state = 'Q'
            ajs.running = False
            self.monitor_queue.put(ajs)
Example no. 21
0
    def queue_job(self, job_wrapper):
        job_destination = job_wrapper.job_destination

        command_line, client, remote_job_config = self.__prepare_job( job_wrapper, job_destination )

        if not command_line:
            return

        try:
            dependency_resolution = LwrJobRunner.__dependency_resolution( client )
            remote_dependency_resolution = dependency_resolution == "remote"
            requirements = job_wrapper.tool.requirements if remote_dependency_resolution else []
            client_job_description = ClientJobDescription(
                command_line=command_line,
                output_files=self.get_output_files(job_wrapper),
                input_files=job_wrapper.get_input_fnames(),
                working_directory=job_wrapper.working_directory,
                tool=job_wrapper.tool,
                config_files=job_wrapper.extra_filenames,
                requirements=requirements,
                version_file=job_wrapper.get_version_string_path(),
            )
            job_id = lwr_submit_job(client, client_job_description, remote_job_config)
            log.info("lwr job submitted with job_id %s" % job_id)
            job_wrapper.set_job_destination( job_destination, job_id )
            job_wrapper.change_state( model.Job.states.QUEUED )
        except Exception:
            job_wrapper.fail( "failure running job", exception=True )
            log.exception("failure running job %d" % job_wrapper.job_id)
            return

        lwr_job_state = AsynchronousJobState()
        lwr_job_state.job_wrapper = job_wrapper
        lwr_job_state.job_id = job_id
        lwr_job_state.old_state = True
        lwr_job_state.running = False
        lwr_job_state.job_destination = job_destination
        self.monitor_job(lwr_job_state)
Example no. 22
0
 def recover(self, job, job_wrapper):
     msg = ('({name!r}/{runner!r}) is still in {state!s} state, adding to'
            ' the runner monitor queue')
     job_id = job.get_job_runner_external_id()
     ajs = AsynchronousJobState(files_dir=job_wrapper.working_directory,
                                job_wrapper=job_wrapper)
     ajs.job_id = self.JOB_NAME_PREFIX + str(job_id)
     ajs.command_line = job.command_line
     ajs.job_wrapper = job_wrapper
     ajs.job_destination = job_wrapper.job_destination
     if job.state == model.Job.states.RUNNING:
         LOGGER.debug(msg.format(
             name=job.id, runner=job.job_runner_external_id,
             state='running'))
         ajs.old_state = model.Job.states.RUNNING
         ajs.running = True
         self.monitor_queue.put(ajs)
     elif job.state == model.Job.states.QUEUED:
         LOGGER.debug(msg.format(
             name=job.id, runner=job.job_runner_external_id,
             state='queued'))
         ajs.old_state = model.Job.states.QUEUED
         ajs.running = False
         self.monitor_queue.put(ajs)
Example no. 23
0
    def queue_job(self, job_wrapper):
        """Create PBS script for a job and submit it to the PBS queue"""
        # prepare the job
        if not self.prepare_job(
                job_wrapper,
                include_metadata=not (self.app.config.pbs_stage_path)):
            return

        job_destination = job_wrapper.job_destination

        # Determine the job's PBS destination (server/queue) and options from the job destination definition
        pbs_queue_name = None
        pbs_server_name = self.default_pbs_server
        pbs_options = []
        if '-q' in job_destination.params and 'destination' not in job_destination.params:
            job_destination.params['destination'] = job_destination.params.pop(
                '-q')
        if 'destination' in job_destination.params:
            if '@' in job_destination.params['destination']:
                # Destination includes a server
                pbs_queue_name, pbs_server_name = job_destination.params[
                    'destination'].split('@')
                if pbs_queue_name == '':
                    # e.g. `qsub -q @server`
                    pbs_queue_name = None
            else:
                # Destination is just a queue
                pbs_queue_name = job_destination.params['destination']
            job_destination.params.pop('destination')

        # Parse PBS params
        pbs_options = self.parse_destination_params(job_destination.params)

        # Explicitly set the determined PBS destination in the persisted job destination for recovery
        job_destination.params['destination'] = '%s@%s' % (pbs_queue_name or
                                                           '', pbs_server_name)

        c = pbs.pbs_connect(util.smart_str(pbs_server_name))
        if c <= 0:
            errno, text = pbs.error()
            job_wrapper.fail(
                "Unable to queue job for execution.  Resubmitting the job may succeed."
            )
            log.error("Connection to PBS server for submit failed: %s: %s" %
                      (errno, text))
            return

        # define job attributes
        ofile = "%s/%s.o" % (self.app.config.cluster_files_directory,
                             job_wrapper.job_id)
        efile = "%s/%s.e" % (self.app.config.cluster_files_directory,
                             job_wrapper.job_id)
        ecfile = "%s/%s.ec" % (self.app.config.cluster_files_directory,
                               job_wrapper.job_id)

        output_fnames = job_wrapper.get_output_fnames()

        # If an application server is set, we're staging
        if self.app.config.pbs_application_server:
            pbs_ofile = self.app.config.pbs_application_server + ':' + ofile
            pbs_efile = self.app.config.pbs_application_server + ':' + efile
            output_files = [str(o) for o in output_fnames]
            output_files.append(ecfile)
            stagein = self.get_stage_in_out(job_wrapper.get_input_fnames() +
                                            output_files,
                                            symlink=True)
            stageout = self.get_stage_in_out(output_files)
            attrs = [
                dict(name=pbs.ATTR_o, value=pbs_ofile),
                dict(name=pbs.ATTR_e, value=pbs_efile),
                dict(name=pbs.ATTR_stagein, value=stagein),
                dict(name=pbs.ATTR_stageout, value=stageout),
            ]
        # If not, we're using NFS
        else:
            attrs = [
                dict(name=pbs.ATTR_o, value=ofile),
                dict(name=pbs.ATTR_e, value=efile),
            ]

        # define PBS job options
        attrs.append(
            dict(name=pbs.ATTR_N,
                 value=str("%s_%s_%s" %
                           (job_wrapper.job_id, job_wrapper.tool.id,
                            job_wrapper.user))))
        job_attrs = pbs.new_attropl(len(attrs) + len(pbs_options))
        for i, attr in enumerate(attrs + pbs_options):
            job_attrs[i].name = attr['name']
            job_attrs[i].value = attr['value']
            if 'resource' in attr:
                job_attrs[i].resource = attr['resource']
        exec_dir = os.path.abspath(job_wrapper.working_directory)

        # write the job script
        if self.app.config.pbs_stage_path != '':
            # touch the ecfile so that it gets staged
            with open(ecfile, 'a'):
                os.utime(ecfile, None)

            stage_commands = pbs_symlink_template % (
                " ".join(job_wrapper.get_input_fnames() + output_files),
                self.app.config.pbs_stage_path,
                exec_dir,
            )
        else:
            stage_commands = ''

        env_setup_commands = [stage_commands]
        script = self.get_job_file(job_wrapper,
                                   exit_code_path=ecfile,
                                   env_setup_commands=env_setup_commands)
        job_file = "%s/%s.sh" % (self.app.config.cluster_files_directory,
                                 job_wrapper.job_id)
        self.write_executable_script(job_file, script)
        # job was deleted while we were preparing it
        if job_wrapper.get_state() == model.Job.states.DELETED:
            log.debug(
                "Job %s deleted by user before it entered the PBS queue" %
                job_wrapper.job_id)
            pbs.pbs_disconnect(c)
            if self.app.config.cleanup_job in ("always", "onsuccess"):
                self.cleanup((ofile, efile, ecfile, job_file))
                job_wrapper.cleanup()
            return

        # submit
        # The job tag includes the job and the task identifier
        # (if a TaskWrapper was passed in):
        galaxy_job_id = job_wrapper.get_id_tag()
        log.debug("(%s) submitting file %s" % (galaxy_job_id, job_file))

        tries = 0
        while tries < 5:
            job_id = pbs.pbs_submit(c, job_attrs, job_file, pbs_queue_name,
                                    None)
            tries += 1
            if job_id:
                pbs.pbs_disconnect(c)
                break
            errno, text = pbs.error()
            log.warning("(%s) pbs_submit failed (try %d/5), PBS error %d: %s" %
                        (galaxy_job_id, tries, errno, text))
            time.sleep(2)
        else:
            log.error("(%s) All attempts to submit job failed" % galaxy_job_id)
            job_wrapper.fail(
                "Unable to run this job due to a cluster error, please retry it later"
            )
            return

        if pbs_queue_name is None:
            log.debug("(%s) queued in default queue as %s" %
                      (galaxy_job_id, job_id))
        else:
            log.debug("(%s) queued in %s queue as %s" %
                      (galaxy_job_id, pbs_queue_name, job_id))

        # persist destination
        job_wrapper.set_job_destination(job_destination, job_id)

        # Store PBS related state information for job
        job_state = AsynchronousJobState()
        job_state.job_wrapper = job_wrapper
        job_state.job_id = job_id
        job_state.job_file = job_file
        job_state.output_file = ofile
        job_state.error_file = efile
        job_state.exit_code_file = ecfile
        job_state.old_state = 'N'
        job_state.running = False
        job_state.job_destination = job_destination

        # Add to our 'queue' of jobs to monitor
        self.monitor_queue.put(job_state)
Example no. 24
0
    def queue_job( self, job_wrapper ):
        """Create PBS script for a job and submit it to the PBS queue"""
        # prepare the job
        if not self.prepare_job( job_wrapper, include_metadata=not( self.app.config.pbs_stage_path ) ):
            return

        job_destination = job_wrapper.job_destination

        # Determine the job's PBS destination (server/queue) and options from the job destination definition
        pbs_queue_name = None
        pbs_server_name = self.default_pbs_server
        pbs_options = []
        if '-q' in job_destination.params and 'destination' not in job_destination.params:
            job_destination.params['destination'] = job_destination.params.pop('-q')
        if 'destination' in job_destination.params:
            if '@' in job_destination.params['destination']:
                # Destination includes a server
                pbs_queue_name, pbs_server_name = job_destination.params['destination'].split('@')
                if pbs_queue_name == '':
                    # e.g. `qsub -q @server`
                    pbs_queue_name = None
            else:
                # Destination is just a queue
                pbs_queue_name = job_destination.params['destination']
            job_destination.params.pop('destination')

        # Parse PBS params
        pbs_options = self.parse_destination_params(job_destination.params)

        # Explicitly set the determined PBS destination in the persisted job destination for recovery
        job_destination.params['destination'] = '%s@%s' % (pbs_queue_name or '', pbs_server_name)

        c = pbs.pbs_connect( util.smart_str( pbs_server_name ) )
        if c <= 0:
            errno, text = pbs.error()
            job_wrapper.fail( "Unable to queue job for execution.  Resubmitting the job may succeed." )
            log.error( "Connection to PBS server for submit failed: %s: %s" % ( errno, text ) )
            return

        # define job attributes
        ofile = "%s/%s.o" % (self.app.config.cluster_files_directory, job_wrapper.job_id)
        efile = "%s/%s.e" % (self.app.config.cluster_files_directory, job_wrapper.job_id)
        ecfile = "%s/%s.ec" % (self.app.config.cluster_files_directory, job_wrapper.job_id)

        output_fnames = job_wrapper.get_output_fnames()

        # If an application server is set, we're staging
        if self.app.config.pbs_application_server:
            pbs_ofile = self.app.config.pbs_application_server + ':' + ofile
            pbs_efile = self.app.config.pbs_application_server + ':' + efile
            output_files = [ str( o ) for o in output_fnames ]
            output_files.append(ecfile)
            stagein = self.get_stage_in_out( job_wrapper.get_input_fnames() + output_files, symlink=True )
            stageout = self.get_stage_in_out( output_files )
            attrs = [
                dict( name=pbs.ATTR_o, value=pbs_ofile ),
                dict( name=pbs.ATTR_e, value=pbs_efile ),
                dict( name=pbs.ATTR_stagein, value=stagein ),
                dict( name=pbs.ATTR_stageout, value=stageout ),
            ]
        # If not, we're using NFS
        else:
            attrs = [
                dict( name=pbs.ATTR_o, value=ofile ),
                dict( name=pbs.ATTR_e, value=efile ),
            ]

        # define PBS job options
        attrs.append( dict( name=pbs.ATTR_N, value=str( "%s_%s_%s" % ( job_wrapper.job_id, job_wrapper.tool.id, job_wrapper.user ) ) ) )
        job_attrs = pbs.new_attropl( len( attrs ) + len( pbs_options ) )
        for i, attr in enumerate( attrs + pbs_options ):
            job_attrs[i].name = attr['name']
            job_attrs[i].value = attr['value']
            if 'resource' in attr:
                job_attrs[i].resource = attr['resource']
        exec_dir = os.path.abspath( job_wrapper.working_directory )

        # write the job script
        if self.app.config.pbs_stage_path != '':
            # touch the ecfile so that it gets staged
            with open(ecfile, 'a'):
                os.utime(ecfile, None)

            stage_commands = pbs_symlink_template % (
                " ".join( job_wrapper.get_input_fnames() + output_files ),
                self.app.config.pbs_stage_path,
                exec_dir,
            )
        else:
            stage_commands = ''

        env_setup_commands = [ stage_commands ]
        script = self.get_job_file(job_wrapper, exit_code_path=ecfile, env_setup_commands=env_setup_commands)
        job_file = "%s/%s.sh" % (self.app.config.cluster_files_directory, job_wrapper.job_id)
        self.write_executable_script( job_file, script )
        # job was deleted while we were preparing it
        if job_wrapper.get_state() == model.Job.states.DELETED:
            log.debug( "Job %s deleted by user before it entered the PBS queue" % job_wrapper.job_id )
            pbs.pbs_disconnect(c)
            if job_wrapper.cleanup_job in ( "always", "onsuccess" ):
                self.cleanup( ( ofile, efile, ecfile, job_file ) )
                job_wrapper.cleanup()
            return

        # submit
        # The job tag includes the job and the task identifier
        # (if a TaskWrapper was passed in):
        galaxy_job_id = job_wrapper.get_id_tag()
        log.debug("(%s) submitting file %s" % ( galaxy_job_id, job_file ) )

        tries = 0
        while tries < 5:
            job_id = pbs.pbs_submit(c, job_attrs, job_file, pbs_queue_name, None)
            tries += 1
            if job_id:
                pbs.pbs_disconnect(c)
                break
            errno, text = pbs.error()
            log.warning( "(%s) pbs_submit failed (try %d/5), PBS error %d: %s" % (galaxy_job_id, tries, errno, text) )
            time.sleep(2)
        else:
            log.error( "(%s) All attempts to submit job failed" % galaxy_job_id )
            job_wrapper.fail( "Unable to run this job due to a cluster error, please retry it later" )
            return

        if pbs_queue_name is None:
            log.debug("(%s) queued in default queue as %s" % (galaxy_job_id, job_id) )
        else:
            log.debug("(%s) queued in %s queue as %s" % (galaxy_job_id, pbs_queue_name, job_id) )

        # persist destination
        job_wrapper.set_job_destination( job_destination, job_id )

        # Store PBS related state information for job
        job_state = AsynchronousJobState()
        job_state.job_wrapper = job_wrapper
        job_state.job_id = job_id
        job_state.job_file = job_file
        job_state.output_file = ofile
        job_state.error_file = efile
        job_state.exit_code_file = ecfile
        job_state.old_state = 'N'
        job_state.running = False
        job_state.job_destination = job_destination

        # Add to our 'queue' of jobs to monitor
        self.monitor_queue.put( job_state )
Example no. 25
0
 def queue_job(self, job_wrapper):
     """Write JSE-Drop file to drop location
     """
     # Get the configured job destination
     job_destination = job_wrapper.job_destination
     # Get the parameters defined for this destination
     # i.e. location of the drop-off directory etc
     drop_off_dir = self._get_drop_dir()
     virtual_env = self._get_virtual_env()
     qsub_options = self._get_qsub_options(job_destination)
     galaxy_slots = self._get_galaxy_slots(job_destination)
     galaxy_id = self._get_galaxy_id()
     log.debug("queue_job: drop-off dir = %s" % drop_off_dir)
     log.debug("queue_job: virtual_env  = %s" % virtual_env)
     log.debug("queue_job: qsub options = %s" % qsub_options)
     log.debug("queue_job: galaxy_slots = %s" % galaxy_slots)
     log.debug("queue_job: galaxy_id    = %s" % galaxy_id)
     if drop_off_dir is None:
         # Can't locate drop-off dir
         job_wrapper.fail("failure preparing job script (no JSE-drop "
                          "directory defined)",exception=True )
         log.exception("(%s/%s) failure writing job script (no "
                       "JSE-drop directory defined)" %
                       (galaxy_id_tag,job_name))
         return
     # Initialise JSE-drop wrapper
     jse_drop = JSEDrop(drop_off_dir)
     # ID and name for job
     galaxy_id_tag = job_wrapper.get_id_tag()
     log.debug("ID tag: %s" % galaxy_id_tag)
     job_name = self._get_job_name(galaxy_id_tag,
                                   job_wrapper.tool.old_id,
                                   galaxy_id)
     log.debug("Job name: %s" % job_name)
     # Prepare the job wrapper (or abort)
     if not self.prepare_job(job_wrapper):
         return
     # Sort out the slots (see e.g. condor.py for example)
     if galaxy_slots:
         galaxy_slots_statement = 'GALAXY_SLOTS="%s"; export GALAXY_SLOTS_CONFIGURED="1"' % galaxy_slots
     else:
         galaxy_slots_statement = 'GALAXY_SLOTS="1"'
     # Create script contents
     script = self.get_job_file(job_wrapper,
                                galaxy_virtual_env=virtual_env,
                                slots_statement=galaxy_slots_statement,
                                exit_code_path=None)
     # Separate leading shell specification from generated script
     shell = '\n'.join(filter(lambda x: x.startswith('#!'),
                              script.split('\n')))
     script = '\n'.join(filter(lambda x: not x.startswith('#!'),
                               script.split('\n')))
     # Create header with embedded qsub flags
     qsub_header = ["-V",
                    "-wd %s" % job_wrapper.working_directory]
     if qsub_options:
         qsub_header.append(qsub_options)
     qsub_header = '\n'.join(["#$ %s" % opt for opt in qsub_header])
     log.debug("qsub_header: %s" % qsub_header)
     # Reassemble the script components
     script = "\n".join((shell,qsub_header,script))
     # Create the drop file to submit the job
     try:
         drop_file = jse_drop.run(job_name,script)
         log.debug("created drop file %s" % drop_file)
         log.info("(%s) submitted as %s" % (galaxy_id_tag,job_name))
     except:
         # Some problem writing the qsub file
         job_wrapper.fail("failure preparing job script",
                          exception=True )
         log.exception("(%s/%s) failure writing job script" %
                       (galaxy_id_tag,job_name))
         return
     # External job id (i.e. id used by JSE-Drop as a handle to
     # identify the job) is the same as the job name here
     external_job_id = job_name
     # Store runner information for tracking if Galaxy restarts
     job_wrapper.set_job_destination(job_destination,
                                     external_job_id)
     # Store state information for job
     job_state = AsynchronousJobState()
     job_state.job_wrapper = job_wrapper
     job_state.job_id = job_name
     job_state.old_state = True
     job_state.running = False
     job_state.job_destination = job_destination
     # Add to the queue of jobs to monitor
     self.monitor_job(job_state)
     log.info("%s: queued" % job_name)
Example no. 26
0
    def queue_job(self, job_wrapper):
        job_destination = job_wrapper.job_destination
        self._populate_parameter_defaults(job_destination)

        command_line, client, remote_job_config, compute_environment, remote_container = self.__prepare_job(job_wrapper, job_destination)

        if not command_line:
            return

        try:
            dependencies_description = PulsarJobRunner.__dependencies_description(client, job_wrapper)
            rewrite_paths = not PulsarJobRunner.__rewrite_parameters(client)
            path_rewrites_unstructured = {}
            output_names = []
            if compute_environment:
                path_rewrites_unstructured = compute_environment.path_rewrites_unstructured
                output_names = compute_environment.output_names()

                client_inputs_list = []
                for input_dataset_wrapper in job_wrapper.get_input_paths():
                    # str here to resolve false_path if set on a DatasetPath object.
                    path = str(input_dataset_wrapper)
                    object_store_ref = {
                        "dataset_id": input_dataset_wrapper.dataset_id,
                        "dataset_uuid": str(input_dataset_wrapper.dataset_uuid),
                        "object_store_id": input_dataset_wrapper.object_store_id,
                    }
                    client_inputs_list.append(ClientInput(path, CLIENT_INPUT_PATH_TYPES.INPUT_PATH, object_store_ref=object_store_ref))

                for input_extra_path in compute_environment.path_rewrites_input_extra.keys():
                    # TODO: track dataset for object_Store_ref...
                    client_inputs_list.append(ClientInput(input_extra_path, CLIENT_INPUT_PATH_TYPES.INPUT_EXTRA_FILES_PATH))

                for input_metadata_path in compute_environment.path_rewrites_input_metadata.keys():
                    # TODO: track dataset for object_Store_ref...
                    client_inputs_list.append(ClientInput(input_metadata_path, CLIENT_INPUT_PATH_TYPES.INPUT_METADATA_PATH))

                input_files = None
                client_inputs = ClientInputs(client_inputs_list)
            else:
                input_files = self.get_input_files(job_wrapper)
                client_inputs = None

            if self.app.config.metadata_strategy == "legacy":
                # Drop this branch in 19.09.
                metadata_directory = job_wrapper.working_directory
            else:
                metadata_directory = os.path.join(job_wrapper.working_directory, "metadata")

            remote_pulsar_app_config = job_destination.params.get("pulsar_app_config", {})
            job_directory_files = []
            config_files = job_wrapper.extra_filenames
            tool_script = os.path.join(job_wrapper.working_directory, "tool_script.sh")
            if os.path.exists(tool_script):
                log.debug("Registering tool_script for Pulsar transfer [%s]" % tool_script)
                job_directory_files.append(tool_script)
            client_job_description = ClientJobDescription(
                command_line=command_line,
                input_files=input_files,
                client_inputs=client_inputs,  # Only one of these input defs should be non-None
                client_outputs=self.__client_outputs(client, job_wrapper),
                working_directory=job_wrapper.tool_working_directory,
                metadata_directory=metadata_directory,
                tool=job_wrapper.tool,
                config_files=config_files,
                dependencies_description=dependencies_description,
                env=client.env,
                rewrite_paths=rewrite_paths,
                arbitrary_files=path_rewrites_unstructured,
                touch_outputs=output_names,
                remote_pulsar_app_config=remote_pulsar_app_config,
                job_directory_files=job_directory_files,
                container=None if not remote_container else remote_container.container_id,
            )
            job_id = pulsar_submit_job(client, client_job_description, remote_job_config)
            log.info("Pulsar job submitted with job_id %s" % job_id)
            job_wrapper.set_job_destination(job_destination, job_id)
            job_wrapper.change_state(model.Job.states.QUEUED)
        except Exception:
            job_wrapper.fail("failure running job", exception=True)
            log.exception("failure running job %d", job_wrapper.job_id)
            return

        pulsar_job_state = AsynchronousJobState()
        pulsar_job_state.job_wrapper = job_wrapper
        pulsar_job_state.job_id = job_id
        pulsar_job_state.old_state = True
        pulsar_job_state.running = False
        pulsar_job_state.job_destination = job_destination
        self.monitor_job(pulsar_job_state)
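A closing observation on the queue_job() examples (Pulsar, LWR, PBS, JSE-Drop): they all hand off to the monitor thread in the same way, i.e. submit the job, persist the destination and external id, then build an AsynchronousJobState and pass it to monitor_job() or monitor_queue. The sketch below distills that shared skeleton; self._submit_to_backend() is a hypothetical placeholder for the runner-specific submission call, and the other names match those used in the examples above.
 def queue_job(self, job_wrapper):
     """Distilled sketch of the submission pattern shared by the examples above."""
     job_destination = job_wrapper.job_destination
     try:
         # Runner-specific preparation and submission (hypothetical placeholder).
         external_job_id = self._submit_to_backend(job_wrapper, job_destination)
         # Persist the external id so the job can be recovered after a Galaxy restart.
         job_wrapper.set_job_destination(job_destination, external_job_id)
         job_wrapper.change_state(model.Job.states.QUEUED)
     except Exception:
         job_wrapper.fail("failure running job", exception=True)
         log.exception("failure running job %d", job_wrapper.job_id)
         return
     # Hand the job over to the asynchronous monitor loop.
     job_state = AsynchronousJobState()
     job_state.job_wrapper = job_wrapper
     job_state.job_id = external_job_id
     job_state.old_state = True
     job_state.running = False
     job_state.job_destination = job_destination
     self.monitor_job(job_state)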