def _get_output_event_tokens(self, job):
    """Create output event tokens for the owned job token.

    Args:
        job: The job whose output tokens should be generated.
    Returns:
        A list of event tokens corresponding to the outputs of the owned
        job token.
    """
    assert self._owned_job_token
    job_name = Name.from_job_token_name(self._owned_job_token.name)
    output_name = Name()
    output_name.workflow = job_name.workflow
    output_name.instance = job_name.instance
    output_name.input = job_name.job
    event_tokens = []
    for output in job.outputs:
        output_name.job = output
        output_name.event = get_unique_name()
        event = Event(creator=self._name)
        assert job.history
        execution_record = job.history[-1]
        event.attributes = execution_record.get_event_attributes()
        event_tokens.append(Token(name=output_name.get_event_token_name(),
                                  data=pickle.dumps(event)))
    return event_tokens

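# A hedged round-trip sketch of the event-token naming scheme used above.
# The attribute assignments mirror _get_output_event_tokens; the import
# path is an assumption about the Pinball source tree, and the literal
# field values are made up for illustration.
from pinball.workflow.name import Name  # assumed import path

output_name = Name()
output_name.workflow = 'some_workflow'
output_name.instance = '12345'
output_name.input = 'parent_job'   # the job that produced the event
output_name.job = 'child_job'      # the job that will consume it
output_name.event = 'some_event'
token_name = output_name.get_event_token_name()
parsed = Name.from_event_token_name(token_name)
assert parsed.input == 'parent_job'
assert parsed.job == 'child_job'
assert parsed.event == 'some_event'
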
def _get_job_names(self, workflow_name, instance, state):
    """Return list of job names in a given workflow instance and state.

    E.g., assume the following tokens are stored in the master:
        /workflow/some_workflow/12345/job/waiting/some_waiting_job
        /workflow/some_workflow/12345/job/waiting/some_other_waiting_job
        /workflow/some_workflow/12345/job/runnable/some_runnable_job
    the method called with workflow_name=some_workflow, instance=12345,
    state=waiting will return [some_waiting_job, some_other_waiting_job].
    """
    request = GroupRequest()
    name = Name()
    name.workflow = workflow_name
    name.instance = instance
    name.job_state = state
    request.namePrefix = name.get_job_state_prefix()
    request.groupSuffix = Name.DELIMITER
    response = self._client.group(request)
    job_names = []
    if response.counts:
        for job_name in response.counts.keys():
            name = Name.from_job_token_name(job_name)
            job_names.append(name.job)
    return job_names

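# Hedged sketch of the response handling above: group() is assumed to
# return counts keyed by full token names under the requested prefix
# (name layout matching test_job_token_name below).
from pinball.workflow.name import Name  # assumed import path

counts = {
    '/workflow/some_workflow/12345/job/waiting/some_waiting_job': 1,
    '/workflow/some_workflow/12345/job/waiting/some_other_waiting_job': 1,
}
job_names = [Name.from_job_token_name(n).job for n in counts.keys()]
# job_names -> ['some_waiting_job', 'some_other_waiting_job']
# (order not guaranteed)
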
def _simulate(self):
    """Simulate execution of active jobs."""
    tokens = self._store.read_tokens()
    satisfied_deps = set()
    executed_jobs = []
    jobs = {}
    for token in tokens:
        event_name = Name.from_event_token_name(token.name)
        if event_name.event:
            satisfied_deps.add((event_name.input, event_name.job))
        else:
            job_name = Name.from_job_token_name(token.name)
            if job_name.job:
                job = pickle.loads(token.data)
                jobs[job.name] = job
    dep_counts = collections.defaultdict(int)
    while satisfied_deps:
        last_satisfied_deps = satisfied_deps
        satisfied_deps = set()
        for (_, job_name) in last_satisfied_deps:
            dep_counts[job_name] += 1
            if dep_counts[job_name] == 2:
                executed_jobs.append(job_name)
                job = jobs[job_name]
                for output in job.outputs:
                    satisfied_deps.add((job_name, output))
    return executed_jobs

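# Self-contained illustration of the fixed-point loop in _simulate on a
# hypothetical toy graph, without the token store. Note the
# dep_counts == 2 check: the simulation fires a job once exactly two of
# its input dependencies have been satisfied.
import collections

outputs = {'parent_a': ['child'], 'parent_b': ['child'], 'child': []}
satisfied_deps = {('ext_1', 'parent_a'), ('ext_2', 'parent_a'),
                  ('ext_1', 'parent_b'), ('ext_2', 'parent_b')}
dep_counts = collections.defaultdict(int)
executed_jobs = []
while satisfied_deps:
    last_satisfied_deps = satisfied_deps
    satisfied_deps = set()
    for (_, job_name) in last_satisfied_deps:
        dep_counts[job_name] += 1
        if dep_counts[job_name] == 2:
            executed_jobs.append(job_name)
            for output in outputs[job_name]:
                satisfied_deps.add((job_name, output))
# executed_jobs -> both parents (in set iteration order), then 'child'
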
def _get_job_tokens(self, workflow=None, instance=None, job_state=None,
                    job=None):
    """Extract job tokens from the store.

    Args:
        workflow: The name of the workflow whose jobs we are interested in.
        instance: The name of the instance whose jobs we are interested in.
        job_state: The state of the jobs we are interested in.
        job: The name of the job we are interested in.
    Returns:
        List of job tokens matching the specification.
    """
    name = Name(workflow=workflow, instance=instance, job_state=job_state,
                job=job)
    if name.job:
        prefix = name.get_job_token_name()
    elif name.job_state:
        prefix = name.get_job_state_prefix()
    elif name.instance:
        prefix = name.get_job_prefix()
    elif name.workflow:
        prefix = name.get_workflow_prefix()
    else:
        prefix = ''
    tokens = self._store.read_tokens(name_prefix=prefix)
    result = []
    for token in tokens:
        token_name = Name.from_job_token_name(token.name)
        if token_name.get_job_token_name():
            # This is a job token.
            if not job or job == token_name.job:
                # We matched the prefix, so if we are looking for a
                # specific job, its name must match exactly.
                result.append(token)
    return result

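# The prefix narrowing above, spelled out on a concrete name. Only the
# full job token name is pinned down by test_job_token_name below; the
# intermediate prefixes are assumptions about the namespace layout.
from pinball.workflow.name import Name  # assumed import path

name = Name(workflow='some_workflow', instance='12345',
            job_state='waiting', job='some_job')
full_name = name.get_job_token_name()
# -> /workflow/some_workflow/12345/job/waiting/some_job
state_prefix = name.get_job_state_prefix()
# -> /workflow/some_workflow/12345/job/waiting/  (assumed)
job_prefix = name.get_job_prefix()
# -> /workflow/some_workflow/12345/job/  (assumed)
workflow_prefix = name.get_workflow_prefix()
# -> /workflow/some_workflow/  (assumed)
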
def _execute_job(self):
    """Execute the owned job."""
    assert self._owned_job_token
    job = pickle.loads(self._owned_job_token.data)
    name = Name.from_job_token_name(self._owned_job_token.name)
    self._executor = JobExecutor.from_job(name.workflow,
                                          name.instance,
                                          name.job,
                                          job,
                                          self._data_builder,
                                          self._emailer)
    success = self._executor.prepare()
    if success:
        self._owned_job_token.data = pickle.dumps(self._executor.job)
        success = self._update_owned_job_token()
        if success:
            self._start_renew_ownership()
            success = self._executor.execute()
            self._stop_renew_ownership()
    if success:
        self._move_job_token_to_waiting(self._executor.job, True)
    elif self._executor.job.retry():
        self._keep_job_token_in_runnable(self._executor.job)
    else:
        signaller = Signaller(self._client, name.workflow, name.instance)
        # If ARCHIVE is not set, this is the first failed job in the
        # workflow.
        first_failure = not signaller.is_action_set(Signal.ARCHIVE)
        self._move_job_token_to_waiting(self._executor.job, False)
        self._send_job_failure_emails(first_failure)
    self._executor = None
    self._owned_job_token = None
    # If needed, archive the workflow.
    self._process_signals(name.workflow, name.instance)

def get_workflow_jobs_from_parser(workflow, parser_caller):
    config_parser = load_parser_with_caller(PinballConfig.PARSER,
                                            PinballConfig.PARSER_PARAMS,
                                            parser_caller)
    tokens = config_parser.get_workflow_tokens(workflow)
    jobs_data = []
    for token in tokens:
        name = Name.from_job_token_name(token.name)
        if name.job:
            assert name.workflow == workflow
            job = pickle.loads(token.data)
            jobs_data.append(
                JobData(workflow=workflow,
                        instance=None,
                        job=name.job,
                        job_type=job.__class__.__name__,
                        is_condition=job.IS_CONDITION,
                        info=job.info(),
                        inputs=job.inputs,
                        outputs=job.outputs,
                        emails=job.emails,
                        max_attempts=job.max_attempts,
                        retry_delay_sec=job.retry_delay_sec,
                        warn_timeout_sec=job.warn_timeout_sec,
                        abort_timeout_sec=job.abort_timeout_sec,
                        priority=token.priority,
                        status=Status.NEVER_RUN))
    return jobs_data

def _send_job_failure_emails(self, first_failure):
    assert self._owned_job_token
    name = Name.from_job_token_name(self._owned_job_token.name)
    job = self._executor.job
    emails = set(job.emails)
    if first_failure:
        schedule_data = self._data_builder.get_schedule(name.workflow)
        if schedule_data:
            emails.update(schedule_data.emails)
        else:
            LOG.warning('no schedule found for workflow %s', name.workflow)
    if emails:
        execution = len(job.history) - 1
        job_execution_data = self._data_builder.get_execution(
            name.workflow, name.instance, name.job, execution)
        try:
            self._emailer.send_job_execution_end_message(
                list(emails), job_execution_data)
        except:
            LOG.exception('error sending job failure email for '
                          'workflow %s instance %s job %s execution %d',
                          name.workflow, name.instance, name.job,
                          execution)

def get_workflow_jobs_from_parser(workflow):
    config_parser = load_path(PinballConfig.PARSER)(
        PinballConfig.PARSER_PARAMS)
    tokens = config_parser.get_workflow_tokens(workflow)
    jobs_data = []
    for token in tokens:
        name = Name.from_job_token_name(token.name)
        if name.job:
            assert name.workflow == workflow
            job = pickle.loads(token.data)
            jobs_data.append(
                JobData(workflow=workflow,
                        instance=None,
                        job=name.job,
                        job_type=job.__class__.__name__,
                        is_condition=job.IS_CONDITION,
                        info=job.info(),
                        inputs=job.inputs,
                        outputs=job.outputs,
                        emails=job.emails,
                        max_attempts=job.max_attempts,
                        retry_delay_sec=job.retry_delay_sec,
                        warn_timeout_sec=job.warn_timeout_sec,
                        abort_timeout_sec=job.abort_timeout_sec,
                        priority=token.priority,
                        status=Status.NEVER_RUN))
    return jobs_data

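# Hedged usage sketch for the single-argument variant above: materialize
# the never-run job listing for a workflow straight from the configured
# parser ('some_workflow' is illustrative).
jobs_data = get_workflow_jobs_from_parser('some_workflow')
for job_data in jobs_data:
    assert job_data.status == Status.NEVER_RUN
    assert job_data.instance is None  # parser jobs have no instance yet
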
def test_job_token_name(self):
    NAME = '/workflow/some_workflow/some_instance/job/waiting/some_job'
    name = Name.from_job_token_name(NAME)
    self.assertEqual('some_workflow', name.workflow)
    self.assertEqual('some_instance', name.instance)
    self.assertEqual('waiting', name.job_state)
    self.assertEqual('some_job', name.job)
    self.assertEqual(NAME, name.get_job_token_name())

def test_change_instance(self):
    self._add_active_workflow_tokens()
    self._archive_tokens()
    analyzer = Analyzer.from_store(self._store, 'some_workflow', '123')
    analyzer.change_instance('321')
    tokens = analyzer.get_tokens()
    self.assertLess(0, len(tokens))
    for token in tokens:
        name = Name.from_job_token_name(token.name)
        self.assertEqual('321', name.instance)

def _filter_job_tokens(self, tokens):
    """Filter out all tokens which are not job tokens.

    Args:
        tokens: The tokens to filter.
    """
    for token in tokens:
        name = Name.from_job_token_name(token.name)
        if not self._instance and name.instance:
            self._instance = name.instance
        if name.job:
            job = pickle.loads(token.data)
            self._jobs[job.name] = job
            self._job_priorities[job.name] = token.priority

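# The payload convention the filter relies on: jobs travel in token.data
# as pickles. A hypothetical stub stands in for the real job class
# produced by the workflow parser.
import pickle

class StubJob(object):
    def __init__(self, name, inputs, outputs):
        self.name = name
        self.inputs = inputs
        self.outputs = outputs

data = pickle.dumps(StubJob('some_job', ['some_input'], ['some_output']))
job = pickle.loads(data)
assert job.name == 'some_job' and job.outputs == ['some_output']
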
def _job_data_from_job_token(job_token, instance_start_time,
                             instance_end_time):
    """Extract job data from a job token.

    Args:
        job_token: The job token that should be converted to data.
        instance_start_time: The start time of the workflow instance that
            this job belongs to.
        instance_end_time: The end time of the workflow instance that this
            job belongs to, or the current time if the instance did not
            yet finish.
    Returns:
        The job data extracted from the token.
    """
    status = DataBuilder._job_status_from_job_token(job_token)
    job = pickle.loads(job_token.data)
    if job.history:
        last_execution_record = job.history[-1]
        last_start_time = last_execution_record.start_time
        last_end_time = last_execution_record.end_time
    else:
        last_start_time = None
        last_end_time = None
    name = Name.from_job_token_name(job_token.name)
    progress = DataBuilder._get_progress(job.history, instance_start_time,
                                         instance_end_time)
    # TODO(mao): Change the status name from FAILURE to PENDING
    # if the condition is in pending status.
    return JobData(workflow=name.workflow,
                   instance=name.instance,
                   job=name.job,
                   job_type=job.__class__.__name__,
                   is_condition=job.IS_CONDITION,
                   info=job.info(),
                   inputs=job.inputs,
                   outputs=job.outputs,
                   emails=job.emails,
                   max_attempts=job.max_attempts,
                   retry_delay_sec=job.retry_delay_sec,
                   warn_timeout_sec=job.warn_timeout_sec,
                   abort_timeout_sec=job.abort_timeout_sec,
                   priority=job_token.priority,
                   status=status,
                   last_start_time=last_start_time,
                   last_end_time=last_end_time,
                   progress=progress)

def _instances_data_from_job_tokens(self, job_tokens):
    """Extract instance data from job tokens.

    Args:
        job_tokens: Job tokens, potentially from different instances.
    Returns:
        List of workflow instance data defined by input job tokens.
    """
    result = []
    job_tokens_per_instance = collections.defaultdict(list)
    for job_token in job_tokens:
        name = Name.from_job_token_name(job_token.name)
        job_tokens_per_instance[name.get_instance_prefix()].append(
            job_token)
    for _, tokens in job_tokens_per_instance.items():
        result.append(self._instance_data_from_job_tokens(tokens))
    return result

def _process_abort_signals(self):
    """Check if the running job should be aborted.

    Returns:
        False iff the job has been aborted.
    """
    name = Name.from_job_token_name(self._owned_job_token.name)
    abort = False
    try:
        signaller = Signaller(self._client, name.workflow, name.instance)
        abort = signaller.is_action_set(Signal.ABORT)
    except (TTransport.TTransportException, socket.timeout, socket.error):
        # We need this exception handler only in logic located in the
        # Timer thread. If that thread fails, we should abort the process
        # and let the main thread decide what to do.
        LOG.exception('')
        abort = True
    if abort:
        self._abort()
    return not abort

def run(self, emailer, store):
    if not self._check_workflow_instances(emailer, self.workflow, store):
        LOG.warning('too many instances running for workflow %s',
                    self.workflow)
        return None
    config_parser = load_path(PinballConfig.PARSER)(self.parser_params)
    workflow_tokens = config_parser.get_workflow_tokens(self.workflow)
    if not workflow_tokens:
        LOG.error('workflow %s not found', self.workflow)
        return None
    result = ModifyRequest()
    result.updates = workflow_tokens
    assert result.updates
    token = result.updates[0]
    name = Name.from_job_token_name(token.name)
    if not name.instance:
        name = Name.from_event_token_name(token.name)
    LOG.info('exporting workflow %s instance %s. Its tokens are under %s',
             name.workflow, name.instance, name.get_instance_prefix())
    return result

def _job_status_from_job_token(job_token):
    """Extract job status from a job token.

    Args:
        job_token: The token to extract status from.
    Returns:
        Status of the job.
    """
    name = Name.from_job_token_name(job_token.name)
    job = pickle.loads(job_token.data)
    if not job.history:
        return Status.DISABLED if job.disabled else Status.NEVER_RUN
    last_execution_record = job.history[-1]
    if (name.job_state == Name.RUNNABLE_STATE and
            not last_execution_record.end_time):
        return Status.RUNNING
    if job.disabled:
        return Status.DISABLED
    if last_execution_record.exit_code != 0:
        return Status.FAILURE
    return Status.SUCCESS

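# The status precedence above, restated as a pure function so it can be
# checked without tokens. This is an illustrative mirror of the logic,
# not the real API; 'runnable' stands for
# name.job_state == Name.RUNNABLE_STATE, and each history entry is an
# (end_time, exit_code) pair.
def _status_of(disabled, history, runnable):
    if not history:
        return 'DISABLED' if disabled else 'NEVER_RUN'
    end_time, exit_code = history[-1]
    if runnable and not end_time:
        return 'RUNNING'
    if disabled:
        return 'DISABLED'
    return 'FAILURE' if exit_code != 0 else 'SUCCESS'

assert _status_of(False, [], False) == 'NEVER_RUN'
assert _status_of(False, [(None, None)], True) == 'RUNNING'
assert _status_of(True, [(100, 1)], False) == 'DISABLED'
assert _status_of(False, [(100, 1)], False) == 'FAILURE'
assert _status_of(False, [(100, 0)], False) == 'SUCCESS'
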
def _make_job_runnable(self, job_token):
    """Attempt to make a job runnable.

    Query event tokens in job inputs. If a combination of triggering
    events exists, remove those events and make the job runnable.
    Otherwise, do nothing.

    Args:
        job_token: The job token to make runnable.
    Returns:
        True if there were no errors during communication with the
        master, otherwise False.
    """
    job = pickle.loads(job_token.data)
    name = Name.from_job_token_name(job_token.name)
    request = QueryRequest(queries=[])
    # TODO(pawel): handle jobs with no dependencies
    assert job.inputs
    for input_name in job.inputs:
        prefix = Name()
        prefix.workflow = name.workflow
        prefix.instance = name.instance
        prefix.job = name.job
        prefix.input = input_name
        query = Query()
        query.namePrefix = prefix.get_input_prefix()
        query.maxTokens = 1
        request.queries.append(query)
    try:
        response = self._client.query(request)
    except TokenMasterException:
        # TODO(pawel): add a retry count and fail if a limit is reached.
        LOG.exception('error sending request %s', request)
        return False
    triggering_events = Worker._get_triggering_events(response.tokens)
    if triggering_events:
        return self._move_job_token_to_runnable(job_token,
                                                triggering_events)
    return True

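# The per-input query fan-out above, in isolation: one Query per input,
# capped at a single token each, so a full response means every input
# has at least one pending event. Name usage mirrors the method; the
# resulting prefix literals depend on Pinball's namespace layout.
from pinball.workflow.name import Name  # assumed import path

prefixes = []
for input_name in ['first_input', 'second_input']:
    prefix = Name()
    prefix.workflow = 'some_workflow'
    prefix.instance = '12345'
    prefix.job = 'some_job'
    prefix.input = input_name
    prefixes.append(prefix.get_input_prefix())
assert len(prefixes) == 2  # -> two queries with maxTokens=1 each
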
def _move_job_token_to_runnable(self, job_token, triggering_event_tokens):
    """Move a job token to the runnable branch of the token tree.

    The token tree is the global, hierarchically structured token
    namespace.

    Args:
        job_token: The job token to make runnable.
        triggering_event_tokens: The list of events used to trigger the
            job. These events will be removed from the master in the same
            call that makes the job token runnable.
    Returns:
        True on success, otherwise False.
    """
    name = Name.from_job_token_name(job_token.name)
    name.job_state = Name.RUNNABLE_STATE
    job = pickle.loads(job_token.data)
    Worker._add_events_to_job(job, triggering_event_tokens)
    runnable_job_token = Token(name=name.get_job_token_name(),
                               priority=job_token.priority,
                               data=pickle.dumps(job))
    request = ModifyRequest(updates=[runnable_job_token],
                            deletes=triggering_event_tokens + [job_token])
    return self._send_request(request)

def _move_job_token_to_waiting(self, job, succeeded):
    """Move the owned job token to the waiting group.

    If the job succeeded, also post events to job outputs. If the job
    failed or it is the final job (a job with no outputs), post an
    archive signal to finish the workflow.

    Args:
        job: The job that should be stored in the data field of the
            waiting job token.
        succeeded: True if the job succeeded, otherwise False.
    """
    assert self._owned_job_token
    name = Name.from_job_token_name(self._owned_job_token.name)
    name.job_state = Name.WAITING_STATE
    waiting_job_token = Token(name=name.get_job_token_name(),
                              priority=self._owned_job_token.priority,
                              data=pickle.dumps(job))
    request = ModifyRequest(deletes=[self._owned_job_token],
                            updates=[waiting_job_token])
    if succeeded:
        request.updates.extend(self._get_output_event_tokens(job))
    if not job.outputs or not succeeded:
        # This is either the final job in the workflow (a job with no
        # outputs) or a failed job. In either case, the workflow is done.
        signaller = Signaller(self._client,
                              workflow=name.workflow,
                              instance=name.instance)
        if not signaller.is_action_set(Signal.ARCHIVE):
            signal_name = Name(
                workflow=name.workflow,
                instance=name.instance,
                signal=Signal.action_to_string(Signal.ARCHIVE))
            signal = Signal(Signal.ARCHIVE)
            signal_token = Token(name=signal_name.get_signal_token_name())
            signal_token.data = pickle.dumps(signal)
            request.updates.append(signal_token)
    self._send_request(request)

def _parse_job_token_name(token_name):
    name = Name.from_job_token_name(token_name)
    if name.workflow:
        return name
    return None

def _instance_data_from_job_tokens(self, job_tokens):
    """Extract instance data from job tokens belonging to a single
    instance.

    Args:
        job_tokens: Job tokens that belong to a single workflow instance.
    Returns:
        Workflow data describing the workflow instance identified by the
        input job tokens.
    """
    assert job_tokens
    start_time = time.time()
    # end_time == 0 means that no job has a finished execution record;
    # sys.maxint marks an execution that is still running.
    end_time = 0
    failed = False
    for job_token in job_tokens:
        job = pickle.loads(job_token.data)
        if job.history:
            first_execution_record = job.history[0]
            if (first_execution_record.start_time and
                    first_execution_record.start_time < start_time):
                start_time = first_execution_record.start_time
            last_execution_record = job.history[-1]
            if not last_execution_record.end_time:
                end_time = sys.maxint
            else:
                if last_execution_record.end_time > end_time:
                    end_time = last_execution_record.end_time
            if (not job.disabled and
                    last_execution_record.exit_code != 0):
                failed = True
    if not job_tokens:
        is_active = False
    else:
        is_active = True
        job_name = job_tokens[0].name
        archived_tokens = self._store.read_archived_tokens(
            name_prefix=job_name)
        for token in archived_tokens:
            if token.name == job_name:
                is_active = False
                break
    name = Name.from_job_token_name(job_tokens[0].name)
    is_scheduled_for_archive = False
    abort_signal = None
    if is_active:
        archive_signal = self._get_signal(name.workflow,
                                          name.instance,
                                          Signal.ARCHIVE,
                                          True)
        is_scheduled_for_archive = (
            archive_signal and
            Signal.TIMESTAMP_ATTR in archive_signal.attributes)
    else:
        abort_signal = self._get_signal(name.workflow,
                                        name.instance,
                                        Signal.ABORT,
                                        False)
    if abort_signal:
        status = Status.ABORTED
        if end_time == 0:
            # This can happen only if all jobs have an empty history.
            timestamp = abort_signal.attributes.get(Signal.TIMESTAMP_ATTR)
            start_time = timestamp
            end_time = timestamp
    elif (end_time == 0 or end_time == sys.maxint or
          (is_active and not is_scheduled_for_archive)):
        status = Status.RUNNING
        end_time = None
    elif failed:
        status = Status.FAILURE
    else:
        status = Status.SUCCESS
    return WorkflowInstanceData(name.workflow,
                                name.instance,
                                status,
                                start_time,
                                end_time)