def test_own_runnable_job_token(self):
    """Owning is a no-op until the triggering event token is posted."""
    self._post_job_tokens()
    self._worker._own_runnable_job_token()
    # Event token is not present so nothing should have changed.
    expected_names = []
    for job in ['parent_job', 'child_job']:
        expected_names.append(Name(workflow='some_workflow',
                                   instance='12345',
                                   job_state=Name.WAITING_STATE,
                                   job=job).get_job_token_name())
    self._verify_token_names(expected_names)
    self.assertIsNone(self._worker._owned_job_token)

    self._post_workflow_start_event_token()
    self._worker._own_runnable_job_token()
    # Worker should now own a runnable job token.
    self._verify_parent_job_runnable()
    runnable_name = Name(workflow='some_workflow',
                         instance='12345',
                         job_state=Name.RUNNABLE_STATE,
                         job='parent_job').get_job_token_name()
    parent_token = self._get_token(runnable_name)
    self.assertEqual(parent_token, self._worker._owned_job_token)
def _verify_parent_job_runnable(self):
    """Check the parent job is RUNNABLE while the child stays WAITING."""
    expected = [
        Name(workflow='some_workflow',
             instance='12345',
             job_state=state,
             job=job).get_job_token_name()
        for job, state in (('parent_job', Name.RUNNABLE_STATE),
                           ('child_job', Name.WAITING_STATE))
    ]
    self._verify_token_names(expected)
def _add_active_workflow_tokens(self):
    """Add some active workflow tokens.

    The job dependencies form a complete binary tree turned upside down.
    I.e., each job has two parents.
    """
    self._store = EphemeralStore()
    version = 1
    for level in range(AnalyzerTestCase._NUM_LEVELS):
        jobs_at_level = 2 ** (AnalyzerTestCase._NUM_LEVELS - level - 1)
        for job_index in range(jobs_at_level):
            job_name = 'job_%d_%d' % (level, job_index)
            event_name = Name(workflow='some_workflow',
                              instance='123',
                              job=job_name,
                              event='some_event')
            if level == 0:
                # Top-level jobs are triggered by the workflow start input.
                inputs = [Name.WORKFLOW_START_INPUT,
                          Name.WORKFLOW_START_INPUT + '_prime']
                event_name.input = Name.WORKFLOW_START_INPUT
            else:
                inputs = ['job_%d_%d' % (level - 1, 2 * job_index),
                          'job_%d_%d' % (level - 1, 2 * job_index + 1)]
                event_name.input = 'job_%d_%d' % (level - 1, 2 * job_index)
            if level == AnalyzerTestCase._NUM_LEVELS - 1:
                outputs = []
            else:
                # Floor division: plain '/' is true division under
                # Python 3 and would yield names like 'job_1_0.5'.
                outputs = ['job_%d_%d' % (level + 1, job_index // 2)]
            job = ShellJob(name=job_name,
                           inputs=inputs,
                           outputs=outputs,
                           command='some_command')
            job.history.append(ExecutionRecord())
            name = Name(workflow='some_workflow',
                        instance='123',
                        job_state=Name.WAITING_STATE,
                        job=job_name)
            job_token = Token(version=version,
                              name=name.get_job_token_name(),
                              priority=10,
                              data=pickle.dumps(job))
            version += 1
            event = Event('some_event')
            event_token = Token(version=version,
                                name=event_name.get_event_token_name(),
                                priority=10,
                                data=pickle.dumps(event))
            self._store.commit_tokens([job_token, event_token])
def test_run(self, job_executor_mock):
    """Run the worker twice; the second run (a day later) archives."""
    self._post_job_tokens()
    self._post_workflow_start_event_token()
    job_executor_mock.from_job.side_effect = WorkerTestCase._from_job
    self._worker._test_only_end_if_no_runnable = True
    self._worker.run()

    with mock.patch('pinball.workflow.archiver.time') as time_patch:
        # add one day
        time_patch.time.return_value = time.time() + 24 * 60 * 60
        self._worker.run()

    def _waiting_name(job):
        return Name(workflow='some_workflow',
                    instance='12345',
                    job_state=Name.WAITING_STATE,
                    job=job).get_job_token_name()

    parent_job_token_name = _waiting_name('parent_job')
    child_job_token_name = _waiting_name('child_job')
    signal_token_name = Name(
        workflow='some_workflow',
        instance='12345',
        signal=Signal.action_to_string(
            Signal.ARCHIVE)).get_signal_token_name()
    self._verify_archived_token_names([parent_job_token_name,
                                       child_job_token_name,
                                       signal_token_name])

    self.assertEqual(2, job_executor_mock.from_job.call_count)

    # Both jobs should have exactly one successful execution record.
    for token_name in (parent_job_token_name, child_job_token_name):
        job = pickle.loads(self._get_stored_token(token_name).data)
        self.assertEqual(1, len(job.history))
        execution_record = job.history[0]
        self.assertEqual(0, execution_record.exit_code)
        self.assertEqual(1234567, execution_record.end_time)

    signal = pickle.loads(self._get_stored_token(signal_token_name).data)
    self.assertEqual(Signal.ARCHIVE, signal.action)
def get_job_token(self, workflow_name, workflow_instance):
    """Convert the job to a pinball token representing its properties.

    Condition is similar to job.  We check the template to decide use
    job_templates or condition_templates dynamically.

    Args:
        workflow_name: The name of the workflow in which context this job
            is instantiated.
        workflow_instance: The workflow instance of the output job token.
    Returns:
        A pinball token representing the job.
    """
    if self.workflow.name == workflow_name:
        inputs = [input_job.name for input_job in self.inputs]
    else:
        # If it's an external job, do not include its inputs.
        inputs = []
    if not inputs:
        inputs = [Name.WORKFLOW_START_INPUT]
    outputs = []
    for output_job in self.outputs:
        if output_job.workflow.name == workflow_name:
            outputs.append(output_job.name)
    # isinstance is the idiomatic, equivalent form of
    # issubclass(x.__class__, ...).
    if isinstance(self.template, JobTemplateBase):
        params = self._get_template_params()
        job = self.template.get_pinball_job(inputs, outputs, params)
        name = Name(workflow=workflow_name,
                    instance=workflow_instance,
                    job_state=Name.WAITING_STATE,
                    job=self.name)
        result = Token(name=name.get_job_token_name(),
                       data=pickle.dumps(job))
        result.priority = self.compute_score()
    elif isinstance(self.template, ConditionTemplateBase):
        condition = self.template.get_pinball_condition(outputs)
        name = Name(workflow=workflow_name,
                    instance=workflow_instance,
                    job_state=Name.WAITING_STATE,
                    job=self.name)
        result = Token(name=name.get_job_token_name(),
                       data=pickle.dumps(condition))
    else:
        raise Exception(
            "Template must be a subclass of JobTemplateBase or "
            "ConditionTemplateBase!")
    return result
def _refresh_actions(self):
    """Reload actions from the master."""
    name = Name()
    # Always query the global signal level; narrow to workflow and
    # instance levels when they are configured.
    queries = [Query(namePrefix=name.get_signal_prefix())]
    if self._workflow:
        name.workflow = self._workflow
        queries.append(Query(namePrefix=name.get_signal_prefix()))
    if self._instance:
        name.instance = self._instance
        queries.append(Query(namePrefix=name.get_signal_prefix()))
    request = QueryRequest(queries=queries)
    response = self._client.query(request)
    signal_tokens = []
    for token_group in response.tokens:
        signal_tokens.extend(token_group)
    self._dedup_actions(signal_tokens)
def _make_runnable(self, workflow, instance):
    """Attempt to make jobs in a given workflow instance runnable.

    Go over all waiting jobs in a given workflow instance and try to make
    them runnable.

    Args:
        workflow: The name of the workflow whose jobs should be
            considered.
        instance: The workflow instance whose jobs should be considered.
    Returns:
        True if there were no errors during communication with the
        master, otherwise False.
    """
    waiting = Name(workflow=workflow,
                   instance=instance,
                   job_state=Name.WAITING_STATE)
    # TODO(pawel): to prevent multiple workers from trying to make the
    # same job runnable at the same time, this should be a
    # QueryAndOwnRequest.  Note that the current implementation is
    # correct, just inefficient.
    request = QueryRequest(
        queries=[Query(namePrefix=waiting.get_job_state_prefix())])
    try:
        response = self._client.query(request)
    except TokenMasterException:
        LOG.exception('error sending request %s', request)
        return False
    assert len(response.tokens) == 1
    # all() short-circuits on the first failure, like the original loop.
    return all(self._make_job_runnable(token)
               for token in response.tokens[0])
def _post_signal_tokens(self):
    """Add some signal tokens to the master."""
    request = ModifyRequest(updates=[])
    name = Name(signal='exit')

    def _append_signal(signal):
        # Snapshot the current state of `name` into a new signal token.
        token = Token(name=name.get_signal_token_name())
        token.data = pickle.dumps(signal)
        request.updates.append(token)

    _append_signal(Signal(action=Signal.EXIT))

    name.signal = 'drain'
    name.workflow = 'some_workflow'
    drain_signal = Signal(action=Signal.DRAIN)
    _append_signal(drain_signal)

    name.instance = '123'
    _append_signal(drain_signal)

    name.signal = 'abort'
    _append_signal(Signal(action=Signal.ABORT))

    self._factory.get_client().modify(request)
def is_signal_set(self, workflow, instance, action):
    """Check if a signal is set.

    Args:
        workflow: The workflow whose signal should be checked.  If None,
            signals at the global level are checked.
        instance: The workflow instance whose signal should be checked.
            If not None, a matching workflow name must be provided.  If
            None, signals at the workflow and the global level are
            checked.
        action: The signal action to check.
    Returns:
        True iff the signal exists in the specified context.
    """
    signal_string = Signal.action_to_string(action)
    # Check the instance level, then the workflow level, then global.
    for workflow_name, instance_name in [(workflow, instance),
                                         (workflow, None),
                                         (None, None)]:
        token_name = Name(workflow=workflow_name,
                          instance=instance_name,
                          signal=signal_string).get_signal_token_name()
        tokens = self._store.read_tokens(token_name)
        assert len(tokens) <= 1
        if tokens:
            return True
    return False
def get_schedule(self, workflow):
    """Get workflow schedule data from the store.

    Args:
        workflow: The name of the workflow whose schedule should be
            retrieved.
    Returns:
        The workflow schedule or None if it was not found.
    """
    schedule_token_name = Name(
        workflow=workflow).get_workflow_schedule_token_name()
    tokens = self._store.read_tokens(name_prefix=schedule_token_name)
    for token in tokens or []:
        if token.name != schedule_token_name:
            continue
        schedule = pickle.loads(token.data)
        return WorkflowScheduleData(
            next_run_time=schedule.next_run_time,
            recurrence_seconds=schedule.recurrence_seconds,
            overrun_policy=schedule.overrun_policy,
            overrun_policy_help=OverrunPolicy.get_help(
                schedule.overrun_policy),
            workflow=schedule.workflow,
            parser_params=schedule.parser_params,
            emails=schedule.emails,
            max_running_instances=schedule.max_running_instances)
    return None
def _get_instance_using_cache(self, workflow, instance):
    """Get workflow instance, preferably from the cache.

    As a side effect, if the instance is archived and it does not exist
    in the cache, it will be added to the cache.

    Args:
        workflow: The name of the workflow whose instance we are
            interested in.
        instance: The instance we are interested in.
    Returns:
        The workflow instance or None if it was not found.
    """
    instance_prefix = Name(workflow=workflow,
                           instance=instance).get_instance_prefix()
    cached = self._store.get_cached_data(instance_prefix)
    if cached:
        return pickle.loads(cached)
    # Cache only archived instances.  The ordering of operations is
    # important: the archived check precedes building the instance data,
    # so the cached value is constructed from the archived tokens.
    if self._store.read_archived_token_names(name_prefix=instance_prefix):
        instance_data = self._get_instance_no_cache(workflow, instance)
        self._store.set_cached_data(instance_prefix,
                                    pickle.dumps(instance_data))
    else:
        instance_data = self._get_instance_no_cache(workflow, instance)
    return instance_data
def _get_instances_using_cache(self, workflow):
    """Get workflow instances, preferably from the cache.

    As a side effect, archived instances that do not exist in the cache
    will be added to the cache.

    Args:
        workflow: The name of the workflow whose instances we are
            interested in.
    Returns:
        List of instances for the given workflow.
    """
    workflow_prefix = Name(workflow=workflow).get_workflow_prefix()
    token_names = self._store.read_token_names(
        name_prefix=workflow_prefix)
    result = []
    for prefix in DataBuilder._get_instance_prefixes(token_names):
        parsed = Name.from_instance_prefix(prefix)
        assert parsed.workflow and parsed.instance, (
            'Expected instance prefix, found %s' % prefix)
        result.append(self.get_instance(parsed.workflow, parsed.instance))
    return result
def _get_jobs(self, workflow, job):
    """Get job definitions from the store across all workflow instances.

    Args:
        workflow: The name of the job workflow.
        job: The name of the job.
    Returns:
        List of matching job definitions, one per workflow instance.
    """
    name = Name(workflow=workflow)
    name_prefix = name.get_workflow_prefix()
    # This is a bit hacky since we bypass the Name module where all the
    # token naming logic is supposed to be located.
    # TODO(pawel): extend the Name module to support abstractions needed
    # here.
    name_infix = '/job/'
    name_suffix = '/%s' % job
    job_tokens = self._store.read_tokens(name_prefix=name_prefix,
                                         name_infix=name_infix,
                                         name_suffix=name_suffix)
    return [pickle.loads(job_token.data) for job_token in job_tokens]
def set_action(self, action):
    """Send a signal with a specific action to the master.

    Local signal store gets updated with the new action if it is
    successfully submitted to the master.  If the communication with the
    master fails, locally stored signals get refreshed.

    Args:
        action: The action to set.
    """
    attributes = {}
    if action == Signal.ABORT:
        attributes[Signal.TIMESTAMP_ATTR] = time.time()
    elif action == Signal.EXIT:
        attributes[Signal.GENERATION_ATTR] = PinballConfig.GENERATION
    existing = self._signals.get(action)
    if existing and existing.attributes == attributes:
        # The identical signal is already set; nothing to do.
        return
    # A signal with the same action but different data may already exist
    # in the master.
    signal_token = self._get_signal_token(action)
    if not signal_token:
        token_name = Name(
            workflow=self._workflow,
            instance=self._instance,
            signal=Signal.action_to_string(
                action)).get_signal_token_name()
        signal_token = Token(name=token_name)
    signal = Signal(action, attributes)
    signal_token.data = pickle.dumps(signal)
    if self._send_request(ModifyRequest(updates=[signal_token])):
        self._signals[action] = signal
def get_workflow_tokens(self):
    """Create Pinball tokens representing a workflow instance.

    Convert workflow jobs to tokens and create event tokens in inputs of
    top-level jobs.

    Returns:
        A list of job and event tokens representing a workflow instance.
    """
    all_jobs = self._get_transitive_deps()
    instance = get_unique_workflow_instance()
    result = [job.get_job_token(self.name, instance) for job in all_jobs]
    # Top-level jobs have no upstream jobs; trigger them with an explicit
    # workflow start event on each one's start input.
    for job in self._get_top_level_jobs():
        event_name = Name(workflow=self.name,
                          instance=instance,
                          job=job.name,
                          input_name=Name.WORKFLOW_START_INPUT,
                          event='workflow_start_event')
        result.append(
            Token(name=event_name.get_event_token_name(),
                  data=pickle.dumps(Event(creator='parser'))))
    return result
def _is_done(self, workflow, instance):
    """Check if the workflow instance is done.

    A workflow is done if it does not have runnable jobs.

    Returns:
        True if we are certain that the workflow is not running.
        Otherwise False.  If there were any errors during communication
        with the master, the return value is False.
    """
    # Attempt to make the workflow runnable and verify that no WAITING
    # job tokens were changed in the meantime.
    name = Name(workflow=workflow,
                instance=instance,
                job_state=Name.WAITING_STATE)
    query = Query(namePrefix=name.get_job_state_prefix())
    request = QueryRequest(queries=[query])
    try:
        snapshot = Snapshot(self._client, request)
    except Exception:
        # Narrowed from a bare except, which would also swallow
        # KeyboardInterrupt and SystemExit.
        LOG.exception('error sending request %s', request)
        return False
    if not self._make_runnable(workflow, instance):
        return False
    if not self._has_no_runnable_jobs(workflow, instance):
        return False
    try:
        return not snapshot.refresh()
    except Exception:
        LOG.exception('error sending request %s', request)
        return False
def _get_job_names(self, workflow_name, instance, state):
    """Return list of job names in a given workflow instance and state.

    E.g., assume the following tokens are stored in the master:
        /workflow/some_workflow/12345/waiting/some_waiting_job
        /workflow/some_workflow/12345/waiting/some_other_waiting_job
        /workflow/some_workflow/12345/runnable/some_runnable_job

    the method called with workflow_name=some_workflow, instance=12345,
    state=waiting will return [some_waiting_job,
    some_other_waiting_job].
    """
    request = GroupRequest()
    name = Name()
    name.workflow = workflow_name
    name.instance = instance
    name.job_state = state
    request.namePrefix = name.get_job_state_prefix()
    request.groupSuffix = Name.DELIMITER
    response = self._client.group(request)
    job_names = []
    if response.counts:
        # Iterate the mapping directly instead of materializing .keys().
        for job_token_name in response.counts:
            job_names.append(
                Name.from_job_token_name(job_token_name).job)
    return job_names
def _get_output_event_tokens(self, job):
    """Create output event tokens for the owned job token.

    Args:
        job: The job which output tokens should be generated.
    Returns:
        A list of event tokens corresponding to the outputs of the owned
        job token.
    """
    assert self._owned_job_token
    owned_name = Name.from_job_token_name(self._owned_job_token.name)
    # The owned job becomes the input of each downstream output event.
    output_name = Name()
    output_name.workflow = owned_name.workflow
    output_name.instance = owned_name.instance
    output_name.input = owned_name.job
    event_tokens = []
    for output in job.outputs:
        output_name.job = output
        output_name.event = get_unique_name()
        event = Event(creator=self._name)
        assert job.history
        event.attributes = job.history[-1].get_event_attributes()
        event_tokens.append(
            Token(name=output_name.get_event_token_name(),
                  data=pickle.dumps(event)))
    return event_tokens
def _query_and_own_runnable_job_token(self, workflow, instance):
    """Attempt to own a runnable job token from a given workflow
    instance.

    Try to own a runnable job token in a given workflow instance.  The
    ownership of the qualifying job token lasts for a limited time so it
    has to be periodically renewed.

    Args:
        workflow: The name of the workflow whose jobs should be
            considered.
        instance: The workflow instance whose jobs should be considered.
    """
    assert not self._owned_job_token
    runnable = Name(workflow=workflow,
                    instance=instance,
                    job_state=Name.RUNNABLE_STATE)
    query = Query()
    query.namePrefix = runnable.get_job_state_prefix()
    query.maxTokens = 1
    request = QueryAndOwnRequest()
    request.query = query
    request.expirationTime = time.time() + Worker._LEASE_TIME_SEC
    request.owner = self._name
    try:
        response = self._client.query_and_own(request)
        if response.tokens:
            assert len(response.tokens) == 1
            self._owned_job_token = response.tokens[0]
    except TokenMasterException:
        LOG.exception('error sending request %s', request)
def _verify_parent_job_waiting(self):
    """Check both jobs are WAITING and the start event still exists."""
    waiting_job_names = [
        Name(workflow='some_workflow',
             instance='12345',
             job_state=Name.WAITING_STATE,
             job=job_name).get_job_token_name()
        for job_name in ('parent_job', 'child_job')
    ]
    start_event_name = Name(
        workflow='some_workflow',
        instance='12345',
        job='parent_job',
        input_name=Name.WORKFLOW_START_INPUT,
        event='workflow_start_event').get_event_token_name()
    self._verify_token_names(waiting_job_names + [start_event_name])
def test_move_job_token_to_runnable(self):
    """Moving a job token with its event should make the job runnable."""
    self._post_job_tokens()
    self._post_workflow_start_event_token()
    job_token = self._get_token(
        Name(workflow='some_workflow',
             instance='12345',
             job_state=Name.WAITING_STATE,
             job='parent_job').get_job_token_name())
    event_token = self._get_token(
        Name(workflow='some_workflow',
             instance='12345',
             job='parent_job',
             input_name=Name.WORKFLOW_START_INPUT,
             event='workflow_start_event').get_event_token_name())

    self._worker._move_job_token_to_runnable(job_token, [event_token])

    # Event token should have been removed and the parent job should be
    # runnable.
    self._verify_parent_job_runnable()
def get_schedule_token(self):
    """Create a token describing workflow execution schedule."""
    self.schedule.advance_next_run_time()
    # The token expires when the next run is due.
    schedule_name = Name(workflow=self.name)
    return Token(name=schedule_name.get_workflow_schedule_token_name(),
                 owner='parser',
                 expirationTime=self.schedule.next_run_time,
                 data=pickle.dumps(self.schedule))
def _read_tokens_from_store(self, store):
    """Read archived job tokens from the store.

    Args:
        store: The store to read tokens from.
    """
    instance_prefix = Name(
        workflow=self._workflow,
        instance=self._instance).get_instance_prefix()
    archived_tokens = store.read_archived_tokens(
        name_prefix=instance_prefix)
    self._filter_job_tokens(archived_tokens)
def _get_schedule_token():
    """Build an already-expired schedule token for workflow_0."""
    now = int(time.time())
    schedule = WorkflowSchedule(next_run_time=now - 10,
                                recurrence_seconds=10,
                                workflow='workflow_0')
    token = Token(
        name=Name(
            workflow='workflow_0').get_workflow_schedule_token_name(),
        owner='some_owner',
        expirationTime=now - 10)
    token.data = pickle.dumps(schedule)
    return token
def _get_child_job_token(self):
    """Build a WAITING child job token that depends on the parent job."""
    name = Name(workflow='some_workflow',
                instance='12345',
                job_state=Name.WAITING_STATE,
                job='child_job')
    child_job = ShellJob(name=name.job,
                         inputs=['parent_job'],
                         outputs=[],
                         command='echo child',
                         emails=['*****@*****.**'])
    return Token(name=name.get_job_token_name(),
                 data=pickle.dumps(child_job))
def _post_workflow_start_event_token(self):
    """Post the workflow start event token triggering the parent job."""
    event_name = Name(workflow='some_workflow',
                      instance='12345',
                      job='parent_job',
                      input_name=Name.WORKFLOW_START_INPUT,
                      event='workflow_start_event')
    token = Token(
        name=event_name.get_event_token_name(),
        data=pickle.dumps(Event(creator='SimpleWorkflowTest')))
    self._client.modify(ModifyRequest(updates=[token]))
def _generate_signal_tokens(workflows):
    """Create DRAIN signal tokens for every even-numbered workflow."""
    result = []
    for index in range(0, workflows, 2):
        signal = Signal(Signal.DRAIN)
        name = Name(workflow='workflow_%d' % index,
                    signal=Signal.action_to_string(signal.action))
        result.append(
            Token(name=name.get_signal_token_name(),
                  version=10000000000 * index,
                  data=pickle.dumps(signal)))
    return result
def _read_tokens_from_client(self, client):
    """Read archived job tokens from the client.

    Args:
        client: The client to read tokens from.
    """
    workflow_prefix = Name(
        workflow=self._workflow,
        instance=self._instance).get_workflow_prefix()
    request = QueryRequest(
        queries=[Query(namePrefix=workflow_prefix)])
    response = client.query(request)
    assert len(response.tokens) == 1
    tokens = response.tokens[0]
    self._filter_job_tokens(tokens)
    self._filter_event_tokens(tokens)
def test_make_job_runnable(self):
    """Only jobs with all triggering tokens can become runnable."""
    self._post_job_tokens()
    self._post_workflow_start_event_token()

    def _waiting_token(job):
        return self._get_token(
            Name(workflow='some_workflow',
                 instance='12345',
                 job_state=Name.WAITING_STATE,
                 job=job).get_job_token_name())

    parent_job_token = _waiting_token('parent_job')
    child_job_token = _waiting_token('child_job')

    self._worker._make_job_runnable(child_job_token)
    # Child job is missing triggering tokens so it cannot be made
    # runnable.
    self._verify_parent_job_waiting()

    self._worker._make_job_runnable(parent_job_token)
    # Parent job has all triggering tokens so it can be made runnable.
    self._verify_parent_job_runnable()
def _job_data_less_than(job_data1, job_data2): """A comparator for job data objects. Jobs are sorted based on the execution time. Finished jobs sort on the end time. A running job sorts after a finished job. Running jobs sort on the start time. Jobs that never run sort on qualified name. """ if (not job_data1.last_start and not job_data2.last_start and not job_data1.last_end and not job_data2.last_end): # neither job ever run name1 = Name(workflow=job_data1.workflow, instance=job_data1.instance, job_state=Name.WAITING_STATE, job=job_data1.job).get_job_token_name() name2 = Name(workflow=job_data2.workflow, instance=job_data2.instance, job_state=Name.WAITING_STATE, job=job_data2.job).get_job_token_name() return name1 < name2 if not job_data1.last_start and job_data2.last_start: # job1 never run, job2 did return True if job_data1.last_start and not job_data2.last_start: # job2 never run, job1 did return False if not job_data1.last_end and not job_data2.last_end: # both jobs are running return job_data1.last_start < job_data2.last_start if not job_data1.last_end and job_data2.last_end: # only job1 is running return False if job_data1.last_end and not job_data2.last_end: # only job2 is running return True # both jobs run in the past but neither is running now return job_data1.last_end < job_data2.last_end