def _get_output_event_tokens(self, job):
    """Create output event tokens for the owned job token.

    Args:
        job: The job whose outputs should get event tokens.

    Returns:
        A list of event tokens, one per output of the owned job token.
    """
    assert self._owned_job_token
    owned_name = Name.from_job_token_name(self._owned_job_token.name)
    # Every output event lives under the same workflow/instance, with
    # this job recorded as the input that produced the event.
    event_name = Name(workflow=owned_name.workflow,
                      instance=owned_name.instance,
                      input_name=owned_name.job)
    tokens = []
    for output in job.outputs:
        event_name.job = output
        event_name.event = get_unique_name()
        assert job.history
        # Attach the attributes of the most recent execution record.
        event = Event(creator=self._name)
        event.attributes = job.history[-1].get_event_attributes()
        tokens.append(Token(name=event_name.get_event_token_name(),
                            data=pickle.dumps(event)))
    return tokens
def test_customize_command(self):
    """Verify command attribute substitution against the job's history."""
    job = ShellJob(name='some_job',
                   inputs=['some_input', 'some_other_input'])
    record = ExecutionRecord(instance=123, start_time=10)
    record.events = [
        Event(attributes={'some_attr': 'some_value'}),
        Event(attributes={'some_attr': 'some_other_value',
                          'yet_another_attr': 'yet_another_value'}),
    ]
    job.history = [record]

    cases = [
        # Empty command.
        ('', ''),
        # Command with no attributes.
        ('some_command', 'some_command'),
        # Command with attributes: unknown attributes expand to nothing,
        # values coming from multiple events are comma-joined.
        ('%(non_existent_attr)s %(some_attr)s %(yet_another_attr)s',
         ' some_value,some_other_value yet_another_value'),
        # Command with percentage marks.
        ('%% some_command', '% some_command'),
    ]
    for command, expected in cases:
        job.command = command
        self.assertEqual(expected, job.customize_command())
def _get_output_event_tokens(self, job):
    """Create output event tokens for the owned job token.

    Args:
        job: The job whose outputs need event tokens.

    Returns:
        A list of event tokens corresponding to the outputs of the
        owned job token.
    """
    assert self._owned_job_token
    source = Name.from_job_token_name(self._owned_job_token.name)

    def _make_token(output):
        # Each event is named after the consuming job, with the owned
        # job recorded as the input and a fresh unique event id.  The
        # event carries the attributes of the latest execution record.
        assert job.history
        token_name = Name(workflow=source.workflow,
                          instance=source.instance,
                          job=output,
                          input_name=source.job,
                          event=get_unique_name())
        event = Event(creator=self._name)
        event.attributes = job.history[-1].get_event_attributes()
        return Token(name=token_name.get_event_token_name(),
                     data=pickle.dumps(event))

    return [_make_token(output) for output in job.outputs]
def test_events(self, open_mock, exists_mock, s3_open_mock):
    """Events attached to a job end up on its new execution record."""
    file_mock = mock.MagicMock()
    file_mock.__enter__.return_value = file_mock
    open_mock.return_value = file_mock
    exists_mock.return_value = True

    events = [Event(creator='some_creator'),
              Event(creator='some_other_creator')]
    self._executor.job.events = events

    self.assertTrue(self._executor.prepare())

    history = self._executor.job.history
    self.assertEqual(1, len(history))
    self.assertEqual(events, history[0].events)
    self.assertEqual(s3_open_mock.call_count, 2)
def get_workflow_tokens(self):
    """Create Pinball tokens representing a workflow instance.

    Convert workflow jobs to tokens and create event tokens in inputs
    of top-level jobs.

    Returns:
        A list of job and event tokens representing a workflow instance.
    """
    all_jobs = self._get_transitive_deps()
    instance = get_unique_workflow_instance()
    # One job token per job reachable through the dependency graph.
    tokens = [job.get_job_token(self.name, instance) for job in all_jobs]
    # Top-level jobs have no upstream jobs, so seed their inputs with
    # explicit workflow-start events to make them runnable.
    for job in self._get_top_level_jobs():
        start_name = Name(workflow=self.name,
                          instance=instance,
                          job=job.name,
                          input_name=Name.WORKFLOW_START_INPUT,
                          event='workflow_start_event')
        tokens.append(Token(name=start_name.get_event_token_name(),
                            data=pickle.dumps(Event(creator='parser'))))
    return tokens
def _post_workflow_start_event_token(self):
    """Post an event token unblocking the top-level job's start input."""
    token_name = Name(workflow='some_workflow',
                      instance='12345',
                      job='parent_job',
                      input_name=Name.WORKFLOW_START_INPUT,
                      event='workflow_start_event').get_event_token_name()
    token = Token(name=token_name,
                  data=pickle.dumps(Event(creator='SimpleWorkflowTest')))
    self._client.modify(ModifyRequest(updates=[token]))
def _add_active_workflow_tokens(self):
    """Add some active workflow tokens to a fresh ephemeral store.

    The job dependencies form a complete binary tree turned upside
    down.  I.e., each job has two parents at the level below it.
    """
    self._store = EphemeralStore()
    version = 1
    for level in range(AnalyzerTestCase._NUM_LEVELS):
        jobs_at_level = 2 ** (AnalyzerTestCase._NUM_LEVELS - level - 1)
        for job_index in range(jobs_at_level):
            job_name = 'job_%d_%d' % (level, job_index)
            event_name = Name(workflow='some_workflow',
                              instance='123',
                              job=job_name,
                              event='some_event')
            if level == 0:
                # Bottom-level jobs depend on external
                # workflow-start inputs.
                inputs = [Name.WORKFLOW_START_INPUT,
                          Name.WORKFLOW_START_INPUT + '_prime']
                event_name.input = Name.WORKFLOW_START_INPUT
            else:
                # Each job has two parents at the previous level.
                inputs = ['job_%d_%d' % (level - 1, 2 * job_index),
                          'job_%d_%d' % (level - 1, 2 * job_index + 1)]
                event_name.input = 'job_%d_%d' % (level - 1, 2 * job_index)

            if level == AnalyzerTestCase._NUM_LEVELS - 1:
                outputs = []
            else:
                # Two sibling jobs feed one child: use explicit floor
                # division.  The original '/' yields a float under
                # Python 3 before being formatted with %d.
                outputs = ['job_%d_%d' % (level + 1, job_index // 2)]

            job = ShellJob(name=job_name,
                           inputs=inputs,
                           outputs=outputs,
                           command='some_command')
            job.history.append(ExecutionRecord())
            name = Name(workflow='some_workflow',
                        instance='123',
                        job_state=Name.WAITING_STATE,
                        job=job_name)
            job_token = Token(version=version,
                              name=name.get_job_token_name(),
                              priority=10,
                              data=pickle.dumps(job))
            version += 1
            # NOTE(review): version is not bumped after the event token,
            # so the next job token reuses the same version — confirm
            # this is intended by the store semantics.
            event = Event('some_event')
            event_token = Token(version=version,
                                name=event_name.get_event_token_name(),
                                priority=10,
                                data=pickle.dumps(event))
            self._store.commit_tokens([job_token, event_token])
def _generate_missing_events(self, job_names):
    """Generate external events required to run all jobs in a set.

    For a set of jobs (a subset of all jobs in the workflow), produce
    events satisfying upstream dependencies external to that set.
    E.g., for job dependency structure like this:

        A1  A2
         | /
        B1  B2
         |
        C1  C2
         | /
        D1

    and job_names = (C1, D1) we would generate events satisfying the
    following deps: B1->C1, C2->D1.

    Args:
        job_names: The set of job names whose external deps are to be
            satisfied by the generated events.
    """
    input_prefixes = set()
    for job_name in job_names:
        job = self._jobs[job_name]
        for job_input in job.inputs:
            # Inputs produced inside the selected set will be satisfied
            # by running those jobs; only external inputs need events.
            if job_input in job_names:
                continue
            name = Name(workflow=self._workflow,
                        instance=self._instance,
                        job=job_name,
                        input_name=job_input,
                        event='poison_%d' % len(input_prefixes))
            input_prefix = name.get_input_prefix()
            if input_prefix in input_prefixes:
                continue
            input_prefixes.add(input_prefix)
            # Reuse the computed token name rather than regenerating it,
            # and use the idiomatic 'not in' membership test.
            event_token_name = name.get_event_token_name()
            if event_token_name not in self._existing_events:
                self._new_events[event_token_name] = Event('analyzer')
def get_workflow_tokens(self, workflow):
    """Convert a workflow from the repository into Pinball tokens.

    Args:
        workflow: The name of the workflow to instantiate.

    Returns:
        A list of job and event tokens representing a new instance of
        the workflow.
    """
    # TODO(pawel): add workflow connectivity check.
    job_configs = {}
    top_level_job_names = []
    for job_name in self._repository.get_job_names(workflow):
        config = self._repository.get_job(workflow, job_name)
        job_configs[job_name] = config
        if not config.parents:
            # Parentless jobs are triggered by the synthetic
            # workflow-start input.
            top_level_job_names.append(job_name)
            config.parents = [Name.WORKFLOW_START_INPUT]

    # Invert the parent links to obtain each job's outputs.
    job_outputs = collections.defaultdict(list)
    for config in job_configs.values():
        for parent_name in config.parents:
            job_outputs[parent_name].append(config.job)

    instance = get_unique_workflow_instance()
    # Convert job configs to job tokens.
    result = [RepositoryConfigParser._job_config_to_token(
                  workflow, instance, config, job_outputs[config.job])
              for config in job_configs.values()]

    # Create triggering events for top-level jobs.
    for job_name in top_level_job_names:
        event_name = Name(workflow=workflow,
                          instance=instance,
                          job=job_name,
                          input_name=Name.WORKFLOW_START_INPUT,
                          event='workflow_start_event')
        event = Event(creator='repository_config_parser')
        result.append(Token(name=event_name.get_event_token_name(),
                            data=pickle.dumps(event)))
    return result