def _get_output_event_tokens(self, job):
    """Create output event tokens for the owned job token.

    Args:
        job: The job whose output tokens should be generated.
    Returns:
        A list of event tokens corresponding to the outputs of the owned
        job token.
    """
    assert self._owned_job_token
    job_name = Name.from_job_token_name(self._owned_job_token.name)
    output_name = Name()
    output_name.workflow = job_name.workflow
    output_name.instance = job_name.instance
    output_name.input = job_name.job
    event_tokens = []
    if job.outputs:
        # The most recent execution record supplies the event attributes.
        # It is loop-invariant, so look it up once instead of per output.
        assert job.history
        event_attributes = job.history[-1].get_event_attributes()
        for output in job.outputs:
            output_name.job = output
            output_name.event = get_unique_name()
            event = Event(creator=self._name)
            event.attributes = event_attributes
            event_tokens.append(
                Token(name=output_name.get_event_token_name(),
                      data=pickle.dumps(event)))
    return event_tokens
def _get_output_event_tokens(self, job):
    """Create output event tokens for the owned job token.

    Args:
        job: The job whose output tokens should be generated.
    Returns:
        A list of event tokens corresponding to the outputs of the owned
        job token.
    """
    assert self._owned_job_token
    owner_name = Name.from_job_token_name(self._owned_job_token.name)
    # Output events live under the downstream job's input named after
    # this job.
    name = Name()
    name.workflow = owner_name.workflow
    name.instance = owner_name.instance
    name.input = owner_name.job
    tokens = []
    for output in job.outputs:
        name.job = output
        name.event = get_unique_name()
        assert job.history
        latest_record = job.history[-1]
        event = Event(creator=self._name)
        event.attributes = latest_record.get_event_attributes()
        tokens.append(Token(name=name.get_event_token_name(),
                            data=pickle.dumps(event)))
    return tokens
def get_workflow_tokens(self):
    """Create Pinball tokens representing a workflow instance.

    Convert workflow jobs to tokens and create event tokens in inputs of
    top-level jobs.

    Returns:
        A list of job and event tokens representing a workflow instance.
    """
    instance = get_unique_workflow_instance()
    # One job token per job reachable from this workflow.
    tokens = [job.get_job_token(self.name, instance)
              for job in self._get_transitive_deps()]
    # Top-level jobs have no upstream jobs, so seed them with explicit
    # workflow-start events.
    for job in self._get_top_level_jobs():
        start_event = Event(creator='parser')
        event_name = Name(workflow=self.name,
                          instance=instance,
                          job=job.name,
                          input_name=Name.WORKFLOW_START_INPUT,
                          event='workflow_start_event')
        tokens.append(Token(name=event_name.get_event_token_name(),
                            data=pickle.dumps(start_event)))
    return tokens
def get_workflow_tokens(self):
    """Create Pinball tokens representing a workflow instance.

    Convert workflow jobs to tokens and create event tokens in inputs of
    top-level jobs.

    Returns:
        A list of job and event tokens representing a workflow instance.
    """
    instance = get_unique_workflow_instance()
    result = []
    # Emit a job token for every job in the transitive dependency set.
    for job in self._get_transitive_deps():
        result.append(job.get_job_token(self.name, instance))
    # Jobs without parents are triggered by workflow-start events.
    for job in self._get_top_level_jobs():
        name = Name(workflow=self.name,
                    instance=instance,
                    job=job.name,
                    input_name=Name.WORKFLOW_START_INPUT,
                    event='workflow_start_event')
        result.append(Token(name=name.get_event_token_name(),
                            data=pickle.dumps(Event(creator='parser'))))
    return result
def _post_workflow_start_event_token(self):
    """Post a workflow-start event token for the parent job."""
    event_name = Name(workflow='some_workflow',
                      instance='12345',
                      job='parent_job',
                      input_name=Name.WORKFLOW_START_INPUT,
                      event='workflow_start_event')
    start_event = Event(creator='SimpleWorkflowTest')
    start_token = Token(name=event_name.get_event_token_name(),
                        data=pickle.dumps(start_event))
    self._client.modify(ModifyRequest(updates=[start_token]))
def _generate_missing_events(self, job_names):
    """Generate external events required to run all jobs in a set.

    For a set of jobs (a subset of all jobs in the workflow), produce
    events satisfying upstream dependencies external to that set.
    E.g., for job dependency structure like this:

    A1  A2
    |  /
    B1  B2
    |
    C1  C2
    |  /
    D1

    and job_names = (C1, D1) we would generate events satisfying the
    following deps: B1->C1, C2->D1.

    Args:
        job_names: The set of job names whose external deps are to be
            satisfied by the generated events.
    """
    input_prefixes = set()
    for job_name in job_names:
        job = self._jobs[job_name]
        for job_input in job.inputs:
            # Inputs produced inside the set need no synthetic event.
            if job_input in job_names:
                continue
            name = Name(workflow=self._workflow,
                        instance=self._instance,
                        job=job_name,
                        input_name=job_input,
                        event='poison_%d' % len(input_prefixes))
            input_prefix = name.get_input_prefix()
            if input_prefix not in input_prefixes:
                input_prefixes.add(input_prefix)
                event_token_name = name.get_event_token_name()
                # Only synthesize an event if one does not already exist.
                if event_token_name not in self._existing_events:
                    self._new_events[event_token_name] = Event('analyzer')
def _add_active_workflow_tokens(self):
    """Add some active workflow tokens.

    The job dependencies form a complete binary tree turned upside down.
    I.e., each job has two parents.
    """
    self._store = EphemeralStore()
    version = 1
    for level in range(AnalyzerTestCase._NUM_LEVELS):
        jobs_at_level = 2 ** (AnalyzerTestCase._NUM_LEVELS - level - 1)
        for job_index in range(jobs_at_level):
            job_name = 'job_%d_%d' % (level, job_index)
            event_name = Name(workflow='some_workflow',
                              instance='123',
                              job=job_name,
                              event='some_event')
            if level == 0:
                # Top-level jobs are fed by workflow-start inputs.
                inputs = [Name.WORKFLOW_START_INPUT,
                          Name.WORKFLOW_START_INPUT + '_prime']
                event_name.input = Name.WORKFLOW_START_INPUT
            else:
                inputs = ['job_%d_%d' % (level - 1, 2 * job_index),
                          'job_%d_%d' % (level - 1, 2 * job_index + 1)]
                event_name.input = 'job_%d_%d' % (level - 1, 2 * job_index)
            if level == AnalyzerTestCase._NUM_LEVELS - 1:
                outputs = []
            else:
                # Floor division: plain '/' yields a float under Python 3,
                # which only happens to work here because %d truncates it.
                outputs = ['job_%d_%d' % (level + 1, job_index // 2)]
            job = ShellJob(name=job_name,
                           inputs=inputs,
                           outputs=outputs,
                           command='some_command')
            job.history.append(ExecutionRecord())
            name = Name(workflow='some_workflow',
                        instance='123',
                        job_state=Name.WAITING_STATE,
                        job=job_name)
            job_token = Token(version=version,
                              name=name.get_job_token_name(),
                              priority=10,
                              data=pickle.dumps(job))
            version += 1
            event = Event('some_event')
            event_token = Token(version=version,
                                name=event_name.get_event_token_name(),
                                priority=10,
                                data=pickle.dumps(event))
            self._store.commit_tokens([job_token, event_token])
def _post_event_tokens(self):
    """Add some event tokens to the master."""
    request = ModifyRequest(updates=[])
    name = Name(workflow='some_workflow', instance='12345')
    # Two jobs x two inputs x two events = eight event tokens.
    for job_id in (0, 1):
        name.job = 'some_job_%d' % job_id
        for input_id in (0, 1):
            name.input = 'some_input_%d' % input_id
            for event_id in (0, 1):
                name.event = 'some_event_%d' % event_id
                request.updates.append(
                    Token(name=name.get_event_token_name()))
    self._factory.get_client().modify(request)
def _add_active_workflow_tokens(self):
    """Add some active workflow tokens.

    The job dependencies form a complete binary tree turned upside down.
    I.e., each job has two parents.
    """
    self._store = EphemeralStore()
    version = 1
    for level in range(AnalyzerTestCase._NUM_LEVELS):
        jobs_at_level = 2 ** (AnalyzerTestCase._NUM_LEVELS - level - 1)
        for job_index in range(jobs_at_level):
            job_name = 'job_{0:d}_{1:d}'.format(level, job_index)
            event_name = Name(workflow='some_workflow',
                              instance='123',
                              job=job_name,
                              event='some_event')
            if level == 0:
                # Top-level jobs are fed by workflow-start inputs.
                inputs = [Name.WORKFLOW_START_INPUT,
                          Name.WORKFLOW_START_INPUT + '_prime']
                event_name.input = Name.WORKFLOW_START_INPUT
            else:
                inputs = ['job_{0:d}_{1:d}'.format(level - 1,
                                                   2 * job_index),
                          'job_{0:d}_{1:d}'.format(level - 1,
                                                   2 * job_index + 1)]
                event_name.input = 'job_{0:d}_{1:d}'.format(level - 1,
                                                            2 * job_index)
            if level == AnalyzerTestCase._NUM_LEVELS - 1:
                outputs = []
            else:
                # Must be floor division: under Python 3, '/' produces a
                # float and '{0:d}'.format(float) raises ValueError.
                outputs = ['job_{0:d}_{1:d}'.format(level + 1,
                                                    job_index // 2)]
            job = ShellJob(name=job_name,
                           inputs=inputs,
                           outputs=outputs,
                           command='some_command')
            job.history.append(ExecutionRecord())
            name = Name(workflow='some_workflow',
                        instance='123',
                        job_state=Name.WAITING_STATE,
                        job=job_name)
            job_token = Token(version=version,
                              name=name.get_job_token_name(),
                              priority=10,
                              data=pickle.dumps(job))
            version += 1
            event = Event('some_event')
            event_token = Token(version=version,
                                name=event_name.get_event_token_name(),
                                priority=10,
                                data=pickle.dumps(event))
            self._store.commit_tokens([job_token, event_token])
def test_move_job_token_to_runnable(self):
    """Moving a waiting job to runnable should consume its event token."""
    self._post_job_tokens()
    self._post_workflow_start_event_token()
    waiting_name = Name(workflow='some_workflow',
                        instance='12345',
                        job_state=Name.WAITING_STATE,
                        job='parent_job')
    start_event_name = Name(workflow='some_workflow',
                            instance='12345',
                            job='parent_job',
                            input_name=Name.WORKFLOW_START_INPUT,
                            event='workflow_start_event')
    job_token = self._get_token(waiting_name.get_job_token_name())
    event_token = self._get_token(start_event_name.get_event_token_name())
    self._worker._move_job_token_to_runnable(job_token, [event_token])
    # Event token should have been removed and the parent job should be
    # runnable.
    self._verify_parent_job_runnable()
def get_workflow_tokens(self, workflow):
    """Build job and triggering-event tokens for a workflow instance."""
    # TODO(pawel): add workflow connectivity check.
    job_configs = {}
    top_level_job_names = []
    for job_name in self._repository.get_job_names(workflow):
        config = self._repository.get_job(workflow, job_name)
        job_configs[job_name] = config
        if not config.parents:
            # Parentless jobs are started directly by the workflow.
            top_level_job_names.append(job_name)
            config.parents = [Name.WORKFLOW_START_INPUT]
    # Invert parent links to obtain each job's downstream outputs.
    job_outputs = collections.defaultdict(list)
    for config in job_configs.values():
        for parent_job_name in config.parents:
            job_outputs[parent_job_name].append(config.job)
    instance = get_unique_workflow_instance()
    # Convert job configs to job tokens.
    result = [RepositoryConfigParser._job_config_to_token(
                  workflow, instance, config, job_outputs[config.job])
              for config in job_configs.values()]
    # Create triggering events for top-level jobs.
    for job_name in top_level_job_names:
        event_name = Name(workflow=workflow,
                          instance=instance,
                          job=job_name,
                          input_name=Name.WORKFLOW_START_INPUT,
                          event='workflow_start_event')
        event = Event(creator='repository_config_parser')
        result.append(Token(name=event_name.get_event_token_name(),
                            data=pickle.dumps(event)))
    return result
def get_workflow_tokens(self, workflow):
    """Build job and triggering-event tokens for a workflow instance."""
    # TODO(pawel): add workflow connectivity check.
    job_configs = {}
    top_level_job_names = []
    for name in self._repository.get_job_names(workflow):
        config = self._repository.get_job(workflow, name)
        job_configs[name] = config
        if config.parents:
            continue
        # Parentless jobs are triggered by the workflow start itself.
        top_level_job_names.append(name)
        config.parents = [Name.WORKFLOW_START_INPUT]
    # Invert the parent links to obtain each job's downstream outputs.
    job_outputs = collections.defaultdict(list)
    for config in job_configs.values():
        for parent in config.parents:
            job_outputs[parent].append(config.job)
    instance = get_unique_workflow_instance()
    result = []
    # Convert job configs to job tokens.
    for config in job_configs.values():
        result.append(RepositoryConfigParser._job_config_to_token(
            workflow, instance, config, job_outputs[config.job]))
    # Create triggering events for top-level jobs.
    for name in top_level_job_names:
        event_name = Name(workflow=workflow,
                          instance=instance,
                          job=name,
                          input_name=Name.WORKFLOW_START_INPUT,
                          event='workflow_start_event')
        result.append(Token(
            name=event_name.get_event_token_name(),
            data=pickle.dumps(Event(creator='repository_config_parser'))))
    return result