def get_job_token(self, workflow_name, workflow_instance):
    """Convert the job to a pinball token representing its properties.

    Condition is similar to job.  The template type decides whether the
    token carries a pinball job or a pinball condition.

    Args:
        workflow_name: The name of the workflow in which context this job
            is instantiated.
        workflow_instance: The workflow instance of the output job token.
    Returns:
        A pinball token representing the job.
    Raises:
        Exception: If the template is neither a JobTemplateBase nor a
            ConditionTemplateBase subclass.
    """
    if self.workflow.name == workflow_name:
        inputs = [input_job.name for input_job in self.inputs]
    else:
        # If it's an external job, do not include its inputs.
        inputs = []
    if not inputs:
        inputs = [Name.WORKFLOW_START_INPUT]
    outputs = [output_job.name for output_job in self.outputs
               if output_job.workflow.name == workflow_name]
    # Both template branches named the token identically; build it once.
    name = Name(workflow=workflow_name,
                instance=workflow_instance,
                job_state=Name.WAITING_STATE,
                job=self.name)
    # isinstance() is the idiomatic, equivalent form of
    # issubclass(x.__class__, T).
    if isinstance(self.template, JobTemplateBase):
        params = self._get_template_params()
        job = self.template.get_pinball_job(inputs, outputs, params)
        result = Token(name=name.get_job_token_name(),
                       data=pickle.dumps(job))
        result.priority = self.compute_score()
    elif isinstance(self.template, ConditionTemplateBase):
        condition = self.template.get_pinball_condition(outputs)
        result = Token(name=name.get_job_token_name(),
                       data=pickle.dumps(condition))
    else:
        raise Exception(
            "Template must be a subclass of JobTemplateBase or "
            "ConditionTemplateBase!")
    return result
def _get_job_tokens(self, workflow=None, instance=None, job_state=None,
                    job=None):
    """Extract job tokens from the store.

    Args:
        workflow: The name of the workflow whose jobs we are interested in.
        instance: The name of the instance whose jobs we are interested in.
        job_state: The state of the jobs we are interested in.
        job: The name of the job we are interested in.
    Returns:
        List of jobs matching the specification.
    """
    name = Name(workflow=workflow, instance=instance, job_state=job_state,
                job=job)
    # Pick the most specific prefix the provided components allow.
    if name.job:
        prefix = name.get_job_token_name()
    elif name.job_state:
        prefix = name.get_job_state_prefix()
    elif name.instance:
        prefix = name.get_job_prefix()
    elif name.workflow:
        prefix = name.get_workflow_prefix()
    else:
        prefix = ''
    matching = []
    for candidate in self._store.read_tokens(name_prefix=prefix):
        parsed = Name.from_job_token_name(candidate.name)
        if not parsed.get_job_token_name():
            # Not a job token; skip it.
            continue
        # A prefix match is not enough when a specific job was requested;
        # the job name must match exactly.
        if job and job != parsed.job:
            continue
        matching.append(candidate)
    return matching
def get_job_token(self, workflow_name, workflow_instance):
    """Convert the job to a pinball token representing its properties.

    Condition is similar to job.  We check the template type to decide
    whether a pinball job or a pinball condition gets serialized.

    Args:
        workflow_name: The name of the workflow in which context this job
            is instantiated.
        workflow_instance: The workflow instance of the output job token.
    Returns:
        A pinball token representing the job.
    Raises:
        Exception: If the template is not a JobTemplateBase or
            ConditionTemplateBase subclass.
    """
    # External jobs (owned by a different workflow) contribute no inputs.
    if self.workflow.name == workflow_name:
        inputs = [input_job.name for input_job in self.inputs]
    else:
        inputs = []
    if not inputs:
        inputs = [Name.WORKFLOW_START_INPUT]
    outputs = []
    for output_job in self.outputs:
        if output_job.workflow.name == workflow_name:
            outputs.append(output_job.name)
    # The token name is independent of the template type; build it once.
    name = Name(workflow=workflow_name, instance=workflow_instance,
                job_state=Name.WAITING_STATE, job=self.name)
    token_name = name.get_job_token_name()
    # isinstance() replaces the redundant issubclass(x.__class__, T) form.
    if isinstance(self.template, JobTemplateBase):
        job = self.template.get_pinball_job(inputs, outputs,
                                            self._get_template_params())
        result = Token(name=token_name, data=pickle.dumps(job))
        result.priority = self.compute_score()
    elif isinstance(self.template, ConditionTemplateBase):
        condition = self.template.get_pinball_condition(outputs)
        result = Token(name=token_name, data=pickle.dumps(condition))
    else:
        raise Exception("Template must be a subclass of JobTemplateBase "
                        "or ConditionTemplateBase!")
    return result
def _get_child_job_token(self):
    """Build a waiting-state token wrapping the child shell job."""
    child_name = Name(workflow='some_workflow',
                      instance='12345',
                      job_state=Name.WAITING_STATE,
                      job='child_job')
    child_job = ShellJob(name=child_name.job,
                         inputs=['parent_job'],
                         outputs=[],
                         command='echo child',
                         emails=['*****@*****.**'])
    return Token(name=child_name.get_job_token_name(),
                 data=pickle.dumps(child_job))
def _get_child_job_token(self):
    """Create the job token describing the child shell job."""
    token_name = Name(workflow='some_workflow', instance='12345',
                      job_state=Name.WAITING_STATE, job='child_job')
    # Serialize the job up front; the token only stores the pickled bytes.
    payload = pickle.dumps(ShellJob(name=token_name.job,
                                    inputs=['parent_job'],
                                    outputs=[],
                                    command='echo child',
                                    emails=['*****@*****.**']))
    return Token(name=token_name.get_job_token_name(), data=payload)
def _add_active_workflow_tokens(self):
    """Add some active workflow tokens.

    The job dependencies form a complete binary tree turned upside down.
    I.e., each job has two parents.
    """
    self._store = EphemeralStore()
    version = 1
    for level in range(AnalyzerTestCase._NUM_LEVELS):
        jobs_at_level = 2 ** (AnalyzerTestCase._NUM_LEVELS - level - 1)
        for job_index in range(jobs_at_level):
            job_name = 'job_%d_%d' % (level, job_index)
            event_name = Name(workflow='some_workflow',
                              instance='123',
                              job=job_name,
                              event='some_event')
            if level == 0:
                # Level-0 jobs feed off the workflow start inputs.
                inputs = [Name.WORKFLOW_START_INPUT,
                          Name.WORKFLOW_START_INPUT + '_prime']
                event_name.input = Name.WORKFLOW_START_INPUT
            else:
                inputs = ['job_%d_%d' % (level - 1, 2 * job_index),
                          'job_%d_%d' % (level - 1, 2 * job_index + 1)]
                event_name.input = 'job_%d_%d' % (level - 1, 2 * job_index)
            if level == AnalyzerTestCase._NUM_LEVELS - 1:
                outputs = []
            else:
                # Floor division: under Python 3 the old job_index / 2
                # produced a float that only the '%d' conversion happened
                # to truncate back to the intended integer.
                outputs = ['job_%d_%d' % (level + 1, job_index // 2)]
            job = ShellJob(name=job_name,
                           inputs=inputs,
                           outputs=outputs,
                           command='some_command')
            job.history.append(ExecutionRecord())
            name = Name(workflow='some_workflow',
                        instance='123',
                        job_state=Name.WAITING_STATE,
                        job=job_name)
            job_token = Token(version=version,
                              name=name.get_job_token_name(),
                              priority=10,
                              data=pickle.dumps(job))
            version += 1
            event = Event('some_event')
            # NOTE(review): the event token takes the incremented version,
            # and version is not bumped again afterwards, so the next job
            # token reuses this version number -- confirm this is intended.
            event_token = Token(version=version,
                                name=event_name.get_event_token_name(),
                                priority=10,
                                data=pickle.dumps(event))
            self._store.commit_tokens([job_token, event_token])
def _post_job_tokens(self):
    """Add some job tokens to the master."""
    request = ModifyRequest(updates=[])
    name = Name(workflow='some_workflow', instance='12345')
    # Even-indexed jobs are waiting, odd-indexed jobs are runnable.
    states = (Name.WAITING_STATE, Name.RUNNABLE_STATE)
    for job_id in range(2):
        name.job_state = states[job_id % 2]
        name.job = 'some_job_%d' % job_id
        request.updates.append(Token(name=name.get_job_token_name()))
    self._factory.get_client().modify(request)
def _add_active_workflow_tokens(self):
    """Add some active workflow tokens.

    The job dependencies form a complete binary tree turned upside down.
    I.e., each job has two parents.
    """
    self._store = EphemeralStore()
    version = 1
    for level in range(AnalyzerTestCase._NUM_LEVELS):
        jobs_at_level = 2 ** (AnalyzerTestCase._NUM_LEVELS - level - 1)
        for job_index in range(jobs_at_level):
            job_name = 'job_{0:d}_{1:d}'.format(level, job_index)
            event_name = Name(workflow='some_workflow',
                              instance='123',
                              job=job_name,
                              event='some_event')
            if level == 0:
                # Level-0 jobs feed off the workflow start inputs.
                inputs = [Name.WORKFLOW_START_INPUT,
                          Name.WORKFLOW_START_INPUT + '_prime']
                event_name.input = Name.WORKFLOW_START_INPUT
            else:
                inputs = ['job_{0:d}_{1:d}'.format(level - 1, 2 * job_index),
                          'job_{0:d}_{1:d}'.format(level - 1,
                                                   2 * job_index + 1)]
                event_name.input = 'job_{0:d}_{1:d}'.format(level - 1,
                                                            2 * job_index)
            if level == AnalyzerTestCase._NUM_LEVELS - 1:
                outputs = []
            else:
                # BUG FIX: job_index / 2 is a float under Python 3 and the
                # '{:d}' format code raises ValueError for floats.  Floor
                # division keeps the value an int.
                outputs = ['job_{0:d}_{1:d}'.format(level + 1,
                                                    job_index // 2)]
            job = ShellJob(name=job_name,
                           inputs=inputs,
                           outputs=outputs,
                           command='some_command')
            job.history.append(ExecutionRecord())
            name = Name(workflow='some_workflow',
                        instance='123',
                        job_state=Name.WAITING_STATE,
                        job=job_name)
            job_token = Token(version=version,
                              name=name.get_job_token_name(),
                              priority=10,
                              data=pickle.dumps(job))
            version += 1
            event = Event('some_event')
            event_token = Token(version=version,
                                name=event_name.get_event_token_name(),
                                priority=10,
                                data=pickle.dumps(event))
            self._store.commit_tokens([job_token, event_token])
def get_tokens(self):
    """Export all internally stored tokens.

    Returns:
        The list of tokens after all transformations performed by the
        analyzer.
    """
    def _to_token(job):
        # Every exported job token is emitted in the waiting state.
        token_name = Name(workflow=self._workflow,
                          instance=self._instance,
                          job_state=Name.WAITING_STATE,
                          job=job.name)
        return Token(name=token_name.get_job_token_name(),
                     priority=self._job_priorities[job.name],
                     data=pickle.dumps(job))

    result = [_to_token(job) for job in self._jobs.values()]
    result.extend(self.get_new_event_tokens())
    return result
def test_move_job_token_to_runnable(self):
    """A waiting job with a satisfied start event becomes runnable."""
    self._post_job_tokens()
    self._post_workflow_start_event_token()
    job_name = Name(workflow='some_workflow', instance='12345',
                    job_state=Name.WAITING_STATE, job='parent_job')
    event_name = Name(workflow='some_workflow', instance='12345',
                      job='parent_job',
                      input_name=Name.WORKFLOW_START_INPUT,
                      event='workflow_start_event')
    job_token = self._get_token(job_name.get_job_token_name())
    event_token = self._get_token(event_name.get_event_token_name())
    self._worker._move_job_token_to_runnable(job_token, [event_token])
    # Event token should have been removed and the parent job should be
    # runnable.
    self._verify_parent_job_runnable()
def test_move_job_token_to_runnable(self):
    """Consuming the start event moves the parent job to runnable."""
    self._post_job_tokens()
    self._post_workflow_start_event_token()
    waiting_job = self._get_token(
        Name(workflow='some_workflow', instance='12345',
             job_state=Name.WAITING_STATE,
             job='parent_job').get_job_token_name())
    start_event = self._get_token(
        Name(workflow='some_workflow', instance='12345', job='parent_job',
             input_name=Name.WORKFLOW_START_INPUT,
             event='workflow_start_event').get_event_token_name())
    self._worker._move_job_token_to_runnable(waiting_job, [start_event])
    # The event token is consumed and the parent job becomes runnable.
    self._verify_parent_job_runnable()
def get_tokens(self):
    """Export all internally stored tokens.

    Returns:
        The list of tokens after all transformations performed by the
        analyzer.
    """
    job_tokens = []
    for stored_job in self._jobs.values():
        waiting_name = Name(workflow=self._workflow,
                            instance=self._instance,
                            job_state=Name.WAITING_STATE,
                            job=stored_job.name)
        job_tokens.append(
            Token(name=waiting_name.get_job_token_name(),
                  priority=self._job_priorities[stored_job.name],
                  data=pickle.dumps(stored_job)))
    # Event tokens produced by the analysis are appended after the jobs.
    return job_tokens + self.get_new_event_tokens()
def _generate_job_token(workflow, instance, job, executions, max_jobs):
    """Create a waiting job token with a fabricated execution history.

    Writes a pair of info/error log files under /tmp/pinball/logs for
    every fabricated execution.

    Args:
        workflow: Index of the workflow the job belongs to.
        instance: Index of the workflow instance.
        job: Index of the job within the chain.
        executions: Number of execution records (and log file pairs) to
            fabricate.
        max_jobs: Total number of jobs; the last job has no outputs.
    Returns:
        A job token for the (workflow, instance, job) triple.
    """
    # Jobs form a simple chain: each job's input is the previous job and
    # its output is the next one.
    if job == 0:
        inputs = [Name.WORKFLOW_START_INPUT]
    else:
        inputs = ['job_%d' % (job - 1)]
    if job == max_jobs - 1:
        outputs = []
    else:
        outputs = ['job_%d' % (job + 1)]
    shell_job = ShellJob(name='job_%d' % job,
                         inputs=inputs,
                         outputs=outputs,
                         command='some command %d' % job)
    for e in range(0, executions):
        # Encode (workflow, instance, job, execution) into distinct,
        # monotonically increasing timestamps.
        start_time = 1000000 * workflow + 10000 * instance + 100 * job + e + 1
        end_time = start_time + 10 * e + 1
        DIR = '/tmp/pinball/logs'
        if not os.path.exists(DIR):
            os.makedirs(DIR)
        # Expands to '<DIR>/%s.%d.%s', later filled with
        # (job, start_time, log kind).
        LOG_PATTERN = '%s/%%s.%%d.%%s' % DIR
        info_log_file = LOG_PATTERN % (job, start_time, 'info')
        with open(info_log_file, 'w') as f:
            f.write('some info log of execution %d' % e)
        error_log_file = LOG_PATTERN % (job, start_time, 'error')
        with open(error_log_file, 'w') as f:
            f.write('some error log of execution %d' % e)
        # Alternate exit codes so both successes and failures appear in
        # the history.
        record = ExecutionRecord(info='some_command %d some_args %d' % (e, e),
                                 instance='instance_%d' % instance,
                                 start_time=start_time,
                                 end_time=end_time,
                                 exit_code=(workflow + instance + e) % 2,
                                 logs={
                                     'info': info_log_file,
                                     'error': error_log_file
                                 })
        shell_job.history.append(record)
    name = Name(workflow='workflow_%d' % workflow,
                instance='instance_%d' % instance,
                job_state=Name.WAITING_STATE,
                job='job_%d' % job)
    return Token(name=name.get_job_token_name(),
                 version=1000000 * workflow + 10000 * instance + 100 * job,
                 priority=job,
                 data=pickle.dumps(shell_job))
def test_run(self, load_path_mock):
    """run() must hand the parser-caller params to the loaded parser."""
    config_parser = mock.Mock()

    def load_path(params):
        # list() is required: under Python 3 dict.keys() returns a view
        # object, which never compares equal to a list, so the original
        # assertEqual([PARSER_CALLER_KEY], params.keys()) always failed.
        self.assertEqual([PARSER_CALLER_KEY], list(params.keys()))
        return config_parser

    load_path_mock.return_value = load_path
    name = Name(workflow='some_workflow', instance='123',
                job_state=Name.WAITING_STATE, job='some_job')
    config_parser.get_workflow_tokens.return_value = [
        Token(name=name.get_job_token_name())]
    schedule = WorkflowSchedule(workflow='some_workflow')
    store = EphemeralStore()
    emailer = Emailer('some_host', '8080')
    request = schedule.run(emailer, store)
    self.assertEqual(1, len(request.updates))
def test_run(self, load_path_mock):
    """run() loads the default parser and posts one token update."""
    parser = mock.Mock()
    load_path_mock.return_value = parser
    token_name = Name(workflow='some_workflow', instance='123',
                      job_state=Name.WAITING_STATE, job='some_job')
    parser.get_workflow_tokens.return_value = [
        Token(name=token_name.get_job_token_name())]
    schedule = WorkflowSchedule(workflow='some_workflow')
    backing_store = EphemeralStore()
    notifier = Emailer('some_host', '8080')
    request = schedule.run(notifier, backing_store)
    expected_calls = [
        mock.call('pinball_ext.workflow.parser.PyWorkflowParser', {},
                  'schedule')]
    self.assertEqual(load_path_mock.call_args_list, expected_calls)
    self.assertEqual(1, len(request.updates))
def _generate_job_token(workflow, instance, job, executions, max_jobs):
    """Create a waiting job token with a fabricated execution history.

    Writes a pair of info/error log files under /tmp/pinball/logs for
    every fabricated execution.

    Args:
        workflow: Index of the workflow the job belongs to.
        instance: Index of the workflow instance.
        job: Index of the job within the chain.
        executions: Number of execution records (and log file pairs) to
            fabricate.
        max_jobs: Total number of jobs; the last job has no outputs.
    Returns:
        A job token for the (workflow, instance, job) triple.
    """
    # Jobs form a simple chain: input is the previous job, output the next.
    if job == 0:
        inputs = [Name.WORKFLOW_START_INPUT]
    else:
        inputs = ['job_%d' % (job - 1)]
    if job == max_jobs - 1:
        outputs = []
    else:
        outputs = ['job_%d' % (job + 1)]
    shell_job = ShellJob(name='job_%d' % job,
                         inputs=inputs,
                         outputs=outputs,
                         command='some command %d' % job)
    # Loop-invariant setup hoisted out of the execution loop.  The
    # try/except replaces the racy exists()-then-makedirs() check:
    # another process could create the directory between the two calls.
    log_dir = '/tmp/pinball/logs'
    try:
        os.makedirs(log_dir)
    except OSError:
        if not os.path.isdir(log_dir):
            raise
    log_pattern = '%s/%%s.%%d.%%s' % log_dir
    for e in range(executions):
        # Encode (workflow, instance, job, execution) into distinct,
        # monotonically increasing timestamps.
        start_time = 1000000 * workflow + 10000 * instance + 100 * job + e + 1
        end_time = start_time + 10 * e + 1
        info_log_file = log_pattern % (job, start_time, 'info')
        with open(info_log_file, 'w') as f:
            f.write('some info log of execution %d' % e)
        error_log_file = log_pattern % (job, start_time, 'error')
        with open(error_log_file, 'w') as f:
            f.write('some error log of execution %d' % e)
        record = ExecutionRecord(
            info='some_command %d some_args %d' % (e, e),
            instance='instance_%d' % instance,
            start_time=start_time,
            end_time=end_time,
            # Alternate exit codes so both successes and failures appear.
            exit_code=(workflow + instance + e) % 2,
            logs={'info': info_log_file, 'error': error_log_file})
        shell_job.history.append(record)
    name = Name(workflow='workflow_%d' % workflow,
                instance='instance_%d' % instance,
                job_state=Name.WAITING_STATE,
                job='job_%d' % job)
    return Token(name=name.get_job_token_name(),
                 version=1000000 * workflow + 10000 * instance + 100 * job,
                 priority=job,
                 data=pickle.dumps(shell_job))
def _get_job_tokens(self, workflow=None, instance=None, job_state=None,
                    job=None):
    """Extract job tokens from the store.

    Args:
        workflow: The name of the workflow whose jobs we are interested in.
        instance: The name of the instance whose jobs we are interested in.
        job_state: The state of the jobs we are interested in.
        job: The name of the job we are interested in.
    Returns:
        List of jobs matching the specification.
    """
    name = Name(workflow=workflow, instance=instance, job_state=job_state,
                job=job)
    # Select the most specific prefix available, most-specific first.
    for attr, make_prefix in (
            ('job', name.get_job_token_name),
            ('job_state', name.get_job_state_prefix),
            ('instance', name.get_job_prefix),
            ('workflow', name.get_workflow_prefix)):
        if getattr(name, attr):
            prefix = make_prefix()
            break
    else:
        prefix = ''

    def _is_match(token):
        parsed = Name.from_job_token_name(token.name)
        # Keep only job tokens; when a specific job was requested, a
        # prefix match is not enough -- the names must match exactly.
        return bool(parsed.get_job_token_name()) and (
            not job or job == parsed.job)

    return [t for t in self._store.read_tokens(name_prefix=prefix)
            if _is_match(t)]
def _job_config_to_token(workflow, instance, job_config, job_outputs):
    """Create a job token from a job config.

    Args:
        workflow: The workflow name.
        instance: The workflow instance.
        job_config: The job config to create token from.
        job_outputs: The names of the job outputs.
    Returns:
        Job token constructed from the job config.
    """
    # Conditions and jobs are deserialized by different converters.
    if job_config.is_condition:
        payload = RepositoryConfigParser._condition_config_to_condition(
            job_config, job_outputs)
    else:
        payload = RepositoryConfigParser._job_config_to_job(
            job_config, job_outputs)
    token_name = Name(workflow=workflow,
                      instance=instance,
                      job_state=Name.WAITING_STATE,
                      job=job_config.job)
    return Token(name=token_name.get_job_token_name(),
                 data=pickle.dumps(payload))
def test_run(self, load_path_mock):
    """run() must instantiate the loaded parser with empty params."""
    config_parser = mock.Mock()

    def load_path(params):
        # list() is required: under Python 3 dict.keys() returns a view
        # object, which never compares equal to a list, so the original
        # assertEqual([], params.keys()) always failed.
        self.assertEqual([], list(params.keys()))
        return config_parser

    load_path_mock.return_value = load_path
    name = Name(workflow='some_workflow', instance='123',
                job_state=Name.WAITING_STATE, job='some_job')
    config_parser.get_workflow_tokens.return_value = [
        Token(name=name.get_job_token_name())
    ]
    schedule = WorkflowSchedule(workflow='some_workflow')
    store = EphemeralStore()
    emailer = Emailer('some_host', '8080')
    request = schedule.run(emailer, store)
    self.assertEqual(1, len(request.updates))
def test_run(self, load_path_mock):
    """Verify run() loads the configured parser and posts its tokens."""
    config_parser = mock.Mock()
    load_path_mock.return_value = config_parser
    waiting_name = Name(workflow='some_workflow', instance='123',
                        job_state=Name.WAITING_STATE, job='some_job')
    config_parser.get_workflow_tokens.return_value = [
        Token(name=waiting_name.get_job_token_name())]
    schedule = WorkflowSchedule(workflow='some_workflow')
    request = schedule.run(Emailer('some_host', '8080'), EphemeralStore())
    # The default parser path must have been loaded exactly once.
    self.assertEqual(
        [mock.call('pinball_ext.workflow.parser.PyWorkflowParser', {},
                   'schedule')],
        load_path_mock.call_args_list)
    self.assertEqual(1, len(request.updates))
def _job_config_to_token(workflow, instance, job_config, job_outputs):
    """Create a job token from a job config.

    Args:
        workflow: The workflow name.
        instance: The workflow instance.
        job_config: The job config to create token from.
        job_outputs: The names of the job outputs.
    Returns:
        Job token constructed from the job config.
    """
    # Dispatch on the config kind: conditions and jobs use different
    # converters but produce the same token shape.
    convert = (RepositoryConfigParser._condition_config_to_condition
               if job_config.is_condition
               else RepositoryConfigParser._job_config_to_job)
    job = convert(job_config, job_outputs)
    name = Name(workflow=workflow, instance=instance,
                job_state=Name.WAITING_STATE, job=job_config.job)
    return Token(name=name.get_job_token_name(), data=pickle.dumps(job))