def create_restart_job_workflow() -> Workflow:
    workflow = Workflow()
    for i in range(2):
        job = TestScheduler.create_periodic_job(i, 4)
        workflow.add_job(job)
    job = TestScheduler.create_periodic_job(2, 20)
    job.job_config.periodic_config = None
    workflow.add_job(job)
    deps = []
    deps.append(JobControlEdge(target_node_id='0_job',
                               source_node_id='2_job',
                               met_config=MetConfig(
                                   event_key=generate_job_status_key('0_job'),
                                   event_value=State.FINISHED.value,
                                   action=TaskAction.RESTART)))
    deps.append(JobControlEdge(target_node_id='1_job',
                               source_node_id='2_job',
                               met_config=MetConfig(
                                   event_key=generate_job_status_key('1_job'),
                                   event_value=State.FINISHED.value,
                                   action=TaskAction.RESTART)))
    workflow.add_edges("2_job", deps)
    workflow.workflow_id = 1
    return workflow
def create_bash_workflow() -> Workflow:
    workflow = Workflow()
    workflow.project_desc = ProjectDesc()
    workflow.project_desc.project_name = "workflow_1"
    for i in range(3):
        job = TestDAGGenerator.create_bash_job(i)
        workflow.add_job(job)
    deps_1 = [
        JobControlEdge(target_node_id='',
                       source_node_id='1_job',
                       met_config=MetConfig(event_key='key_1',
                                            event_value='value_1',
                                            event_type=UNDEFINED_EVENT_TYPE))
    ]
    deps_2 = [
        JobControlEdge(target_node_id='',
                       source_node_id='2_job',
                       met_config=MetConfig(event_key='key_2',
                                            event_value='value_2',
                                            event_type=UNDEFINED_EVENT_TYPE))
    ]
    workflow.add_edges("1_job", deps_1)
    workflow.add_edges("2_job", deps_2)
    workflow.workflow_id = 1
    return workflow
def create_stream_workflow() -> Workflow:
    workflow = Workflow()
    for i in range(3):
        job = TestScheduler.create_job(i, 5)
        workflow.add_job(job)
    deps = []
    deps.append(JobControlEdge(target_node_id='0_job',
                               source_node_id='2_job',
                               met_config=MetConfig(
                                   event_key=generate_job_status_key('0_job'),
                                   event_value=State.FINISHED.value)))
    deps.append(JobControlEdge(target_node_id='1_job',
                               source_node_id='2_job',
                               met_config=MetConfig(
                                   event_key=generate_job_status_key('1_job'),
                                   event_value=State.FINISHED.value)))
    workflow.add_edges("2_job", deps)
    workflow.workflow_id = 1
    job = TestScheduler.create_job(3, 1)
    workflow.add_job(job)
    dep2 = []
    dep2.append(JobControlEdge(target_node_id='0_job',
                               source_node_id='3_job',
                               met_config=MetConfig(event_key='key1',
                                                    event_value='value1')))
    workflow.add_edges('3_job', dep2)
    return workflow
def create_workflow() -> Workflow:
    workflow = Workflow()
    workflow.project_desc = ProjectDesc()
    workflow.project_desc.project_name = "workflow_1"
    for i in range(6):
        job = TestDAGGenerator.create_dummy_job(i)
        if i == 2:
            job.job_config = SendEventJobConfig('localhost:50051', 'key_1',
                                                'value_1', UNDEFINED_EVENT_TYPE)
        elif i == 3:
            job.job_config = SendEventJobConfig('localhost:50051', 'key_2',
                                                'value_2', UNDEFINED_EVENT_TYPE)
        elif i == 5:
            job.job_config = SendEventJobConfig('localhost:50051', 'key_2',
                                                'value_2', "STOP_SCHEDULER_CMD")
        workflow.add_job(job)
    # Edges attached to 2_job: met when 0_job and 1_job report the FINISHED status.
    dependencies = [
        JobControlEdge(target_node_id='0_job',
                       source_node_id='2_job',
                       met_config=MetConfig(
                           event_key=generate_job_status_key('0_job'),
                           event_value=State.FINISHED.value)),
        JobControlEdge(target_node_id='1_job',
                       source_node_id='2_job',
                       met_config=MetConfig(
                           event_key=generate_job_status_key('1_job'),
                           event_value=State.FINISHED.value))
    ]
    workflow.add_edges("2_job", dependencies)
    # Edges attached to 4_job: met by the user-defined events sent by 2_job and 3_job.
    dependencies = [
        JobControlEdge(target_node_id='2_job',
                       source_node_id='4_job',
                       met_config=MetConfig(event_key='key_1',
                                            event_value='value_1',
                                            event_type=UNDEFINED_EVENT_TYPE)),
        JobControlEdge(target_node_id='3_job',
                       source_node_id='4_job',
                       met_config=MetConfig(event_key='key_2',
                                            event_value='value_2',
                                            event_type=UNDEFINED_EVENT_TYPE))
    ]
    workflow.add_edges("4_job", dependencies)
    dependencies = [
        JobControlEdge(target_node_id='4_job',
                       source_node_id='5_job',
                       met_config=MetConfig(
                           event_key=generate_job_status_key('5_job'),
                           event_value=State.FINISHED.value))
    ]
    workflow.add_edges("5_job", dependencies)
    workflow.workflow_id = 1
    return workflow
def build_workflow(self, split_graph: SplitGraph,
                   project_context: ProjectContext) -> Workflow:
    workflow = Workflow()
    workflow.workflow_config = current_workflow_config()
    workflow.workflow_snapshot_id = '{}.{}.{}'.format(
        project_context.project_name, workflow.workflow_name,
        round(time.time() * 1000))
    # add ai_nodes to workflow
    for sub in split_graph.nodes.values():
        if sub.config.job_type not in self.job_generator_registry.object_dict:
            raise Exception("job generator not support job_type {}".format(
                sub.config.job_type))
        generator: JobGenerator = self.job_generator_registry \
            .get_object(sub.config.job_type)
        # set job resource dir
        job_resource_dir = os.path.join(project_context.get_generated_path(),
                                        workflow.workflow_snapshot_id,
                                        sub.config.job_name)
        if not os.path.exists(job_resource_dir):
            os.makedirs(job_resource_dir)
        job: Job = generator.generate(sub_graph=sub,
                                      resource_dir=job_resource_dir)
        job.resource_dir = job_resource_dir
        # set input output dataset
        for node in sub.nodes.values():
            if isinstance(node, ReadDatasetNode):
                job.input_dataset_list.append(node.dataset())
            elif isinstance(node, WriteDatasetNode):
                job.output_dataset_list.append(node.dataset())
        workflow.add_job(job)

    def validate_edge(head, tail):
        if head not in workflow.jobs:
            raise Exception(
                'job: {} is not defined in workflow!'.format(head))
        if tail is not None and tail != '' and tail != '*' \
                and tail not in workflow.jobs:
            raise Exception(
                'job: {} is not defined in workflow!'.format(tail))

    # add edges to workflow
    for edges in split_graph.edges.values():
        for e in edges:
            control_edge = copy.deepcopy(e)
            validate_edge(control_edge.destination, control_edge.source)
            workflow.add_edge(control_edge.destination, control_edge)
    return workflow
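A minimal sketch of a generator that build_workflow above could dispatch to. The class name is a hypothetical illustration; only the generate(sub_graph, resource_dir) call shape and the Job(job_config=...) constructor are taken from the snippets in this listing.

# Hypothetical example only: DummyJobGenerator is an assumption for illustration;
# the generate(sub_graph, resource_dir) signature mirrors the call made in
# build_workflow above.
class DummyJobGenerator(JobGenerator):

    def generate(self, sub_graph, resource_dir: str = None) -> Job:
        # Wrap the sub-graph's job config into a Job without writing any
        # extra files into resource_dir.
        return Job(job_config=sub_graph.config)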
def create_longrun_workflow() -> Workflow:
    workflow = Workflow()
    job_0 = TestScheduler.create_job(0, 10000)
    workflow.add_job(job_0)
    job_1 = TestScheduler.create_job(1, 1)
    workflow.add_job(job_1)
    deps = []
    deps.append(JobControlEdge(target_node_id='1_job',
                               source_node_id='0_job',
                               met_config=MetConfig(event_key='key_1',
                                                    event_value='value_1',
                                                    event_type='stop',
                                                    action=TaskAction.STOP)))
    workflow.add_edges("0_job", deps)
    workflow.workflow_id = 1
    return workflow
def build_workflow(self, split_graph: SplitGraph,
                   project_desc: ProjectDesc) -> Workflow:
    sub_id_to_job_id = {}
    workflow = Workflow()
    workflow.project_desc = project_desc
    # add ai_nodes to workflow
    for sub in split_graph.nodes.values():
        if (sub.config.platform, sub.config.engine) \
                not in self.job_generator_registry.object_dict:
            raise Exception(
                "job generator not support platform {} engine {}".format(
                    sub.config.platform, sub.config.engine))
        generator: BaseJobGenerator = self.job_generator_registry \
            .get_object((sub.config.platform, sub.config.engine))
        job: BaseJob = generator.generate(sub_graph=sub,
                                          project_desc=project_desc)
        job.job_config.project_desc = project_desc
        if job.job_name is None:
            job.job_name = job.job_config.job_name
        workflow.add_job(job)
        sub_id_to_job_id[sub.instance_id] = job.instance_id
    # add edges to workflow
    for edges in split_graph.edges.values():
        for e in edges:
            control_edge = copy.deepcopy(e)
            control_edge.source_node_id = sub_id_to_job_id[e.source_node_id]
            control_edge.target_node_id = sub_id_to_job_id[e.target_node_id]
            job_edge: JobControlEdge = control_edge_to_job_edge(
                control_edge=control_edge)
            workflow.add_edge(sub_id_to_job_id[e.source_node_id], job_edge)
    self.register_workflow_execution(workflow)
    # generate per-job resources after the workflow execution is registered
    for job in workflow.jobs.values():
        job.job_config.project_path = project_desc.project_path
        if job.job_config.exec_mode is None:
            job.job_config.exec_mode = ExecutionMode.BATCH
        generator: BaseJobGenerator = self.job_generator_registry \
            .get_object((job.platform, job.exec_engine))
        generator.generate_job_resource(job)
    return workflow
def test_workflow_serde(self):
    workflow_config_file = os.path.join(os.path.dirname(__file__),
                                        'workflow_1.yaml')
    workflow_config = load_workflow_config(workflow_config_file)
    workflow = Workflow()
    workflow.workflow_config = workflow_config
    jobs = []
    for job_config in workflow_config.job_configs.values():
        job = Job(job_config=job_config)
        workflow.add_job(job)
        jobs.append(job)
    edge = ControlEdge(destination=jobs[0].job_name,
                       scheduling_rule=SchedulingRule(
                           MeetAnyEventCondition().add_event('a', 'a'),
                           JobAction.START))
    workflow.add_edge(jobs[0].job_name, edge)
    edge = ControlEdge(destination=jobs[0].job_name,
                       scheduling_rule=SchedulingRule(
                           MeetAnyEventCondition().add_event('b', 'b'),
                           JobAction.START))
    workflow.add_edge(jobs[0].job_name, edge)
    json_text = json_utils.dumps(workflow)
    w: Workflow = json_utils.loads(json_text)
    self.assertEqual(3, len(w.jobs))
    self.assertEqual(2, len(w.edges.get(jobs[0].job_name)))
def create_workflow_one_job() -> Workflow:
    workflow = Workflow()
    workflow.workflow_id = 1
    job = TestScheduler.create_job(0, 1)
    workflow.add_job(job)
    return workflow
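A hedged usage sketch tying the factory above to the serialization check from test_workflow_serde. The unittest scaffolding and test names are assumptions for illustration; only create_workflow_one_job, json_utils, and the Workflow attributes exercised here come from the snippets in this listing.

# Hypothetical usage sketch: class and method names are illustrative assumptions.
import unittest


class WorkflowFactoryTest(unittest.TestCase):

    def test_one_job_workflow_round_trip(self):
        workflow = create_workflow_one_job()
        self.assertEqual(1, workflow.workflow_id)
        self.assertEqual(1, len(workflow.jobs))
        # Serialize and deserialize, then check the job set survives the round trip.
        restored: Workflow = json_utils.loads(json_utils.dumps(workflow))
        self.assertEqual(1, len(restored.jobs))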