def test_list_workflows(self):
    with mock.patch(
            'ai_flow.test.scheduler.test_scheduling_service.MockScheduler'
    ) as mockScheduler:
        instance = mockScheduler.return_value
        self.server.scheduling_service._scheduler = instance
        instance.list_workflows.return_value = [
            WorkflowInfo(workflow_name='test_workflow_1'),
            WorkflowInfo(workflow_name='test_workflow_2')
        ]
        client = SchedulingClient("localhost:{}".format(_PORT))
        workflow_list = client.list_workflows(namespace='namespace')
        # assertTrue(2, ...) always passes because the truthy first argument
        # is the only thing checked; assertEqual performs the intended test.
        self.assertEqual(2, len(workflow_list))
def list_jobs(self, execution_id: Text) -> List[JobInfo]:
    with create_session() as session:
        dag_run = session.query(DagRun).filter(
            DagRun.run_id == execution_id).first()
        if dag_run is None:
            # Unknown execution: callers must handle None despite the
            # List[JobInfo] annotation.
            return None
        task_list = session.query(TaskInstance).filter(
            TaskInstance.dag_id == dag_run.dag_id,
            TaskInstance.execution_date == dag_run.execution_date).all()
        # Query.all() returns a list, never None, so test for emptiness.
        if not task_list:
            return []
        project_name, workflow_name = self.dag_id_to_namespace_workflow(
            dag_run.dag_id)
        result = []
        for task in task_list:
            job = JobInfo(
                job_name=task.task_id,
                state=self.airflow_state_to_state(task.state),
                workflow_execution=WorkflowExecutionInfo(
                    workflow_info=WorkflowInfo(
                        namespace=project_name,
                        workflow_name=workflow_name),
                    execution_id=dag_run.run_id,
                    state=self.airflow_state_to_state(dag_run.state)))
            result.append(job)
        return result
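# Usage sketch for list_jobs (illustrative only): `scheduler` stands in for
# an instance of the scheduler class these methods belong to, and the helper
# name is hypothetical.
def print_jobs_of_execution(scheduler, execution_id):
    jobs = scheduler.list_jobs(execution_id)
    if jobs is None:
        # None means the execution id matched no dag run.
        print('unknown execution: {}'.format(execution_id))
        return
    for job in jobs:
        print('{}: {}'.format(job.job_name, job.state))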
def restart_job(self, job_name: Text,
                execution_id: Text) -> Optional[JobInfo]:
    with create_session() as session:
        dag_run = session.query(DagRun).filter(
            DagRun.run_id == execution_id).first()
        if dag_run is None:
            return None
        # Jobs can only be restarted while the owning dag run is running.
        if dag_run.state != State.RUNNING:
            raise Exception(
                'execution: {} state: {} cannot restart job.'.format(
                    execution_id, dag_run.state))
        task = dag_run.get_task_instance(job_name, session)
        if task is None:
            return None
        self.airflow_client.schedule_task(
            dag_id=dag_run.dag_id,
            task_id=job_name,
            action=SchedulingAction.RESTART,
            context=ExecutionContext(dagrun_id=dag_run.run_id))
        project_name, workflow_name = self.dag_id_to_namespace_workflow(
            dag_run.dag_id)
        return JobInfo(
            job_name=job_name,
            state=self.airflow_state_to_state(task.state),
            workflow_execution=WorkflowExecutionInfo(
                workflow_info=WorkflowInfo(namespace=project_name,
                                           workflow_name=workflow_name),
                execution_id=dag_run.run_id,
                state=self.airflow_state_to_state(dag_run.state)))
def delete_workflow(self, project_name: Text,
                    workflow_name: Text) -> Optional[WorkflowInfo]:
    dag_id = self.airflow_dag_id(project_name, workflow_name)
    if not self.dag_exist(dag_id):
        return None
    deploy_path = self.config.properties().get('airflow_deploy_path')
    if deploy_path is None:
        raise Exception("airflow_deploy_path config not set!")
    # Remove the deployed DAG file so the scheduler stops picking it up.
    airflow_file_path = os.path.join(deploy_path, dag_id + '.py')
    if os.path.exists(airflow_file_path):
        os.remove(airflow_file_path)

    # Stop all workflow executions.
    self.kill_all_workflow_execution(project_name, workflow_name)

    # Clean up the DAG's metadata from the Airflow database.
    with create_session() as session:
        dag = session.query(DagModel).filter(
            DagModel.dag_id == dag_id).first()
        session.query(DagTag).filter(DagTag.dag_id == dag_id).delete()
        session.query(DagModel).filter(DagModel.dag_id == dag_id).delete()
        session.query(DagCode).filter(
            DagCode.fileloc_hash == DagCode.dag_fileloc_hash(
                dag.fileloc)).delete()
        session.query(SerializedDagModel).filter(
            SerializedDagModel.dag_id == dag_id).delete()
        session.query(DagRun).filter(DagRun.dag_id == dag_id).delete()
        session.query(TaskState).filter(
            TaskState.dag_id == dag_id).delete()
        session.query(TaskInstance).filter(
            TaskInstance.dag_id == dag_id).delete()
        session.query(TaskExecution).filter(
            TaskExecution.dag_id == dag_id).delete()
    return WorkflowInfo(namespace=project_name,
                        workflow_name=workflow_name)
def submit_workflow(self,
                    workflow: Workflow,
                    project_desc: ProjectDesc,
                    args: Dict = None) -> WorkflowInfo:
    workflow_name = workflow.workflow_name
    dag_id = self.airflow_dag_id(project_desc.project_name,
                                 workflow.workflow_name)
    code_text = self.dag_generator.generator(workflow, dag_id, args)
    # The generator may mutate workflow.workflow_name, so restore it.
    workflow.workflow_name = workflow_name
    deploy_path = self.config.properties().get('airflow_deploy_path')
    if deploy_path is None:
        raise Exception("airflow_deploy_path config not set!")
    if not os.path.exists(deploy_path):
        os.makedirs(deploy_path)
    airflow_file_path = os.path.join(deploy_path, dag_id + '.py')
    if os.path.exists(airflow_file_path):
        os.remove(airflow_file_path)
    # Write the generated DAG to a temp file first, then move it into place
    # so the scheduler never sees a half-written file. Note that os.rename
    # requires /tmp and deploy_path to be on the same filesystem.
    with NamedTemporaryFile(mode='w+t',
                            prefix=dag_id,
                            suffix='.py',
                            dir='/tmp',
                            delete=False) as f:
        f.write(code_text)
    os.rename(f.name, airflow_file_path)
    self.airflow_client.trigger_parse_dag(airflow_file_path)
    return WorkflowInfo(namespace=project_desc.project_name,
                        workflow_name=workflow.workflow_name)
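# A minimal end-to-end sketch tying submit_workflow to
# start_new_workflow_execution (defined below). `scheduler`, `workflow` and
# `project_desc` are assumed to come from an ai_flow project; the helper name
# is hypothetical, and WorkflowInfo is assumed to expose .namespace and
# .workflow_name attributes matching its constructor arguments.
def deploy_and_run(scheduler, workflow, project_desc):
    # Deploy the generated DAG file and ask Airflow to parse it.
    info = scheduler.submit_workflow(workflow, project_desc)
    # Trigger a new dag run; this returns None if the DAG has not been
    # parsed yet, so callers may need to retry shortly after submission.
    return scheduler.start_new_workflow_execution(info.namespace,
                                                  info.workflow_name)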
def get_workflow(self, project_name: Text,
                 workflow_name: Text) -> Optional[WorkflowInfo]:
    dag_id = self.airflow_dag_id(project_name, workflow_name)
    with create_session() as session:
        dag = session.query(DagModel).filter(
            DagModel.dag_id == dag_id).first()
        if dag is None:
            return None
        return WorkflowInfo(namespace=project_name,
                            workflow_name=workflow_name)
def test_get_workflow(self):
    with mock.patch(
            'ai_flow.test.scheduler.test_scheduling_service.MockScheduler'
    ) as mockScheduler:
        instance = mockScheduler.return_value
        self.server.scheduling_service._scheduler = instance
        instance.get_workflow.return_value = WorkflowInfo(
            workflow_name='test_workflow')
        client = SchedulingClient("localhost:{}".format(_PORT))
        workflow = client.get_workflow(namespace='namespace',
                                       workflow_name='test_workflow')
        # assertTrue with two arguments treats the second as a failure
        # message; assertEqual performs the intended comparison.
        self.assertEqual('test_workflow', workflow.name)
def start_new_workflow_execution(
        self, project_name: Text,
        workflow_name: Text) -> Optional[WorkflowExecutionInfo]:
    dag_id = self.airflow_dag_id(project_name, workflow_name)
    deploy_path = self.config.properties().get('airflow_deploy_path')
    if deploy_path is None:
        raise Exception("airflow_deploy_path config not set!")
    if not self.dag_exist(dag_id):
        return None
    context: ExecutionContext = self.airflow_client.schedule_dag(dag_id)
    return WorkflowExecutionInfo(
        workflow_info=WorkflowInfo(namespace=project_name,
                                   workflow_name=workflow_name),
        execution_id=context.dagrun_id,
        state=job_meta.State.INIT)
def get_workflow_execution(
        self, execution_id: Text) -> Optional[WorkflowExecutionInfo]:
    with create_session() as session:
        dag_run = session.query(DagRun).filter(
            DagRun.run_id == execution_id).first()
        if dag_run is None:
            return None
        state = self.airflow_state_to_state(dag_run.state)
        project_name, workflow_name = self.dag_id_to_namespace_workflow(
            dag_run.dag_id)
        return WorkflowExecutionInfo(
            workflow_info=WorkflowInfo(namespace=project_name,
                                       workflow_name=workflow_name),
            execution_id=dag_run.run_id,
            state=state)
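# Polling sketch built on get_workflow_execution. The helper name and the
# idea of passing terminal states explicitly are assumptions; supply whatever
# terminal values this codebase's state enum actually defines.
import time

def wait_for_execution(scheduler, execution_id, terminal_states,
                       timeout=300.0, interval=5.0):
    deadline = time.time() + timeout
    while time.time() < deadline:
        execution = scheduler.get_workflow_execution(execution_id)
        if execution is not None and execution.state in terminal_states:
            return execution
        time.sleep(interval)
    raise TimeoutError(
        'execution {} did not reach a terminal state'.format(execution_id))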
def list_workflows(self, project_name: Text) -> List[WorkflowInfo]:
    with create_session() as session:
        dag_list = session.query(DagModel).filter(
            DagModel.dag_id.startswith('{}.'.format(project_name))).all()
        # Query.all() returns a list, never None, so test for emptiness.
        if not dag_list:
            return []
        result = []
        for dag in dag_list:
            ns, workflow_name = self.parse_namespace_workflow_name(
                dag.dag_id)
            result.append(
                WorkflowInfo(namespace=project_name,
                             workflow_name=workflow_name))
        return result
def kill_workflow_execution(
        self, execution_id: Text) -> Optional[WorkflowExecutionInfo]:
    with create_session() as session:
        dag_run = session.query(DagRun).filter(
            DagRun.run_id == execution_id).first()
        if dag_run is None:
            return None
        project_name, workflow_name = self.dag_id_to_namespace_workflow(
            dag_run.dag_id)
        context: ExecutionContext = ExecutionContext(execution_id)
        current_context = self.airflow_client.stop_dag_run(
            dag_run.dag_id, context)
        return WorkflowExecutionInfo(
            workflow_info=WorkflowInfo(namespace=project_name,
                                       workflow_name=workflow_name),
            execution_id=current_context.dagrun_id,
            state=job_meta.State.KILLING)
def list_workflow_executions(
        self, project_name: Text,
        workflow_name: Text) -> List[WorkflowExecutionInfo]:
    dag_id = self.airflow_dag_id(project_name, workflow_name)
    with create_session() as session:
        dagrun_list = session.query(DagRun).filter(
            DagRun.dag_id == dag_id).all()
        # Query.all() returns a list, never None, so test for emptiness.
        if not dagrun_list:
            return []
        result = []
        for dagrun in dagrun_list:
            state = self.airflow_state_to_state(dagrun.state)
            result.append(
                WorkflowExecutionInfo(
                    workflow_info=WorkflowInfo(
                        namespace=project_name,
                        workflow_name=workflow_name),
                    execution_id=dagrun.run_id,
                    state=state))
        return result
def get_job(self, job_name: Text,
            execution_id: Text) -> Optional[JobInfo]:
    with create_session() as session:
        dag_run = session.query(DagRun).filter(
            DagRun.run_id == execution_id).first()
        if dag_run is None:
            return None
        task = session.query(TaskInstance).filter(
            TaskInstance.dag_id == dag_run.dag_id,
            TaskInstance.execution_date == dag_run.execution_date,
            TaskInstance.task_id == job_name).first()
        if task is None:
            return None
        project_name, workflow_name = self.dag_id_to_namespace_workflow(
            dag_run.dag_id)
        return JobInfo(
            job_name=job_name,
            state=self.airflow_state_to_state(task.state),
            workflow_execution=WorkflowExecutionInfo(
                workflow_info=WorkflowInfo(
                    namespace=project_name,
                    workflow_name=workflow_name),
                execution_id=dag_run.run_id,
                state=self.airflow_state_to_state(dag_run.state)))
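# Sketch combining get_job with restart_job (defined above). The
# `failed_state` parameter is a placeholder for the failed value of this
# codebase's state enum; the helper name is hypothetical.
def restart_if_failed(scheduler, job_name, execution_id, failed_state):
    job = scheduler.get_job(job_name, execution_id)
    if job is None:
        return None
    if job.state == failed_state:
        # restart_job raises when the owning dag run is not RUNNING.
        return scheduler.restart_job(job_name, execution_id)
    return job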
def proto_to_workflow(proto: WorkflowProto) -> WorkflowInfo:
    if proto is None:
        return None
    return WorkflowInfo(namespace=proto.namespace,
                        workflow_name=proto.name)
def resume_workflow_scheduling(self, project_name: Text,
                               workflow_name: Text) -> WorkflowInfo:
    dag_id = self.airflow_dag_id(project_name, workflow_name)
    # DagModel.get_dagmodel returns None for an unknown dag_id, so this
    # raises AttributeError if the workflow was never submitted.
    DagModel.get_dagmodel(dag_id=dag_id).set_is_paused(is_paused=False)
    return WorkflowInfo(namespace=project_name,
                        workflow_name=workflow_name)
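# Usage sketch for resume_workflow_scheduling. Because DagModel.get_dagmodel
# returns None for an unknown dag_id, checking with get_workflow first avoids
# an AttributeError; the helper name is hypothetical.
def safe_resume(scheduler, project_name, workflow_name):
    if scheduler.get_workflow(project_name, workflow_name) is None:
        return None
    return scheduler.resume_workflow_scheduling(project_name,
                                                workflow_name)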