def set_topological_dag_upstreams(dag, ops, op_runs, runs_by_ops):
    """Wire up the upstream runs of each operation run, visiting the dag in topological order.

    Args:
        dag: the graph handed to `dags.sort_topologically`.
        ops: mapping indexed by operation id, yielding the operation.
        op_runs: mapping indexed by operation run id, yielding the operation run.
        runs_by_ops: mapping indexed by operation id, yielding the id of its operation run.
    """
    for operation_id in dags.sort_topologically(dag=dag):
        # Resolve operation -> run id -> run, then let the helper attach the upstreams.
        run_id = runs_by_ops[operation_id]
        set_op_upstreams(op_run=op_runs[run_id], op=ops[operation_id])
def pipelines_start(self: 'celery_app.task', pipeline_run_id: int) -> None:
    """Start the pending operation runs of a pipeline run, within its concurrency budget.

    The pipeline run is looked up by id; if it cannot be found the event is logged and
    the function returns without doing anything else. Otherwise `on_schedule()` is
    called on the run, its operation runs are ordered topologically, and those whose
    last status is `OperationStatuses.CREATED` are handed to `start_operation_run`
    until either no candidates remain or the budget
    (`pipeline.n_operation_runs_to_start`) is used up. If candidates remain, or any
    start attempt asked for a later check, the task re-schedules itself via
    `self.retry`.

    Args:
        self: the task object this function is bound to (presumably a bound Celery
            task -- confirm against the decorator); only `self.retry` is used.
        pipeline_run_id: id of the pipeline run to process.
    """
    pipeline_run = get_valid_pipeline_run(pipeline_run_id=pipeline_run_id)
    if not pipeline_run:
        _logger.info('Pipeline `%s` does not exist any more.', pipeline_run_id)
        # Stop here: everything below dereferences `pipeline_run`.
        return

    pipeline_run.on_schedule()
    dag, op_runs = pipeline_run.dag
    sorted_ops = dags.sort_topologically(dag=dag)
    op_runs_to_start = [
        op_runs[op_run_id]
        for op_run_id in sorted_ops
        if op_runs[op_run_id].last_status == OperationStatuses.CREATED
    ]
    concurrency = pipeline_run.pipeline.n_operation_runs_to_start
    future_check = False
    while op_runs_to_start and concurrency > 0:
        # pop() takes from the end of the list, i.e. candidates are tried in reverse
        # of the topological order. NOTE(review): confirm this order is intended.
        op_run = op_runs_to_start.pop()
        if start_operation_run(op_run):
            # A truthy result does not consume a concurrency slot and requests another
            # pass later (presumably the run could not be started yet -- confirm
            # against start_operation_run).
            future_check = True
        else:
            concurrency -= 1

    if op_runs_to_start or future_check:
        # Schedule another task
        self.retry(countdown=Intervals.PIPELINES_SCHEDULER)
def test_sort_topologically(self):
    """Check `dags.sort_topologically` on the acyclic fixtures and on the cyclic ones.

    Acyclic fixtures must produce the exact expected ordering; cyclic fixtures must
    raise `ValueError`.
    """
    # Fixture attribute name -> expected ordering. Attributes are resolved lazily,
    # one per iteration, just before the corresponding call.
    expected_orderings = (
        ('dag1', [1, 5, 6, 2, 4]),
        ('dag2', [1, 5, 6, 9, 2, 7, 3]),
        ('dag3', [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]),
        ('dag4', [0, 7, 1, 2, 3, 5]),
    )
    for fixture_name, expected in expected_orderings:
        assert dags.sort_topologically(getattr(self, fixture_name)) == expected

    # Cycles
    for fixture_name in ('cycle1', 'cycle2'):
        with self.assertRaises(ValueError):
            assert dags.sort_topologically(getattr(self, fixture_name))
def pipelines_start(self, pipeline_run_id):
    """Start the pending operation runs of a pipeline run, within its concurrency budget.

    The pipeline run is looked up by id; if it cannot be found the event is logged and
    the function returns without doing anything else. Otherwise `on_schedule()` is
    called on the run, its operation runs are ordered topologically, and those whose
    last status is `OperationStatuses.CREATED` get `schedule_start()` called on them
    until either no candidates remain or the budget
    (`pipeline.n_operation_runs_to_start`) is used up. If candidates remain, or any
    start attempt asked for a later check, the task re-schedules itself via
    `self.retry`.

    Args:
        self: the task object this function is bound to (presumably a bound Celery
            task -- confirm against the decorator); only `self.retry` is used.
        pipeline_run_id: id of the pipeline run to process.
    """
    pipeline_run = get_pipeline_run(pipeline_run_id=pipeline_run_id)
    if not pipeline_run:
        _logger.info('Pipeline `%s` does not exist any more.', pipeline_run_id)
        # Stop here: everything below dereferences `pipeline_run`.
        return

    pipeline_run.on_schedule()
    dag, op_runs = pipeline_run.dag
    sorted_ops = dags.sort_topologically(dag=dag)
    op_runs_to_start = [
        op_runs[op_run_id]
        for op_run_id in sorted_ops
        if op_runs[op_run_id].last_status == OperationStatuses.CREATED
    ]
    concurrency = pipeline_run.pipeline.n_operation_runs_to_start
    future_check = False
    while op_runs_to_start and concurrency > 0:
        # pop() takes from the end of the list, i.e. candidates are tried in reverse
        # of the topological order. NOTE(review): confirm this order is intended.
        op_run = op_runs_to_start.pop()
        if op_run.schedule_start():
            # A truthy result does not consume a concurrency slot and requests another
            # pass later (presumably the run could not be started yet -- confirm
            # against schedule_start).
            future_check = True
        else:
            concurrency -= 1

    if op_runs_to_start or future_check:
        # Schedule another task
        self.retry(countdown=Intervals.PIPELINES_SCHEDULER)