Example #1
0
def set_topological_dag_upstreams(dag, ops, op_runs, runs_by_ops):
    """Wire upstream runs for each operation run, visiting ops in topological order.

    Iterates the dag in a topologically sorted order so that every op's
    upstreams are processed before the op itself, resolving each op id to
    its run via `runs_by_ops` and delegating to `set_op_upstreams`.
    """
    for operation_id in dags.sort_topologically(dag=dag):
        run = op_runs[runs_by_ops[operation_id]]
        set_op_upstreams(op_run=run, op=ops[operation_id])
Example #2
0
def pipelines_start(self: 'celery_app.task', pipeline_run_id: int) -> None:
    """Schedule the startable operation runs of a pipeline run.

    Marks the pipeline run as scheduled, walks its dag in topological order,
    and starts the op runs still in the CREATED state, up to the pipeline's
    concurrency limit. If work remains (or a run could not start yet), the
    task retries itself after `Intervals.PIPELINES_SCHEDULER` seconds.

    Args:
        pipeline_run_id: id of the pipeline run to start.
    """
    pipeline_run = get_valid_pipeline_run(pipeline_run_id=pipeline_run_id)
    if not pipeline_run:
        _logger.info('Pipeline `%s` does not exist any more.', pipeline_run_id)
        # Bug fix: without this return, `pipeline_run.on_schedule()` below
        # would raise AttributeError on None.
        return

    pipeline_run.on_schedule()
    dag, op_runs = pipeline_run.dag
    sorted_ops = dags.sort_topologically(dag=dag)
    op_runs_to_start = [
        op_runs[op_run_id] for op_run_id in sorted_ops
        if op_runs[op_run_id].last_status == OperationStatuses.CREATED
    ]
    concurrency = pipeline_run.pipeline.n_operation_runs_to_start
    future_check = False
    while op_runs_to_start and concurrency > 0:
        op_run = op_runs_to_start.pop()
        if start_operation_run(op_run):
            # A truthy result appears to mean the run could not be started
            # now and needs a future attempt — TODO confirm against
            # `start_operation_run`'s contract.
            future_check = True
        else:
            concurrency -= 1

    if op_runs_to_start or future_check:
        # Work remains: schedule another pass of this task.
        self.retry(countdown=Intervals.PIPELINES_SCHEDULER)
Example #3
0
    def test_sort_topologically(self):
        """`sort_topologically` returns the expected order and rejects cycles."""
        cases = [
            (self.dag1, [1, 5, 6, 2, 4]),
            (self.dag2, [1, 5, 6, 9, 2, 7, 3]),
            (self.dag3, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]),
            (self.dag4, [0, 7, 1, 2, 3, 5]),
        ]
        for graph, expected_order in cases:
            assert dags.sort_topologically(graph) == expected_order

        # Graphs containing cycles cannot be topologically sorted.
        for cyclic_graph in (self.cycle1, self.cycle2):
            with self.assertRaises(ValueError):
                dags.sort_topologically(cyclic_graph)
Example #4
0
def pipelines_start(self, pipeline_run_id):
    """Schedule the startable operation runs of a pipeline run.

    Marks the pipeline run as scheduled, walks its dag in topological order,
    and calls `schedule_start()` on op runs still in the CREATED state, up to
    the pipeline's concurrency limit. If work remains (or a run could not
    start yet), the task retries itself after
    `Intervals.PIPELINES_SCHEDULER` seconds.

    Args:
        pipeline_run_id: id of the pipeline run to start.
    """
    pipeline_run = get_pipeline_run(pipeline_run_id=pipeline_run_id)
    if not pipeline_run:
        _logger.info('Pipeline `%s` does not exist any more.', pipeline_run_id)
        # Bug fix: without this return, `pipeline_run.on_schedule()` below
        # would raise AttributeError on None.
        return

    pipeline_run.on_schedule()
    dag, op_runs = pipeline_run.dag
    sorted_ops = dags.sort_topologically(dag=dag)
    op_runs_to_start = [op_runs[op_run_id] for op_run_id in sorted_ops
                        if op_runs[op_run_id].last_status == OperationStatuses.CREATED]
    concurrency = pipeline_run.pipeline.n_operation_runs_to_start
    future_check = False
    while op_runs_to_start and concurrency > 0:
        op_run = op_runs_to_start.pop()
        if op_run.schedule_start():
            # A truthy result appears to mean the run could not be started
            # now and needs a future attempt — TODO confirm against
            # `schedule_start`'s contract.
            future_check = True
        else:
            concurrency -= 1

    if op_runs_to_start or future_check:
        # Work remains: schedule another pass of this task.
        self.retry(countdown=Intervals.PIPELINES_SCHEDULER)