コード例 #1
0
    def with_dependencies(self, dag_collection):
        """Perform a dry run to get upstream dependencies.

        Every table returned by ``self._get_referenced_tables()`` is resolved
        to the task that ``dag_collection`` reports for it or, when there is
        no such task, to the first entry of ``EXTERNAL_TASKS`` whose patterns
        match ``<table[1]>.<table[2]>``. The resulting ``TaskRef`` objects are
        stored in ``self.dependencies``.

        Dependencies declared manually in ``self.depends_on`` take precedence
        and are never added again, and each automatically detected dependency
        is recorded at most once.

        Args:
            dag_collection: Object providing the ``task_for_table`` and
                ``dag_by_name`` lookups used to resolve tasks and schedules.

        Returns:
            None. The result is written to ``self.dependencies``.
        """
        dependencies = []

        if self.is_python_script:
            # dry run is not possible for python script queries; skip
            self.dependencies = dependencies
            return

        def _already_listed(dag_name, task_id):
            # Manual dependency definitions overwrite automatically detected
            # ones, and a detected dependency must only be recorded once, so
            # both the manual list and the list built so far are consulted.
            return any(
                d.dag_name == dag_name and d.task_id == task_id
                for d in [*self.depends_on, *dependencies]
            )

        referenced_tables = list(self._get_referenced_tables())
        if not referenced_tables:
            # nothing referenced: no DAG lookup is needed at all
            self.dependencies = dependencies
            return

        # The schedule of this task's own DAG does not change per table, so
        # it is looked up once instead of on every iteration.
        task_schedule_interval = dag_collection.dag_by_name(
            self.dag_name).schedule_interval

        for table in referenced_tables:
            upstream_task = dag_collection.task_for_table(
                table[0], table[1], table[2])

            if upstream_task is not None:
                if _already_listed(upstream_task.dag_name,
                                   upstream_task.task_name):
                    continue

                upstream_schedule_interval = dag_collection.dag_by_name(
                    upstream_task.dag_name).schedule_interval

                execution_delta = schedule_interval_delta(
                    upstream_schedule_interval, task_schedule_interval)

                # a zero offset is represented as "no execution delta"
                if execution_delta == "0s":
                    execution_delta = None

                dependencies.append(
                    TaskRef(
                        dag_name=upstream_task.dag_name,
                        task_id=upstream_task.task_name,
                        execution_delta=execution_delta,
                    ))
                continue

            # see if there are some static dependencies
            qualified_table = f"{table[1]}.{table[2]}"
            for task, patterns in EXTERNAL_TASKS.items():
                if not any(fnmatchcase(qualified_table, p) for p in patterns):
                    continue

                if not _already_listed(task.dag_name, task.task_id):
                    execution_delta = schedule_interval_delta(
                        task.schedule_interval, task_schedule_interval)

                    # NOTE(review): unlike the branch above, "0s" is kept
                    # as-is here and a falsy delta drops the dependency
                    # entirely -- confirm this asymmetry is intended.
                    if execution_delta:
                        dependencies.append(
                            TaskRef(
                                dag_name=task.dag_name,
                                task_id=task.task_id,
                                execution_delta=execution_delta,
                            ))
                break  # stop after the first match

        self.dependencies = dependencies
コード例 #2
0
ファイル: dag.py プロジェクト: Iinh/bigquery-etl
    def _create_export_task(self, task, dag_collection):
        if not task.public_json:
            raise ValueError(
                f"Task {task.task_name} not marked as public JSON.")

        converter = cattr.Converter()
        task_dict = converter.unstructure(task)

        del task_dict["dataset"]
        del task_dict["table"]
        del task_dict["version"]
        del task_dict["project"]

        export_task = converter.structure(task_dict, Task)
        export_task.dag_name = self.name
        export_task.task_name = f"export_public_data_json_{export_task.task_name}"

        task_schedule_interval = dag_collection.dag_by_name(
            task.dag_name).schedule_interval

        execution_delta = schedule_interval_delta(task_schedule_interval,
                                                  self.schedule_interval)

        if execution_delta == "0s":
            execution_delta = None

        export_task.dependencies = [
            TaskRef(
                dag_name=task.dag_name,
                task_id=task.task_name,
                execution_delta=execution_delta,
            )
        ]

        return export_task
コード例 #3
0
 def get_execution_delta(self, schedule_interval):
     """Return the execution delta, deriving it from schedules if unset.

     An explicitly set ``self.execution_delta`` always wins. Otherwise, when
     both ``self.schedule_interval`` and ``schedule_interval`` are available,
     the delta between them is computed; a result of ``"0s"`` is reported as
     ``None``.

     Args:
         schedule_interval: Schedule interval to compare against
             ``self.schedule_interval``; may be ``None``.

     Returns:
         The execution delta, or ``None`` when there is none to report.
     """
     explicit_delta = self.execution_delta
     if explicit_delta is not None:
         return explicit_delta

     # without both schedules there is nothing to derive a delta from
     if self.schedule_interval is None or schedule_interval is None:
         return None

     derived_delta = schedule_interval_delta(self.schedule_interval,
                                             schedule_interval)
     return derived_delta if derived_delta != "0s" else None
コード例 #4
0
 def test_schedule_interval_delta(self):
     """Check the delta reported between pairs of schedule intervals."""
     # (first interval, second interval, expected delta)
     expectations = (
         ("0 1 * * *", "0 2 * * *", "3600s"),
         ("0 2 * * *", "0 0 * * *", "-7200s"),
         ("* * * * *", "* * * * *", "0s"),
         ("0 1 * * *", "0 1 * * *", "0s"),
         ("daily", "0 1 * * *", "3600s"),
         ("daily", "0 0 * * *", "0s"),
     )
     for first, second, expected in expectations:
         assert schedule_interval_delta(first, second) == expected