Ejemplo n.º 1
0
    def _register_dagrun(self, dagrun, execution_date):
        """Start a Marquez task run for each task in ``self.task_dict``.

        For every task, metadata is extracted, a run is started through
        ``_MARQUEZ.start_task`` and the returned id is stored in
        ``JobIdMapping`` under the Marquez job name and ``dagrun.run_id``.
        Any ``Exception`` raised while handling one task is logged and the
        loop moves on to the next task.

        :param dagrun: DAG run whose tasks are being registered.
        :param execution_date: value from which the start and end times
            passed to ``start_task`` are derived via ``DagUtils``.
        """
        self.log.debug(f"self.task_dict: {self.task_dict}")
        for task_id, task in self.task_dict.items():
            # Timestamp taken up front so a failure can report elapsed time.
            began_ms = self._now_ms()
            try:
                metadata = self._extract_metadata(dagrun, task)
                marquez_job = self._marquez_job_name(self.dag_id, task.task_id)
                marquez_run = self._marquez_run_id(dagrun.run_id, task.task_id)

                event_time = DagUtils.to_iso_8601(self._now_ms())
                location = self._get_location(task)
                start_time = DagUtils.get_start_time(execution_date)
                end_time = DagUtils.get_end_time(
                    execution_date, self.following_schedule(execution_date))

                started_run_id = _MARQUEZ.start_task(
                    marquez_run,
                    marquez_job,
                    self.description,
                    event_time,
                    None,  # TODO: add parent hierarchy
                    location,
                    start_time,
                    end_time,
                    metadata)

                JobIdMapping.set(marquez_job, dagrun.run_id, started_run_id)
            except Exception as err:
                # Best effort: one failing task must not block the others.
                self.log.error(
                    f'Failed to record task {task_id}: {err} '
                    f'{self._timed_log_message(began_ms)}',
                    exc_info=True)
Ejemplo n.º 2
0
 def _register_dagrun(self, dagrun, execution_date, run_args):
     """Create Marquez jobs and runs for each task in ``self.task_dict``.

     For every task, the extracted steps are each registered with
     ``_MARQUEZ.create_job`` and then with ``_MARQUEZ.create_run``; the
     list of values returned by ``create_run`` is stored in
     ``JobIdMapping`` under the Marquez job name and ``dagrun.run_id``.
     Any ``Exception`` raised while handling one task is logged and the
     loop moves on to the next task.

     :param dagrun: DAG run whose tasks are being registered.
     :param execution_date: value from which the start and end times
         passed to ``create_run`` are derived via ``DagUtils``.
     :param run_args: run arguments forwarded unchanged to ``create_run``.
     """
     self.log.debug(f"self.task_dict: {self.task_dict}")
     # Register each task in the DAG
     for task_id, task in self.task_dict.items():
         t = self._now_ms()
         try:
             steps = self._extract_metadata(dagrun, task)
             # Plain loop: create_job is called for its side effect only,
             # so there is no result list worth building.
             for step in steps:
                 _MARQUEZ.create_job(step, self._get_location(task),
                                     self.description)
             marquez_jobrun_ids = [
                 _MARQUEZ.create_run(
                     self.new_run_id(), step, run_args,
                     DagUtils.get_start_time(execution_date),
                     DagUtils.get_end_time(
                         execution_date,
                         self.following_schedule(execution_date)))
                 for step in steps
             ]
             JobIdMapping.set(
                 self._marquez_job_name(self.dag_id, task.task_id),
                 dagrun.run_id, marquez_jobrun_ids)
         except Exception as e:
             # Best effort: one failing task must not block the others.
             self.log.error(
                 f'Failed to record task {task_id}: {e} '
                 f'{self._timed_log_message(t)}',
                 exc_info=True)
Ejemplo n.º 3
0
    def _register_dagrun(self, dagrun: DagRun, is_external_trigger: bool, execution_date: str):
        """Start a Marquez task run for each task in ``self.task_dict``.

        For every task, metadata is extracted, a run is started through
        ``_MARQUEZ.start_task`` (with ``dagrun.run_id`` as the fifth
        argument and the step's run facets merged with the custom facets)
        and the returned id is stored in ``JobIdMapping``. Any
        ``Exception`` raised while handling one task is logged and the loop
        moves on to the next task.

        :param dagrun: DAG run whose tasks are being registered.
        :param is_external_trigger: forwarded to ``get_custom_facets``.
        :param execution_date: value from which the start and end times
            passed to ``start_task`` are derived via ``DagUtils``.
        """
        self.log.debug(f"self.task_dict: {self.task_dict}")
        for task_id, task in self.task_dict.items():
            # Timestamp taken up front so a failure can report elapsed time.
            began_ms = self._now_ms()
            try:
                metadata = self._extract_metadata(dagrun, task)
                marquez_job = self._marquez_job_name(self.dag_id, task.task_id)
                marquez_run = self._marquez_run_id(dagrun.run_id, task.task_id)

                event_time = DagUtils.to_iso_8601(self._now_ms())
                parent_run = dagrun.run_id
                location = self._get_location(task)
                start_time = DagUtils.get_start_time(execution_date)
                end_time = DagUtils.get_end_time(
                    execution_date, self.following_schedule(execution_date))
                # Custom facets come second, so they win on key collisions.
                facets = {
                    **metadata.run_facets,
                    **get_custom_facets(task, is_external_trigger),
                }

                started_run_id = _MARQUEZ.start_task(
                    marquez_run,
                    marquez_job,
                    self.description,
                    event_time,
                    parent_run,
                    location,
                    start_time,
                    end_time,
                    metadata,
                    facets,
                )

                JobIdMapping.set(marquez_job, dagrun.run_id, started_run_id)
            except Exception as err:
                # Best effort: one failing task must not block the others.
                self.log.error(
                    f'Failed to record task {task_id}: {err} '
                    f'{self._timed_log_message(began_ms)}',
                    exc_info=True)
Ejemplo n.º 4
0
    def create_dagrun(self, *args, **kwargs):
        """Create the DAG run via the parent class, then record it in Marquez.

        The parent's ``create_dagrun`` is called first and its result is
        always returned. Recording metadata afterwards is best effort: any
        ``Exception`` raised while doing so is logged and not propagated.

        :param args: positional arguments forwarded to the parent.
        :param kwargs: keyword arguments forwarded to the parent; also used
            to derive the execution date and run arguments via ``DagUtils``.
        :return: the DAG run returned by the parent's ``create_dagrun``.
        """
        # The parent call sits outside the try block, so its errors propagate.
        dagrun = super(DAG, self).create_dagrun(*args, **kwargs)

        began_ms = self._now_ms()
        try:
            _MARQUEZ.create_namespace()
            execution_date = DagUtils.get_execution_date(**kwargs)
            run_args = DagUtils.get_run_args(**kwargs)
            self._register_dagrun(dagrun, execution_date, run_args)
        except Exception as err:
            self.log.error(
                f'Failed to record metadata: {err} '
                f'{self._timed_log_message(began_ms)}',
                exc_info=True)

        return dagrun
Ejemplo n.º 5
0
    def _report_task_instance(self, task_instance, dagrun, session):
        """Report the final state of one task instance to Marquez.

        The run id stored earlier is popped from ``JobIdMapping``. If none
        is found, a run is started on the spot using the task instance's
        own start and end dates. The run is then marked complete when the
        state is ``State.SUCCESS`` or ``State.SKIPPED`` and failed
        otherwise.

        :param task_instance: task instance whose state is reported.
        :param dagrun: DAG run the task instance belongs to.
        :param session: forwarded to ``JobIdMapping.pop``.
        """
        task = self.get_task(task_instance.task_id)

        # The stored id may be absent if it was removed from airflow or the
        # job could not be registered in the first place.
        mapping_key = self._marquez_job_name_from_task_instance(task_instance)
        marquez_run_id = JobIdMapping.pop(mapping_key, dagrun.run_id, session)
        metadata = self._extract_metadata(dagrun, task, task_instance)

        marquez_job = self._marquez_job_name(self.dag_id, task.task_id)
        fallback_run = self._marquez_run_id(dagrun.run_id, task.task_id)

        if not marquez_run_id:
            event_time = DagUtils.to_iso_8601(task_instance.start_date)
            parent_run = dagrun.run_id
            location = self._get_location(task)
            start_time = DagUtils.to_iso_8601(task_instance.start_date)
            end_time = DagUtils.to_iso_8601(task_instance.end_date)
            facets = {**metadata.run_facets, **get_custom_facets(task, False)}

            marquez_run_id = _MARQUEZ.start_task(
                fallback_run,
                marquez_job,
                self.description,
                event_time,
                parent_run,
                location,
                start_time,
                end_time,
                metadata,
                facets,
            )

            if not marquez_run_id:
                # NOTE(review): execution continues with a falsy run id
                # after this warning - confirm that is intended.
                self.log.warning('Could not emit lineage')

        self.log.debug(f'Setting task state: {task_instance.state}'
                       f' for {task_instance.task_id}')

        # Pick the reporting call by terminal state; both take the same args.
        if task_instance.state in {State.SUCCESS, State.SKIPPED}:
            report = _MARQUEZ.complete_task
        else:
            report = _MARQUEZ.fail_task
        report(
            marquez_run_id,
            marquez_job,
            DagUtils.to_iso_8601(task_instance.end_date),
            metadata,
        )
Ejemplo n.º 6
0
    def _report_task_instance(self, ti, dagrun, run_args, session):
        """Report the final state of one task instance to Marquez.

        The run ids stored earlier are popped from ``self._job_id_mapping``.
        If none are found, a job and a run are created for every extracted
        step. Then, for each step and each run id, the job is re-created
        with the task state and run id, the run is started, and it is
        marked complete when the state is ``State.SUCCESS`` or
        ``State.SKIPPED`` and failed otherwise.

        :param ti: task instance whose state is reported.
        :param dagrun: DAG run the task instance belongs to.
        :param run_args: run arguments forwarded to ``create_run``.
        :param session: forwarded to ``self._job_id_mapping.pop``.
        """
        task = self.get_task(ti.task_id)
        run_ids = self._job_id_mapping.pop(self._marquez_job_name_from_ti(ti),
                                           dagrun.run_id, session)
        steps = self._extract_metadata(dagrun, task, ti)

        # Note: run_ids could be missing if it was removed from airflow
        # or the job could not be registered.
        if not run_ids:
            # Plain loop: create_job is called for its side effect only,
            # so there is no result list worth building.
            for step in steps:
                self._marquez.create_job(step, self._get_location(task),
                                         self.description)
            run_ids = [
                self._marquez.create_run(self.new_run_id(), step, run_args,
                                         DagUtils.to_iso_8601(ti.start_date),
                                         DagUtils.to_iso_8601(ti.end_date))
                for step in steps
            ]
            if not run_ids:
                # Logger.warn is a deprecated alias of Logger.warning.
                # With no run ids the loop below does nothing.
                self.log.warning('Could not emit lineage')

        # NOTE(review): every step is paired with every run id; if run ids
        # are meant to correspond one-to-one with steps this should be a
        # zip - confirm against the intended Marquez model.
        for step in steps:
            for run_id in run_ids:
                self._marquez.create_job(step, self._get_location(task),
                                         self.description, ti.state, run_id)
                self._marquez.start_run(run_id,
                                        DagUtils.to_iso_8601(ti.start_date))

                self.log.debug(f'Setting task state: {ti.state}'
                               f' for {ti.task_id}')
                if ti.state in {State.SUCCESS, State.SKIPPED}:
                    self._marquez.complete_run(
                        run_id, DagUtils.to_iso_8601(ti.end_date))
                else:
                    self._marquez.fail_run(run_id,
                                           DagUtils.to_iso_8601(ti.end_date))
Ejemplo n.º 7
0
    def handle_callback(self, *args, **kwargs):
        """Report task instances to Marquez, then run the parent callback.

        Reporting is best effort: any ``Exception`` raised while doing so
        (including a missing positional DAG run) is logged and not
        propagated. The parent's ``handle_callback`` is always invoked
        afterwards and its result returned.

        :param args: positional arguments; the first one is used as the
            DAG run.
        :param kwargs: keyword arguments; ``session`` is read from here and
            the run arguments are derived from them via ``DagUtils``.
        :return: whatever the parent's ``handle_callback`` returns.
        """
        self.log.debug(f"handle_callback({args}, {kwargs})")
        try:
            dagrun = args[0]
            self.log.debug(f"handle_callback() dagrun : {dagrun}")
            _MARQUEZ.create_namespace()
            self._report_task_instances(dagrun,
                                        DagUtils.get_run_args(**kwargs),
                                        kwargs.get('session'))
        except Exception as e:
            self.log.error(
                f'Failed to record dagrun callback: {e} '
                f'dag_id={self.dag_id}',
                exc_info=True)

        # Forward keyword arguments too: dropping them would make the parent
        # fall back to its own defaults for anything the caller passed by
        # keyword.
        return super().handle_callback(*args, **kwargs)