예제 #1
0
파일: dag.py 프로젝트: ravwojdyla/marquez
    def _register_dagrun(self, dagrun: DagRun, is_external_trigger: bool, execution_date: str):
        """Start a Marquez task run for every task of this DAG.

        For each entry of ``self.task_dict`` the task metadata is extracted,
        a run is started through ``_MARQUEZ.start_task`` and the id it
        returns is stored with ``JobIdMapping.set`` under the job name and
        the DAG run id. An exception raised while handling one task is
        logged and does not stop the remaining tasks from being processed.

        :param dagrun: DAG run whose tasks are being registered.
        :param is_external_trigger: forwarded to ``get_custom_facets``.
        :param execution_date: execution date from which the start and end
            times passed to Marquez are derived.
        """
        self.log.debug(f"self.task_dict: {self.task_dict}")
        for task_id, task in self.task_dict.items():
            # Taken before any work so the failure log can be timed from here.
            t = self._now_ms()
            try:
                metadata = self._extract_metadata(dagrun, task)

                marquez_job = self._marquez_job_name(self.dag_id, task.task_id)
                marquez_run = self._marquez_run_id(dagrun.run_id, task.task_id)

                # Arguments are prepared in the order start_task receives them.
                description = self.description
                event_time = DagUtils.to_iso_8601(self._now_ms())
                parent_run_id = dagrun.run_id
                location = self._get_location(task)
                start_time = DagUtils.get_start_time(execution_date)
                end_time = DagUtils.get_end_time(
                    execution_date,
                    self.following_schedule(execution_date),
                )
                # Custom facets override extractor facets on key collisions.
                facets = {
                    **metadata.run_facets,
                    **get_custom_facets(task, is_external_trigger),
                }

                started_run_id = _MARQUEZ.start_task(
                    marquez_run,
                    marquez_job,
                    description,
                    event_time,
                    parent_run_id,
                    location,
                    start_time,
                    end_time,
                    metadata,
                    facets,
                )

                JobIdMapping.set(marquez_job, dagrun.run_id, started_run_id)
            except Exception as e:
                # Best effort: one failing task must not block the others.
                failure_msg = (
                    f'Failed to record task {task_id}: {e} '
                    f'{self._timed_log_message(t)}'
                )
                self.log.error(failure_msg, exc_info=True)
예제 #2
0
    def _register_dagrun(self, dagrun, execution_date):
        """Start a Marquez task run for every task of this DAG.

        For each entry of ``self.task_dict`` the task metadata is extracted,
        a run is started through ``_MARQUEZ.start_task`` and the id it
        returns is stored with ``JobIdMapping.set`` under the job name and
        the DAG run id. An exception raised while handling one task is
        logged and does not stop the remaining tasks from being processed.

        :param dagrun: DAG run whose tasks are being registered.
        :param execution_date: execution date from which the start and end
            times passed to Marquez are derived.
        """
        self.log.debug(f"self.task_dict: {self.task_dict}")
        for task_id, task in self.task_dict.items():
            # Taken before any work so the failure log can be timed from here.
            t = self._now_ms()
            try:
                metadata = self._extract_metadata(dagrun, task)

                marquez_job = self._marquez_job_name(self.dag_id, task.task_id)
                marquez_run = self._marquez_run_id(dagrun.run_id, task.task_id)

                # Arguments are prepared in the order start_task receives them.
                description = self.description
                event_time = DagUtils.to_iso_8601(self._now_ms())
                location = self._get_location(task)
                start_time = DagUtils.get_start_time(execution_date)
                end_time = DagUtils.get_end_time(
                    execution_date,
                    self.following_schedule(execution_date),
                )

                started_run_id = _MARQUEZ.start_task(
                    marquez_run,
                    marquez_job,
                    description,
                    event_time,
                    None,  # TODO: add parent hierarchy
                    location,
                    start_time,
                    end_time,
                    metadata,
                )

                JobIdMapping.set(marquez_job, dagrun.run_id, started_run_id)
            except Exception as e:
                # Best effort: one failing task must not block the others.
                failure_msg = (
                    f'Failed to record task {task_id}: {e} '
                    f'{self._timed_log_message(t)}'
                )
                self.log.error(failure_msg, exc_info=True)
예제 #3
0
파일: dag.py 프로젝트: ravwojdyla/marquez
    def _report_task_instance(self, task_instance, dagrun, session):
        """Report the final state of a task instance to Marquez.

        The Marquez run id recorded for the task is popped from
        ``JobIdMapping``. When none is found, a run is started on the spot
        using the task instance's own start and end dates. The run is then
        marked complete when the task instance state is ``State.SUCCESS`` or
        ``State.SKIPPED`` and marked failed for any other state.

        If no run id can be obtained at all, a warning is logged and nothing
        further is reported, since there is no run to complete or fail.

        :param task_instance: task instance whose state is reported.
        :param dagrun: DAG run the task instance belongs to.
        :param session: passed through to ``JobIdMapping.pop``.
        """
        task = self.get_task(task_instance.task_id)

        # Note: task_run_id could be missing if it was removed from airflow
        # or the job could not be registered.
        task_run_id = JobIdMapping.pop(
            self._marquez_job_name_from_task_instance(task_instance), dagrun.run_id, session)
        step = self._extract_metadata(dagrun, task, task_instance)

        job_name = self._marquez_job_name(self.dag_id, task.task_id)
        run_id = self._marquez_run_id(dagrun.run_id, task.task_id)

        if not task_run_id:
            task_run_id = _MARQUEZ.start_task(
                run_id,
                job_name,
                self.description,
                DagUtils.to_iso_8601(task_instance.start_date),
                dagrun.run_id,
                self._get_location(task),
                DagUtils.to_iso_8601(task_instance.start_date),
                DagUtils.to_iso_8601(task_instance.end_date),
                step,
                {**step.run_facets, **get_custom_facets(task, False)}
            )

            if not task_run_id:
                # Without a run id there is nothing to complete or fail;
                # stop here instead of reporting a state for a missing run.
                self.log.warning('Could not emit lineage')
                return

        self.log.debug(f'Setting task state: {task_instance.state}'
                       f' for {task_instance.task_id}')
        end_time = DagUtils.to_iso_8601(task_instance.end_date)
        if task_instance.state in {State.SUCCESS, State.SKIPPED}:
            _MARQUEZ.complete_task(task_run_id, job_name, end_time, step)
        else:
            _MARQUEZ.fail_task(task_run_id, job_name, end_time, step)
예제 #4
0
    def _report_task_instance(self, ti, dagrun, run_args, session):
        """Report the final state of a task instance to Marquez.

        The run ids recorded for the task are popped from
        ``self._job_id_mapping``. When none are found, a job and a run are
        created for every extracted step. Each run is then started and
        marked complete when ``ti.state`` is ``State.SUCCESS`` or
        ``State.SKIPPED``, and marked failed for any other state.

        :param ti: task instance whose state is reported.
        :param dagrun: DAG run the task instance belongs to.
        :param run_args: passed through to ``create_run``.
        :param session: passed through to ``self._job_id_mapping.pop``.
        """
        task = self.get_task(ti.task_id)
        run_ids = self._job_id_mapping.pop(self._marquez_job_name_from_ti(ti),
                                           dagrun.run_id, session)
        steps = self._extract_metadata(dagrun, task, ti)

        # Note: run_ids could be missing if it was removed from airflow
        # or the job could not be registered.
        if not run_ids:
            # All jobs are created before any run, as a plain loop because
            # only the side effect matters here.
            for step in steps:
                self._marquez.create_job(step, self._get_location(task),
                                         self.description)
            run_ids = [
                self._marquez.create_run(self.new_run_id(), step, run_args,
                                         DagUtils.to_iso_8601(ti.start_date),
                                         DagUtils.to_iso_8601(ti.end_date))
                for step in steps
            ]
            if not run_ids:
                self.log.warning('Could not emit lineage')

        # NOTE(review): every step is paired with every run id (a cross
        # product), not step-by-step; confirm this is intended when a task
        # yields more than one step.
        for step in steps:
            for run_id in run_ids:
                self._marquez.create_job(step, self._get_location(task),
                                         self.description, ti.state, run_id)
                self._marquez.start_run(run_id,
                                        DagUtils.to_iso_8601(ti.start_date))

                self.log.debug(f'Setting task state: {ti.state}'
                               f' for {ti.task_id}')
                if ti.state in {State.SUCCESS, State.SKIPPED}:
                    self._marquez.complete_run(
                        run_id, DagUtils.to_iso_8601(ti.end_date))
                else:
                    self._marquez.fail_run(run_id,
                                           DagUtils.to_iso_8601(ti.end_date))