def _register_dagrun(self, dagrun: DagRun, is_external_trigger: bool,
                     execution_date: str):
    """Start a Marquez task run for every task in this DAG.

    For each entry in ``self.task_dict`` the task metadata is extracted,
    a run is started through ``_MARQUEZ.start_task`` and the id it returns
    is stored in ``JobIdMapping`` under the job name and ``dagrun.run_id``.
    A failure while handling one task is logged and does not stop the
    remaining tasks from being processed.

    Args:
        dagrun: The DAG run being registered; its ``run_id`` is used both
            to derive the per-task run id and as the parent run id.
        is_external_trigger: Forwarded to ``get_custom_facets``.
        execution_date: Passed to ``DagUtils.get_start_time``,
            ``DagUtils.get_end_time`` and ``self.following_schedule``.
    """
    self.log.debug(f"self.task_dict: {self.task_dict}")

    for task_id, task in self.task_dict.items():
        # Taken before any work so the failure message can be timed
        # through _timed_log_message.
        t = self._now_ms()
        try:
            metadata = self._extract_metadata(dagrun, task)
            job_name = self._marquez_job_name(self.dag_id, task.task_id)
            run_id = self._marquez_run_id(dagrun.run_id, task.task_id)

            event_time = DagUtils.to_iso_8601(self._now_ms())
            location = self._get_location(task)
            nominal_start = DagUtils.get_start_time(execution_date)
            nominal_end = DagUtils.get_end_time(
                execution_date,
                self.following_schedule(execution_date),
            )
            # Custom facets win over extracted ones on key collisions.
            run_facets = {
                **metadata.run_facets,
                **get_custom_facets(task, is_external_trigger),
            }

            registered_run_id = _MARQUEZ.start_task(
                run_id,
                job_name,
                self.description,
                event_time,
                dagrun.run_id,
                location,
                nominal_start,
                nominal_end,
                metadata,
                run_facets,
            )
            JobIdMapping.set(job_name, dagrun.run_id, registered_run_id)
        except Exception as e:
            # Best effort: report the failure and move on to the next task.
            self.log.error(
                f'Failed to record task {task_id}: {e} '
                f'{self._timed_log_message(t)}',
                exc_info=True)
def _register_dagrun(self, dagrun, execution_date):
    """Start a Marquez task run for every task in this DAG.

    For each entry in ``self.task_dict`` the task metadata is extracted,
    a run is started through ``_MARQUEZ.start_task`` and the id it returns
    is stored in ``JobIdMapping`` under the job name and ``dagrun.run_id``.
    A failure while handling one task is logged and does not stop the
    remaining tasks from being processed.

    Args:
        dagrun: The DAG run being registered; its ``run_id`` is used to
            derive the per-task run id and as the mapping key.
        execution_date: Passed to ``DagUtils.get_start_time``,
            ``DagUtils.get_end_time`` and ``self.following_schedule``.
    """
    self.log.debug(f"self.task_dict: {self.task_dict}")

    for task_id, task in self.task_dict.items():
        # Taken before any work so the failure message can be timed
        # through _timed_log_message.
        t = self._now_ms()
        try:
            metadata = self._extract_metadata(dagrun, task)
            job_name = self._marquez_job_name(self.dag_id, task.task_id)
            run_id = self._marquez_run_id(dagrun.run_id, task.task_id)

            event_time = DagUtils.to_iso_8601(self._now_ms())
            parent_run_id = None  # TODO: add parent hierarchy
            location = self._get_location(task)
            nominal_start = DagUtils.get_start_time(execution_date)
            nominal_end = DagUtils.get_end_time(
                execution_date,
                self.following_schedule(execution_date),
            )

            registered_run_id = _MARQUEZ.start_task(
                run_id,
                job_name,
                self.description,
                event_time,
                parent_run_id,
                location,
                nominal_start,
                nominal_end,
                metadata,
            )
            JobIdMapping.set(job_name, dagrun.run_id, registered_run_id)
        except Exception as e:
            # Best effort: report the failure and move on to the next task.
            self.log.error(
                f'Failed to record task {task_id}: {e} '
                f'{self._timed_log_message(t)}',
                exc_info=True)
def _report_task_instance(self, task_instance, dagrun, session):
    """Report the final state of a task instance to Marquez.

    The run id stored for this task is popped from ``JobIdMapping``. When
    none is found, a run is started on the spot from the task instance's
    own start and end dates. The run is then reported as completed when
    the state is ``State.SUCCESS`` or ``State.SKIPPED`` and as failed for
    any other state.

    Args:
        task_instance: The task instance whose state is reported.
        dagrun: The DAG run the task instance belongs to.
        session: Forwarded to ``JobIdMapping.pop``.
    """
    task = self.get_task(task_instance.task_id)

    # The stored id can be absent if the task was removed from airflow
    # or the job could not be registered.
    mapping_key = self._marquez_job_name_from_task_instance(task_instance)
    task_run_id = JobIdMapping.pop(mapping_key, dagrun.run_id, session)

    step = self._extract_metadata(dagrun, task, task_instance)
    job_name = self._marquez_job_name(self.dag_id, task.task_id)
    run_id = self._marquez_run_id(dagrun.run_id, task.task_id)

    if not task_run_id:
        # Nothing was registered earlier: start the run now.
        event_time = DagUtils.to_iso_8601(task_instance.start_date)
        location = self._get_location(task)
        nominal_start = DagUtils.to_iso_8601(task_instance.start_date)
        nominal_end = DagUtils.to_iso_8601(task_instance.end_date)
        # Custom facets win over extracted ones on key collisions.
        run_facets = {**step.run_facets, **get_custom_facets(task, False)}

        task_run_id = _MARQUEZ.start_task(
            run_id,
            job_name,
            self.description,
            event_time,
            dagrun.run_id,
            location,
            nominal_start,
            nominal_end,
            step,
            run_facets,
        )
        if not task_run_id:
            # NOTE(review): execution continues and the state is still
            # reported with the empty run id - confirm this is intended.
            self.log.warning('Could not emit lineage')

    self.log.debug(f'Setting task state: {task_instance.state}'
                   f' for {task_instance.task_id}')

    succeeded = task_instance.state in {State.SUCCESS, State.SKIPPED}
    report_outcome = _MARQUEZ.complete_task if succeeded else _MARQUEZ.fail_task
    report_outcome(
        task_run_id,
        job_name,
        DagUtils.to_iso_8601(task_instance.end_date),
        step,
    )
def _report_task_instance(self, ti, dagrun, run_args, session):
    """Report the final state of a task instance to Marquez.

    The run ids stored for this task are popped from
    ``self._job_id_mapping``. When none are found, a job and a run are
    created for every extracted step. Each run is then started and
    reported as completed when the state is ``State.SUCCESS`` or
    ``State.SKIPPED``, and as failed for any other state.

    Args:
        ti: The task instance whose state is reported.
        dagrun: The DAG run the task instance belongs to.
        run_args: Forwarded to ``self._marquez.create_run``.
        session: Forwarded to ``self._job_id_mapping.pop``.
    """
    task = self.get_task(ti.task_id)
    run_ids = self._job_id_mapping.pop(
        self._marquez_job_name_from_ti(ti), dagrun.run_id, session)
    steps = self._extract_metadata(dagrun, task, ti)

    # Note: run_ids could be missing if it was removed from airflow
    # or the job could not be registered.
    if not run_ids:
        # Plain loop: the calls are made for their side effects only, so
        # there is no point in collecting their results in a list.
        for step in steps:
            self._marquez.create_job(
                step, self._get_location(task), self.description)

        run_ids = [
            self._marquez.create_run(
                self.new_run_id(),
                step,
                run_args,
                DagUtils.to_iso_8601(ti.start_date),
                DagUtils.to_iso_8601(ti.end_date))
            for step in steps
        ]
        if not run_ids:
            # Logger.warn is a deprecated alias; warning() is the
            # supported spelling.
            self.log.warning('Could not emit lineage')

    # NOTE(review): every step is paired with every run id. When the run
    # ids were just created one per step this looks like it may have been
    # meant to be a pairwise zip - confirm before changing.
    for step in steps:
        for run_id in run_ids:
            self._marquez.create_job(
                step, self._get_location(task), self.description,
                ti.state, run_id)
            self._marquez.start_run(
                run_id, DagUtils.to_iso_8601(ti.start_date))

            self.log.debug(f'Setting task state: {ti.state}'
                           f' for {ti.task_id}')
            if ti.state in {State.SUCCESS, State.SKIPPED}:
                self._marquez.complete_run(
                    run_id, DagUtils.to_iso_8601(ti.end_date))
            else:
                self._marquez.fail_run(
                    run_id, DagUtils.to_iso_8601(ti.end_date))