def stub(stage, test_df=None):
    # type: (str, pd.DataFrame) -> None
    """Log a timestamp metric named *stage* and the dataframe as ``df_<stage>``.

    Args:
        stage: metric name / suffix for the logged dataframe key.
        test_df: optional dataframe to log alongside the metric.
    """
    # removed leftover commented-out debug code (random raise) that only added noise
    log_metric(stage, utcnow())
    log_dataframe("df_" + stage, test_df)
def __init__(
    self,
    max_retries,
    remove_failed_store,
    tracker_raise_on_error,
    is_verbose,
    databand_api_client,
    *args,
    **kwargs,
):
    """Set up the async tracking channel: a synchronous web channel plus a
    background worker that feeds items into it."""
    super(TrackingAsyncWebChannel, self).__init__(*args, **kwargs)

    # retry / error-handling configuration
    self._max_retries = max_retries
    self._remove_failed_store = remove_failed_store
    self._tracker_raise_on_error = tracker_raise_on_error
    self._is_verbose = is_verbose
    # verbose mode promotes channel logging from debug to info
    self._log_fn = logger.info if is_verbose else logger.debug

    # delivery machinery: sync channel + async worker draining into it
    self.web_channel = TrackingWebChannel(databand_api_client)
    self._background_worker = TrackingAsyncWebChannelBackgroundWorker(
        item_processing_handler=self._background_worker_item_handler,
        skip_processing_callback=self._background_worker_skip_processing_callback,
    )

    # lifecycle state
    self._shutting_down = False
    self._start_time = utcnow()
def test_log_metric_pendulum(self):
    """A pendulum/UTC datetime logged as a metric round-trips as its str() form."""
    now = utcnow()

    @task
    def t_f_metric():
        log_metric("t_f", now)

    t = assert_run_task(t_f_metric.t())
    # original code read the metric target twice and discarded the first
    # result; read once and compare the value column with str(now)
    metric_value = (
        t.ctrl.last_task_run.meta_files.get_metric_target("t_f").read().split()[1]
    )
    assert metric_value == str(now)
def flush(self):
    """Drain pending async tracking work before shutdown, bounded by a time budget.

    Waits for the background worker to finish queued items, then flushes the
    underlying web channel. On timeout the error is logged, not raised.
    """
    # skip the handler if worker already exited to avoid hanging
    if not self._background_worker.is_alive:
        return
    # process remaining items in the queue
    # NOTE: .in_seconds() suggests a pendulum-style duration — utcnow() is
    # presumably timezone-aware; confirm against the utcnow() helper
    tracking_duration = (utcnow() - self._start_time).in_seconds()
    # don't exceed 10% of whole tracking duration while flushing,
    # but not less than 2m and no more than 30m
    flush_limit = min(max(tracking_duration * 0.1, 120), 30 * 60)
    logger.info(
        f"Waiting {flush_limit}s for TrackingAsyncWebChannel to complete async tasks..."
    )
    # flag lets the worker's skip-callback know we are shutting down
    self._shutting_down = True
    try:
        self._background_worker.flush(flush_limit)
        self.web_channel.flush()
        logger.info("TrackingAsyncWebChannel completed all tasks")
    except TimeoutError as e:
        # best-effort flush: log and continue shutdown rather than raise
        err_msg = f"TrackingAsyncWebChannel flush exceeded {flush_limit}s timeout"
        log_exception(err_msg, e, logger)
    finally:
        self._shutting_down = False
def test_track_python_operator(self):
    """Attach tracking to a PythonOperator and run its TaskInstance end to end."""
    args = dict(start_date=days_ago(2))
    with DAG(
        dag_id="test_dag",
        default_args=args,
        schedule_interval=timedelta(minutes=1),
    ):
        run_this = PythonOperator(
            task_id="print_the_context",
            provide_context=True,
            python_callable=_test_func,
        )
        track_task(run_this)

        # removed the large commented-out AIRFLOW_CTX_* env / mock.patch.dict
        # scaffold — dead code that was never executed
        ti = TaskInstance(run_this, utcnow())
        ti.run(ignore_depends_on_past=True, ignore_ti_state=True)