Code example #1
# Imports assumed from the surrounding test module; the utcnow import path
# follows the dbnd source tree layout.
import pandas as pd

from dbnd import log_dataframe, log_metric
from dbnd._core.utils.timezone import utcnow


def stub(stage, test_df=None):
    # type: (str, pd.DataFrame) -> None
    # Record when this stage ran and snapshot the DataFrame that was passed in.
    log_metric(stage, utcnow())
    log_dataframe("df_" + stage, test_df)
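
A minimal call-site sketch (not part of the original example; the stage name and sample frame are invented):

import pandas as pd

# Hypothetical invocation: records a timestamp metric for the "ingest" stage
# and logs a toy DataFrame alongside it.
stub("ingest", pd.DataFrame({"a": [1, 2, 3]}))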
Code example #2
    def __init__(
        self,
        max_retries,
        remove_failed_store,
        tracker_raise_on_error,
        is_verbose,
        databand_api_client,
        *args,
        **kwargs,
    ):
        super(TrackingAsyncWebChannel, self).__init__(*args, **kwargs)

        # Synchronous channel that performs the actual API calls.
        self.web_channel = TrackingWebChannel(databand_api_client)

        # Background worker thread: queued items go to _background_worker_item_handler;
        # the skip callback lets the channel short-circuit processing when needed.
        self._background_worker = TrackingAsyncWebChannelBackgroundWorker(
            item_processing_handler=self._background_worker_item_handler,
            skip_processing_callback=self._background_worker_skip_processing_callback,
        )

        self._max_retries = max_retries
        self._remove_failed_store = remove_failed_store
        self._tracker_raise_on_error = tracker_raise_on_error
        self._is_verbose = is_verbose
        self._log_fn = logger.info if self._is_verbose else logger.debug
        self._shutting_down = False
        self._start_time = utcnow()
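
For orientation, a stripped-down sketch of the queue-plus-thread pattern this constructor wires up. This is an illustration under assumptions, not dbnd's actual TrackingAsyncWebChannelBackgroundWorker; the handler and skip-callback signatures are guesses:

import queue
import threading

class AsyncWorkerSketch:
    # Hypothetical stand-in for TrackingAsyncWebChannelBackgroundWorker.
    def __init__(self, item_processing_handler, skip_processing_callback):
        self._handler = item_processing_handler
        self._skip = skip_processing_callback
        self._queue = queue.Queue()
        self._thread = threading.Thread(target=self._run, daemon=True)
        self._thread.start()

    @property
    def is_alive(self):
        return self._thread.is_alive()

    def submit(self, item):
        self._queue.put(item)

    def _run(self):
        while True:
            item = self._queue.get()
            if item is None:  # shutdown sentinel
                break
            if self._skip():  # caller-provided short-circuit
                continue
            self._handler(item)

    def flush(self, timeout):
        # Ask the thread to drain and exit, then wait up to `timeout` seconds.
        self._queue.put(None)
        self._thread.join(timeout)
        if self._thread.is_alive():
            raise TimeoutError("background worker did not drain in time")

The flush method in code example #4 below relies on exactly this kind of contract: a bounded wait that raises TimeoutError when the worker fails to drain in time.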
Code example #3
File: test_task_metrics.py  Project: databand-ai/dbnd
    def test_log_metric_pendulum(self):
        now = utcnow()

        @task
        def t_f_metric():
            log_metric("t_f", now)

        t = assert_run_task(t_f_metric.t())
        # The metric target's second whitespace-separated token holds the logged value.
        metric_content = t.ctrl.last_task_run.meta_files.get_metric_target("t_f").read()
        assert metric_content.split()[1] == str(now)
Code example #4
    def flush(self):
        # Skip if the background worker has already exited, to avoid hanging.
        if not self._background_worker.is_alive:
            return

        # Process the remaining items in the queue.
        tracking_duration = (utcnow() - self._start_time).in_seconds()
        # Cap flushing at 10% of the total tracking duration,
        # but no less than 2 minutes and no more than 30 minutes.
        flush_limit = min(max(tracking_duration * 0.1, 120), 30 * 60)

        logger.info(
            f"Waiting {flush_limit}s for TrackingAsyncWebChannel to complete async tasks..."
        )
        self._shutting_down = True
        try:
            self._background_worker.flush(flush_limit)
            self.web_channel.flush()
            logger.info("TrackingAsyncWebChannel completed all tasks")
        except TimeoutError as e:
            err_msg = f"TrackingAsyncWebChannel flush exceeded {flush_limit}s timeout"
            log_exception(err_msg, e, logger)
        finally:
            self._shutting_down = False
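
A quick sanity check of the clamping arithmetic used for flush_limit:

# 10-minute run: 10% is 60s, below the 2-minute floor -> 120s.
assert min(max(600 * 0.1, 120), 30 * 60) == 120
# 10-hour run: 10% is 3600s, above the 30-minute ceiling -> 1800s.
assert min(max(36000 * 0.1, 120), 30 * 60) == 1800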
Code example #5
    def test_track_python_operator(self):
        args = dict(start_date=days_ago(2))

        with DAG(dag_id="test_dag",
                 default_args=args,
                 schedule_interval=timedelta(minutes=1)):
            run_this = PythonOperator(
                task_id="print_the_context",
                provide_context=True,
                python_callable=_test_func,
            )
        track_task(run_this)
        #
        # env = {
        #     "AIRFLOW_CTX_DAG_ID": "test_dag",
        #     "AIRFLOW_CTX_EXECUTION_DATE": "emr_task",
        #     "AIRFLOW_CTX_TASK_ID": "1970-01-01T0000.000",
        #     "AIRFLOW_CTX_TRY_NUMBER": "1",
        #     "AIRFLOW_CTX_UID": get_airflow_instance_uid(),
        # }
        #
        # with mock.patch.dict(os.environ, env):
        ti = TaskInstance(run_this, utcnow())
        ti.run(ignore_depends_on_past=True, ignore_ti_state=True)
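
The commented-out block above hints at an alternative: injecting the Airflow context through environment variables rather than constructing a TaskInstance directly. A minimal sketch of that pattern using the standard library (the values are illustrative, and the exact variables dbnd reads are an assumption):

import os
from unittest import mock

env = {
    "AIRFLOW_CTX_DAG_ID": "test_dag",
    "AIRFLOW_CTX_TASK_ID": "print_the_context",
    "AIRFLOW_CTX_EXECUTION_DATE": "1970-01-01T00:00:00",
    "AIRFLOW_CTX_TRY_NUMBER": "1",
}
with mock.patch.dict(os.environ, env):
    # Code run here sees the mocked Airflow context in os.environ;
    # the patch is reverted when the with-block exits.
    pass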