def on_render(self, value):
    try:
        callback_check(value, self._available_callback_kwargs)
    # raised when value is a dotted path with lazy loading turned on.
    # cannot check because that requires importing the dotted path
    except CallbackCheckAborted:
        pass

    self._on_render = value
def _run_on_finish(self):
    """Call the on_finish hook
    """
    if self.on_finish:
        kwargs = callback_check(self.on_finish,
                                self._available_callback_kwargs)
        self.on_finish(**kwargs)
def update(self, source_code):
    """
    Update metadata in the storage backend. This should be called by
    Task objects after running successfully to update metadata in the
    backend storage. If saving to the backend storage succeeds, the
    local copy is updated as well.
    """
    if self._data is None:
        self._data = dict(timestamp=None, stored_source_code=None)

    new_data = dict(timestamp=datetime.now().timestamp(),
                    stored_source_code=source_code)

    kwargs = callback_check(self._product.prepare_metadata,
                            available={
                                'metadata': new_data,
                                'product': self._product
                            })

    data = self._product.prepare_metadata(**kwargs)

    self._product.save_metadata(data)

    # if saving went well, we can update the local copy
    self.update_locally(new_data)
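# The callback_check call above lets a product's prepare_metadata hook
# request only the arguments it needs. A minimal sketch of a compatible
# hook (the name and body here are illustrative, not part of the
# library):
def prepare_metadata(metadata):
    # this hook requests only 'metadata', so callback_check will not
    # pass 'product' to it
    metadata['note'] = 'annotated before saving'
    return metadata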
def call_with_available(self, accepted_kwargs):
    """
    Check the callable's signature to ensure it accepts the passed
    keyword arguments. This imports the dotted path if it hasn't been
    imported yet, then calls the function.
    """
    callable_ = self._get_callable()
    kwargs_to_use = callback_check(callable_, accepted_kwargs)
    return self(**kwargs_to_use)
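# A usage sketch for call_with_available (the module and function names
# are hypothetical). Assuming my_hooks.py defines: def on_done(report): ...
dp = DottedPath('my_hooks.on_done', lazy_load=True)

# only 'report' is forwarded; 'extra' is dropped because on_done does
# not declare it in its signature
dp.call_with_available({'report': {'status': 'ok'}, 'extra': 1})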
def test_callback_check_from_dotted_path(tmp_directory, tmp_imports):
    Path('some_module.py').write_text("""
def fn(some_arg):
    return some_arg
""")

    dp = DottedPath('some_module.fn', lazy_load=False)

    assert callback_check(dp, available={'some_arg': 42}) == {'some_arg': 42}
def _run_on_render(self):
    if self.on_render:
        self._logger.debug('Executing on_render hook '
                           'for dag "%s"', self.name)

        kwargs_available = copy(self._available_callback_kwargs)
        kwargs = callback_check(self.on_render, kwargs_available)
        self.on_render(**kwargs)
    else:
        self._logger.debug('No on_render hook for dag '
                           '"%s", skipping', self.name)
def test_callback_check_doesnt_include_constructor_args(
        tmp_directory, tmp_imports):
    Path('some_module.py').write_text("""
def add(x, y):
    return x + y
""")

    dp = DottedPath(dict(dotted_path='some_module.add', x=1),
                    lazy_load=False)

    assert callback_check(dp, available={'y': 2}) == {'y': 2}
    assert dp(y=2) == 3
def _run_on_finish(self, build_report):
    if self.on_finish:
        self._logger.debug('Executing on_finish hook '
                           'for dag "%s"', self.name)

        kwargs_available = copy(self._available_callback_kwargs)
        kwargs_available['report'] = build_report
        kwargs = callback_check(self.on_finish, kwargs_available)
        self.on_finish(**kwargs)
    else:
        self._logger.debug('No on_finish hook for dag '
                           '"%s", skipping', self.name)
def _run_on_failure(self, tb):
    if self.on_failure:
        self._logger.debug('Executing on_failure hook '
                           'for dag "%s"', self.name)

        kwargs_available = copy(self._available_callback_kwargs)
        kwargs_available['traceback'] = tb
        kwargs = callback_check(self.on_failure, kwargs_available)
        self.on_failure(**kwargs)
    else:
        self._logger.debug('No on_failure hook for dag '
                           '"%s", skipping', self.name)
def test_no_error_if_args_passed_to_the_constructor(tmp_directory,
                                                    tmp_imports):
    Path('some_module.py').write_text("""
def fn(some_arg, another_arg):
    return some_arg
""")

    dp = DottedPath(dict(dotted_path='some_module.fn', some_arg=42),
                    lazy_load=False)

    assert callback_check(dp, available={'another_arg': 100}) == {
        'another_arg': 100
    }
def _run_on_render(self):
    if self.on_render:
        self._logger.debug('Calling on_render hook on task %s', self.name)

        kwargs = callback_check(self.on_render,
                                self._available_callback_kwargs)
        try:
            self.on_render(**kwargs)
        except Exception as e:
            msg = ('Exception when running on_render '
                   'for task "{}": {}'.format(self.name, e))
            self._logger.exception(msg)
            self.exec_status = TaskStatus.ErroredRender
            raise type(e)(msg) from e
def test_overrides_default_param_with_available_param(tmp_directory,
                                                      tmp_imports):
    Path('some_module.py').write_text("""
def fn(some_arg, another_arg):
    return some_arg
""")

    dp = DottedPath(dict(dotted_path='some_module.fn', some_arg=42),
                    lazy_load=False)

    assert callback_check(dp, available={
        'another_arg': 100,
        'some_arg': 200
    }) == {
        'another_arg': 100,
        'some_arg': 200
    }
def update(self, source_code, params):
    """
    Update metadata in the storage backend. This should be called by
    Task objects after running successfully to update metadata in the
    backend storage. If saving to the backend storage succeeds, the
    local copy is updated as well.

    Parameters
    ----------
    source_code : str
        Task's source code

    params : dict
        Task's params
    """
    # make sure params are json serializable
    try:
        # TODO: check this to prevent this error from happening in the
        # Product.save_metadata implementation. All current
        # implementations serialize using json. I think it's best to
        # serialize here and pass the string to the save_metadata
        # method, but this will do for now
        json.dumps(params)
    except Exception:
        warnings.warn(f'Params {params!r} are not serializable, they '
                      'will be ignored. Changes to them won\'t trigger '
                      'task execution.')
        params = None

    new_data = dict(timestamp=datetime.now().timestamp(),
                    stored_source_code=source_code,
                    params=params)

    kwargs = callback_check(self._product.prepare_metadata,
                            available={
                                'metadata': new_data,
                                'product': self._product
                            })

    data = self._product.prepare_metadata(**kwargs)

    self._product.save_metadata(data)

    # if saving worked, we can update the local in-memory copy
    self.update_locally(new_data)
def update(self, source_code, params):
    """
    Update metadata in the storage backend. This should be called by
    Task objects after running successfully to update metadata in the
    backend storage. If saving to the backend storage succeeds, the
    local copy is updated as well.

    Parameters
    ----------
    source_code : str
        Task's source code

    params : dict
        Task's params
    """
    # remove any unserializable parameters
    params = remove_non_serializable_top_keys(params)

    new_data = dict(
        timestamp=datetime.now().timestamp(),
        stored_source_code=source_code,
        # process params to store hashes in case they're
        # declared as resources
        params=process_resources(params))

    kwargs = callback_check(self._product.prepare_metadata,
                            available={
                                'metadata': new_data,
                                'product': self._product
                            })

    data = self._product.prepare_metadata(**kwargs)

    self._product.save_metadata(data)

    # if saving worked, we can update the local in-memory copy
    self.update_locally(new_data)
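# Here, remove_non_serializable_top_keys drops only the offending
# entries, instead of discarding all params as the earlier version did.
# A minimal sketch of that idea (an illustration of the semantics, not
# the library's implementation):
import json


def drop_non_serializable_top_keys(params):
    out = {}
    for key, value in params.items():
        try:
            json.dumps(value)
        except TypeError:
            # skip top-level keys whose values cannot be serialized
            continue
        out[key] = value
    return out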
def test_returns_kwargs_to_use():
    def fn(a, b):
        pass

    assert callback_check(fn, {'a': 1, 'b': 2, 'c': 3}) == {'a': 1, 'b': 2}
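# The behavior this test pins down can be approximated with standard
# signature introspection. A minimal sketch covering the dict case, not
# the library's actual implementation:
import inspect


def naive_callback_check(fn, available):
    # keep only the entries in `available` whose keys match the
    # function's parameter names
    params = inspect.signature(fn).parameters
    return {name: available[name] for name in params if name in available}


def fn(a, b):
    pass


assert naive_callback_check(fn, {'a': 1, 'b': 2, 'c': 3}) == {'a': 1, 'b': 2}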
def test_fn_with_default_values():
    def fn(a, b, default=1):
        pass

    with pytest.raises(CallbackSignatureError):
        callback_check(fn, {'a', 'b'}, allow_default=False)
def on_failure(self, value):
    callback_check(value, self._available_callback_kwargs)
    self._on_failure = value
def test_fn_with_unknown_params():
    def fn(a, b, unknown):
        pass

    with pytest.raises(CallbackSignatureError):
        callback_check(fn, {'a', 'b'})
def on_render(self, value):
    callback_check(value, self._available_callback_kwargs)
    self._on_render = value
def _run_on_failure(self):
    if self.on_failure:
        kwargs = callback_check(self.on_failure,
                                self._available_callback_kwargs)
        self.on_failure(**kwargs)
def build(self, force=False, show_progress=True, debug=False,
          close_clients=True):
    """
    Runs the DAG in order so that all upstream dependencies are run for
    every task

    Parameters
    ----------
    force : bool, default=False
        If True, it will run all tasks regardless of status, defaults to
        False

    show_progress : bool, default=True
        Show progress bar

    debug : bool, default=False
        Drop into a debugging session if building raises an exception.
        Note that this modifies the executor, temporarily setting it to
        Serial with subprocess off and catching exceptions/warnings off.
        Restores the original executor at the end

    close_clients : bool, default=True
        Close all clients (dag-level, task-level and product-level) upon
        successful build

    Notes
    -----
    All dag-level clients are closed after calling this function

    ``debug`` is useful to let a pipeline run and start debugging at a
    failing PythonCallable task, but it won't work with failing
    ``NotebookRunner`` tasks because notebooks/scripts are executed in a
    different process. If you want to debug ``NotebookRunner`` tasks,
    use ``NotebookRunner.debug()`` instead.

    Returns
    -------
    BuildReport
        A dict-like object with tasks as keys and dicts with task status
        as values
    """
    kwargs = callback_check(self._params.logging_factory,
                            available={'dag_name': self.name})
    res = self._params.logging_factory(**kwargs)

    if isinstance(res, Iterable):
        dag_logger = DAGLogger(*res)
    else:
        dag_logger = DAGLogger(handler=res)

    # if debug, we have to change the executor to these settings: if we
    # run tasks in a subprocess or catch exceptions, we won't be able
    # to start the debugging session in the right place
    if debug:
        executor_original = self.executor
        self.executor = executors.Serial(build_in_subprocess=False,
                                         catch_exceptions=False,
                                         catch_warnings=False)

    callable_ = partial(self._build,
                        force=force,
                        show_progress=show_progress)

    with dag_logger:
        try:
            if debug:
                report = debug_if_exception(callable_)
            else:
                report = callable_()
        finally:
            if close_clients:
                self.close_clients()

            if debug:
                self.executor = executor_original

    return report
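# Since callback_check validates logging_factory against {'dag_name'},
# the factory may declare dag_name or take no arguments at all. A
# hedged sketch of a compatible factory (the log file naming is
# illustrative):
import logging


def logging_factory(dag_name):
    # returning a single handler hits the DAGLogger(handler=res) branch
    # above; returning an iterable would unpack into DAGLogger(*res)
    handler = logging.FileHandler(f'{dag_name}.log')
    handler.setLevel(logging.INFO)
    return handler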
def on_finish(self, value):
    callback_check(value, self._available_callback_kwargs)
    self._on_finish = value
def test_error_if_lazy_loaded_dotted_path():
    dp = DottedPath('not_a_module.not_a_function', lazy_load=True)

    with pytest.raises(CallbackCheckAborted):
        callback_check(dp, available={'some_arg': 42})