def test_call_dotted_path_calls_function(monkeypatch, spec):
    """A DottedPath built from a spec invokes the target function once."""
    fn_mock = Mock()
    monkeypatch.setattr(functions, 'some_function', fn_mock)

    dp = dotted_path.DottedPath(spec)
    dp()

    fn_mock.assert_called_once_with()
def test_init_and_call_dotted_path(tmp_directory, tmp_imports):
    """An eagerly-loaded DottedPath forwards positional args to the target."""
    module_source = """
def fn(some_arg):
    return some_arg
"""
    Path('some_module.py').write_text(module_source)

    dp = dotted_path.DottedPath('some_module.fn', lazy_load=False)

    assert dp(42) == 42
def _init_client(task_dict, lazy_import):
    """Replace task_dict['client'] (if present) with its resolved value.

    When lazy_import is True, the DottedPath object itself is stored so
    resolution happens on first use; otherwise it is called immediately
    and the returned client is stored.
    """
    if 'client' not in task_dict:
        return

    dp = dotted_path.DottedPath(task_dict.pop('client'),
                                lazy_load=lazy_import,
                                allow_return_none=False)

    task_dict['client'] = dp if lazy_import else dp()
def test_call_spec_without_dotted_path_key():
    """A spec missing the 'dotted_path' key fails validation when called."""
    spec = {'a': 1}

    with pytest.raises(SpecValidationError) as excinfo:
        dotted_path.DottedPath(spec)()

    expected = [{
        'loc': ('dotted_path', ),
        'msg': 'field required',
        'type': 'value_error.missing'
    }]
    assert excinfo.value.errors == expected
def test_dotted_path_from_dict(tmp_directory, tmp_imports):
    """Extra dict keys become keyword arguments bound at call time."""
    Path('some_module.py').write_text("""
def fn(some_arg):
    return some_arg
""")

    spec = dict(dotted_path='some_module.fn', some_arg=10)
    dp = dotted_path.DottedPath(spec, lazy_load=False)

    assert dp() == 10
def test_dotted_path_repr(tmp_directory, tmp_imports):
    """repr changes once the underlying callable has been loaded."""
    Path('some_module.py').write_text("""
def fn(some_arg):
    return some_arg
""")

    dp = dotted_path.DottedPath('some_module.fn', lazy_load=True)

    # before loading, repr only shows the dotted path string
    assert repr(dp) == "DottedPath('some_module.fn')"

    dp._load_callable()

    # after loading, repr reports the loaded callable as well
    assert 'loaded:' in repr(dp)
def test_call_spec_with_kwargs(monkeypatch):
    """Non-'dotted_path' keys in the spec are passed through as kwargs."""
    fn_mock = Mock()
    monkeypatch.setattr(functions, 'some_function', fn_mock)

    spec = dict(dotted_path='test_pkg.functions.some_function', a=1, b=2)
    dotted_path.DottedPath(spec)()

    fn_mock.assert_called_once_with(a=1, b=2)
def test_dotted_path_if_overriding_args(tmp_directory, tmp_imports):
    """Passing a kwarg already present in the spec warns and overrides it."""
    Path('some_module.py').write_text("""
def fn(some_arg):
    return some_arg
""")

    spec = dict(dotted_path='some_module.fn', some_arg=10)
    dp = dotted_path.DottedPath(spec, lazy_load=False)

    with pytest.warns(UserWarning) as record:
        dp(some_arg=20)

    expected = ("Got duplicated arguments ('some_arg') when calling "
                "dotted path 'some_module.fn'. Overriding values...")
    assert record[0].message.args[0] == expected
def _to_dag(self):
    """
    Internal method to manage the different cases to convert to a DAG
    object
    """
    # a 'location' entry points to a function that returns a fully built
    # DAG, so nothing else in the spec is processed
    if 'location' in self:
        return dotted_path.call_dotted_path(self['location'])

    dag = DAG()

    if 'config' in self:
        dag._params = DAGConfiguration.from_dict(self['config'])

    if 'executor' in self:
        valid = {'serial', 'parallel'}
        executor = self['executor']

        if executor not in valid:
            raise ValueError('executor must be one '
                             f'of {valid}, got: {executor}')

        # 'serial' is the default, only 'parallel' needs explicit setup
        if executor == 'parallel':
            dag.executor = Parallel()

    clients = self.get('clients')

    if clients:
        for class_name, dotted_path_spec in clients.items():
            # FIX: use DottedPath with lazy_load (as every other helper in
            # this module does) instead of the stale DottedPathSpec name
            dps = dotted_path.DottedPath(dotted_path_spec,
                                         lazy_load=self._lazy_import)

            # store the DottedPath when lazy, otherwise resolve the
            # client object now
            if self._lazy_import:
                dag.clients[class_name] = dps
            else:
                dag.clients[class_name] = dps()

    for attr in ['serializer', 'unserializer']:
        if attr in self:
            setattr(
                dag, attr,
                dotted_path.DottedPath(self[attr],
                                       lazy_load=self._lazy_import))

    process_tasks(dag, self, root_path=self._parent_path)

    return dag
def _init_product(task_dict, meta, task_class, root_path, lazy_import):
    """
    Initialize product. Resolution logic order:
    task.product_class
    meta.{task_class}.product_default_class

    Current limitation: When there is more than one product, they all must
    be from the same class.
    """
    product_raw = task_dict.pop('product')

    # nothing to do if the caller already passed a Product instance
    if isinstance(product_raw, products.product.Product):
        return product_raw

    CLASS = _find_product_class(task_class, task_dict, meta)

    kwargs = {}

    if 'product_client' in task_dict:
        dp = dotted_path.DottedPath(task_dict.pop('product_client'),
                                    lazy_load=lazy_import,
                                    allow_return_none=False)
        # keep the DottedPath when lazy, otherwise resolve the client now
        kwargs['client'] = dp if lazy_import else dp()

    # base path for the product (only relevant if product is a File)
    if meta['product_relative_to_source']:
        relative_to = Path(task_dict['source']).parent
    else:
        relative_to = root_path

    # initialize Product instance
    return try_product_init(CLASS, product_raw, relative_to, kwargs)
def _init_task(data, meta, project_root, lazy_import, dag):
    """Initialize a single task from a dictionary spec
    """
    task_dict = copy(data)
    class_ = task_dict.pop('class')

    product = _init_product(task_dict,
                            meta,
                            class_,
                            project_root,
                            lazy_import=lazy_import)

    _init_client(task_dict, lazy_import=lazy_import)

    source = task_dict.pop('source')
    name = task_dict.pop('name', None)

    # hooks are popped now and attached after the task is created
    hooks = {
        key: task_dict.pop(key, None)
        for key in ('on_finish', 'on_render', 'on_failure')
    }

    for key in ('serializer', 'unserializer'):
        if key in task_dict:
            task_dict[key] = dotted_path.DottedPath(task_dict[key],
                                                    lazy_load=lazy_import)

    # edge case: if using lazy_import, we should not check if the kernel
    # is installed. this is used when exporting to Argo/Airflow using
    # soopervisor, since the exporting process should not require to have
    # the ir kernel installed. The same applies when Airflow has to convert
    # the DAG, the Airflow environment shouldn't require the ir kernel
    if (class_ == tasks.NotebookRunner and lazy_import
            and 'check_if_kernel_installed' not in task_dict):
        task_dict['check_if_kernel_installed'] = False

    # make paths to resources absolute
    if 'params' in task_dict:
        task_dict['params'] = resolve_resources(task_dict['params'],
                                                relative_to=project_root)

    try:
        task = class_(source=source,
                      product=product,
                      name=name,
                      dag=dag,
                      **task_dict)
    except Exception as e:
        msg = (f'Failed to initialize {class_.__name__} task with '
               f'source {str(source)!r}.')
        raise DAGSpecInitializationError(msg) from e

    # attach any hooks that were declared (order: on_finish, on_render,
    # on_failure — same as the dict built above)
    for key, value in hooks.items():
        if value:
            setattr(task, key,
                    dotted_path.DottedPath(value, lazy_load=lazy_import))

    return task
def to_task(self, dag):
    """
    Convert the spec to a Task or TaskGroup and add it to the dag.
    Returns a (task, upstream) tuple with the Task instance and list of
    upstream dependencies (as described in the 'upstream' key, if any,
    empty if no 'upstream' key). If the spec has a 'grid' key, a TaskGroup
    instance instead

    Parameters
    ----------
    dag
        The DAG to add the task(s) to
    """
    data = copy(self.data)
    upstream = _make_iterable(data.pop('upstream'))

    # plain (non-grid) task: delegate to the single-task initializer
    if 'grid' not in data:
        return _init_task(data=data,
                          meta=self.meta,
                          project_root=self.project_root,
                          lazy_import=self.lazy_import,
                          dag=dag), upstream

    # use the function name (if the source is a callable) for error messages
    source_raw = data["source"]
    data_source = str(source_raw.__name__ if hasattr(
        source_raw, '__name__') else source_raw)

    if 'params' in data:
        raise DAGSpecInitializationError(
            'Error initializing task with '
            f'source {data_source!r}: '
            "'params' is not allowed when using 'grid'")

    if 'name' not in data:
        raise DAGSpecInitializationError(
            f'Error initializing task with '
            f'source {data_source!r}: '
            "tasks with 'grid' must have a 'name'")

    task_class = data.pop('class')
    product_class = _find_product_class(task_class, data, self.meta)
    product = data.pop('product')
    name = data.pop('name')
    grid = data.pop('grid')

    # wrap any declared hook in a DottedPath, leave absent ones untouched
    hooks = {}

    for key in ('on_render', 'on_finish', 'on_failure'):
        value = data.pop(key, None)
        hooks[key] = (dotted_path.DottedPath(value,
                                             lazy_load=self.lazy_import)
                      if value else value)

    group = TaskGroup.from_grid(task_class=task_class,
                                product_class=product_class,
                                product_primitive=product,
                                task_kwargs=data,
                                dag=dag,
                                name=name,
                                grid=grid,
                                resolve_relative_to=self.project_root,
                                on_render=hooks['on_render'],
                                on_finish=hooks['on_finish'],
                                on_failure=hooks['on_failure'])

    return group, upstream
def test_eager_load_missing_function():
    """Eager loading fails immediately when the module does not exist."""
    with pytest.raises(ModuleNotFoundError):
        dotted_path.DottedPath('not_a_module.not_a_function',
                               lazy_load=False)
def test_lazy_load_missing_function():
    """Lazy loading defers the import error until the path is called."""
    dp = dotted_path.DottedPath('not_a_module.not_a_function',
                                lazy_load=True)

    with pytest.raises(ModuleNotFoundError):
        dp()