def test_upload():
    """Calling upload on a MetaProduct calls upload once on each product."""
    first = Mock()
    second = Mock()
    meta = MetaProduct({'a': first, 'b': second})

    meta.upload()

    # each wrapped product must receive exactly one upload() call, no args
    for mocked in (first, second):
        mocked.upload.assert_called_once_with()
def test_unserialize_multipe_products(tmp_directory):
    """unserializer_txt on a MetaProduct returns a dict of file contents.

    ``tmp_directory`` is a fixture defined elsewhere (presumably it runs the
    test inside a temporary working directory -- confirm).
    """
    files = {'a.txt': 'contents of a', 'b.txt': 'contents of b'}
    for filename, text in files.items():
        Path(filename).write_text(text)

    products = MetaProduct({'a': 'a.txt', 'b': 'b.txt'})
    loaded = unserializer_txt(products)

    # keys in the result mirror the keys used to build the MetaProduct
    assert loaded == {'a': 'contents of a', 'b': 'contents of b'}
def test_delete_metadata(tmp_directory):
    """Deleting a MetaProduct's metadata removes each product's metadata file.

    ``tmp_directory`` is a fixture defined elsewhere (presumably it runs the
    test inside a temporary working directory -- confirm).
    """
    metadata_files = [Path('.a.txt.metadata'), Path('.b.txt.metadata')]
    for metadata_file in metadata_files:
        metadata_file.touch()

    meta = MetaProduct({'a': File('a.txt'), 'b': File('b.txt')})
    meta.metadata.delete()

    # both metadata files created above must be gone
    for metadata_file in metadata_files:
        assert not metadata_file.exists()
def test_serialize_multipe_products(tmp_directory):
    """serializer_txt writes one file per key when given a MetaProduct.

    ``tmp_directory`` is a fixture defined elsewhere (presumably it runs the
    test inside a temporary working directory -- confirm).
    """
    contents = {'a': 'contents of a', 'b': 'contents of b'}
    products = MetaProduct({'a': 'a.txt', 'b': 'b.txt'})

    serializer_txt(contents, products)

    # each value must land in the file registered under the same key
    written_a = Path('a.txt').read_text()
    assert written_a == 'contents of a'
    written_b = Path('b.txt').read_text()
    assert written_b == 'contents of b'
def __init__(self, product, dag, name=None, params=None):
    """Validate the inputs, register the task in the DAG and set initial state.

    ``self._source`` must already be assigned (by the subclass) before this
    method is called.

    Parameters
    ----------
    product : Product or any value accepted by MetaProduct
        What this task generates. Anything that is not a ``Product``
        instance is wrapped in a ``MetaProduct``
    dag : AbstractDAG
        The DAG this task is added to
    name : str, optional
        Task name. When None, ``self._source.name`` is used
    params : optional
        Passed as-is to ``Params``

    Raises
    ------
    RuntimeError
        If ``self._source`` has not been assigned
    TypeError
        If ``self._source`` is None, if ``dag`` is not an ``AbstractDAG``
        or if a product is not an instance of ``PRODUCT_CLASSES_ALLOWED``
    AttributeError
        If ``name`` is None and the source object has no name either
    """
    # Validate the source before anything else: the name inference below
    # reads self._source.name, so checking afterwards would turn a missing
    # or None source into an unrelated AttributeError
    if not hasattr(self, '_source'):
        raise RuntimeError(
            'self._source must be initialized before calling '
            '__init__ in Task')

    if self._source is None:
        raise TypeError(
            '_init_source must return a source object, got None')

    self._params = Params(params)

    if name is None:
        # use name inferred from the source object
        self._name = self._source.name

        if self._name is None:
            raise AttributeError('Task name can only be None if it '
                                 'can be inferred from the source object. '
                                 'For example, when the task receives a '
                                 'pathlib.Path, when using SourceLoader '
                                 'or in PythonCallable. Pass a value '
                                 'explicitly')
    else:
        self._name = name

    if not isinstance(dag, AbstractDAG):
        raise TypeError(
            f"'dag' must be an instance of DAG, got {type(dag)!r}")

    # NOTE: we should get rid of this, maybe just add hooks that are
    # called back on the dag object to avoid having a reference here
    self.dag = dag

    allowed = self.PRODUCT_CLASSES_ALLOWED

    if isinstance(product, Product):
        self._product = product
        valid = allowed is None or isinstance(product, allowed)
    else:
        # if assigned a tuple/list of products, create a MetaProduct
        self._product = MetaProduct(product)
        valid = allowed is None or all(
            isinstance(p, allowed) for p in self._product)

    if not valid:
        raise TypeError('{} only supports the following product '
                        'classes: {}, got {}'.format(
                            type(self).__name__, allowed,
                            type(self._product).__name__))

    # Register only once every validation has passed, so a failed
    # construction does not leave a half-initialized task in the DAG
    dag._add_task(self)

    self._logger = logging.getLogger('{}.{}'.format(
        __name__,
        type(self).__name__))

    self.product.task = self
    self._client = None

    self.exec_status = TaskStatus.WaitingRender

    # hooks start unset
    self._on_finish = None
    self._on_failure = None
    self._on_render = None
def test_can_iterate_over_products():
    """Iterating a list-initialized MetaProduct yields products in order."""
    first, second = File('1.txt'), File('2.txt')
    meta = MetaProduct([first, second])

    iterated = list(meta)

    assert iterated == [first, second]
def test_can_iterate_when_initialized_with_dictionary():
    """Iterating a dict-initialized MetaProduct yields the dict's values."""
    first, second = File('1.txt'), File('2.txt')
    meta = MetaProduct({'a': first, 'b': second})

    iterated = list(meta)

    # iteration gives the products themselves, not the keys
    assert iterated == [first, second]
def test_get():
    """MetaProduct.get returns the product for a key, None if key is absent."""
    product = File('1.txt')
    meta = MetaProduct({'a': product})

    found = meta.get('a')
    assert found is product

    missing = meta.get('b')
    assert missing is None
def test_repr(arg, expected):
    """repr of MetaProduct(arg) equals ``expected``.

    ``arg`` and ``expected`` are supplied from outside this function
    (presumably via parametrization that is not visible here).
    """
    meta = MetaProduct(arg)
    actual = repr(meta)
    assert actual == expected
def product(self):
    """Return a MetaProduct holding the product of every task in self."""
    # NOTE: exposing this lets a dag be declared as a dependency for one
    # task; maybe create a metaclass that applies to DAGs and Task

    # Built from scratch on every call since tasks might have been added
    products = [task.product for task in self.values()]
    return MetaProduct(products)
def product(self):
    """Return a MetaProduct holding the product of every task in self."""
    # Built from scratch on every call since tasks might have been added
    products = [task.product for task in self.values()]
    return MetaProduct(products)
def test_serialize_multiple_products_validates_obj(tmp_directory):
    """serializer_txt raises TypeError for a list paired with a MetaProduct.

    ``tmp_directory`` is a fixture defined elsewhere (presumably it runs the
    test inside a temporary working directory -- confirm).
    """
    with pytest.raises(TypeError) as excinfo:
        products = MetaProduct({'a': 'a.txt', 'b': 'b.txt'})
        serializer_txt(['a', 'b'], products)

    message = str(excinfo.value)
    assert 'Error serializing task: if task generates multiple' in message
def test_serialize_multiple_products_validates_obj_keys(tmp_directory, obj):
    """serializer_txt raises ValueError when ``obj`` has unexpected keys.

    ``obj`` is supplied from outside this function (presumably via
    parametrization that is not visible here); ``tmp_directory`` is a
    fixture defined elsewhere.
    """
    with pytest.raises(ValueError) as excinfo:
        products = MetaProduct({'a': 'a.txt', 'b': 'b.txt'})
        serializer_txt(obj, products)

    message = str(excinfo.value)
    assert 'Error serializing task' in message
    assert 'with valid keys' in message
def __init__(self, source, product, dag, name, params=None):
    """
    All subclasses must implement the same constructor to keep the API
    consistent, optional parameters after "params" are ok

    Parameters
    ----------
    source: str or pathlib.Path
        Source code for the task, for tasks that do not take source code
        as input (such as PostgresCopyFrom), this can be another thing. The
        source can be a template and can make references to any parameter
        in "params", "upstream" parameters or its own "product", not all
        Tasks have templated source (templating code is mostly used by
        Tasks that take SQL source code as input)
    product: Product
        The product that this task will create upon completion. Anything
        that is not a Product instance is wrapped in a MetaProduct
    dag: DAG
        The DAG holding this task
    name: str
        A name for this task. It is stored unchanged; this method does not
        assign a default when None is passed
    params: dict
        Extra parameters passed to the task on rendering (if templated
        source) or during execution (if not templated source)

    Raises
    ------
    TypeError
        If dag is None, if _init_source returns None or something that is
        not a Source, or if a product is not an instance of
        PRODUCT_CLASSES_ALLOWED
    """
    self._params = params or {}
    self._name = name
    self._source = self._init_source(source)

    if dag is None:
        raise TypeError('DAG cannot be None')

    if self._source is None:
        raise TypeError('_init_source must return a value, got None')

    if not isinstance(self._source, Source):
        raise TypeError('_init_source must return a subclass of Source')

    self.dag = dag

    allowed = self.PRODUCT_CLASSES_ALLOWED

    if isinstance(product, Product):
        self._product = product
        valid = allowed is None or isinstance(product, allowed)
    else:
        # if assigned a tuple/list of products, create a MetaProduct
        self._product = MetaProduct(product)
        valid = allowed is None or all(
            isinstance(p, allowed) for p in self._product)

    if not valid:
        raise TypeError('{} only supports the following product '
                        'classes: {}, got {}'
                        .format(type(self).__name__, allowed,
                                type(self._product).__name__))

    # Register only once source and product have been validated, so a
    # failed construction does not leave an invalid task in the DAG
    dag._add_task(self)

    self._logger = logging.getLogger('{}.{}'.format(__name__,
                                                    type(self).__name__))

    self.product.task = self
    self.client = None

    self._status = TaskStatus.WaitingRender
    self.build_report = None
    # hooks start unset
    self._on_finish = None
    self._on_failure = None