def test_from_dict(self, no_kind_dict):
    """Check the attributes of an Entity created by ``Entity.from_dict``.

    The entity built from ``no_kind_dict`` must carry the shared name and
    version, have no kind, and expose the raw/staging stage configuration.
    """
    expected_stage_config = {
        "raw": {"table_1": "1.0.0", "table_2": "1.0.0"},
        "staging": {"table_3": "1.0.0", "table_4": "1.0.0"},
    }

    loaded = Entity.from_dict(no_kind_dict)

    assert loaded.name == entity_name
    assert loaded.version == entity_version
    assert loaded.kind is None
    assert loaded.stage_config == expected_stage_config
def test_add_view(self):
    """Check that ``Entity.add_view`` registers a view and rejects a duplicate.

    After one view is added to the "staging" stage, ``stage_config`` must map
    the view name to its version and ``transformations`` must map the view
    name to the view object. Adding the same view to the same stage again
    must raise ``ConflictingNameError``.
    """
    stage = "staging"
    target = Entity(entity_name, entity_version)
    staging_view = View("my_query", "SELECT * FROM TABLE", "1.0.0")

    target.add_view(stage, staging_view)

    expected_versions = {stage: {staging_view.name: staging_view.version}}
    expected_objects = {stage: {staging_view.name: staging_view}}
    assert target.stage_config == expected_versions
    assert target.transformations == expected_objects

    # A second registration under the same name in the same stage must fail.
    with pytest.raises(ConflictingNameError):
        target.add_view(stage, staging_view)
def test_add_transformation(self):
    """Check that ``Entity.add_transformation`` registers once and only once.

    After one transformation is added to the "staging" stage,
    ``stage_config`` must map its name to its version and
    ``transformations`` must map its name to the object itself. Adding the
    same transformation to the same stage again must raise
    ``ConflictingNameError``.
    """
    stage = "staging"
    target = Entity(entity_name, entity_version)
    query = Transformation("my_query", "SELECT * FROM TABLE", "1.0.0")

    target.add_transformation(stage, query)

    expected_versions = {stage: {query.name: query.version}}
    expected_objects = {stage: {query.name: query}}
    assert target.stage_config == expected_versions
    assert target.transformations == expected_objects

    # A second registration under the same name in the same stage must fail.
    with pytest.raises(ConflictingNameError):
        target.add_transformation(stage, query)
def test_serialize_deserialize(self, no_kind_entity):
    """Round-trip ``no_kind_entity`` through ``to_dict`` / ``from_dict``.

    The entity rebuilt from its own dictionary form must compare equal to
    the entity it was produced from.
    """
    round_tripped = Entity.from_dict(no_kind_entity.to_dict())
    assert round_tripped == no_kind_entity
def no_kind_entity(self):
    """Return an Entity with a raw/staging stage configuration and no kind.

    The entity uses the shared ``entity_name`` and ``entity_version`` values;
    no kind argument is passed to the constructor.
    """
    # NOTE(review): presumably registered as a pytest fixture; the decorator
    # is not visible in this view -- confirm.
    raw_tables = {"table_1": "1.0.0", "table_2": "1.0.0"}
    staging_tables = {"table_3": "1.0.0", "table_4": "1.0.0"}
    return Entity(
        entity_name,
        entity_version,
        stage_config={"raw": raw_tables, "staging": staging_tables},
    )
def test_serialize_deserialize(self, my_entity):
    """Round-trip ``my_entity`` through ``to_dict`` / ``from_dict``.

    The entity rebuilt from its own dictionary form must compare equal to
    the entity it was produced from.
    """
    round_tripped = Entity.from_dict(my_entity.to_dict())
    assert round_tripped == my_entity
def my_entity(self):
    """Return the Entity "entity1" (version "1.0.0") with two stages.

    Both the "raw" and the "staging" stage list ``table_1`` and ``table_2``
    at version "1.0.0".
    """
    # NOTE(review): presumably registered as a pytest fixture; the decorator
    # is not visible in this view -- confirm.
    raw_tables = {"table_1": "1.0.0", "table_2": "1.0.0"}
    staging_tables = {"table_1": "1.0.0", "table_2": "1.0.0"}
    return Entity(
        "entity1",
        "1.0.0",
        stage_config={"raw": raw_tables, "staging": staging_tables},
    )
# Step 1: describe the entities with the Flycs SDK.
# Each stage maps the names it contains to the version to use.
stage_config = {
    "raw": {"table_1": "1.0.0", "table_2": "1.0.0"},
    "staging": {"table_3": "1.0.0", "table_4": "1.0.0"},
    "data_warehouse": {"table_5": "1.1.0"},
}
entity1 = Entity(name="entity1", version="1.0.0", stage_config=stage_config)

# Step 2: with the entities defined, build the pipelines that run them.
# The schedule is written in cron notation.
# NOTE(review): in standard cron syntax "* 12 * * *" fires every minute of
# the 12 o'clock hour -- confirm this is intended rather than "0 12 * * *".
# The start time is computed when this module is loaded, so it differs on
# every load.
p1 = Pipeline(
    name="my_pipeline",
    version="1.0.0",
    schedule="* 12 * * *",
    entities=[entity1],
    kind=PipelineKind.VANILLA,
    start_time=datetime.now(tz=timezone.utc),
)

# Step 3: to be discoverable by the rest of the Flycs ecosystem, the
# pipelines need to be aggregated into a list called 'pipelines' located at
# the root of the module.
pipelines = [p1]
# Wrap the builder function in a custom-code definition.
mycode = CustomCode(
    name="my_custom_code",
    version="1.0.0",
    operator_builder=build,
    # The dependencies argument places the airflow operator at the right
    # place in your DAG.
    dependencies=[Dependency(ENTITY_NAME, "staging", query.name)],
    # Requirements let you define dependencies required by the build
    # function.
    requirements=["airflow==1.10.0"],
)

# Define the entity, attaching the custom code to the "staging" stage.
entity = Entity(
    name=ENTITY_NAME,
    version="1.0.0",
    custom_operators={"staging": [mycode]},
)

# Insert the transformations into the entity.
entity.add_transformation("staging", query)

# Build the pipeline that runs the entity; the start time is computed when
# this module is loaded.
python_pipeline = Pipeline(
    name="python_pipeline",
    version="1.0.0",
    schedule="10 10 * * *",
    entities=[entity],
    kind=PipelineKind.VANILLA,
    start_time=datetime.now(tz=timezone.utc),
)