Beispiel #1
0
 def test_from_dict(self, no_kind_dict):
     """Check the fields of an Entity built by Entity.from_dict from ``no_kind_dict``."""
     expected_stages = {
         "raw": {"table_1": "1.0.0", "table_2": "1.0.0"},
         "staging": {"table_3": "1.0.0", "table_4": "1.0.0"},
     }

     built = Entity.from_dict(no_kind_dict)

     # Identity fields must match the module-level reference values.
     assert built.name == entity_name
     assert built.version == entity_version
     # The source dict is expected to produce an entity without a kind.
     assert built.kind is None
     assert built.stage_config == expected_stages
Beispiel #2
0
    def test_add_view(self):
        """Adding a view records it under its stage; adding it again raises."""
        stage = "staging"
        target = Entity(entity_name, entity_version)
        my_view = View("my_query", "SELECT * FROM TABLE", "1.0.0")

        target.add_view(stage, my_view)

        # stage_config maps name -> version, transformations maps name -> object.
        expected_versions = {stage: {my_view.name: my_view.version}}
        expected_objects = {stage: {my_view.name: my_view}}
        assert target.stage_config == expected_versions
        assert target.transformations == expected_objects

        # A second insertion of the same view in the same stage must be rejected.
        with pytest.raises(ConflictingNameError):
            target.add_view(stage, my_view)
Beispiel #3
0
    def test_add_transformation(self):
        """Adding a transformation records it under its stage; re-adding raises."""
        stage = "staging"
        target = Entity(entity_name, entity_version)
        step = Transformation("my_query", "SELECT * FROM TABLE", "1.0.0")

        target.add_transformation(stage, step)

        # stage_config maps name -> version, transformations maps name -> object.
        expected_versions = {stage: {step.name: step.version}}
        expected_objects = {stage: {step.name: step}}
        assert target.stage_config == expected_versions
        assert target.transformations == expected_objects

        # A second insertion of the same transformation must be rejected.
        with pytest.raises(ConflictingNameError):
            target.add_transformation(stage, step)
Beispiel #4
0
 def test_serialize_deserialize(self, no_kind_entity):
     """A to_dict / from_dict round trip must yield an equal entity."""
     round_tripped = Entity.from_dict(no_kind_entity.to_dict())
     assert round_tripped == no_kind_entity
Beispiel #5
0
 def no_kind_entity(self):
     """Return an Entity with a "raw" and a "staging" stage, built without a kind argument.

     NOTE(review): presumably a pytest fixture; the decorator is not visible here.
     """
     return Entity(
         entity_name,
         entity_version,
         stage_config={
             "raw": {"table_1": "1.0.0", "table_2": "1.0.0"},
             "staging": {"table_3": "1.0.0", "table_4": "1.0.0"},
         },
     )
Beispiel #6
0
 def test_serialize_deserialize(self, my_entity):
     """A to_dict / from_dict round trip must yield an equal entity."""
     round_tripped = Entity.from_dict(my_entity.to_dict())
     assert round_tripped == my_entity
Beispiel #7
0
 def my_entity(self):
     """Return the "entity1" Entity whose "raw" and "staging" stages list the same tables.

     NOTE(review): presumably a pytest fixture; the decorator is not visible here.
     """
     return Entity(
         "entity1",
         "1.0.0",
         stage_config={
             "raw": {"table_1": "1.0.0", "table_2": "1.0.0"},
             "staging": {"table_1": "1.0.0", "table_2": "1.0.0"},
         },
     )
Beispiel #8
0
# Step 1: describe the entities with the Flycs SDK.
# Each stage maps the names it contains to the version to use.
stage_config = {
    "raw": {"table_1": "1.0.0", "table_2": "1.0.0"},
    "staging": {"table_3": "1.0.0", "table_4": "1.0.0"},
    "data_warehouse": {"table_5": "1.1.0"},
}
entity1 = Entity(name="entity1", version="1.0.0", stage_config=stage_config)

# Step 2: with the entities defined, assemble them into pipelines.
# NOTE(review): in standard cron notation "* 12 * * *" fires every minute of
# hour 12; confirm that "0 12 * * *" (once, at 12:00) was not intended.
p1 = Pipeline(
    name="my_pipeline",
    version="1.0.0",
    schedule="* 12 * * *",  # cron notation
    entities=[entity1],
    kind=PipelineKind.VANILLA,
    start_time=datetime.now(tz=timezone.utc),
)

# Step 3: expose the pipelines for discovery. The rest of the Flycs ecosystem
# looks for a list named 'pipelines' at the root of the module, so every
# pipeline must be collected into it.
pipelines = [p1]
Beispiel #9
0
# Custom code whose operator is produced by the `build` function.
mycode = CustomCode(
    name="my_custom_code",
    version="1.0.0",
    operator_builder=build,
    # `dependencies` places the Airflow operator at the right position in the DAG.
    dependencies=[Dependency(ENTITY_NAME, "staging", query.name)],
    # `requirements` lists the packages needed by the build function.
    requirements=["airflow==1.10.0"],
)

# Define the entity and attach the custom code to its "staging" stage.
entity = Entity(
    name=ENTITY_NAME,
    version="1.0.0",
    custom_operators={"staging": [mycode]},
)
# Register the transformation in the entity's "staging" stage.
entity.add_transformation("staging", query)

# Pipeline wrapping the entity; the schedule is in cron notation.
python_pipeline = Pipeline(
    name="python_pipeline",
    version="1.0.0",
    schedule="10 10 * * *",
    entities=[entity],
    kind=PipelineKind.VANILLA,
    start_time=datetime.now(tz=timezone.utc),
)