Esempio n. 1
0
def test_viable_source_provided_error(bad_config):
    """
    create_dag raises a TypeError with the 'must be provided' message when
    called with the source arguments in bad_config.
    """
    expected_message = "Either staging_view_sql and source_table, or filename and fields must be provided."
    common_kwargs = dict(
        dag_id="TEST",
        cdr_type="TEST",
        start_date=datetime.now(),
        extract_sql="DUMMY SQL",
    )
    with pytest.raises(TypeError, match=expected_message):
        create_dag(**common_kwargs, **bad_config)
Esempio n. 2
0
def test_no_cluster_by_default():
    """
    The dag has no "cluster" task when cluster_field is not passed.
    """
    dag_kwargs = dict(
        dag_id="TEST",
        cdr_type="TEST",
        start_date=datetime.now(),
        extract_sql="DUMMY SQL",
        staging_view_sql="DUMMY STAGING SQL",
        source_table="DUMMY_SOURCE_TABLE",
    )
    tasks = create_dag(**dag_kwargs).task_dict
    assert "cluster" not in tasks
Esempio n. 3
0
def test_source_table_macro_added():
    """
    The source_table argument is exposed as the "source_table" user defined macro.
    """
    table_name = "DUMMY_SOURCE_TABLE"
    dag_kwargs = dict(
        dag_id="TEST",
        cdr_type="TEST",
        start_date=datetime.now(),
        extract_sql="DUMMY SQL",
        staging_view_sql="DUMMY STAGING SQL",
        source_table=table_name,
    )
    macros = create_dag(**dag_kwargs).user_defined_macros
    assert macros["source_table"] == table_name
Esempio n. 4
0
def test_cluster_set_when_field_given():
    """
    The dag has a "cluster" task when cluster_field is passed.
    """
    dag_kwargs = dict(
        dag_id="TEST",
        cdr_type="TEST",
        start_date=datetime.now(),
        extract_sql="DUMMY SQL",
        staging_view_sql="DUMMY STAGING SQL",
        source_table="DUMMY_SOURCE_TABLE",
        cluster_field="DUMMY_FIELD",
    )
    tasks = create_dag(**dag_kwargs).task_dict
    assert "cluster" in tasks
Esempio n. 5
0
def test_inferred_op_types(args, expected_view_type, expected_extract_type,
                           expected_flux_sensor_type):
    """
    The class names of the staging view, extract and flux check tasks match
    the expected names for the source arguments given in args.
    """
    common_kwargs = dict(
        dag_id="TEST",
        cdr_type="TEST",
        start_date=datetime.now(),
        extract_sql="DUMMY SQL",
    )
    dag = create_dag(**common_kwargs, **args)
    # Checked in the same order as before so the first failure is unchanged.
    expected_class_names = (
        ("create_staging_view", expected_view_type),
        ("extract", expected_extract_type),
        ("check_not_in_flux", expected_flux_sensor_type),
    )
    for task_id, expected_name in expected_class_names:
        assert dag.task_dict[task_id].__class__.__name__ == expected_name
Esempio n. 6
0
def test_flux_check_can_be_disabled(args):
    """
    Flux check task is not present when use_flux_sensor=False

    The remaining source arguments are supplied through args, so the check
    applies to every source configuration the test is run with.
    """
    dag = create_dag(
        dag_id="TEST",
        cdr_type="TEST",
        start_date=datetime.now(),
        extract_sql="DUMMY SQL",
        use_flux_sensor=False,
        **args
    )
    assert "check_not_in_flux" not in dag.task_dict
Esempio n. 7
0
def test_use_file_flux_sensor_deprecated():
    """
    Passing use_file_flux_sensor=False emits a deprecation warning, and the
    flux check task that results is a TableFluxSensor.
    """
    expected_warning = "The 'use_file_flux_sensor' argument is deprecated. Set use_flux_sensor='table' instead."
    dag_kwargs = dict(
        dag_id="TEST",
        cdr_type="TEST",
        start_date=datetime.now(),
        extract_sql="DUMMY SQL",
        filename="DUMMY FILE PATTERN",
        fields=dict(DUMMY_FIELD="DUMMY_TYPE"),
        use_file_flux_sensor=False,
    )
    # Only the create_dag call itself is expected to emit the warning.
    with pytest.deprecated_call(match=expected_warning):
        dag = create_dag(**dag_kwargs)
    flux_task = dag.task_dict["check_not_in_flux"]
    assert flux_task.__class__.__name__ == "TableFluxSensor"
Esempio n. 8
0
def test_invalid_flux_sensor_error():
    """
    Attempting to use file flux sensor when not extracting from a file raises an error

    The dag is configured with staging_view_sql and source_table (no filename),
    so requesting use_flux_sensor="file" must raise a ValueError.
    """
    with pytest.raises(
        ValueError,
        match="File flux sensor can only be used when loading from a file.",
    ):
        # The call is expected to raise, so its result is never bound.
        create_dag(
            dag_id="TEST",
            cdr_type="TEST",
            start_date=datetime.now(),
            extract_sql="DUMMY SQL",
            staging_view_sql="DUMMY STAGING SQL",
            source_table="DUMMY_SOURCE_TABLE",
            use_flux_sensor="file",
        )
Esempio n. 9
0
def test_choose_flux_sensor_type(use_flux_sensor, expected_flux_sensor_type):
    """
    Type of flux check is set according to the 'use_flux_sensor' argument

    The dag is built from a file source (filename and fields), and the class
    name of the "check_not_in_flux" task is compared with the expected name.
    """
    dag = create_dag(
        dag_id="TEST",
        cdr_type="TEST",
        start_date=datetime.now(),
        extract_sql="DUMMY SQL",
        filename="DUMMY FILE PATTERN",
        fields=dict(DUMMY_FIELD="DUMMY_TYPE"),
        use_flux_sensor=use_flux_sensor,
    )
    assert (
        dag.task_dict["check_not_in_flux"].__class__.__name__
        == expected_flux_sensor_type
    )
Esempio n. 10
0
from datetime import datetime, timedelta

from airflow import DAG
from flowetl.util import create_dag

# Define the "filesystem_dag" DAG for "calls" CDR data, sourced from a file
# (filename + fields) rather than from a source table.
dag = create_dag(
    dag_id="filesystem_dag",
    # No schedule is given -- presumably the dag only runs when triggered; confirm.
    schedule_interval=None,
    retries=0,
    retry_delay=timedelta(days=1),
    start_date=datetime(2016, 3, 1),
    end_date=datetime(2016, 6, 17),
    cdr_type="calls",
    # NOTE(review): units of the poke/wait intervals are not visible here
    # (presumably seconds) -- check create_dag.
    data_present_poke_interval=5,
    flux_check_poke_interval=5,
    flux_check_wait_interval=5,
    extract_sql="extract.sql",
    indexes=["msisdn_counterpart", "location_id", "datetime", "tac"],
    cluster_field="msisdn",
    # The filename pattern ends in .csv.gz; "zcat" is presumably the program
    # used to read the compressed file -- confirm against create_dag.
    program="zcat",
    # Contains {{ ... }} template placeholders for the CDR type and date.
    filename="/files/{{ params.cdr_type }}_{{ ds_nodash }}.csv.gz",
    # Mapping of field name to type name for the file's contents.
    fields={
        "msisdn": "TEXT",
        "cell_id": "TEXT",
        "event_time": "TIMESTAMPTZ",
    },
    null="Undefined",
)

# Set on the created dag object after construction; presumably so the dag
# starts unpaused when first registered -- confirm against Airflow's DAG docs.
dag.is_paused_upon_creation = False
Esempio n. 11
0
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

"""
Test dags for use in integration tests.
"""

from datetime import datetime, timedelta

from airflow import DAG
from flowetl.util import create_dag

# Define the "calls" DAG, sourced from a table (source_table + staging_view_sql)
# rather than from a file.
dag = create_dag(
    dag_id="calls",
    schedule_interval="@daily",
    retries=0,
    retry_delay=timedelta(days=1),
    # One-week window of dates, 21 to 27 December 2019.
    start_date=datetime(2019, 12, 21),
    end_date=datetime(2019, 12, 27),
    cdr_type="calls",
    # NOTE(review): units of the poke/wait intervals are not visible here
    # (presumably seconds) -- check create_dag.
    data_present_poke_interval=5,
    flux_check_poke_interval=5,
    flux_check_wait_interval=5,
    extract_sql="extract_calls.sql",
    indexes=["msisdn_counterpart", "location_id", "datetime", "tac"],
    cluster_field="msisdn",
    # Table source: the data comes from this table via the staging view SQL.
    source_table="sample_data_fdw",
    staging_view_sql="stage_calls.sql",
    null="Undefined",
)
Esempio n. 12
0
Test dags for use in integration tests.
"""

from datetime import datetime, timedelta

from airflow import DAG
from flowetl.util import create_dag

# Define the "sms" DAG, sourced from a file (filename + fields) rather than
# from a source table.
dag = create_dag(
    dag_id="sms",
    schedule_interval="@daily",
    retries=0,
    retry_delay=timedelta(days=1),
    start_date=datetime(2019, 1, 1),
    end_date=datetime(2019, 1, 2),
    cdr_type="sms",
    # NOTE(review): units of the poke/wait intervals are not visible here
    # (presumably seconds) -- check create_dag.
    data_present_poke_interval=5,
    flux_check_poke_interval=5,
    flux_check_wait_interval=5,
    extract_sql="extract_sms.sql",
    indexes=["msisdn_counterpart", "location_id", "datetime", "tac"],
    cluster_field="msisdn",
    # Contains {{ ... }} template placeholders; the CDR type is upper-cased
    # in the file name via .upper().
    filename="/mounts/files/{{ params.cdr_type.upper() }}_{{ ds_nodash }}.csv",
    # Mapping of field name to type name for the file's contents.
    fields={
        "msisdn": "TEXT",
        "event_time": "TIMESTAMPTZ",
        "cell_id": "TEXT",
    },
    null="Undefined",
)