コード例 #1
0
def test_construct_etl_dag_with_production_callables():
    """
    Make sure that the DAG returned has the correct task callables as
    specified in the task_callable_mapping argument. Use PRODUCTION_ETL_TASK_CALLABLES
    mapping.

    Also checks that the DAG id is derived from the cdr_type and that every
    entry of the mapping was called exactly once while building the DAG.
    """
    default_args = {"owner": "bob", "start_date": parse("1900-01-01")}
    # Wrap each production entry in a Mock so that calls made by
    # construct_etl_dag are recorded and can be asserted on below.
    task_callable_mapping = {
        name: Mock(wraps=factory)
        for name, factory in PRODUCTION_ETL_TASK_CALLABLES.items()
    }
    cdr_type = "spaghetti"

    dag = construct_etl_dag(**task_callable_mapping,
                            default_args=default_args,
                            cdr_type=cdr_type)

    assert dag.dag_id == f"etl_{cdr_type}"

    # Only PythonOperator tasks expose a python_callable to compare.
    python_tasks = [t for t in dag.tasks if isinstance(t, PythonOperator)]

    dag_task_callable_mapping = {
        t.task_id: t.python_callable for t in python_tasks
    }
    # Build a fresh task from the un-mocked production entry and compare
    # its python_callable with the one found on the DAG's task.
    expected_dag_task_callable_mapping = {
        t.task_id: PRODUCTION_ETL_TASK_CALLABLES[t.task_id](
            task_id=t.task_id).python_callable
        for t in python_tasks
    }
    assert dag_task_callable_mapping == expected_dag_task_callable_mapping

    # Plain loop rather than a list comprehension: the calls are made for
    # their assertion side effect only, no list is needed.
    for mocked_callable in task_callable_mapping.values():
        mocked_callable.assert_called_once()
コード例 #2
0
def test_construct_etl_dag_fails_with_no_start_date():
    """
    Make sure we get an AirflowException if default_args does not contain
    a start_date.
    """
    default_args = {"owner": "bob"}
    task_callable_mapping = TEST_ETL_TASK_CALLABLES
    cdr_type = "spaghetti"

    # The return value is irrelevant here; only the raised exception matters.
    with pytest.raises(AirflowException):
        construct_etl_dag(**task_callable_mapping,
                          default_args=default_args,
                          cdr_type=cdr_type)
コード例 #3
0
def test_construct_etl_dag_fails_with_bad_start_date():
    """
    If the start_date is not a valid date we get a ParserError.
    """
    default_args = {"owner": "bob", "start_date": "bob_time"}
    task_callable_mapping = TEST_ETL_TASK_CALLABLES
    cdr_type = "spaghetti"

    # The return value is irrelevant here; only the raised exception matters.
    with pytest.raises(ParserError):
        construct_etl_dag(**task_callable_mapping,
                          default_args=default_args,
                          cdr_type=cdr_type)
コード例 #4
0
def test_construct_etl_dag_fails_with_incorrect_mapping_keys():
    """
    If the dictionary we pass to task_callable_mapping does not have
    correct keys we get a TypeError.
    """
    # Use a well-formed start date so that the empty task mapping is the
    # only thing wrong with this call.
    default_args = {"owner": "bob", "start_date": "1900-01-01"}
    task_callable_mapping = {}
    cdr_type = "spaghetti"

    # The return value is irrelevant here; only the raised exception matters.
    with pytest.raises(TypeError):
        construct_etl_dag(**task_callable_mapping,
                          default_args=default_args,
                          cdr_type=cdr_type)
コード例 #5
0
def test_construct_etl_dag_with_no_owner_defaults_to_airflow():
    """
    When default_args carries no "owner" entry, the owner reported by the
    constructed DAG is "Airflow".
    """
    args_without_owner = {"start_date": parse("1900-01-01")}

    built_dag = construct_etl_dag(
        **TEST_ETL_TASK_CALLABLES,
        default_args=args_without_owner,
        cdr_type="spaghetti",
    )

    assert built_dag.owner == "Airflow"
コード例 #6
0
ファイル: etl.py プロジェクト: cristianchagalj/FlowKit
# Initial DAG arguments; the production branch below replaces these with the
# "default_args" entry read from the config file.
default_args = {
    "owner": "flowminder",
    "start_date": parse("1900-01-01"),
}

# Task callables available for each runtime configuration name.
ETL_TASK_CALLABLES = {
    "testing": TEST_ETL_TASK_CALLABLES,
    "production": PRODUCTION_ETL_TASK_CALLABLES,
}

# Runtime configuration comes from the environment; "production" is used when
# FLOWETL_RUNTIME_CONFIG is not set.
flowetl_runtime_config = os.getenv("FLOWETL_RUNTIME_CONFIG", "production")

# Determine if we are in a testing environment - use dummy callables if so
if flowetl_runtime_config == "testing":
    task_callable_mapping = TEST_ETL_TASK_CALLABLES
    logger.info("running in testing environment")

    dag = construct_etl_dag(**task_callable_mapping,
                            default_args=default_args,
                            cdr_type="testing")
elif flowetl_runtime_config == "production":
    task_callable_mapping = PRODUCTION_ETL_TASK_CALLABLES
    logger.info("running in production environment")

    # read and validate the config file before creating the DAGs
    global_config_dict = get_config_from_file(
        config_filepath=Path("/mounts/config/config.yml"))
    validate_config(global_config_dict=global_config_dict)

    default_args = global_config_dict["default_args"]

    # create DAG for each cdr_type
    for cdr_type in CDRType:
        # Ensure `cdr_type` is a string (e.g. "sms", instead of the raw value `CDRType.SMS`)