Example 1
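A test that injects real Yandex.Direct credentials into a campaigns-to-CSV config, rebuilds the ETLFlowConfig from its dict form, and drains the operator's generator for a one-day period with paging capped at two pages.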
def test_attributes_to_csv():
    ya_direct_campaigns_to_csv_config.export.credentials = yandex_direct_credentials
    config = ETLFlowConfig(**ya_direct_campaigns_to_csv_config.dict())
    etl_flow = ETLOperator(config)
    list(
        etl_flow(dt.datetime(2021, 2, 1), dt.datetime(2021, 2, 1),
                 max_pages=2))
Example 2
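A CLI validation command: every .flow YAML file in the configs directory is parsed and instantiated as an ETLFlowConfig, so an invalid config raises before any flow is scheduled; typer.echo prints OK per valid file.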
def validate():
    from flowmaster.operators.etl.policy import ETLFlowConfig

    for file_name, config in YamlHelper.iter_parse_file_from_dir(
        setttings.FLOW_CONFIGS_DIR, match=".flow"
    ):
        ETLFlowConfig(name=file_name, **config)
        typer.echo(f"  {file_name} OK")
Example 3
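Same pattern as Example 1, but the Yandex.Direct report is loaded into ClickHouse, so credentials are injected into both the export and the load policies.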
def test_reports_to_clickhouse():
    ya_direct_report_to_clickhouse_config.export.credentials = yandex_direct_credentials
    ya_direct_report_to_clickhouse_config.load.credentials = clickhouse_credentials
    config = ETLFlowConfig(**ya_direct_report_to_clickhouse_config.dict())
    etl_flow = ETLOperator(config)
    list(
        etl_flow(dt.datetime(2021, 2, 1), dt.datetime(2021, 2, 1),
                 max_pages=2))
Example 4
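A fixture that builds a WorkPolicy with a daily schedule: Europe/Moscow timezone, a midnight start time, and a from_date five days in the past.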
def work_policy():
    return ETLFlowConfig.WorkPolicy(
        schedule=ETLFlowConfig.WorkPolicy.SchedulePolicy(
            timezone="Europe/Moscow",
            start_time="00:00:00",
            from_date=dt.date.today() - dt.timedelta(5),
            interval="daily",
        )
    )
Example 5
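A fixture that assembles a complete SQLite-to-CSV flow config from smaller per-stage policy fixtures.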
def sqlite_to_csv_config(tmp_path, work_policy, sqlite_export_policy, csv_transform_policy, csv_load_policy):
    return ETLFlowConfig(
        name="sqlite_to_csv",
        provider=SQLiteProvider.name,
        storage=CSVLoader.name,
        work=work_policy,
        export=sqlite_export_policy,
        transform=csv_transform_policy,
        load=csv_load_policy,
    )
Example 6
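A fixture that composes a CSV-to-CSV flow config, reusing the shared work, transform, and load policy fixtures.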
def config_csv_to_csv_with_columns(work_policy, csv_transform_policy, csv_export_policy, csv_load_policy):

    return ETLFlowConfig(
        name="csv_to_csv_with_columns",
        provider=CSVProvider.name,
        storage=CSVLoader.name,
        work=work_policy,
        export=csv_export_policy,
        transform=csv_transform_policy,
        load=csv_load_policy,
    )
Example 7
def test_codex_telegram():
    def export_func(start_period,
                    end_period) -> Iterator[tuple[dict, list, list]]:
        yield ({}, ["date"], [[start_period]])

    yml_visits_to_csv_config.work.notifications = ETLFlowConfig.WorkPolicy.NotificationsPolicy(
        codex_telegram=ETLFlowConfig.WorkPolicy.NotificationsPolicy.CodexTelegramPolicy(
            links=[credentials["codex_telegram"]],
            on_success=True,
        )
    )
    config = ETLFlowConfig(**dict(yml_visits_to_csv_config))

    YandexMetrikaLogsExport.__call__ = Mock(side_effect=export_func)
    etl_flow = ETLOperator(config)

    list(
        etl_flow(start_period=dt.datetime(2021, 1, 1),
                 end_period=dt.datetime(2021, 1, 1)))
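Here the real Yandex.Metrika Logs export is replaced with a Mock so the flow runs without network calls; what is under test is the codex_telegram on_success notification wired into the work policy.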
Example 8
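The scheduler entry point: it walks the *.etl.flow configs, validates each into an ETLFlowConfig (logging and skipping invalid files), and yields one ready-to-run flow iterator per pending period, switching the item status to run so the same period is not ordered twice.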
def order_etl_flow(
    *, logger: Logger, async_mode: bool = False, dry_run: bool = False
) -> Iterator:
    """Prepare flow function to be sent to the queue and executed"""
    from flowmaster.operators.etl.service import ETLOperator
    from flowmaster.operators.etl.policy import ETLFlowConfig

    for file_name, config in YamlHelper.iter_parse_file_from_dir(
        FLOW_CONFIGS_DIR, match=".etl.flow"
    ):
        if dry_run:
            if config.get("provider") != "fakedata":
                continue

        try:
            flow_config = ETLFlowConfig(name=file_name, **config)
        except pydantic.ValidationError as exc:
            logger.error("ValidationError: '%s': %s", file_name, exc)
            continue
        except Exception as exc:
            logger.error("Error: '%s': %s", file_name, exc)
            continue

        work = ETLWork(flow_config)

        for start_period, end_period in work.iter_period_for_execute():
            etl_flow = ETLOperator(flow_config)
            etl_flow_iterator = etl_flow(
                start_period, end_period, async_mode=async_mode, dry_run=dry_run
            )

            # The status is changed so that there is no repeated ordering of tasks.
            FlowItem.change_status(
                etl_flow.name,
                new_status=FlowStatus.run,
                from_time=start_period,
                to_time=end_period,
            )
            logger.info(
                "Order ETL flow [%s]: %s %s", etl_flow.name, start_period, end_period
            )

            yield etl_flow_iterator
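A minimal sketch of how the yielded iterators might be consumed (assumed usage, not from the source; the logger name and the dry_run choice are illustrative):

import logging

logger = logging.getLogger("flowmaster.order")  # hypothetical logger name

# dry_run=True restricts ordering to "fakedata" provider configs, per the filter above.
for etl_flow_iterator in order_etl_flow(logger=logger, dry_run=True):
    for _ in etl_flow_iterator:
        pass  # each iteration advances the export -> transform -> load cycle one step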
Example 9
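Module-level test configs for the Yandex.Metrika Logs API: a visits-to-CSV config with counter credentials and an explicit column list (the snippet is cut off where a visits-to-ClickHouse variant begins).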
from tests.fixtures import work_policy, csv_load_policy, csv_transform_policy

yml_visits_to_csv_config = ETLFlowConfig(
    name="ymlogs_to_csv",
    provider=YandexMetrikaLogsProvider.name,
    storage=CSVLoader.name,
    work=work_policy,
    export=YandexMetrikaLogsExportPolicy(
        credentials=YandexMetrikaLogsExportPolicy.CredentialsPolicy(
            counter_id=0, access_token="token"
        ),
        params=YandexMetrikaLogsExportPolicy.ParamsPolicy(
            source="visits",
            columns=[
                "ym:s:counterID",
                "ym:s:clientID",
                "ym:s:visitID",
                "ym:s:date",
                "ym:s:dateTime",
                "ym:s:lastTrafficSource",
                "ym:s:startURL",
                "ym:s:pageViews",
            ],
        ),
    ),
    transform=csv_transform_policy,
    load=csv_load_policy,
)

yml_visits_to_clickhouse_config = ETLFlowConfig(
    name="ymlogs_to_clickhouse",
Example 10
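A Yandex.Direct ACCOUNT_PERFORMANCE_REPORT to CSV config with a fully typed report body; a ClickHouse variant is then derived by merging overrides into the dict of the CSV config (the snippet is cut off mid-merge).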
from flowmaster.operators.etl.providers import YandexDirectProvider
from flowmaster.operators.etl.providers.yandex_direct.policy import (
    YandexDirectExportPolicy as ExportPolicy,
)
from tests.fixtures import work_policy, csv_load_policy, csv_transform_policy

ya_direct_report_to_csv_config = ETLFlowConfig(
    name="ya_direct_report_to_csv",
    provider=YandexDirectProvider.name,
    storage=CSVLoader.name,
    work=work_policy,
    export=ExportPolicy(
        credentials=ExportPolicy.CredentialsPolicy(access_token="token"),
        resource="reports",
        headers=ExportPolicy.HeadersPolicy(return_money_in_micros=True),
        body=ExportPolicy.ReportBodyPolicy(
            params=ExportPolicy.ReportBodyPolicy.ReportParamsPolicy(
                ReportType="ACCOUNT_PERFORMANCE_REPORT",
                DateRangeType="AUTO",
                FieldNames=["CampaignType", "Cost"],
                IncludeVAT="NO",
                Page=ExportPolicy.ReportBodyPolicy.ReportParamsPolicy.PagePolicy(Limit=10),
            ),
        ),
    ),
    transform=csv_transform_policy,
    load=csv_load_policy,
)

ya_direct_report_to_clickhouse_config = ETLFlowConfig(
    **{
        **ya_direct_report_to_csv_config.dict(),
        **dict(
Example 11
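A self-contained fakedata-to-CSV config and a test that runs the flow end to end; since it needs no external services, it doubles as a smoke test.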
from pathlib import Path

from flowmaster.operators.etl.loaders.csv.service import CSVLoader
from flowmaster.operators.etl.policy import ETLFlowConfig
from flowmaster.operators.etl.providers import FakeDataProvider
from flowmaster.operators.etl.providers.fakedata import FakeDataExportPolicy
from tests import get_tests_dir
from tests.fixtures import work_policy, csv_load_policy, csv_transform_policy

FILE_TESTS_DIR = get_tests_dir() / "__test_files__"
Path.mkdir(FILE_TESTS_DIR, exist_ok=True)

fakedata_to_csv_config = ETLFlowConfig(
    name="fakedata_to_csv_config",
    provider=FakeDataProvider.name,
    storage=CSVLoader.name,
    work=work_policy,
    export=FakeDataExportPolicy(rows=1),
    transform=csv_transform_policy,
    load=csv_load_policy,
)
def test_flow_fakedata():
    config = ETLFlowConfig(**dict(fakedata_to_csv_config))
    etl_flow = ETLOperator(config)
    list(etl_flow(dt.datetime(2021, 2, 1), dt.datetime(2021, 2, 1)))