def test_attributes_to_csv():
    ya_direct_campaigns_to_csv_config.export.credentials = yandex_direct_credentials
    config = ETLFlowConfig(**ya_direct_campaigns_to_csv_config.dict())
    etl_flow = ETLOperator(config)
    list(etl_flow(dt.datetime(2021, 2, 1), dt.datetime(2021, 2, 1), max_pages=2))
def validate():
    from flowmaster.operators.etl.policy import ETLFlowConfig

    for file_name, config in YamlHelper.iter_parse_file_from_dir(
        setttings.FLOW_CONFIGS_DIR, match=".flow"
    ):
        ETLFlowConfig(name=file_name, **config)
        typer.echo(f" {file_name} OK")
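# Illustrative sketch (assumption, not from the source): exposing `validate`
# as a CLI command. `typer.Typer()` and `app.command()` are standard Typer
# API, but the `app` object and entry point below are hypothetical.
import typer

app = typer.Typer()
app.command(name="validate")(validate)

if __name__ == "__main__":
    app()  # e.g. `python cli.py validate` echoes one "OK" line per parsed config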
def test_reports_to_clickhouse():
    ya_direct_report_to_clickhouse_config.export.credentials = yandex_direct_credentials
    ya_direct_report_to_clickhouse_config.load.credentials = clickhouse_credentials
    config = ETLFlowConfig(**ya_direct_report_to_clickhouse_config.dict())
    etl_flow = ETLOperator(config)
    list(etl_flow(dt.datetime(2021, 2, 1), dt.datetime(2021, 2, 1), max_pages=2))
def work_policy():
    return ETLFlowConfig.WorkPolicy(
        schedule=ETLFlowConfig.WorkPolicy.SchedulePolicy(
            timezone="Europe/Moscow",
            start_time="00:00:00",
            from_date=dt.date.today() - dt.timedelta(5),
            interval="daily",
        )
    )
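# Minimal sketch (assumption, not from the source): the calendar days such a
# daily schedule starting five days back would cover, computed with only the
# stdlib. This mirrors the fixture above for illustration.
import datetime as dt

from_date = dt.date.today() - dt.timedelta(5)
schedule_days = [from_date + dt.timedelta(n) for n in range(5)]  # 5 daily periods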
def sqlite_to_csv_config(
    tmp_path, work_policy, sqlite_export_policy, csv_transform_policy, csv_load_policy
):
    return ETLFlowConfig(
        name="sqlite_to_csv",
        provider=SQLiteProvider.name,
        storage=CSVLoader.name,
        work=work_policy,
        export=sqlite_export_policy,
        transform=csv_transform_policy,
        load=csv_load_policy,
    )
def config_csv_to_csv_with_columns(
    work_policy, csv_transform_policy, csv_export_policy, csv_load_policy
):
    return ETLFlowConfig(
        name="csv_to_csv_with_columns",
        provider=CSVProvider.name,
        storage=CSVLoader.name,
        work=work_policy,
        export=csv_export_policy,
        transform=csv_transform_policy,
        load=csv_load_policy,
    )
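# Consolidation sketch (assumption): the two fixtures above differ only in
# name, provider, and export policy, so a hypothetical helper could build
# both. `_make_csv_flow_config` does not exist in the source.
def _make_csv_flow_config(name, provider_name, export, work, transform, load):
    return ETLFlowConfig(
        name=name,
        provider=provider_name,
        storage=CSVLoader.name,
        work=work,
        export=export,
        transform=transform,
        load=load,
    )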
def test_codex_telegram():
    def export_func(start_period, end_period) -> Iterator[tuple[dict, list, list]]:
        yield ({}, ["date"], [[start_period]])

    yml_visits_to_csv_config.work.notifications = ETLFlowConfig.WorkPolicy.NotificationsPolicy(
        codex_telegram=ETLFlowConfig.WorkPolicy.NotificationsPolicy.CodexTelegramPolicy(
            links=[credentials["codex_telegram"]],
            on_success=True,
        )
    )
    config = ETLFlowConfig(**dict(yml_visits_to_csv_config))
    YandexMetrikaLogsExport.__call__ = Mock(side_effect=export_func)
    etl_flow = ETLOperator(config)
    list(
        etl_flow(
            start_period=dt.datetime(2021, 1, 1),
            end_period=dt.datetime(2021, 1, 1),
        )
    )
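# Alternative sketch (assumption, not the project's pattern): assigning to
# `YandexMetrikaLogsExport.__call__` above leaks across tests; a context
# manager scopes the patch instead. `unittest.mock.patch.object` is standard
# library API; the replacement takes `self` because it is installed on the class.
from unittest.mock import patch

def _fake_export(self, start_period, end_period):
    yield ({}, ["date"], [[start_period]])

with patch.object(YandexMetrikaLogsExport, "__call__", _fake_export):
    pass  # run the flow inside this block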
def order_etl_flow(
    *, logger: Logger, async_mode: bool = False, dry_run: bool = False
) -> Iterator:
    """Prepare flow functions to be sent to the queue and executed."""
    from flowmaster.operators.etl.service import ETLOperator
    from flowmaster.operators.etl.policy import ETLFlowConfig

    for file_name, config in YamlHelper.iter_parse_file_from_dir(
        FLOW_CONFIGS_DIR, match=".etl.flow"
    ):
        if dry_run and config.get("provider") != "fakedata":
            continue

        try:
            flow_config = ETLFlowConfig(name=file_name, **config)
        except pydantic.ValidationError as exc:
            logger.error("ValidationError: '%s': %s", file_name, exc)
            continue
        except Exception as exc:
            logger.error("Error: '%s': %s", file_name, exc)
            continue

        work = ETLWork(flow_config)

        for start_period, end_period in work.iter_period_for_execute():
            etl_flow = ETLOperator(flow_config)
            etl_flow_iterator = etl_flow(
                start_period, end_period, async_mode=async_mode, dry_run=dry_run
            )
            # Mark the item as running so the same period is not ordered again.
            FlowItem.change_status(
                etl_flow.name,
                new_status=FlowStatus.run,
                from_time=start_period,
                to_time=end_period,
            )
            logger.info(
                "Order ETL flow [%s]: %s %s", etl_flow.name, start_period, end_period
            )
            yield etl_flow_iterator
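# Usage sketch (assumption): draining the generator the way a scheduler loop
# might. `logging.getLogger` is stdlib; consuming each yielded iterator with
# `list()` mirrors how the tests in this repo drive ETLOperator instances.
import logging

logger = logging.getLogger("flowmaster.order")
for etl_flow_iterator in order_etl_flow(logger=logger, dry_run=True):
    list(etl_flow_iterator)  # execute the flow to completion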
)
from tests.fixtures import work_policy, csv_load_policy, csv_transform_policy

yml_visits_to_csv_config = ETLFlowConfig(
    name="ymlogs_to_csv",
    provider=YandexMetrikaLogsProvider.name,
    storage=CSVLoader.name,
    work=work_policy,
    export=YandexMetrikaLogsExportPolicy(
        credentials=YandexMetrikaLogsExportPolicy.CredentialsPolicy(
            counter_id=0, access_token="token"
        ),
        params=YandexMetrikaLogsExportPolicy.ParamsPolicy(
            source="visits",
            columns=[
                "ym:s:counterID",
                "ym:s:clientID",
                "ym:s:visitID",
                "ym:s:date",
                "ym:s:dateTime",
                "ym:s:lastTrafficSource",
                "ym:s:startURL",
                "ym:s:pageViews",
            ],
        ),
    ),
    transform=csv_transform_policy,
    load=csv_load_policy,
)

yml_visits_to_clickhouse_config = ETLFlowConfig(
    name="ymlogs_to_clickhouse",
from flowmaster.operators.etl.providers import YandexDirectProvider
from flowmaster.operators.etl.providers.yandex_direct.policy import (
    YandexDirectExportPolicy as ExportPolicy,
)
from tests.fixtures import work_policy, csv_load_policy, csv_transform_policy

ya_direct_report_to_csv_config = ETLFlowConfig(
    name="ya_direct_report_to_csv",
    provider=YandexDirectProvider.name,
    storage=CSVLoader.name,
    work=work_policy,
    export=ExportPolicy(
        credentials=ExportPolicy.CredentialsPolicy(access_token="token"),
        resource="reports",
        headers=ExportPolicy.HeadersPolicy(return_money_in_micros=True),
        body=ExportPolicy.ReportBodyPolicy(
            params=ExportPolicy.ReportBodyPolicy.ReportParamsPolicy(
                ReportType="ACCOUNT_PERFORMANCE_REPORT",
                DateRangeType="AUTO",
                FieldNames=["CampaignType", "Cost"],
                IncludeVAT="NO",
                Page=ExportPolicy.ReportBodyPolicy.ReportParamsPolicy.PagePolicy(
                    Limit=10
                ),
            ),
        ),
    ),
    transform=csv_transform_policy,
    load=csv_load_policy,
)

ya_direct_report_to_clickhouse_config = ETLFlowConfig(
    **{
        **ya_direct_report_to_csv_config.dict(),
        **dict(
from pathlib import Path

from flowmaster.operators.etl.loaders.csv.service import CSVLoader
from flowmaster.operators.etl.policy import ETLFlowConfig
from flowmaster.operators.etl.providers import FakeDataProvider
from flowmaster.operators.etl.providers.fakedata import FakeDataExportPolicy
from tests import get_tests_dir
from tests.fixtures import work_policy, csv_load_policy, csv_transform_policy

FILE_TESTS_DIR = get_tests_dir() / "__test_files__"
Path.mkdir(FILE_TESTS_DIR, exist_ok=True)

fakedata_to_csv_config = ETLFlowConfig(
    name="fakedata_to_csv_config",
    provider=FakeDataProvider.name,
    storage=CSVLoader.name,
    work=work_policy,
    export=FakeDataExportPolicy(rows=1),
    transform=csv_transform_policy,
    load=csv_load_policy,
)
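# Variant sketch (assumption): scaling the same smoke-test config up by
# copying the pydantic model and bumping `rows`. The variant name is
# hypothetical; `.copy(deep=True)` is pydantic v1 API, consistent with the
# `.dict()` calls used elsewhere in these tests.
fakedata_to_csv_config_100 = fakedata_to_csv_config.copy(deep=True)
fakedata_to_csv_config_100.name = "fakedata_to_csv_100"
fakedata_to_csv_config_100.export.rows = 100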
def test_flow_fakedata():
    config = ETLFlowConfig(**dict(fakedata_to_csv_config))
    etl_flow = ETLOperator(config)
    list(etl_flow(dt.datetime(2021, 2, 1), dt.datetime(2021, 2, 1)))
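# Possible extension (sketch; the assertion is an assumption, since the exact
# items ETLOperator yields are not shown in this section):
def test_flow_fakedata_yields_items():
    config = ETLFlowConfig(**dict(fakedata_to_csv_config))
    etl_flow = ETLOperator(config)
    result = list(etl_flow(dt.datetime(2021, 2, 1), dt.datetime(2021, 2, 1)))
    assert result  # expect at least one yielded item for the single fake row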