def restart(
        name: str,
        start_time: dt.datetime = typer.Option(..., "--start_time", "-s"),
        end_time: dt.datetime = typer.Option(..., "--end_time", "-e"),
):
    """Reset matching flow items to the 'add' status within a time window.

    ``name`` may be a comma-separated list of flow names. For each name,
    the timezone declared in the first matching flow config is applied to
    the given start/end times before the status change.
    """
    for name_ in name.split(","):
        # Work on per-name copies: the original code reassigned the
        # function parameters inside this loop, so a later name with no
        # matching config silently inherited the previous name's
        # timezone-applied values.
        from_time, to_time = start_time, end_time

        if from_time or to_time:
            # Apply the timezone from the first matching config only.
            for file_name, config in YamlHelper.iter_parse_file_from_dir(
                    FLOW_CONFIGS_DIR, match=name_):
                # Build the tz object once instead of once per time value.
                tz = pendulum.timezone(
                    config["work"]["schedule"]["timezone"])

                if from_time:
                    from_time = from_time.replace(tzinfo=tz)

                if to_time:
                    to_time = to_time.replace(tzinfo=tz)

                break  # First matching config wins.

        count = FlowItem.change_status(name_,
                                       new_status=FlowStatus.add,
                                       from_time=from_time,
                                       to_time=to_time)
        typer.secho(
            f"  {name_} {typer.style(f'{count=}', fg=typer.colors.WHITE)} OK")
def restart_errors(name: str):
    """Return errored items of the given flow name(s) to the 'add' status.

    ``name`` may be a comma-separated list of flow names.
    """
    for flow_name in name.split(","):
        # Only items currently in one of the error statuses are touched.
        count = FlowItem.change_status(
            flow_name,
            filter_statuses=FlowStatus.error_statuses,
            new_status=FlowStatus.add,
        )
        # NOTE: the local must stay named `count` — the `{count=}` f-string
        # renders the variable name into the output.
        styled_count = typer.style(f"{count=}", fg=typer.colors.WHITE)
        typer.secho(f"  {flow_name} {styled_count} OK")
# Example #3 (score: 0)
def prepare_items_for_order(flow: "BaseOperator", start_period: dt.datetime,
                            end_period: dt.datetime):
    """Mark the flow's items as running (and refresh expiry) before ordering.

    The status is switched up front so the same tasks are not ordered
    twice. Written as a generator so it can serve as a context-style hook
    around the ordering step.
    """
    flow_name = flow.notebook.name

    # Flip the status first so a repeated scheduling pass skips these items.
    FlowItem.change_status(
        flow_name,
        new_status=Statuses.run,
        from_time=start_period,
        to_time=end_period,
    )

    expires = flow.Work.expires
    if expires is not None:
        FlowItem.change_expires(
            flow_name,
            expires=expires,
            from_time=start_period,
            to_time=end_period,
        )

    yield
# Example #4 (score: 0)
def order_etl_flow(
    *, logger: Logger, async_mode: bool = False, dry_run: bool = False
) -> Iterator:
    """Prepare flow function to be sent to the queue and executed"""
    from flowmaster.operators.etl.service import ETLOperator
    from flowmaster.operators.etl.policy import ETLFlowConfig

    config_files = YamlHelper.iter_parse_file_from_dir(
        FLOW_CONFIGS_DIR, match=".etl.flow"
    )
    for file_name, config in config_files:
        # In dry-run mode only the fake-data provider is exercised.
        if dry_run and config.get("provider") != "fakedata":
            continue

        # Validate the raw YAML against the flow config schema; skip
        # (and log) any config that cannot be parsed.
        try:
            flow_config = ETLFlowConfig(name=file_name, **config)
        except pydantic.ValidationError as exc:
            logger.error("ValidationError: '%s': %s", file_name, exc)
            continue
        except Exception as exc:
            logger.error("Error: '%s': %s", file_name, exc)
            continue

        work = ETLWork(flow_config)

        for start_period, end_period in work.iter_period_for_execute():
            operator = ETLOperator(flow_config)
            flow_iterator = operator(
                start_period, end_period, async_mode=async_mode, dry_run=dry_run
            )

            # The status is changed so that there is no repeated ordering of tasks.
            FlowItem.change_status(
                operator.name,
                new_status=FlowStatus.run,
                from_time=start_period,
                to_time=end_period,
            )
            logger.info(
                "Order ETL flow [%s]: %s %s", operator.name, start_period, end_period
            )

            yield flow_iterator
# Example #5 (score: 0)
def test_change_status():
    """Every created item should carry the success status after the change."""
    # Start from a clean slate for this flow.
    FlowItem.delete().where(FlowItem.name == FLOW_NAME).execute()

    worktime = pendulum.datetime(2020, 1, 6, tz="Europe/Moscow")
    step = dt.timedelta(1)
    window_start = worktime - dt.timedelta(5)

    created_items = FlowItem.create_missing_items(
        flow_name=FLOW_NAME,
        start_time=window_start,
        end_time=worktime,
        interval_timedelta=step,
    )

    FlowItem.change_status(
        FLOW_NAME,
        new_status=FlowStatus.success,
        from_time=window_start,
        to_time=worktime,
    )

    success_total = FlowItem.count_items(FLOW_NAME, statuses=[FlowStatus.success])
    assert success_total == len(created_items)