Example #1
def flowitem_model():
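    # Likely registered as the "flowitem_model" pytest fixture used by the tests below:
    # it pins FlowItem to a dedicated test flow name and clears any leftover rows.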
    from flowmaster.models import FlowItem

    FlowItem.name_for_test = "__fm_test__"
    FlowItem.clear("__fm_test__")

    return FlowItem
Example #2
def test_create_next_execute_item(flowitem_model):
    worktime = pendulum.datetime(2020, 1, 1, tz="Europe/Moscow")
    interval_timedelta = dt.timedelta(1)

    item = FlowItem.create_next_execute_item(
        flow_name=flowitem_model.name_for_test,
        worktime=worktime,
        interval_timedelta=interval_timedelta,
    )

    assert item is None

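    # Seed an item for the previous period so the next execution item can be derived from it.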
    FlowItem.create(
        **{
            FlowItem.name.name: flowitem_model.name_for_test,
            FlowItem.worktime.name: worktime - dt.timedelta(1),
        })
    item = FlowItem.create_next_execute_item(
        flow_name=flowitem_model.name_for_test,
        worktime=worktime,
        interval_timedelta=interval_timedelta,
    )

    assert item

    item = FlowItem.create_next_execute_item(
        flow_name=flowitem_model.name_for_test,
        worktime=worktime,
        interval_timedelta=interval_timedelta,
    )
    assert item is None
Example #3
async def notebooks_view(request: Request):
    # TODO: Add pagination
    count_statuses_map = {
        (item.name, item.status): item.count
        for item in FlowItem.count_items_by_name_and_status()
    }
    count_names_map = {
        item.name: item.count
        for item in FlowItem.count_items_by_name()
    }
    notebooks = []

    for name in iter_active_notebook_filenames():
        data = {"name": name, "is_archive": False}
        validate, *args = get_notebook(name)

        data["count"] = count_names_map.get(name, 0)
        data["count_errors"] = sum(
            count_statuses_map.get((name, status), 0)
            for status in Statuses.error_statuses)
        data["count_fatal_errors"] = count_statuses_map.get(
            (name, Statuses.fatal_error), 0)
        data["validate"] = validate
        notebooks.append(data)

    for name in iter_archive_notebook_filenames():
        data = {"name": name, "is_archive": True, "validate": True}
        notebooks.append(data)

    return templates.TemplateResponse("/pages/notebooks.html",
                                      context={
                                          "request": request,
                                          "notebooks": notebooks
                                      })
Example #4
async def log_view(name: str, worktime_for_url: str, request: Request):
    import re

    item: FlowItem = FlowItem.get_or_none(
        **{
            FlowItem.name.name: name,
            FlowItem.worktime.name: FlowItem.worktime_from_url(
                worktime_for_url),
        })
    if item.logpath:
        if pathlib.Path(item.logpath).exists():
            with open(item.logpath, "r", encoding="UTF8") as f:
                logtext = f.read()
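                # Strip ANSI color code remnants (e.g. "[32m") from the log text.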
                logtext = re.sub(r"\[\d\dm|\[\dm", "", logtext)
        else:
            logtext = "Logs not found: 'Logs file missing'"
    else:
        logtext = "Logs not found: 'Logs path missing'"

    return templates.TemplateResponse(
        "/pages/log.html",
        context={
            "request": request,
            "content": logtext,
            "filepath": item.logpath,
        },
    )
Example #5
def test_allow_execute_flow():
    FlowItem.delete().where(FlowItem.name == FLOW_NAME).execute()

    worktime = pendulum.datetime(2020, 1, 6, tz="Europe/Moscow")
    interval_timedelta = dt.timedelta(1)
    worktime_list = iter_range_datetime(
        start_time=worktime - dt.timedelta(3),
        end_time=worktime,
        timedelta=interval_timedelta,
    )

    FlowItem.create_items(
        flow_name=FLOW_NAME,
        worktime_list=worktime_list,
        status=Statuses.fatal_error,
        notebook_hash="",
    )
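    # Four consecutive fatal errors with the same notebook hash should block the flow;
    # a new hash should allow it again.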
    assert (FlowItem.allow_execute_flow(
        FLOW_NAME, notebook_hash="", max_fatal_errors=3) is False)
    assert (FlowItem.allow_execute_flow(
        FLOW_NAME, notebook_hash="new", max_fatal_errors=3) is True)

    FlowItem.recreate_prev_items(
        flow_name=FLOW_NAME,
        worktime=worktime,
        offset_periods=10,
        interval_timedelta=interval_timedelta,
    )
    assert (FlowItem.allow_execute_flow(
        FLOW_NAME, notebook_hash="", max_fatal_errors=3) is True)
Example #6
def prepare_items(dry_run: bool = False):
    typer.echo("\n===================" "\nFlowMaster" "\n===================\n")

    from flowmaster.models import FlowItem

    # Clearing statuses for unfulfilled flows.
    FlowItem.clear_statuses_of_lost_items()

    if dry_run:
        typer.echo(f"Dry-run mode!")
        FlowItem.clear("fakedata.etl.flow")
Example #7
def prepare_for_run(dry_run: bool = False):
    init()

    typer.echo("\n==================="
               "\nFlowMaster"
               "\n===================\n")

    from flowmaster.models import FlowItem

    # Clearing statuses for unfulfilled flows.
    FlowItem.clear_statuses_of_lost_items()

    if dry_run:
        FlowItem.delete().where("fakedata.etl.flow" in FlowItem.name).execute()
Example #8
def test_create_history_items():
    FlowItem.delete().where(FlowItem.name == FLOW_NAME).execute()

    worktime = pendulum.datetime(2020, 1, 1, tz="Europe/Moscow")
    interval_timedelta = dt.timedelta(1)

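    # Six daily periods are expected: both endpoints of the 5-day range are inclusive.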
    items = FlowItem.create_missing_items(
        flow_name=FLOW_NAME,
        start_time=worktime - dt.timedelta(5),
        end_time=worktime,
        interval_timedelta=interval_timedelta,
    )

    assert len(items) == 6
Example #9
def test_create_missing_items():
    FlowItem.delete().where(FlowItem.name == FLOW_NAME).execute()

    worktime = pendulum.datetime(2020, 1, 6, tz="Europe/Moscow")
    interval_timedelta = dt.timedelta(1)

    items = FlowItem.create_missing_items(
        flow_name=FLOW_NAME,
        start_time=worktime - dt.timedelta(5),
        end_time=worktime - dt.timedelta(5),
        interval_timedelta=interval_timedelta,
    )

    assert len(items) == 1

    FlowItem.create(**{FlowItem.name.name: FLOW_NAME, FlowItem.worktime.name: worktime})

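    # One item already exists for the final worktime; the call below backfills the five missing periods.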
    FlowItem.create_missing_items(
        flow_name=FLOW_NAME,
        start_time=worktime - dt.timedelta(5),
        end_time=worktime,
        interval_timedelta=interval_timedelta,
    )

    assert (
        FlowItem.select()
        .where(FlowItem.name == FLOW_NAME, FlowItem.status == FlowStatus.add)
        .count()
    ) == 6
Example #10
def list_items(name: str, limit: int = 20):
    for i in FlowItem.iter_items(name, limit=limit):
        msg_parts = [
            f'  {i.worktime.strftime("%Y-%m-%dT%T").replace("T00:00:00", "")}  ',
            f"{i.status}  ",
            f"retries={i.retries}  ",
            f"duration={i.duration}  ",
            typer.style(f"log={i.info}", fg=typer.colors.WHITE)
            if i.info else "",
        ]

        if i.status in Statuses.error_statuses:
            msg_parts[1] = typer.style(msg_parts[1],
                                       fg=typer.colors.RED,
                                       bold=True)
        elif i.status == Statuses.add:
            msg_parts[1] = typer.style(msg_parts[1],
                                       fg=typer.colors.WHITE,
                                       bold=True)
        elif i.status == Statuses.run:
            msg_parts[1] = typer.style(msg_parts[1],
                                       fg=typer.colors.YELLOW,
                                       bold=True)
        elif i.status == Statuses.success:
            msg_parts[1] = typer.style(msg_parts[1],
                                       fg=typer.colors.GREEN,
                                       bold=True)

        typer.echo("".join(msg_parts))
Example #11
def restart_errors(name: str):
    for name_ in name.split(","):
        count = len(
            FlowItem.recreate_items(name_,
                                    filter_statuses=Statuses.error_statuses))
        typer.secho(
            f"  {name_} {typer.style(f'{count=}', fg=typer.colors.WHITE)} OK")
Example #12
def restart(
        name: str,
        start_time: dt.datetime = typer.Option(..., "--start_time", "-s"),
        end_time: dt.datetime = typer.Option(..., "--end_time", "-e"),
):
    for name_ in name.split(","):
        if start_time or end_time:
            # Apply timezone.
            for file_name, config in YamlHelper.iter_parse_file_from_dir(
                    FLOW_CONFIGS_DIR, match=name_):
                tz = config["work"]["schedule"]["timezone"]

                if start_time:
                    start_time = start_time.replace(
                        tzinfo=pendulum.timezone(tz))

                if end_time:
                    end_time = end_time.replace(tzinfo=pendulum.timezone(tz))

                break

        count = FlowItem.change_status(name_,
                                       new_status=FlowStatus.add,
                                       from_time=start_time,
                                       to_time=end_time)
        typer.secho(
            f"  {name_} {typer.style(f'{count=}', fg=typer.colors.WHITE)} OK")
Example #13
def restart_errors(name: str):
    for name_ in name.split(","):
        count = FlowItem.change_status(
            name_,
            new_status=FlowStatus.add,
            filter_statuses=FlowStatus.error_statuses)
        typer.secho(
            f"  {name_} {typer.style(f'{count=}', fg=typer.colors.WHITE)} OK")
Example #14
def test_create_next_execute_item():
    FlowItem.delete().where(FlowItem.name == FLOW_NAME).execute()

    worktime = pendulum.datetime(2020, 1, 1, tz="Europe/Moscow")
    interval_timedelta = dt.timedelta(1)

    item = FlowItem.create_next_execute_item(
        flow_name=FLOW_NAME,
        worktime=worktime,
        interval_timedelta=interval_timedelta,
    )

    assert item is None

    FlowItem.create(
        **{
            FlowItem.name.name: FLOW_NAME,
            FlowItem.worktime.name: worktime - dt.timedelta(1),
        }
    )
    item = FlowItem.create_next_execute_item(
        flow_name=FLOW_NAME,
        worktime=worktime,
        interval_timedelta=interval_timedelta,
    )

    assert item

    item = FlowItem.create_next_execute_item(
        flow_name=FLOW_NAME,
        worktime=worktime,
        interval_timedelta=interval_timedelta,
    )
    assert item is None
Example #15
def test_create_update_error_items():
    FlowItem.delete().where(FlowItem.name == FLOW_NAME).execute()

    worktime = pendulum.datetime(2020, 1, 1, tz="Europe/Moscow")
    interval_timedelta = dt.timedelta(1)

    items = FlowItem.recreate_prev_items(
        flow_name=FLOW_NAME,
        worktime=worktime,
        offset_periods=[-1, -2],
        interval_timedelta=interval_timedelta,
    )

    assert items is None

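    # Seed ten consecutive error items so that the requested offsets (-1 and -2) can be recreated.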
    for i in range(10):
        FlowItem.create(
            **{
                FlowItem.name.name: FLOW_NAME,
                FlowItem.worktime.name: worktime - dt.timedelta(i),
                FlowItem.status.name: FlowStatus.error,
            }
        )

    items = FlowItem.recreate_prev_items(
        flow_name=FLOW_NAME,
        worktime=worktime,
        offset_periods=[-1, -2],
        interval_timedelta=interval_timedelta,
    )

    assert len(items) == 2
    assert FlowItem.count_items(FLOW_NAME, statuses=[FlowStatus.add]) == 2
    for i in items:
        assert i.retries == 0
Example #16
def prepare_items_for_order(flow: "BaseOperator", start_period: dt.datetime,
                            end_period: dt.datetime):
    # Change the status so that the same tasks are not ordered again.
    FlowItem.change_status(
        flow.notebook.name,
        new_status=Statuses.run,
        from_time=start_period,
        to_time=end_period,
    )
    if flow.Work.expires is not None:
        FlowItem.change_expires(
            flow.notebook.name,
            expires=flow.Work.expires,
            from_time=start_period,
            to_time=end_period,
        )

    yield
Example #17
async def tasks_view(name: str, request: Request):
    # TODO: Add pagination
    return templates.TemplateResponse(
        "/pages/tasks.html",
        context={
            "request": request,
            "tasks": FlowItem.iter_items(name, limit=1000, offset=0),
        },
    )
Example #18
def test_order_flow_with_period_length():
    FlowItem.delete().where(FlowItem.name == FLOW_NAME).execute()

    CONFIG.work.schedule = ETLFlowConfig.WorkPolicy.SchedulePolicy(
        timezone="Europe/Moscow",
        start_time="00:00:00",
        from_date=dt.date.today() - dt.timedelta(5),
        interval="daily",
        period_length=2,
    )
    config = dict(CONFIG)
    config.pop("name")

    rv = [(FLOW_NAME, config)]
    YamlHelper.iter_parse_file_from_dir = mock.Mock(return_value=rv)

    flows = list(order_flow(logger=logger))

    assert len(flows) == 3
Example #19
def order_etl_flow(
    *, logger: Logger, async_mode: bool = False, dry_run: bool = False
) -> Iterator:
    """Prepare flow function to be sent to the queue and executed"""
    from flowmaster.operators.etl.service import ETLOperator
    from flowmaster.operators.etl.policy import ETLFlowConfig

    for file_name, config in YamlHelper.iter_parse_file_from_dir(
        FLOW_CONFIGS_DIR, match=".etl.flow"
    ):
        if dry_run:
            if config.get("provider") != "fakedata":
                continue

        try:
            flow_config = ETLFlowConfig(name=file_name, **config)
        except pydantic.ValidationError as exc:
            logger.error("ValidationError: '%s': %s", file_name, exc)
            continue
        except Exception as exc:
            logger.error("Error: '%s': %s", file_name, exc)
            continue

        work = ETLWork(flow_config)

        for start_period, end_period in work.iter_period_for_execute():
            etl_flow = ETLOperator(flow_config)
            etl_flow_iterator = etl_flow(
                start_period, end_period, async_mode=async_mode, dry_run=dry_run
            )

            # Change the status so that the same tasks are not ordered again.
            FlowItem.change_status(
                etl_flow.name,
                new_status=FlowStatus.run,
                from_time=start_period,
                to_time=end_period,
            )
            logger.info(
                "Order ETL flow [%s]: %s %s", etl_flow.name, start_period, end_period
            )

            yield etl_flow_iterator
Example #20
def errors():
    for name in list_notebook():
        count = FlowItem.count_items(name, statuses=Statuses.error_statuses)
        if count > 0:
            count_text = typer.style(count, fg=typer.colors.RED, bold=True)
        else:
            count_text = typer.style(count, fg=typer.colors.GREEN, bold=True)

        name = typer.style(name, fg=typer.colors.WHITE, bold=True)
        typer.echo(f"  {name} {count_text}")
Example #21
def test_local_executor():
    config = fakedata_to_csv_config.dict()
    config.pop("name")
    YamlHelper.iter_parse_file_from_dir = mock.Mock(
        return_value=(("test_local_executor", config), ))

    start_executor(orders=1, dry_run=True)

    items = list(FlowItem.iter_items("test_local_executor"))

    assert len(items) == 5
Example #22
def restart(
        name: str,
        from_time: dt.datetime = typer.Option(..., "--from_time", "-s"),
        to_time: dt.datetime = typer.Option(..., "--to_time", "-e"),
):
    for name_ in name.split(","):
        count = len(
            FlowItem.recreate_items(name_, from_time=from_time,
                                    to_time=to_time))
        typer.secho(
            f"  {name_} {typer.style(f'{count=}', fg=typer.colors.WHITE)} OK")
Example #23
def test_items_for_execute_seconds_interval_without_keep_sequence(
        flowitem_model):
    worktime = pendulum.datetime(2020, 1, 1, tz="Europe/Moscow")

    FlowItem.create_items(flowitem_model.name_for_test,
                          worktime_list=[worktime - dt.timedelta(minutes=4)],
                          **{flowitem_model.status.name: Statuses.success})

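    # Without keep_sequence, missing intervals are not backfilled, so only a single item is expected.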
    items = FlowItem.get_items_for_execute(
        flow_name=flowitem_model.name_for_test,
        worktime=worktime,
        start_time=worktime - dt.timedelta(minutes=10),
        interval_timedelta=dt.timedelta(minutes=1),
        keep_sequence=False,
        retries=0,
        retry_delay=0,
        notebook_hash="",
        max_fatal_errors=3,
    )

    assert len(items) == 1
Example #24
def test_create_update_items_start_time_equals_worktime():
    """Checking when the update date is equals the first worktime."""
    FlowItem.delete().where(FlowItem.name == FLOW_NAME).execute()

    worktime = pendulum.datetime(2020, 1, 1, tz="Europe/Moscow")
    interval_timedelta = dt.timedelta(1)

    FlowItem.create(
        **{
            FlowItem.name.name: FLOW_NAME,
            FlowItem.worktime.name: worktime,
            FlowItem.status.name: Statuses.error,
        })
    items = FlowItem.recreate_prev_items(
        flow_name=FLOW_NAME,
        worktime=worktime,
        offset_periods=[-1, -2, -3],
        interval_timedelta=interval_timedelta,
    )

    assert len(items) == 0
Example #25
    def resource_items(self, start_period, end_period,
                       **kwargs) -> Iterator[ExportContext]:
        query: peewee.ModelSelect = FlowItem.select()

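        # In "by_date" export mode, only items whose worktime falls within the requested period are selected.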
        if self.export.export_mode == "by_date":
            query = query.where(FlowItem.worktime >= start_period,
                                FlowItem.worktime <= end_period)

        yield ExportContext(
            columns=self.export.columns,
            data=list(query.dicts()),
            data_orient=DataOrient.dict,
        )
Example #26
def list_errors(name: str, limit: int = 1000):
    for i in FlowItem.iter_items(name, limit=limit):
        if i.status in Statuses.error_statuses:
            msg_parts = [
                f'  {i.worktime.strftime("%Y-%m-%dT%T").replace("T00:00:00", "")}  ',
                typer.style(f"{i.status}  ", fg=typer.colors.RED, bold=True),
                f"retries={i.retries}  ",
                f"duration={i.duration}  ",
                typer.style(f"log={i.info}", fg=typer.colors.WHITE)
                if i.info else "",
            ]

            typer.echo("".join(msg_parts))
Example #27
def test_retries(create_retries, retries, result, pendulum_utctoday,
                 flowitem_model):
    name = "__test_retries__"
    flowitem_model.clear(name)
    flowitem_model.create(
        **{
            FlowItem.name.name: name,
            FlowItem.worktime.name: pendulum_utctoday,
            FlowItem.finished_utc.name: pendulum_utctoday,
            FlowItem.status.name: Statuses.error,
            FlowItem.retries.name: create_retries,
        })
    items = FlowItem.retry_error_items(name, retries=retries, retry_delay=0)

    assert len(items) == int(result)
Example #28
def test_items_for_execute_seconds_interval_with_keep_sequence(flowitem_model):
    worktime = pendulum.datetime(2020, 1, 1, tz="Europe/Moscow")

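    # keep_sequence=True backfills every minute between start_time and worktime, giving ten items.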
    items = FlowItem.get_items_for_execute(
        flow_name=flowitem_model.name_for_test,
        worktime=worktime,
        start_time=worktime - dt.timedelta(minutes=9),
        interval_timedelta=dt.timedelta(minutes=1),
        keep_sequence=True,
        retries=2,
        retry_delay=0,
        notebook_hash="",
        max_fatal_errors=1,
    )

    assert len(items) == 10
Example #29
def test_retries(create_retries, retries, result):
    # TODO: Does not work with started_utc=None
    FlowItem.delete().where(FlowItem.name == FLOW_NAME).execute()

    FlowItem.create(
        **{
            FlowItem.name.name: FLOW_NAME,
            FlowItem.worktime.name: pendulum.datetime(2020, 1, 1, tz="Europe/Moscow"),
            FlowItem.started_utc.name: dt.datetime(2020, 1, 1),
            FlowItem.status.name: FlowStatus.error,
            FlowItem.retries.name: create_retries,
        }
    )

    FlowItem.retry_error_items(FLOW_NAME, retries=retries, retry_delay=60)

    items = FlowItem.select().where(
        FlowItem.name == FLOW_NAME, FlowItem.status == FlowStatus.add
    )

    assert len(items) == int(result)
Example #30
def test_retry_delay(retry_delay, passed_sec, is_run):
    # TODO: Does not work with started_utc=None
    FlowItem.delete().where(FlowItem.name == FLOW_NAME).execute()

    FlowItem.create(
        **{
            FlowItem.name.name: FLOW_NAME,
            FlowItem.worktime.name: pendulum.datetime(2020, 1, 1, tz="Europe/Moscow"),
            FlowItem.started_utc.name: dt.datetime(2020, 1, 1, 0, 0, 0),
            FlowItem.status.name: FlowStatus.error,
            FlowItem.retries.name: 0,
        }
    )

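    # Freeze "now" so that exactly passed_sec seconds have elapsed since started_utc.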
    FlowItem.get_utcnow = Mock(return_value=dt.datetime(2020, 1, 1, 0, 0, passed_sec))

    FlowItem.retry_error_items(FLOW_NAME, retries=1, retry_delay=retry_delay)

    items = FlowItem.select().where(
        FlowItem.name == FLOW_NAME, FlowItem.status == FlowStatus.add
    )

    assert len(items) == int(is_run)