@pytest.fixture  # assumed: this is requested by name as a pytest fixture in the tests below
def flowitem_model():
    from flowmaster.models import FlowItem

    FlowItem.name_for_test = "__fm_test__"
    FlowItem.clear("__fm_test__")

    return FlowItem
def test_create_next_execute_item(flowitem_model):
    worktime = pendulum.datetime(2020, 1, 1, tz="Europe/Moscow")
    interval_timedelta = dt.timedelta(1)

    item = FlowItem.create_next_execute_item(
        flow_name=flowitem_model.name_for_test,
        worktime=worktime,
        interval_timedelta=interval_timedelta,
    )
    assert item is None

    FlowItem.create(
        **{
            FlowItem.name.name: flowitem_model.name_for_test,
            FlowItem.worktime.name: worktime - dt.timedelta(1),
        }
    )

    item = FlowItem.create_next_execute_item(
        flow_name=flowitem_model.name_for_test,
        worktime=worktime,
        interval_timedelta=interval_timedelta,
    )
    assert item

    item = FlowItem.create_next_execute_item(
        flow_name=flowitem_model.name_for_test,
        worktime=worktime,
        interval_timedelta=interval_timedelta,
    )
    assert item is None
async def notebooks_view(request: Request):
    # TODO: Add pagination
    count_statuses_map = {
        (item.name, item.status): item.count
        for item in FlowItem.count_items_by_name_and_status()
    }
    count_names_map = {
        item.name: item.count for item in FlowItem.count_items_by_name()
    }

    notebooks = []
    for name in iter_active_notebook_filenames():
        data = {"name": name, "is_archive": False}
        validate, *args = get_notebook(name)
        data["count"] = count_names_map.get(name, 0)
        data["count_errors"] = sum(
            count_statuses_map.get((name, status), 0)
            for status in Statuses.error_statuses
        )
        data["count_fatal_errors"] = count_statuses_map.get(
            (name, Statuses.fatal_error), 0
        )
        data["validate"] = validate
        notebooks.append(data)

    for name in iter_archive_notebook_filenames():
        data = {"name": name, "is_archive": True, "validate": True}
        notebooks.append(data)

    return templates.TemplateResponse(
        "/pages/notebooks.html",
        context={"request": request, "notebooks": notebooks},
    )
async def log_view(name: str, worktime_for_url: str, request: Request):
    import re

    item: FlowItem = FlowItem.get_or_none(
        **{
            FlowItem.name.name: name,
            FlowItem.worktime.name: FlowItem.worktime_from_url(worktime_for_url),
        }
    )
    if item.logpath:
        if pathlib.Path.exists(pathlib.Path(item.logpath)):
            with open(item.logpath, "r", encoding="UTF8") as f:
                logtext = f.read()
                # Strip color-code remnants like "[31m" from the log text.
                logtext = re.sub(r"\[\d\dm|\[\dm", "", logtext)
        else:
            logtext = "Logs not found: 'Logs file missing'"
    else:
        logtext = "Logs not found: 'Logs path missing'"

    return templates.TemplateResponse(
        "/pages/log.html",
        context={
            "request": request,
            "content": logtext,
            "filepath": item.logpath,
        },
    )
def test_allow_execute_flow():
    FlowItem.delete().where(FlowItem.name == FLOW_NAME).execute()
    worktime = pendulum.datetime(2020, 1, 6, tz="Europe/Moscow")
    interval_timedelta = dt.timedelta(1)
    worktime_list = iter_range_datetime(
        start_time=worktime - dt.timedelta(3),
        end_time=worktime,
        timedelta=interval_timedelta,
    )
    FlowItem.create_items(
        flow_name=FLOW_NAME,
        worktime_list=worktime_list,
        status=Statuses.fatal_error,
        notebook_hash="",
    )

    assert (
        FlowItem.allow_execute_flow(FLOW_NAME, notebook_hash="", max_fatal_errors=3)
        is False
    )
    assert (
        FlowItem.allow_execute_flow(FLOW_NAME, notebook_hash="new", max_fatal_errors=3)
        is True
    )

    FlowItem.recreate_prev_items(
        flow_name=FLOW_NAME,
        worktime=worktime,
        offset_periods=10,
        interval_timedelta=interval_timedelta,
    )

    assert (
        FlowItem.allow_execute_flow(FLOW_NAME, notebook_hash="", max_fatal_errors=3)
        is True
    )
def prepare_items(dry_run: bool = False):
    typer.echo("\n===================" "\nFlowMaster" "\n===================\n")

    from flowmaster.models import FlowItem

    # Clearing statuses for unfulfilled flows.
    FlowItem.clear_statuses_of_lost_items()

    if dry_run:
        typer.echo("Dry-run mode!")
        FlowItem.clear("fakedata.etl.flow")
def prepare_for_run(dry_run: bool = False):
    init()
    typer.echo("\n===================" "\nFlowMaster" "\n===================\n")

    from flowmaster.models import FlowItem

    # Clearing statuses for unfulfilled flows.
    FlowItem.clear_statuses_of_lost_items()

    if dry_run:
        # Python's `in` on a peewee field does not build a SQL predicate
        # (it coerces to bool); the field's contains() lookup does.
        FlowItem.delete().where(FlowItem.name.contains("fakedata.etl.flow")).execute()
def test_create_history_items():
    FlowItem.delete().where(FlowItem.name == FLOW_NAME).execute()
    worktime = pendulum.datetime(2020, 1, 1, tz="Europe/Moscow")
    interval_timedelta = dt.timedelta(1)

    items = FlowItem.create_missing_items(
        flow_name=FLOW_NAME,
        start_time=worktime - dt.timedelta(5),
        end_time=worktime,
        interval_timedelta=interval_timedelta,
    )

    assert len(items) == 6
def test_create_missing_items():
    FlowItem.delete().where(FlowItem.name == FLOW_NAME).execute()
    worktime = pendulum.datetime(2020, 1, 6, tz="Europe/Moscow")
    interval_timedelta = dt.timedelta(1)

    items = FlowItem.create_missing_items(
        flow_name=FLOW_NAME,
        start_time=worktime - dt.timedelta(5),
        end_time=worktime - dt.timedelta(5),
        interval_timedelta=interval_timedelta,
    )
    assert len(items) == 1

    FlowItem.create(
        **{FlowItem.name.name: FLOW_NAME, FlowItem.worktime.name: worktime}
    )
    FlowItem.create_missing_items(
        flow_name=FLOW_NAME,
        start_time=worktime - dt.timedelta(5),
        end_time=worktime,
        interval_timedelta=interval_timedelta,
    )

    assert (
        FlowItem.select()
        .where(FlowItem.name == FLOW_NAME, FlowItem.status == FlowStatus.add)
        .count()
    ) == 6
def list_items(name: str, limit: int = 20):
    for i in FlowItem.iter_items(name, limit=limit):
        msg_parts = [
            f' {i.worktime.strftime("%Y-%m-%dT%T").replace("T00:00:00", "")} ',
            f"{i.status} ",
            f"retries={i.retries} ",
            f"duration={i.duration} ",
            typer.style(f"log={i.info}", fg=typer.colors.WHITE) if i.info else "",
        ]

        if i.status in Statuses.error_statuses:
            msg_parts[1] = typer.style(msg_parts[1], fg=typer.colors.RED, bold=True)
        elif i.status == Statuses.add:
            msg_parts[1] = typer.style(msg_parts[1], fg=typer.colors.WHITE, bold=True)
        elif i.status == Statuses.run:
            msg_parts[1] = typer.style(msg_parts[1], fg=typer.colors.YELLOW, bold=True)
        elif i.status == Statuses.success:
            msg_parts[1] = typer.style(msg_parts[1], fg=typer.colors.GREEN, bold=True)

        typer.echo("".join(msg_parts))
def restart_errors(name: str):
    for name_ in name.split(","):
        # Recreate the failed items for each flow name in the comma-separated list.
        count = len(
            FlowItem.recreate_items(name_, filter_statuses=Statuses.error_statuses)
        )
        typer.secho(f" {name_} {typer.style(f'{count=}', fg=typer.colors.WHITE)} OK")
def restart(
    name: str,
    start_time: dt.datetime = typer.Option(..., "--start_time", "-s"),
    end_time: dt.datetime = typer.Option(..., "--end_time", "-e"),
):
    for name_ in name.split(","):
        if start_time or end_time:
            # Apply timezone.
            for file_name, config in YamlHelper.iter_parse_file_from_dir(
                FLOW_CONFIGS_DIR, match=name_
            ):
                tz = config["work"]["schedule"]["timezone"]
                if start_time:
                    start_time = start_time.replace(tzinfo=pendulum.timezone(tz))
                if end_time:
                    end_time = end_time.replace(tzinfo=pendulum.timezone(tz))
                break

        count = FlowItem.change_status(
            name_, new_status=FlowStatus.add, from_time=start_time, to_time=end_time
        )
        typer.secho(f" {name_} {typer.style(f'{count=}', fg=typer.colors.WHITE)} OK")
def restart_errors(name: str):
    for name_ in name.split(","):
        count = FlowItem.change_status(
            name_,
            new_status=FlowStatus.add,
            filter_statuses=FlowStatus.error_statuses,
        )
        typer.secho(f" {name_} {typer.style(f'{count=}', fg=typer.colors.WHITE)} OK")
def test_create_next_execute_item():
    FlowItem.delete().where(FlowItem.name == FLOW_NAME).execute()
    worktime = pendulum.datetime(2020, 1, 1, tz="Europe/Moscow")
    interval_timedelta = dt.timedelta(1)

    item = FlowItem.create_next_execute_item(
        flow_name=FLOW_NAME,
        worktime=worktime,
        interval_timedelta=interval_timedelta,
    )
    assert item is None

    FlowItem.create(
        **{
            FlowItem.name.name: FLOW_NAME,
            FlowItem.worktime.name: worktime - dt.timedelta(1),
        }
    )

    item = FlowItem.create_next_execute_item(
        flow_name=FLOW_NAME,
        worktime=worktime,
        interval_timedelta=interval_timedelta,
    )
    assert item

    item = FlowItem.create_next_execute_item(
        flow_name=FLOW_NAME,
        worktime=worktime,
        interval_timedelta=interval_timedelta,
    )
    assert item is None
def test_create_update_error_items():
    FlowItem.delete().where(FlowItem.name == FLOW_NAME).execute()
    worktime = pendulum.datetime(2020, 1, 1, tz="Europe/Moscow")
    interval_timedelta = dt.timedelta(1)

    items = FlowItem.recreate_prev_items(
        flow_name=FLOW_NAME,
        worktime=worktime,
        offset_periods=[-1, -2],
        interval_timedelta=interval_timedelta,
    )
    assert items is None

    for i in range(10):
        FlowItem.create(
            **{
                FlowItem.name.name: FLOW_NAME,
                FlowItem.worktime.name: worktime - dt.timedelta(i),
                FlowItem.status.name: FlowStatus.error,
            }
        )

    items = FlowItem.recreate_prev_items(
        flow_name=FLOW_NAME,
        worktime=worktime,
        offset_periods=[-1, -2],
        interval_timedelta=interval_timedelta,
    )

    assert len(items) == 2
    assert FlowItem.count_items(FLOW_NAME, statuses=[FlowStatus.add]) == 2
    for i in items:
        assert i.retries == 0
def prepare_items_for_order(
    flow: "BaseOperator", start_period: dt.datetime, end_period: dt.datetime
):
    # The status is changed so that there is no repeated ordering of tasks.
    FlowItem.change_status(
        flow.notebook.name,
        new_status=Statuses.run,
        from_time=start_period,
        to_time=end_period,
    )

    if flow.Work.expires is not None:
        FlowItem.change_expires(
            flow.notebook.name,
            expires=flow.Work.expires,
            from_time=start_period,
            to_time=end_period,
        )

    yield
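# A usage sketch, not from the source: the bare `yield` above suggests this
# generator is meant to be wrapped as a context manager so that the status
# change guards the ordering step. The `flow`, `start_period`, and
# `end_period` arguments below are assumed inputs.
def _example_order_with_guard(flow, start_period, end_period):
    from contextlib import contextmanager

    guard = contextmanager(prepare_items_for_order)
    with guard(flow, start_period, end_period):
        # Items are already marked `run` here, so they will not be ordered twice.
        ...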
async def tasks_view(name: str, request: Request):
    # TODO: Add pagination
    return templates.TemplateResponse(
        "/pages/tasks.html",
        context={
            "request": request,
            "tasks": FlowItem.iter_items(name, limit=1000, offset=0),
        },
    )
def test_order_flow_with_period_length():
    FlowItem.delete().where(FlowItem.name == FLOW_NAME).execute()
    CONFIG.work.schedule = ETLFlowConfig.WorkPolicy.SchedulePolicy(
        timezone="Europe/Moscow",
        start_time="00:00:00",
        from_date=dt.date.today() - dt.timedelta(5),
        interval="daily",
        period_length=2,
    )
    config = dict(CONFIG)
    config.pop("name")
    rv = [(FLOW_NAME, config)]
    YamlHelper.iter_parse_file_from_dir = mock.Mock(return_value=rv)

    flows = list(order_flow(logger=logger))

    assert len(flows) == 3
def order_etl_flow(
    *, logger: Logger, async_mode: bool = False, dry_run: bool = False
) -> Iterator:
    """Prepare flow function to be sent to the queue and executed."""
    from flowmaster.operators.etl.service import ETLOperator
    from flowmaster.operators.etl.policy import ETLFlowConfig

    for file_name, config in YamlHelper.iter_parse_file_from_dir(
        FLOW_CONFIGS_DIR, match=".etl.flow"
    ):
        if dry_run:
            if config.get("provider") != "fakedata":
                continue

        try:
            flow_config = ETLFlowConfig(name=file_name, **config)
        except pydantic.ValidationError as exc:
            logger.error("ValidationError: '%s': %s", file_name, exc)
            continue
        except Exception as exc:
            logger.error("Error: '%s': %s", file_name, exc)
            continue

        work = ETLWork(flow_config)

        for start_period, end_period in work.iter_period_for_execute():
            etl_flow = ETLOperator(flow_config)
            etl_flow_iterator = etl_flow(
                start_period, end_period, async_mode=async_mode, dry_run=dry_run
            )

            # The status is changed so that there is no repeated ordering of tasks.
            FlowItem.change_status(
                etl_flow.name,
                new_status=FlowStatus.run,
                from_time=start_period,
                to_time=end_period,
            )
            logger.info(
                "Order ETL flow [%s]: %s %s", etl_flow.name, start_period, end_period
            )

            yield etl_flow_iterator
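# A minimal driver sketch, not from the source: shows how the generator of
# flow iterators produced by order_etl_flow might be consumed by a scheduler
# loop. The logger name "flowmaster" is an assumption.
def _example_drive_etl_flows():
    import logging

    logger = logging.getLogger("flowmaster")
    for etl_flow_iterator in order_etl_flow(logger=logger, dry_run=True):
        # Drive each ETL flow generator to completion.
        for _ in etl_flow_iterator:
            pass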
def errors():
    for name in list_notebook():
        # error_statuses is already a collection of statuses, so it is passed as-is.
        count = FlowItem.count_items(name, statuses=Statuses.error_statuses)
        if count > 0:
            count_text = typer.style(count, fg=typer.colors.RED, bold=True)
        else:
            count_text = typer.style(count, fg=typer.colors.GREEN, bold=True)
        name = typer.style(name, fg=typer.colors.WHITE, bold=True)
        typer.echo(f" {name} {count_text}")
def test_local_executor():
    config = fakedata_to_csv_config.dict()
    config.pop("name")
    YamlHelper.iter_parse_file_from_dir = mock.Mock(
        return_value=(("test_local_executor", config),)
    )

    start_executor(orders=1, dry_run=True)

    items = list(FlowItem.iter_items("test_local_executor"))
    assert len(items) == 5
def restart(
    name: str,
    from_time: dt.datetime = typer.Option(..., "--from_time", "-s"),
    to_time: dt.datetime = typer.Option(..., "--to_time", "-e"),
):
    for name_ in name.split(","):
        # Recreate items for each flow name in the comma-separated list.
        count = len(
            FlowItem.recreate_items(name_, from_time=from_time, to_time=to_time)
        )
        typer.secho(f" {name_} {typer.style(f'{count=}', fg=typer.colors.WHITE)} OK")
def test_items_for_execute_seconds_interval_without_keep_sequence(flowitem_model):
    worktime = pendulum.datetime(2020, 1, 1, tz="Europe/Moscow")
    FlowItem.create_items(
        flowitem_model.name_for_test,
        worktime_list=[worktime - dt.timedelta(minutes=4)],
        **{flowitem_model.status.name: Statuses.success},
    )

    items = FlowItem.get_items_for_execute(
        flow_name=flowitem_model.name_for_test,
        worktime=worktime,
        start_time=worktime - dt.timedelta(minutes=10),
        interval_timedelta=dt.timedelta(minutes=1),
        keep_sequence=False,
        retries=0,
        retry_delay=0,
        notebook_hash="",
        max_fatal_errors=3,
    )

    assert len(items) == 1
def test_create_update_items_start_time_equals_worktime():
    """Checking the case when the update date equals the first worktime."""
    FlowItem.delete().where(FlowItem.name == FLOW_NAME).execute()
    worktime = pendulum.datetime(2020, 1, 1, tz="Europe/Moscow")
    interval_timedelta = dt.timedelta(1)

    FlowItem.create(
        **{
            FlowItem.name.name: FLOW_NAME,
            FlowItem.worktime.name: worktime,
            FlowItem.status.name: Statuses.error,
        }
    )

    items = FlowItem.recreate_prev_items(
        flow_name=FLOW_NAME,
        worktime=worktime,
        offset_periods=[-1, -2, -3],
        interval_timedelta=interval_timedelta,
    )

    assert len(items) == 0
def resource_items(
    self, start_period, end_period, **kwargs
) -> Iterator[ExportContext]:
    query: peewee.ModelSelect = FlowItem.select()

    if self.export.export_mode == "by_date":
        query = query.where(
            FlowItem.worktime >= start_period, FlowItem.worktime <= end_period
        )

    yield ExportContext(
        columns=self.export.columns,
        data=list(query.dicts()),
        data_orient=DataOrient.dict,
    )
def list_errors(name: str, limit: int = 1000):
    for i in FlowItem.iter_items(name, limit=limit):
        if i.status in Statuses.error_statuses:
            msg_parts = [
                f' {i.worktime.strftime("%Y-%m-%dT%T").replace("T00:00:00", "")} ',
                typer.style(f"{i.status} ", fg=typer.colors.RED, bold=True),
                f"retries={i.retries} ",
                f"duration={i.duration} ",
                typer.style(f"log={i.info}", fg=typer.colors.WHITE) if i.info else "",
            ]
            typer.echo("".join(msg_parts))
def test_retries(create_retries, retries, result, pendulum_utctoday, flowitem_model):
    name = "__test_retries__"
    flowitem_model.clear(name)
    flowitem_model.create(
        **{
            FlowItem.name.name: name,
            FlowItem.worktime.name: pendulum_utctoday,
            FlowItem.finished_utc.name: pendulum_utctoday,
            FlowItem.status.name: Statuses.error,
            FlowItem.retries.name: create_retries,
        }
    )

    items = FlowItem.retry_error_items(name, retries=retries, retry_delay=0)

    assert len(items) == int(result)
def test_items_for_execute_seconds_interval_with_keep_sequence(flowitem_model):
    worktime = pendulum.datetime(2020, 1, 1, tz="Europe/Moscow")

    items = FlowItem.get_items_for_execute(
        flow_name=flowitem_model.name_for_test,
        worktime=worktime,
        start_time=worktime - dt.timedelta(minutes=9),
        interval_timedelta=dt.timedelta(minutes=1),
        keep_sequence=True,
        retries=2,
        retry_delay=0,
        notebook_hash="",
        max_fatal_errors=1,
    )

    assert len(items) == 10
def test_retries(create_retries, retries, result):
    # TODO: Does not work with started_utc=None
    FlowItem.delete().where(FlowItem.name == FLOW_NAME).execute()
    FlowItem.create(
        **{
            FlowItem.name.name: FLOW_NAME,
            FlowItem.worktime.name: pendulum.datetime(2020, 1, 1, tz="Europe/Moscow"),
            FlowItem.started_utc.name: dt.datetime(2020, 1, 1),
            FlowItem.status.name: FlowStatus.error,
            FlowItem.retries.name: create_retries,
        }
    )

    FlowItem.retry_error_items(FLOW_NAME, retries=retries, retry_delay=60)

    items = FlowItem.select().where(
        FlowItem.name == FLOW_NAME, FlowItem.status == FlowStatus.add
    )
    assert len(items) == int(result)
def test_retry_delay(retry_delay, passed_sec, is_run):
    # TODO: Does not work with started_utc=None
    FlowItem.delete().where(FlowItem.name == FLOW_NAME).execute()
    FlowItem.create(
        **{
            FlowItem.name.name: FLOW_NAME,
            FlowItem.worktime.name: pendulum.datetime(2020, 1, 1, tz="Europe/Moscow"),
            FlowItem.started_utc.name: dt.datetime(2020, 1, 1, 0, 0, 0),
            FlowItem.status.name: FlowStatus.error,
            FlowItem.retries.name: 0,
        }
    )
    FlowItem.get_utcnow = Mock(return_value=dt.datetime(2020, 1, 1, 0, 0, passed_sec))

    FlowItem.retry_error_items(FLOW_NAME, retries=1, retry_delay=retry_delay)

    items = FlowItem.select().where(
        FlowItem.name == FLOW_NAME, FlowItem.status == FlowStatus.add
    )
    assert len(items) == int(is_run)
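# test_retries and test_retry_delay receive value parameters, so they are
# presumably driven by pytest.mark.parametrize decorators that were not
# captured here. A hypothetical shape (the values are illustrative only,
# not from the source):
#
#   @pytest.mark.parametrize(
#       "create_retries, retries, result",
#       [
#           (0, 1, True),   # retry budget remaining: the item is re-added
#           (1, 1, False),  # budget exhausted: the item stays in error
#       ],
#   )
#   def test_retries(create_retries, retries, result):
#       ...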