import datetime as dt
from typing import Iterator
from unittest.mock import Mock

from flowmaster.flow import Flow

# NOTE: ExportContext, DataOrient, and ClickhouseLoader must also be imported
# from flowmaster's ETL modules; the exact module paths depend on the installed
# flowmaster version, so they are left to the surrounding project here.


def test_parallels2(fakedata_to_csv_notebook):
    """
    Check that the Load stage's __enter flag is tracked per Flow instance:
    toggling it on one flow must not affect the other flow.
    """
    flow1 = Flow(fakedata_to_csv_notebook)
    flow2 = Flow(fakedata_to_csv_notebook)

    flow1.Load.__enter = True
    assert flow1.Load.__enter == True
    assert flow2.Load.__enter == False

    flow2.Load.__enter = False
    assert flow1.Load.__enter == True
    assert flow2.Load.__enter == False

    flow2.Load.__enter = True
    assert flow1.Load.__enter == True
    assert flow2.Load.__enter == True

    flow2.Load.__enter = False
    assert flow1.Load.__enter == True
    assert flow2.Load.__enter == False

    flow1.Load.__enter = False
    assert flow1.Load.__enter == False
    assert flow2.Load.__enter == False

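# A minimal, flowmaster-free sketch of the invariant test_parallels2 exercises:
# an attribute set on one instance must not leak to another instance of the
# same class. The _Stage class below is purely illustrative.
def test_instance_attr_isolation_sketch():
    class _Stage:
        entered = False  # class-level default shared by all instances

    s1, s2 = _Stage(), _Stage()
    s1.entered = True  # creates an instance attribute that shadows the default
    assert s1.entered is True
    assert s2.entered is False  # the other instance still sees the class default
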
def test_flow():
    notebook = get_notebook()
    flow = Flow(notebook)
    flow.dry_run(dt.datetime.now(), dt.datetime.now())

    with flow.Load.open_file(mode="r") as loadfile:
        text = loadfile.read()

    assert text

def test_parallels(fakedata_to_csv_notebook):
    """
    Check that two flows execute their Load stages concurrently: flow2's
    Load stage stays open while flow1 is stepped to completion, and closes
    only after flow2's own task is fully drained.
    """
    fakedata_to_csv_notebook.export.rows = 100
    flow1 = Flow(fakedata_to_csv_notebook)
    flow2 = Flow(fakedata_to_csv_notebook)
    task1 = flow1.task(start_period=dt.datetime.now(), end_period=dt.datetime.now())
    task2 = flow2.task(start_period=dt.datetime.now(), end_period=dt.datetime.now())

    # Advance both tasks until both Load stages are entered.
    next(task1), next(task1), next(task2), next(task2)

    # Step task1 until its Load stage exits; flow2 must stay open throughout.
    while True:
        next(task1)
        assert flow2.Load.__enter == True
        if flow1.Load.__enter == False:
            break
        next(task2)

    # Draining task1 must not close flow2's Load stage.
    list(task1)
    assert flow2.Load.__enter == True

    # Only draining task2 itself closes it.
    list(task2)
    assert flow2.Load.__enter == False

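# A minimal sketch of the cooperative-stepping pattern test_parallels relies
# on: flow.task(...) returns a generator, and each next() call advances one
# step of one flow, so two flows can be interleaved from a single thread.
# _demo_task is illustrative and does not depend on flowmaster.
def test_generator_interleaving_sketch():
    def _demo_task(name, steps=3):
        for step in range(steps):
            yield f"{name}:{step}"

    t1, t2 = _demo_task("flow1"), _demo_task("flow2")
    assert next(t1) == "flow1:0"
    assert next(t2) == "flow2:0"  # flow2 progresses while flow1 is mid-run
    assert next(t1) == "flow1:1"
    assert list(t2) == ["flow2:1", "flow2:2"]  # draining finishes a task
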
def test_real_load_clickhouse(csv_to_clickhouse_notebook):
    def export_func(start_period, end_period) -> Iterator[ExportContext]:
        yield ExportContext(
            columns=["date"],
            data=[[start_period]],
            data_orient=DataOrient.values,
        )

    Flow.ETLOperator.Providers.CSVProvider.export_class.__call__ = Mock(
        side_effect=export_func
    )

    flow = Flow(csv_to_clickhouse_notebook)
    flow.Load.Table.drop_table()
    try:
        list(
            flow(
                start_period=dt.datetime(2021, 1, 1),
                end_period=dt.datetime(2021, 1, 1),
            )
        )
        assert flow.Load.Table.select(columns=["Date"]) == [(dt.date(2021, 1, 1),)]

        # data_cleaning_mode "off": new rows are appended, existing rows are kept.
        csv_to_clickhouse_notebook.load.data_cleaning_mode = (
            ClickhouseLoader.DataCleaningMode.off
        )
        flow = Flow(csv_to_clickhouse_notebook)
        list(
            flow(
                start_period=dt.datetime(2021, 1, 2),
                end_period=dt.datetime(2021, 1, 2),
            )
        )
        assert flow.Load.Table.select(columns=["Date"]) == [
            (dt.date(2021, 1, 1),),
            (dt.date(2021, 1, 2),),
        ]

        # data_cleaning_mode "partition": only the loaded partition is replaced,
        # so the 2021-01-01 row survives a 2021-01-02 load.
        csv_to_clickhouse_notebook.load.data_cleaning_mode = (
            ClickhouseLoader.DataCleaningMode.partition
        )
        flow = Flow(csv_to_clickhouse_notebook)
        list(
            flow(
                start_period=dt.datetime(2021, 1, 2),
                end_period=dt.datetime(2021, 1, 2),
            )
        )
        assert flow.Load.Table.select(columns=["Date"]) == [
            (dt.date(2021, 1, 1),),
            (dt.date(2021, 1, 2),),
        ]

        # data_cleaning_mode "truncate": the table is emptied before loading,
        # so only the newly loaded row remains.
        csv_to_clickhouse_notebook.load.data_cleaning_mode = (
            ClickhouseLoader.DataCleaningMode.truncate
        )
        flow = Flow(csv_to_clickhouse_notebook)
        list(
            flow(
                start_period=dt.datetime(2021, 1, 2),
                end_period=dt.datetime(2021, 1, 2),
            )
        )
        assert flow.Load.Table.select(columns=["Date"]) == [(dt.date(2021, 1, 2),)]
    finally:
        flow.Load.Table.drop_table()

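# The three data_cleaning_mode cases above follow one pattern; here is a
# hedged sketch of how they could be collapsed with pytest.mark.parametrize.
# The expected_dates values are copied from the assertions in
# test_real_load_clickhouse; the test body itself is intentionally elided.
import pytest


@pytest.mark.skip(reason="sketch only; see test_real_load_clickhouse above")
@pytest.mark.parametrize(
    "mode_name, expected_dates",
    [
        # "off" appends: both the old and the new rows remain.
        ("off", [dt.date(2021, 1, 1), dt.date(2021, 1, 2)]),
        # "partition" replaces only the loaded partition: the old row survives.
        ("partition", [dt.date(2021, 1, 1), dt.date(2021, 1, 2)]),
        # "truncate" empties the table first: only the new load remains.
        ("truncate", [dt.date(2021, 1, 2)]),
    ],
)
def test_data_cleaning_modes_sketch(mode_name, expected_dates):
    ...
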
def test_flow_collection(fakedata_to_csv_notebook):
    flow = Flow(fakedata_to_csv_notebook)
    assert flow.name == Flow.ETLOperator.name

    # A Flow can also be built from the notebook's dict representation.
    flow = Flow(fakedata_to_csv_notebook.dict())
    assert flow.name == Flow.ETLOperator.name

def test_logs(ya_metrika_logs_to_csv_notebook2):
    from flowmaster.flow import Flow

    flow = Flow(ya_metrika_logs_to_csv_notebook2)
    flow.dry_run(dt.datetime(2021, 1, 1), dt.datetime(2021, 1, 1))

def test_management_clients(ymm_clients_to_csv_notebook):
    from flowmaster.flow import Flow

    flow = Flow(ymm_clients_to_csv_notebook)
    flow.dry_run(dt.datetime(2021, 2, 1), dt.datetime(2021, 2, 1))

def test_stats(ymstats_to_csv_notebook):
    from flowmaster.flow import Flow

    flow = Flow(ymstats_to_csv_notebook)
    flow.dry_run(dt.datetime(2021, 2, 1), dt.datetime(2021, 2, 1))