Example #1
0
def test_parallels2(fakedata_to_csv_notebook):
    """
    Check that the ``Load.__enter`` flag is tracked per ``Flow`` instance.

    Two flows are built from the same notebook. The flag is then toggled on
    one flow at a time, and after every change both flags are compared with
    the expected pair, so a change on one flow must never show up on the
    other.
    """
    first = Flow(fakedata_to_csv_notebook)
    second = Flow(fakedata_to_csv_notebook)

    # (flow to modify, value to assign, expected (first, second) flags)
    steps = (
        (first, True, (True, False)),
        (second, False, (True, False)),
        (second, True, (True, True)),
        (second, False, (True, False)),
        (first, False, (False, False)),
    )
    for flow, value, expected in steps:
        flow.Load.__enter = value
        # Tuple equality compares element-wise with ``==``.
        assert (first.Load.__enter, second.Load.__enter) == expected
Example #2
0
    def test_flow():
        """
        Dry-run a flow built from ``get_notebook()`` and check that the
        file opened through ``flow.Load.open_file`` is not empty afterwards.
        """
        flow = Flow(get_notebook())
        period_start = dt.datetime.now()
        period_end = dt.datetime.now()
        flow.dry_run(period_start, period_end)

        with flow.Load.open_file(mode="r") as result_file:
            content = result_file.read()

        # An empty string is falsy, so this fails when nothing was written.
        assert content
Example #3
0
def test_parallels(fakedata_to_csv_notebook):
    """
    Interleave two flows built from the same notebook, step by step.

    Both task generators are advanced in turn. While the first flow runs to
    completion the second flow's ``Load.__enter`` flag must stay set, and it
    must be cleared only after the second task itself has been exhausted.
    NOTE(review): ``Load.__enter`` presumably marks an entered loader
    context - confirm against the loader implementation.
    """
    fakedata_to_csv_notebook.export.rows = 100
    first_flow = Flow(fakedata_to_csv_notebook)
    second_flow = Flow(fakedata_to_csv_notebook)
    first_task = first_flow.task(
        start_period=dt.datetime.now(),
        end_period=dt.datetime.now(),
    )
    second_task = second_flow.task(
        start_period=dt.datetime.now(),
        end_period=dt.datetime.now(),
    )

    # Advance each task two steps before the interleaved phase starts.
    for _ in range(2):
        next(first_task)
    for _ in range(2):
        next(second_task)

    # Alternate single steps until the first flow reports its flag cleared.
    while True:
        next(first_task)
        assert second_flow.Load.__enter == True
        if first_flow.Load.__enter == False:
            break
        next(second_task)

    # Draining the first task must not affect the second flow's flag.
    list(first_task)
    assert second_flow.Load.__enter == True

    list(second_task)
    assert second_flow.Load.__enter == False
Example #4
0
def test_real_load_clickhouse(csv_to_clickhouse_notebook):
    """
    Load single-date batches into ClickHouse under each data cleaning mode.

    The CSV provider's export step is replaced by a stub that yields one row
    containing ``start_period``, so every flow run tries to load exactly one
    date. After each run the ``Date`` column of the target table is selected
    and compared with the expected rows.

    The stub is installed with ``mock.patch.object`` so the provider's
    original ``__call__`` is restored when the test finishes; assigning the
    mock directly on the class would leak it into every later test.
    The table is dropped before the first run and again in ``finally``.
    """
    from unittest import mock

    def export_func(start_period, end_period) -> Iterator[ExportContext]:
        # One-row export whose only value is the requested start date.
        yield ExportContext(columns=["date"],
                            data=[[start_period]],
                            data_orient=DataOrient.values)

    def load_day(flow, day):
        # Run the flow for a single day and return the stored Date rows.
        list(flow(start_period=day, end_period=day))
        return flow.Load.Table.select(columns=["Date"])

    export_class = Flow.ETLOperator.Providers.CSVProvider.export_class
    first_day = dt.datetime(2021, 1, 1)
    second_day = dt.datetime(2021, 1, 2)
    first_row = (dt.date(2021, 1, 1), )
    second_row = (dt.date(2021, 1, 2), )

    with mock.patch.object(export_class, "__call__",
                           mock.Mock(side_effect=export_func)):
        flow = Flow(csv_to_clickhouse_notebook)
        flow.Load.Table.drop_table()
        try:
            # Initial load with the notebook's own settings.
            assert load_day(flow, first_day) == [first_row]

            cleaning_modes = ClickhouseLoader.DataCleaningMode
            scenarios = (
                # off: the new date is stored next to the existing one.
                (cleaning_modes.off, [first_row, second_row]),
                # partition: reloading 2 Jan leaves a single 2 Jan row and
                # keeps 1 Jan.
                (cleaning_modes.partition, [first_row, second_row]),
                # truncate: only the freshly loaded date remains.
                (cleaning_modes.truncate, [second_row]),
            )
            for mode, expected_rows in scenarios:
                csv_to_clickhouse_notebook.load.data_cleaning_mode = mode
                # A new Flow is built after every notebook change, as the
                # notebook is only passed in at construction time.
                flow = Flow(csv_to_clickhouse_notebook)
                assert load_day(flow, second_day) == expected_rows
        finally:
            flow.Load.Table.drop_table()
Example #5
0
def test_flow_collection(fakedata_to_csv_notebook):
    """
    Check that ``Flow`` exposes the ``Flow.ETLOperator`` name whether it is
    built from the notebook object or from the result of its ``dict()``.
    """
    notebook = fakedata_to_csv_notebook

    flow_from_object = Flow(notebook)
    assert flow_from_object.name == Flow.ETLOperator.name

    flow_from_dict = Flow(notebook.dict())
    assert flow_from_dict.name == Flow.ETLOperator.name
def test_logs(ya_metrika_logs_to_csv_notebook2):
    """
    Smoke test: dry-run a flow built from the fixture notebook for the
    single day 2021-01-01. There are no assertions; the test passes as long
    as nothing raises.
    """
    from flowmaster.flow import Flow

    start = dt.datetime(2021, 1, 1)
    end = dt.datetime(2021, 1, 1)
    Flow(ya_metrika_logs_to_csv_notebook2).dry_run(start, end)
def test_management_clients(ymm_clients_to_csv_notebook):
    """
    Smoke test: dry-run a flow built from the fixture notebook for the
    single day 2021-02-01. There are no assertions; the test passes as long
    as nothing raises.
    """
    from flowmaster.flow import Flow

    start = dt.datetime(2021, 2, 1)
    end = dt.datetime(2021, 2, 1)
    Flow(ymm_clients_to_csv_notebook).dry_run(start, end)
def test_stats(ymstats_to_csv_notebook):
    """
    Smoke test: dry-run a flow built from the fixture notebook for the
    single day 2021-02-01. There are no assertions; the test passes as long
    as nothing raises.
    """
    from flowmaster.flow import Flow

    start = dt.datetime(2021, 2, 1)
    end = dt.datetime(2021, 2, 1)
    Flow(ymstats_to_csv_notebook).dry_run(start, end)