Ejemplo n.º 1
0
def test_consolidate(ingest, clean_etl):

    date = pendulum.from_format("2020_03_27", "YYYY_MM_DD").naive()

    for file_stem in ["2020_04_01_00_00_00-v2", "2020_03_27_00_00_00-v2"]:
        file_path = Path(f"tmp/raw/{file_stem}_2020_03_27.csv")
        ingest(file_path)

    table = Fact.child_or_load_table(date)
    task_instance = TaskInstanceMock("init")
    task_instance.xcom_push("config", {
        "date": str(date),
        "table_name": table.fullname
    })

    clean_etl("consolidation", table.fullname, date)
    consolidate_callable(ti=task_instance)

    with engine.begin() as conn:
        count = conn.execute(table.select()).rowcount
        assert count == 198

        count = conn.execute(
            table.select(table.c.session_end == None)).rowcount
        assert count == 0

        count = conn.execute(table.select(table.c.pulltime_last)).rowcount
        assert count == 101

        count = conn.execute(
            table.select(
                and_(table.c.pulltime_last,
                     table.c.session_end != table.c.pulltime))).rowcount
        assert count == 1

        count1 = conn.execute(table.select(table.c.pulltime_last)).rowcount
        count2 = conn.execute(
            select(
                [
                    table.c.userid_key,
                    table.c.mac_key,
                    table.c.ap_key,
                    table.c.ssid_key,
                    table.c.protocol_key,
                    table.c.session_start,
                ],
                distinct=True,
            )).rowcount
        assert count1 == count2
Ejemplo n.º 2
0
def test_ingest_preprocessed(ingest):

    file_stem = "2020_04_01_00_00_00-v2"
    file_path = Path(f"tmp/raw/{file_stem}_2020_03_27.csv")
    ingest(file_path)

    date = pendulum.from_format(file_path.stem[23:], "YYYY_MM_DD").naive()
    child_fact = Fact.child_or_load_table(date)

    with engine.begin() as conn:
        count = conn.execute(child_fact.select()).rowcount
        assert count == 104

        count = conn.execute(
            child_fact.select(child_fact.c.session_end == None)).rowcount
        assert count == 104