예제 #1
0
def test_actuals_are_loaded(api_w_actuals):
    """
    Compare the ``student_classes`` target data found in the fixture's spec
    with the expected rows, restricted to the columns of the expected table.
    """
    expected_table = """
        | card_id | name   | school_name | class_name        | season       |
        | -       | -      | -           | -                 | -            |
        | stu1    | Buffy  | Sunnydale   |  Applied Stabby   | Fall 2001    | # BasicDenormalization
        | stu2    | Willow | Sunnydale   |     Good Spells   | Spring 2002  |
        | stu3    | Bill   | San Dimas   |         Station   | Fall 2002    |
        | stu4    | Ted    | San Dimas   | Being Excellent   | Fall 2002    |
        | stu1    | Buffy  | Sunnydale   |  Applied Stabby   | Summer 2002  | # MissingClasses
        | stu2    | Willow | Sunnydale   |     Good Spells   | Summer 2002  |
        | stu1    | Buffy  | Sunnydale   |  Applied Stabby   | Summer 2002  |
        | stu2    | Willow | Sunnydale   |     Good Spells   | Summer 2002  | # MultipleClasses
        | stu2    | Willow | Sunnydale   | Season 6 Spells   | Summer 2002  |
        | stu3    | Bill   | San Dimas   |         Station   | Summer 2002  |
        | stu4    | Ted    | San Dimas   | Being Excellent   | Summer 2002  |
        | stu4    | Ted    | San Dimas   |         Station   | Summer 2002  |
        | stu1    | Buffy  | Sunnydale   |  Applied Stabby   | Fall 2001    | # IdConcatenation
        | stu2    | Willow | Sunnydale   |     Good Spells   | Spring 2002  |
        | stu3    | Bill   | San Dimas   |         Station   | Fall 2002    |
        | stu4    | Ted    | San Dimas   | Being Excellent   | Fall 2002    |
        """
    expected = markdown_to_df(expected_table)

    # Only the columns listed in the expected table take part in the check.
    student_classes = api_w_actuals.spec["targets"]["student_classes"]
    actual = student_classes.data[expected.columns]
    assert_frame_equal(actual, expected)
예제 #2
0
def test_overriding_defaults(identifiers, cases):
    """
    Stack rows that carry their own ``last_name`` onto a source whose default
    ``last_name`` is "Jones"; the stacked "Not Jones" values are expected.
    """
    student = identifiers["student"]
    id_mapping = {"id": {"identifier": student, "attribute": "id"}}
    source = Source(defaults={"last_name": "Jones"}, id_mapping=id_mapping)

    stacked_rows = markdown_to_df("""
        | id | first_name | last_name |
        | -  | -          | -         |
        | s1 | Bob        | Not Jones |
        | s2 | Nancy      | Not Jones |
        """)
    source.stack(cases[0], stacked_rows)

    actual = source.data

    # Named ids are resolved through the same identifier and case as the stack.
    first_id = student.generate(case=cases[0], named_id="s1")["id"]
    second_id = student.generate(case=cases[0], named_id="s2")["id"]
    expected_table = """
        | id   | first_name | last_name |
        | -    | -          | -         |
        | {s1} | Bob        | Not Jones |
        | {s2} | Nancy      | Not Jones |
        """.format(s1=first_id, s2=second_id)
    expected = markdown_to_df(expected_table)
    assert_frame_equal(actual, expected)
예제 #3
0
def test_setting_values(identifiers, cases):
    """
    Stack rows together with ``values={"last_name": "Summers"}`` and expect
    that column, with that value, on every resulting row.
    """
    student = identifiers["student"]
    source = Source(
        id_mapping={"id": {"identifier": student, "attribute": "id"}})

    stacked_rows = markdown_to_df("""
        | id | first_name |
        | -  | -          |
        | s1 | Bob        |
        | s2 | Nancy      |
        """)
    source.stack(cases[0], stacked_rows, values={"last_name": "Summers"})

    actual = source.data

    # Resolve "s1" then "s2", in that order, for the case that was stacked.
    generated = {
        name: student.generate(case=cases[0], named_id=name)["id"]
        for name in ("s1", "s2")
    }
    expected = markdown_to_df("""
        | id   | first_name | last_name |
        | -    | -          | -         |
        | {s1} | Bob        | Summers   |
        | {s2} | Nancy      | Summers   |
        """.format(**generated))
    assert_frame_equal(actual, expected)
예제 #4
0
def test_factories_stack_a_source(identifiers, sources):
    """
    Generate a factory holding one ``students`` table for "TestCase" and
    compare the students source data with the id-translated expectation.
    """
    students_table = """
                | id | first_name |
                | -  | -          |
                | s1 | Buffy      |
                | s2 | Willow     |
                """
    factory = Factory(
        data={"students": {"table": students_table}},
        sources=sources,
    )

    factory.generate("TestCase")

    def student_id(named_id):
        # Id the student identifier yields for "TestCase" and this named id.
        return identifiers["student"].generate(case="TestCase",
                                               named_id=named_id)["id"]

    expected = markdown_to_df("""
        | id   | first_name |
        | -    | -          |
        | {s1} | Buffy      |
        | {s2} | Willow     |
        """.format(s1=student_id("s1"), s2=student_id("s2")))

    # Compare only the columns named in the expected table.
    actual = sources["students"].data[expected.columns]
    assert_frame_equal(actual, expected)
예제 #5
0
def test_multiple_identifers_are_translated(
        source_w_multiple_ids, identifiers, cases):
    """
    Stack rows whose ``id``, ``uuid`` and ``organization_id`` hold named ids
    and expect each replaced by the matching attribute generated by the
    student or organization identifier for the same case.
    """
    case = cases[0]
    stacked_rows = markdown_to_df("""
        | id | uuid | organization_id |first_name  |
        | -  | -    | -               | -          |
        | s1 | s1   | o1              | Bob        |
        | s2 | s2   | o1              | Nancy      |
        """)
    source_w_multiple_ids.stack(case, stacked_rows)

    actual = source_w_multiple_ids.data

    student = identifiers["student"]
    # Placeholder name -> generated attribute value for the expected table.
    replacements = {
        "s1": student.generate(case=case, named_id="s1")["id"],
        "s2": student.generate(case=case, named_id="s2")["id"],
        "su1": student.generate(case=case, named_id="s1")["uuid"],
        "su2": student.generate(case=case, named_id="s2")["uuid"],
        "o1": identifiers["organization"].generate(case=case,
                                                   named_id="o1")["id"],
    }
    expected = markdown_to_df("""
        | id   | uuid  | organization_id | first_name |
        | -    | -     | -               | -          |
        | {s1} | {su1} | {o1}            | Bob        |
        | {s2} | {su2} | {o1}            | Nancy      |
        """.format(**replacements))
    assert_frame_equal(actual, expected)
예제 #6
0
def test_multiple_embedded_identifiers_are_translated(identifiers, cases):
    """
    Stack rows whose ``prefixed_id`` embeds ``{organization.id[o1]}`` and
    ``{student.id[...]}`` references; the expected value is the generated
    organization id and student id joined by "-".
    """
    student = identifiers["student"]
    source = Source(
        id_mapping={"id": {"identifier": student, "attribute": "id"}},
        identifiers=identifiers,
    )

    # The braces below are passed to the source verbatim (no str.format here).
    stacked_rows = markdown_to_df("""
        | id | prefixed_id                            | first_name |
        | -  | -                                      | -          |
        | s1 | {organization.id[o1]}-{student.id[s1]} | Bob        |
        | s2 | {organization.id[o1]}-{student.id[s2]} | Nancy      |
        """)
    source.stack(cases[0], stacked_rows)

    actual = source.data

    first_student = student.generate(case=cases[0], named_id="s1")["id"]
    second_student = student.generate(case=cases[0], named_id="s2")["id"]
    organization = identifiers["organization"].generate(case=cases[0],
                                                        named_id="o1")["id"]
    expected = markdown_to_df("""
        | id   | prefixed_id | first_name |
        | -    | -           | -          |
        | {s1} | {o1}-{s1}   | Bob        |
        | {s2} | {o1}-{s2}   | Nancy      |
        """.format(s1=first_student, s2=second_student, o1=organization))
    assert_frame_equal(actual, expected)
예제 #7
0
def test_inheritance_w_new_data(sources):
    """
    A factory that inherits a ``students`` table but also declares its own is
    expected to hold its own table as the ``students`` dataframe.
    """
    inherited_table = """
                | id | first_name |
                | -  | -          |
                | s1 | Buffy      |
                | s2 | Willow     |
                """
    base_factory = Factory(
        data={"students": {"table": inherited_table}},
        sources=sources,
    )

    modified_table = """
        | id | first_name | last_name |
        | -  | -          | -         |
        | s1 | Buffy      | Summers   |
        | s2 | Xander     | Harris    |
    """

    # The factory receives a copy; the original text builds the expectation.
    own_data = {"students": {"table": deepcopy(modified_table)}}
    composite_factory = Factory(
        data=own_data,
        inherit_from=[base_factory],
        sources=sources,
    )

    expected = markdown_to_df(modified_table)
    actual = composite_factory.data["students"]["dataframe"]
    assert_frame_equal(actual, expected)
예제 #8
0
def test_null_identifiers_go_to_the_right_case(
        multiple_identifier_target, stu, cases):
    """
    A nullable identifying column cannot tie a row to a case by itself;
    the row needs some other identifying column that is not null.
    """

    buffy = {
        "id": stu["c1stu1"]["id"],
        "uuid": stu["c1stu1"]["uuid"],
        "first_name": "Buffy",
    }
    # This record has no uuid; only its id is available as an identifier.
    willow = {
        "id": stu["c2stu2"]["id"],
        "uuid": None,
        "first_name": "Willow",
    }
    multiple_identifier_target.load_actual([buffy, willow])

    actual = multiple_identifier_target.case_data(cases[1])
    expected_table = """
        | id   | uuid   | first_name |
        | -    | -      | -          |
        | stu2 | {NULL} | Willow     |
        """
    expected = markdown_to_df(expected_table)

    assert_frame_equal(actual, expected)
예제 #9
0
def test_empty_data_can_be_loaded_with_columns_specified(simple_target):
    """
    Load an empty list with explicit column names and expect an empty frame
    with those columns once ``__dtspec_case__`` is dropped.
    """
    column_names = ["id", "first_name"]
    simple_target.load_actual([], columns=column_names)

    actual = simple_target.data.drop(columns="__dtspec_case__")
    header_only_table = """
        | id   | first_name |
        | -    | -          |
        """
    expected = markdown_to_df(header_only_table)

    assert_frame_equal(actual, expected)
예제 #10
0
def test_scenario_case_factories_can_override(
        identifiers, sources, student_factory, organization_factory):
    """
    A case factory that inherits the student and organization factories but
    declares its own ``students`` table is expected to put that table into the
    students source. The organizations source is still expected to hold two
    organization rows (presumably from ``organization_factory``).
    """
    own_students_table = """
                            | id | organization_id | first_name |
                            | -  | -               | -          |
                            | s1 | o1              | Bill       |
                            | s2 | o1              | Ted        |
                            """
    case_factory = Factory(
        sources=sources,
        inherit_from=[student_factory, organization_factory],
        data={"students": {"table": own_students_table}},
    )
    scenario = Scenario(cases={"StudentOrg": Case(factory=case_factory)})

    scenario.generate()

    def generated_id(kind, named_id):
        # Id produced by the ``kind`` identifier for the "StudentOrg" case.
        return identifiers[kind].generate(
            case=scenario.cases["StudentOrg"], named_id=named_id)["id"]

    expected_students = markdown_to_df("""
        | id   | organization_id | first_name |
        | -    | -               | -          |
        | {s1} | {o1}            | Bill       |
        | {s2} | {o1}            | Ted        |
        """.format(
        s1=generated_id("student", "s1"),
        s2=generated_id("student", "s2"),
        o1=generated_id("organization", "o1"),
    ))
    actual_students = sources["students"].data
    assert_frame_equal(actual_students, expected_students)

    expected_organizations = markdown_to_df("""
        | id   | name                    |
        | -    | -                       |
        | {o1} | San Dimas High          |
        | {o2} | Alaska Military Academy |
        """.format(
        o1=generated_id("organization", "o1"),
        o2=generated_id("organization", "o2"),
    ))
    actual_organizations = sources["organizations"].data
    assert_frame_equal(actual_organizations, expected_organizations)
예제 #11
0
def test_target_can_be_split_into_case(simple_target, simple_data, cases):
    """
    After loading ``simple_data``, ``case_data(cases[1])`` is expected to
    return just the two rows listed below.
    """
    simple_target.load_actual(simple_data)

    second_case_rows = simple_target.case_data(cases[1])
    expected_table = """
        | id   | first_name |
        | -    | -          |
        | stu1 | Faith      |
        | stu2 | Willow     |
        """

    assert_frame_equal(second_case_rows, markdown_to_df(expected_table))
예제 #12
0
def test_factories_stack_sources(identifiers, sources):
    """
    Generate a factory holding ``students`` and ``organizations`` tables for
    "TestCase" and check both sources against id-translated expectations.
    """
    students_table = """
                | id | organization_id | first_name |
                | -  | -               | -          |
                | s1 | o1              | Buffy      |
                | s2 | o1              | Willow     |
                """
    organizations_table = """
                | id | name           |
                | -  | -              |
                | o1 | Sunnydale High |
                """
    factory = Factory(
        data={
            "students": {"table": students_table},
            "organizations": {"table": organizations_table},
        },
        sources=sources,
    )

    factory.generate("TestCase")

    def generated_id(kind, named_id):
        # Id produced by the ``kind`` identifier for "TestCase".
        return identifiers[kind].generate(case="TestCase",
                                          named_id=named_id)["id"]

    expected_students = markdown_to_df("""
        | id   | organization_id | first_name |
        | -    | -               | -          |
        | {s1} | {o1}            | Buffy      |
        | {s2} | {o1}            | Willow     |
        """.format(
        s1=generated_id("student", "s1"),
        s2=generated_id("student", "s2"),
        o1=generated_id("organization", "o1"),
    ))
    # ``external_id`` is not part of the expected table, so drop it first.
    actual_students = sources["students"].data.drop(columns=["external_id"])

    expected_organizations = markdown_to_df("""
        | id   | name           |
        | -    | -              |
        | {o1} | Sunnydale High |
        """.format(o1=generated_id("organization", "o1")))
    # Likewise ``uuid`` is absent from the expected organizations table.
    actual_organizations = sources["organizations"].data.drop(columns=["uuid"])

    assert_frame_equal(actual_students, expected_students)
    assert_frame_equal(actual_organizations, expected_organizations)
예제 #13
0
def test_actual_data_is_loaded_ids_translated(simple_target, simple_data):
    """
    After loading ``simple_data``, the target data (minus the
    ``__dtspec_case__`` column) is expected to match the four rows below.
    """
    simple_target.load_actual(simple_data)

    loaded = simple_target.data.drop(columns=["__dtspec_case__"])
    expected_table = """
        | id   | first_name |
        | -    | -          |
        | stu1 | Buffy      |
        | stu2 | Willow     |
        | stu1 | Faith      |
        | stu2 | Willow     |
        """

    assert_frame_equal(loaded, markdown_to_df(expected_table))
예제 #14
0
def test_source_without_identifier_generates_data(cases):
    """
    Stack a table onto a ``Source`` built without arguments and expect the
    source data to equal the stacked table.
    """
    seasons_table = """
        | date       | season      |
        | -          | -           |
        | 2001-09-08 | Fall 2001   |
        | 2002-01-09 | Spring 2002 |
    """

    plain_source = Source()
    plain_source.stack(cases[0], markdown_to_df(seasons_table))

    # The same table text serves as both the input and the expectation.
    assert_frame_equal(plain_source.data, markdown_to_df(seasons_table))
예제 #15
0
def test_inheritance_w_multiple_composite_sources(sources):
    """
    A factory inheriting a ``students`` table while declaring its own
    ``students`` and ``organizations`` tables is expected to hold its own
    tables as the dataframes for both sources.
    """
    inherited_students_table = """
                | id | first_name |
                | -  | -          |
                | s1 | Buffy      |
                | s2 | Willow     |
                """
    base_factory = Factory(
        data={"students": {"table": inherited_students_table}},
        sources=sources,
    )

    modified_students_table = """
        | id | first_name | last_name |
        | -  | -          | -         |
        | s1 | Buffy      | Summers   |
        | s2 | Xander     | Harris    |
    """

    new_organizations_table = """
        | id | name           |
        | -  | -              |
        | o1 | Sunnydale High |
    """

    # Copies go to the factory; the originals build the expectations below.
    own_data = {
        "students": {"table": deepcopy(modified_students_table)},
        "organizations": {"table": deepcopy(new_organizations_table)},
    }
    composite_factory = Factory(
        data=own_data,
        inherit_from=[base_factory],
        sources=sources,
    )

    expected_students = markdown_to_df(modified_students_table)
    actual_students = composite_factory.data["students"]["dataframe"]
    assert_frame_equal(actual_students, expected_students)

    expected_organizations = markdown_to_df(new_organizations_table)
    actual_organizations = composite_factory.data["organizations"]["dataframe"]
    assert_frame_equal(actual_organizations, expected_organizations)
예제 #16
0
def test_inheritance_defaults_are_overridden(identifiers, sources):
    """
    The base factory supplies ``first_name`` through ``values`` and the
    inheriting factory supplies ``last_name``; the generated students row is
    expected to carry both.
    """
    # Both factories declare this same single-row, id-only table.
    id_only_table = """
                | id |
                | -  |
                | s1 |
                """

    base_factory = Factory(
        data={
            "students": {
                "table": id_only_table,
                "values": {"first_name": "Bob"},
            }
        },
        sources=sources,
    )
    composite_factory = Factory(
        data={
            "students": {
                "table": id_only_table,
                "values": {"last_name": "Loblaw"},
            }
        },
        inherit_from=[base_factory],
        sources=sources,
    )

    composite_factory.generate("TestCase")

    student_id = identifiers["student"].generate(case="TestCase",
                                                 named_id="s1")["id"]
    expected = markdown_to_df("""
        | id   | first_name | last_name |
        | -    | -          | -         |
        | {s1} | Bob        | Loblaw    |
        """.format(s1=student_id))

    # Columns absent from the expected table are removed before comparing.
    ignored_columns = ["external_id", "organization_id"]
    actual = sources["students"].data.drop(columns=ignored_columns)
    assert_frame_equal(actual, expected)
예제 #17
0
def test_defaults_override_identifiers(identifiers, cases):
    """
    When an identifier column also has a default, the default is what gets
    used; the column does not fall back to anonymous id generation.
    """

    student = identifiers["student"]
    source = Source(
        id_mapping={"id": {"identifier": student, "attribute": "id"}},
        defaults={"id": "stu1"},
    )

    # Neither row provides an ``id`` value of its own.
    stacked_rows = markdown_to_df("""
            | first_name |
            | -          |
            | Bob        |
            | Still Bob  |
            """)
    source.stack(cases[0], stacked_rows)

    # Read the first named id cached for this case after stacking.
    cached_after_stack = student.cached_ids[id(cases[0])].named_ids
    generated_id = list(cached_after_stack.values())[0]["id"]

    actual = source.data
    expected = markdown_to_df("""
        | first_name | id   |
        | -          | -    |
        | Bob        | {s1} |
        | Still Bob  | {s1} |
        """.format(s1=generated_id))
    assert_frame_equal(actual, expected)

    # The first cached name must be the configured default id.
    cached_names = student.cached_ids[id(cases[0])].named_ids.keys()
    generated_name_id = list(cached_names)[0]
    assert generated_name_id == source.defaults["id"]
예제 #18
0
def test_scenarios_generate_case_data_over_multiple_cases(
        identifiers, sources, student_factory, organization_factory):
    """
    Generate a scenario with one student-only case and one organization-only
    case, then check each source against ids generated for its own case.
    """
    student_case = Case(
        factory=Factory(sources=sources, inherit_from=[student_factory]))
    organization_case = Case(
        factory=Factory(sources=sources, inherit_from=[organization_factory]))
    scenario = Scenario(cases={
        "SimpleStudent": student_case,
        "SimpleOrganization": organization_case,
    })

    scenario.generate()

    def generated_id(kind, case_name, named_id):
        # Id produced by the ``kind`` identifier for the named scenario case.
        return identifiers[kind].generate(case=scenario.cases[case_name],
                                          named_id=named_id)["id"]

    expected_students = markdown_to_df("""
        | id   | first_name |
        | -    | -          |
        | {s1} | Bill       |
        | {s2} | Ted        |
        """.format(
        s1=generated_id("student", "SimpleStudent", "s1"),
        s2=generated_id("student", "SimpleStudent", "s2"),
    ))
    actual_students = sources["students"].data.drop(columns="organization_id")
    assert_frame_equal(actual_students, expected_students)

    expected_organizations = markdown_to_df("""
        | id   | name                    |
        | -    | -                       |
        | {o1} | San Dimas High          |
        | {o2} | Alaska Military Academy |
        """.format(
        o1=generated_id("organization", "SimpleOrganization", "o1"),
        o2=generated_id("organization", "SimpleOrganization", "o2"),
    ))
    actual_organizations = sources["organizations"].data
    assert_frame_equal(actual_organizations, expected_organizations)
예제 #19
0
def test_scenarios_stack_case_data(identifiers, sources, student_factory):
    """
    Generate a scenario with two student cases, one of which declares its own
    ``students`` table, and expect the students source to hold the rows of
    both cases with ids generated per case.
    """
    alternate_table = """
                            | id | first_name |
                            | -  | -          |
                            | s1 | Napoleon   |
                            """
    simple_case = Case(
        factory=Factory(sources=sources, inherit_from=[student_factory]))
    alternate_case = Case(factory=Factory(
        sources=sources,
        inherit_from=[student_factory],
        data={"students": {"table": alternate_table}},
    ))
    scenario = Scenario(cases={
        "SimpleStudent": simple_case,
        "AltStudent": alternate_case,
    })

    scenario.generate()

    def student_id(case_name, named_id):
        # Student id generated for the named scenario case.
        return identifiers["student"].generate(
            case=scenario.cases[case_name], named_id=named_id)["id"]

    # "s1" appears in both cases and is resolved separately for each.
    expected = markdown_to_df("""
        | id    | first_name |
        | -     | -          |
        | {s1}  | Bill       |
        | {s2}  | Ted        |
        | {as1} | Napoleon   |
        """.format(
        s1=student_id("SimpleStudent", "s1"),
        s2=student_id("SimpleStudent", "s2"),
        as1=student_id("AltStudent", "s1"),
    ))
    actual = sources["students"].data.drop(columns="organization_id")
    assert_frame_equal(actual, expected)
예제 #20
0
def test_inheritance_wo_new_data(sources):
    """
    A factory that only inherits (declaring no data of its own) is expected
    to hold the same ``students`` dataframe as the factory it inherits from.
    """
    students_table = """
                | id | first_name |
                | -  | -          |
                | s1 | Buffy      |
                | s2 | Willow     |
                """
    base_factory = Factory(
        data={"students": {"table": students_table}},
        sources=sources,
    )
    composite_factory = Factory(inherit_from=[base_factory], sources=sources)

    def students_frame(factory):
        # Dataframe the given factory holds for the students source.
        return factory.data["students"]["dataframe"]

    base_frame = students_frame(base_factory)
    inherited_frame = students_frame(composite_factory)
    assert_frame_equal(inherited_frame, base_frame)
예제 #21
0
def test_sources_stack(simple_source, identifiers, cases):
    """
    Stack the same named ids for two different cases and expect four rows
    whose ids were generated separately for each case.
    """
    first_case_rows = markdown_to_df("""
        | id | first_name |
        | -  | -          |
        | s1 | Bob        |
        | s2 | Nancy      |
        """)
    simple_source.stack(cases[0], first_case_rows)

    second_case_rows = markdown_to_df("""
        | id | first_name |
        | -  | -          |
        | s1 | Bobob      |
        | s2 | Nanci      |
        """)
    simple_source.stack(cases[1], second_case_rows)

    actual = simple_source.data

    def student_id(case_index, named_id):
        # Student id generated for the case at ``case_index``.
        return identifiers["student"].generate(case=cases[case_index],
                                               named_id=named_id)["id"]

    expected = markdown_to_df("""
        | id    | first_name |
        | -     | -          |
        | {s11} | Bob        |
        | {s12} | Nancy      |
        | {s21} | Bobob      |
        | {s22} | Nanci      |
        """.format(
        s11=student_id(0, "s1"),
        s12=student_id(0, "s2"),
        s21=student_id(1, "s1"),
        s22=student_id(1, "s2"),
    ))
    assert_frame_equal(actual, expected)
예제 #22
0
def test_multiple_inheritance(sources):
    """
    A factory inheriting from two factories, one with ``students`` and one
    with ``organizations``, is expected to hold the same dataframe as the
    respective parent for each source.
    """
    students_table = """
                | id | first_name |
                | -  | -          |
                | s1 | Buffy      |
                | s2 | Willow     |
                """
    base1_factory = Factory(
        data={"students": {"table": students_table}},
        sources=sources,
    )

    organizations_table = """
                | id | name           |
                | -  | -              |
                | o1 | Sunnydale High |
                """
    base2_factory = Factory(
        data={"organizations": {"table": organizations_table}},
        sources=sources,
    )

    parents = [base1_factory, base2_factory]
    composite_factory = Factory(inherit_from=parents, sources=sources)

    expected_students = base1_factory.data["students"]["dataframe"]
    actual_students = composite_factory.data["students"]["dataframe"]
    assert_frame_equal(actual_students, expected_students)

    expected_organizations = base2_factory.data["organizations"]["dataframe"]
    actual_organizations = composite_factory.data["organizations"]["dataframe"]
    assert_frame_equal(actual_organizations, expected_organizations)
예제 #23
0
def test_identifiers_generate_defaults(identifiers, cases):
    """
    When a column is mapped to an identifier but the stacked rows give it
    no named id, "anonymous" named ids are generated during stacking.
    """

    student = identifiers["student"]
    source = Source(
        id_mapping={"id": {"identifier": student, "attribute": "id"}})

    # The stacked table has no ``id`` column at all.
    stacked_rows = markdown_to_df("""
            | first_name |
            | -          |
            | Bob        |
            | Nancy      |
            """)
    source.stack(cases[0], stacked_rows)

    # Collect the ids cached for this case, in cache order.
    cached_for_case = student.cached_ids[id(cases[0])].named_ids
    anonymous_ids = [entry["id"] for entry in cached_for_case.values()]

    actual = source.data
    expected_table = """
        | first_name | id   |
        | -          | -    |
        | Bob        | {s1} |
        | Nancy      | {s2} |
        """.format(s1=anonymous_ids[0], s2=anonymous_ids[1])
    expected = markdown_to_df(expected_table)
    assert_frame_equal(actual, expected)