def test_source_without_identifer_raises_if_data_changes(cases):
    """Stacking different data onto a source built without id mapping raises.

    The first line of the error message is expected to contain the
    readable case name ``TestCase1``.
    """
    static_source = Source()
    original_frame = markdown_to_df(
        """
        | date | season |
        | - | - |
        | 2001-09-08 | Fall 2001 |
        | 2002-01-09 | Spring 2002 |
        """
    )
    static_source.stack(cases[0], original_frame)

    with pytest.raises(dtspec.core.CannotStackStaticSourceError) as excinfo:
        changed_frame = markdown_to_df(
            """
            | date | season |
            | - | - |
            | 2002-06-01 | Summer 2002 |
            | 2002-09-07 | Fall 2002 |
            """
        )
        static_source.stack(cases[0], changed_frame)

    # Error message contains a readable case name
    first_line, _, _ = str(excinfo.value).partition("\n")
    assert "TestCase1" in first_line
def test_source_without_identifier_generates_data(cases):
    """A source built without id mapping exposes the stacked table as its data."""
    seasons_table = """
        | date | season |
        | - | - |
        | 2001-09-08 | Fall 2001 |
        | 2002-01-09 | Spring 2002 |
        """
    static_source = Source()
    static_source.stack(cases[0], markdown_to_df(seasons_table))

    assert_frame_equal(static_source.data, markdown_to_df(seasons_table))
def test_inheritance_w_multiple_composite_sources(sources):
    """A factory inheriting from a base exposes both overridden and new tables."""
    base_students_table = """
        | id | first_name |
        | - | - |
        | s1 | Buffy |
        | s2 | Willow |
        """
    base_factory = Factory(
        data={"students": {"table": base_students_table}},
        sources=sources,
    )

    modified_students_table = """
        | id | first_name | last_name |
        | - | - | - |
        | s1 | Buffy | Summers |
        | s2 | Xander | Harris |
        """
    new_organizations_table = """
        | id | name |
        | - | - |
        | o1 | Sunnydale High |
        """

    composite_data = {
        "students": {"table": deepcopy(modified_students_table)},
        "organizations": {"table": deepcopy(new_organizations_table)},
    }
    composite_factory = Factory(
        data=composite_data,
        inherit_from=[base_factory],
        sources=sources,
    )

    # The students table is the one supplied by the composite factory.
    students_expected = markdown_to_df(modified_students_table)
    students_actual = composite_factory.data["students"]["dataframe"]
    assert_frame_equal(students_actual, students_expected)

    # The organizations table only exists on the composite factory.
    organizations_expected = markdown_to_df(new_organizations_table)
    organizations_actual = composite_factory.data["organizations"]["dataframe"]
    assert_frame_equal(organizations_actual, organizations_expected)
def test_incompatible_keys_raise_specific_exception(target, case):
    """Key comparison raises ``MissingExpectedKeysAssertionError``.

    The expected ids are written as ``1``/``2``/``3`` while the actual ids
    are written as ``0.0`` .. ``3.0``.
    """
    expected_table = """
        | id | name |
        | - | - |
        | 1 | Buffy |
        | 2 | Willow |
        | 3 | Xander |
        """
    float_keyed_table = """
        | id | name |
        | - | - |
        | 0.0 | The First |
        | 1.0 | Buffy |
        | 2.0 | Willow |
        | 3.0 | Xander |
        """
    actual_data = markdown_to_df(float_keyed_table)

    key_expectation = DataExpectation(
        target,
        expected_table,
        by=["id"],
        compare_via="keys",
    )
    key_expectation.load_actual(actual_data)

    with pytest.raises(MissingExpectedKeysAssertionError):
        key_expectation.assert_expected(case)
def test_actuals_are_loaded(api_w_actuals):
    """The ``student_classes`` target data matches the expected rows.

    Trailing ``#`` comments in the table label the block of rows they start.
    """
    expected_table = """
        | card_id | name | school_name | class_name | season |
        | - | - | - | - | - |
        | stu1 | Buffy | Sunnydale | Applied Stabby | Fall 2001 | # BasicDenormalization
        | stu2 | Willow | Sunnydale | Good Spells | Spring 2002 |
        | stu3 | Bill | San Dimas | Station | Fall 2002 |
        | stu4 | Ted | San Dimas | Being Excellent | Fall 2002 |
        | stu1 | Buffy | Sunnydale | Applied Stabby | Summer 2002 | # MissingClasses
        | stu2 | Willow | Sunnydale | Good Spells | Summer 2002 |
        | stu1 | Buffy | Sunnydale | Applied Stabby | Summer 2002 |
        | stu2 | Willow | Sunnydale | Good Spells | Summer 2002 | # MultipleClasses
        | stu2 | Willow | Sunnydale | Season 6 Spells | Summer 2002 |
        | stu3 | Bill | San Dimas | Station | Summer 2002 |
        | stu4 | Ted | San Dimas | Being Excellent | Summer 2002 |
        | stu4 | Ted | San Dimas | Station | Summer 2002 |
        | stu1 | Buffy | Sunnydale | Applied Stabby | Fall 2001 | # IdConcatenation
        | stu2 | Willow | Sunnydale | Good Spells | Spring 2002 |
        | stu3 | Bill | San Dimas | Station | Fall 2002 |
        | stu4 | Ted | San Dimas | Being Excellent | Fall 2002 |
        """
    expected = markdown_to_df(expected_table)

    # Compare only the columns present in the expected frame, in its order.
    student_classes = api_w_actuals.spec["targets"]["student_classes"]
    actual = student_classes.data[expected.columns]

    assert_frame_equal(actual, expected)
def test_inheritance_w_new_data(sources):
    """A table supplied by the inheriting factory replaces the base table."""
    base_table = """
        | id | first_name |
        | - | - |
        | s1 | Buffy |
        | s2 | Willow |
        """
    base_factory = Factory(
        data={"students": {"table": base_table}},
        sources=sources,
    )

    modified_table = """
        | id | first_name | last_name |
        | - | - | - |
        | s1 | Buffy | Summers |
        | s2 | Xander | Harris |
        """
    composite_factory = Factory(
        data={"students": {"table": deepcopy(modified_table)}},
        inherit_from=[base_factory],
        sources=sources,
    )

    students_expected = markdown_to_df(modified_table)
    students_actual = composite_factory.data["students"]["dataframe"]
    assert_frame_equal(students_actual, students_expected)
def test_null_identifiers_go_to_the_right_case(multiple_identifier_target, stu, cases):
    """
    A row whose identifying column is null can only be tied to a case
    through another identifying column that is not null.
    """
    buffy_row = {
        "id": stu["c1stu1"]["id"],
        "uuid": stu["c1stu1"]["uuid"],
        "first_name": "Buffy",
    }
    # Willow has no uuid, so only her id can link her to a case.
    willow_row = {
        "id": stu["c2stu2"]["id"],
        "uuid": None,
        "first_name": "Willow",
    }
    multiple_identifier_target.load_actual([buffy_row, willow_row])

    second_case_rows = multiple_identifier_target.case_data(cases[1])
    expected_rows = markdown_to_df(
        """
        | id | uuid | first_name |
        | - | - | - |
        | stu2 | {NULL} | Willow |
        """
    )
    assert_frame_equal(second_case_rows, expected_rows)
def test_setting_values(identifiers, cases):
    """``values`` passed to ``stack`` appear as a constant column in the data."""
    id_mapping = {
        "id": {"identifier": identifiers["student"], "attribute": "id"},
    }
    source = Source(id_mapping=id_mapping)

    stacked_frame = markdown_to_df(
        """
        | id | first_name |
        | - | - |
        | s1 | Bob |
        | s2 | Nancy |
        """
    )
    source.stack(cases[0], stacked_frame, values={"last_name": "Summers"})

    actual = source.data

    def student_id(named_id):
        # Look up the generated id for a named student in the first case.
        return identifiers["student"].generate(case=cases[0], named_id=named_id)["id"]

    expected_template = """
        | id | first_name | last_name |
        | - | - | - |
        | {s1} | Bob | Summers |
        | {s2} | Nancy | Summers |
        """
    expected = markdown_to_df(
        expected_template.format(s1=student_id("s1"), s2=student_id("s2"))
    )
    assert_frame_equal(actual, expected)
def test_null_in_source_translated_correctly(simple_source, identifiers, cases):
    """``{NULL}`` cells in a stacked table serialize as ``None``."""
    table_with_nulls = """
        | id | first_name |
        | - | - |
        | s1 | {NULL} |
        | {NULL} | Nancy |
        """
    simple_source.stack(cases[0], markdown_to_df(table_with_nulls))

    serialized = simple_source.serialize()

    s1_id = identifiers["student"].generate(case=cases[0], named_id="s1")["id"]
    expected_records = [
        {"id": s1_id, "first_name": None},
        {"id": None, "first_name": "Nancy"},
    ]
    assert serialized == expected_records
def test_multiple_identifers_are_translated(source_w_multiple_ids, identifiers, cases):
    """Named ids in several identifier columns are each replaced by generated values."""
    stacked_frame = markdown_to_df(
        """
        | id | uuid | organization_id |first_name |
        | - | - | - | - |
        | s1 | s1 | o1 | Bob |
        | s2 | s2 | o1 | Nancy |
        """
    )
    source_w_multiple_ids.stack(cases[0], stacked_frame)

    actual = source_w_multiple_ids.data

    def generated(entity, named_id, attribute):
        # Fetch one attribute of the id generated for a named entity in case 0.
        return identifiers[entity].generate(case=cases[0], named_id=named_id)[attribute]

    substitutions = {
        "s1": generated("student", "s1", "id"),
        "s2": generated("student", "s2", "id"),
        "su1": generated("student", "s1", "uuid"),
        "su2": generated("student", "s2", "uuid"),
        "o1": generated("organization", "o1", "id"),
    }
    expected_template = """
        | id | uuid | organization_id | first_name |
        | - | - | - | - |
        | {s1} | {su1} | {o1} | Bob |
        | {s2} | {su2} | {o1} | Nancy |
        """
    expected = markdown_to_df(expected_template.format(**substitutions))
    assert_frame_equal(actual, expected)
def test_multiple_embedded_identifiers_are_translated(identifiers, cases):
    """Several ``{identifier.attribute[name]}`` references in one cell are all replaced."""
    id_mapping = {
        "id": {"identifier": identifiers["student"], "attribute": "id"},
    }
    source = Source(id_mapping=id_mapping, identifiers=identifiers)

    # NOTE: this table is passed as-is; the braces are embedded identifier
    # references, not str.format placeholders.
    embedded_table = """
        | id | prefixed_id | first_name |
        | - | - | - |
        | s1 | {organization.id[o1]}-{student.id[s1]} | Bob |
        | s2 | {organization.id[o1]}-{student.id[s2]} | Nancy |
        """
    source.stack(cases[0], markdown_to_df(embedded_table))

    actual = source.data

    def generated_id(entity, named_id):
        # Fetch the generated "id" attribute for a named entity in case 0.
        return identifiers[entity].generate(case=cases[0], named_id=named_id)["id"]

    expected_template = """
        | id | prefixed_id | first_name |
        | - | - | - |
        | {s1} | {o1}-{s1} | Bob |
        | {s2} | {o1}-{s2} | Nancy |
        """
    expected = markdown_to_df(
        expected_template.format(
            s1=generated_id("student", "s1"),
            s2=generated_id("student", "s2"),
            o1=generated_id("organization", "o1"),
        )
    )
    assert_frame_equal(actual, expected)
def test_data_converts_to_json(simple_source, identifiers, cases):
    """``serialize`` returns one record per stacked row with generated ids.

    Each record carries the id generated for the named student in the
    first case, together with the ``first_name`` from the stacked table.
    """
    simple_source.stack(
        cases[0],
        markdown_to_df(
            """
            | id | first_name |
            | - | - |
            | s1 | Bob |
            | s2 | Nancy |
            """
        ),
    )

    actual = simple_source.serialize()

    expected = [
        {
            "id": identifiers["student"].generate(case=cases[0], named_id="s1")["id"],
            "first_name": "Bob",
        },
        {
            "id": identifiers["student"].generate(case=cases[0], named_id="s2")["id"],
            "first_name": "Nancy",
        },
    ]

    # Without this comparison the test only fails when stack/serialize raise.
    assert actual == expected
def test_overriding_defaults(identifiers, cases):
    """A column present in the stacked table wins over the source default."""
    source = Source(
        defaults={"last_name": "Jones"},
        id_mapping={
            "id": {"identifier": identifiers["student"], "attribute": "id"},
        },
    )

    stacked_frame = markdown_to_df(
        """
        | id | first_name | last_name |
        | - | - | - |
        | s1 | Bob | Not Jones |
        | s2 | Nancy | Not Jones |
        """
    )
    source.stack(cases[0], stacked_frame)

    actual = source.data

    def student_id(named_id):
        # Look up the generated id for a named student in the first case.
        return identifiers["student"].generate(case=cases[0], named_id=named_id)["id"]

    expected_template = """
        | id | first_name | last_name |
        | - | - | - |
        | {s1} | Bob | Not Jones |
        | {s2} | Nancy | Not Jones |
        """
    expected = markdown_to_df(
        expected_template.format(s1=student_id("s1"), s2=student_id("s2"))
    )
    assert_frame_equal(actual, expected)
def test_setting_constant_values(expected_table, target, case):
    """An expectation built with ``values`` passes when the actual data has that column."""
    constant_values = {"school_name": "Sunnydale High"}
    expectation = DataExpectation(target, expected_table, values=constant_values)

    # Build actual data that already carries the constant column.
    actual_data = markdown_to_df(expected_table)
    actual_data["school_name"] = "Sunnydale High"

    loaded_frame = actual_data.copy()
    expectation.load_actual(loaded_frame)
    expectation.assert_expected(case)
def _build_expected_data(self, table):
    """Parse ``table`` into the expected DataFrame and add constants to it.

    Args:
        table: Markdown table text handed to ``markdown_to_df``.

    Returns:
        The DataFrame produced by ``markdown_to_df`` after
        ``self._add_constants`` has been applied to it.

    Raises:
        BadMarkdownTableError: If ``markdown_to_df`` rejects the table. The
            message names ``self.target`` and the original error is chained
            as the explicit cause.
    """
    try:
        expected_df = markdown_to_df(table)
    except BadMarkdownTableError as err:
        # Chain explicitly so the traceback reports the parse failure as the
        # direct cause rather than as an error raised while handling another.
        raise BadMarkdownTableError(
            f"Unable to generate data for target {self.target}:\n{err}"
        ) from err

    self._add_constants(expected_df)
    return expected_df
def test_empty_data_can_be_loaded_with_columns_specified(simple_target):
    """Loading no rows with explicit columns yields an empty frame with those columns."""
    simple_target.load_actual([], columns=["id", "first_name"])

    loaded_frame = simple_target.data
    actual = loaded_frame.drop(columns="__dtspec_case__")

    header_only_table = """
        | id | first_name |
        | - | - |
        """
    expected = markdown_to_df(header_only_table)
    assert_frame_equal(actual, expected)
def test_defaults_override_identifiers(identifiers, cases):
    """
    When an identifier column also has a default, the default is used as
    the named id (e.g., no anonymous id is generated for it).
    """
    source = Source(
        id_mapping={
            "id": {"identifier": identifiers["student"], "attribute": "id"},
        },
        defaults={"id": "stu1"},
    )

    names_only_frame = markdown_to_df(
        """
        | first_name |
        | - |
        | Bob |
        | Still Bob |
        """
    )
    source.stack(cases[0], names_only_frame)

    # Ids cached for a case are keyed by the id() of the case object.
    cached_values = list(
        identifiers["student"].cached_ids[id(cases[0])].named_ids.values()
    )
    generated_id = cached_values[0]["id"]

    actual = source.data
    expected_template = """
        | first_name | id |
        | - | - |
        | Bob | {s1} |
        | Still Bob | {s1} |
        """
    expected = markdown_to_df(expected_template.format(s1=generated_id))
    assert_frame_equal(actual, expected)

    # The first cached named id must be the default given to the source.
    cached_names = list(
        identifiers["student"].cached_ids[id(cases[0])].named_ids.keys()
    )
    generated_name_id = cached_names[0]
    assert generated_name_id == source.defaults["id"]
def test_target_can_be_split_into_case(simple_target, simple_data, cases):
    """``case_data`` for the second case returns the expected two rows."""
    simple_target.load_actual(simple_data)

    second_case_rows = simple_target.case_data(cases[1])

    expected_table = """
        | id | first_name |
        | - | - |
        | stu1 | Faith |
        | stu2 | Willow |
        """
    assert_frame_equal(second_case_rows, markdown_to_df(expected_table))
def test_sources_stack(simple_source, identifiers, cases):
    """Rows stacked for two cases appear in order, each with its own case's ids."""
    first_case_frame = markdown_to_df(
        """
        | id | first_name |
        | - | - |
        | s1 | Bob |
        | s2 | Nancy |
        """
    )
    simple_source.stack(cases[0], first_case_frame)

    second_case_frame = markdown_to_df(
        """
        | id | first_name |
        | - | - |
        | s1 | Bobob |
        | s2 | Nanci |
        """
    )
    simple_source.stack(cases[1], second_case_frame)

    actual = simple_source.data

    def student_id(case, named_id):
        # The same named id maps to a separate generated id for each case.
        return identifiers["student"].generate(case=case, named_id=named_id)["id"]

    expected_template = """
        | id | first_name |
        | - | - |
        | {s11} | Bob |
        | {s12} | Nancy |
        | {s21} | Bobob |
        | {s22} | Nanci |
        """
    expected = markdown_to_df(
        expected_template.format(
            s11=student_id(cases[0], "s1"),
            s12=student_id(cases[0], "s2"),
            s21=student_id(cases[1], "s1"),
            s22=student_id(cases[1], "s2"),
        )
    )
    assert_frame_equal(actual, expected)
def test_actual_data_is_loaded_ids_translated(simple_target, simple_data):
    """Loaded target data, minus the case column, matches the named-id table."""
    simple_target.load_actual(simple_data)

    loaded_frame = simple_target.data
    actual = loaded_frame.drop(columns=["__dtspec_case__"])

    expected_table = """
        | id | first_name |
        | - | - |
        | stu1 | Buffy |
        | stu2 | Willow |
        | stu1 | Faith |
        | stu2 | Willow |
        """
    assert_frame_equal(actual, markdown_to_df(expected_table))
def test_identifiers_generate_defaults(identifiers, cases):
    """
    An identifier column that is absent from the stacked table gets
    "anonymous" named ids generated for it when the data is stacked.
    """
    id_mapping = {
        "id": {"identifier": identifiers["student"], "attribute": "id"},
    }
    source = Source(id_mapping=id_mapping)

    names_only_frame = markdown_to_df(
        """
        | first_name |
        | - |
        | Bob |
        | Nancy |
        """
    )
    source.stack(cases[0], names_only_frame)

    # Ids cached for a case are keyed by the id() of the case object.
    cached_for_case = identifiers["student"].cached_ids[id(cases[0])]
    anonymous_ids = []
    for named_entry in cached_for_case.named_ids.values():
        anonymous_ids.append(named_entry["id"])

    actual = source.data
    expected_template = """
        | first_name | id |
        | - | - |
        | Bob | {s1} |
        | Nancy | {s2} |
        """
    expected = markdown_to_df(
        expected_template.format(s1=anonymous_ids[0], s2=anonymous_ids[1])
    )
    assert_frame_equal(actual, expected)
def test_ignores_trailing_comments():
    """A ``#`` comment after a row's closing pipe is not part of the parsed data."""
    given = """
        | id | name |
        | - | - |
        | 1 | one |
        | 2 | two | # Some comment
        | 3 | three |
        """
    expected_columns = {
        "id": ["1", "2", "3"],
        "name": ["one", "two", "three"],
    }
    expected = pd.DataFrame(expected_columns)

    parsed = markdown_to_df(given)
    assert_frame_equal(parsed, expected)
def test_convert_table_to_df():
    """A plain markdown table parses into a DataFrame of string columns."""
    given = """
        | id | name |
        | - | - |
        | 1 | one |
        | 2 | two |
        | 3 | three |
        """
    expected_columns = {
        "id": ["1", "2", "3"],
        "name": ["one", "two", "three"],
    }
    expected = pd.DataFrame(expected_columns)

    parsed = markdown_to_df(given)
    assert_frame_equal(parsed, expected)
def test_honors_embedded_octothorpes():
    """A ``#`` inside a cell is kept as data rather than treated as a comment."""
    given = """
        | id | name |
        | - | - |
        | 1 | one |
        | 2 | #2 |
        | 3 | three |
        """
    expected_columns = {
        "id": ["1", "2", "3"],
        "name": ["one", "#2", "three"],
    }
    expected = pd.DataFrame(expected_columns)

    parsed = markdown_to_df(given)
    assert_frame_equal(parsed, expected)
def test_cases_assert_expectations():
    """``Case.assert_expectations`` raises when actual data differs from expected.

    The actual frame starts as a copy of the expected table and has one
    ``name`` cell changed, so the expectation attached to the case should
    fail with ``AssertionError``.
    """
    table = """
        | id | name |
        | - | - |
        | 1 | Buffy |
        | 2 | Willow |
        | 3 | Xander |
        """
    actual_data = markdown_to_df(table)

    # Write through a single .loc call on the frame itself. The chained form
    # `actual_data["name"].iloc[1] = ...` assigns into an intermediate Series
    # and leaves the frame untouched under pandas Copy-on-Write.
    second_row_label = actual_data.index[1]
    actual_data.loc[second_row_label, "name"] = "Evil Willow"

    expectation = DataExpectation(Target(), table)
    expectation.load_actual(actual_data)

    case = Case(expectations=[expectation])
    with pytest.raises(AssertionError):
        case.assert_expectations()
def actual_data(expected_table):
    """Return the DataFrame that ``markdown_to_df`` builds from ``expected_table``."""
    parsed_frame = markdown_to_df(expected_table)
    return parsed_frame