def test_normalize_ww_init():
    """Check the `ww` names of both dataframes around a normalize call.

    A frame initialised via ``ww.init`` under the name 'test_name' is added
    to an EntitySet and then normalized on 'df2_id' into 'new_df'. The test
    asserts ``ww.name`` and ``ww.schema.name`` on the original frame before
    and after normalizing, and on the new frame afterwards.

    NOTE(review): a function with this same name is defined again further
    down in this file; if both live in one module, the later definition
    replaces this one -- confirm which copy is meant to stay.
    """
    es = EntitySet()
    frame = pd.DataFrame({
        'id': [1, 2, 3, 4],
        'col': ['a', 'b', 'c', 'd'],
        'df2_id': [1, 1, 2, 2],
        'df2_col': [True, False, True, True],
    })
    frame.ww.init(index='id', name='test_name')
    es.add_dataframe(dataframe=frame)

    # Before normalizing only the original dataframe is checked.
    original = es['test_name'].ww
    assert original.name == 'test_name'
    assert original.schema.name == 'test_name'

    es.normalize_dataframe(
        'test_name',
        'new_df',
        'df2_id',
        additional_columns=['df2_col'],
    )

    # Afterwards both dataframes must report the name they are stored under.
    for expected_name in ('test_name', 'new_df'):
        accessor = es[expected_name].ww
        assert accessor.name == expected_name
        assert accessor.schema.name == expected_name
def test_normalize_dataframe():
    """Normalize 'first_table' on 'age' and check the resulting EntitySet.

    The input frame is initialised via ``ww.init`` with 'id' as index and
    'signup_date' as time index. After normalizing into 'second_table'
    (carrying 'phone_number' and 'full_name' along, with
    ``make_time_index=True``) the test asserts that the EntitySet holds two
    dataframes and that 'age' in 'first_table' has the 'foreign_key'
    semantic tag.

    NOTE(review): a function with this same name is defined again further
    down in this file; if both live in one module, the later definition
    replaces this one -- confirm which copy is meant to stay.
    """
    signup = pd.to_datetime('2020-09-01')
    columns = {
        'id': range(4),
        'full_name': [
            'Mr. John Doe',
            'Doe, Mrs. Jane',
            'James Brown',
            'Ms. Paige Turner',
        ],
        'email': ['*****@*****.**', np.nan, '*****@*****.**', '*****@*****.**'],
        'phone_number': [
            '5555555555',
            '555-555-5555',
            '1-(555)-555-5555',
            '555-555-5555',
        ],
        # Nullable dtypes so the missing entries stay in the columns.
        'age': pd.Series([33, None, 33, 57], dtype='Int64'),
        'signup_date': [signup] * 4,
        'is_registered': pd.Series([True, False, True, None], dtype='boolean'),
    }
    table = pd.DataFrame(columns)
    table.ww.init(name='first_table', index='id', time_index='signup_date')

    es = EntitySet('es')
    es.add_dataframe(table)
    carried_over = ['phone_number', 'full_name']
    es.normalize_dataframe(
        'first_table',
        'second_table',
        'age',
        additional_columns=carried_over,
        make_time_index=True,
    )

    assert len(es.dataframe_dict) == 2
    age_tags = es['first_table'].ww.semantic_tags['age']
    assert 'foreign_key' in age_tags
def test_normalize_ww_init():
    """Verify `ww` naming survives ``normalize_dataframe``.

    Builds a four-row frame, initialises it through ``ww.init`` as
    "test_name", registers it in an EntitySet and normalizes it on "df2_id"
    into "new_df". ``ww.name`` and ``ww.schema.name`` are asserted for the
    source frame before and after the call, and for the new frame after it.
    """
    es = EntitySet()

    def check_ww_name(dataframe_name):
        # Both the accessor and its schema must carry the stored name.
        accessor = es[dataframe_name].ww
        assert accessor.name == dataframe_name
        assert accessor.schema.name == dataframe_name

    data = {
        "id": [1, 2, 3, 4],
        "col": ["a", "b", "c", "d"],
        "df2_id": [1, 1, 2, 2],
        "df2_col": [True, False, True, True],
    }
    source = pd.DataFrame(data)
    source.ww.init(index="id", name="test_name")
    es.add_dataframe(dataframe=source)
    check_ww_name("test_name")

    es.normalize_dataframe(
        "test_name", "new_df", "df2_id", additional_columns=["df2_col"]
    )
    check_ww_name("test_name")
    check_ww_name("new_df")
def test_normalize_dataframe():
    """Split "first_table" on "age" and check the EntitySet afterwards.

    The frame is initialised via ``ww.init`` (index "id", time index
    "signup_date"), added to an EntitySet, and normalized into
    "second_table" with "phone_number" and "full_name" as additional columns
    and ``make_time_index=True``. The test asserts that two dataframes are
    registered and that "age" in "first_table" has the "foreign_key"
    semantic tag.
    """
    full_names = ["Mr. John Doe", "Doe, Mrs. Jane", "James Brown", "Ms. Paige Turner"]
    emails = ["*****@*****.**", np.nan, "*****@*****.**", "*****@*****.**"]
    phone_numbers = ["5555555555", "555-555-5555", "1-(555)-555-5555", "555-555-5555"]
    # Nullable dtypes so the None entries are kept as missing values.
    ages = pd.Series([33, None, 33, 57], dtype="Int64")
    registered = pd.Series([True, False, True, None], dtype="boolean")
    signup_dates = [pd.to_datetime("2020-09-01")] * 4

    first_table = pd.DataFrame({
        "id": range(4),
        "full_name": full_names,
        "email": emails,
        "phone_number": phone_numbers,
        "age": ages,
        "signup_date": signup_dates,
        "is_registered": registered,
    })
    first_table.ww.init(name="first_table", index="id", time_index="signup_date")

    es = EntitySet("es")
    es.add_dataframe(first_table)
    normalize_options = {
        "additional_columns": ["phone_number", "full_name"],
        "make_time_index": True,
    }
    es.normalize_dataframe("first_table", "second_table", "age", **normalize_options)

    dataframe_count = len(es.dataframe_dict)
    assert dataframe_count == 2
    assert "foreign_key" in es["first_table"].ww.semantic_tags["age"]