Example #1
0
def test_normalize_ww_init():
    """Check Woodwork names before and after ``normalize_dataframe``.

    A DataFrame initialized through ``ww.init`` with the name
    ``'test_name'`` is added to an EntitySet. The test asserts that
    ``ww.name`` and ``ww.schema.name`` both equal ``'test_name'``, normalizes
    on ``'df2_id'`` into ``'new_df'``, and then asserts that both the
    original and the new dataframe report names matching their EntitySet
    keys.
    """
    records = pd.DataFrame({
        'id': [1, 2, 3, 4],
        'col': ['a', 'b', 'c', 'd'],
        'df2_id': [1, 1, 2, 2],
        'df2_col': [True, False, True, True]
    })
    records.ww.init(index='id', name='test_name')

    es = EntitySet()
    es.add_dataframe(dataframe=records)

    # Before normalization only the original dataframe exists.
    for table in ('test_name',):
        assert es[table].ww.name == table
        assert es[table].ww.schema.name == table

    es.normalize_dataframe(
        'test_name',
        'new_df',
        'df2_id',
        additional_columns=['df2_col'],
    )

    # After normalization both dataframes must report their own names.
    for table in ('test_name', 'new_df'):
        assert es[table].ww.name == table
        assert es[table].ww.schema.name == table
Example #2
0
def test_normalize_dataframe():
    """Normalize ``first_table`` on ``age`` and check the resulting EntitySet.

    The source DataFrame is initialized with Woodwork (index ``id``, time
    index ``signup_date``), added to an EntitySet, and normalized into
    ``second_table`` with ``phone_number`` and ``full_name`` passed as
    ``additional_columns``. The test asserts that the EntitySet then holds
    two dataframes and that ``age`` in ``first_table`` carries the
    ``'foreign_key'`` semantic tag.
    """
    signup = pd.to_datetime('2020-09-01')
    columns = {
        'id': range(4),
        'full_name': [
            'Mr. John Doe',
            'Doe, Mrs. Jane',
            'James Brown',
            'Ms. Paige Turner',
        ],
        'email': ['*****@*****.**', np.nan, '*****@*****.**', '*****@*****.**'],
        'phone_number': [
            '5555555555',
            '555-555-5555',
            '1-(555)-555-5555',
            '555-555-5555',
        ],
        # Nullable extension dtypes so the None entries stay as missing values.
        'age': pd.Series([33, None, 33, 57], dtype='Int64'),
        'signup_date': [signup] * 4,
        'is_registered': pd.Series([True, False, True, None], dtype='boolean'),
    }
    df = pd.DataFrame(columns)
    df.ww.init(name='first_table', index='id', time_index='signup_date')

    es = EntitySet('es')
    es.add_dataframe(df)
    es.normalize_dataframe(
        'first_table',
        'second_table',
        'age',
        additional_columns=['phone_number', 'full_name'],
        make_time_index=True,
    )

    assert len(es.dataframe_dict) == 2
    assert 'foreign_key' in es['first_table'].ww.semantic_tags['age']
Example #3
0
def test_normalize_ww_init():
    """Verify Woodwork naming is consistent around ``normalize_dataframe``.

    Adds a Woodwork-initialized DataFrame named ``"test_name"`` to an
    EntitySet, normalizes it on ``"df2_id"`` into ``"new_df"``, and asserts
    that ``ww.name`` and ``ww.schema.name`` equal the EntitySet key for the
    original dataframe (before and after) and for the new dataframe (after).
    """
    es = EntitySet()

    def assert_ww_name(dataframe_name):
        # Both the accessor-level name and the schema-level name must match.
        assert es[dataframe_name].ww.name == dataframe_name
        assert es[dataframe_name].ww.schema.name == dataframe_name

    source = pd.DataFrame({
        "id": [1, 2, 3, 4],
        "col": ["a", "b", "c", "d"],
        "df2_id": [1, 1, 2, 2],
        "df2_col": [True, False, True, True],
    })
    source.ww.init(index="id", name="test_name")
    es.add_dataframe(dataframe=source)

    assert_ww_name("test_name")

    extra_columns = ["df2_col"]
    es.normalize_dataframe(
        "test_name",
        "new_df",
        "df2_id",
        additional_columns=extra_columns,
    )

    assert_ww_name("test_name")
    assert_ww_name("new_df")
Example #4
0
def test_normalize_dataframe():
    """Normalize ``first_table`` on ``age`` and inspect the EntitySet.

    Builds a seven-column DataFrame, initializes Woodwork on it with index
    ``id`` and time index ``signup_date``, adds it to an EntitySet, and calls
    ``normalize_dataframe`` to create ``second_table`` keyed on ``age``.
    Asserts that two dataframes are registered afterwards and that ``age``
    in ``first_table`` has the ``"foreign_key"`` semantic tag.
    """
    full_names = [
        "Mr. John Doe",
        "Doe, Mrs. Jane",
        "James Brown",
        "Ms. Paige Turner",
    ]
    emails = ["*****@*****.**", np.nan, "*****@*****.**", "*****@*****.**"]
    phone_numbers = [
        "5555555555",
        "555-555-5555",
        "1-(555)-555-5555",
        "555-555-5555",
    ]
    # Nullable extension dtypes so the None entries stay as missing values.
    ages = pd.Series([33, None, 33, 57], dtype="Int64")
    registered = pd.Series([True, False, True, None], dtype="boolean")

    df = pd.DataFrame({
        "id": range(4),
        "full_name": full_names,
        "email": emails,
        "phone_number": phone_numbers,
        "age": ages,
        "signup_date": [pd.to_datetime("2020-09-01")] * 4,
        "is_registered": registered,
    })
    df.ww.init(name="first_table", index="id", time_index="signup_date")

    es = EntitySet("es")
    es.add_dataframe(df)
    es.normalize_dataframe("first_table",
                           "second_table",
                           "age",
                           additional_columns=["phone_number", "full_name"],
                           make_time_index=True)

    dataframe_count = len(es.dataframe_dict)
    assert dataframe_count == 2

    age_tags = es["first_table"].ww.semantic_tags["age"]
    assert "foreign_key" in age_tags