コード例 #1
0
def test_replace_dataframe_different_dataframe_types():
    """Check that ``replace_dataframe`` rejects a dataframe of another type.

    A Dask dataframe is registered under the name "sessions". Replacing it
    with the pandas dataframe it was built from is expected to raise a
    ``TypeError`` whose message matches 'Incorrect DataFrame type used'.
    """
    session_times = [
        pd.to_datetime(day)
        for day in ('2019-01-10', '2019-02-03', '2019-01-01', '2017-08-25')
    ]
    pandas_sessions = pd.DataFrame({
        "id": [0, 1, 2, 3],
        "user": [1, 2, 1, 3],
        "time": session_times,
        "strings": ["I am a string", "23", "abcdef ghijk", ""],
    })
    # Dask-backed copy of the same data; this is what the EntitySet stores.
    dask_sessions = dd.from_pandas(pandas_sessions, npartitions=2)

    add_kwargs = {
        "dataframe_name": "sessions",
        "dataframe": dask_sessions,
        "index": "id",
        "time_index": "time",
        "logical_types": {
            "id": Integer,
            "user": Integer,
            "time": Datetime,
            "strings": NaturalLanguage,
        },
        "semantic_tags": {'user': '******'},
    }
    entityset = EntitySet(id="dask_es")
    entityset.add_dataframe(**add_kwargs)

    # Swapping in the pandas frame for the Dask-backed one must be refused.
    with pytest.raises(TypeError, match='Incorrect DataFrame type used'):
        entityset.replace_dataframe('sessions', pandas_sessions)
コード例 #2
0
def test_replace_dataframe():
    """Check that replacing a dataframe's data keeps its Woodwork schema.

    A four-row dataframe is initialized with Woodwork and added to an
    EntitySet. It is then replaced with a two-row slice of itself; the stored
    dataframe must have two rows and a schema equal to the one captured
    before the replacement.
    """
    columns = {
        'id': range(4),
        'full_name': [
            'Mr. John Doe',
            'Doe, Mrs. Jane',
            'James Brown',
            'Ms. Paige Turner',
        ],
        'email': [
            '*****@*****.**',
            np.nan,
            '*****@*****.**',
            '*****@*****.**',
        ],
        'phone_number': [
            '5555555555',
            '555-555-5555',
            '1-(555)-555-5555',
            '555-555-5555',
        ],
        'age': pd.Series([33, None, 33, 57], dtype='Int64'),
        'signup_date': [pd.to_datetime('2020-09-01')] * 4,
        'is_registered': pd.Series([True, False, True, None],
                                   dtype='boolean'),
    }
    source_df = pd.DataFrame(columns)
    source_df.ww.init(name='table', index='id')

    entityset = EntitySet('es')
    entityset.add_dataframe(source_df)
    schema_before = entityset['table'].ww.schema

    # Plain ``iloc`` slice that keeps only the last two of the four rows.
    entityset.replace_dataframe('table', source_df.iloc[2:])

    assert len(entityset['table']) == 2
    assert entityset['table'].ww.schema == schema_before
コード例 #3
0
def test_replace_dataframe_data_transformation(latlong_df):
    """Check LatLong values both before and after ``replace_dataframe``.

    A copy of ``latlong_df`` is initialized with every column typed as
    'LatLong' and added to an EntitySet; the first row of every column must
    equal ``(1, 2)``. The dataframe is then replaced with ``latlong_df``
    itself, after which the last row of every column must equal ``(3, 4)``.
    When the stored dataframe is a Koalas dataframe the expected values are
    lists instead of tuples.

    NOTE(review): presumably ``latlong_df`` holds the same coordinates in
    several untransformed representations -- confirm against the fixture.
    """
    typed_df = latlong_df.copy()
    latlong_types = {}
    for col_name in typed_df.columns:
        latlong_types[col_name] = 'LatLong'
    typed_df.ww.init(name='latlongs',
                     index='string_tuple',
                     logical_types=latlong_types)

    es = EntitySet()
    es.add_dataframe(dataframe=typed_df)

    # Values as stored right after the initial add.
    stored = to_pandas(es['latlongs'])
    is_koalas = ks and isinstance(es['latlongs'], ks.DataFrame)
    first_expected = [1, 2] if is_koalas else (1, 2)
    for col in latlong_df.columns:
        assert stored[col].iloc[0] == first_expected

    # Values as stored after swapping in the original fixture dataframe.
    es.replace_dataframe('latlongs', latlong_df)
    stored = to_pandas(es['latlongs'])
    is_koalas = ks and isinstance(es['latlongs'], ks.DataFrame)
    last_expected = [3, 4] if is_koalas else (3, 4)
    for col in latlong_df.columns:
        assert stored[col].iloc[-1] == last_expected
コード例 #4
0
ファイル: test_ww_es.py プロジェクト: RomaKoks/featuretools
def test_replace_dataframe():
    """Verify that ``replace_dataframe`` swaps the data but keeps the schema.

    The dataframe registered as "table" starts with four rows. After it is
    replaced with rows 2 onward of the same frame, the EntitySet must hold
    two rows and report a Woodwork schema equal to the original one.
    """
    full_names = [
        "Mr. John Doe",
        "Doe, Mrs. Jane",
        "James Brown",
        "Ms. Paige Turner",
    ]
    emails = [
        "*****@*****.**",
        np.nan,
        "*****@*****.**",
        "*****@*****.**",
    ]
    phone_numbers = [
        "5555555555",
        "555-555-5555",
        "1-(555)-555-5555",
        "555-555-5555",
    ]
    table_df = pd.DataFrame({
        "id": range(4),
        "full_name": full_names,
        "email": emails,
        "phone_number": phone_numbers,
        "age": pd.Series([33, None, 33, 57], dtype="Int64"),
        "signup_date": [pd.to_datetime("2020-09-01")] * 4,
        "is_registered": pd.Series([True, False, True, None],
                                   dtype="boolean"),
    })
    table_df.ww.init(name="table", index="id")

    es = EntitySet("es")
    es.add_dataframe(table_df)
    # Captured before the replacement so it can be compared afterwards.
    expected_schema = es["table"].ww.schema

    trimmed_df = table_df.iloc[2:]
    es.replace_dataframe("table", trimmed_df)

    assert len(es["table"]) == 2
    assert es["table"].ww.schema == expected_schema