Beispiel #1
0
def make_ecommerce_entityset(with_integer_time_index=False):
    """Assemble the ecommerce EntitySet from the module's factory helpers.

    Relationship layout of the result:

          R         Regions
         / \\       .
        S   C       Stores, Customers
            |       .
            S   P   Sessions, Products
             \\ /   .
              L     Log

    A ``cohorts`` dataframe is also normalized out of ``customers`` (keyed on
    ``cohort``) before the relationships are added.

    Args:
        with_integer_time_index: Forwarded to the dataframe, logical-type and
            time-index factories. When truthy, the EntitySet id is
            ``ecommerce_int_time_index`` instead of ``ecommerce``.

    Returns:
        The populated EntitySet.
    """
    frames = make_ecommerce_dataframes(
        with_integer_time_index=with_integer_time_index)
    entityset_id = ('ecommerce_int_time_index'
                    if with_integer_time_index else 'ecommerce')

    ltypes_by_frame = make_logical_types(
        with_integer_time_index=with_integer_time_index)
    tags_by_frame = make_semantic_tags()
    time_index_specs = make_time_indexes(
        with_integer_time_index=with_integer_time_index)

    entityset = EntitySet(id=entityset_id)

    for name, frame in frames.items():
        # Dataframes without an entry in the time-index table get no
        # primary or secondary time index.
        spec = time_index_specs.get(name)
        if spec is None:
            primary_ti, secondary_ti = None, None
        else:
            primary_ti, secondary_ti = spec['name'], spec['secondary']

        entityset.add_dataframe(
            frame,
            dataframe_name=name,
            index='id',
            logical_types=ltypes_by_frame[name],
            semantic_tags=tags_by_frame[name],
            time_index=primary_ti,
            secondary_time_index=secondary_ti)

    # Split the cohort columns of `customers` into their own dataframe.
    entityset.normalize_dataframe(
        'customers', 'cohorts', 'cohort',
        additional_columns=['cohort_name'],
        make_time_index=True,
        new_dataframe_time_index='cohort_end')

    # Each tuple is (parent dataframe, parent column, child dataframe,
    # child column).
    relationships = [
        ('régions', 'id', 'customers', 'région_id'),
        ('régions', 'id', 'stores', 'région_id'),
        ('customers', 'id', 'sessions', 'customer_id'),
        ('sessions', 'id', 'log', 'session_id'),
        ('products', 'id', 'log', 'product_id'),
    ]
    entityset.add_relationships(relationships)

    return entityset
Beispiel #2
0
def test_operations_invalidate_metadata(es):
    """Verify that each mutating EntitySet operation clears cached metadata.

    After every operation the private ``_data_description`` must be ``None``,
    and reading ``metadata`` must repopulate it.

    Args:
        es: Source EntitySet fixture supplying the ``customers`` and
            ``sessions`` dataframes.
    """

    def check_metadata_regenerates(entityset):
        # Cache is empty until `.metadata` is read, and filled afterwards.
        assert entityset._data_description is None
        assert entityset.metadata is not None
        assert entityset._data_description is not None

    target = EntitySet(id="test")
    # A brand-new EntitySet builds its metadata lazily on first access.
    check_metadata_regenerates(target)

    customers = es["customers"]
    if isinstance(customers, pd.DataFrame):
        customers_ltypes = None
    else:
        # Non-pandas dataframes get explicit logical types, with
        # `signup_date` forced to Datetime.
        customers_ltypes = customers.ww.logical_types
        customers_ltypes["signup_date"] = Datetime
    # NOTE(review): `index` receives the dataframe's `.index` attribute here
    # rather than a column name -- confirm this is intended.
    target.add_dataframe(
        customers,
        "customers",
        index=customers.index,
        logical_types=customers_ltypes,
    )

    sessions = es["sessions"]
    sessions_ltypes = (
        None if isinstance(sessions, pd.DataFrame)
        else sessions.ww.logical_types
    )
    target.add_dataframe(
        sessions,
        "sessions",
        index=sessions.index,
        logical_types=sessions_ltypes,
    )
    check_metadata_regenerates(target)

    target = target.add_relationship(
        "customers", "id", "sessions", "customer_id")
    check_metadata_regenerates(target)

    target = target.normalize_dataframe("customers", "cohort", "cohort")
    check_metadata_regenerates(target)

    target.add_last_time_indexes()
    check_metadata_regenerates(target)

    # Automatically adding interesting values is not supported in Dask or
    # Koalas, so this step only runs for pandas-backed EntitySets.
    if target.dataframe_type == Library.PANDAS.value:
        target.add_interesting_values()
        check_metadata_regenerates(target)
Beispiel #3
0
def make_ecommerce_entityset(with_integer_time_index=False):
    """Create the ecommerce EntitySet and wire up its relationships.

    The relationships form the following shape:

      R         Regions
     / \\       .
    S   C       Stores, Customers
        |       .
        S   P   Sessions, Products
         \\ /   .
          L     Log

    In addition, ``customers`` is normalized on ``cohort`` into a new
    ``cohorts`` dataframe.

    Args:
        with_integer_time_index: Passed through to the dataframe,
            logical-type and time-index factories; a truthy value also
            appends ``_int_time_index`` to the EntitySet id.

    Returns:
        The fully populated EntitySet.
    """
    source_frames = make_ecommerce_dataframes(
        with_integer_time_index=with_integer_time_index
    )
    id_suffix = "_int_time_index" if with_integer_time_index else ""

    ltypes = make_logical_types(with_integer_time_index=with_integer_time_index)
    tags = make_semantic_tags()
    ti_lookup = make_time_indexes(with_integer_time_index=with_integer_time_index)

    result = EntitySet(id="ecommerce" + id_suffix)

    for frame_name in source_frames:
        # Default to no time indexes; overridden only when the lookup has an
        # entry for this dataframe.
        primary = None
        secondary = None
        ti_entry = ti_lookup.get(frame_name)
        if ti_entry is not None:
            primary = ti_entry["name"]
            secondary = ti_entry["secondary"]

        result.add_dataframe(
            source_frames[frame_name],
            dataframe_name=frame_name,
            index="id",
            logical_types=ltypes[frame_name],
            semantic_tags=tags[frame_name],
            time_index=primary,
            secondary_time_index=secondary,
        )

    # Pull the cohort columns out of `customers` into a `cohorts` dataframe.
    result.normalize_dataframe(
        "customers",
        "cohorts",
        "cohort",
        additional_columns=["cohort_name"],
        make_time_index=True,
        new_dataframe_time_index="cohort_end",
    )

    # (parent dataframe, parent column, child dataframe, child column)
    region_links = [
        ("régions", "id", child, "région_id") for child in ("customers", "stores")
    ]
    activity_links = [
        ("customers", "id", "sessions", "customer_id"),
        ("sessions", "id", "log", "session_id"),
        ("products", "id", "log", "product_id"),
    ]
    result.add_relationships(region_links + activity_links)

    return result