def make_ecommerce_entityset(with_integer_time_index=False):
    """Assemble the ecommerce EntitySet used as a test fixture.

    Dataframes and the relationships added between them::

          R         Regions
         / \\       .
        S   C       Stores, Customers
            |       .
            S   P   Sessions, Products
             \\ /   .
              L     Log

    A "cohorts" dataframe is additionally normalized out of "customers"
    on the "cohort" column before the relationships are added.

    Args:
        with_integer_time_index (bool): Forwarded to the dataframe,
            logical-type and time-index factories. When truthy, the
            entityset id gets the suffix "_int_time_index".

    Returns:
        EntitySet: The populated entityset.
    """
    frames = make_ecommerce_dataframes(
        with_integer_time_index=with_integer_time_index)

    suffix = "_int_time_index" if with_integer_time_index else ""
    entityset_id = "ecommerce" + suffix

    ltypes_by_frame = make_logical_types(
        with_integer_time_index=with_integer_time_index)
    tags_by_frame = make_semantic_tags()
    time_index_specs = make_time_indexes(
        with_integer_time_index=with_integer_time_index)

    entityset = EntitySet(id=entityset_id)

    for frame_name, frame in frames.items():
        # Not every dataframe has a time index entry; those get None for both.
        spec = time_index_specs.get(frame_name)
        if spec is None:
            primary, secondary = None, None
        else:
            primary, secondary = spec["name"], spec["secondary"]

        entityset.add_dataframe(
            frame,
            dataframe_name=frame_name,
            index="id",
            logical_types=ltypes_by_frame[frame_name],
            semantic_tags=tags_by_frame[frame_name],
            time_index=primary,
            secondary_time_index=secondary,
        )

    # Split the cohort columns of "customers" into their own dataframe.
    entityset.normalize_dataframe(
        "customers",
        "cohorts",
        "cohort",
        additional_columns=["cohort_name"],
        make_time_index=True,
        new_dataframe_time_index="cohort_end",
    )

    # (parent dataframe, parent column, child dataframe, child column)
    relationships = [
        ("régions", "id", "customers", "région_id"),
        ("régions", "id", "stores", "région_id"),
        ("customers", "id", "sessions", "customer_id"),
        ("sessions", "id", "log", "session_id"),
        ("products", "id", "log", "product_id"),
    ]
    entityset.add_relationships(relationships)

    return entityset
def test_operations_invalidate_metadata(es):
    """Check that mutating an EntitySet clears its cached metadata.

    After each operation below, ``_data_description`` is expected to be
    ``None`` until ``metadata`` is read, which is expected to populate it.

    Args:
        es: Source entityset fixture supplying the "customers" and
            "sessions" dataframes.
    """

    def assert_metadata_regenerated(entityset):
        # Cache starts empty, reading `metadata` builds it, cache is then set.
        assert entityset._data_description is None
        assert entityset.metadata is not None  # generated after access
        assert entityset._data_description is not None

    new_es = EntitySet(id="test")
    # test metadata gets created on access
    assert_metadata_regenerated(new_es)

    # Logical types are only passed along for non-pandas dataframes.
    if isinstance(es["customers"], pd.DataFrame):
        customers_ltypes = None
    else:
        customers_ltypes = es["customers"].ww.logical_types
        customers_ltypes["signup_date"] = Datetime
    # NOTE(review): `index` receives the dataframe's `.index` attribute here,
    # not a column name -- confirm this is intended.
    new_es.add_dataframe(
        es["customers"],
        "customers",
        index=es["customers"].index,
        logical_types=customers_ltypes,
    )

    sessions_ltypes = (
        None
        if isinstance(es["sessions"], pd.DataFrame)
        else es["sessions"].ww.logical_types
    )
    new_es.add_dataframe(
        es["sessions"],
        "sessions",
        index=es["sessions"].index,
        logical_types=sessions_ltypes,
    )
    assert_metadata_regenerated(new_es)

    new_es = new_es.add_relationship("customers", "id", "sessions", "customer_id")
    assert_metadata_regenerated(new_es)

    new_es = new_es.normalize_dataframe("customers", "cohort", "cohort")
    assert_metadata_regenerated(new_es)

    new_es.add_last_time_indexes()
    assert_metadata_regenerated(new_es)

    # automatically adding interesting values not supported in Dask or Koalas
    if new_es.dataframe_type == Library.PANDAS.value:
        new_es.add_interesting_values()
        assert_metadata_regenerated(new_es)
def make_ecommerce_entityset(with_integer_time_index=False):
    """Create the ecommerce test EntitySet.

    The dataframes are related as follows::

          R         Regions
         / \\       .
        S   C       Stores, Customers
            |       .
            S   P   Sessions, Products
             \\ /   .
              L     Log

    Before the relationships are added, "customers" is normalized on its
    "cohort" column into a new "cohorts" dataframe.

    Args:
        with_integer_time_index (bool): Passed through to the helpers that
            build the dataframes, logical types and time indexes; also
            appends "_int_time_index" to the entityset id when truthy.

    Returns:
        EntitySet: The fully assembled entityset.
    """
    tables = make_ecommerce_dataframes(
        with_integer_time_index=with_integer_time_index
    )

    name_parts = ["ecommerce"]
    if with_integer_time_index:
        name_parts.append("_int_time_index")
    entityset_id = "".join(name_parts)

    ltype_lookup = make_logical_types(with_integer_time_index=with_integer_time_index)
    tag_lookup = make_semantic_tags()
    time_index_lookup = make_time_indexes(
        with_integer_time_index=with_integer_time_index
    )

    result = EntitySet(id=entityset_id)

    for table_name in tables:
        # Dataframes without a time index entry pass None for both fields.
        index_spec = time_index_lookup.get(table_name)
        primary_ti = None if index_spec is None else index_spec["name"]
        secondary_ti = None if index_spec is None else index_spec["secondary"]

        table = tables[table_name]
        result.add_dataframe(
            table,
            dataframe_name=table_name,
            index="id",
            logical_types=ltype_lookup[table_name],
            semantic_tags=tag_lookup[table_name],
            time_index=primary_ti,
            secondary_time_index=secondary_ti,
        )

    # Pull the cohort columns out of "customers" into "cohorts".
    result.normalize_dataframe(
        "customers",
        "cohorts",
        "cohort",
        additional_columns=["cohort_name"],
        make_time_index=True,
        new_dataframe_time_index="cohort_end",
    )

    # Each tuple: (parent dataframe, parent column, child dataframe, child column)
    region_links = [
        ("régions", "id", "customers", "région_id"),
        ("régions", "id", "stores", "région_id"),
    ]
    activity_links = [
        ("customers", "id", "sessions", "customer_id"),
        ("sessions", "id", "log", "session_id"),
        ("products", "id", "log", "product_id"),
    ]
    result.add_relationships(region_links + activity_links)

    return result