def ks_es(make_es):
    """Return a deep copy of ``make_es`` whose entities hold Koalas dataframes.

    The requesting test is skipped when ``databricks.koalas`` cannot be
    imported. Each entity's dataframe is passed through ``pd_to_ks_clean``,
    given a fresh default index, and converted with ``from_pandas``.
    """
    koalas = pytest.importorskip(
        'databricks.koalas',
        reason="Koalas not installed, skipping",
    )
    # Work on a copy so the source EntitySet is left untouched.
    es_copy = copy.deepcopy(make_es)
    for ent in es_copy.entities:
        pandas_df = pd_to_ks_clean(ent.df)
        pandas_df = pandas_df.reset_index(drop=True)
        ent.df = koalas.from_pandas(pandas_df)
    return es_copy
def ks_es(make_es):
    """Return a deep copy of ``make_es`` whose entities hold Koalas dataframes.

    The requesting test is skipped when ``databricks.koalas`` cannot be
    imported, and also when running on a Windows platform. Otherwise each
    entity's dataframe is passed through ``pd_to_ks_clean``, given a fresh
    default index, and converted with ``from_pandas``.
    """
    koalas = pytest.importorskip(
        'databricks.koalas',
        reason="Koalas not installed, skipping",
    )
    on_windows = sys.platform.startswith('win')
    if on_windows:
        pytest.skip('skipping Koalas tests for Windows')

    # Work on a copy so the source EntitySet is left untouched.
    es_copy = copy.deepcopy(make_es)
    for ent in es_copy.entities:
        pandas_df = pd_to_ks_clean(ent.df)
        pandas_df = pandas_df.reset_index(drop=True)
        ent.df = koalas.from_pandas(pandas_df)
    return es_copy
def test_create_entity_from_ks_df(pd_es):
    """Check an entity created from a Koalas dataframe round-trips its data.

    The "log" entity's dataframe is cleaned with ``pd_to_ks_clean``,
    converted to Koalas, registered in a new EntitySet, and then compared
    (column order ignored via ``check_like``) with the cleaned pandas frame.
    """
    expected = pd_to_ks_clean(pd_es["log"].df)
    koalas_log = ks.from_pandas(expected)

    es = EntitySet(id="ks_es").entity_from_dataframe(
        entity_id="log_ks",
        dataframe=koalas_log,
        index="id",
        time_index="datetime",
        variable_types=pd_es["log"].variable_types,
    )

    actual = es["log_ks"].df.to_pandas()
    pd.testing.assert_frame_equal(expected, actual, check_like=True)
def test_add_dataframe_from_ks_df(pd_es):
    """Check a dataframe added from Koalas round-trips its data.

    The "log" dataframe is cleaned with ``pd_to_ks_clean``, converted to
    Koalas, added to a new EntitySet with the original logical types and
    semantic tags, and then compared (column order ignored via
    ``check_like``) with the cleaned pandas frame.
    """
    expected = pd_to_ks_clean(pd_es["log"])
    koalas_log = ks.from_pandas(expected)

    es = EntitySet(id="ks_es").add_dataframe(
        dataframe_name="log_ks",
        dataframe=koalas_log,
        index="id",
        time_index="datetime",
        logical_types=pd_es["log"].ww.logical_types,
        semantic_tags=get_df_tags(pd_es["log"]),
    )

    actual = es["log_ks"].to_pandas()
    pd.testing.assert_frame_equal(expected, actual, check_like=True)
def ks_home_games_es(pd_home_games_es):
    """Build a Koalas-backed EntitySet mirroring ``pd_home_games_es``.

    The requesting test is skipped when ``databricks.koalas`` cannot be
    imported. Every entity is re-declared as a
    ``(dataframe, index, None, variable_types)`` tuple (the third slot is
    presumably the time index -- confirm against ``ft.EntitySet``), and every
    relationship is re-declared by parent/child entity id and variable name.
    """
    koalas = pytest.importorskip(
        'databricks.koalas',
        reason="Koalas not installed, skipping",
    )

    entities = {
        ent.id: (
            koalas.from_pandas(pd_to_ks_clean(ent.df)),
            ent.index,
            None,
            ent.variable_types,
        )
        for ent in pd_home_games_es.entities
    }

    relationships = []
    for link in pd_home_games_es.relationships:
        relationships.append((
            link.parent_entity.id,
            link.parent_variable.name,
            link.child_entity.id,
            link.child_variable.name,
        ))

    return ft.EntitySet(
        id=pd_home_games_es.id,
        entities=entities,
        relationships=relationships,
    )
def ks_int_es(pd_int_es):
    """Build a Koalas-backed EntitySet mirroring ``pd_int_es``.

    The requesting test is skipped when ``databricks.koalas`` cannot be
    imported. Each source dataframe is cleaned with ``pd_to_ks_clean``,
    re-indexed, converted to Koalas, initialised with the source dataframe's
    Woodwork schema, and added to a new EntitySet. Relationships are then
    re-created from the source's parent/child dataframe and column names.
    """
    koalas = pytest.importorskip(
        'databricks.koalas',
        reason="Koalas not installed, skipping",
    )
    new_es = ft.EntitySet(id=pd_int_es.id)

    for source_df in pd_int_es.dataframes:
        pandas_df = pd_to_ks_clean(source_df)
        pandas_df = pandas_df.reset_index(drop=True)
        koalas_df = koalas.from_pandas(pandas_df)
        # Reuse the schema of the source dataframe for the converted one.
        koalas_df.ww.init(schema=source_df.ww.schema)
        new_es.add_dataframe(koalas_df)

    for link in pd_int_es.relationships:
        endpoints = (
            link._parent_dataframe_name,
            link._parent_column_name,
            link._child_dataframe_name,
            link._child_column_name,
        )
        new_es.add_relationship(*endpoints)

    return new_es
def ks_mock_customer(pd_mock_customer):
    """Build a Koalas-backed EntitySet mirroring ``pd_mock_customer``.

    The requesting test is skipped when ``databricks.koalas`` cannot be
    imported. Every entity is re-declared as a
    ``(dataframe, index, time_index, variable_types)`` tuple whose dataframe
    has been cleaned with ``pd_to_ks_clean`` and re-indexed; every
    relationship is re-declared by parent/child entity id and variable name.
    """
    koalas = pytest.importorskip(
        'databricks.koalas',
        reason="Koalas not installed, skipping",
    )

    entities = {}
    for ent in pd_mock_customer.entities:
        pandas_df = pd_to_ks_clean(ent.df)
        pandas_df = pandas_df.reset_index(drop=True)
        spec = (
            koalas.from_pandas(pandas_df),
            ent.index,
            ent.time_index,
            ent.variable_types,
        )
        entities[ent.id] = spec

    relationships = []
    for link in pd_mock_customer.relationships:
        relationships.append((
            link.parent_entity.id,
            link.parent_variable.name,
            link.child_entity.id,
            link.child_variable.name,
        ))

    return ft.EntitySet(
        id=pd_mock_customer.id,
        entities=entities,
        relationships=relationships,
    )
def ks_mock_customer(pd_mock_customer):
    """Build a Koalas-backed EntitySet mirroring ``pd_mock_customer``.

    The requesting test is skipped when ``databricks.koalas`` cannot be
    imported. Every dataframe is re-declared, keyed by its Woodwork name, as
    a ``(dataframe, index, time_index, logical_types)`` tuple whose dataframe
    has been cleaned with ``pd_to_ks_clean`` and re-indexed; every
    relationship is re-declared by parent/child dataframe and column names.
    """
    koalas = pytest.importorskip(
        'databricks.koalas',
        reason="Koalas not installed, skipping",
    )

    dataframes = {}
    for source_df in pd_mock_customer.dataframes:
        pandas_df = pd_to_ks_clean(source_df)
        pandas_df = pandas_df.reset_index(drop=True)
        spec = (
            koalas.from_pandas(pandas_df),
            source_df.ww.index,
            source_df.ww.time_index,
            source_df.ww.logical_types,
        )
        dataframes[source_df.ww.name] = spec

    relationships = []
    for link in pd_mock_customer.relationships:
        relationships.append((
            link._parent_dataframe_name,
            link._parent_column_name,
            link._child_dataframe_name,
            link._child_column_name,
        ))

    return ft.EntitySet(
        id=pd_mock_customer.id,
        dataframes=dataframes,
        relationships=relationships,
    )
def ks_home_games_es(pd_home_games_es):
    """Build a Koalas-backed EntitySet mirroring ``pd_home_games_es``.

    The requesting test is skipped when ``databricks.koalas`` cannot be
    imported. Each source dataframe is cleaned with ``pd_to_ks_clean``,
    converted to Koalas, initialised with the source dataframe's Woodwork
    schema, and registered under its Woodwork name as a one-element tuple.
    Relationships are re-declared by parent/child dataframe and column names.
    """
    koalas = pytest.importorskip(
        'databricks.koalas',
        reason="Koalas not installed, skipping",
    )

    dataframes = {}
    for source_df in pd_home_games_es.dataframes:
        koalas_df = koalas.from_pandas(pd_to_ks_clean(source_df))
        # Reuse the schema of the source dataframe for the converted one.
        koalas_df.ww.init(schema=source_df.ww.schema)
        dataframes[source_df.ww.name] = (koalas_df,)

    relationships = []
    for link in pd_home_games_es.relationships:
        relationships.append((
            link._parent_dataframe_name,
            link._parent_column_name,
            link._child_dataframe_name,
            link._child_column_name,
        ))

    return ft.EntitySet(
        id=pd_home_games_es.id,
        dataframes=dataframes,
        relationships=relationships,
    )
def ks_diamond_es(pd_diamond_es):
    """Build a Koalas-backed EntitySet mirroring ``pd_diamond_es``.

    The requesting test is skipped when ``databricks.koalas`` cannot be
    imported, and also when running on a Windows platform. Every entity is
    re-declared as a ``(dataframe, index, None, variable_types)`` tuple (the
    third slot is presumably the time index -- confirm against
    ``ft.EntitySet``), and every relationship is re-declared by parent/child
    entity id and variable name.
    """
    koalas = pytest.importorskip(
        'databricks.koalas',
        reason="Koalas not installed, skipping",
    )
    on_windows = sys.platform.startswith('win')
    if on_windows:
        pytest.skip('skipping Koalas tests for Windows')

    entities = {
        ent.id: (
            koalas.from_pandas(pd_to_ks_clean(ent.df)),
            ent.index,
            None,
            ent.variable_types,
        )
        for ent in pd_diamond_es.entities
    }

    relationships = []
    for link in pd_diamond_es.relationships:
        relationships.append((
            link.parent_entity.id,
            link.parent_variable.name,
            link.child_entity.id,
            link.child_variable.name,
        ))

    return ft.EntitySet(
        id=pd_diamond_es.id,
        entities=entities,
        relationships=relationships,
    )
def ks_es(make_es):
    """Build a Koalas-backed EntitySet mirroring ``make_es``.

    The requesting test is skipped when ``databricks.koalas`` cannot be
    imported. Each source entity's dataframe is cleaned with
    ``pd_to_ks_clean``, re-indexed, converted to Koalas, and registered in a
    new EntitySet with the source entity's index, time index, variable types
    and secondary time index. Relationships are then re-created between the
    matching variables of the new EntitySet.
    """
    koalas = pytest.importorskip(
        'databricks.koalas',
        reason="Koalas not installed, skipping",
    )
    new_es = ft.EntitySet(id=make_es.id)

    for ent in make_es.entities:
        pandas_df = pd_to_ks_clean(ent.df)
        pandas_df = pandas_df.reset_index(drop=True)
        new_es.entity_from_dataframe(
            ent.id,
            koalas.from_pandas(pandas_df),
            index=ent.index,
            time_index=ent.time_index,
            variable_types=ent.variable_types,
            secondary_time_index=ent.secondary_time_index,
        )

    for link in make_es.relationships:
        # Look the endpoints up in the new EntitySet, not the source one.
        parent_var = new_es[link.parent_entity.id][link.parent_variable.id]
        child_var = new_es[link.child_entity.id][link.child_variable.id]
        new_es.add_relationship(ft.Relationship(parent_var, child_var))

    return new_es