Beispiel #1
0
def spark_mock_customer(pd_mock_customer):
    """Rebuild ``pd_mock_customer`` as an EntitySet of pyspark.pandas dataframes.

    The caller is skipped when ``pyspark.pandas`` cannot be imported. Every
    dataframe of the source EntitySet is passed through ``pd_to_spark_clean``,
    given a fresh default index, converted with ``ps.from_pandas`` and
    registered under its Woodwork name together with its index, time index
    and logical types. Relationships are carried over as 4-tuples of
    (parent dataframe, parent column, child dataframe, child column) names.
    """
    ps = pytest.importorskip("pyspark.pandas", reason="Spark not installed, skipping")

    def _as_spark(pandas_df):
        # Discard the existing index before handing the frame to Spark.
        return ps.from_pandas(pd_to_spark_clean(pandas_df).reset_index(drop=True))

    dataframes = {
        source.ww.name: (
            _as_spark(source),
            source.ww.index,
            source.ww.time_index,
            source.ww.logical_types,
        )
        for source in pd_mock_customer.dataframes
    }

    relationships = []
    for link in pd_mock_customer.relationships:
        relationships.append(
            (
                link._parent_dataframe_name,
                link._parent_column_name,
                link._child_dataframe_name,
                link._child_column_name,
            )
        )

    return ft.EntitySet(
        id=pd_mock_customer.id,
        dataframes=dataframes,
        relationships=relationships,
    )
Beispiel #2
0
def spark_latlong_df(pd_latlong_df):
    """Return ``pd_latlong_df`` converted to a pyspark.pandas dataframe.

    The caller is skipped when ``pyspark.pandas`` cannot be imported. The
    input is first passed through ``pd_to_spark_clean`` and the result is
    converted with ``ps.from_pandas``.
    """
    ps = pytest.importorskip("pyspark.pandas", reason="Spark not installed, skipping")
    return ps.from_pandas(pd_to_spark_clean(pd_latlong_df))
Beispiel #3
0
def test_add_dataframe_from_spark_df(pd_es):
    """Adding a Spark copy of the "log" dataframe keeps its contents intact.

    The "log" dataframe of ``pd_es`` is cleaned with ``pd_to_spark_clean``,
    converted to pyspark.pandas and added to a new EntitySet using the
    original logical types and semantic tags. The stored dataframe, converted
    back to pandas, must equal the cleaned frame (row/column order ignored).
    """
    log_df = pd_es["log"]
    expected = pd_to_spark_clean(log_df)

    add_kwargs = {
        "dataframe_name": "log_spark",
        "dataframe": ps.from_pandas(expected),
        "index": "id",
        "time_index": "datetime",
        "logical_types": log_df.ww.logical_types,
        "semantic_tags": get_df_tags(log_df),
    }
    # add_dataframe's return value is used as the EntitySet from here on.
    spark_es = EntitySet(id="spark_es").add_dataframe(**add_kwargs)

    actual = spark_es["log_spark"].to_pandas()
    pd.testing.assert_frame_equal(expected, actual, check_like=True)
Beispiel #4
0
def spark_int_es(pd_int_es):
    """Rebuild ``pd_int_es`` as an EntitySet of pyspark.pandas dataframes.

    The caller is skipped when ``pyspark.pandas`` cannot be imported. Each
    source dataframe is cleaned, re-indexed with a default index, converted
    to pyspark.pandas, initialised with the source's Woodwork schema and
    added to a new EntitySet that reuses the source id. The source
    relationships are then re-added one by one.
    """
    ps = pytest.importorskip("pyspark.pandas", reason="Spark not installed, skipping")
    spark_es = ft.EntitySet(id=pd_int_es.id)

    for source in pd_int_es.dataframes:
        converted = ps.from_pandas(pd_to_spark_clean(source).reset_index(drop=True))
        # Reuse the pandas frame's Woodwork schema on the Spark frame.
        converted.ww.init(schema=source.ww.schema)
        spark_es.add_dataframe(converted)

    for link in pd_int_es.relationships:
        endpoints = (
            link._parent_dataframe_name,
            link._parent_column_name,
            link._child_dataframe_name,
            link._child_column_name,
        )
        spark_es.add_relationship(*endpoints)

    return spark_es
Beispiel #5
0
def spark_home_games_es(pd_home_games_es):
    """Rebuild ``pd_home_games_es`` as an EntitySet of pyspark.pandas dataframes.

    The caller is skipped when ``pyspark.pandas`` cannot be imported. Each
    source dataframe is cleaned with ``pd_to_spark_clean``, converted to
    pyspark.pandas and initialised with the source's Woodwork schema; it is
    then registered under its Woodwork name as a 1-tuple. Relationships are
    carried over as 4-tuples of
    (parent dataframe, parent column, child dataframe, child column) names.
    """
    ps = pytest.importorskip("pyspark.pandas", reason="Spark not installed, skipping")

    def _with_schema(source):
        # Convert to Spark, then reuse the pandas frame's Woodwork schema.
        converted = ps.from_pandas(pd_to_spark_clean(source))
        converted.ww.init(schema=source.ww.schema)
        return converted

    dataframes = {
        source.ww.name: (_with_schema(source),)
        for source in pd_home_games_es.dataframes
    }

    relationships = []
    for link in pd_home_games_es.relationships:
        relationships.append(
            (
                link._parent_dataframe_name,
                link._parent_column_name,
                link._child_dataframe_name,
                link._child_column_name,
            )
        )

    return ft.EntitySet(
        id=pd_home_games_es.id,
        dataframes=dataframes,
        relationships=relationships,
    )