# Example 1
def test_e2e(spark, pandas_dummy_dataset, header):
    """End-to-end check that the slow and cache-backed recommenders agree.

    Fits a SARPlus model on the dummy dataset, requests top-3 items for a
    single user via both code paths, and asserts the (user, item) pairs match
    exactly and the scores match to within a relative tolerance of 1e-3.
    """
    model = SARPlus(spark, **header)
    model.fit(spark.createDataFrame(pandas_dummy_dataset))

    # Single-row query: user 3 with item 2 in the interaction history.
    query_df = spark.createDataFrame(
        pd.DataFrame({header["col_user"]: [3], header["col_item"]: [2]})
    )

    # Sort both results identically so rows line up for comparison.
    sort_cols = [header["col_user"], header["col_item"]]

    slow = model.recommend_k_items_slow(query_df, top_k=3, remove_seen=False)
    slow_pd = slow.toPandas().sort_values(sort_cols).reset_index(drop=True)

    fast = model.recommend_k_items(
        query_df,
        "tests/test_e2e_cache",
        top_k=3,
        n_user_prediction_partitions=2,
        remove_seen=False,
    )
    fast_pd = fast.toPandas().sort_values(sort_cols).reset_index(drop=True)

    # First two columns are (user, item): must be identical.
    assert (slow_pd.iloc[:, :2] == fast_pd.iloc[:, :2]).all().all()
    # Scores may differ by floating-point noise; 1e-3 is the rtol argument.
    assert np.allclose(slow_pd.score.values, fast_pd.score.values, 1e-3)
# NOTE(review): this file defines `test_e2e` twice; Python keeps only the last
# definition, so pytest silently skips the earlier copy — remove one of them.
def test_e2e(spark, pandas_dummy_dataset, header):
    """End-to-end check that the slow and cache-backed recommenders agree.

    Fits a SARPlus model on the dummy dataset, requests top-3 items for a
    single user via both code paths, and asserts the (user, item) pairs match
    exactly and the scores match to within a relative tolerance of 1e-3.
    """
    sar = SARPlus(spark, **header)

    df = spark.createDataFrame(pandas_dummy_dataset)
    sar.fit(df)

    # Single-row query: user 3 with item 2 in the interaction history.
    test_df = spark.createDataFrame(
        pd.DataFrame({header["col_user"]: [3], header["col_item"]: [2]})
    )

    # Sort both results identically so rows line up for comparison.
    r1 = (
        sar.recommend_k_items_slow(test_df, top_k=3, remove_seen=False)
        .toPandas()
        .sort_values([header["col_user"], header["col_item"]])
        .reset_index(drop=True)
    )

    r2 = (
        sar.recommend_k_items(
            test_df,
            "tests/test_e2e_cache",
            top_k=3,
            n_user_prediction_partitions=2,
            remove_seen=False,
        )
        .toPandas()
        .sort_values([header["col_user"], header["col_item"]])
        .reset_index(drop=True)
    )

    # First two columns are (user, item): must be identical.
    assert (r1.iloc[:, :2] == r2.iloc[:, :2]).all().all()
    # Scores may differ by floating-point noise; 1e-3 is the rtol argument.
    assert np.allclose(r1.score.values, r2.score.values, 1e-3)