Ejemplo n.º 1
0
def test_ml1m() -> None:
    """Round-trip a tiny hand-built ML-1M archive through MovieLens1MDataManager.

    Writes ``ratings.dat``, ``movies.dat`` and ``users.dat`` under ``ml-1m/``
    into a zip archive at ``ZIPFILE_NAME``, loads the archive back, and checks
    the interaction, item-info and user-info frames. The archive is removed
    afterwards whether or not the assertions pass.
    """
    if sys.platform == "win32":
        pytest.skip("Skip on Windows.")

    # Every record is spelled out with an explicit "\n" so that the
    # indentation of this source file can never leak into the fixture data
    # (a triple-quoted literal spanning indented lines would prefix each
    # record with spaces and append a whitespace-only last line).
    ratings_dat = (
        "1::2::5::0\n"
        "1::3::5::86400\n"
    ).encode()
    movies_dat = (
        "1::A fantastic movie (2020)::fantasy|thriller\n"
        "1917::Vinni-Pukh(1969)::children\n"
    ).encode("latin-1")
    users_dat = (
        "1::M::32::0::1690074\n"
        "2::F::4::1::1760013\n"
    ).encode()

    try:
        with ZipFile(ZIPFILE_NAME, "w") as zf:
            for member_name, payload in (
                ("ml-1m/ratings.dat", ratings_dat),
                ("ml-1m/movies.dat", movies_dat),
                ("ml-1m/users.dat", users_dat),
            ):
                with zf.open(member_name, "w") as ofs:
                    ofs.write(payload)

        loader = MovieLens1MDataManager(ZIPFILE_NAME)
        df = loader.read_interaction()
        movie_info = loader.read_item_info()
        user_info = loader.read_user_info()

        # Two rating rows, four columns.
        assert df.shape == (2, 4)
        np.testing.assert_array_equal(df["userId"].values, [1, 1])
        np.testing.assert_array_equal(df["movieId"].values, [2, 3])
        np.testing.assert_array_equal(df["rating"].values, [5, 5])
        # The raw timestamps 0 and 86400 are expected back as these dates.
        np.testing.assert_array_equal(
            df["timestamp"].values,
            np.asarray(
                [
                    "1970-01-01",
                    "1970-01-02",
                ],
                dtype="datetime64[ns]",
            ),
        )
        np.testing.assert_array_equal(movie_info.index.values, [1, 1917])
        np.testing.assert_array_equal(movie_info.release_year, [2020, 1969])
        np.testing.assert_array_equal(user_info.index.values, [1, 2])
        np.testing.assert_array_equal(user_info.gender, ["M", "F"])
    finally:
        # The archive may not exist if creating it failed; an unconditional
        # unlink() would then raise FileNotFoundError and mask the real error.
        if ZIPFILE_NAME.exists():
            ZIPFILE_NAME.unlink()
Ejemplo n.º 2
0
from irspack.evaluator import Evaluator
from irspack.optimizers import (  # BPRFMOptimizer, #requires lightFM; MultVAEOptimizer, #requires jax & haiku & optax; SLIMOptimizer,
    AsymmetricCosineKNNOptimizer, BaseOptimizer, CosineKNNOptimizer,
    DenseSLIMOptimizer, IALSOptimizer, P3alphaOptimizer, RP3betaOptimizer,
    TopPopOptimizer, TverskyIndexKNNOptimizer,
)
from irspack.split import split_dataframe_partial_user_holdout

# Set both thread-count environment variables to "8" in one call.
# OMP_NUM_THREADS is the OpenMP setting; IRSPACK_NUM_THREADS_DEFAULT is
# presumably read by irspack as its default thread count (confirm in irspack).
os.environ.update(
    {
        "OMP_NUM_THREADS": "8",
        "IRSPACK_NUM_THREADS_DEFAULT": "8",
    }
)

if __name__ == "__main__":
    # Not referenced in the lines visible here; presumably a ranking cutoff
    # consumed later in the script.
    BASE_CUTOFF = 20

    # Load the MovieLens-1M interaction table.
    data_manager = MovieLens1MDataManager()
    df_all = data_manager.read_interaction()

    # Split by user with "userId" / "movieId" as the user and item columns.
    # The ratio arguments presumably select 20% of users each for validation
    # and test and hold out half of their interactions (confirm against the
    # irspack split documentation). The second return value is discarded.
    data_all, _ = split_dataframe_partial_user_holdout(
        df_all,
        "userId",
        "movieId",
        val_user_ratio=0.2,
        test_user_ratio=0.2,
        heldout_ratio_val=0.5,
        heldout_ratio_test=0.5,
    )

    # Pull the three splits out of the returned mapping in one statement.
    data_train, data_val, data_test = (
        data_all["train"],
        data_all["val"],
        data_all["test"],
    )
Ejemplo n.º 3
0
    UserCBCosineKNNOptimizer,
    UserColdStartEvaluator,
)
from irspack.utils.encoders import (
    BinningEncoder,
    CategoricalValueEncoder,
    DataFrameEncoder,
)

# Set both thread-count environment variables to "8" in one call.
# OMP_NUM_THREADS is the OpenMP setting; IRSPACK_NUM_THREADS_DEFAULT is
# presumably read by irspack as its default thread count (confirm in irspack).
os.environ.update(
    {
        "OMP_NUM_THREADS": "8",
        "IRSPACK_NUM_THREADS_DEFAULT": "8",
    }
)


if __name__ == "__main__":
    BASE_CUTOFF = 20
    loader = MovieLens1MDataManager()
    user_df = loader.read_user_info()
    ratings = loader.read_interaction()
    user_ids_unique: List[int] = np.unique(ratings.userId)
    movie_ids_unique: List[int] = np.unique(ratings.movieId)
    movie_id_to_index = {id_: i for i, id_ in enumerate(movie_ids_unique)}

    def df_to_sparse(df: pd.DataFrame) -> Tuple[UserIndexArray, sps.csr_matrix]:
        unique_uids, row = np.unique(df.userId, return_inverse=True)
        col = df.movieId.map(movie_id_to_index)
        return (
            unique_uids,
            sps.csr_matrix(
                (np.ones(df.shape[0]), (row, col)),
                shape=(len(unique_uids), len(movie_id_to_index)),
            ),