def test_ml1m() -> None:
    """Check that ``MovieLens1MDataManager`` parses a minimal ml-1m style archive.

    A tiny zip archive containing ``ml-1m/ratings.dat``, ``ml-1m/movies.dat``
    and ``ml-1m/users.dat`` is written to ``ZIPFILE_NAME``, loaded through
    the data manager, and the resulting interaction / item / user frames are
    compared against the fixture values. The archive is removed afterwards
    regardless of whether the assertions pass.
    """
    if sys.platform == "win32":
        pytest.skip("Skip on Windows.")

    # Fixture payloads: one "::"-separated record per line, each terminated
    # by a newline so that every file yields exactly two rows.
    ratings_payload = "1::2::5::0\n1::3::5::86400\n".encode()
    # movies.dat is deliberately written as latin-1 instead of the default
    # UTF-8 used for the other two members.
    movies_payload = (
        "1::A fantastic movie (2020)::fantasy|thriller\n"
        "1917::Vinni-Pukh(1969)::children\n"
    ).encode("latin-1")
    users_payload = "1::M::32::0::1690074\n2::F::4::1::1760013\n".encode()

    try:
        with ZipFile(ZIPFILE_NAME, "w") as zf:
            with zf.open("ml-1m/ratings.dat", "w") as ofs:
                ofs.write(ratings_payload)
            with zf.open("ml-1m/movies.dat", "w") as ofs:
                ofs.write(movies_payload)
            with zf.open("ml-1m/users.dat", "w") as ofs:
                ofs.write(users_payload)

        loader = MovieLens1MDataManager(ZIPFILE_NAME)
        df = loader.read_interaction()
        movie_info = loader.read_item_info()
        user_info = loader.read_user_info()

        # Two rating rows, four columns.
        assert df.shape == (2, 4)
        np.testing.assert_array_equal(df["userId"].values, [1, 1])
        np.testing.assert_array_equal(df["movieId"].values, [2, 3])
        np.testing.assert_array_equal(df["rating"].values, [5, 5])
        # Raw timestamps 0 and 86400 are expected to come back as the epoch
        # day and the day after it.
        np.testing.assert_array_equal(
            df["timestamp"].values,
            np.asarray(
                [
                    "1970-01-01",
                    "1970-01-02",
                ],
                dtype="datetime64[ns]",
            ),
        )

        # The release year is expected even when the title has no space
        # before the parenthesised year ("Vinni-Pukh(1969)").
        np.testing.assert_array_equal(movie_info.index.values, [1, 1917])
        np.testing.assert_array_equal(movie_info.release_year, [2020, 1969])

        np.testing.assert_array_equal(user_info.index.values, [1, 2])
        np.testing.assert_array_equal(user_info.gender, ["M", "F"])
    finally:
        # Only remove the archive if it was actually created; otherwise a
        # failure while opening it for writing would be replaced by a
        # FileNotFoundError raised from this cleanup step.
        if ZIPFILE_NAME.exists():
            ZIPFILE_NAME.unlink()
from irspack.evaluator import Evaluator
from irspack.optimizers import (  # BPRFMOptimizer, #requires lightFM; MultVAEOptimizer, #requires jax & haiku & optax; SLIMOptimizer,
    AsymmetricCosineKNNOptimizer,
    BaseOptimizer,
    CosineKNNOptimizer,
    DenseSLIMOptimizer,
    IALSOptimizer,
    P3alphaOptimizer,
    RP3betaOptimizer,
    TopPopOptimizer,
    TverskyIndexKNNOptimizer,
)
from irspack.split import split_dataframe_partial_user_holdout

# Request 8 threads through the OpenMP and irspack environment variables.
# NOTE(review): these assignments run after the irspack imports above --
# confirm the values are still picked up at that point.
os.environ["OMP_NUM_THREADS"] = "8"
os.environ["IRSPACK_NUM_THREADS_DEFAULT"] = "8"

if __name__ == "__main__":
    # Not used within this excerpt; presumably a ranking cutoff consumed by
    # code further down the script -- TODO confirm.
    BASE_CUTOFF = 20

    # Load the full interaction table through the data manager (constructed
    # with no arguments here).
    data_manager = MovieLens1MDataManager()
    df_all = data_manager.read_interaction()

    # Split by the "userId" / "movieId" columns. The second return value is
    # discarded. Presumably 20% of users go to each of val/test and half of
    # their interactions are held out -- confirm against irspack.split.
    data_all, _ = split_dataframe_partial_user_holdout(
        df_all,
        "userId",
        "movieId",
        test_user_ratio=0.2,
        val_user_ratio=0.2,
        heldout_ratio_test=0.5,
        heldout_ratio_val=0.5,
    )

    # The split result is indexed by the keys "train", "val" and "test".
    data_train = data_all["train"]
    data_val = data_all["val"]
    data_test = data_all["test"]
UserCBCosineKNNOptimizer, UserColdStartEvaluator, ) from irspack.utils.encoders import ( BinningEncoder, CategoricalValueEncoder, DataFrameEncoder, ) os.environ["OMP_NUM_THREADS"] = "8" os.environ["IRSPACK_NUM_THREADS_DEFAULT"] = "8" if __name__ == "__main__": BASE_CUTOFF = 20 loader = MovieLens1MDataManager() user_df = loader.read_user_info() ratings = loader.read_interaction() user_ids_unique: List[int] = np.unique(ratings.userId) movie_ids_unique: List[int] = np.unique(ratings.movieId) movie_id_to_index = {id_: i for i, id_ in enumerate(movie_ids_unique)} def df_to_sparse(df: pd.DataFrame) -> Tuple[UserIndexArray, sps.csr_matrix]: unique_uids, row = np.unique(df.userId, return_inverse=True) col = df.movieId.map(movie_id_to_index) return ( unique_uids, sps.csr_matrix( (np.ones(df.shape[0]), (row, col)), shape=(len(unique_uids), len(movie_id_to_index)), ),