def write_csv_output(dataframes, directory):
    """Write the users, movies and ratings dataframes as csv files.

    Writes "users.csv", "movies.csv" and "ratings.csv" (in that order) into
    `directory`, without the dataframe index.

    Args:
      dataframes: A (movies, users, ratings) triple of pandas DataFrames. The
        movies frame must have a "tag_id" column; it is left out of the
        written file.
      directory: Output directory. It is passed to `file_util.makedirs` with
        `exist_ok=True` before anything is written.

    Raises:
      KeyError: If the movies frame has no "tag_id" column.
    """
    movies, users, ratings = dataframes
    file_util.makedirs(directory, exist_ok=True)

    # This column isn't necessary. Drop it on a new frame rather than with
    # `del`, so the caller's DataFrame is not modified as a side effect.
    movies = movies.drop(columns=["tag_id"])

    outputs = (
        ("users.csv", users),
        ("movies.csv", movies),
        ("ratings.csv", ratings),
    )
    for filename, frame in outputs:
        # `to_csv` does not close a handle it was given, so close it here to
        # make sure the data is flushed before this function returns.
        with file_util.open(os.path.join(directory, filename), "w") as outfile:
            frame.to_csv(outfile, index=False)
 def _read_users(self, path):
     """Returns a dict of User objects.

     Args:
       path: Location of the users csv file, opened with `file_util.open`.
         The code reads the columns userId, sex, age, occupation and
         zip_code from it.

     Returns:
       A dict mapping each row's userId to the object built by
       `self._user_ctor`. The `budget` passed to the constructor is the
       user's entry in `self._responses`, or 0 when the user has none.
     """
     # Read the whole file up front and close the handle right away instead
     # of leaving it open until garbage collection.
     with file_util.open(path) as infile:
         user_df = pd.read_csv(infile)

     users = {}
     for _, row in user_df.iterrows():
         users[row.userId] = self._user_ctor(
             user_id=row.userId,
             sex=row.sex,
             age=row.age,
             occupation=row.occupation,
             zip_code=row.zip_code,
             budget=self._responses.get(row.userId, 0),
         )
     return users
def load_embeddings(env_config):
    """Attempts to load user and movie embeddings from a json or pickle file.

    The loader is chosen from the suffix of `env_config.embeddings_path`:
    ".json" uses `json`, ".pkl" / ".pickle" use `pickle`.

    Args:
      env_config: Object with the attributes `embeddings_path`,
        `embedding_movie_key` and `embedding_user_key`. The two keys are used
        to index the loaded mapping.

    Returns:
      A `types.SimpleNamespace` with `movies` and `users` attributes, each a
      numpy array built from the corresponding entry of the loaded mapping.

    Raises:
      ValueError: If the path suffix is not one of the recognized types.
    """
    path = env_config.embeddings_path
    suffix = pathlib.Path(path).suffix
    if suffix == ".json":
        loader = json
        logging.info("Reading a json file. %s", path)
    elif suffix in (".pkl", ".pickle"):
        # NOTE: unpickling executes arbitrary code from the file; only point
        # `embeddings_path` at pickle files from a trusted source.
        loader = pickle
        logging.info("Reading a pickle file. %s", path)
    else:
        raise ValueError("Unrecognized file type! %s" % path)

    # Binary mode works for both loaders (json.load accepts bytes input).
    # The context manager closes the handle even if loading fails.
    with file_util.open(path, "rb") as infile:
        embedding_dict = loader.load(infile)

    return types.SimpleNamespace(
        movies=np.array(embedding_dict[env_config.embedding_movie_key]),
        users=np.array(embedding_dict[env_config.embedding_user_key]),
    )
    def _read_movies(self, path):
        """Returns a dict of Movie objects.

        Args:
          path: Location of the movies csv file, opened with
            `file_util.open`. The code reads the columns movieId, title,
            genres (a "|"-separated string) and violence_tag_relevance.

        Returns:
          A dict mapping each row's movieId to the object built by
          `self._movie_ctor`. Genre names missing from `GENRE_MAP` are
          mapped to `OTHER_GENRE_IDX`.
        """
        # Read the whole file up front and close the handle right away
        # instead of leaving it open until garbage collection.
        with file_util.open(path) as infile:
            movie_df = pd.read_csv(infile)

        movies = {}
        for _, row in movie_df.iterrows():
            genres = [
                GENRE_MAP.get(genre, OTHER_GENRE_IDX)
                for genre in row.genres.split("|")
            ]
            # Sanity check on the parsed id type before using it as a key.
            assert isinstance(row.movieId, int)
            movie_id = row.movieId
            # `movie_vec` is left as None, and will be filled in later in the init
            # of this Dataset.
            movies[movie_id] = self._movie_ctor(
                movie_id,
                row.title,
                genres,
                vec=None,
                violence=row.violence_tag_relevance,
            )
        return movies
 def _read_responses(self, path):
     """Returns a dict containing the count of Response objects per user.

     Args:
       path: Location of the ratings csv file, opened with `file_util.open`.
         The code reads the columns userId and rating from it.

     Returns:
       A dict mapping each userId to the number of non-null `rating` values
       recorded for that user.
     """
     # Read the whole file up front and close the handle right away instead
     # of leaving it open until garbage collection.
     with file_util.open(path) as infile:
         df = pd.read_csv(infile)
     return df.groupby(by=["userId"]).rating.count().to_dict()