from pyspark.mllib.recommendation import ALS


class Recommender(DataHandler):
    def __init__(self, spark_session, sqlContext, jdbcUrl, table_name):
        super().__init__(spark_session, sqlContext, jdbcUrl, table_name)
        self.best_rank = 24
        self.best_iteration = 10
        self.regularization_parameter = 0.1
        self.model = None  # will hold the trained ALS matrix-factorization model.

    def train_model(self):
        # The MovieLens dataset has explicit ratings, so the explicit-feedback trainer
        # (ALS.train) would be the natural choice, but in practice (see:
        # https://stackoverflow.com/questions/26213573/apache-spark-als-collaborative-filtering-results-they-dont-make-sense)
        # it doesn't give reasonable recommendations here, so we train on implicit feedback instead.
        model = ALS.trainImplicit(self.data.rdd.persist(),
                                  rank=self.best_rank,
                                  iterations=self.best_iteration,
                                  lambda_=self.regularization_parameter)
        self.model = model

    def generate_recommendation(self,
                                existing_user_id=None,
                                external_user_id=None):
        data_rdd = self.data.rdd
        if existing_user_id:
            # Generate recommendations for a particular user that already exists in the ratings dataset.
            rated_movies_by_user = self.rated_movies[existing_user_id]  # ids of the movies this user has already rated.
            unrated_movs = data_rdd.filter(lambda x: x[1] not in rated_movies_by_user) \
                .map(lambda s: (existing_user_id, s[1])) \
                .distinct()  # unique (user, movie) pairs for the unrated movies
            raw_recommendations = self.model.predictAll(unrated_movs)

        else:
            # Generate recommendations for the most recently added user.
            new_id = self.NEW_USERID
            rated_movies_by_user = self.rated_movies[new_id]
            unrated_movs = data_rdd.filter(lambda x: x[1] not in rated_movies_by_user) \
                .map(lambda s: (new_id, s[1])) \
                .distinct()
            if external_user_id:
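                # Re-label each prediction with the caller's external user id.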
                raw_recommendations = self.model.predictAll(unrated_movs) \
                    .map(lambda x: (external_user_id, x[1], x[2]))
            else:
                raw_recommendations = self.model.predictAll(unrated_movs)

        return raw_recommendations
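Below is a minimal usage sketch for this example. It assumes DataHandler wires up self.data (the ratings dataframe) and self.rated_movies, and that it only needs sqlContext.read.jdbc, so the SparkSession can stand in for the legacy SQLContext; the JDBC URL, table name, and user id are illustrative placeholders, not part of the original code.

from pyspark.sql import SparkSession

spark = SparkSession.builder.appName("recommender").getOrCreate()
jdbc_url = "jdbc:postgresql://localhost:5432/movielens"  # placeholder URL

recommender = Recommender(spark, spark, jdbc_url, "ratings")
recommender.train_model()
# Top 10 predicted scores for a (hypothetical) existing user with id 42:
top_10 = recommender.generate_recommendation(existing_user_id=42) \
    .takeOrdered(10, key=lambda r: -r[2])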
Example #2
from pyspark.mllib.recommendation import ALS
from pyspark.sql.types import FloatType, IntegerType


class Recommender:
    def __init__(self, spark_session, sqlContext, jdbcUrl, table_name):
        self.spark_session = spark_session
        self.jdbcUrl = jdbcUrl
        self.sqlContext = sqlContext
        self.table_name = table_name
        self.data = self._process_data()
        self.columns = self.data.columns
        self.NEW_USERID = 610  # last user's id in ratings dataframe.
        self.rated_movies = dict()  # keys: ids of new users, values: corresponding movie ids. ==> {user_id: movie_ids}
        self.best_rank = 24
        self.best_iteration = 10
        self.regularization_parameter = 0.1
        self.model = None  # will hold the trained ALS matrix-factorization model.

    def _process_data(self):
        raw_data = self.sqlContext.read.jdbc(url=self.jdbcUrl, table=self.table_name)
        cols = raw_data.columns
        if 'rating' in cols:  # if raw_data is ratings dataset
            data = raw_data.withColumn(cols[0], raw_data[cols[0]].cast(IntegerType()))  # convert dtype of userId to integer
            data = data.withColumn(cols[1], data[cols[1]].cast(IntegerType()))  # convert dtype of movieId to integer
            data = data.withColumn(cols[2], data[cols[2]].cast(FloatType()))  # convert dtype of rating to float
            # shuffle rows into 18 partitions so the data is spread evenly across the 3 machines, then persist it.
            data = data.repartition(18).persist()
        else:
            data = raw_data.repartition(18).persist()
        return data

    def add_new_user(self, movie_ids):
        """
        user_id: integer.
            id of new user
        movie_ids: list.
            list of user provided movie ids (string).
        """
        self.NEW_USERID += 1
        rated_movies = [(self.NEW_USERID, int(i), 5.0) for i in movie_ids]  # [(user id, movie id, rating), ...]
        # convert the "rated_movies" list to an RDD via the SparkContext.
        new_user_ratings_rdd = self.spark_session.sparkContext.parallelize(rated_movies)
        # convert new_user_ratings_rdd to a Spark dataframe.
        new_user_ratings_df = new_user_ratings_rdd.toDF(schema=self.columns)
        # store the ids as integers so the membership check in generate_recommendation works.
        self.rated_movies[self.NEW_USERID] = [int(i) for i in movie_ids]
        self.data = self.data.union(new_user_ratings_df)  # append the new user's ratings to the existing data

    def train_model(self):
        # The MovieLens dataset has explicit ratings, so the explicit-feedback trainer
        # (ALS.train) would be the natural choice, but in practice (see:
        # https://stackoverflow.com/questions/26213573/apache-spark-als-collaborative-filtering-results-they-dont-make-sense)
        # it doesn't give reasonable recommendations here, so we train on implicit feedback instead.
        model = ALS.trainImplicit(self.data.rdd.persist(),
                                  rank=self.best_rank,
                                  iterations=self.best_iteration,
                                  lambda_=self.regularization_parameter)
        self.model = model

    def generate_recommendation(self, existing_user_id=None):
        data_rdd = self.data.rdd
        if existing_user_id:
            # Generate recommendations for a particular user that already exists in the ratings dataset.
            rated_movies_by_user = self.rated_movies[existing_user_id]  # ids of the movies this user has already rated.
            unrated_movs = data_rdd.filter(lambda x: x[1] not in rated_movies_by_user) \
                .map(lambda s: (existing_user_id, s[1])) \
                .distinct()  # unique (user, movie) pairs for the unrated movies
            raw_recommendations = self.model.predictAll(unrated_movs)

        else:
            # Generate recommendations for the most recently added user.
            new_id = self.NEW_USERID
            rated_movies_by_user = self.rated_movies[new_id]
            unrated_movs = data_rdd.filter(lambda x: x[1] not in rated_movies_by_user) \
                .map(lambda s: (new_id, s[1])) \
                .distinct()
            raw_recommendations = self.model.predictAll(unrated_movs)

        return raw_recommendations
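And a sketch of the cold-start flow this second example supports, reusing the placeholder session and JDBC settings from the sketch above; the movie ids are arbitrary.

recommender = Recommender(spark, spark, jdbc_url, "ratings")
recommender.add_new_user(["1", "50", "296"])  # the new user rates these three movies 5.0
recommender.train_model()  # retrain so the new user appears in the factor matrices
# Top 10 predicted scores for the newly added user:
recommendations = recommender.generate_recommendation().takeOrdered(10, key=lambda r: -r[2])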