Exemple #1
0
    def test_is_empty_dataframe(self):
        """ Check _is_empty_dataframe on a dataframe with one row and on a filtered copy with none. """
        schema = StructType([StructField('col1', StringType())])
        populated_df = utils.create_dataframe(Row(col1='la'), schema=schema)

        # A dataframe holding a row must not be reported as empty.
        self.assertFalse(candidate_sets._is_empty_dataframe(populated_df))

        # The only row has col1 == 'la', so filtering on 'laa' keeps nothing.
        filtered_df = populated_df.select('*').where(f.col('col1') == 'laa')
        self.assertTrue(candidate_sets._is_empty_dataframe(filtered_df))
def get_user_name_and_user_id(params: RecommendationParams, users):
    """ Get users from top artist candidate set.

        Args:
            params: RecommendationParams class object.
            users: list of user names to generate recommendations for. When the
                   list is empty (or None), every user in the top artist
                   candidate set is selected.

        Returns:
            users_df: dataframe of distinct user id and user name pairs.

        Raises:
            EmptyDataframeExcpetion: if _is_empty_dataframe reports that the
                                     resulting dataframe is empty.
    """
    candidate_set_df = params.top_artist_candidate_set_df
    users_df = candidate_set_df.select('user_id', 'user_name')

    # A truthiness check (instead of len(users) == 0) matches the check used by
    # get_candidate_set_rdd_for_user and does not fail when users is None.
    if users:
        users_df = users_df.where(candidate_set_df.user_name.isin(users))

    users_df = users_df.distinct()

    if _is_empty_dataframe(users_df):
        raise EmptyDataframeExcpetion('No active users found!')

    return users_df
def get_candidate_set_rdd_for_user(candidate_set_df, users):
    """ Get candidate set RDD for the given users.

        Args:
            candidate_set_df: A dataframe with user_id and recording_id columns for
                              all users. A user_name column is also read when
                              users is non-empty.
            users: list of user names to generate recommendations for. When empty,
                   the rows of all users are kept.

        Returns:
            candidate_set_rdd: An RDD of (user_id, recording_id) tuples.

        Raises:
            EmptyDataframeExcpetion: if _is_empty_dataframe reports that the
                                     selected dataframe is empty.
    """
    if users:
        # Filter on user_name before projecting, so the predicate never refers
        # to a column that the projection below has already dropped.
        candidate_set_df = candidate_set_df.where(col('user_name').isin(users))

    candidate_set_user_df = candidate_set_df.select('user_id', 'recording_id')

    if _is_empty_dataframe(candidate_set_user_df):
        raise EmptyDataframeExcpetion('Empty Candidate sets!')

    candidate_set_rdd = candidate_set_user_df.rdd.map(lambda r: (r['user_id'], r['recording_id']))

    return candidate_set_rdd