def get_user_profile(user_id):
    """Return a per-genre rating profile for one user as a JSON string.

    For every genre in ``converted_genres`` the profile holds the value of
    the first row of the combined averages minus the value of the second
    row. The combined frame is the result of ``calculate_avg_ratings`` over
    all ratings followed by the averages parsed from
    ``get_user_avg_rating(user_id)``.

    Args:
        user_id: Identifier forwarded unchanged to ``get_user_avg_rating``.

    Returns:
        ``empty_json`` when the ratings table is empty; otherwise the
        one-row profile serialised with ``to_json(orient='records')``.
    """
    session = client.create_session()
    client.create_keyspace(session, keyspace)
    session.set_keyspace(keyspace)
    session.row_factory = dict_factory

    ratings = client.get_data_table(session, keyspace, table)
    if ratings.empty:
        return empty_json

    ratings = ratings.drop(columns="rating_id")

    avg_ratings = calculate_avg_ratings(ratings, converted_genres)
    avg_user_ratings = pd.read_json(get_user_avg_rating(user_id),
                                    orient='records')

    # DataFrame.append was removed in pandas 2.0; pd.concat with the same
    # ignore_index/sort options is its documented replacement.
    combined = pd.concat([avg_ratings, avg_user_ratings],
                         ignore_index=True,
                         sort=False)

    # NOTE(review): row 1 is the user's row only if calculate_avg_ratings
    # yields exactly one row for the overall averages - confirm.
    overall_row = combined.iloc[0]
    user_row = combined.iloc[1]

    # Build the row from the genre list itself instead of a fixed
    # 20-element placeholder, so any number of genres is supported.
    differences = [overall_row[genre] - user_row[genre]
                   for genre in converted_genres]
    profile = pd.DataFrame([differences], columns=converted_genres)

    return profile.to_json(orient='records')
def delete_rating(json_rating):
    """Delete the rating(s) described by ``json_rating`` from the table.

    Args:
        json_rating: JSON accepted by ``pd.read_json(..., orient='records')``.

    Returns:
        The ``json_rating`` argument, unchanged.
    """
    session = client.create_session()
    client.create_keyspace(session, keyspace)
    session.set_keyspace(keyspace)
    session.row_factory = dict_factory

    # orient is the string literal 'records', as in get_ratings(); the bare
    # name `records` only works if such a global happens to exist.
    rating = pd.read_json(json_rating, orient='records')
    client.delete_rows(session, keyspace, table, rating)
    return json_rating
def add_rating(json_rating):
    """Store the rating(s) described by ``json_rating`` in the table.

    Args:
        json_rating: JSON accepted by ``pd.read_json(..., orient='records')``.

    Returns:
        The ``json_rating`` argument, unchanged.
    """
    session = client.create_session()
    client.create_keyspace(session, keyspace)
    session.set_keyspace(keyspace)
    session.row_factory = dict_factory

    # orient is the string literal 'records', as in get_ratings(); the bare
    # name `records` only works if such a global happens to exist.
    new_rating = pd.read_json(json_rating, orient='records')
    client.push_data_table(session, keyspace, table, new_rating)

    return json_rating
def get_ratings():
    """Return the contents of the ratings table as a JSON string.

    Returns:
        ``empty_json`` when the table read yields an empty frame; otherwise
        the frame without its ``rating_id`` column, serialised with
        ``to_json(orient='records')``.
    """
    db_session = client.create_session()
    client.create_keyspace(db_session, keyspace)
    db_session.set_keyspace(keyspace)
    db_session.row_factory = dict_factory

    all_ratings = client.get_data_table(db_session, keyspace, table)
    if not all_ratings.empty:
        # The row identifier is dropped before the data is serialised.
        without_ids = all_ratings.drop(columns="rating_id")
        return without_ids.to_json(orient='records')
    return empty_json
def get_user_avg_rating(user_id):
    """Return the average ratings of a single user as a JSON string.

    Args:
        user_id: Value convertible with ``int()``; rows whose ``userid``
            column equals it are selected.

    Returns:
        ``empty_json`` when the ratings table is empty; otherwise the result
        of ``calculate_avg_ratings`` for the user's rows, serialised with
        ``to_json(orient='records')``.

    Raises:
        ValueError: If ``user_id`` cannot be converted to ``int``.
    """
    session = client.create_session()
    client.create_keyspace(session, keyspace)
    session.set_keyspace(keyspace)
    session.row_factory = dict_factory

    ratings = client.get_data_table(session, keyspace, table)

    # Same empty-table guard as get_ratings() and get_avg_ratings().
    if ratings.empty:
        return empty_json

    # NOTE(review): the sort is kept from the original; the averaging
    # presumably does not depend on row order - confirm before removing.
    ratings = ratings.sort_values(by=['userid'])
    # The former bare `ratings.set_index('userid')` call was dropped: its
    # return value was discarded, so it never changed `ratings`.
    user_ratings = ratings[ratings['userid'] == int(user_id)]

    # orient is the string literal 'records': get_user_profile() parses this
    # output with orient='records'.
    return calculate_avg_ratings(user_ratings,
                                 converted_genres).to_json(orient='records')
def get_avg_ratings():
    """Return the average ratings over the whole table as a JSON string.

    Returns:
        ``empty_json`` when the ratings table is empty; otherwise the result
        of ``calculate_avg_ratings`` over all rows (without the
        ``rating_id`` column), serialised with ``to_json(orient='records')``.
    """
    session = client.create_session()
    client.create_keyspace(session, keyspace)
    session.set_keyspace(keyspace)
    session.row_factory = dict_factory

    ratings = client.get_data_table(session, keyspace, table)
    if ratings.empty:
        return empty_json

    ratings = ratings.drop(columns="rating_id")
    # orient is the string literal 'records', as in get_ratings(); the bare
    # name `records` only works if such a global happens to exist.
    return calculate_avg_ratings(ratings,
                                 converted_genres).to_json(orient='records')
Exemple #7
0
 def __init__(self):
     """Open a session on 127.0.0.1:9042 and prepare keyspace and tables.

     Stores the genre list from ``wtiproj03_ETL.get_genres_list()``, the
     keyspace and table names, the cluster and the session on the instance.
     It then calls the ``wtiproj06_cassandra_client`` helpers for the
     keyspace and the three tables, switches the session to the keyspace
     and sets its row factory to ``dict_factory``.
     """
     self.genres = wtiproj03_ETL.get_genres_list()

     # Names of the keyspace and of the three tables used by this object.
     keyspace_name = "user_ratings"
     self.keyspace = keyspace_name
     self.rating_table = "rating"
     self.user_avg_table = "user_avg"
     self.all_avg_table = "all_avg"

     self.cluster = Cluster(['127.0.0.1'], port=9042)
     session = self.cluster.connect()
     self.session = session

     # Schema helpers run before the session is bound to the keyspace.
     cassandra = wtiproj06_cassandra_client
     cassandra.create_keyspace(session, keyspace_name)
     cassandra.create_rating_table(session, keyspace_name,
                                   self.rating_table)
     cassandra.create_user_avg_table(session, keyspace_name,
                                     self.user_avg_table)
     cassandra.create_all_avg_table(session, keyspace_name,
                                    self.all_avg_table)

     session.set_keyspace(keyspace_name)
     session.row_factory = dict_factory
def init():
    """Load the rating and genre files, combine them and push the result.

    Reads ``user_ratedmovies.dat`` and ``movie_genres.dat`` (tab-delimited,
    header in the first row), joins them on ``movieID`` with
    ``join_tables``, passes the join to ``build_dataframe`` and pushes the
    first element of its result to the table through ``client``.
    Also sets NumPy's print threshold to ``sys.maxsize``.
    """
    np.set_printoptions(threshold=sys.maxsize)

    # Both input files share the same layout options.
    tsv_options = {"header": 0, "delimiter": "\t"}
    user_ratings = pd.read_csv("user_ratedmovies.dat",
                               usecols=['userID', 'movieID', 'rating'],
                               **tsv_options)
    movie_genres = pd.read_csv("movie_genres.dat",
                               usecols=['movieID', 'genre'],
                               **tsv_options)

    joined = join_tables(user_ratings, movie_genres, 'movieID')

    # build_dataframe returns a pair; only its first element is used here.
    merged, _genres = build_dataframe(joined, movie_genres)

    db_session = client.create_session()
    client.create_keyspace(db_session, keyspace)
    db_session.set_keyspace(keyspace)
    db_session.row_factory = dict_factory

    client.push_data_table(db_session, keyspace, table, merged)