def __init__(self, min_values=0, threshold=0):
     """Store the two tuning parameters and load the movie catalogue.

     ``min_values`` and ``threshold`` are kept unchanged on the instance.
     The fitted state (``uim``, ``p_movies``, ``unique_rated_movies``)
     starts out as ``None``.
     """
     self.min_values, self.threshold = min_values, threshold
     self.uim = None
     # Keep both the MovieData wrapper and the table it exposes.
     catalog = MovieData('../data/movies.dat')
     self.md = catalog
     self.movies = catalog.get_movies()
     self.p_movies = self.unique_rated_movies = None
Esempio n. 2
0
class RandomPredictor:
    """Baseline predictor that gives every catalogue movie a random rating."""

    def __init__(self, min_ocena, max_ocena):
        """Remember the inclusive rating bounds and load the movie table."""
        self.min_ocena = min_ocena
        self.max_ocena = max_ocena
        self.uim = None
        catalog = MovieData('../data/movies.dat')
        self.md = catalog.get_movies()

    def fit(self, X):
        """Keep a reference to the rating data; nothing is learned."""
        self.uim = X

    def predict(self, user_id):
        """Return ``{movie id: random integer rating}`` for all movies.

        Each rating is drawn with ``random.randint(min_ocena, max_ocena)``,
        so both bounds are inclusive. A movie id that appears more than once
        in the movie table is rated only the first time it is seen.
        """
        # The user's rows are fetched but not used for the prediction.
        # NOTE(review): presumably kept so an unknown user fails early - confirm.
        user = self.uim.get_user(user_id)

        ratings = {}
        for _, row in self.md.iterrows():
            movie_id = row["id"]
            if movie_id in ratings:
                continue
            ratings[movie_id] = random.randint(self.min_ocena, self.max_ocena)
        return ratings
Esempio n. 3
0
 def __init__(self):
     """Load the movie catalogue and leave all fitted state unset."""
     self.uim = None
     # Keep both the MovieData wrapper and the table it exposes.
     catalog = MovieData('../data/movies.dat')
     self.md = catalog
     self.movies = catalog.get_movies()
     self.p_movies = self.unique_rated_movies = None
Esempio n. 4
0
class SlopeOnePredictor:
    """Weighted Slope One rating predictor.

    ``fit`` precomputes, for every ordered pair of distinct rated movies, the
    mean rating deviation between them together with the number of users who
    rated both. ``predict`` combines those deviations with one user's own
    ratings, weighting each deviation by its number of co-raters.
    """

    def __init__(self):
        """Load the movie catalogue and leave all fitted state unset."""
        self.uim = None
        self.md = MovieData('../data/movies.dat')
        self.movies = self.md.get_movies()
        self.p_movies = None
        self.unique_rated_movies = None

    def fit(self, X):
        """Precompute the deviation table from the ratings in ``X``.

        ``X`` must provide ``get_df()`` returning a DataFrame with the columns
        ``userID``, ``movieID`` and ``rating``. ``predict`` additionally
        expects the frame's index to hold the user id of each row.

        After the call ``self.p_movies`` has the columns ``movieID_x``,
        ``movieID_y`` and ``similarity_d``; the last one holds the
        ``(deviation, co-rater count)`` tuple returned by ``similar_d``.
        """
        self.uim = X.get_df()
        self.unique_rated_movies = pd.DataFrame(self.uim["movieID"].unique(),
                                                columns=["movieID"])
        self.unique_rated_movies = self.unique_rated_movies.set_index(
            "movieID", drop=False)
        # Merging on a constant key yields the cartesian product of movie ids.
        self.unique_rated_movies["key"] = 1
        cartesian_p = pd.merge(self.unique_rated_movies,
                               self.unique_rated_movies,
                               on="key")[["movieID_x", "movieID_y"]]
        # Drop the (m, m) pairs; copy so the column added below is written to
        # an independent frame rather than to a slice.
        cartesian_p = cartesian_p[
            cartesian_p["movieID_x"] != cartesian_p["movieID_y"]].copy()
        # A plain loop (instead of DataFrame.apply) also works when there are
        # no pairs at all, e.g. when only a single movie has been rated.
        deviations = [
            self.similar_d(movie_x, movie_y)
            for movie_x, movie_y in zip(cartesian_p["movieID_x"],
                                        cartesian_p["movieID_y"])
        ]
        cartesian_p["similarity_d"] = pd.Series(deviations,
                                                index=cartesian_p.index,
                                                dtype=object)
        self.p_movies = cartesian_p

    def predict(self, user_id):
        """Predict ratings for every movie ``user_id`` has not rated yet.

        Returns ``{movie id (int): predicted rating}``. The dictionary is
        empty when the user has no rows in the fitted data. A movie that
        shares no raters with any of the user's movies is predicted as 0.0.
        ``fit`` must have been called first.
        """
        pred = {}

        # Rows of this user only; the index of the rating frame is the user id.
        user_rows = self.uim.loc[self.uim.index == user_id]
        if user_rows.empty:
            return pred

        rated_ids = user_rows["movieID"].tolist()
        own_rating = dict(zip(rated_ids, user_rows["rating"]))

        # Movies the user has not rated, in order of first appearance.
        candidates = self.uim.loc[~self.uim["movieID"].isin(rated_ids),
                                  "movieID"].drop_duplicates()

        # Index the relevant part of the deviation table once, instead of
        # scanning the whole table for every (candidate, rated) pair.
        relevant = self.p_movies[self.p_movies["movieID_y"].isin(rated_ids)]
        deviation_of = {
            (movie_x, movie_y): dev
            for movie_x, movie_y, dev in zip(relevant["movieID_x"],
                                             relevant["movieID_y"],
                                             relevant["similarity_d"])
        }

        for target in candidates:
            estimates = []
            weights = []
            for rated in rated_ids:
                deviation, support = deviation_of[(target, rated)]
                estimates.append(own_rating[rated] - deviation)
                weights.append(support)

            weights = np.array(weights)
            total_support = np.sum(weights)
            if total_support == 0:
                pred[int(target)] = 0.0
            else:
                pred[int(target)] = float(
                    np.sum(np.array(estimates) * weights) / total_support)

        return pred

    def similar_d(self, p1, p2):
        """Return ``(deviation, count)`` for the movie pair ``(p1, p2)``.

        ``count`` is the number of users who rated both movies. ``deviation``
        is the negated mean of ``rating(p1) - rating(p2)`` over those users.
        When no user rated both movies the result is ``(0.0, 0)``, so the
        pair carries zero weight in ``predict`` instead of producing NaN.
        """
        movie_ids = self.uim["movieID"]
        rows_1 = self.uim.loc[movie_ids == p1]
        rows_2 = self.uim.loc[movie_ids == p2]

        # Key each movie's ratings by user id so the subtraction aligns users.
        ratings_1 = pd.Series(rows_1["rating"].to_numpy(),
                              index=rows_1["userID"].to_numpy())
        ratings_2 = pd.Series(rows_2["rating"].to_numpy(),
                              index=rows_2["userID"].to_numpy())

        # Users who rated only one of the two movies become NaN and are dropped.
        differences = (ratings_1 - ratings_2).dropna()
        count = len(differences)
        if count == 0:
            return (0.0, 0)

        return (-(float(differences.sum()) / count), count)
Esempio n. 5
0
 def __init__(self, min_ocena, max_ocena):
     """Store the two rating bounds and load the movie table."""
     self.min_ocena, self.max_ocena = min_ocena, max_ocena
     self.uim = None
     # Only the table returned by get_movies() is kept, not the wrapper.
     catalog = MovieData('../data/movies.dat')
     self.md = catalog.get_movies()
Esempio n. 6
0
from code.UserItemData import UserItemData
from code.MovieData import MovieData
from code.RandomPredictor import RandomPredictor
import pandas as pd

# Demo script: fit a RandomPredictor and print its ratings for a few movies.
md = MovieData('../data/movies.dat')
uim = UserItemData('../data/user_ratedmovies.dat')
# The two arguments are the predictor's rating bounds.
rp = RandomPredictor(1, 5)
rp.fit(uim)
pred = rp.predict(78)  # predictions for user 78
print(type(pred))
# Movie ids whose title and predicted rating are printed below.
items = [1, 3, 20, 50, 100]
for item in items:
    print("Film: {}, ocena: {}".format(md.get_title(item), pred[item]))
class ItemBasedPredictor:
    """Item-based collaborative-filtering predictor.

    Movie-to-movie similarity is the cosine between two movies' ratings
    after each rating has been centred on the rating user's mean. ``fit``
    precomputes the similarity of every ordered pair of distinct rated
    movies; ``predict`` returns similarity-weighted averages of one user's
    own ratings.
    """

    def __init__(self, min_values=0, threshold=0):
        """Store the similarity cut-offs and load the movie catalogue.

        ``min_values`` is the minimum number of users who must have rated
        both movies, and ``threshold`` the minimum similarity; below either
        one ``similarity`` reports 0.0.
        """
        self.min_values = min_values
        self.threshold = threshold
        self.uim = None
        self.md = MovieData('../data/movies.dat')
        self.movies = self.md.get_movies()
        self.p_movies = None
        self.unique_rated_movies = None

    def fit(self, X):
        """Precompute the pairwise similarity table from the ratings in ``X``.

        ``X`` must provide ``get_df()`` returning a DataFrame with the columns
        ``userID``, ``movieID`` and ``rating``. ``predict`` additionally
        expects the frame's index to hold the user id of each row.

        After the call ``self.p_movies`` has the columns ``movieID_x``,
        ``movieID_y`` and ``similarity``.
        """
        self.uim = X.get_df()
        self.unique_rated_movies = pd.DataFrame(self.uim["movieID"].unique(), columns=["movieID"])
        self.unique_rated_movies = self.unique_rated_movies.set_index("movieID", drop=False)
        # Merging on a constant key yields the cartesian product of movie ids.
        self.unique_rated_movies["key"] = 1
        cartesian_p = pd.merge(self.unique_rated_movies, self.unique_rated_movies, on="key")[["movieID_x", "movieID_y"]]
        # Drop the (m, m) pairs; copy so the column added below is written to
        # an independent frame rather than to a slice.
        cartesian_p = cartesian_p[cartesian_p["movieID_x"] != cartesian_p["movieID_y"]].copy()
        # A plain loop (instead of DataFrame.apply) also works when there are
        # no pairs at all, e.g. when only a single movie has been rated.
        cartesian_p["similarity"] = [
            self.similarity(movie_x, movie_y)
            for movie_x, movie_y in zip(cartesian_p["movieID_x"], cartesian_p["movieID_y"])
        ]
        self.p_movies = cartesian_p

    def predict(self, user_id):
        """Predict ratings for every movie ``user_id`` has not rated yet.

        Returns ``{movie id (int): predicted rating}``. The dictionary is
        empty when the user has no rows in the fitted data, matching
        ``SlopeOnePredictor.predict``. A movie whose similarities to the
        user's movies sum to zero is predicted as 0.0. ``fit`` must have
        been called first.
        """
        pred = {}

        # Rows of this user only; the index of the rating frame is the user id.
        user_rows = self.uim.loc[self.uim.index == user_id]
        if user_rows.empty:
            return pred

        rated_ids = user_rows["movieID"].tolist()
        own_rating = dict(zip(rated_ids, user_rows["rating"]))
        # The user's ratings are the same for every candidate movie.
        ratings = np.array([own_rating[rated] for rated in rated_ids])

        # Movies the user has not rated, in order of first appearance.
        candidates = self.uim.loc[~self.uim["movieID"].isin(rated_ids), "movieID"].drop_duplicates()

        # Index the relevant part of the similarity table once, instead of
        # scanning the whole table for every (candidate, rated) pair.
        relevant = self.p_movies[self.p_movies["movieID_y"].isin(rated_ids)]
        similarity_of = {
            (movie_x, movie_y): value
            for movie_x, movie_y, value in zip(relevant["movieID_x"], relevant["movieID_y"], relevant["similarity"])
        }

        for target in candidates:
            weights = np.array([similarity_of[(target, rated)] for rated in rated_ids])
            total_weight = np.sum(weights)
            if total_weight == 0:
                pred[int(target)] = 0.0
            else:
                pred[int(target)] = float(np.sum(weights * ratings) / total_weight)

        return pred

    def similarity(self, p1, p2):
        """Return the similarity of movies ``p1`` and ``p2``.

        Only users who rated both movies contribute. Each of their ratings is
        centred on that user's mean over all of the user's ratings, and the
        cosine of the two centred vectors is returned. The result is 0.0 when
        the movies are identical, when fewer than ``min_values`` users rated
        both, when the centred ratings are not positively correlated, or
        when the cosine is below ``threshold``.

        Ratings are matched by user id rather than by row position, so the
        row order of the rating table does not affect the result.
        NOTE(review): assumes a user rates a given movie at most once.
        """
        if p1 == p2:
            return 0.0

        movie_ids = self.uim["movieID"]
        rows_1 = self.uim.loc[movie_ids == p1]
        rows_2 = self.uim.loc[movie_ids == p2]
        ratings_1 = pd.Series(rows_1["rating"].to_numpy(), index=rows_1["userID"].to_numpy())
        ratings_2 = pd.Series(rows_2["rating"].to_numpy(), index=rows_2["userID"].to_numpy())

        common_users = ratings_1.index.intersection(ratings_2.index)
        if len(common_users) < self.min_values or len(common_users) == 0:
            return 0.0

        # Mean rating of each common user over everything that user rated.
        # Only the rating column is averaged, so other columns of any dtype
        # in the table cannot break the aggregation.
        by_common = self.uim.loc[self.uim["userID"].isin(common_users)]
        user_mean = by_common["rating"].groupby(by_common["userID"].to_numpy()).mean()
        user_mean = user_mean.reindex(common_users)

        centred_1 = ratings_1.reindex(common_users) - user_mean
        centred_2 = ratings_2.reindex(common_users) - user_mean

        c = float((centred_1 * centred_2).sum())
        if c <= 0:
            return 0.0

        # c > 0 implies both squared norms are strictly positive.
        iml = float((centred_1 ** 2).sum())
        imr = float((centred_2 ** 2).sum())
        result = c / (math.sqrt(iml) * math.sqrt(imr))

        if result < self.threshold:
            return 0.0

        return result

    def mostSimilarFilms(self):
        """Print the 20 movie pairs with the highest precomputed similarity."""
        most_similar = self.p_movies.sort_values(by='similarity', ascending=False).head(n=20)
        for key, value in most_similar.iterrows():
            print("Film1: {}, Film2: {}, podobnost: {}".format(self.md.get_title(value["movieID_x"]),
                                                               self.md.get_title(value["movieID_y"]),
                                                               value["similarity"]))

    def similarItems(self, item, n):
        """Return the ``n`` movies most similar to ``item``.

        The result is a list of ``(movie id, similarity)`` pairs sorted by
        descending similarity. Similarities are recomputed on the fly rather
        than read from the table built by ``fit``.
        """
        films = self.uim.groupby("movieID")[["movieID"]].apply(lambda x: self.similarity(item, x.name))
        return sorted(list(dict(films).items()), key=lambda x: x[1], reverse=True)[0:n]
Esempio n. 8
0
from code.MovieData import MovieData
from code.Recommender import Recommender
from code.SlopeOnePredictor import SlopeOnePredictor
from code.UserItemData import UserItemData

# Evaluation of the recommender system
print("------------------------------")
print("Evalvacija priporočilnega sistema")
print("------------------------------")

md = MovieData('../data/movies.dat')
# Training ratings: filtered with min_ratings=1000 and end_date 1.1.2008.
uim = UserItemData('../data/user_ratedmovies.dat', min_ratings=1000, end_date='1.1.2008')
rp = SlopeOnePredictor()
rec = Recommender(rp)
rec.fit(uim)

# Test ratings: filtered with min_ratings=200 and start_date 2.1.2008.
uim_test = UserItemData('../data/user_ratedmovies.dat', min_ratings=200, start_date='2.1.2008')
# evaluate() returns five values, unpacked by the names below.
# NOTE(review): the meaning of the second argument (20) is not visible here - confirm in Recommender.
mse, mae, precision, recall, f = rec.evaluate(uim_test, 20)
print(mse, mae, precision, recall, f)
Esempio n. 9
0
from code.MovieData import MovieData
import pandas as pd

# Load the movie catalogue and print the title of the movie with id 1.
md = MovieData('../data/movies.dat')
print(md.get_title(1))
Esempio n. 10
0
from code.AveragePredictor import AveragePredictor
from code.ItemBasedPredictor import ItemBasedPredictor
from code.MovieData import MovieData
from code.RandomPredictor import RandomPredictor
from code.Recommender import Recommender
from code.SlopeOnePredictor import SlopeOnePredictor
from code.UserItemData import UserItemData
from code.ViewsPredictor import ViewsPredictor
import pandas as pd
import time

# Start timestamp; the code that reads it is not part of this excerpt.
start = time.time()

md = MovieData('../data/movies.dat')
uim = UserItemData('../data/user_ratedmovies.dat')

# Recommending with the random predictor
print("------------------------------")
print("Priporočanje z naključnim prediktorjem")
print("------------------------------")
rp = RandomPredictor(1, 5)
rec = Recommender(rp)
rec.fit(uim)
# Ask for 5 recommendations for user 78.
# NOTE(review): rec_seen=False presumably excludes movies the user already rated - confirm in Recommender.
rec_items = rec.recommend(78, n=5, rec_seen=False)
for idmovie, val in rec_items:
    print("Film: {}, ocena: {}".format(md.get_title(idmovie), val))

# Recommending with the average predictor
print("------------------------------")
print("Priporočanje s povprečnim prediktorjem")
print("------------------------------")
Esempio n. 11
0
 def __init__(self, b):
     """Store ``b``, load the movie table and start with an empty ``fitted`` dict."""
     self.b = b
     self.uim = None
     # Only the table returned by get_movies() is kept, not the wrapper.
     catalog = MovieData('../data/movies.dat')
     self.md = catalog.get_movies()
     self.fitted = dict()
 def __init__(self, predictor):
     """Keep the supplied predictor and load the movie table."""
     self.predictor = predictor
     self.uim = None
     # Only the table returned by get_movies() is kept, not the wrapper.
     catalog = MovieData('../data/movies.dat')
     self.md = catalog.get_movies()