Example #1
class duckSVD(AlgoBase):
    def __init__(self):
        """
        Le SVD avec un top1 à la fin
        """
        AlgoBase.__init__(self)
        self.SVD = SVD()

    def fit(self, trainset):
        AlgoBase.fit(self, trainset)
        self.SVD.fit(trainset)
        return self

    def preprocess(self, test_set):
        # For each user, keep the single item with the highest SVD estimate.
        self.predicted = dict()
        possible_prediction = defaultdict(list)
        for u, i, _ in test_set:
            possible_prediction[u].append((i, self.SVD.estimate(u, i)))
        for u in possible_prediction:
            max_sim = -1
            for el in possible_prediction[u]:
                if float(el[1]) > max_sim:
                    max_sim = el[1]
                    self.predicted[int(u)] = int(el[0])


    def estimate(self, u, i):
        return -1
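A minimal driver for this example, given as an assumption about how it is meant to be used (duckSVD comes from the snippet above, everything else from Surprise): it builds a per-user candidate list in inner ids, since SVD.estimate expects inner ids, and lets preprocess pick one top-1 item per user.

from itertools import islice

from surprise import Dataset

data = Dataset.load_builtin('ml-100k')
trainset = data.build_full_trainset()

algo = duckSVD()
algo.fit(trainset)

# For a handful of users, every item they have not rated in the trainset.
candidates = [(u, i, None)
              for u in list(trainset.all_users())[:20]
              for i in trainset.all_items()
              if i not in {j for j, _ in trainset.ur[u]}]

algo.preprocess(candidates)
print(dict(islice(algo.predicted.items(), 5)))  # user -> top-1 item (inner ids)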
Example #2
class GlobalProportionAlgo(AlgoBase):
    def __init__(self, cat_products, cat_target):
        """
        Cette méthode consiste à recommander peu à peu des objets en prenant à chaque fois l'objet avec la meilleure similarité
        dans la catégorie des objets qui est le plus loin de sa valeur cible en proportion parmi les résultat déjà obtenus
        """

        AlgoBase.__init__(self)
        # The model that gives us the estimated ratings \hat{r}_ij.
        self.SVD = SVD()

        # The partnership information (item categories and target proportions)
        self.cat_products = cat_products
        self.cat_target = cat_target

    def fit(self, trainset):
        AlgoBase.fit(self, trainset)
        self.SVD.fit(trainset)
        return self


    def preprocess(self, test_set):
        C = len(self.cat_target)
        self.predicted = dict()
        heaps = [[] for _ in range(C)]
        n = 0
        current_prop = np.zeros(C)
        # We use C heaps (one per category) to gather the similarities.
        # Estimates are negated so the min-heap pops the best estimate first.
        for u, i, _ in test_set:
            heapq.heappush(heaps[self.cat_products[i]], (-self.SVD.estimate(u, i), u, i))
        while True:
            if n == 0:
                # Nothing selected yet: start with any non-empty category.
                selected_category = np.argmax(np.array([heap != [] for heap in heaps]))
            else:
                # Pick the category that is furthest below its target proportion.
                status = current_prop - self.cat_target * (n + 1)
                status = np.abs(np.clip(status, a_min=None, a_max=0))
                for c in range(C):
                    if not heaps[c]:
                        status[c] = -1
                selected_category = np.argmax(status)

            continu = True
            while heaps[selected_category] != [] and continu:
                neg_est, u, i = heapq.heappop(heaps[selected_category])
                if int(u) not in self.predicted:
                    self.predicted[int(u)] = int(i)
                    current_prop[selected_category] += 1
                    n += 1
                    continu = False
            if all(heap == [] for heap in heaps):
                return


    def estimate(self, u, i):
        return -1
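The constructor arguments are not defined in the snippet; judging from how preprocess indexes them, cat_products maps each item id (as it appears in the candidate triples) to a category index and cat_target is an array of per-category target proportions. A hypothetical setup, with arbitrarily chosen partner items:

import numpy as np
from surprise import Dataset

data = Dataset.load_builtin('ml-100k')
trainset = data.build_full_trainset()

# Hypothetical: two categories, "partner" items (category 0) vs. everything else.
partner_items = {0, 1, 2}  # inner item ids, chosen arbitrarily for illustration
cat_products = {i: 0 if i in partner_items else 1 for i in trainset.all_items()}
cat_target = np.array([0.3, 0.7])  # aim for 30% of recommendations in the partner category

algo = GlobalProportionAlgo(cat_products, cat_target)
algo.fit(trainset)
# preprocess then takes the same kind of (user, item, rating) candidate list as in Example #1.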
Example #3
class PerUserAlgo(AlgoBase):
    def __init__(self, cat_products, cat_target):
        """
        Cette fonction décide de manière aléatoire pondéré par la fréquence cible
        pour chaque utilisateur dans quelle catégorie tirer les résultats.
        """
        AlgoBase.__init__(self)

        # The model that gives us the estimated ratings \hat{r}_ij.
        self.SVD = SVD()

        # The partnership information (item categories and target proportions)
        self.cat_products = cat_products
        self.cat_target = cat_target

    def fit(self, trainset):
        AlgoBase.fit(self, trainset)
        self.SVD.fit(trainset)
        return self

    def preprocess(self, test_set):
        self.predicted = dict()
        possible_prediction = defaultdict(list)
        for u, i, _ in test_set:
            possible_prediction[u].append((i, self.SVD.estimate(u,i),self.cat_products[i]))

        for u in possible_prediction:
            # Restrict the target distribution to the categories that actually have
            # candidates for this user, then renormalize.
            custom_target = np.zeros(len(self.cat_target))
            for i in range(len(self.cat_target)):
                if [el for el in possible_prediction[u] if el[2] == i] != []:
                    custom_target[i] = self.cat_target[i]
            custom_target /= np.sum(custom_target)
            # Draw a category at random according to the renormalized target frequencies.
            selected_category = np.random.choice(np.arange(0, len(self.cat_target)), p=custom_target)
            selected_possible = [el for el in possible_prediction[u] if el[2] == selected_category]

            # Within the drawn category, keep the item with the highest estimate.
            max_sim = -1
            for el in selected_possible:
                if el[1] > max_sim:
                    max_sim = el[1]
                    self.predicted[int(u)] = int(el[0])



    def estimate(self, u, i):
        return -1
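The per-user category draw at the core of this example is just a renormalized weighted choice; a tiny standalone illustration with made-up numbers:

import numpy as np

cat_target = np.array([0.2, 0.5, 0.3])      # global target proportions (made up)
available = np.array([True, False, True])   # categories that have candidates for this user
custom_target = np.where(available, cat_target, 0.0)
custom_target /= custom_target.sum()        # renormalize over the available categories
selected_category = np.random.choice(len(cat_target), p=custom_target)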
Example #4
class RecommenderSVDSimilarUsers(Recommender):
    """
        Instead of building a new dataset when a new user comes in, we find similar users
        and, based on them, try to find similar movies.
    """
    def __init__(self, movies):
        super(RecommenderSVDSimilarUsers, self).__init__(movies)
        self.algorithm = SVD()

    def fit(self, dataset):
        return self.algorithm.fit(dataset)

    def test(self, test_set):
        return self.algorithm.test(test_set)

    def get_recommendation(self, watched, k=20, k_inner_item=5):
        # get dataset
        full_dataset = self.algorithm.trainset

        # watched movies
        watched = {
            full_dataset.to_inner_iid(key): value
            for key, value in watched.items()
        }

        # get similar users
        similar_users = self.get_similar_user_ids(watched, k=k_inner_item)

        # For every unwatched movie, accumulate similarity-weighted predictions from the similar users.
        candidates = defaultdict(float)
        for inner_move_id in range(0, full_dataset.n_items):
            if inner_move_id not in watched:
                movie_id = full_dataset.to_raw_iid(inner_move_id)
                for inner_user_id, similarity in similar_users.items():
                    prediction = self.algorithm.estimate(
                        inner_user_id, inner_move_id)
                    candidates[movie_id] += similarity * prediction

        # heapq.nlargest(k, candidates.items(), key=itemgetter(1))
        return self.movies.get_movie_by_movie_ids(
            heapq.nlargest(k, candidates, key=candidates.get))
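The class relies on self.get_similar_user_ids, which is not shown here. One plausible sketch of such a helper (an assumption, not the original author's code): build a rating-weighted pseudo-profile from the watched items' SVD factors and rank training users by cosine similarity to it.

import heapq
import numpy as np

def get_similar_user_ids(algorithm, watched, k=5):
    """Hypothetical helper: `watched` maps inner item ids to ratings."""
    # Rating-weighted pseudo-profile in the latent item-factor space.
    profile = np.sum([algorithm.qi[iid] * rating for iid, rating in watched.items()], axis=0)
    profile /= np.linalg.norm(profile) + 1e-12

    # Cosine similarity between every training user's factors and the profile.
    sims = {}
    for inner_uid in range(algorithm.trainset.n_users):
        pu = algorithm.pu[inner_uid]
        sims[inner_uid] = float(np.dot(pu, profile) / (np.linalg.norm(pu) + 1e-12))

    top = heapq.nlargest(k, sims, key=sims.get)
    return {uid: sims[uid] for uid in top}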
Example #5
lf = LoadFoods()
data = lf.loadFoodData()
trainset = data.build_full_trainset()

np.random.seed(0)
random.seed(0)

print('training the model ....')
svd = SVD()
svd.fit(trainset)

test_user = '******'
k = 10
test_user_innerID = trainset.to_inner_uid(test_user)

userNRFID = getAntiUserFoodID(test_user_innerID)

pred_ratings = {}
for foodID in userNRFID:
    pred = svd.estimate(test_user_innerID, foodID)
    foodName = lf.getFoodName(foodID)
    pred_ratings[foodName] = pred

top_k_predictions = heapq.nlargest(k,pred_ratings,key = lambda x: pred_ratings[x])

print('now predicting ....')
for prediction in top_k_predictions:
    print(prediction)
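getAntiUserFoodID (like LoadFoods) is a project-specific helper that is not shown; a plausible stand-in, assuming it should return the inner ids of foods the user has not rated:

def getAntiUserFoodID(user_inner_id):
    # Hypothetical stand-in: inner ids of all items the user has not rated in the trainset.
    rated = {iid for iid, _ in trainset.ur[user_inner_id]}
    return [iid for iid in trainset.all_items() if iid not in rated]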
    

Example #6
class MovieLens(gym.Env):
    def __init__(self,
                 embedding_dimension=20,
                 n_items_to_recommend=4,
                 seed=0,
                 n_users=40,
                 n_items=500,
                 normalize_reward=False):
        """
        Environment that models some sequential recommendation process by using MovieLens Dataset
        PMF (Probabilistic Matrix Factorization) is performed to obtain user/item embeddings

        :param embedding_dimension: size of the user/item embeddings
        :param n_items_to_recommend:  number of items to recommend actions is a list of that size
        :param seed:
        :param n_users: number of users
        :param n_items: number of items
        :param normalize_reward: normalize [1,5] ranks to [-1,1] rewards
        """
        self.normalize_reward = normalize_reward
        self.embedding_dimension = embedding_dimension
        self.n_rec = n_items_to_recommend
        self.seed(seed)
        # Load the movielens-100k dataset (download it if needed),
        data = Dataset.load_builtin('ml-100k')

        # sample random trainset and testset
        # test set is made of 25% of the ratings.
        self.trainset, self.testset = train_test_split(data, test_size=.25)

        self.algo = SVD(n_factors=self.embedding_dimension, biased=False)
        self.algo.fit(self.trainset)

        self.users = self.algo.pu[:n_users]
        self.items = self.algo.qi[:n_items]

        self.n_users = len(self.users)
        self.n_items = len(self.items)

        if self.n_users < n_users:
            warnings.warn("Only %d users are available in dataset" %
                          self.n_users)
        if self.n_items < n_items:
            warnings.warn("Only %d items are available in dataset" %
                          self.n_items)

        self.Users = {}
        for i in range(self.n_users):
            user = User(id=i, embedding=self.users[i])
            self.Users[user.id] = user

        self.Items = {}
        for j in range(self.n_items):
            item = Item(id=j, embedding=self.items[j], use_until=np.inf)
            self.Items[item.id] = item

        self.active_uid = self.np_random.choice(range(self.n_users))
        self.bought_items = defaultdict(set)
        # logs
        self.steps_count = 0
        self.info = {}

        # TODO: make action and observation space. checkout robotics envs + FlattenDictWrapper
        # https://github.com/openai/gym/tree/5404b39d06f72012f562ec41f60734bd4b5ceb4b/gym/envs/robotics
        self.action_space = None
        self.observation_space = None

    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def _get_observation(self):
        """
        Get items available for recommendation for `self.active_uid` user
        i.e. the items the user hasn't interacted with (hasn't received as a recommendation)

        :return: (user_repr, possible_items):
                user_repr - User
                possible_items - list of Items
        """
        pos = 0
        self.item_pos2id = {}
        possible_items = []

        for i in set(range(self.n_items)) - self.bought_items[self.active_uid]:
            possible_items.append(self.Items[i])
            self.item_pos2id[pos] = i
            pos += 1

        self.action_space = NDiscreteTuple(Discrete(len(possible_items)),
                                           self.n_rec)
        self.observation_space = None
        return self.Users[self.active_uid], possible_items

    def _reward(self, action):
        """
        Compute the reward as the scalar product of the user and item embeddings obtained by PMF.
        Normalize if `self.normalize_reward` is True.

        :param action: array of indices of size `self.n_rec` into the possible items
        :return: sum of the per-item rewards
        """
        assert len(action) == self.n_rec
        uid = self.active_uid
        rewards = []
        iids = []
        for a in action:
            iid = self.item_pos2id[a]
            r = self.algo.estimate(u=uid, i=iid)
            if self.normalize_reward:
                # Map ratings in [1, 5] to rewards in [-1, 1].
                r = 0.5 * (r - 3)
            rewards.append(r)
            self.bought_items[uid].add(iid)
            iids.append(iid)

        self.info = {
            'rewards': rewards,
            'recs': iids,
        }

        return np.sum(rewards)

    def _evolve(self):
        """
        Choose the next active user uniformly at random among users who still have items to recommend
        :return:
        """
        users_to_play = []
        for i in range(self.n_users):
            if len(self.bought_items[i]) < (self.n_items - self.n_rec + 1):
                users_to_play.append(i)

        if len(users_to_play) == 0:
            for i in range(self.n_users):
                print(len(self.bought_items[i]))
        self.active_uid = self.np_random.choice(users_to_play)

    def step(self, action):
        """

        :param action: array of indices of size `self.n_rec` into the possible items
        :return: observation: (user_repr, possible_items)
                 reward:  sum of scores for each item in the action
                 done:  always False
                 info:
        """
        self.steps_count += 1
        self.info = {}
        reward = self._reward(action)
        self._evolve()
        observation = self._get_observation()
        done = False
        info = self.info
        return observation, reward, done, info

    def reset(self):
        """
        :return: initial observation
        """
        observation = self._get_observation()
        return observation

    def render(self, mode='human'):
        pass
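A short, hypothetical rollout with random actions (it assumes the imports used by the class above, e.g. gym, numpy, surprise, and the User/Item/NDiscreteTuple helpers, are available):

import numpy as np

env = MovieLens(n_users=10, n_items=100, n_items_to_recommend=4)
user, possible_items = env.reset()

for _ in range(3):
    # Recommend n_rec distinct positions among the currently possible items.
    action = np.random.choice(len(possible_items), size=env.n_rec, replace=False)
    (user, possible_items), reward, done, info = env.step(action)
    print(reward, info['recs'])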