Exemple #1
0
class SocialReg(MF):
    """
    Matrix factorization with a social regularization term.

    On top of the rating error, every SGD step pulls the latent vector of
    the current user towards the latent vectors of the users they follow and
    of the users following them. Each pull is weighted by a cached user-user
    similarity (see ``get_sim``).

    Ma H, Zhou D, Liu C, et al. Recommender systems with social regularization[C]//Proceedings of the fourth ACM international conference on Web search and data mining. ACM, 2011: 287-296.
    """
    def __init__(self):
        super(SocialReg, self).__init__()
        # self.config.lambdaP = 0.001
        # self.config.lambdaQ = 0.001
        self.config.alpha = 0.1  # weight of the social term in the P update
        self.tg = TrustGetter()

    def init_model(self, k):
        """Initialise the base model and cache user-followee similarities.

        ``k`` is forwarded unchanged to ``MF.init_model``
        (NOTE(review): presumably a fold index -- confirm against MF).
        """
        super(SocialReg, self).init_model(k)
        self.user_sim = SimMatrix()
        print('constructing user-user similarity matrix...')

        # self.user_sim = util.load_data('../data/sim/ft_cf_soreg08_cv1.pkl')

        for u in self.rg.user:
            for f in self.tg.get_followees(u):
                # Skip pairs that are already cached.
                if self.user_sim.contains(u, f):
                    continue
                self.user_sim.set(u, f, self.get_sim(u, f))

        # util.save_data(self.user_sim,'../data/sim/ft_cf_soreg08.pkl')

    def get_sim(self, u, k):
        """Return the similarity between the rating rows of users ``u`` and ``k``.

        The ``pearson_sp`` result x is mapped to (x + 1) / 2, which fits it
        into [0.0, 1.0] assuming ``pearson_sp`` returns a value in [-1, 1].
        """
        sim = (pearson_sp(self.rg.get_row(u), self.rg.get_row(k)) +
               1.0) / 2.0  # fit the value into range [0.0,1.0]
        return sim

    def train_model(self, k):
        """Run SGD until ``maxIter`` is reached or ``isConverged`` reports True.

        ``self.loss`` is reset every iteration and accumulates the rating
        loss, the followee part of the social term and the L2 penalties.
        """
        super(SocialReg, self).train_model(k)
        iteration = 0
        while iteration < self.config.maxIter:
            self.loss = 0
            for user, item, rating in self.rg.trainSet():
                u = self.rg.user[user]
                i = self.rg.item[item]
                error = rating - self.predict(user, item)
                self.loss += 0.5 * error**2
                # p and q index into self.P / self.Q; the in-place updates
                # below are therefore visible through them.
                p, q = self.P[u], self.Q[i]

                # Pull towards the users that `user` follows.
                social_term_p = np.zeros(self.config.factor)
                social_term_loss = 0.0
                followees = self.tg.get_followees(user)
                for followee in followees:
                    if self.rg.containsUser(followee):
                        s = self.user_sim[user][followee]
                        uf = self.P[self.rg.user[followee]]
                        social_term_p += s * (p - uf)
                        social_term_loss += s * ((p - uf).dot(p - uf))

                # Pull towards the users that follow `user`.
                social_term_m = np.zeros(self.config.factor)
                followers = self.tg.get_followers(user)
                for follower in followers:
                    if self.rg.containsUser(follower):
                        s = self.user_sim[user][follower]
                        ug = self.P[self.rg.user[follower]]
                        social_term_m += s * (p - ug)

                # update latent vectors
                self.P[u] += self.config.lr * (
                    error * q - self.config.alpha *
                    (social_term_p + social_term_m) - self.config.lambdaP * p)
                self.Q[i] += self.config.lr * (error * p -
                                               self.config.lambdaQ * q)

                self.loss += 0.5 * self.config.alpha * social_term_loss

            self.loss += 0.5 * self.config.lambdaP * (self.P * self.P).sum(
            ) + 0.5 * self.config.lambdaQ * (self.Q * self.Q).sum()

            iteration += 1
            if self.isConverged(iteration):
                break
Exemple #2
0
class TrustSVD(MF):
    """
    TrustSVD: matrix factorization with biases, implicit item feedback and
    implicit trust feedback.

    A prediction is ``Q[i] . (P[u] + sum_y + sum_w) + globalMean + Bi[i] +
    Bu[u]``, where ``sum_y`` aggregates the ``Y`` rows of the items the user
    rated and ``sum_w`` aggregates the ``W`` rows of the users they follow.

    Koren Y. Factor in the neighbors: Scalable and accurate collaborative filtering[J]. ACM Transactions on Knowledge Discovery from Data (TKDD), 2010, 4(1): 1.

    NOTE(review): the citation above is the one originally attached to this
    class; it appears to describe the neighbourhood/SVD++ family rather than
    TrustSVD itself -- confirm the intended reference.
    """
    def __init__(self):
        super(TrustSVD, self).__init__()

        self.config.lr = 0.005
        self.config.maxIter = 100
        self.config.lambdaP = 1.2
        self.config.lambdaQ = 1.2

        self.config.lambdaB = 1.2  # penalty on the bias terms Bu / Bi
        self.config.lambdaY = 1.2  # penalty on the implicit item factors Y
        self.config.lambdaW = 1.2  # penalty on the trust factors W
        self.config.lambdaT = 0.9  # weight of the trust reconstruction error

        self.tg = TrustGetter()
        self.init_model()

    def init_model(self):
        """Initialise the base model plus the bias vectors and the Y / W matrices."""
        super(TrustSVD, self).init_model()
        n_users, n_items = self.rg.get_train_size()[0], self.rg.get_train_size()[1]
        scale = self.config.factor**0.5
        self.Bu = np.random.rand(n_users) / scale  # bias value of user
        self.Bi = np.random.rand(n_items) / scale  # bias value of item
        # implicit preference attached to rated items
        self.Y = np.random.rand(n_items, self.config.factor) / scale
        # implicit preference attached to trusted users
        self.W = np.random.rand(n_users, self.config.factor) / scale

    def train_model(self):
        """Run SGD until ``maxIter`` is reached or ``isConverged`` reports True."""
        import math

        def frac(x):
            # Inverse square root used to scale the per-sample penalties.
            return 1.0 / math.sqrt(x)

        iteration = 0
        while iteration < self.config.maxIter:
            self.loss = 0
            for user, item, rating in self.rg.trainSet():
                u = self.rg.user[user]
                i = self.rg.item[item]
                error = rating - self.predict(user, item)
                self.loss += error**2

                # p and q index into self.P / self.Q; the in-place updates
                # below are therefore visible through them.
                p, q = self.P[u], self.Q[i]
                nu, sum_y = self.get_sum_y(user)
                nv, sum_w = self.get_sum_w(user)

                # update latent vectors
                self.Bu[u] += self.config.lr * (
                    error - self.config.lambdaB * frac(nu) * self.Bu[u])
                self.Bi[i] += self.config.lr * (
                    error - self.config.lambdaB * frac(nv) * self.Bi[i])

                self.Q[i] += self.config.lr * (
                    error *
                    (p + sum_y + sum_w) - self.config.lambdaQ * frac(nu) * q)

                followees = self.tg.get_followees(user)
                ws = np.zeros(self.config.factor)
                for followee in followees:
                    if self.rg.containsUser(user) and self.rg.containsUser(
                            followee):
                        nw = len(self.tg.get_followers(followee))
                        vid = self.rg.user[followee]
                        w = self.W[vid]
                        weight = 1  # followees[followee]
                        err = w.dot(p) - weight  # trust reconstruction error
                        self.loss += err**2
                        ws += err * w
                        # update w: the first term is the gradient of the
                        # rating error through sum_w (so it uses `error`,
                        # like the Y update below); the second term is the
                        # gradient of the trust error `err`.
                        self.W[vid] += self.config.lr * (
                            error * frac(nv) * q - self.config.lambdaT * err * p
                            - self.config.lambdaW * frac(nw) * w)
                self.P[u] += self.config.lr * (
                    error * q - self.config.lambdaT * ws -
                    (self.config.lambdaP * frac(nu) +
                     self.config.lambdaT * frac(nv)) * p)

                # update y: user_rated_items is keyed by the raw user id
                # (as in get_sum_y), not by the matrix row index u.
                for j in self.rg.user_rated_items(user):
                    idj = self.rg.item[j]
                    self.Y[idj] += self.config.lr * (
                        error * frac(nu) * q -
                        self.config.lambdaY * frac(nv) * self.Y[idj])

            self.loss += self.config.lambdaP * (self.P * self.P).sum() + self.config.lambdaQ * (self.Q * self.Q).sum() \
                         + self.config.lambdaB * (
            (self.Bu * self.Bu).sum() + (self.Bi * self.Bi).sum()) + self.config.lambdaY * (
            self.Y * self.Y).sum() + self.config.lambdaW * (self.W * self.W).sum()
            iteration += 1
            if self.isConverged(iteration):
                break

    def predict(self, u, i):
        """Predict the rating of raw user id ``u`` for raw item id ``i``.

        Falls back to ``self.rg.globalMean`` when the user or the item is
        not known to ``self.rg``.
        """
        if self.rg.containsUser(u) and self.rg.containsItem(i):
            _, sum_y = self.get_sum_y(u)
            _, sum_w = self.get_sum_w(u)
            u = self.rg.user[u]
            i = self.rg.item[i]
            return self.Q[i].dot(
                self.P[u] + sum_y +
                sum_w) + self.rg.globalMean + self.Bi[i] + self.Bu[u]
        else:
            return self.rg.globalMean

    def get_sum_y(self, u):
        """Return ``(n, sum_y)`` for raw user id ``u``.

        ``n`` is the number of items the user rated and ``sum_y`` is the sum
        of their ``Y`` rows divided by ``sqrt(n)``. With no rated items the
        zero vector is returned instead of dividing by zero.
        """
        u_items = self.rg.user_rated_items(u)
        nu = len(u_items)
        sum_y = np.zeros(self.config.factor)
        for j in u_items:
            sum_y += self.Y[self.rg.item[j]]
        if nu:
            sum_y /= (np.sqrt(nu))
        return nu, sum_y

    def get_sum_w(self, u):
        """Return ``(n, sum_w)`` for raw user id ``u``.

        ``n`` is one plus the number of followees known to ``self.rg`` (the
        count starts at 1, so the division below never sees zero) and
        ``sum_w`` is the sum of their ``W`` rows divided by ``sqrt(n)``.
        """
        followees = self.tg.get_followees(u)
        nv = 1
        sum_w = np.zeros(self.config.factor)
        for v in followees:
            if self.rg.containsUser(v):
                nv += 1
                sum_w += self.W[self.rg.user[v]]
        sum_w /= np.sqrt(nv)
        return nv, sum_w
Exemple #3
0
class RSTE(MF):
    """
    Recommendation with Social Trust Ensemble (RSTE).

    A prediction blends the user's own ``P[u] . Q[i]`` (weight ``alpha``)
    with the trust-weighted average of the same product over the users they
    follow (weight ``1 - alpha``); see ``predict``.

    Ma H, King I, Lyu M R. Learning to recommend with social trust ensemble[C]//Proceedings of the 32nd international ACM SIGIR conference on Research and development in information retrieval. ACM, 2009: 203-210.

    """
    def __init__(self):
        super(RSTE, self).__init__()
        # self.maxIter=700
        self.config.alpha = 0.5  # share of the user's own taste in a prediction
        # self.config.lambdaH=0.01
        self.tg = TrustGetter()

    def init_model(self, k):
        """Initialise the base model; ``k`` is forwarded unchanged to MF."""
        super(RSTE, self).init_model(k)

    def train_model(self, k):
        """Run SGD until ``maxIter`` is reached or ``isConverged`` reports True."""
        super(RSTE, self).train_model(k)
        iteration = 0
        while iteration < self.config.maxIter:
            self.loss = 0
            for user, item, rating in self.rg.trainSet():
                error = rating - self.predict(user, item)
                self.loss += error**2
                social_term, _ = self.get_social_term_Q(user, item)

                u = self.rg.user[user]
                i = self.rg.item[item]
                # p and q index into self.P / self.Q; the in-place updates
                # below are therefore visible through them.
                p, q = self.P[u], self.Q[i]

                # update latent vectors
                self.P[u] += self.config.lr * (
                    self.config.alpha * error * q +
                    (1 - self.config.alpha) * self.get_social_term_P(user, item) -
                    self.config.lambdaP * p)

                self.Q[i] += self.config.lr * (
                    error * (self.config.alpha * p +
                             (1 - self.config.alpha) * social_term) -
                    self.config.lambdaQ * q)

            self.loss += self.config.lambdaP * (self.P * self.P).sum(
            ) + self.config.lambdaQ * (self.Q * self.Q).sum()

            iteration += 1
            if self.isConverged(iteration):
                break

    def get_social_term_Q(self, user, item):
        """Aggregate the followees of ``user`` for ``item``.

        Returns ``(social_term, social_pred)``: the trust-weighted average of
        the followees' latent vectors, and the trust-weighted average of
        their ``P . Q[item]`` products. Both are zero when the user or item
        is unknown to ``self.rg`` or when the trust weights sum to zero.
        """
        social_term = np.zeros(self.config.factor)
        social_pred = 0
        if not (self.rg.containsUser(user) and self.rg.containsItem(item)):
            # Always return a pair so callers can unpack the result.
            return social_term, social_pred

        i = self.rg.item[item]
        followees = self.tg.get_followees(user)
        weights = []
        indexes = []
        for followee in followees:
            if self.rg.containsUser(followee):  # followee is in rating set
                indexes.append(self.rg.user[followee])
                weights.append(followees[followee])
        weights = np.array(weights)
        qw = weights.sum()
        if qw != 0:
            trusted = self.P[np.array(indexes)]
            social_term = weights.dot(trusted) / qw
            social_pred += weights.dot(trusted.dot(self.Q[i])) / qw
        return social_term, social_pred

    def get_social_term_P(self, user, item):
        """Aggregate the followers of ``user`` who rated ``item``.

        Returns ``Q[item]`` scaled by the trust-weighted average prediction
        error of those followers, or a zero vector when the item is unknown,
        no follower qualifies, or the trust weights sum to zero.
        """
        social_term = np.zeros(self.config.factor)
        if not self.rg.containsItem(item):
            return social_term
        i = self.rg.item[item]

        followers = self.tg.get_followers(user)
        weights = []
        errs = []
        for follower in followers:
            # follower is in the rating set and has rated this item
            if self.rg.containsUser(follower) and self.rg.containsUserItem(
                    follower, item):
                weights.append(followers[follower])
                errs.append(self.rg.trainSet_u[follower][item] -
                            self.predict(follower, item))
        weights = np.array(weights)
        errs = np.array(errs)
        qw = weights.sum()
        if qw != 0:
            social_term = (errs * weights).sum() / qw * self.Q[i]
        return social_term

    def predict(self, u, i):
        """Predict the rating of raw user id ``u`` for raw item id ``i``.

        Falls back to ``self.rg.globalMean`` for an unknown user or item, and
        to the plain ``P[u] . Q[i]`` when the social prediction is zero.
        """
        if self.rg.containsUser(u) and self.rg.containsItem(i):
            _, social_pred = self.get_social_term_Q(u, i)
            i = self.rg.item[i]
            u = self.rg.user[u]

            if social_pred != 0:
                return self.config.alpha * self.P[u].dot(
                    self.Q[i]) + (1 - self.config.alpha) * social_pred
            else:
                return self.P[u].dot(self.Q[i])
        else:
            return self.rg.globalMean
class SocialRec(MF):
    """
    SoRec: matrix factorization that co-factorizes the trust network.

    Besides the rating error, each followee of the current user contributes
    a trust reconstruction error ``weight - Z[followee] . P[user]``, where
    ``Z`` is a second latent matrix for users in their followee role.

    Ma H, Yang H, Lyu M R, et al. Sorec: social recommendation using probabilistic matrix factorization[C]//Proceedings of the 17th ACM conference on Information and knowledge management. ACM, 2008: 931-940.

    """
    def __init__(self):
        super(SocialRec, self).__init__()
        # self.config.lr=0.0001
        self.config.alpha = 0.1  # weight of the trust term in the updates
        self.config.lambdaZ = 0.01  # L2 penalty on Z
        self.tg = TrustGetter()

    def init_model(self, k):
        """Initialise the base model and the latent social matrix ``Z``.

        ``k`` is forwarded unchanged to ``MF.init_model``.
        """
        super(SocialRec, self).init_model(k)
        self.Z = np.random.rand(
            self.rg.get_train_size()[0], self.config.factor) / (
                self.config.factor**0.5)  # latent user social matrix

    def train_model(self, k):
        """Run SGD until ``maxIter`` is reached or ``isConverged`` reports True."""
        import math

        super(SocialRec, self).train_model(k)
        iteration = 0
        while iteration < self.config.maxIter:
            self.loss = 0
            for user, item, rating in self.rg.trainSet():
                u = self.rg.user[user]
                i = self.rg.item[item]
                error = rating - self.predict(user, item)
                self.loss += error**2
                # p and q index into self.P / self.Q; the in-place updates
                # below are therefore visible through them.
                p, q = self.P[u], self.Q[i]

                followees = self.tg.get_followees(user)
                uplus = len(followees)  # ~ d + (i), same for every followee
                zs = np.zeros(self.config.factor)
                for followee in followees:
                    if self.rg.containsUser(user) and self.rg.containsUser(
                            followee):
                        vminus = len(
                            self.tg.get_followers(followee))  # ~ d - (k)
                        degree = uplus + vminus
                        # Fall back to 1 when both degrees are zero.
                        if degree:
                            weight = math.sqrt(vminus / float(degree))
                        else:
                            weight = 1
                        zid = self.rg.user[followee]
                        z = self.Z[zid]
                        err = weight - z.dot(p)
                        self.loss += err**2
                        # The derivative of the trust error with respect to
                        # p is proportional to -err * z. Accumulate it before
                        # Z[zid] (and hence z) is updated in place below.
                        zs += -1.0 * err * z
                        self.Z[zid] += self.config.lr * (
                            self.config.alpha * err * p -
                            self.config.lambdaZ * z)

                self.P[u] += self.config.lr * (error * q - self.config.alpha *
                                               zs - self.config.lambdaP * p)
                self.Q[i] += self.config.lr * (error * p -
                                               self.config.lambdaQ * q)

            self.loss += self.config.lambdaP * (self.P * self.P).sum() + self.config.lambdaQ * (self.Q * self.Q).sum() \
                         + self.config.lambdaZ * (self.Z * self.Z).sum()

            iteration += 1
            if self.isConverged(iteration):
                break
class SocialMF(MF):
    """
    SocialMF: matrix factorization with trust propagation.

    Each SGD step pulls the latent vector of the current user towards the
    trust-weighted average of the latent vectors of the users they follow,
    and adds the averaged residual of the same pull for each of their
    followers.

    Jamali M, Ester M. A matrix factorization technique with trust propagation for recommendation in social networks[C]//Proceedings of the fourth ACM conference on Recommender systems. ACM, 2010: 135-142.
    """
    def __init__(self):
        super(SocialMF, self).__init__()
        # self.config.lr=0.0001
        self.config.alpha = 1  # 0.8 rmse=0.87605
        self.tg = TrustGetter()  # loading trust data
        self.init_model()

    def _weighted_followee_sum(self, user):
        """Return ``(sum_k w_k * P[k], sum_k w_k)`` over the followees of ``user`` known to ``self.rg``."""
        weighted_sum = np.zeros(self.config.factor)
        total_weight = 0.0
        followees = self.tg.get_followees(user)  # users that `user` follows
        for followee in followees:
            weight = followees[followee]
            if self.rg.containsUser(followee):
                weighted_sum += weight * self.P[self.rg.user[followee]]
                total_weight += weight
        return weighted_sum, total_weight

    def train_model(self):
        """Run SGD until ``maxIter`` is reached or ``isConverged`` reports True."""
        iteration = 0
        while iteration < self.config.maxIter:
            self.loss = 0
            for user, item, rating in self.rg.trainSet():
                u = self.rg.user[user]
                i = self.rg.item[item]
                error = rating - self.predict(user, item)
                self.loss += error**2
                # p and q index into self.P / self.Q; the in-place updates
                # below are therefore visible through them.
                p, q = self.P[u], self.Q[i]

                # Residual between this user and the weighted mean of the
                # users they follow.
                social_term, total_weight = self._weighted_followee_sum(user)
                if total_weight != 0:
                    social_term = p - social_term / total_weight

                # Same residual for every follower of this user, averaged
                # over the followers known to self.rg.
                social_term_a = np.zeros(self.config.factor)
                total_count = 0
                followers = self.tg.get_followers(user)
                for follower in followers:
                    if self.rg.containsUser(follower):
                        total_count += 1
                        uv = self.P[self.rg.user[follower]]
                        follower_sum, follower_weight = \
                            self._weighted_followee_sum(follower)
                        if follower_weight != 0:
                            social_term_a += uv - follower_sum / follower_weight
                if total_count != 0:
                    social_term_a /= total_count

                # update latent vectors
                self.P[u] += self.config.lr * (
                    error * q - self.config.alpha * social_term +
                    self.config.alpha * social_term_a - self.config.lambdaP * p
                )
                self.Q[i] += self.config.lr * (error * p -
                                               self.config.lambdaQ * q)

                self.loss += self.config.alpha * social_term.dot(social_term)

            self.loss += self.config.lambdaP * (self.P * self.P).sum(
            ) + self.config.lambdaQ * (self.Q * self.Q).sum()

            iteration += 1
            if self.isConverged(iteration):
                break