예제 #1
0
class TopN(BaseEstimator):
    """Popularity baseline: recommend the same most-frequent items to everyone.

    Items are ranked by how many non-zero entries they have in the matrix
    returned by ``MemeryDataModel.getData()`` (presumably a user x item
    interaction matrix -- verify against ``MemeryDataModel``).

    Attributes set by ``fit``:
        dataModel: the ``MemeryDataModel`` built from the training data.
        popItems:  float array, one occurrence count per internal item id.
        topN:      internal item ids sorted by descending count.
    """

    def __init__(self, n=5):
        """Store ``n``, the number of items recommended to each user."""
        print('topN begin')
        self.n = n

    def gen_items_popular(self, trainSamples, trainTargets, hasTimes=False):
        """Build the data model and count occurrences of every item.

        Args:
            trainSamples: training samples handed to ``MemeryDataModel``.
            trainTargets: training targets handed to ``MemeryDataModel``.
            hasTimes: accepted for interface compatibility; not used.

        Side effects:
            Sets ``self.dataModel`` and ``self.popItems``.
        """
        self.dataModel = MemeryDataModel(trainSamples, trainTargets)
        item_counts = np.zeros(self.dataModel.getItemsNum())
        # Second-axis indices of the non-zero cells; an item id appears here
        # once per non-zero cell in its column.
        iids = self.dataModel.getData().nonzero()[1]
        # Unbuffered add, so repeated ids are each counted (a plain
        # ``item_counts[iids] += 1`` would count duplicates only once).
        np.add.at(item_counts, iids, 1)
        self.popItems = item_counts

    def predict(self, testSamples):
        """Return 1/0 per sample: is the sample's item among the top ``n``?

        Args:
            testSamples: iterable of samples whose element ``[1]`` is an item.

        Returns:
            list of ints (1 or 0), one per sample, in input order.
        """
        top_iids = self.topN[:self.n]
        recommend_lists = []
        for user_item in testSamples:
            iid = self.dataModel.getIidByItem(user_item[1])
            recommend_lists.append(1 if iid in top_iids else 0)
        return recommend_lists

    def fit(self, trainSamples, trainTargets):
        """Count item popularity and rank the items by it.

        Returns:
            self, so calls can be chained.
        """
        self.gen_items_popular(trainSamples, trainTargets)
        # argsort is ascending; reverse it to put the most popular first.
        self.topN = np.argsort(np.array(self.popItems))[::-1]
        return self

    def recommend(self, uid):
        """Return the top ``n`` items; ``uid`` is ignored (not personalised)."""
        return [self.dataModel.getItemByIid(i) for i in self.topN[:self.n]]

    def score(self, testSamples, trueLabels):
        """Evaluate the recommendations and return the F1 value.

        Test samples are grouped by their first column (user); the second
        column of each group is that user's list of true items.

        Args:
            testSamples: 2-D array-like; column 0 is the user, column 1 the
                item.
            trueLabels: accepted for interface compatibility; not used.

        Returns:
            The ``'F1'`` entry of ``Eval().evalAll(recommendList, trueList)``.
        """
        # Convert once; the original rebuilt this array several times per user.
        samples = np.array(testSamples)
        users = samples[:, 0]
        items = samples[:, 1]
        # The recommendation is identical for every user, so build it once.
        recommended = [
            self.dataModel.getItemByIid(i) for i in self.topN[:self.n]
        ]

        trueList = []
        recommendList = []
        for u in set(users):
            trueList.append(list(items[users == u]))
            # Separate list object per user, as before.
            recommendList.append(list(recommended))

        result = Eval().evalAll(recommendList, trueList)
        print('TopN result:' + '(' + str(self.get_params()) + ')' +
              str(result['F1']))
        return result['F1']
예제 #2
0
class TopN(BaseEstimator):
    """Non-personalised recommender that always returns the most popular items.

    Popularity of an item is the number of non-zero entries it has in the
    matrix returned by ``MemeryDataModel.getData()`` (presumably users on
    the first axis and items on the second -- TODO confirm).

    Attributes set by ``fit``:
        dataModel: the ``MemeryDataModel`` built from the training data.
        popItems:  float array of occurrence counts, indexed by item id.
        topN:      item ids ordered from most to least popular.
    """

    def __init__(self, n=5):
        """Remember ``n``, the length of every recommendation list."""
        print('topN begin')
        self.n = n

    def gen_items_popular(self, trainSamples, trainTargets, hasTimes=False):
        """Create the data model and tally how often each item occurs.

        Args:
            trainSamples: training samples passed to ``MemeryDataModel``.
            trainTargets: training targets passed to ``MemeryDataModel``.
            hasTimes: unused; kept so existing callers keep working.

        Side effects:
            Assigns ``self.dataModel`` and ``self.popItems``.
        """
        self.dataModel = MemeryDataModel(trainSamples, trainTargets)
        popularity = np.zeros(self.dataModel.getItemsNum())
        # nonzero()[1] holds the second-axis index of every non-zero cell.
        item_ids = self.dataModel.getData().nonzero()[1]
        # np.add.at accumulates repeated indices, matching a per-element loop.
        np.add.at(popularity, item_ids, 1)
        self.popItems = popularity

    def predict(self, testSamples):
        """Flag each sample with 1 if its item is in the top ``n``, else 0.

        Args:
            testSamples: iterable of samples; element ``[1]`` is the item.

        Returns:
            list of 0/1 ints aligned with ``testSamples``.
        """
        best = self.topN[:self.n]
        flags = []
        for sample in testSamples:
            iid = self.dataModel.getIidByItem(sample[1])
            flags.append(1 if iid in best else 0)
        return flags

    def fit(self, trainSamples, trainTargets):
        """Compute item popularity and the resulting ranking.

        Returns:
            self.
        """
        self.gen_items_popular(trainSamples, trainTargets)
        # Reverse the ascending argsort so index 0 is the most popular item.
        self.topN = np.argsort(np.array(self.popItems))[::-1]
        return self

    def recommend(self, uid):
        """Return the ``n`` most popular items; ``uid`` does not matter."""
        return [self.dataModel.getItemByIid(i) for i in self.topN[:self.n]]

    def score(self, testSamples, trueLabels):
        """Return the F1 value of the top-``n`` list against the test data.

        For every distinct user in column 0 of ``testSamples`` the items in
        column 1 form the ground truth; the prediction is the same popular
        list for each user.

        Args:
            testSamples: 2-D array-like of (user, item, ...) rows.
            trueLabels: unused; kept for interface compatibility.

        Returns:
            ``Eval().evalAll(recommendList, trueList)['F1']``.
        """
        # Hoisted: the array conversion and the recommendation list do not
        # depend on the user being processed.
        data = np.array(testSamples)
        user_col = data[:, 0]
        item_col = data[:, 1]
        popular = [self.dataModel.getItemByIid(i) for i in self.topN[:self.n]]

        trueList = []
        recommendList = []
        for user in set(user_col):
            trueList.append(list(item_col[user_col == user]))
            recommendList.append(list(popular))

        result = Eval().evalAll(recommendList, trueList)
        print('TopN result:' + '(' + str(self.get_params()) + ')' +
              str(result['F1']))
        return result['F1']
예제 #3
0
class LFM(BaseEstimator):
    """Latent factor model with user/item biases, trained by SGD.

    A rating is predicted as ``mu + bi[iid] + bu[uid] + qi[iid] . pu[uid]``
    and clamped to the range [1, 5].

    Attributes set by ``fit``:
        dataModel: the ``MemeryDataModel`` built from the training data.
        mu: mean of the training targets.
        bu, bi: per-user / per-item bias arrays.
        pu, qi: lists of 1-D factor arrays (length ``factors``), one per
            user / item.
    """

    def __init__(self, n=5, factors=25, learningrate=0.05, userregular=0.0001, itemregular=0.0001, iter=10):
        """Store the hyper-parameters.

        Args:
            n: number of items returned by ``recommend``.
            factors: length of each latent factor vector.
            learningrate: initial SGD step size (decayed per pass in ``fit``).
            userregular: regularisation weight for user bias and factors.
            itemregular: regularisation weight for item bias and factors.
            iter: number of passes over the training data.
        """
        print('lfm begin')
        self.factors = factors
        self.n = n
        self.learningrate = learningrate
        self.userregular = userregular
        self.itemregular = itemregular
        self.iter = iter

    def predict(self, testSamples):
        """Return one recommendation list per test sample.

        Args:
            testSamples: iterable of samples whose element ``[0]`` is a user.

        Returns:
            list of recommendation lists, aligned with ``testSamples``.
        """
        # recommend() translates user -> uid itself; the raw user must be
        # passed, otherwise the id would be translated twice.
        return [self.recommend(user_item[0]) for user_item in testSamples]

    def fit(self, trainSamples, trainTargets):
        """Train biases and latent factors with stochastic gradient descent.

        Args:
            trainSamples: training samples passed to ``MemeryDataModel``.
            trainTargets: training ratings; their mean becomes ``self.mu``.

        Returns:
            self, so calls can be chained.
        """
        self.dataModel = MemeryDataModel(trainSamples, trainTargets)
        self.mu = np.array(trainTargets).mean()
        users_num = self.dataModel.getUsersNum()
        items_num = self.dataModel.getItemsNum()
        self.bu = np.zeros(users_num)
        self.bi = np.zeros(items_num)

        # Small random initial factors, scaled by 1/sqrt(factors).
        # Rows are ndarrays so the updates below are plain vector arithmetic.
        scale = math.sqrt(self.factors)
        self.qi = [np.array([0.1 * random.random() / scale for _ in range(self.factors)])
                   for _ in range(items_num)]
        self.pu = [np.array([0.1 * random.random() / scale for _ in range(self.factors)])
                   for _ in range(users_num)]

        lineData = self.dataModel.getLineData()
        # Decay a local copy: the hyper-parameter itself must keep its value
        # so get_params() and repeated fit() calls are unaffected.
        lr = self.learningrate

        for _ in range(self.iter):
            # Visit the training rows in a fresh random order on every pass.
            for idx in np.random.permutation(len(lineData)):
                row = lineData[idx]
                uid = self.dataModel.getUidByUser(row[0])
                iid = self.dataModel.getIidByItem(row[1])
                err = row[2] - self.predict_single(uid, iid)

                self.bu[uid] += lr * (err - self.userregular * self.bu[uid])
                self.bi[iid] += lr * (err - self.itemregular * self.bi[iid])

                # Compute both steps from the *old* vectors before applying
                # either; updating qi in place first would leak the new qi
                # into the pu update.
                q_row = self.qi[iid]
                p_row = self.pu[uid]
                q_step = lr * (err * p_row - self.itemregular * q_row)
                p_step = lr * (err * q_row - self.userregular * p_row)
                q_row += q_step
                p_row += p_step
            lr *= 0.93
        return self

    def predict_single(self, uid, iid):
        """Predict the rating for internal ids ``uid``/``iid``, clamped to [1, 5]."""
        ans = self.mu + self.bi[iid] + self.bu[uid] + np.dot(self.qi[iid], self.pu[uid])
        if ans > 5:
            return 5
        elif ans < 1:
            return 1
        return ans

    def recommend(self, u):
        """Return the ``n`` items with the highest predicted rating for ``u``.

        Args:
            u: external user identifier (translated via ``getUidByUser``).

        Returns:
            list of items, best first; empty when ``getUidByUser`` yields -1.
        """
        uid = self.dataModel.getUidByUser(u)
        if uid == -1:
            print('not in test')
            return []
        predict_scores = [self.predict_single(uid, i)
                          for i in range(self.dataModel.getItemsNum())]
        # Last n entries of the ascending argsort, walked backwards.
        topN = np.argsort(np.array(predict_scores))[-1:-self.n - 1:-1]
        return [self.dataModel.getItemByIid(i) for i in topN]

    def score(self, testSamples, trueLabels):
        """Evaluate per-user recommendations and return the F1 value.

        Args:
            testSamples: 2-D array-like; column 0 is the user, column 1 the
                item.
            trueLabels: accepted for interface compatibility; not used.

        Returns:
            The ``'F1'`` entry of ``Eval().evalAll(recommendList, trueList)``.
        """
        print('LFM scoring ...')
        # Convert once instead of rebuilding the array for every user.
        samples = np.array(testSamples)
        users = samples[:, 0]
        items = samples[:, 1]

        trueList = []
        recommendList = []
        for u in set(users):
            trueList.append(list(items[users == u]))
            recommendList.append(self.recommend(u))

        result = Eval().evalAll(recommendList, trueList)
        print('LFM result:' + '(' + str(self.get_params()) + ')' + str(result['F1']))
        return result['F1']
예제 #4
0
class LFM(BaseEstimator):
    """Biased matrix-factorisation recommender fitted with SGD.

    The predicted rating is
    ``mu + bi[iid] + bu[uid] + dot(qi[iid], pu[uid])``, clamped to [1, 5].

    Attributes set by ``fit``:
        dataModel: the ``MemeryDataModel`` built from the training data.
        mu: mean of the training targets.
        bu, bi: bias arrays indexed by internal user / item id.
        pu, qi: lists holding one 1-D factor array (length ``factors``) per
            user / item.
    """

    def __init__(self,
                 n=5,
                 factors=25,
                 learningrate=0.05,
                 userregular=0.0001,
                 itemregular=0.0001,
                 iter=10):
        """Store the hyper-parameters.

        Args:
            n: length of each recommendation list.
            factors: dimensionality of the latent vectors.
            learningrate: starting SGD step size; ``fit`` decays it per pass.
            userregular: regularisation weight on user bias and factors.
            itemregular: regularisation weight on item bias and factors.
            iter: number of training passes.
        """
        print('lfm begin')
        self.factors = factors
        self.n = n
        self.learningrate = learningrate
        self.userregular = userregular
        self.itemregular = itemregular
        self.iter = iter

    def predict(self, testSamples):
        """Recommend for the user of every test sample.

        Args:
            testSamples: iterable of samples; element ``[0]`` is the user.

        Returns:
            list with one recommendation list per sample, in input order.
        """
        # Pass the raw user: recommend() performs the user -> uid lookup, so
        # looking it up here as well would translate the id twice.
        return [self.recommend(sample[0]) for sample in testSamples]

    def fit(self, trainSamples, trainTargets):
        """Learn biases and latent factors by stochastic gradient descent.

        Args:
            trainSamples: training samples passed to ``MemeryDataModel``.
            trainTargets: training ratings; ``self.mu`` is their mean.

        Returns:
            self.
        """
        self.dataModel = MemeryDataModel(trainSamples, trainTargets)
        self.mu = np.array(trainTargets).mean()
        n_users = self.dataModel.getUsersNum()
        n_items = self.dataModel.getItemsNum()
        self.bu = np.zeros(n_users)
        self.bi = np.zeros(n_items)

        # Random start values in [0, 0.1 / sqrt(factors)); each row is an
        # ndarray so the SGD steps below are element-wise vector operations.
        norm = math.sqrt(self.factors)
        self.qi = [
            np.array([0.1 * random.random() / norm
                      for _ in range(self.factors)])
            for _ in range(n_items)
        ]
        self.pu = [
            np.array([0.1 * random.random() / norm
                      for _ in range(self.factors)])
            for _ in range(n_users)
        ]

        lineData = self.dataModel.getLineData()
        # Keep self.learningrate untouched (it is a hyper-parameter reported
        # by get_params()); only this local copy is decayed.
        step_size = self.learningrate

        for _ in range(self.iter):
            # New random visiting order for each pass over the data.
            for pos in np.random.permutation(len(lineData)):
                row = lineData[pos]
                uid = self.dataModel.getUidByUser(row[0])
                iid = self.dataModel.getIidByItem(row[1])
                eui = row[2] - self.predict_single(uid, iid)

                self.bu[uid] += step_size * (
                    eui - self.userregular * self.bu[uid])
                self.bi[iid] += step_size * (
                    eui - self.itemregular * self.bi[iid])

                # Both deltas are derived from the pre-update vectors; an
                # in-place qi update followed by the pu update would
                # otherwise see the already-modified qi.
                item_vec = self.qi[iid]
                user_vec = self.pu[uid]
                item_delta = step_size * (
                    eui * user_vec - self.itemregular * item_vec)
                user_delta = step_size * (
                    eui * item_vec - self.userregular * user_vec)
                item_vec += item_delta
                user_vec += user_delta
            step_size *= 0.93
        return self

    def predict_single(self, uid, iid):
        """Return the predicted rating for ``uid``/``iid``, clamped to [1, 5]."""
        ans = self.mu + self.bi[iid] + self.bu[uid] + np.dot(
            self.qi[iid], self.pu[uid])
        if ans > 5:
            return 5
        elif ans < 1:
            return 1
        return ans

    def recommend(self, u):
        """Return the ``n`` best-scoring items for external user ``u``.

        Args:
            u: external user identifier, translated via ``getUidByUser``.

        Returns:
            list of items ordered best first, or ``[]`` when ``getUidByUser``
            returns -1.
        """
        uid = self.dataModel.getUidByUser(u)
        if uid == -1:
            print('not in test')
            return []
        predict_scores = [
            self.predict_single(uid, i)
            for i in range(self.dataModel.getItemsNum())
        ]
        # Take the last n positions of the ascending argsort in reverse.
        topN = np.argsort(np.array(predict_scores))[-1:-self.n - 1:-1]
        return [self.dataModel.getItemByIid(i) for i in topN]

    def score(self, testSamples, trueLabels):
        """Return the F1 value of the recommendations on the test data.

        Each distinct user in column 0 of ``testSamples`` contributes one
        ground-truth list (their column-1 items) and one recommendation list.

        Args:
            testSamples: 2-D array-like of (user, item, ...) rows.
            trueLabels: unused; kept for interface compatibility.

        Returns:
            ``Eval().evalAll(recommendList, trueList)['F1']``.
        """
        print('LFM scoring ...')
        # The array conversion is loop-invariant, so do it once up front.
        data = np.array(testSamples)
        user_col = data[:, 0]
        item_col = data[:, 1]

        trueList = []
        recommendList = []
        for user in set(user_col):
            trueList.append(list(item_col[user_col == user]))
            recommendList.append(self.recommend(user))

        result = Eval().evalAll(recommendList, trueList)
        print('LFM result:' + '(' + str(self.get_params()) + ')' +
              str(result['F1']))
        return result['F1']