Ejemplo n.º 1
0
 def gen_items_popular(self, trainSamples, trainTargets, hasTimes=False):
     """Count the stored interactions of every item.

     Builds ``self.dataModel`` from the training data and stores in
     ``self.popItems`` a float array with one slot per item id.  Each slot
     holds how many nonzero entries of the data matrix carry that id as
     their second index.

     ``hasTimes`` is accepted but not used by this method.
     """
     self.dataModel = MemeryDataModel(trainSamples, trainTargets)
     itempopular = np.zeros(self.dataModel.getItemsNum())
     # nonzero() is evaluated once; only the second index array (the item
     # ids) is needed for the count.
     iids = self.dataModel.getData().nonzero()[1]
     # ufunc.at accumulates unbuffered, so an id that occurs several times
     # is incremented once per occurrence (plain fancy-index += would not).
     np.add.at(itempopular, iids, 1)
     self.popItems = itempopular
Ejemplo n.º 2
0
 def fit(self, trainSamples, trainTargets):
     """Fill ``self.simiMatrix`` with pairwise user similarities.

     A data model is built from the training data.  For every unordered
     pair of user ids, ``self.similarity.compute`` is called on the two
     users' item-id collections and the score is written to both mirrored
     cells of the matrix.  The diagonal keeps its initial value of zero.
     """
     self.dataModel = MemeryDataModel(trainSamples, trainTargets)
     n_users = self.dataModel.getUsersNum()
     self.simiMatrix = np.zeros((n_users, n_users))
     for first in range(n_users):
         for second in range(first + 1, n_users):
             items_first = self.dataModel.getItemIDsFromUid(first)
             items_second = self.dataModel.getItemIDsFromUid(second)
             score = self.similarity.compute(items_first, items_second)
             # Only the upper triangle is computed; mirror it.
             self.simiMatrix[first][second] = score
             self.simiMatrix[second][first] = score
Ejemplo n.º 3
0
 def fit(self, trainSamples, trainTargets):
     """Factorize the training matrix with a single NMF fit.

     Stores the data model in ``self.dataModel``, the output of
     ``fit_transform`` in ``self.pu`` and the transposed ``components_`` of
     that same fitted model in ``self.qi``.
     """
     self.dataModel = MemeryDataModel(trainSamples, trainTargets)
     V = self.dataModel.getData()
     model = ProjectedGradientNMF(n_components=self.factors,
                                  max_iter=1000,
                                  nls_max_iter=1000)
     # fit_transform() already fits the model.  Reading components_ from
     # that fit (instead of calling fit() again) halves the work and
     # guarantees both factor matrices belong to the same decomposition.
     self.pu = model.fit_transform(V)
     self.qi = model.components_.transpose()
Ejemplo n.º 4
0
 def fit(self, trainSamples, trainTargets):
     """Fill ``self.simiMatrix`` with pairwise item similarities.

     After building the data model, every unordered pair of item ids is
     scored by ``self.similarity.compute`` on the two items' user-id
     collections.  Each score is stored symmetrically; diagonal cells stay
     at zero.
     """
     self.dataModel = MemeryDataModel(trainSamples, trainTargets)
     item_count = self.dataModel.getItemsNum()
     self.simiMatrix = np.zeros((item_count, item_count))
     for left in range(item_count):
         right = left + 1
         while right < item_count:
             users_left = self.dataModel.getUserIDsFromIid(left)
             users_right = self.dataModel.getUserIDsFromIid(right)
             value = self.similarity.compute(users_left, users_right)
             self.simiMatrix[left][right] = value
             self.simiMatrix[right][left] = value
             right += 1
Ejemplo n.º 5
0
    def fit(self, trainSamples, trainTargets):
        """Build a user-user similarity matrix from pairwise item preferences.

        Steps:

        1. For every user, each pair of their items with different ratings
           adds the key ``"<higher-rated iid> <lower-rated iid>"`` to
           ``self.T[uid]``.
        2. Every ordered pair seen at least once gets an ``idf`` weight
           derived from how many users hold that order versus either order.
        3. Each user's keys are weighted ``tf * idf`` with
           ``tf = log2(1 + |rating difference|)``.
        4. ``self.simiMatrix[i][j]`` is ``self.cos`` of the weight dicts of
           users ``i`` and ``j`` (symmetric, zero diagonal).

        The per-user key counts are printed after step 1.
        """
        self.dataModel = MemeryDataModel(trainSamples, trainTargets, isRating=True)
        usersNum = self.dataModel.getUsersNum()
        itemsNum = self.dataModel.getItemsNum()

        self.T = [{} for _ in range(usersNum)]
        for uid in range(usersNum):
            purchased_items = self.dataModel.getItemIDsFromUid(uid)
            # Look every rating up once per user rather than once per pair.
            ratings = [self.dataModel.getRating(uid, iid)
                       for iid in purchased_items]
            for i in range(len(purchased_items)):
                for j in range(i + 1, len(purchased_items)):
                    if ratings[i] > ratings[j]:
                        key = str(purchased_items[i]) + " " + str(purchased_items[j])
                    elif ratings[i] < ratings[j]:
                        key = str(purchased_items[j]) + " " + str(purchased_items[i])
                    else:
                        # Equal ratings express no preference.
                        continue
                    self.T[uid][key] = 1

        for uid in range(usersNum):
            # Single-argument form prints the same text on Python 2 and 3.
            print("%s %s" % (self.dataModel.getUserByUid(uid), len(self.T[uid])))

        # pair_sum[a][b]: number of users that prefer item a over item b.
        pair_sum = [[0] * itemsNum for _ in range(itemsNum)]
        for uid in range(usersNum):
            for t in self.T[uid]:
                i1, i2 = t.split(" ")
                pair_sum[int(i1)][int(i2)] += 1

        idf = {}
        for i1 in range(itemsNum):
            for i2 in range(itemsNum):
                if pair_sum[i1][i2] != 0:
                    key = str(i1) + ' ' + str(i2)
                    # Users holding either order of this item pair.
                    both_orders = pair_sum[i1][i2] + pair_sum[i2][i1]
                    alpha = log10(1 + 9.0 * both_orders / usersNum)
                    idf[key] = (alpha * log2(both_orders * 1.0 / pair_sum[i1][i2])
                                + (1 - alpha))

        W = [{} for _ in range(usersNum)]
        for uid in range(usersNum):
            for t in self.T[uid]:
                i1, i2 = t.split(" ")
                diff = (self.dataModel.getRating(uid, int(i1))
                        - self.dataModel.getRating(uid, int(i2)))
                tf = log2(1 + abs(diff))
                W[uid][t] = tf * idf[t]

        self.simiMatrix = np.zeros((usersNum, usersNum))
        for i in range(usersNum):
            for j in range(i + 1, usersNum):
                s = self.cos(W[i], W[j])
                self.simiMatrix[i][j] = self.simiMatrix[j][i] = s
Ejemplo n.º 6
0
    def fit(self, trainSamples, trainTargets):
        """Train user/item factors by repeated passes over sampled triples.

        Initialises ``self.item_bias`` to zeros and both factor matrices to
        small random values, draws a fixed list of ``<u, i, j>`` triples into
        ``self.loss_samples``, then runs at most ``self.iter`` passes of
        ``self.update_factors`` over freshly generated triples.

        Training stops early when the loss changed by less than 0.01 or
        went up compared with the previous pass.  Otherwise
        ``self.learning_rate`` is multiplied by 0.9 before the next pass.
        """
        self.dataModel = MemeryDataModel(trainSamples, trainTargets)
        usersNum = self.dataModel.getUsersNum()
        itemsNum = self.dataModel.getItemsNum()
        scale = math.sqrt(self.factors)
        self.item_bias = np.zeros(itemsNum)
        self.user_factors = np.array([[
            (0.1 * random.random() / scale) for _ in range(self.factors)
        ] for _ in range(usersNum)])
        self.item_factors = np.array([[
            (0.1 * random.random() / scale) for _ in range(self.factors)
        ] for _ in range(itemsNum)])
        # Disabled alternative kept from the original: warm-start both
        # factor matrices from the CSV files 'pu' and 'qi' via
        # np.array(pd.read_csv(path).values)[:, 1:].

        num_loss_samples = int(100 * usersNum**0.5)
        loss_sampler = UniformUserUniformItem(True)
        self.loss_samples = list(
            loss_sampler.generate_samples(self.dataModel, num_loss_samples))
        old_loss = self.loss()

        update_sampler = UniformPairWithoutReplacement(True)
        for _ in range(self.iter):
            for u, i, j in update_sampler.generate_samples(self.dataModel):
                self.update_factors(u, i, j)
            # Evaluate the loss once per pass; the original recomputed it
            # for every comparison and again for the bookkeeping.
            # NOTE(review): assumes self.loss() is deterministic for fixed
            # factors and self.loss_samples - confirm.
            new_loss = self.loss()
            delta = new_loss - old_loss
            if abs(delta) < 0.01 or delta > 0:
                break
            old_loss = new_loss
            self.learning_rate *= 0.9
Ejemplo n.º 7
0
    def fit(self, trainSamples, trainTargets):
        """Train a biased matrix-factorization model with SGD.

        Sets ``self.mu`` to the mean of ``trainTargets``, zero-initialises
        the user and item biases ``self.bu`` / ``self.bi`` and fills the
        factor matrices ``self.qi`` / ``self.pu`` with small random values.
        It then runs ``self.iter`` passes over the training rows in a fresh
        random order each time, and multiplies ``self.learningrate`` by 0.93
        after each pass.

        ``self.qi`` and ``self.pu`` are NumPy arrays with one row per item
        and per user respectively.
        """
        self.dataModel = MemeryDataModel(trainSamples, trainTargets)
        self.mu = np.array(trainTargets).mean()
        usersNum = self.dataModel.getUsersNum()
        itemsNum = self.dataModel.getItemsNum()
        self.bu = np.zeros(usersNum)
        self.bi = np.zeros(itemsNum)
        scale = math.sqrt(self.factors)
        # The factors must be ndarrays: with plain list rows,
        # ``row += ndarray`` extends the list with the update values
        # instead of adding them elementwise.
        self.qi = np.array([[(0.1 * random.random() / scale)
                             for _ in range(self.factors)]
                            for _ in range(itemsNum)])
        self.pu = np.array([[(0.1 * random.random() / scale)
                             for _ in range(self.factors)]
                            for _ in range(usersNum)])
        lineData = self.dataModel.getLineData()
        lengthOfTrain = len(lineData)

        for _ in range(self.iter):
            order = np.random.permutation(lengthOfTrain)
            for n in order:
                row = lineData[n]
                uid = self.dataModel.getUidByUser(row[0])
                iid = self.dataModel.getIidByItem(row[1])
                rating = row[2]
                eui = rating - self.predict_single(uid, iid)
                self.bu[uid] += self.learningrate * (
                    eui - self.userregular * self.bu[uid])
                self.bi[iid] += self.learningrate * (
                    eui - self.itemregular * self.bi[iid])
                # Snapshot the item factors so the user update below uses
                # the values from before this step (a plain reference would
                # see the in-place item update).
                qi_old = self.qi[iid].copy()
                self.qi[iid] += self.learningrate * (
                    eui * self.pu[uid] - self.itemregular * qi_old)
                self.pu[uid] += self.learningrate * (
                    eui * qi_old - self.userregular * self.pu[uid])
            self.learningrate = self.learningrate * 0.93
Ejemplo n.º 8
0
    def fit(self, trainSamples, trainTargets):
        """Build a user-user similarity matrix from pairwise item preferences.

        The data model is created with ``hasTimes=True``.  Steps:

        1. For every user, each pair of their items with different
           ``getRating`` values adds the key
           ``"<higher iid> <lower iid>"`` (ordered by that value) to
           ``self.T[uid]``.
        2. Every ordered pair seen at least once gets an ``idf`` weight
           derived from how many users hold that order versus either order.
        3. Each user's keys are weighted ``tf * idf`` where ``tf`` is
           ``log2(1 + |difference|)`` carrying the sign of the difference.
        4. ``self.simiMatrix[i][j]`` is ``self.cos`` of the weight dicts of
           users ``i`` and ``j`` (symmetric, zero diagonal).
        """
        self.dataModel = MemeryDataModel(trainSamples,
                                         trainTargets,
                                         hasTimes=True)
        usersNum = self.dataModel.getUsersNum()
        itemsNum = self.dataModel.getItemsNum()

        self.T = [{} for _ in range(usersNum)]
        for uid in range(usersNum):
            purchased_items = self.dataModel.getItemIDsFromUid(uid)
            # Look every value up once per user rather than once per pair.
            ratings = [self.dataModel.getRating(uid, iid)
                       for iid in purchased_items]
            for i in range(len(purchased_items)):
                for j in range(i + 1, len(purchased_items)):
                    if ratings[i] > ratings[j]:
                        key = str(purchased_items[i]) + " " + str(
                            purchased_items[j])
                    elif ratings[i] < ratings[j]:
                        key = str(purchased_items[j]) + " " + str(
                            purchased_items[i])
                    else:
                        # Equal values express no preference.
                        continue
                    self.T[uid][key] = 1
            # NOTE: a disabled experiment in the original also added keys
            # pairing each purchased item with self.sample_rate randomly
            # sampled unpurchased items.

        # pair_sum[a][b]: number of users that prefer item a over item b.
        pair_sum = [[0] * itemsNum for _ in range(itemsNum)]
        for uid in range(usersNum):
            for t in self.T[uid]:
                i1, i2 = t.split(" ")
                pair_sum[int(i1)][int(i2)] += 1

        idf = {}
        for i1 in range(itemsNum):
            for i2 in range(itemsNum):
                if pair_sum[i1][i2] != 0:
                    key = str(i1) + ' ' + str(i2)
                    # Users holding either order of this item pair.
                    both_orders = pair_sum[i1][i2] + pair_sum[i2][i1]
                    alpha = log10(1 + 9.0 * both_orders / usersNum)
                    idf[key] = alpha * log2(
                        both_orders * 1.0 / pair_sum[i1][i2]) + (1 - alpha)

        W = [{} for _ in range(usersNum)]
        for uid in range(usersNum):
            for t in self.T[uid]:
                i1, i2 = t.split(" ")
                diff = self.dataModel.getRating(
                    uid, int(i1)) - self.dataModel.getRating(uid, int(i2))
                tf = log2(1 + abs(diff))
                # Keys are built higher-value-first, so diff is expected to
                # be positive here; the sign flip is kept as a safeguard.
                if diff < 0:
                    tf = -tf
                W[uid][t] = tf * idf[t]

        self.simiMatrix = np.zeros((usersNum, usersNum))
        for i in range(usersNum):
            for j in range(i + 1, usersNum):
                s = self.cos(W[i], W[j])
                self.simiMatrix[i][j] = self.simiMatrix[j][i] = s