Example #1
0
    def build_user_item_sim_CF(self):
        """Build user-user and item-item similarity data and save it to disk.

        For users (rows of ``self.rg``) and then for items (columns of
        ``self.rg``) this method:

        1. scores every pair of distinct ids with ``pearson_sp``, rounds the
           score to 5 decimals and stores it in a fresh ``SimMatrix``;
        2. keeps, for every id, the entries with the highest similarity
           (``self.config.user_near_num`` per user,
           ``self.config.item_near_num`` per item) in a ``defaultdict(dict)``;
        3. writes each structure to a hard-coded path under ``../data``
           through ``util.save_data``.

        The results are exposed as ``self.user_sim``, ``self.item_sim``,
        ``self.user_k_neibor`` and ``self.item_k_neibor``.
        """
        from collections import defaultdict

        self.user_sim = SimMatrix()
        self.item_sim = SimMatrix()
        self.user_k_neibor = defaultdict(dict)
        self.item_k_neibor = defaultdict(dict)

        def fill_pairwise(sim_matrix, ids, get_vector):
            # Score every ordered pair of distinct ids, skipping pairs the
            # matrix already reports as present.
            for first in ids:
                for second in ids:
                    if first == second or sim_matrix.contains(first, second):
                        continue
                    score = pearson_sp(get_vector(first), get_vector(second))
                    sim_matrix.set(first, second, round(score, 5))

        def keep_top_k(sim_matrix, ids, k, neighbours):
            # For each id keep the k entries with the largest similarity.
            for key in ids:
                ranked = sorted(sim_matrix[key].items(),
                                key=lambda entry: entry[1],
                                reverse=True)
                neighbours[key] = dict(ranked[:k])

        # compute user-user similarity matrix
        print('constructing user-user similarity matrix...')
        fill_pairwise(self.user_sim, self.rg.user, self.rg.get_row)
        util.save_data(self.user_sim, '../data/sim/ft_08_uu_tricf_cv0.pkl')

        # compute the k neighbors of user
        keep_top_k(self.user_sim, self.rg.user, self.config.user_near_num,
                   self.user_k_neibor)
        util.save_data(
            self.user_k_neibor, '../data/neibor/ft_08_uu_' +
            str(self.config.user_near_num) + '_neibor_tricf_cv0.pkl')

        # compute item-item similarity matrix
        print('constructing item-item similarity matrix...')
        fill_pairwise(self.item_sim, self.rg.item, self.rg.get_col)
        util.save_data(self.item_sim, '../data/sim/ft_08_ii_tricf_cv0.pkl')

        # compute the k neighbors of item
        keep_top_k(self.item_sim, self.rg.item, self.config.item_near_num,
                   self.item_k_neibor)
        util.save_data(
            self.item_k_neibor, '../data/neibor/ft_08_ii_' +
            str(self.config.item_near_num) + '_neibor_tricf_cv0.pkl')
Example #2
0
    def init_model(self):
        """Fill ``self.item_sim`` with test-item vs. item similarities.

        A fresh ``SimMatrix`` is created, then every item in
        ``self.rg.testSet_i`` is paired with every different item in
        ``self.rg.item``.  Pairs the matrix already contains are skipped; the
        others are scored with ``pearson_sp`` on the two item columns and
        stored unrounded.
        """
        matrix = SimMatrix()
        self.item_sim = matrix

        for i_test in self.rg.testSet_i:
            for i_train in self.rg.item:
                # Nothing to do for the item itself or an already scored pair.
                if i_test == i_train or matrix.contains(i_test, i_train):
                    continue
                score = pearson_sp(self.rg.get_col(i_test),
                                   self.rg.get_col(i_train))
                matrix.set(i_test, i_train, score)
Example #3
0
    def init_model(self):
        """Fill ``self.user_sim`` with test-user vs. user similarities.

        A fresh ``SimMatrix`` is created, then every user in
        ``self.rg.testSet_u`` is paired with every different user in
        ``self.rg.user``.  Pairs the matrix already contains are skipped; the
        others are scored with ``pearson_sp`` on the two user rows and stored
        unrounded.
        """
        matrix = SimMatrix()
        self.user_sim = matrix

        for u_test in self.rg.testSet_u:
            for u_train in self.rg.user:
                # Nothing to do for the user itself or an already scored pair.
                if u_test == u_train or matrix.contains(u_test, u_train):
                    continue
                score = pearson_sp(self.rg.get_row(u_test),
                                   self.rg.get_row(u_train))
                matrix.set(u_test, u_train, score)
 def get_neighbor(self, user, item):
     """Return the items rated by ``user`` that are most similar to ``item``.

     Every item returned by ``self.rg.user_rated_items(user)`` other than
     ``item`` itself is scored against ``item`` with ``pearson_sp`` on the
     two item columns (rounded to 4 decimals).  The ids of the
     ``self.config.item_near_num`` best-scoring items are returned as a
     tuple, best first, and cached in ``self.user_item_nei[user][item]``.

     Returns:
         The cached tuple when one exists, the freshly computed tuple
         otherwise, or an empty list when there is no candidate item.
         Empty results are not cached.
     """
     if user in self.user_item_nei and item in self.user_item_nei[user]:
         return self.user_item_nei[user][item]

     scores = {}
     for rated in self.rg.user_rated_items(user):
         if rated == item:
             continue
         sim = pearson_sp(self.rg.get_col(item), self.rg.get_col(rated))
         scores[rated] = round(sim, 4)

     ranked = sorted(scores.items(), key=lambda entry: entry[1],
                     reverse=True)[:self.config.item_near_num]
     if not ranked:
         return []

     neighbours = tuple(rated for rated, _ in ranked)
     # setdefault keeps this working whether user_item_nei is a plain dict
     # or a defaultdict; a plain dict used to raise KeyError for a new user.
     self.user_item_nei.setdefault(user, {})[item] = neighbours
     return neighbours
Example #5
0
 def get_sim(self, u, k):
     """Return the rescaled ``pearson_sp`` score between users ``u`` and ``k``.

     The raw score of the two user rows is shifted by 1.0 and halved.  The
     original comment states the intent is to fit the value into
     [0.0, 1.0]; that holds if ``pearson_sp`` yields values in [-1, 1]
     (not verified here).
     """
     row_u = self.rg.get_row(u)
     row_k = self.rg.get_row(k)
     raw = pearson_sp(row_u, row_k)
     return (raw + 1.0) / 2.0
Example #6
0
    def build_user_item_sim_CF(self,
                               kfold,
                               user_near_num=50,
                               item_near_num=50,
                               load_save_sim=False):
        """Prepare item/user similarity matrices and top-k neighbour tables.

        Args:
            kfold: Fold identifier handed to ``RatingGetter`` and
                ``MetaGetter``.  It is used for nothing else.
            user_near_num: Number of most-similar users kept per user.
            item_near_num: Number of most-similar items kept per item.
            load_save_sim: When true, the similarity matrices are read from
                their pickle files instead of being computed.  The neighbour
                tables are loaded as well, but are then rebuilt from the
                similarity matrices and saved again.

        Side effects:
            Sets ``self.rg``, ``self.mg``, ``self.item_sim`` and (in load
            mode) ``self.user_sim``; fills ``self.item_k_neibor`` and
            ``self.user_k_neibor``; writes pickle files under ``../data``,
            creating ``../data/sim`` and ``../data/neibor`` when missing.

        The user ranking used to be sliced as ``[kfold:user_near_num]``, which
        dropped the ``kfold`` most similar users on every fold other than 0.
        It now keeps the top ``user_near_num`` users, mirroring the item side.

        NOTE(review): ``self.user_sim``, ``self.user_k_neibor`` and
        ``self.item_k_neibor`` are not created here when ``load_save_sim`` is
        false, so they must already exist on the instance.
        NOTE(review): every path ends in ``cv0`` regardless of ``kfold`` --
        confirm that folds are meant to share these files.
        """
        self.rg = RatingGetter(kfold)
        self.mg = MetaGetter(kfold)

        def ensure_dir(folder):
            # Create the output folder on first use so saving cannot fail on
            # a missing directory.
            if not os.path.exists(folder):
                os.makedirs(folder)
                print('%s folder has been established.' % folder)

        def by_score(entry):
            # Sort key: the similarity value of an (id, similarity) pair.
            return entry[1]

        # compute item-item similarity matrix
        print('构建 item-item 相似度矩阵  ...')
        item_sim_path = ('../data/sim/%s_08_ii_cucmemf_cv0.pkl' %
                         self.config.dataset_name)
        if load_save_sim:
            self.item_sim = util.load_data(item_sim_path)
        else:
            # The item similarity computation is encapsulated in MetaGetter.
            self.item_sim = self.mg.getSimMatrix(jaccard_sim)
            ensure_dir('../data/sim')
            util.save_data(self.item_sim, item_sim_path)

        # compute the k neighbors of item
        item_nei_path = ('../data/neibor/%s_08_ii_%s_neibor_cucmemf_cv0.pkl' %
                         (self.config.dataset_name, item_near_num))
        if load_save_sim:
            # NOTE(review): the loaded table is overwritten entry by entry
            # just below -- confirm whether load mode should skip the rebuild.
            self.item_k_neibor = util.load_data(item_nei_path)
        for item in self.mg.item:
            ranked = sorted(self.item_sim[item].items(),
                            key=by_score,
                            reverse=True)
            self.item_k_neibor[item] = dict(ranked[:item_near_num])
        ensure_dir('../data/neibor')
        util.save_data(self.item_k_neibor, item_nei_path)

        # compute user-user similarity matrix
        print('构建 user-user 相似度矩阵 ...')
        user_sim_path = ('../data/sim/%s_08_uu_cucmemf_cv0.pkl' %
                         self.config.dataset_name)
        if load_save_sim:
            self.user_sim = util.load_data(user_sim_path)
        else:
            for u1 in tqdm(self.rg.user):
                for u2 in self.rg.user:
                    if u1 == u2 or self.user_sim.contains(u1, u2):
                        continue
                    # Pearson similarity? Switch to cosine similarity?
                    sim = pearson_sp(self.rg.get_row(u1),
                                     self.rg.get_row(u2))
                    self.user_sim.set(u1, u2, round(sim, 5))
            ensure_dir('../data/sim')
            util.save_data(self.user_sim, user_sim_path)

        # compute the k neighbors of user
        user_nei_path = ('../data/neibor/%s_08_uu_%s_neibor_cucmemf_cv0.pkl' %
                         (self.config.dataset_name, user_near_num))
        if load_save_sim:
            # NOTE(review): as for items, the loaded table is rebuilt below.
            self.user_k_neibor = util.load_data(user_nei_path)
        for user in self.rg.user:
            ranked = sorted(self.user_sim[user].items(),
                            key=by_score,
                            reverse=True)
            # Keep the top user_near_num users; do not offset by the fold id.
            self.user_k_neibor[user] = dict(ranked[:user_near_num])

        ensure_dir('../data/neibor')
        util.save_data(self.user_k_neibor, user_nei_path)
Example #7
0
    def build_user_item_sim_CF(self,
                               kfold,
                               user_near_num=50,
                               item_near_num=50,
                               load_save_sim=False):
        """Prepare user/item similarity matrices and top-k neighbour tables.

        Args:
            kfold: Fold identifier handed to ``RatingGetter``.  It is used
                for nothing else.
            user_near_num: Number of most-similar users kept per user.
            item_near_num: Number of most-similar items kept per item.
            load_save_sim: When true, both similarity matrices are read from
                their pickle files instead of being computed with
                ``pearson_sp``.  The neighbour tables are always rebuilt.

        Side effects:
            Sets ``self.rg`` and (in load mode) ``self.user_sim`` /
            ``self.item_sim``; fills ``self.user_k_neibor`` and
            ``self.item_k_neibor``; writes pickle files under ``../data``,
            creating ``../data/sim`` and ``../data/neibor`` when missing.

        The user ranking used to be sliced as ``[kfold:user_near_num]``, which
        dropped the ``kfold`` most similar users on every fold other than 0.
        It now keeps the top ``user_near_num`` users, mirroring the item side.

        NOTE(review): ``self.user_sim``, ``self.item_sim``,
        ``self.user_k_neibor`` and ``self.item_k_neibor`` are not created
        here, so they must already exist on the instance (the matrices only
        when ``load_save_sim`` is false).
        NOTE(review): every path ends in ``cv0`` regardless of ``kfold`` --
        confirm that folds are meant to share these files.
        """
        self.rg = RatingGetter(kfold)

        def by_score(entry):
            # Sort key: the similarity value of an (id, similarity) pair.
            return entry[1]

        # compute user-user similarity matrix
        print('构建 user-user 相似度矩阵 ...')
        if load_save_sim:
            self.user_sim = util.load_data(
                '../data/sim/db_08_uu_tricf_cv0.pkl')
        else:
            for u1 in self.rg.user:
                for u2 in self.rg.user:
                    if u1 == u2 or self.user_sim.contains(u1, u2):
                        continue
                    # Pearson similarity? Switch to cosine similarity.
                    sim = pearson_sp(self.rg.get_row(u1),
                                     self.rg.get_row(u2))
                    self.user_sim.set(u1, u2, round(sim, 5))
            if not os.path.exists('../data/sim'):
                os.makedirs('../data/sim')
                print('../data/sim folder has been established.')
            util.save_data(self.user_sim, '../data/sim/db_08_uu_tricf_cv0.pkl')

        # compute the k neighbors of user
        for user in self.rg.user:
            ranked = sorted(self.user_sim[user].items(),
                            key=by_score,
                            reverse=True)
            # Keep the top user_near_num users; do not offset by the fold id.
            self.user_k_neibor[user] = dict(ranked[:user_near_num])

        if not os.path.exists('../data/neibor'):
            os.makedirs('../data/neibor')
            print('../data/neibor folder has been established.')

        util.save_data(
            self.user_k_neibor, '../data/neibor/db_08_uu_' +
            str(user_near_num) + '_neibor_tricf_cv0.pkl')

        # compute item-item similarity matrix
        print('构建 item-item 相似度矩阵  ...')
        if load_save_sim:
            self.item_sim = util.load_data(
                '../data/sim/db_08_ii_tricf_cv0.pkl')
        else:
            for i1 in self.rg.item:
                for i2 in self.rg.item:
                    if i1 == i2 or self.item_sim.contains(i1, i2):
                        continue
                    # Pearson similarity? Switch to cosine similarity.
                    sim = pearson_sp(self.rg.get_col(i1),
                                     self.rg.get_col(i2))
                    self.item_sim.set(i1, i2, round(sim, 5))
            util.save_data(self.item_sim, '../data/sim/db_08_ii_tricf_cv0.pkl')

        # compute the k neighbors of item
        for item in self.rg.item:
            ranked = sorted(self.item_sim[item].items(),
                            key=by_score,
                            reverse=True)
            self.item_k_neibor[item] = dict(ranked[:item_near_num])
        util.save_data(
            self.item_k_neibor, '../data/neibor/db_08_ii_' +
            str(item_near_num) + '_neibor_tricf_cv0.pkl')
Example #8
0
 def get_sim(self, u, k):
     """Return the rescaled ``pearson_sp`` score between users ``u`` and ``k``.

     The raw score of the two user rows is shifted by 1.0 and halved.  The
     original comment says this is done to bring the range into [0, 1];
     that holds if ``pearson_sp`` yields values in [-1, 1] (not verified
     here).  The original comment also recorded the figure 0.83626 without
     saying what it measures.
     """
     row_u = self.rg.get_row(u)
     row_k = self.rg.get_row(k)
     raw = pearson_sp(row_u, row_k)
     return (raw + 1.0) / 2.0