Example #1
0
 def Search(self):
     """Greedily pick ``self.k`` ids from ``self.id_list``.

     A random seed id is chosen first.  Each further step adds the
     candidate v that maximises |N(v) - (N(S) | S)|, where S is the set
     selected so far and N(.) is the neighbourhood in ``self.g``.

     Graph lookups always use ``str(id)`` as the node key, so the covered
     set is kept in that same string form.  (The original looked up the
     newly added id without ``str()``, unlike every other lookup.)

     :return: list of the selected ids converted with ``int``.  It may be
         shorter than ``self.k`` when there are not enough candidates.
     """

     def node_neighbours(node_id):
         # Neighbour set of an id, or an empty set when it is not a node.
         key = str(node_id)
         return set(all_neighbors(self.g, key)) if key in self.g else set()

     seed = random.choice(self.id_list)
     profiles = set([seed])
     # covered == N(S) | S, expressed as graph node keys.
     covered = node_neighbours(seed)
     covered.add(str(seed))
     while len(profiles) < self.k:
         gains = {
             i: len(node_neighbours(i) - covered)
             for i in self.id_list if i not in profiles
         }
         if not gains:
             # Every id is already selected; max() would raise on empty.
             break
         to_add = max(gains.items(), key=lambda item: item[1])[0]
         profiles.add(to_add)
         covered |= node_neighbours(to_add)
         covered.add(str(to_add))
     return [int(i) for i in profiles]
Example #2
0
 def Search(self):
     """Greedily pick ``self.k`` node indices out of ``range(self.num)``.

     A random seed index only pre-populates the covered-neighbour set; the
     seed itself is not added to the result.
     NOTE(review): a sibling variant of this method also selects the seed;
     confirm which behaviour is intended.

     Each step adds the index v maximising |N(v) - (N(S) | S)|.  The
     progress count is printed after every addition.

     :return: list of ``int(self.id_list[i])`` for every selected index i.
         It may be shorter than ``self.k`` when candidates run out.
     """

     def node_neighbours(node):
         # Neighbour set of a node, or an empty set when it is not in g.
         return set(all_neighbors(self.g, node)) if node in self.g else set()

     profiles = set()
     # randint is inclusive on both ends, so the upper bound is num - 1 to
     # stay inside the candidate range used below.
     seed = random.randint(0, max(self.num - 1, 0))
     neighbours = node_neighbours(seed)
     while len(profiles) < self.k:
         # Explicit grouping: '-' binds tighter than '|', so the original
         # unparenthesised form added |profiles| to every score.
         covered = neighbours | profiles
         results = {
             i: len(node_neighbours(i) - covered)
             for i in range(self.num) if i not in profiles
         }
         if not results:
             # No candidates left; max() would raise on an empty dict.
             break
         to_add = max(results.items(), key=lambda item: item[1])[0]
         profiles.add(to_add)
         # Update the covered neighbours.
         neighbours |= node_neighbours(to_add)
         print(len(profiles))
     return [int(self.id_list[i]) for i in profiles]
Example #3
0
 def Rt(self, profiles):
     """Return the share of non-profile nodes adjacent to some profile.

     Each profile id is translated to its graph node via
     ``self.id_dic[str(u)]``.  NS counts the neighbours of those nodes
     that are not profile nodes themselves; VS is
     ``self.num - len(profiles)``.

     :param profiles: iterable of ids that are keys (as ``str``) of
         ``self.id_dic``.
     :return: ``NS / VS`` as a float, or ``0.0`` when ``VS`` is zero.
     :raises KeyError: if an id is missing from ``self.id_dic``.
     """
     # Work in graph-node space throughout: the neighbours returned by the
     # graph are nodes, so the profiles must be subtracted as nodes too.
     profile_nodes = [self.id_dic[str(u)] for u in profiles]
     neighbours = set()
     for node in profile_nodes:
         if node in self.g:
             neighbours.update(all_neighbors(self.g, node))
     NS = len(neighbours - set(profile_nodes))
     VS = self.num - len(profiles)
     if VS == 0:
         # Every node is a profile: nothing left to cover.
         return 0.0
     return 1.0 * NS / VS
Example #4
0
 def GetNeighbours(self, u):
     """Return the neighbours of ``u`` as a set of ints.

     ``u`` is mapped to its graph node through ``self.id_dic[str(u)]``;
     every neighbouring node is mapped back through ``self.id_list`` and
     converted with ``int``.  An empty set is returned when the node is
     not in ``self.g``.
     """
     node = self.id_dic[str(u)]
     if node not in self.g:
         return set()
     return set(int(self.id_list[v]) for v in all_neighbors(self.g, node))
Example #5
0
 def Sample(self, gamma, clusters, id_cluster):
     """Greedily select ``self.need`` rows of ``self.users``.

     In every round the best not-yet-selected member of each cluster is
     scored, and the overall best-scoring member is added.  A candidate i
     in cluster c scores

         len(c) / len(users) * (gamma * (CalcRc(S | {i}, c) - Rc[c])
                                + (1 - gamma) * fresh)

     where ``fresh`` is the number of neighbours of i's ``userid`` that
     lie in ``id_cluster[c]`` and are not yet covered for that cluster.
     The progress count is printed after every addition.

     :param gamma: weight between the CalcRc gain and the coverage gain.
     :param clusters: dict mapping cluster key to a collection of row
         positions in ``self.users``.
     :param id_cluster: dict mapping cluster key to a set that is
         intersected with graph neighbours (presumably the cluster's
         user ids -- confirm against the caller).
     :return: list of ``userid`` values of the selected rows.  It may be
         shorter than ``self.need`` when every cluster is exhausted.
     """
     num = len(self.users)
     profiles = set()
     # Initialise Rc and Rt of every cluster.
     Rc = {key: 0 for key in clusters}
     Rt = {key: 0 for key in clusters}
     neighbours = {key: set() for key in id_cluster}
     while len(profiles) < self.need:
         results = {}
         # Take the best remaining candidate from each cluster.
         for key in clusters:
             members = clusters[key]
             weight = len(members) * 1.0 / num
             temp = {}
             for i in members:
                 if i in profiles:
                     continue
                 userid = self.users.iloc[i]['userid']
                 rc_gain = (gamma * self.CalcRc(profiles | set([i]), members) -
                            gamma * Rc[key])
                 if userid in self.g:
                     fresh = len(
                         set(all_neighbors(self.g, userid)) &
                         (id_cluster[key] - neighbours[key]))
                 else:
                     fresh = 0
                 temp[i] = weight * (rc_gain + (1 - gamma) * fresh)
             if not temp:
                 # Cluster fully selected; max() would raise on empty.
                 continue
             item = max(temp.items(), key=lambda pair: pair[1])
             results[item[0]] = item[1]
         if not results:
             # Nothing left to add in any cluster.
             break
         # Add the candidate with the largest gain overall.
         to_add = max(results.items(), key=lambda pair: pair[1])[0]
         profiles.add(to_add)
         # Find the cluster that to_add belongs to.
         for key in clusters:
             if to_add in clusters[key]:
                 belong = key
                 break
         print(len(profiles))
         # Update the covered neighbours, Rc and Rt of that cluster.
         added_id = self.users.iloc[to_add]['userid']
         if added_id in self.g:
             neighbours[belong] |= set(all_neighbors(self.g, added_id))
         Rc[belong] = self.CalcRc(profiles, clusters[belong])
         # Rt is maintained here but not read anywhere in this method.
         Rt[belong] = self.CalcRt(profiles, clusters[belong])
     return [self.users.iloc[i]['userid'] for i in profiles]
Example #6
0
 def Rt(self, users, profiles):
     """Return the share of ``users`` adjacent to some profile.

     The graph is queried with ``str(id)`` node keys, so profiles and
     users are converted to the same string form before the set
     operations; otherwise non-string ids would never match a neighbour.

     :param users: collection of ids forming the denominator.
     :param profiles: iterable of selected ids.
     :return: ``NS / len(users)`` as a float, where NS counts neighbours
         of the profiles that are in ``users`` and are not profiles;
         ``0.0`` when ``users`` is empty.
     """
     profile_nodes = set(str(u) for u in profiles)
     user_nodes = set(str(u) for u in users)
     neighbours = set()
     for node in profile_nodes:
         if node in self.g:
             neighbours.update(all_neighbors(self.g, node))
     # Adjacent nodes that are users and not already profiles.
     NS = len((neighbours - profile_nodes) & user_nodes)
     VS = len(users)
     if VS == 0:
         return 0.0
     return 1.0 * NS / VS
Example #7
0
    def CalcRt(self, profiles, cluster):
        '''
        Return the fraction of a cluster adjacent to the given profiles.

        Rows are resolved to graph nodes via
        ``str(self.users.iloc[row]['userid'])``.  The membership test and
        the neighbour lookup both use that key (the original tested the
        row index instead), and the cluster ids are compared in the same
        string form.

        :param profiles: representative subset, as row positions in
            ``self.users``
        :param cluster: one cluster, as row positions in ``self.users``
        :return: ``len(N(profiles) & cluster) / len(cluster)`` as a float,
            or ``0.0`` for an empty cluster
        '''
        clusteri = set(str(self.users.iloc[i]['userid']) for i in cluster)
        if not clusteri:
            return 0.0
        neighbours = set()
        for u in profiles:
            node = str(self.users.iloc[u]['userid'])
            if node in self.g:
                neighbours.update(all_neighbors(self.g, node))
        return len(neighbours & clusteri) * 1.0 / len(clusteri)
Example #8
0
 def GetNeighbours(self, u):
     """Return the neighbours of ``u`` in ``self.g`` as a set of ints.

     The graph is queried with ``str(u)``; an empty set is returned when
     that key is not in the graph.
     """
     key = str(u)
     if key not in self.g:
         return set()
     return set(map(int, all_neighbors(self.g, key)))