def Search(self):
    """Greedily select up to ``self.k`` ids from ``self.id_list``.

    A random id seeds the selection. Every following round adds the id with
    the most neighbours that are neither selected nor adjacent to the
    selection so far, i.e. the id maximising ``|N(v) - (N(S) | S)|``.

    Graph nodes are looked up as ``str(id)``; an id whose string form is not
    a node of ``self.g`` counts as having no neighbours.

    Returns:
        list: the selected ids converted with ``int``, in no particular
        order. Shorter than ``self.k`` when ``self.id_list`` holds fewer
        distinct ids, and empty when ``self.id_list`` is empty.
    """
    if len(self.id_list) == 0:
        return []

    neighbour_cache = {}

    def neighbours_of(candidate):
        # The graph is not modified here, so each neighbourhood is built once
        # instead of once per candidate per round.
        if candidate not in neighbour_cache:
            node = str(candidate)
            neighbour_cache[candidate] = (
                set(all_neighbors(self.g, node)) if node in self.g else set()
            )
        return neighbour_cache[candidate]

    # Start from a random id.
    seed = random.choice(self.id_list)
    profiles = {seed}

    # `covered` is N(S) | S expressed in graph-node (string) keys, so the set
    # difference below compares like with like even when ids are not strings.
    covered = set(neighbours_of(seed))
    covered.add(str(seed))

    while len(profiles) < self.k:
        candidates = [c for c in self.id_list if c not in profiles]
        if not candidates:
            # Fewer distinct ids than requested: return what was selected.
            break
        # Ties go to the earliest candidate in id_list order.
        to_add = max(candidates, key=lambda c: len(neighbours_of(c) - covered))
        profiles.add(to_add)
        # Same str() lookup as everywhere else in this method.
        covered |= neighbours_of(to_add)
        covered.add(str(to_add))

    return [int(i) for i in profiles]
def Search(self):
    """Greedily select up to ``self.k`` node indices and return their ids.

    Nodes are the integer indices ``0 .. self.num - 1``. A random index seeds
    the selection; every following round adds the index with the most
    neighbours that are neither selected nor adjacent to the selection so
    far, i.e. the index maximising ``|N(v) - (N(S) | S)|``. An index that is
    not a node of ``self.g`` counts as having no neighbours.

    The size of the selection is printed after every greedy round.

    Returns:
        list: ``int(self.id_list[i])`` for every selected index ``i``, in no
        particular order. Shorter than ``self.k`` when ``self.num`` is
        smaller, and empty when ``self.num`` is not positive.
    """
    if self.num <= 0:
        return []

    def neighbours_of(node):
        return set(all_neighbors(self.g, node)) if node in self.g else set()

    # randrange excludes the upper bound, so the seed is always a valid index
    # (randint(0, num) could return num itself).
    seed = random.randrange(self.num)

    # The seed is part of the selection; its neighbours must not be treated
    # as covered by a node that was never actually chosen.
    profiles = {seed}
    covered = neighbours_of(seed)  # N(S) | S
    covered.add(seed)

    while len(profiles) < self.k:
        candidates = [i for i in range(self.num) if i not in profiles]
        if not candidates:
            # Every index is already selected.
            break
        # Ties go to the smallest index.
        to_add = max(candidates, key=lambda i: len(neighbours_of(i) - covered))
        profiles.add(to_add)
        covered |= neighbours_of(to_add)
        covered.add(to_add)
        # Progress trace: size of the selection so far.
        print(len(profiles))

    return [int(self.id_list[i]) for i in profiles]
def Rt(self, profiles):
    """Return the share of non-profile nodes adjacent to some profile.

    Each id in ``profiles`` is translated to its graph node through
    ``self.id_dic[str(id)]``. The result is

        |N(S) - S| / (self.num - |profiles|)

    where ``S`` is the set of profile nodes and ``N(S)`` the union of their
    neighbourhoods in ``self.g``. Profiles whose node is not in the graph
    contribute no neighbours.

    Args:
        profiles: sized iterable of ids that are keys of ``self.id_dic``
            once converted with ``str``.

    Returns:
        float: the ratio above; ``0.0`` when no non-profile nodes remain.

    Raises:
        KeyError: if an id is missing from ``self.id_dic``.
    """
    profile_nodes = [self.id_dic[str(u)] for u in profiles]

    neighbours = set()
    for node in profile_nodes:
        if node in self.g:
            neighbours.update(all_neighbors(self.g, node))

    # Neighbours are graph nodes, so the profiles are removed as graph nodes
    # too rather than as raw ids.
    reached = len(neighbours - set(profile_nodes))
    remaining = self.num - len(profiles)
    if remaining <= 0:
        return 0.0
    return 1.0 * reached / remaining
def GetNeighbours(self, u):
    """Return the ids of the users adjacent to ``u`` in ``self.g``.

    ``u`` is translated to its graph node through ``self.id_dic[str(u)]``;
    each neighbouring node ``i`` is translated back with
    ``int(self.id_list[i])``.

    Args:
        u: id of the user to look up.

    Returns:
        set: neighbour ids as ints. Empty when ``u`` is unknown to
        ``self.id_dic`` or its node is not in the graph.
    """
    try:
        node = self.id_dic[str(u)]
    except KeyError:
        # An unknown id is treated like a user without any edges.
        return set()
    if node not in self.g:
        return set()
    return {int(self.id_list[i]) for i in all_neighbors(self.g, node)}
def Sample(self, gamma, clusters, id_cluster):
    """Greedily select up to ``self.need`` rows of ``self.users``.

    In every round each unselected member ``i`` of each cluster ``c`` is
    scored as

        |c| / |users| * (gamma * (CalcRc(S | {i}, c) - Rc[c])
                         + (1 - gamma) * |N(i) & (id_cluster[c] - covered[c])|)

    where ``S`` is the current selection, ``Rc[c]`` the last ``CalcRc`` value
    stored for the cluster, ``N(i)`` the graph neighbours of the row's
    ``'userid'`` (empty when that id is not a node of ``self.g``) and
    ``covered[c]`` the neighbours of members already selected from ``c``.
    The best candidate of every cluster is collected and the overall best is
    added to the selection. The size of the selection is printed after every
    round.

    Args:
        gamma: weight of the ``CalcRc`` gain; ``1 - gamma`` weights the
            newly covered neighbour count.
        clusters: mapping of cluster key to the row positions (used with
            ``self.users.iloc``) that belong to the cluster.
        id_cluster: mapping of cluster key to a set that is intersected with
            neighbour sets. Presumably the graph nodes of the cluster and
            keyed like ``clusters`` -- confirm against the caller.

    Returns:
        list: the ``'userid'`` of every selected row, in no particular
        order. Shorter than ``self.need`` when the clusters hold fewer rows.
    """
    total = len(self.users)
    profiles = set()
    rc = {key: 0 for key in clusters}
    covered = {key: set() for key in id_cluster}

    def neighbours_of(position):
        # Graph nodes are the 'userid' values of the rows.
        userid = self.users.iloc[position]['userid']
        if userid in self.g:
            return set(all_neighbors(self.g, userid))
        return set()

    while len(profiles) < self.need:
        best = {}
        for key in clusters:
            members = clusters[key]
            weight = len(members) * 1.0 / total
            uncovered = id_cluster[key] - covered[key]
            gains = {}
            for i in members:
                if i in profiles:
                    continue
                gains[i] = weight * (
                    gamma * self.CalcRc(profiles | {i}, members)
                    - gamma * rc[key]
                    + (1 - gamma) * len(neighbours_of(i) & uncovered)
                )
            if not gains:
                # Every member of this cluster is already selected.
                continue
            winner = max(gains.items(), key=lambda pair: pair[1])
            best[winner[0]] = winner[1]

        if not best:
            # All clusters are exhausted before reaching self.need.
            break

        to_add = max(best.items(), key=lambda pair: pair[1])[0]
        profiles.add(to_add)

        # First cluster that lists the new row; one always exists because
        # the row was taken from a cluster above.
        belong = next(key for key in clusters if to_add in clusters[key])
        # Progress trace: size of the selection so far.
        print(len(profiles))

        # Refresh the per-cluster state the next round's scores depend on.
        covered[belong] |= neighbours_of(to_add)
        rc[belong] = self.CalcRc(profiles, clusters[belong])

    return [self.users.iloc[i]['userid'] for i in profiles]
def Rt(self, users, profiles):
    """Return the share of ``users`` adjacent to a profile without being one.

    Graph nodes are looked up as ``str(id)``. The result is

        |(N(S) - S) & U| / len(users)

    where ``S`` is the set of profile nodes, ``N(S)`` the union of their
    neighbourhoods in ``self.g`` and ``U`` the set of user nodes. Profiles
    that are not nodes of the graph contribute no neighbours.

    Args:
        users: sized iterable of user ids forming the reference population.
        profiles: iterable of selected ids.

    Returns:
        float: the ratio above; ``0.0`` when ``users`` is empty.
    """
    # Everything is compared in graph-node (string) form; mixing raw ids
    # with string node keys would make the set algebra below a no-op.
    profile_nodes = {str(u) for u in profiles}
    user_nodes = {str(u) for u in users}

    neighbours = set()
    for node in profile_nodes:
        if node in self.g:
            neighbours.update(all_neighbors(self.g, node))

    population = len(users)
    if population == 0:
        return 0.0
    reached = len((neighbours - profile_nodes) & user_nodes)
    return 1.0 * reached / population
def CalcRt(self, profiles, cluster):
    """Return the share of a cluster adjacent to the selected rows.

    Rows are addressed by position in ``self.users``; the graph node of a
    row is ``str(row['userid'])``. The result is

        |N(S) & C| / |C|

    where ``N(S)`` is the union of the graph neighbourhoods of the selected
    rows and ``C`` the set of nodes of the cluster rows. Selected rows whose
    node is not in ``self.g`` contribute no neighbours.

    :param profiles: row positions of the representative subset
    :param cluster: row positions of one cluster
    :return: the ratio above as a float; ``0.0`` for an empty cluster
    """
    # Cluster members in graph-node (string) form, so they can be
    # intersected with the neighbour sets.
    cluster_nodes = {str(self.users.iloc[i]['userid']) for i in cluster}
    if not cluster_nodes:
        return 0.0

    neighbours = set()
    for u in profiles:
        node = str(self.users.iloc[u]['userid'])
        # Test the same node whose neighbours are fetched, not the row
        # position.
        if node in self.g:
            neighbours.update(all_neighbors(self.g, node))

    return len(neighbours & cluster_nodes) * 1.0 / len(cluster_nodes)
def GetNeighbours(self, u):
    """Return the neighbours of ``u`` in ``self.g`` as a set of ints.

    The node is looked up under ``str(u)``; every neighbouring node is
    converted with ``int``. A node that is not in the graph yields an empty
    set.
    """
    node = str(u)
    if node not in self.g:
        return set()
    adjacent = set()
    for other in all_neighbors(self.g, node):
        adjacent.add(int(other))
    return adjacent