def consecutive_sn_similarity(dynamicCommunity: tn.DynCommunitiesSN, score=None):
    """
    Similarity between partitions in consecutive snapshots.

    Compute a similarity score between each pair of successive partitions.

    :param dynamicCommunity: the dynamic partition to evaluate
    :param score: the score to use for computing the similarity between each pair
        of snapshots. default: Overlapping NMI
    :return: pair (list of scores, list of partition sizes (avg both partitions))
    """
    if score is None:
        # We use onmi because the number of labels can be different
        score = onmi

    scores = []
    sizes = []

    # ordered list of partitions, one per snapshot
    com_snapshots = list(dynamicCommunity.snapshot_communities().values())

    # compare each snapshot's partition with the following one
    for i in range(len(com_snapshots) - 1):
        partition_before = list(com_snapshots[i].values())
        partition_after = list(com_snapshots[i + 1].values())

        # total number of (node, community) affiliations in each partition
        elts_before = sum(len(x) for x in partition_before)
        elts_after = sum(len(x) for x in partition_after)

        scores.append(score(partition_before, partition_after))
        sizes.append((elts_after + elts_before) / 2)

    return scores, sizes
def quality_at_each_step(dynamicCommunities: tn.DynCommunitiesSN, dynamicGraph: tn.DynGraphSN, score=None):
    """
    Compute a community quality at each step

    :param dynamicCommunities: dynamic communities as SN
    :param dynamicGraph: the dynamic graph on which the quality is evaluated
    :param score: score to use, default: Modularity
    :return: pair(scores, sizes)
    """
    if score is None:
        score = nx.algorithms.community.modularity

    scores = []
    sizes = []

    # for each step
    for t, affils in dynamicCommunities.snapshot_communities().items():
        g = dynamicGraph.snapshots(t)
        partition = list(affils.values())
        try:
            scores.append(score(g, partition))
        except Exception:
            # The score may legitimately fail (e.g. partition not covering
            # the graph); record None so lists stay aligned with snapshots.
            scores.append(None)
        sizes.append(len(g.nodes))
    return scores, sizes
def similarity_at_each_step(dynamicCommunityReference: tn.DynCommunitiesSN,
                            dynamicCommunityObserved: tn.DynCommunitiesSN,
                            score=None):
    """
    Compute similarity at each step

    It takes into account the fact that the reference might be incomplete.
    (remove from the observations all nodes/times not present in the reference)

    :param dynamicCommunityReference: the dynamic partition to use as reference
    :param dynamicCommunityObserved: the dynamic partition to evaluate
    :param score: score to use, default adjusted NMI
    :return: pair (list of scores, list of sizes)
    """
    if score is None:
        score = sklearn.metrics.adjusted_mutual_info_score

    scores = []
    sizes = []

    comsToEvaluate = dynamicCommunityObserved.snapshot_affiliations()

    # for each step of the reference
    for t, affils in dynamicCommunityReference.snapshot_affiliations().items():
        affilReference = []
        affilToEvaluate = []
        # for each node, take the first affiliation listed at this step
        for n, comId in affils.items():
            affilReference.append(list(comId)[0])
            if n in comsToEvaluate[t]:
                affilToEvaluate.append(list(comsToEvaluate[t][n])[0])
            else:
                # node missing from the observed partition: dedicated label
                # so both label lists keep the same length
                affilToEvaluate.append("-1")
        scores.append(score(affilReference, affilToEvaluate))
        sizes.append(len(affilReference))
    return scores, sizes
def write_com_SN(dyn_communities: tn.DynCommunitiesSN, output_dir, asNodeSet=True):
    """
    Write directory, 1 file = snapshot_affiliations of a snapshot

    Write dynamic snapshot_affiliations as a directory containing one file for
    each snapshot. Two possible formats:

    **Affiliations:**
    ::

        node1   com1    com2
        node2   com1
        node3   com2    com3    com4

    **Node Sets:**
    ::

        com:com1    n1  n2  n3
        com:another_com n1  n4  n5

    :param dyn_communities: dynamic communities to write
    :param output_dir: address of the directory to write
    :param asNodeSet: if True, node sets, otherwise, snapshot_affiliations
    """
    os.makedirs(output_dir, exist_ok=True)
    all_partitions = dyn_communities.snapshot_communities()
    for t, p in all_partitions.items():
        if asNodeSet:
            write_communities_as_nodeset(p, os.path.join(output_dir, str(t)))
        else:
            # convert {com: nodes} to {node: coms} before writing
            p = nodesets2affiliations(p)
            write_communities_as_affiliations(p, os.path.join(output_dir, str(t)))
def nb_node_change(dyn_com: tn.DynCommunitiesSN):
    """
    Compute the total number of node changes

    Measure of smoothness at the level of nodes, adapted to evaluate glitches

    :param dyn_com: The dynamic community
    :return: total number of node changes
    """
    # For every node, record the sequence of distinct consecutive
    # community labels it goes through over the snapshots.
    history = {}
    for t, coms in dyn_com.snapshot_communities().items():
        for com, nodes in coms.items():
            for n in nodes:
                track = history.setdefault(n, [com])
                # only append when the label actually changed
                if track[-1] != com:
                    track.append(com)

    # each label after the first in a node's track is one change
    return sum(len(track) - 1 for track in history.values())
def rollingCPM(dynNetSN: DynGraphSN, k=3):
    """

    This method is based on Palla et al[1]. It first computes overlapping
    snapshot_communities in each snapshot based on the clique percolation
    algorithm, and then matches snapshot_communities in successive steps using
    a method based on the union graph.

    [1] Palla, G., Barabási, A. L., & Vicsek, T. (2007).
    Quantifying social group evolution.
    Nature, 446(7136), 664.

    :param dynNetSN: a dynamic network (DynGraphSN)
    :param k: the size of cliques used as snapshot_communities building blocks
    :return: DynCommunitiesSN
    """
    DynCom = DynCommunitiesSN()
    old_communities = None
    old_graph = nx.Graph()

    graphs = dynNetSN.snapshots()

    for (date, graph) in graphs.items():

        # get the percolated cliques (snapshot_affiliations) as a list of sets of nodes
        communitiesAtT = list(_get_percolated_cliques(graph, k))
        for c in communitiesAtT:
            DynCom.add_community(date, c)

        if old_communities is None:  # if first snapshot
            old_graph = graph
            dateOld = date
            old_communities = communitiesAtT
        else:
            if len(communitiesAtT) > 0:  # if there is at least one community
                # create the union graph of the current and the previous snapshots
                union_graph = nx.compose(old_graph, graph)
                # get the snapshot_affiliations of the union graph
                communities_union = list(_get_percolated_cliques(union_graph, k))

                # we only care if the value is above 0
                jaccardBeforeAndUnion = _included(old_communities, communities_union)
                jaccardUnionAndAfter = _included(communitiesAtT, communities_union)

                for c in jaccardBeforeAndUnion:  # for each community in the union graph
                    matched = []
                    born = []
                    killed = []

                    # compute jaccard between all candidate pairs (before, after)
                    allJaccards = set()
                    for oldC in jaccardBeforeAndUnion[c]:
                        for newC in jaccardUnionAndAfter[c]:
                            allJaccards.add(((oldC, newC), _singleJaccard(oldC, newC)))
                    allJaccards = sorted(allJaccards, key=itemgetter(1), reverse=True)
                    # pairs of communities ordered by decreasing jaccard
                    # (note: 'pair' avoids shadowing parameter k in the comprehensions)
                    sortedMatches = [pair[0] for pair in allJaccards]

                    oldCToMatch = dict(jaccardBeforeAndUnion[c])  # get all coms before
                    newCToMatch = dict(jaccardUnionAndAfter[c])  # get all new coms

                    # greedy matching: repeatedly take the pair of highest jaccard
                    while len(sortedMatches) > 0:  # as long as there are couples of unmatched snapshot_affiliations
                        matchedKeys = sortedMatches[0]  # pair of snapshot_affiliations of highest jaccard
                        matched.append(matchedKeys)  # this pair will be matched

                        del oldCToMatch[matchedKeys[0]]  # delete chosen com from possible to match
                        del newCToMatch[matchedKeys[1]]
                        # keep only pairs of unmatched snapshot_affiliations
                        sortedMatches = [pair for pair in sortedMatches
                                         if len(set(matchedKeys) & set(pair)) == 0]

                    if len(oldCToMatch) > 0:
                        killed.append(list(oldCToMatch.keys())[0])
                    if len(newCToMatch) > 0:
                        born.append(list(newCToMatch.keys())[0])

                    for aMatch in matched:
                        DynCom.events.add_event(
                            (dateOld, DynCom._com_ID(dateOld, aMatch[0])),
                            (date, DynCom._com_ID(date, aMatch[1])),
                            dateOld, date, "continue")

                    # these are actual merges (unmatched snapshot_affiliations
                    # are "merged" to new ones)
                    for kil in killed:
                        for com in jaccardUnionAndAfter[c]:
                            DynCom.events.add_event(
                                (dateOld, DynCom._com_ID(dateOld, kil)),
                                (date, DynCom._com_ID(date, com)),
                                dateOld, date, "merged")

                    for b in born:
                        for com in jaccardBeforeAndUnion[c]:
                            DynCom.events.add_event(
                                (dateOld, DynCom._com_ID(dateOld, com)),
                                (date, DynCom._com_ID(date, b)),
                                dateOld, date, "split")

            old_graph = graph
            dateOld = date
            old_communities = communitiesAtT

    DynCom._relabel_coms_from_continue_events()
    return DynCom
def rollingCPM(dynNetSN: DynGraphSN, k=3, elapsed_time=False):
    """

    This method is based on Palla et al[1]. It first computes overlapping
    snapshot_communities in each snapshot based on the clique percolation
    algorithm, and then matches snapshot_communities in successive steps using
    a method based on the union graph.

    NOTE(review): this definition has the same name as the earlier rollingCPM
    in this file and therefore overrides it at import time — confirm whether
    the first variant should be removed or renamed.

    [1] Palla, G., Barabási, A. L., & Vicsek, T. (2007).
    Quantifying social group evolution.
    Nature, 446(7136), 664.

    :param dynNetSN: a dynamic network (DynGraphSN)
    :param k: the size of cliques used as snapshot_communities building blocks
    :param elapsed_time: if True, will return a tuple (communities,time_elapsed)
    :return: DynCommunitiesSN
    """
    DynCom = DynCommunitiesSN()
    old_communities = None
    old_graph = nx.Graph()

    graphs = dynNetSN.snapshots()

    time_Steps = {}
    start = time.time()
    step2 = start
    total_percolation = 0
    total_match = 0

    # detect communities in all snapshots in parallel; the context manager
    # guarantees the pool is cleaned up even if a worker raises
    with mp.Pool(mp.cpu_count()) as pool:
        allComs = pool.starmap_async(
            __compute_communities,
            [(SNt, dynNetSN.snapshots(SNt), k) for SNt in graphs]).get()

    com_ids = dict()

    for (date, communitiesAtT) in allComs:
        step1 = time.time()
        total_percolation += step1 - step2

        for current_com in communitiesAtT:
            # avoid shadowing builtin 'id'
            com_id = DynCom.add_community(date, current_com)
            com_ids[(date, current_com)] = com_id

        if old_communities is None:  # if first snapshot
            old_graph = graphs[date]
            dateOld = date
            old_communities = communitiesAtT
        else:
            if len(communitiesAtT) > 0:  # if there is at least one community
                # create the union graph of the current and the previous snapshots
                union_graph = nx.compose(old_graph, graphs[date])
                # get the snapshot_affiliations of the union graph
                communities_union = list(_get_percolated_cliques(union_graph, k))

                # we only care if the value is above 0
                jaccardBeforeAndUnion = _included(old_communities, communities_union)
                jaccardUnionAndAfter = _included(communitiesAtT, communities_union)

                already_assigned = set()
                for current_com in jaccardBeforeAndUnion:  # for each community in the union graph
                    matched = []
                    born = []
                    killed = []

                    # compute jaccard between communities included in it in t-1 and t+1
                    allJaccards = set()
                    for oldC in jaccardBeforeAndUnion[current_com]:
                        for newC in jaccardUnionAndAfter[current_com]:
                            if oldC not in already_assigned and newC not in already_assigned:
                                allJaccards.add(((oldC, newC), _singleJaccard(oldC, newC)))
                    allJaccards = sorted(allJaccards, key=itemgetter(1), reverse=True)
                    # list of pairs of communities in t-1 and t+1 ordered by
                    # decreasing jaccard ('pair' avoids shadowing parameter k)
                    sortedMatches = [pair[0] for pair in allJaccards]

                    oldCToMatch = dict(jaccardBeforeAndUnion[current_com])  # get all coms before
                    newCToMatch = dict(jaccardUnionAndAfter[current_com])  # get all new coms

                    # greedy matching: repeatedly take the pair of highest jaccard
                    while len(sortedMatches) > 0:  # as long as there are couples of unmatched communities (t-1,t+1) included in the current com
                        matchedKeys = sortedMatches[0]  # pair of snapshot_affiliations of highest jaccard
                        matched.append(matchedKeys)  # this pair will be matched

                        del oldCToMatch[matchedKeys[0]]  # delete chosen com from possible to match
                        del newCToMatch[matchedKeys[1]]
                        # keep only pairs of unmatched snapshot_affiliations
                        sortedMatches = [pair for pair in sortedMatches
                                         if len(set(matchedKeys) & set(pair)) == 0]

                    if len(oldCToMatch) > 0:
                        killed.append(list(oldCToMatch.keys())[0])
                    if len(newCToMatch) > 0:
                        born.append(list(newCToMatch.keys())[0])

                    for aMatch in matched:
                        already_assigned.add(aMatch[0])
                        already_assigned.add(aMatch[1])
                        DynCom.events.add_event(
                            (dateOld, com_ids[(dateOld, aMatch[0])]),
                            (date, com_ids[(date, aMatch[1])]),
                            dateOld, date, "continue")

                    # these are actual merges (unmatched snapshot_affiliations
                    # are "merged" to new ones)
                    for kil in killed:
                        for com in jaccardUnionAndAfter[current_com]:
                            DynCom.events.add_event(
                                (dateOld, com_ids[(dateOld, kil)]),
                                (date, com_ids[(date, com)]),
                                dateOld, date, "merged")

                    for b in born:
                        for com in jaccardBeforeAndUnion[current_com]:
                            DynCom.events.add_event(
                                (dateOld, com_ids[(dateOld, com)]),
                                (date, com_ids[(date, b)]),
                                dateOld, date, "split")

            step2 = time.time()
            total_match += step2 - step1

            old_graph = graphs[date]
            dateOld = date
            old_communities = communitiesAtT

    end = time.time()
    time_Steps["total"] = end - start
    time_Steps["CD"] = total_percolation
    time_Steps["match"] = total_match

    DynCom._relabel_coms_from_continue_events()

    if elapsed_time:
        return (DynCom, time_Steps)
    return DynCom
def longitudinal_similarity(dynamicCommunityReference: tn.DynCommunitiesSN,
                            dynamicCommunityObserved: tn.DynCommunitiesSN,
                            score=None,
                            convert_coms_sklearn_format=True):
    """
    Longitudinal similarity

    The longitudinal similarity between two dynamic clusters is computed by
    considering each couple (node,time) as an element belonging to a cluster;
    a cluster therefore contains nodes at different times.

    It takes into account the fact that the reference might be incomplete by
    removing from the partition to evaluate all (node,time) not present in
    the reference.

    :param dynamicCommunityReference: the dynamic partition used as reference (ground truth)
    :param dynamicCommunityObserved: the dynamic partition to evaluate (result of an algorithm)
    :param score: community comparison score, by default the adjusted NMI. (sklearn)
    :param convert_coms_sklearn_format: if the score expects in input clusters
        represented as in sklearn, True. If False, score will receive in input
        lists of sets of nodes
    :return: score
    """
    if score is None:
        # default: adjusted NMI with arithmetic averaging
        def score(x, y):
            return sklearn.metrics.adjusted_mutual_info_score(
                x, y, average_method="arithmetic")

    affilReference = []
    affilToEvaluate = []

    if convert_coms_sklearn_format:
        # build two parallel label lists, one element per (node, time)
        comsToEvaluate = dynamicCommunityObserved.snapshot_affiliations()

        # for each step
        for t, affils in dynamicCommunityReference.snapshot_affiliations().items():
            # for each node
            for n, comId in affils.items():
                affilReference.append(str(list(comId)[0]))
                if n in comsToEvaluate[t]:
                    affilToEvaluate.append(str(list(comsToEvaluate[t][n])[0]))
                else:
                    print("node not in partition to evaluate: ", str(n), " ", str(t))
                    affilToEvaluate.append("-1")
    else:
        # build clusters as sets of (node, time) elements
        affilReference = {}
        affilToEvaluate = {}
        for t, coms in dynamicCommunityReference.snapshot_communities().items():
            all_nodes = set()
            # 'com_id' avoids shadowing builtin 'id'
            for com_id, nodes in coms.items():
                node_sn = {(n, t) for n in nodes}
                all_nodes.update(node_sn)
                affilReference.setdefault(com_id, set()).update(node_sn)

            for com_id, nodes in dynamicCommunityObserved.snapshot_communities(t).items():
                node_sn = {(n, t) for n in nodes}
                # keep only elements present in the reference at this step
                affilToEvaluate.setdefault(com_id, set()).update(node_sn & all_nodes)

        affilReference = list(affilReference.values())
        affilToEvaluate = list(affilToEvaluate.values())

    return score(affilReference, affilToEvaluate)