Example #1
def consecutive_sn_similarity(dynamicCommunity: tn.DynCommunitiesSN,
                              score=None):
    """
       Similarity between partitions in consecutive snapshots.

        Compute the average of a similarity score between all pair of successive partitions

       :param dynamicCommunity: the dynamic partition to evaluate
       :param score: the score to use for computing the similarity between each pair of snapshots. default: Overlapping NMI
       :return: pair (list of scores, list of partition sizes (avg both partitions))
       """
    if score == None:
        score = onmi  #We use onmi because the number of labels can be different
    scores = []
    sizes = []

    #for each step
    com_snapshots = list(dynamicCommunity.snapshot_communities().values())
    for i in range(len(com_snapshots) - 1):

        partition_before = list(com_snapshots[i].values())
        partition_after = list(com_snapshots[i + 1].values())

        elts_before = sum(len(x) for x in partition_before)
        elts_after = sum(len(x) for x in partition_after)

        scores.append(score(partition_before, partition_after))
        sizes.append((elts_after + elts_before) / 2)

    return scores, sizes
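
A minimal usage sketch (illustrative, not from the library): it builds a toy dynamic partition via add_community, as used in Examples #6 and #7, and passes a custom Jaccard-based score so the onmi dependency is not needed.

import tnetwork as tn

def avg_best_jaccard(partition1, partition2):
    # for each community of partition1, keep its best Jaccard match in partition2
    def jaccard(a, b):
        return len(a & b) / len(a | b)
    return sum(max(jaccard(c1, c2) for c2 in partition2)
               for c1 in partition1) / len(partition1)

toy_coms = tn.DynCommunitiesSN()
toy_coms.add_community(0, {"a", "b", "c"})
toy_coms.add_community(0, {"d", "e"})
toy_coms.add_community(1, {"a", "b"})
toy_coms.add_community(1, {"c", "d", "e"})

scores, sizes = consecutive_sn_similarity(toy_coms, score=avg_best_jaccard)
print(scores, sizes)  # one similarity score and one average size per consecutive pair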
Example #2
def quality_at_each_step(dynamicCommunities: tn.DynCommunitiesSN,
                         dynamicGraph: tn.DynGraphSN,
                         score=None):
    """
    Compute a community quality at each step

    :param dynamicCommunities: dynamic communities as SN
    :param score: score to use, default: Modularity
    :return: pair(scores, sizes)
    """

    if score == None:
        score = nx.algorithms.community.modularity
    scores = []
    sizes = []

    #for each step
    for t, affils in dynamicCommunities.snapshot_communities().items():
        g = dynamicGraph.snapshots(t)
        partition = list(affils.values())
        try:
            sc = score(g, partition)
            scores.append(sc)
        except Exception:
            # the score can fail, e.g. if the partition does not cover all nodes of g
            scores.append(None)
        sizes.append(len(g.nodes))

    return scores, sizes
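
A minimal usage sketch (toy data; assumes DynGraphSN.add_interaction as shown in the tnetwork quick start):

import tnetwork as tn

toy_graph = tn.DynGraphSN()
toy_coms = tn.DynCommunitiesSN()
for t in (0, 1):
    toy_graph.add_interaction("a", "b", t)
    toy_graph.add_interaction("c", "d", t)
    toy_coms.add_community(t, {"a", "b"})
    toy_coms.add_community(t, {"c", "d"})

scores, sizes = quality_at_each_step(toy_coms, toy_graph)
print(scores)  # modularity of the partition at each snapshot
print(sizes)   # number of nodes in each snapshot graph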
Example #3
def similarity_at_each_step(dynamicCommunityReference: tn.DynCommunitiesSN,
                            dynamicCommunityObserved: tn.DynCommunitiesSN,
                            score=None):
    """
    Compute similarity at each step

    It takes into account the fact that the reference might by incomplete. (remove from the observations all nodes/time not present in the reference)

    :param dynamicCommunityReference: the dynamic partition to use as reference
    :param dynamicCommunityObserved: the dynamic partition to evaluate
    :param score: score to use, default adjusted NMI
    :return: pair (list of scores, list of sizes)
    """

    if score == None:
        score = sklearn.metrics.adjusted_mutual_info_score
    scores = []
    sizes = []

    comsToEvaluate = dynamicCommunityObserved.snapshot_affiliations()

    #for each step
    for t, affils in dynamicCommunityReference.snapshot_affiliations().items():
        affilReference = []
        affilToEvaluate = []

        #for each node
        for n, comId in affils.items():
            affilReference.append(list(comId)[0])
            if n in comsToEvaluate[t]:
                affilToEvaluate.append(list(comsToEvaluate[t][n])[0])
            else:
                affilToEvaluate.append("-1")
        scores.append(score(affilReference, affilToEvaluate))
        sizes.append(len(affilReference))

    return scores, sizes
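
A minimal usage sketch comparing a toy reference with an imperfect observed partition (illustrative data only):

import tnetwork as tn

reference = tn.DynCommunitiesSN()
observed = tn.DynCommunitiesSN()
for t in (0, 1):
    reference.add_community(t, {"a", "b"})
    reference.add_community(t, {"c", "d"})
    observed.add_community(t, {"a", "b", "c"})
    observed.add_community(t, {"d"})

scores, sizes = similarity_at_each_step(reference, observed)
print(scores)  # adjusted NMI at each step
print(sizes)   # number of reference nodes at each step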
Example #4
def write_com_SN(dyn_communities: tn.DynCommunitiesSN,
                 output_dir,
                 asNodeSet=True):
    """
    Write directory, 1 file = snapshot_affiliations of a snaphshot

    Write dynamic snapshot_affiliations as a directory containing one file for each snapshot.

    Two possible formats:

    **Affiliations:**
    ::

            node1   com1    com2
            node2   com1
            node3   com2    com3    com4

    **Node Sets:**
    ::

            com:com1    n1  n2  n3
            com:another_com    n1   n4  n5


    :param dynGraph: a dynamic graph
    :param outputDir: address of the directory to write
    :param asNodeSet: if True, node sets, otherwise, snapshot_affiliations

    """
    os.makedirs(output_dir, exist_ok=True)
    all_partitions = dyn_communities.snapshot_communities()
    for t, p in all_partitions.items():
        if asNodeSet:
            write_communities_as_nodeset(p, os.path.join(output_dir, str(t)))
        else:
            p = nodesets2affiliations(p)
            write_communities_as_affiliations(p,
                                              os.path.join(output_dir, str(t)))
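
A minimal usage sketch writing one file per snapshot into a temporary directory (tempfile is used only for the demo):

import os
import tempfile
import tnetwork as tn

toy_coms = tn.DynCommunitiesSN()
toy_coms.add_community(0, {"n1", "n2", "n3"})
toy_coms.add_community(1, {"n1", "n4"})

out_dir = tempfile.mkdtemp()
write_com_SN(toy_coms, out_dir, asNodeSet=True)
print(sorted(os.listdir(out_dir)))  # one file per snapshot: ['0', '1']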
Example #5
def nb_node_change(dyn_com: tn.DynCommunitiesSN):
    """
    Compute the total number of node changes

    Measure of smoothness at the level of nodes, adapated to evaluate glitches

    :param dyn_com: The dynamic community
    :return: total number of node changes
    """
    coms_by_nodes = {}
    for t, coms in dyn_com.snapshot_communities().items():
        for com, nodes in coms.items():
            for n in nodes:
                # record the sequence of distinct successive affiliations of each node
                coms_by_nodes.setdefault(n, [com])
                if coms_by_nodes[n][-1] != com:
                    coms_by_nodes[n].append(com)
    nb_changes = 0
    for n in coms_by_nodes:
        nb_changes += len(coms_by_nodes[n]) - 1
    return nb_changes
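
A minimal usage sketch (assumes add_community accepts an explicit community id, so labels stay stable across snapshots): node "c" switches from com2 to com1 between t=0 and t=1, so the expected count is 1.

import tnetwork as tn

toy_coms = tn.DynCommunitiesSN()
toy_coms.add_community(0, {"a", "b"}, id="com1")
toy_coms.add_community(0, {"c", "d"}, id="com2")
toy_coms.add_community(1, {"a", "b", "c"}, id="com1")
toy_coms.add_community(1, {"d"}, id="com2")

print(nb_node_change(toy_coms))  # 1: only "c" changed community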
Example #6
def rollingCPM(dynNetSN: DynGraphSN, k=3):
    """

    This method is based on Palla et al[1]. It first computes overlapping snapshot_communities in each snapshot based on the
    clique percolation algorithm, and then match snapshot_communities in successive steps using a method based on the
    union graph.

    [1] Palla, G., Barabási, A. L., & Vicsek, T. (2007).
    Quantifying social group evolution.
    Nature, 446(7136), 664.

    :param dynNetSN: a dynamic network (DynGraphSN)
    :param k: the size of cliques used as snapshot_communities building blocks
    :return: DynCommunitiesSN
    """

    DynCom = DynCommunitiesSN()
    old_communities = None
    old_graph = nx.Graph()

    graphs = dynNetSN.snapshots()

    for (date, graph) in graphs.items():
        # get the percolated cliques (communities) as a list of sets of nodes
        communitiesAtT = list(_get_percolated_cliques(graph, k))
        for c in communitiesAtT:
            DynCom.add_community(date, c)

        if old_communities is None:  #if first snapshot
            old_graph = graph
            dateOld = date
            old_communities = communitiesAtT

        else:
            if len(communitiesAtT) > 0:  #if there is at least one community
                union_graph = nx.compose(
                    old_graph, graph
                )  #create the union graph of the current and the previous
                # get the communities of the union graph
                communities_union = list(_get_percolated_cliques(union_graph, k))

                jaccardBeforeAndUnion = _included(
                    old_communities,
                    communities_union)  #we only care if the value is above 0
                jaccardUnionAndAfter = _included(
                    communitiesAtT,
                    communities_union)  #we only care if the value is above 0

                for c in jaccardBeforeAndUnion:  #for each community in the union graph
                    matched = []
                    born = []
                    killed = []

                    allJaccards = set()
                    for oldC in jaccardBeforeAndUnion[c]:
                        for newC in jaccardUnionAndAfter[c]:
                            allJaccards.add(
                                ((oldC, newC), _singleJaccard(oldC, newC))
                            )  #compute jaccard between candidates before and after
                    allJaccards = sorted(allJaccards,
                                         key=itemgetter(1),
                                         reverse=True)
                    sortedMatches = [p[0] for p in allJaccards]

                    oldCToMatch = dict(
                        jaccardBeforeAndUnion[c])  #get all coms before
                    newCToMatch = dict(
                        jaccardUnionAndAfter[c])  #get all new coms
                    while len(sortedMatches) > 0:  #as long as there are couples of unmatched communities
                        matchedKeys = sortedMatches[0]  #pair of communities with the highest jaccard
                        matched.append(matchedKeys)  #this pair will be matched

                        del oldCToMatch[matchedKeys[
                            0]]  #delete chosen com from possible to match
                        del newCToMatch[matchedKeys[1]]
                        # keep only pairs of still-unmatched communities
                        sortedMatches = [
                            p for p in sortedMatches
                            if len(set(matchedKeys) & set(p)) == 0
                        ]

                    if len(oldCToMatch) > 0:
                        killed.append(list(oldCToMatch.keys())[0])
                    if len(newCToMatch) > 0:
                        born.append(list(newCToMatch.keys())[0])

                    for aMatch in matched:
                        DynCom.events.add_event(
                            (dateOld, DynCom._com_ID(dateOld, aMatch[0])),
                            (date, DynCom._com_ID(date, aMatch[1])), dateOld,
                            date, "continue")

                    for kil in killed:  #these are actual merges (unmatched old communities are "merged" into new ones)
                        for com in jaccardUnionAndAfter[c]:
                            DynCom.events.add_event(
                                (dateOld, DynCom._com_ID(dateOld, kil)),
                                (date, DynCom._com_ID(date, com)), dateOld,
                                date, "merged")

                    for b in born:  #these are actual splits (unmatched new communities are "split" from old ones)
                        for com in jaccardBeforeAndUnion[c]:
                            DynCom.events.add_event(
                                (dateOld, DynCom._com_ID(dateOld, com)),
                                (date, DynCom._com_ID(date, b)), dateOld, date,
                                "split")

            old_graph = graph
            dateOld = date
            old_communities = communitiesAtT
    DynCom._relabel_coms_from_continue_events()

    return DynCom
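
The private helpers called above are not shown on this page. A plausible reconstruction, inferred from how they are used (not necessarily the library's actual code):

import networkx as nx
from itertools import combinations

def _singleJaccard(set1, set2):
    # Jaccard index between two node sets
    return len(set1 & set2) / len(set1 | set2)

def _included(communities, communities_union):
    # for each union-graph community, the communities intersecting it,
    # with the fraction of their nodes it covers
    result = {}
    for u in communities_union:
        result[u] = {c: len(c & u) / len(c)
                     for c in communities if len(c & u) > 0}
    return result

def _get_percolated_cliques(g, k):
    # classic k-clique percolation: communities are the connected components
    # of the graph whose nodes are cliques of size >= k, linked when they
    # share at least k-1 nodes
    cliques = [frozenset(c) for c in nx.find_cliques(g) if len(c) >= k]
    perc = nx.Graph()
    perc.add_nodes_from(cliques)
    for c1, c2 in combinations(cliques, 2):
        if len(c1 & c2) >= k - 1:
            perc.add_edge(c1, c2)
    for component in nx.connected_components(perc):
        yield frozenset.union(*component)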
Example #7
def rollingCPM(dynNetSN: DynGraphSN, k=3, elapsed_time=False):
    """

    This method is based on Palla et al[1]. It first computes overlapping snapshot_communities in each snapshot based on the
    clique percolation algorithm, and then match snapshot_communities in successive steps using a method based on the
    union graph.

    [1] Palla, G., Barabási, A. L., & Vicsek, T. (2007).
    Quantifying social group evolution.
    Nature, 446(7136), 664.

    :param dynNetSN: a dynamic network (DynGraphSN)
    :param k: the size of cliques used as snapshot_communities building blocks
    :param elapsed_time: if True, will return a tuple (communities,time_elapsed)
    :return: DynCommunitiesSN
    """

    DynCom = DynCommunitiesSN()
    old_communities = None
    old_graph = nx.Graph()

    graphs = dynNetSN.snapshots()

    time_Steps = {}
    start = time.time()
    step2 = start

    total_percolation = 0
    total_match = 0

    pool = mp.Pool(mp.cpu_count())

    allComs = pool.starmap_async(__compute_communities,
                                 [(SNt, dynNetSN.snapshots(SNt), k)
                                  for SNt in graphs]).get()
    print("CD detection done", len(allComs))
    pool.close()

    com_ids = dict()
    for (date, communitiesAtT) in allComs:
        #print("------------",date)
        #for (date, graph) in graphs.items():

        #communitiesAtT = list(_get_percolated_cliques(graph, k)) #get the percolated cliques (snapshot_affiliations) as a list of set of nodes
        step1 = time.time()
        total_percolation += step1 - step2
        for current_com in communitiesAtT:
            id = DynCom.add_community(date, current_com)
            com_ids[(date, current_com)] = id

        if old_communities is None:  #if first snapshot
            old_graph = graphs[date]
            dateOld = date
            old_communities = communitiesAtT

        else:
            if len(communitiesAtT) > 0:  #if there is at least one community
                union_graph = nx.compose(
                    old_graph, graphs[date]
                )  #create the union graph of the current and the previous
                # get the communities of the union graph
                communities_union = list(_get_percolated_cliques(union_graph, k))

                jaccardBeforeAndUnion = _included(
                    old_communities,
                    communities_union)  #we only care if the value is above 0
                jaccardUnionAndAfter = _included(
                    communitiesAtT,
                    communities_union)  #we only care if the value is above 0

                already_assigned = set()
                for current_com in jaccardBeforeAndUnion:  #for each community in the union graph
                    matched = []
                    born = []
                    killed = []

                    allJaccards = set()
                    for oldC in jaccardBeforeAndUnion[
                            current_com]:  #for communities included in it at t-1
                        for newC in jaccardUnionAndAfter[
                                current_com]:  #and at t+1
                            if oldC not in already_assigned and newC not in already_assigned:
                                allJaccards.add(
                                    ((oldC, newC), _singleJaccard(
                                        oldC,
                                        newC)))  #compute jaccard between those

                    allJaccards = sorted(allJaccards,
                                         key=itemgetter(1),
                                         reverse=True)
                    # list of pairs of communities at t-1 and t+1, ordered by decreasing jaccard
                    sortedMatches = [p[0] for p in allJaccards]

                    oldCToMatch = dict(jaccardBeforeAndUnion[current_com]
                                       )  #get all coms before
                    newCToMatch = dict(
                        jaccardUnionAndAfter[current_com])  #get all new coms
                    while len(sortedMatches) > 0:  #as long as there are couples of unmatched communities (t-1, t+1) included in the current com
                        matchedKeys = sortedMatches[0]  #pair of communities with the highest jaccard
                        matched.append(matchedKeys)  #this pair will be matched

                        del oldCToMatch[matchedKeys[
                            0]]  #delete chosen com from possible to match
                        del newCToMatch[matchedKeys[1]]
                        # keep only pairs of still-unmatched communities
                        sortedMatches = [
                            p for p in sortedMatches
                            if len(set(matchedKeys) & set(p)) == 0
                        ]

                    if len(oldCToMatch) > 0:
                        killed.append(list(oldCToMatch.keys())[0])
                    if len(newCToMatch) > 0:
                        born.append(list(newCToMatch.keys())[0])

                    for aMatch in matched:
                        #print("--",aMatch)
                        already_assigned.add(aMatch[0])
                        already_assigned.add(aMatch[1])

                        DynCom.events.add_event(
                            (dateOld, com_ids[(dateOld, aMatch[0])]),
                            (date, com_ids[(date, aMatch[1])]), dateOld, date,
                            "continue")

                    for kil in killed:  #these are actual merges (unmatched old communities are "merged" into new ones)
                        for com in jaccardUnionAndAfter[current_com]:
                            DynCom.events.add_event(
                                (dateOld, com_ids[(dateOld, kil)]),
                                (date, com_ids[(date, com)]), dateOld, date,
                                "merged")

                    for b in born:  #these are actual splits (unmatched new communities are "split" from old ones)
                        for com in jaccardBeforeAndUnion[current_com]:
                            DynCom.events.add_event(
                                (dateOld, com_ids[(dateOld, com)]),
                                (date, com_ids[(date, b)]), dateOld, date,
                                "split")
            step2 = time.time()
            total_match += step2 - step1

            old_graph = graphs[date]
            dateOld = date
            old_communities = communitiesAtT

    end = time.time()
    time_Steps["total"] = end - start
    time_Steps["CD"] = total_percolation
    time_Steps["match"] = total_match

    DynCom._relabel_coms_from_continue_events()

    if elapsed_time:
        return (DynCom, time_Steps)
    return DynCom
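
A minimal usage sketch for the parallel variant (assumes mp is the multiprocessing module and DynGraphSN.add_interaction as in the tnetwork quick start; the helper __compute_communities is not shown here, but by analogy with Example #6 it presumably computes the percolated cliques of one snapshot). The __main__ guard matters because the function spawns a multiprocessing pool.

import tnetwork as tn

if __name__ == "__main__":
    toy_graph = tn.DynGraphSN()
    for t in (0, 1, 2):
        for u, v in [("a", "b"), ("b", "c"), ("a", "c"), ("c", "d")]:
            toy_graph.add_interaction(u, v, t)

    # the triangle a-b-c yields one 3-clique community in every snapshot
    communities, timings = rollingCPM(toy_graph, k=3, elapsed_time=True)
    print(timings)  # {'total': ..., 'CD': ..., 'match': ...}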
Example #8
def longitudinal_similarity(dynamicCommunityReference: tn.DynCommunitiesSN,
                            dynamicCommunityObserved: tn.DynCommunitiesSN,
                            score=None,
                            convert_coms_sklearn_format=True):
    """
    Longitudinal similarity

    The longitudinal similarity between two dynamic clusters is computed by considering each couple (node,time) as an element belong to a cluster, a cluster containing therefore nodes in differnt times
    It takes into account the fact that the reference might by incomplete by removing from the partition to evaluate all (node,time) not present in the reference.

    :param dynamicCommunityReference: the dynamic partition used as reference (ground truth)
    :param dynamicCommunityObserved: the dynamic partition to evaluate (result of an algorithm)
    :param score: community comparison score, by default the adjsted NMI. (sklearn)
    :param convert_coms_sklearn_format: if the score expect in input clusters represented as in sklearn, True. if False, score will receive in input lists of sets of nodes
    :return: score
    """

    if score == None:
        score = lambda x, y: sklearn.metrics.adjusted_mutual_info_score(
            x, y, average_method="arithmetic")

    affilReference = []
    affilToEvaluate = []

    if convert_coms_sklearn_format:

        comsToEvaluate = dynamicCommunityObserved.snapshot_affiliations()

        #for each step
        for t, affils in dynamicCommunityReference.snapshot_affiliations(
        ).items():

            #for each node
            for n, comId in affils.items():
                affilReference.append(str(list(comId)[0]))
                if n in comsToEvaluate[t]:
                    affilToEvaluate.append(str(list(comsToEvaluate[t][n])[0]))
                else:
                    print("node not in partition to evaluate: ", str(n), " ",
                          str(t))
                    affilToEvaluate.append("-1")
    else:

        affilReference = {}
        affilToEvaluate = {}
        for t, coms in dynamicCommunityReference.snapshot_communities().items(
        ):
            all_nodes = set()
            for id, nodes in coms.items():
                node_sn = {(n, t) for n in nodes}
                all_nodes.update(node_sn)
                affilReference.setdefault(id, set()).update(node_sn)

            for id, nodes in dynamicCommunityObserved.snapshot_communities(
                    t).items():
                node_sn = {(n, t) for n in nodes}

                affilToEvaluate.setdefault(id,
                                           set()).update(node_sn & all_nodes)

        affilReference = list(affilReference.values())
        affilToEvaluate = list(affilToEvaluate.values())

    return score(affilReference, affilToEvaluate)
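
A minimal usage sketch: the observed partition matches the reference exactly, so the adjusted NMI should be 1.0 (toy data as in Example #3):

import tnetwork as tn

reference = tn.DynCommunitiesSN()
observed = tn.DynCommunitiesSN()
for t in (0, 1):
    reference.add_community(t, {"a", "b"})
    reference.add_community(t, {"c", "d"})
    observed.add_community(t, {"a", "b"})
    observed.add_community(t, {"c", "d"})

print(longitudinal_similarity(reference, observed))  # ~1.0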