Example #1
0
    def average_internal_degree(self, **kwargs):
        """
        The average internal degree of the algorithms set.

        .. math:: f(S) = \\frac{2m_S}{n_S}

        where :math:`m_S` is the number of internal edges of the community
        and :math:`n_S` is the number of community nodes.

        :param summary: (optional, default True) if **True**, an overall summary is returned for the partition (min, max, avg, std); if **False** a list of community-wise scores
        :return: a FitnessResult object / a list of community-wise scores

        :Example:

        >>> from cdlib.algorithms import louvain
        >>> g = nx.karate_club_graph()
        >>> communities = louvain(g)
        >>> mod = communities.average_internal_degree()


        """
        # Guard clause: scoring is impossible without a bound graph instance.
        if not self.__check_graph():
            raise ValueError("Graph instance not specified")
        return evaluation.average_internal_degree(self.graph, self, **kwargs)
def community_evaluation_metrics(cdlib_coms):
    """
    Compute a table of community-wise fitness scores for a cdlib clustering.

    Metric reference: https://cdlib.readthedocs.io/en/latest/reference/evaluation.html

    :param cdlib_coms: a cdlib clustering object exposing ``.graph``
    :return: a pandas DataFrame with one row per community; the first column
             ``community_id`` is 1-based, the remaining columns are the
             community-wise scores listed in ``metrics`` below.
    """
    uG = cdlib_coms.graph

    # Column name -> cdlib evaluation function. Insertion order fixes the
    # column order of the resulting DataFrame.
    metrics = {
        'embeddedness': evaluation.avg_embeddedness,
        'average_internal_degree': evaluation.average_internal_degree,
        'conductance': evaluation.conductance,
        'transitivity': evaluation.avg_transitivity,
        'cut_ratio': evaluation.cut_ratio,
        'expansion': evaluation.expansion,
        'edges_inside': evaluation.edges_inside,
        'fraction_over_median_degree': evaluation.fraction_over_median_degree,
        'hub_dominance': evaluation.hub_dominance,
        'internal_edge_density': evaluation.internal_edge_density,
        'max_odf': evaluation.max_odf,
        'avg_odf': evaluation.avg_odf,
        'flake_odf': evaluation.flake_odf,
        'size': evaluation.size,
        'triangle_participation_ratio': evaluation.triangle_participation_ratio,
    }

    # summary=False yields one score per community instead of an aggregate.
    eval_dict = {name: fn(uG, cdlib_coms, summary=False)
                 for name, fn in metrics.items()}

    com_eval_df = (pd.DataFrame(eval_dict)
                   .reset_index()
                   .rename({'index': 'community_id'}, axis=1))
    # Shift the auto-generated 0-based index to 1-based community ids.
    com_eval_df['community_id'] = com_eval_df['community_id'] + 1
    return com_eval_df
def _nan_on_error(compute):
    """Return ``compute()``'s value, or ``np.nan`` if it raises.

    Several cdlib evaluation metrics fail on degenerate partitions
    (e.g. singleton or overlapping communities); scoring is best-effort.
    ``except Exception`` (not a bare ``except``) so that
    KeyboardInterrupt/SystemExit still propagate.
    """
    try:
        return compute()
    except Exception:
        return np.nan


def getAllScoresDict(g, _reference, _communities, executionTime):
    """
    Score a detected community partition against a reference partition.

    Both partitions are deep-copied and completed over the graph via
    ``complete_partition(..., mode='new_cluster')`` so every node belongs
    to some community before scoring; the inputs are not modified.
    Metrics that raise are recorded as ``np.nan`` instead of aborting.

    :param g: the graph both partitions refer to
    :param _reference: ground-truth clustering (left untouched)
    :param _communities: detected clustering (left untouched)
    :param executionTime: runtime of the detection algorithm, stored under 'time'
    :return: dict mapping metric name -> score (float or np.nan)
    """
    scores = {}
    scores['time'] = executionTime
    reference = copy.deepcopy(_reference)
    reference.communities = complete_partition(reference.communities, g, mode='new_cluster')
    communities = copy.deepcopy(_communities)
    communities.communities = complete_partition(communities.communities, g, mode='new_cluster')

    # --- partition-vs-reference similarity scores --------------------------
    # Average F1 of the optimal community matches among the two partitions.
    scores['f1'] = _nan_on_error(lambda: evaluation.f1(reference, communities).score)
    # Normalized F1 of the optimal community matches among the two partitions.
    scores['nf1'] = _nan_on_error(lambda: evaluation.nf1(reference, communities).score)
    # Index of resemblance for overlapping, complete-coverage clusterings.
    scores['omega'] = _nan_on_error(lambda: evaluation.omega(reference, communities).score)
    # Overlapping Normalized Mutual Information between the two clusterings.
    scores['overlapping_normalized_mutual_information_LFK'] = _nan_on_error(
        lambda: evaluation.overlapping_normalized_mutual_information_LFK(reference, communities)[0])

    # --- community fitness scores (partition-level summary) ----------------
    scores['avg_embeddedness'] = _nan_on_error(
        lambda: evaluation.avg_embeddedness(g, communities, summary=True).score)
    scores['average_internal_degree'] = _nan_on_error(
        lambda: evaluation.average_internal_degree(g, communities, summary=True).score)
    # Fraction of total edge volume that points outside the community.
    scores['conductance'] = _nan_on_error(
        lambda: evaluation.conductance(g, communities, summary=True).score)
    # Fraction of existing edges (out of all possible) leaving the community.
    scores['cut_ratio'] = _nan_on_error(
        lambda: evaluation.cut_ratio(g, communities, summary=True).score)
    # Number of edges internal to the community.
    scores['edges_inside'] = _nan_on_error(
        lambda: evaluation.edges_inside(g, communities, summary=True).score)
    # Number of edges per community node that point outside the cluster.
    scores['expansion'] = _nan_on_error(
        lambda: evaluation.expansion(g, communities, summary=True).score)
    # Fraction of community nodes with internal degree above the median degree.
    scores['fraction_over_median_degree'] = _nan_on_error(
        lambda: evaluation.fraction_over_median_degree(g, communities, summary=True).score)
    # The internal density of the community set.
    scores['internal_edge_density'] = _nan_on_error(
        lambda: evaluation.internal_edge_density(g, communities, summary=True).score)
    # Normalized variant of the cut ratio.
    scores['normalized_cut'] = _nan_on_error(
        lambda: evaluation.normalized_cut(g, communities, summary=True).score)
    # Ratio of the community density w.r.t. the complete-graph density.
    scores['scaled_density'] = _nan_on_error(
        lambda: evaluation.scaled_density(g, communities, summary=True).score)
    # How likely a partition of dense communities appears in a random graph.
    scores['significance'] = _nan_on_error(
        lambda: evaluation.significance(g, communities).score)
    # Number of nodes in the community.
    scores['size'] = _nan_on_error(
        lambda: evaluation.size(g, communities, summary=True).score)
    # Higher surprise = partition less likely under a random (hyper-geometric)
    # edge model, i.e. better community structure.
    scores['surprise'] = _nan_on_error(
        lambda: evaluation.surprise(g, communities).score)
    scores['modularity_density'] = _nan_on_error(
        lambda: evaluation.modularity_density(g, communities).score)

    # Metrics deliberately not computed here (kept out in the original code):
    # adjusted_mutual_information, adjusted_rand_index,
    # normalized_mutual_information, overlapping_normalized_mutual_information_MGH,
    # variation_of_information, avg_distance, avg_transitivity, hub_dominance,
    # max_odf, avg_odf, flake_odf, triangle_participation_ratio, purity.
    return scores
            forDf['score'].append(score)

    df = pd.DataFrame(forDf)
    df.to_csv('cdlib_results_legacy_part.csv')

    graphStatistics = {'graph': [], 'statistic': [], 'value':[]}
    statistics = {
        '$|V|$': lambda graph, reference: len(graph.nodes),
        '$|E|$': lambda graph, reference: len(graph.edges),
        '$\\hat{k}$': lambda graph, reference: len(reference.communities),
        '$\\hat{o}$': lambda graph, reference: getOverlappingNumber(reference.communities),
        '$\\hat{c}$': lambda graph, reference: nx.average_clustering(graph),
        '\\shortstack[l]{normalized \\\\ cut}': lambda graph, reference: evaluation.normalized_cut(graph,reference, summary=True).score,
        '\\shortstack[l]{internal \\\\ edge \\\\density}': lambda graph, reference: evaluation.internal_edge_density(graph,reference, summary=True).score,
        'significance': lambda graph, reference: evaluation.significance(graph,reference, summary=True).score,
        '\\shortstack[l]{avarage \\\\ internal \\\\degree}': lambda graph, reference: evaluation.average_internal_degree(graph,reference, summary=True).score,
        '\\shortstack[l]{modularity \\\\ density}': lambda graph, reference: evaluation.modularity_density(graph,reference).score,
    }

    for graph, name, reference in zip(graphs, names, references):
        for statistic_name, statistic in statistics.items():
            graphStatistics['graph'].append(name)
            graphStatistics['statistic'].append(statistic_name)
            graphStatistics['value'].append(statistic(graph, reference) )

    graphStatisticsDF = pd.DataFrame(graphStatistics)
    graphStatisticsDF.to_csv('../Results/graphs_stats_legacy.csv')

    # method that were not used, or don't work by some reason

    # wrong implementaion of amplified commute distance